[Theta] Add extractor (#1068)

Authored by: alerikaisattera
This commit is contained in:
Aleri Kaisattera 2021-09-24 06:53:51 +06:00 committed by GitHub
parent 99e9e001de
commit eb6d4ad1ca
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 52 additions and 0 deletions

View file

@ -1428,6 +1428,7 @@ from .theplatform import (
from .thescene import TheSceneIE from .thescene import TheSceneIE
from .thestar import TheStarIE from .thestar import TheStarIE
from .thesun import TheSunIE from .thesun import TheSunIE
from .theta import ThetaIE
from .theweatherchannel import TheWeatherChannelIE from .theweatherchannel import TheWeatherChannelIE
from .thisamericanlife import ThisAmericanLifeIE from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE from .thisav import ThisAVIE

51
yt_dlp/extractor/theta.py Normal file
View file

@ -0,0 +1,51 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    try_get,
)
class ThetaIE(InfoExtractor):
    """Extractor for live channels on theta.tv.

    The channel alias taken from the URL is looked up through the
    ``https://api.theta.tv/v1/channel`` endpoint, and the HLS playlist listed
    under ``live_stream.video_urls`` in the response body is expanded into
    formats.
    """

    _VALID_URL = r'https?://(?:www\.)?theta\.tv/(?P<id>[a-z0-9]+)'
    _TESTS = [{
        'url': 'https://www.theta.tv/davirus',
        'skip': 'The live may have ended',
        'info_dict': {
            'id': 'DaVirus',
            'ext': 'mp4',
            'title': 'I choose you - My Community is King -👀 - YO HABLO ESPANOL - CODE DAVIRUS',
            'thumbnail': r're:https://live-thumbnails-prod-theta-tv\.imgix\.net/thumbnail/.+\.jpg',
        }
    }, {
        'url': 'https://www.theta.tv/mst3k',
        'note': 'This channel is live 24/7',
        'info_dict': {
            'id': 'MST3K',
            'ext': 'mp4',
            'title': 'Mystery Science Theatre 3000 24/7 Powered by the THETA Network.',
            'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+\.jpg',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the live stream of the channel in *url*.

        Raises:
            ExtractorError: (expected) when the API response lists no
                non-embed HLS playlist with resolution ``master`` or
                ``source``.
        """
        channel_id = self._match_id(url)
        info = self._download_json(
            f'https://api.theta.tv/v1/channel?alias={channel_id}', channel_id)['body']

        # 'live_stream' is looked up defensively: a missing or null entry
        # (presumably an offline channel - confirm against the API) must not
        # surface as a raw TypeError/KeyError.
        video_urls = try_get(info, lambda x: x['live_stream']['video_urls'], list) or []

        # Give next() a default so that an empty or non-matching list does not
        # leak a bare StopIteration out of the extractor.
        m3u8_playlist = next((
            data['url'] for data in video_urls
            if data.get('type') != 'embed' and data.get('resolution') in ('master', 'source')), None)
        if not m3u8_playlist:
            raise ExtractorError(
                'No live stream found; the channel may be offline', expected=True)

        formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True)
        self._sort_formats(formats)

        # using this field instead of channel_id due to capitalization
        channel = try_get(info, lambda x: x['user']['username'])

        return {
            # 'id' must never be None, so fall back to the alias from the URL
            'id': channel or channel_id,
            'title': try_get(info, lambda x: x['live_stream']['title']),
            'channel': channel,
            'view_count': try_get(info, lambda x: x['live_stream']['view_count']),
            'is_live': True,
            'formats': formats,
            'thumbnail': try_get(info, lambda x: x['live_stream']['thumbnail_url']),
        }