diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 3d167d86d..3990d97c6 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2415,6 +2415,7 @@ from .webcamerapl import WebcameraplIE from .webcaster import ( WebcasterFeedIE, WebcasterIE, + WebcasterPlayerEmbedIE, ) from .webofstories import ( WebOfStoriesIE, diff --git a/yt_dlp/extractor/matchtv.py b/yt_dlp/extractor/matchtv.py index 045164a99..3a3f6c2f7 100644 --- a/yt_dlp/extractor/matchtv.py +++ b/yt_dlp/extractor/matchtv.py @@ -1,11 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - determine_ext, - extract_attributes, - join_nonempty, - xpath_text, -) -from ..utils.traversal import traverse_obj +from .webcaster import WebcasterBaseIE, WebcasterFeedBaseIE class MatchTVIE(InfoExtractor): @@ -42,53 +36,15 @@ class MatchTVIE(InfoExtractor): } -# WebcasterIE -class MatchTVVideoIE(InfoExtractor): +class MatchTVVideoIE(WebcasterBaseIE): _GEO_COUNTRIES = ['RU'] - _VALID_URL = r'https?://[.\w-]+/(?:quote|media)/start/free_(?P<id>[^/]+)' + _VALID_URL = r'https?://bl\.video\.matchtv\.ru/(?:quote|media)/start/free_(?P<id>[^/]+)' _TESTS = [] - def _real_extract(self, url): - video_id = self._match_id(url) - video = self._download_xml(url, video_id) - - title = xpath_text(video, './/event_name', 'event name', fatal=True) - - formats = [] - for format_id in (None, 'noise'): - track_tag = join_nonempty('track', format_id, delim='_') - for track in video.findall(f'.//iphone/{track_tag}'): - track_url = track.text - if not track_url: - continue - if determine_ext(track_url) == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( - track_url, video_id, 'mp4', - entry_protocol='m3u8_native', - m3u8_id=join_nonempty('hls', format_id, delim='-'), fatal=False) - for f in m3u8_formats: - f.update({ - 'source_preference': 0 if format_id == 'noise' else 1, - 'format_note': track.get('title'), - }) - formats.extend(m3u8_formats) - - thumbnail = xpath_text(video, 
'.//image', 'thumbnail') - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - } - - -# WebcasterFeedIE -class MatchTVFeedIE(InfoExtractor): +class MatchTVFeedIE(WebcasterFeedBaseIE): _GEO_COUNTRIES = ['RU'] - _VALID_URL = r'https?://[.\w-]+/feed/start/free_(?P<id>[^/]+)' - _EMBED_REGEX = [r'<(?:object|a|span[^>]+class=["\']webcaster-player["\'])[^>]+data(?:-config)?=(["\']).*?config=(?P<url>https?://(?:(?!\1).)+)\1'] + _VALID_URL = r'https?://bl\.video\.matchtv\.ru/feed/start/free_(?P<id>[^/]+)' _TESTS = [] _WEBPAGE_TESTS = [{ 'url': 'https://matchtv.ru/football/matchtvvideo_NI1593368_clip_Zolotoj_dubl_Cherchesova_Specialnyj_reportazh', @@ -115,20 +71,3 @@ class MatchTVFeedIE(InfoExtractor): 'thumbnail': r're:https?://[\w-]+.video.matchtv.ru/fc/[\w-]+/thumbnails/events/1101266/590556538.jpg', }, }] - - def _extract_from_webpage(self, url, webpage): - yield from super()._extract_from_webpage(url, webpage) - - yield from traverse_obj(self._yield_json_ld(webpage, None), ( - lambda _, v: v['@type'] == 'VideoObject', 'url', - {extract_attributes}, 'src', {self.url_result})) - - def _real_extract(self, url): - video_id = self._match_id(url) - - feed = self._download_xml(url, video_id) - - video_url = xpath_text( - feed, ('video_hd', 'video'), 'video url', fatal=True) - - return self.url_result(video_url) diff --git a/yt_dlp/extractor/webcaster.py b/yt_dlp/extractor/webcaster.py index b0865e3e6..efe90e875 100644 --- a/yt_dlp/extractor/webcaster.py +++ b/yt_dlp/extractor/webcaster.py @@ -8,23 +8,7 @@ from ..utils import ( ) -class WebcasterIE(InfoExtractor): - _VALID_URL = r'https?://bl\.webcaster\.pro/(?:quote|media)/start/free_(?P<id>[^/]+)' - _TESTS = [{ - # http://video.khl.ru/quotes/393859 - 'url': 'http://bl.webcaster.pro/quote/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104?sr%3D105%26fa%3D1%26type_id%3D18', - 'md5': '0c162f67443f30916ff1c89425dcd4cd', - 'info_dict': { - 'id': 
'c8cefd240aa593681c8d068cff59f407_hd', - 'ext': 'mp4', - 'title': 'Сибирь - Нефтехимик. Лучшие моменты первого периода', - 'thumbnail': r're:^https?://.*\.jpg$', - }, - }, { - 'url': 'http://bl.webcaster.pro/media/start/free_6246c7a4453ac4c42b4398f840d13100_hd/2_2991109016/e8d0d82587ef435480118f9f9c41db41/4635726126', - 'only_matching': True, - }] - +class WebcasterBaseIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) @@ -61,14 +45,25 @@ class WebcasterIE(InfoExtractor): } -class WebcasterFeedIE(InfoExtractor): - _VALID_URL = r'https?://bl\.webcaster\.pro/feed/start/free_(?P<id>[^/]+)' - _EMBED_REGEX = [r'<(?:object|a[^>]+class=["\']webcaster-player["\'])[^>]+data(?:-config)?=(["\']).*?config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_.*?)(?:[?&]|\1)'] - _TEST = { - 'url': 'http://bl.webcaster.pro/feed/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104', +class WebcasterIE(WebcasterBaseIE): + _VALID_URL = r'https?://bl\.webcaster\.pro/(?:quote|media)/start/free_(?P<id>[^/]+)' + _TESTS = [{ + # http://video.khl.ru/quotes/393859 + 'url': 'http://bl.webcaster.pro/quote/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104?sr%3D105%26fa%3D1%26type_id%3D18', + 'md5': '0c162f67443f30916ff1c89425dcd4cd', + 'info_dict': { + 'id': 'c8cefd240aa593681c8d068cff59f407_hd', + 'ext': 'mp4', + 'title': 'Сибирь - Нефтехимик. 
Лучшие моменты первого периода', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + }, { + 'url': 'http://bl.webcaster.pro/media/start/free_6246c7a4453ac4c42b4398f840d13100_hd/2_2991109016/e8d0d82587ef435480118f9f9c41db41/4635726126', 'only_matching': True, - } + }] + +class WebcasterFeedBaseIE(InfoExtractor): def _extract_from_webpage(self, url, webpage): yield from super()._extract_from_webpage(url, webpage) @@ -89,4 +84,17 @@ class WebcasterFeedIE(InfoExtractor): video_url = xpath_text( feed, ('video_hd', 'video'), 'video url', fatal=True) - return self.url_result(video_url, WebcasterIE.ie_key()) + return self.url_result(video_url) + + +class WebcasterFeedIE(WebcasterFeedBaseIE): + _VALID_URL = r'https?://bl\.webcaster\.pro/feed/start/free_(?P<id>[^/]+)' + _TEST = { + 'url': 'http://bl.webcaster.pro/feed/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104', + 'only_matching': True, + } + + +class WebcasterPlayerEmbedIE(InfoExtractor): + _VALID_URL = False + _EMBED_REGEX = [r'<(?:object|a|span[^>]+class=["\']webcaster-player["\'])[^>]+data(?:-config)?=(["\']).*?config=(?P<url>https?://(?:(?!\1).)+)\1']