From f3411af12e209bc5624e1ac31271b8aabe2d3c90 Mon Sep 17 00:00:00 2001 From: megumin <34505936+megumintyan@users.noreply.github.com> Date: Tue, 25 Jun 2024 01:49:09 +0300 Subject: [PATCH 1/3] [ie/matchtv] Fix extractor (#10190) Authored by: megumintyan --- yt_dlp/extractor/matchtv.py | 40 +++++++++++-------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/yt_dlp/extractor/matchtv.py b/yt_dlp/extractor/matchtv.py index a67fa9fe4..93799fe85 100644 --- a/yt_dlp/extractor/matchtv.py +++ b/yt_dlp/extractor/matchtv.py @@ -1,51 +1,35 @@ -import random - from .common import InfoExtractor -from ..utils import xpath_text class MatchTVIE(InfoExtractor): - _VALID_URL = r'https?://matchtv\.ru(?:/on-air|/?#live-player)' + _VALID_URL = [ + r'https?://matchtv\.ru/on-air/?(?:$|[?#])', + r'https?://video\.matchtv\.ru/iframe/channel/106/?(?:$|[?#])', + ] _TESTS = [{ - 'url': 'http://matchtv.ru/#live-player', + 'url': 'http://matchtv.ru/on-air/', 'info_dict': { 'id': 'matchtv-live', - 'ext': 'flv', + 'ext': 'mp4', 'title': r're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', - 'is_live': True, + 'live_status': 'is_live', }, 'params': { 'skip_download': True, }, }, { - 'url': 'http://matchtv.ru/on-air/', + 'url': 'https://video.matchtv.ru/iframe/channel/106', 'only_matching': True, }] def _real_extract(self, url): video_id = 'matchtv-live' - video_url = self._download_json( - 'http://player.matchtv.ntvplus.tv/player/smil', video_id, - query={ - 'ts': '', - 'quality': 'SD', - 'contentId': '561d2c0df7159b37178b4567', - 'sign': '', - 'includeHighlights': '0', - 'userId': '', - 'sessionId': random.randint(1, 1000000000), - 'contentType': 'channel', - 'timeShift': '0', - 'platform': 'portal', - }, - headers={ - 'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf', - })['data']['videoUrl'] - f4m_url = xpath_text(self._download_xml(video_url, video_id), './to') - formats = self._extract_f4m_formats(f4m_url, video_id) + webpage = self._download_webpage('https://video.matchtv.ru/iframe/channel/106', video_id) + video_url = self._html_search_regex( + r'data-config="config=(https?://[^?"]+)[?"]', webpage, 'video URL').replace('/feed/', '/media/') + '.m3u8' return { 'id': video_id, 'title': 'Матч ТВ - Прямой эфир', 'is_live': True, - 'formats': formats, + 'formats': self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True), } From b758877afa225747fba81c8a580e27583a231734 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A6sim?= Date: Thu, 27 Jun 2024 02:56:44 +0300 Subject: [PATCH 2/3] [ie/cloudycdn] Fix formats extraction (#10271) Authored by: Caesim404 --- yt_dlp/extractor/cloudycdn.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/yt_dlp/extractor/cloudycdn.py b/yt_dlp/extractor/cloudycdn.py index 58bde4666..6e757d79e 100644 --- a/yt_dlp/extractor/cloudycdn.py +++ b/yt_dlp/extractor/cloudycdn.py @@ -1,3 +1,5 @@ +import re + from .common import InfoExtractor from ..utils import ( int_or_none, @@ -35,6 +37,20 @@ class CloudyCDNIE(InfoExtractor): 'duration': 1205, 'upload_date': '20221130', }, + }, { + # Video-only m3u8 formats need manual fixup + 'url': 'https://embed.cloudycdn.services/ltv/media/08j_d24-6000-074', + 'md5': 'fc472e40f6e6238446509be411c920e2', + 'info_dict': { + 'id': '08j_d24-6000-074', + 'ext': 'mp4', + 'upload_date': '20240620', + 'duration': 1673, + 'title': 'D24-6000-074-cetstud', + 'timestamp': 1718902233, + 'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/788392/placeholder1718903938.jpg', + }, + 'params': {'format': 'bv'}, }] _WEBPAGE_TESTS = [{ 'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/', @@ -63,6 +79,9 @@ class CloudyCDNIE(InfoExtractor): formats, subtitles = [], {} for m3u8_url in traverse_obj(data, ('source', 'sources', ..., 'src', {url_or_none})): fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False) + for fmt in fmts: + if re.search(r'chunklist_b\d+_vo_', fmt['url']): + fmt['acodec'] = 'none' formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) From 0953209a857c51648aee89d205c086b0e1dd3864 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 26 Jun 2024 18:57:34 -0500 Subject: [PATCH 3/3] [ie/mediasite] Fix extraction (#10273) Fix regression in add96eb9f84cfffe85682bf2fb85135746994ee8 Closes #10270 Authored by: bashonly --- yt_dlp/extractor/mediasite.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/mediasite.py b/yt_dlp/extractor/mediasite.py index beb12f8a4..ad7ab27e2 100644 --- a/yt_dlp/extractor/mediasite.py +++ b/yt_dlp/extractor/mediasite.py @@ -15,6 +15,7 @@ from ..utils import ( url_or_none, urljoin, ) +from ..utils.traversal import traverse_obj _ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})' @@ -212,13 +213,14 @@ class MediasiteIE(InfoExtractor): stream_type, 'type%u' % stream_type) stream_formats = [] - for unum, video_url in enumerate(video_urls): - video_url = url_or_none(video_url.get('Location')) + for unum, video in enumerate(video_urls): + video_url = url_or_none(video.get('Location')) if not video_url: continue # XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS - media_type = video_url.get('MediaType') + media_type = video.get('MediaType') + ext = mimetype2ext(video.get('MimeType')) if media_type == 'SS': stream_formats.extend(self._extract_ism_formats( video_url, resource_id, @@ -229,15 +231,20 @@ class MediasiteIE(InfoExtractor): video_url, resource_id, mpd_id=f'{stream_id}-{snum}.{unum}', fatal=False)) + elif ext in ('m3u', 'm3u8'): + stream_formats.extend(self._extract_m3u8_formats( + video_url, resource_id, + m3u8_id=f'{stream_id}-{snum}.{unum}', + fatal=False)) else: stream_formats.append({ 'format_id': f'{stream_id}-{snum}.{unum}', 'url': video_url, - 'ext': mimetype2ext(video_url.get('MimeType')), + 'ext': ext, }) - if stream.get('HasSlideContent', False): - images = player_options['PlayerLayoutOptions']['Images'] + images = traverse_obj(player_options, ('PlayerLayoutOptions', 'Images', {dict})) + if stream.get('HasSlideContent') and images: stream_formats.append(self.__extract_slides( stream_id=stream_id, snum=snum,