From 07275b708b4f46c3b3fc9ea941a842fb287cad02 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Mon, 22 Aug 2022 22:04:12 +0200 Subject: [PATCH] [extractor/medaltv] Fix extraction (#4739) Authored by: xenova --- yt_dlp/extractor/medaltv.py | 70 +++++++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 15 deletions(-) diff --git a/yt_dlp/extractor/medaltv.py b/yt_dlp/extractor/medaltv.py index 5f0a9b42f6..80efcc7649 100644 --- a/yt_dlp/extractor/medaltv.py +++ b/yt_dlp/extractor/medaltv.py @@ -8,15 +8,33 @@ from ..utils import ( float_or_none, int_or_none, str_or_none, - try_get, + traverse_obj, ) class MedalTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?medal\.tv/(?P<path>games/[^/?#&]+/clips)/(?P<id>[^/?#&]+)' _TESTS = [{ - 'url': 'https://medal.tv/clips/2mA60jWAGQCBH', - 'md5': '7b07b064331b1cf9e8e5c52a06ae68fa', + 'url': 'https://medal.tv/games/valorant/clips/jTBFnLKdLy15K', + 'md5': '6930f8972914b6b9fdc2bb3918098ba0', + 'info_dict': { + 'id': 'jTBFnLKdLy15K', + 'ext': 'mp4', + 'title': "Mornu's clutch", + 'description': '', + 'uploader': 'Aciel', + 'timestamp': 1651628243, + 'upload_date': '20220504', + 'uploader_id': '19335460', + 'uploader_url': 'https://medal.tv/users/19335460', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'duration': 13, + } + }, { + 'url': 'https://medal.tv/games/cod%20cold%20war/clips/2mA60jWAGQCBH', + 'md5': '3d19d426fe0b2d91c26e412684e66a06', 'info_dict': { 'id': '2mA60jWAGQCBH', 'ext': 'mp4', @@ -26,9 +44,15 @@ class MedalTVIE(InfoExtractor): 'timestamp': 1603165266, 'upload_date': '20201020', 'uploader_id': '10619174', + 'thumbnail': 'https://cdn.medal.tv/10619174/thumbnail-34934644-720p.jpg?t=1080p&c=202042&missing', + 'uploader_url': 'https://medal.tv/users/10619174', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'duration': 23, } }, { - 'url': 'https://medal.tv/clips/2um24TWdty0NA', + 'url': 
'https://medal.tv/games/cod%20cold%20war/clips/2um24TWdty0NA', 'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148', 'info_dict': { 'id': '2um24TWdty0NA', @@ -39,25 +63,42 @@ class MedalTVIE(InfoExtractor): 'timestamp': 1605580939, 'upload_date': '20201117', 'uploader_id': '5156321', + 'thumbnail': 'https://cdn.medal.tv/5156321/thumbnail-36787208-360p.jpg?t=1080p&c=202046&missing', + 'uploader_url': 'https://medal.tv/users/5156321', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'duration': 9, } }, { - 'url': 'https://medal.tv/clips/37rMeFpryCC-9', + 'url': 'https://medal.tv/games/valorant/clips/37rMeFpryCC-9', 'only_matching': True, }, { - 'url': 'https://medal.tv/clips/2WRj40tpY_EU9', + 'url': 'https://medal.tv/games/valorant/clips/2WRj40tpY_EU9', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) + path = self._match_valid_url(url).group('path') + webpage = self._download_webpage(url, video_id) - hydration_data = self._parse_json(self._search_regex( - r'<script[^>]*>\s*(?:var\s*)?hydrationData\s*=\s*({.+?})\s*</script>', - webpage, 'hydration data', default='{}'), video_id) + next_data = self._search_json( + '<script[^>]*__NEXT_DATA__[^>]*>', webpage, + 'next data', video_id, end_pattern='</script>', fatal=False) - clip = try_get( - hydration_data, lambda x: x['clips'][video_id], dict) or {} + build_id = next_data.get('buildId') + if not build_id: + raise ExtractorError( + 'Could not find build ID.', video_id=video_id) + + locale = next_data.get('locale', 'en') + + api_response = self._download_json( + f'https://medal.tv/_next/data/{build_id}/{locale}/{path}/{video_id}.json', video_id) + + clip = traverse_obj(api_response, ('pageProps', 'clip')) or {} if not clip: raise ExtractorError( 'Could not find video information.', video_id=video_id) @@ -113,9 +154,8 @@ class MedalTVIE(InfoExtractor): # Necessary because the id of the author is not known in advance. # Won't raise an issue if no profile can be found as this is optional. 
- author = try_get( - hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {} - author_id = str_or_none(author.get('id')) + author = traverse_obj(api_response, ('pageProps', 'profile')) or {} + author_id = str_or_none(author.get('userId')) author_url = format_field(author_id, None, 'https://medal.tv/users/%s') return {