From 8b8b442cb005a8d85315f301615f83fb736b967a Mon Sep 17 00:00:00 2001 From: sepro <4618135+seproDev@users.noreply.github.com> Date: Sun, 14 Jul 2024 01:19:17 +0200 Subject: [PATCH] [ie/youtube] Avoid poToken experiment player responses (#10456) Closes #10397 Authored by: seproDev --- yt_dlp/extractor/youtube.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index dd98c34b61..d1d57eb47e 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -1294,6 +1294,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'}, } _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt') + _POTOKEN_EXPERIMENTS = ('51217476', '51217102') _GEO_BYPASS = False @@ -3703,8 +3704,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data): initial_pr = None if webpage: - initial_pr = self._search_json( - self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False) + experiments = traverse_obj(master_ytcfg, ( + 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'serializedExperimentIds', {str}, {lambda x: x.split(',')}, ..., {str})) + if all(x in experiments for x in self._POTOKEN_EXPERIMENTS): + self.report_warning( + 'Webpage contains broken formats (poToken experiment detected). Ignoring initial player response') + master_ytcfg = self._get_default_ytcfg() + else: + initial_pr = self._search_json( + self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False) prs = [] if initial_pr and not self._invalid_player_response(initial_pr, video_id): @@ -3746,11 +3754,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor): player_url = self._download_player_url(video_id) tried_iframe_fallback = True - try: - pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response( - client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data) - except ExtractorError as e: - self.report_warning(e) + pr = initial_pr if client == 'web' and initial_pr else None + for retry in self.RetryManager(fatal=False): + try: + pr = pr or self._extract_player_response( + client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, + player_url if require_js_player else None, initial_pr, smuggled_data) + except ExtractorError as e: + self.report_warning(e) + break + experiments = traverse_obj(pr, ( + 'responseContext', 'serviceTrackingParams', lambda _, v: v['service'] == 'GFEEDBACK', + 'params', lambda _, v: v['key'] == 'e', 'value', {lambda x: x.split(',')}, ..., {str})) + if all(x in experiments for x in self._POTOKEN_EXPERIMENTS): + pr = None + retry.error = ExtractorError('API returned broken formats (poToken experiment detected)', expected=True) + if not pr: continue if pr_id := self._invalid_player_response(pr, video_id):