From 0d146c1e36f467af30e87b7af651bdee67b73500 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 1 Dec 2024 15:25:09 +0000 Subject: [PATCH] [ie/youtube] Adjust player clients for site changes (#11663) Closes #11640 Authored by: bashonly --- README.md | 4 ++-- yt_dlp/extractor/youtube.py | 46 +++++++++++++++++++++++-------------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 0a62d8e74c..772395d24d 100644 --- a/README.md +++ b/README.md @@ -1761,7 +1761,7 @@ $ yt-dlp --replace-in-metadata "title,uploader" "[ _]" "-" # EXTRACTOR ARGUMENTS -Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=mediaconnect,web;formats=incomplete" --extractor-args "funimation:version=uncut"` +Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=tv,mweb;formats=incomplete" --extractor-args "funimation:version=uncut"` Note: In CLI, `ARG` can use `-` instead of `_`; e.g. `youtube:player-client"` becomes `youtube:player_client"` @@ -1770,7 +1770,7 @@ The following extractors use this feature: #### youtube * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively -* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mweb`, `mediaconnect`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, and `web_creator` is added as needed for age-gated videos when account age verification is required. Similarly, the `_music` variants are added for `music.youtube.com` URLs. Some clients, such as `web` and `android`, require a `po_token` for their formats to be downloadable. Some clients, such as the `_creator` variants, will only work with authentication. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web` +* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mweb`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, or `web_creator,mweb` is used when authenticating with cookies. The `_music` variants are added for `music.youtube.com` URLs. Some clients, such as `web` and `android`, require a `po_token` for their formats to be downloadable. Some clients, such as the `_creator` variants, will only work with authentication. Not all clients support authentication via cookies. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web` * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 7a9133466d..a67f09e623 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -83,6 +83,7 @@ INNERTUBE_CLIENTS = { }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, 'REQUIRE_PO_TOKEN': True, + 'SUPPORTS_COOKIES': True, }, # Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats 'web_safari': { @@ -95,6 +96,7 @@ INNERTUBE_CLIENTS = { }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, 'REQUIRE_PO_TOKEN': True, + 'SUPPORTS_COOKIES': True, }, 'web_embedded': { 'INNERTUBE_CONTEXT': { @@ -104,6 +106,7 @@ INNERTUBE_CLIENTS = { }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 56, + 'SUPPORTS_COOKIES': True, }, 'web_music': { 'INNERTUBE_HOST': 'music.youtube.com', @@ -114,6 +117,7 @@ INNERTUBE_CLIENTS = { }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 67, + 'SUPPORTS_COOKIES': True, }, # This client now requires sign-in for every video 'web_creator': { @@ -125,6 +129,7 @@ INNERTUBE_CLIENTS = { }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 62, 'REQUIRE_AUTH': True, + 'SUPPORTS_COOKIES': True, }, 'android': { 'INNERTUBE_CONTEXT': { @@ -157,6 +162,7 @@ INNERTUBE_CLIENTS = { 'REQUIRE_JS_PLAYER': False, 'REQUIRE_PO_TOKEN': True, 'REQUIRE_AUTH': True, + 'SUPPORTS_COOKIES': True, }, # This client now requires sign-in for every video 'android_creator': { @@ -191,6 +197,7 @@ INNERTUBE_CLIENTS = { }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 28, 'REQUIRE_JS_PLAYER': False, + 'SUPPORTS_COOKIES': True, }, # iOS clients have HLS live streams. Setting device model to get 60fps formats. # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558 @@ -225,6 +232,7 @@ INNERTUBE_CLIENTS = { 'INNERTUBE_CONTEXT_CLIENT_NAME': 26, 'REQUIRE_JS_PLAYER': False, 'REQUIRE_AUTH': True, + 'SUPPORTS_COOKIES': True, }, # This client now requires sign-in for every video 'ios_creator': { @@ -253,6 +261,7 @@ INNERTUBE_CLIENTS = { }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 2, + 'SUPPORTS_COOKIES': True, }, 'tv': { 'INNERTUBE_CONTEXT': { @@ -262,6 +271,7 @@ INNERTUBE_CLIENTS = { }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 7, + 'SUPPORTS_COOKIES': True, }, # This client now requires sign-in for every video # It was previously an age-gate workaround for videos that were `playable_in_embed` @@ -275,19 +285,7 @@ INNERTUBE_CLIENTS = { }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 85, 'REQUIRE_AUTH': True, - }, - # This client now requires sign-in for every video - # It may be able to receive pre-merged video+audio 720p/1080p streams - 'mediaconnect': { - 'INNERTUBE_CONTEXT': { - 'client': { - 'clientName': 'MEDIA_CONNECT_FRONTEND', - 'clientVersion': '0.1', - }, - }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 95, - 'REQUIRE_JS_PLAYER': False, - 'REQUIRE_AUTH': True, + 'SUPPORTS_COOKIES': True, }, } @@ -317,6 +315,7 @@ def build_innertube_clients(): ytcfg.setdefault('REQUIRE_JS_PLAYER', True) ytcfg.setdefault('REQUIRE_PO_TOKEN', False) ytcfg.setdefault('REQUIRE_AUTH', False) + ytcfg.setdefault('SUPPORTS_COOKIES', False) ytcfg.setdefault('PLAYER_PARAMS', None) ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en') @@ -1357,6 +1356,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt') _DEFAULT_CLIENTS = ('ios', 'mweb') + _DEFAULT_AUTHED_CLIENTS = ('web_creator', 'mweb') _GEO_BYPASS = False @@ -3823,12 +3823,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _get_requested_clients(self, url, smuggled_data): requested_clients = [] excluded_clients = [] + default_clients = self._DEFAULT_AUTHED_CLIENTS if self.is_authenticated else self._DEFAULT_CLIENTS allowed_clients = sorted( (client for client in INNERTUBE_CLIENTS if client[:1] != '_'), key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True) for client in self._configuration_arg('player_client'): if client == 'default': - requested_clients.extend(self._DEFAULT_CLIENTS) + requested_clients.extend(default_clients) elif client == 'all': requested_clients.extend(allowed_clients) elif client.startswith('-'): @@ -3838,7 +3839,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: requested_clients.append(client) if not requested_clients: - requested_clients.extend(self._DEFAULT_CLIENTS) + requested_clients.extend(default_clients) for excluded_client in excluded_clients: if excluded_client in requested_clients: requested_clients.remove(excluded_client) @@ -3850,9 +3851,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor): _, base_client, variant = _split_innertube_client(requested_client) music_client = f'{base_client}_music' if base_client != 'mweb' else 'web_music' if variant != 'music' and music_client in INNERTUBE_CLIENTS: - if not INNERTUBE_CLIENTS[music_client]['REQUIRE_AUTH'] or self.is_authenticated: + client_info = INNERTUBE_CLIENTS[music_client] + if not client_info['REQUIRE_AUTH'] or (self.is_authenticated and client_info['SUPPORTS_COOKIES']): requested_clients.append(music_client) + if self.is_authenticated: + unsupported_clients = [ + client for client in requested_clients if not INNERTUBE_CLIENTS[client]['SUPPORTS_COOKIES'] + ] + for client in unsupported_clients: + self.report_warning(f'Skipping client "{client}" since it does not support cookies', only_once=True) + requested_clients.remove(client) + return orderedSet(requested_clients) def _invalid_player_response(self, pr, video_id): @@ -3958,6 +3968,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: prs.append(pr) + ''' This code is pointless while web_creator is in _DEFAULT_AUTHED_CLIENTS # EU countries require age-verification for accounts to access age-restricted videos # If account is not age-verified, _is_agegated() will be truthy for non-embedded clients if self.is_authenticated and self._is_agegated(pr): @@ -3965,9 +3976,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): f'{video_id}: This video is age-restricted and YouTube is requiring ' 'account age-verification; some formats may be missing', only_once=True) # web_creator can work around the age-verification requirement - # android_vr and mediaconnect may also be able to work around age-verification + # android_vr may also be able to work around age-verification # tv_embedded may(?) still work around age-verification if the video is embeddable append_client('web_creator') + ''' prs.extend(deprioritized_prs)