mirror of
https://github.com/yt-dlp/yt-dlp
synced 2024-12-25 21:59:04 +01:00
[ie/youtube] Skip iOS formats that require PO Token (#11890)
Partial fix for https://github.com/yt-dlp/yt-dlp/issues/11868
Authored by: coletdjnz
This commit is contained in:
parent
6fc85f617a
commit
9f42e68a74
2 changed files with 29 additions and 17 deletions
|
@ -1775,7 +1775,7 @@ The following extractors use this feature:
|
||||||
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
||||||
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
|
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
|
||||||
* E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
|
* E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
|
||||||
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8)
|
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8), `missing_pot` (include formats that require a PO Token but are missing one)
|
||||||
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
|
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
|
||||||
* `innertube_key`: Innertube API key to use for all API requests. By default, no API key is used
|
* `innertube_key`: Innertube API key to use for all API requests. By default, no API key is used
|
||||||
* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
|
* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
|
||||||
|
|
|
@ -214,6 +214,7 @@ INNERTUBE_CLIENTS = {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
|
'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
|
||||||
|
'REQUIRE_PO_TOKEN': True,
|
||||||
'REQUIRE_JS_PLAYER': False,
|
'REQUIRE_JS_PLAYER': False,
|
||||||
},
|
},
|
||||||
# This client now requires sign-in for every video
|
# This client now requires sign-in for every video
|
||||||
|
@ -3973,13 +3974,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
)
|
)
|
||||||
|
|
||||||
require_po_token = self._get_default_ytcfg(client).get('REQUIRE_PO_TOKEN')
|
require_po_token = self._get_default_ytcfg(client).get('REQUIRE_PO_TOKEN')
|
||||||
if not po_token and require_po_token:
|
if not po_token and require_po_token and 'missing_pot' in self._configuration_arg('formats'):
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
f'No PO Token provided for {client} client, '
|
f'No PO Token provided for {client} client, '
|
||||||
f'which is required for working {client} formats. '
|
f'which may be required for working {client} formats. This client will be deprioritized', only_once=True)
|
||||||
f'You can manually pass a PO Token for this client with '
|
|
||||||
f'--extractor-args "youtube:po_token={client}+XXX"',
|
|
||||||
only_once=True)
|
|
||||||
deprioritize_pr = True
|
deprioritize_pr = True
|
||||||
|
|
||||||
pr = initial_pr if client == 'web' else None
|
pr = initial_pr if client == 'web' else None
|
||||||
|
@ -4053,6 +4051,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
or (live_status == 'post_live' and (duration or 0) > 2 * 3600)):
|
or (live_status == 'post_live' and (duration or 0) > 2 * 3600)):
|
||||||
return live_status
|
return live_status
|
||||||
|
|
||||||
|
def _report_pot_format_skipped(self, video_id, client_name, proto):
    """Tell the user that formats were skipped because no PO Token was given.

    Builds a single message naming the video, the client and the protocol
    whose formats are skipped, together with the extractor-args needed to
    either supply a PO Token or to keep the formats anyway.

    The message goes through ``self.write_debug`` when ``client_name`` is
    listed in ``self._DEFAULT_CLIENTS`` or ``self._DEFAULT_AUTHED_CLIENTS``,
    and through ``self.report_warning`` otherwise. Both calls pass
    ``only_once=True``.

    :param video_id: ID of the video, used as the message prefix.
    :param client_name: Name of the client whose formats are skipped.
    :param proto: Protocol label of the skipped formats (e.g. ``'https'``).
    """
    msg = (
        f'{video_id}: {client_name} client {proto} formats require a PO Token which was not provided. '
        'They will be skipped as they may yield HTTP Error 403. '
        # The closing double quote after XXX keeps the suggested command copy-pasteable
        f'You can manually pass a PO Token for this client with --extractor-args "youtube:po_token={client_name}+XXX". '
        'For more information, refer to https://github.com/yt-dlp/yt-dlp/wiki/Extractors#po-token-guide . '
        'To enable these broken formats anyway, pass --extractor-args "youtube:formats=missing_pot"')

    # Only raise a warning for non-default clients, to not confuse users.
    # iOS HLS formats still work without PO Token, so we don't need to warn about them.
    if client_name in (*self._DEFAULT_CLIENTS, *self._DEFAULT_AUTHED_CLIENTS):
        self.write_debug(msg, only_once=True)
    else:
        self.report_warning(msg, only_once=True)
|
||||||
|
|
||||||
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
|
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
|
||||||
CHUNK_SIZE = 10 << 20
|
CHUNK_SIZE = 10 << 20
|
||||||
PREFERRED_LANG_VALUE = 10
|
PREFERRED_LANG_VALUE = 10
|
||||||
|
@ -4179,11 +4192,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
fmt_url = update_url_query(fmt_url, {'pot': po_token})
|
fmt_url = update_url_query(fmt_url, {'pot': po_token})
|
||||||
|
|
||||||
# Clients that require PO Token return videoplayback URLs that may return 403
|
# Clients that require PO Token return videoplayback URLs that may return 403
|
||||||
is_broken = (not po_token and self._get_default_ytcfg(client_name).get('REQUIRE_PO_TOKEN'))
|
require_po_token = (not po_token and self._get_default_ytcfg(client_name).get('REQUIRE_PO_TOKEN'))
|
||||||
if is_broken:
|
if require_po_token and 'missing_pot' not in self._configuration_arg('formats'):
|
||||||
self.report_warning(
|
self._report_pot_format_skipped(video_id, client_name, 'https')
|
||||||
f'{video_id}: {client_name} client formats require a PO Token which was not provided. '
|
continue
|
||||||
'They will be deprioritized as they may yield HTTP Error 403', only_once=True)
|
|
||||||
|
|
||||||
name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
|
name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
|
||||||
fps = int_or_none(fmt.get('fps')) or 0
|
fps = int_or_none(fmt.get('fps')) or 0
|
||||||
|
@ -4196,7 +4208,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
name, fmt.get('isDrc') and 'DRC',
|
name, fmt.get('isDrc') and 'DRC',
|
||||||
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
|
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
|
||||||
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
|
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
|
||||||
is_damaged and 'DAMAGED', is_broken and 'BROKEN',
|
is_damaged and 'DAMAGED', require_po_token and 'MISSING POT',
|
||||||
(self.get_param('verbose') or all_formats) and short_client_name(client_name),
|
(self.get_param('verbose') or all_formats) and short_client_name(client_name),
|
||||||
delim=', '),
|
delim=', '),
|
||||||
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
|
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
|
||||||
|
@ -4213,7 +4225,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
|
'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
|
||||||
'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1,
|
'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1,
|
||||||
# Strictly de-prioritize broken, damaged and 3gp formats
|
# Strictly de-prioritize broken, damaged and 3gp formats
|
||||||
'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
|
'preference': -20 if require_po_token else -10 if is_damaged else -2 if itag == '17' else None,
|
||||||
}
|
}
|
||||||
mime_mobj = re.match(
|
mime_mobj = re.match(
|
||||||
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
|
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
|
||||||
|
@ -4271,10 +4283,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
# Clients that require PO Token return videoplayback URLs that may return 403
|
# Clients that require PO Token return videoplayback URLs that may return 403
|
||||||
# hls does not currently require PO Token
|
# hls does not currently require PO Token
|
||||||
if (not po_token and self._get_default_ytcfg(client_name).get('REQUIRE_PO_TOKEN')) and proto != 'hls':
|
if (not po_token and self._get_default_ytcfg(client_name).get('REQUIRE_PO_TOKEN')) and proto != 'hls':
|
||||||
self.report_warning(
|
if 'missing_pot' not in self._configuration_arg('formats'):
|
||||||
f'{video_id}: {client_name} client {proto} formats require a PO Token which was not provided. '
|
self._report_pot_format_skipped(video_id, client_name, proto)
|
||||||
'They will be deprioritized as they may yield HTTP Error 403', only_once=True)
|
return False
|
||||||
f['format_note'] = join_nonempty(f.get('format_note'), 'BROKEN', delim=' ')
|
f['format_note'] = join_nonempty(f.get('format_note'), 'MISSING POT', delim=' ')
|
||||||
f['source_preference'] -= 20
|
f['source_preference'] -= 20
|
||||||
|
|
||||||
if itag and all_formats:
|
if itag and all_formats:
|
||||||
|
|
Loading…
Reference in a new issue