[youtube] De-prioritize potentially damaged formats

Closes #2823
This commit is contained in:
pukkandan 2022-02-18 19:41:37 +05:30
parent 60f3e99592
commit 0ad92dfb18
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39

View file

@ -2936,6 +2936,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
])
streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
approx_duration = max(traverse_obj(streaming_formats, (..., 'approxDurationMs'), expected_type=float_or_none) or [0]) or None
for fmt in streaming_formats:
if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
@ -2995,12 +2996,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
itags[itag] = 'https'
stream_ids.append(stream_id)
tbr = float_or_none(
fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
language_preference = (
10 if audio_track.get('audioIsDefault') and 10
else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
else -1)
# Some formats may have much smaller duration than others (possibly damaged during encoding)
# Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) < approx_duration - 10000)
dct = {
'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')),
@ -3009,7 +3012,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'%s%s' % (audio_track.get('displayName') or '',
' (default)' if language_preference > 0 else ''),
fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
throttled and 'THROTTLED', delim=', '),
throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
'source_preference': -10 if throttled else -1,
'fps': int_or_none(fmt.get('fps')) or None,
'height': height,
@ -3020,6 +3023,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'language': join_nonempty(audio_track.get('id', '').split('.')[0],
'desc' if language_preference < -1 else ''),
'language_preference': language_preference,
'preference': -10 if is_damaged else None,
}
mime_mobj = re.match(
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')