Separate --check-all-formats from --check-formats

Previously, `--check-formats` tested only the selected video formats, but tested ALL thumbnails.
This commit is contained in:
pukkandan 2021-10-24 14:46:07 +05:30
parent 96565c7e55
commit 9f1a1c36e6
No known key found for this signature in database
GPG key ID: 0F00D95A001F4698
3 changed files with 68 additions and 53 deletions

View file

@ -224,7 +224,8 @@ class YoutubeDL(object):
allow_multiple_audio_streams: Allow multiple audio streams to be merged allow_multiple_audio_streams: Allow multiple audio streams to be merged
into a single file into a single file
check_formats Whether to test if the formats are downloadable. check_formats Whether to test if the formats are downloadable.
Can be True (check all), False (check none) Can be True (check all), False (check none),
'selected' (check selected formats),
or None (check only if requested by extractor) or None (check only if requested by extractor)
paths: Dictionary of output paths. The allowed keys are 'home' paths: Dictionary of output paths. The allowed keys are 'home'
'temp' and the keys of OUTTMPL_TYPES (in utils.py) 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
@ -1720,6 +1721,28 @@ class YoutubeDL(object):
return op(actual_value, comparison_value) return op(actual_value, comparison_value)
return _filter return _filter
def _check_formats(self, formats):
    """Lazily yield the entries of *formats* that survive a test download.

    For each format a scratch file is created in the 'temp' output path
    (or the system default when that path is empty), a test download is
    attempted into it via ``self.dl(..., test=True)``, and the scratch file
    is removed again. Formats whose test download fails or raises one of
    the handled download/IO/network errors are reported and skipped.
    """
    for fmt in formats:
        self.to_screen('[info] Testing format %s' % fmt['format_id'])
        # delete=False: only the reserved path is needed, so the handle is
        # closed immediately and the file is removed by hand afterwards.
        scratch = tempfile.NamedTemporaryFile(
            suffix='.tmp', delete=False,
            dir=self.get_output_path('temp') or None)
        scratch.close()
        scratch_path = scratch.name
        try:
            downloadable, _ = self.dl(scratch_path, fmt, test=True)
        except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
            downloadable = False
        finally:
            # Clean up regardless of the outcome; a failed removal is only
            # worth a warning, not an abort.
            if os.path.exists(scratch_path):
                try:
                    os.remove(scratch_path)
                except OSError:
                    self.report_warning('Unable to delete temporary file "%s"' % scratch_path)
        if not downloadable:
            self.to_screen('[info] Unable to download format %s. Skipping...' % fmt['format_id'])
            continue
        yield fmt
def _default_format_spec(self, info_dict, download=True): def _default_format_spec(self, info_dict, download=True):
def can_merge(): def can_merge():
@ -1759,7 +1782,7 @@ class YoutubeDL(object):
allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False), allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
'video': self.params.get('allow_multiple_video_streams', False)} 'video': self.params.get('allow_multiple_video_streams', False)}
check_formats = self.params.get('check_formats') check_formats = self.params.get('check_formats') == 'selected'
def _parse_filter(tokens): def _parse_filter(tokens):
filter_parts = [] filter_parts = []
@ -1935,26 +1958,7 @@ class YoutubeDL(object):
if not check_formats: if not check_formats:
yield from formats yield from formats
return return
for f in formats: yield from self._check_formats(formats)
self.to_screen('[info] Testing format %s' % f['format_id'])
temp_file = tempfile.NamedTemporaryFile(
suffix='.tmp', delete=False,
dir=self.get_output_path('temp') or None)
temp_file.close()
try:
success, _ = self.dl(temp_file.name, f, test=True)
except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
success = False
finally:
if os.path.exists(temp_file.name):
try:
os.remove(temp_file.name)
except OSError:
self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
if success:
yield f
else:
self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
def _build_selector_function(selector): def _build_selector_function(selector):
if isinstance(selector, list): # , if isinstance(selector, list): # ,
@ -2111,42 +2115,45 @@ class YoutubeDL(object):
self.cookiejar.add_cookie_header(pr) self.cookiejar.add_cookie_header(pr)
return pr.get_header('Cookie') return pr.get_header('Cookie')
def _sort_thumbnails(self, thumbnails):
thumbnails.sort(key=lambda t: (
t.get('preference') if t.get('preference') is not None else -1,
t.get('width') if t.get('width') is not None else -1,
t.get('height') if t.get('height') is not None else -1,
t.get('id') if t.get('id') is not None else '',
t.get('url')))
def _sanitize_thumbnails(self, info_dict): def _sanitize_thumbnails(self, info_dict):
thumbnails = info_dict.get('thumbnails') thumbnails = info_dict.get('thumbnails')
if thumbnails is None: if thumbnails is None:
thumbnail = info_dict.get('thumbnail') thumbnail = info_dict.get('thumbnail')
if thumbnail: if thumbnail:
info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
if thumbnails: if not thumbnails:
thumbnails.sort(key=lambda t: ( return
t.get('preference') if t.get('preference') is not None else -1,
t.get('width') if t.get('width') is not None else -1,
t.get('height') if t.get('height') is not None else -1,
t.get('id') if t.get('id') is not None else '',
t.get('url')))
def thumbnail_tester(): def check_thumbnails(thumbnails):
def test_thumbnail(t): for t in thumbnails:
self.to_screen(f'[info] Testing thumbnail {t["id"]}') self.to_screen(f'[info] Testing thumbnail {t["id"]}')
try: try:
self.urlopen(HEADRequest(t['url'])) self.urlopen(HEADRequest(t['url']))
except network_exceptions as err: except network_exceptions as err:
self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...') self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
return False continue
return True yield t
return test_thumbnail
for i, t in enumerate(thumbnails): self._sort_thumbnails(thumbnails)
if t.get('id') is None: for i, t in enumerate(thumbnails):
t['id'] = '%d' % i if t.get('id') is None:
if t.get('width') and t.get('height'): t['id'] = '%d' % i
t['resolution'] = '%dx%d' % (t['width'], t['height']) if t.get('width') and t.get('height'):
t['url'] = sanitize_url(t['url']) t['resolution'] = '%dx%d' % (t['width'], t['height'])
t['url'] = sanitize_url(t['url'])
if self.params.get('check_formats'): if self.params.get('check_formats') is True:
info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse() info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1])).reverse()
else: else:
info_dict['thumbnails'] = thumbnails info_dict['thumbnails'] = thumbnails
def process_video_result(self, info_dict, download=True): def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video' assert info_dict.get('_type', 'video') == 'video'
@ -2252,7 +2259,6 @@ class YoutubeDL(object):
info_dict['requested_subtitles'] = self.process_subtitles( info_dict['requested_subtitles'] = self.process_subtitles(
info_dict['id'], subtitles, automatic_captions) info_dict['id'], subtitles, automatic_captions)
# We now pick which formats have to be downloaded
if info_dict.get('formats') is None: if info_dict.get('formats') is None:
# There's only one format available # There's only one format available
formats = [info_dict] formats = [info_dict]
@ -2335,6 +2341,9 @@ class YoutubeDL(object):
# TODO Central sorting goes here # TODO Central sorting goes here
if self.params.get('check_formats') is True:
formats = LazyList(self._check_formats(formats[::-1])).reverse()
if not formats or formats[0] is not info_dict: if not formats or formats[0] is not info_dict:
# only set the 'formats' fields if the original info_dict list them # only set the 'formats' fields if the original info_dict list them
# otherwise we end up with a circular reference, the first (and unique) # otherwise we end up with a circular reference, the first (and unique)

View file

@ -562,12 +562,16 @@ def parseOpts(overrideArguments=None):
help="Don't give any special preference to free containers (default)") help="Don't give any special preference to free containers (default)")
video_format.add_option( video_format.add_option(
'--check-formats', '--check-formats',
action='store_true', dest='check_formats', default=None, action='store_const', const='selected', dest='check_formats', default=None,
help='Check that the formats selected are actually downloadable') help='Check that the selected formats are actually downloadable')
video_format.add_option(
'--check-all-formats',
action='store_true', dest='check_formats',
help='Check all formats for whether they are actually downloadable')
video_format.add_option( video_format.add_option(
'--no-check-formats', '--no-check-formats',
action='store_false', dest='check_formats', action='store_false', dest='check_formats',
help='Do not check that the formats selected are actually downloadable') help='Do not check that the formats are actually downloadable')
video_format.add_option( video_format.add_option(
'-F', '--list-formats', '-F', '--list-formats',
action='store_true', dest='listformats', action='store_true', dest='listformats',

View file

@ -4050,6 +4050,8 @@ class LazyList(collections.abc.Sequence):
def __exhaust(self): def __exhaust(self):
self.__cache.extend(self.__iterable) self.__cache.extend(self.__iterable)
# Discard the emptied iterable to make it pickle-able
self.__iterable = []
return self.__cache return self.__cache
def exhaust(self): def exhaust(self):