Add option --break-match-filters

* Deprecates `--break-on-reject`

Closes #5962
This commit is contained in:
pukkandan 2023-03-04 01:13:05 +05:30
parent d21056f4cf
commit fe2ce85aff
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39
5 changed files with 62 additions and 31 deletions

View file

@ -114,7 +114,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
* **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata`
* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-on-reject` etc
* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-match-filters` etc
* **Improvements**: Regex and other operators in `--format`/`--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc
@ -519,7 +519,10 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
dogs" (caseless). Use "--match-filter -" to
interactively ask whether to download each
video
--no-match-filter Do not use generic video filter (default)
--no-match-filter Do not use any --match-filter (default)
--break-match-filters FILTER Same as "--match-filters" but stops the
download process when a video is rejected
--no-break-match-filters Do not use any --break-match-filters (default)
--no-playlist Download only the video, if the URL refers
to a video and a playlist
--yes-playlist Download the playlist, if the URL refers to
@ -533,8 +536,6 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
--max-downloads NUMBER Abort after downloading NUMBER files
--break-on-existing Stop the download process when encountering
a file that is in the archive
--break-on-reject Stop the download process when encountering
a file that has been filtered out
--break-per-input Alters --max-downloads, --break-on-existing,
--break-match-filters, and autonumber to reset
per input URL
@ -2133,6 +2134,7 @@ While these options are redundant, they are still expected to be used due to the
--reject-title REGEX --match-filter "title !~= (?i)REGEX"
--min-views COUNT --match-filter "view_count >=? COUNT"
--max-views COUNT --match-filter "view_count <=? COUNT"
--break-on-reject Use --break-match-filters
--user-agent UA --add-header "User-Agent:UA"
--referer URL --add-header "Referer:URL"
--playlist-start NUMBER -I NUMBER:

View file

@ -300,8 +300,6 @@ class YoutubeDL:
Videos already present in the file are not downloaded again.
break_on_existing: Stop the download process after attempting to download a
file that is in the archive.
break_on_reject: Stop the download process when encountering a video that
has been filtered out.
break_per_url: Whether break_on_reject and break_on_existing
should act on each input URL as opposed to for the entire queue
cookiefile: File name or text stream from where cookies should be read and dumped to
@ -414,6 +412,8 @@ class YoutubeDL:
- If it returns None, the video is downloaded.
- If it returns utils.NO_DEFAULT, the user is interactively
asked whether to download the video.
- Raise utils.DownloadCancelled(msg) to abort remaining
downloads when a video is rejected.
match_filter_func in utils.py is one example for this.
no_color: Do not emit color codes in output.
geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
@ -483,6 +483,9 @@ class YoutubeDL:
The following options are deprecated and may be removed in the future:
break_on_reject: Stop the download process when encountering a video that
has been filtered out.
- `raise DownloadCancelled(msg)` in match_filter instead
force_generic_extractor: Force downloader to use the generic extractor
- Use allowed_extractors = ['generic', 'default']
playliststart: - Use playlist_items
@ -1407,31 +1410,44 @@ class YoutubeDL:
return 'Skipping "%s" because it is age restricted' % video_title
match_filter = self.params.get('match_filter')
if match_filter is not None:
if match_filter is None:
return None
cancelled = None
try:
try:
ret = match_filter(info_dict, incomplete=incomplete)
except TypeError:
# For backward compatibility
ret = None if incomplete else match_filter(info_dict)
if ret is NO_DEFAULT:
while True:
filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
reply = input(self._format_screen(
f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
if reply in {'y', ''}:
return None
elif reply == 'n':
return f'Skipping {video_title}'
elif ret is not None:
return ret
return None
except DownloadCancelled as err:
if err.msg is not NO_DEFAULT:
raise
ret, cancelled = err.msg, err
if ret is NO_DEFAULT:
while True:
filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
reply = input(self._format_screen(
f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
if reply in {'y', ''}:
return None
elif reply == 'n':
if cancelled:
raise type(cancelled)(f'Skipping {video_title}')
return f'Skipping {video_title}'
return ret
if self.in_download_archive(info_dict):
reason = '%s has already been recorded in the archive' % video_title
break_opt, break_err = 'break_on_existing', ExistingVideoReached
else:
reason = check_filter()
break_opt, break_err = 'break_on_reject', RejectedVideoReached
try:
reason = check_filter()
except DownloadCancelled as e:
reason, break_opt, break_err = e.msg, 'match_filter', type(e)
else:
break_opt, break_err = 'break_on_reject', RejectedVideoReached
if reason is not None:
if not silent:
self.to_screen('[download] ' + reason)

View file

@ -403,7 +403,7 @@ def validate_options(opts):
except Exception:
raise ValueError('unsupported geo-bypass country or ip-block')
opts.match_filter = match_filter_func(opts.match_filter)
opts.match_filter = match_filter_func(opts.match_filter, opts.breaking_match_filter)
if opts.download_archive is not None:
opts.download_archive = expand_path(opts.download_archive)

View file

@ -613,8 +613,16 @@ def create_parser():
'Use "--match-filter -" to interactively ask whether to download each video'))
selection.add_option(
'--no-match-filter',
metavar='FILTER', dest='match_filter', action='store_const', const=None,
help='Do not use generic video filter (default)')
dest='match_filter', action='store_const', const=None,
help='Do not use any --match-filter (default)')
selection.add_option(
'--break-match-filters',
metavar='FILTER', dest='breaking_match_filter', action='append',
help='Same as "--match-filters" but stops the download process when a video is rejected')
selection.add_option(
'--no-break-match-filters',
dest='breaking_match_filter', action='store_const', const=None,
help='Do not use any --break-match-filters (default)')
selection.add_option(
'--no-playlist',
action='store_true', dest='noplaylist', default=False,
@ -646,11 +654,11 @@ def create_parser():
selection.add_option(
'--break-on-reject',
action='store_true', dest='break_on_reject', default=False,
help='Stop the download process when encountering a file that has been filtered out')
help=optparse.SUPPRESS_HELP)
selection.add_option(
'--break-per-input',
action='store_true', dest='break_per_url', default=False,
help='Alters --max-downloads, --break-on-existing, --break-on-reject, and autonumber to reset per input URL')
help='Alters --max-downloads, --break-on-existing, --break-match-filter, and autonumber to reset per input URL')
selection.add_option(
'--no-break-per-input',
action='store_false', dest='break_per_url',

View file

@ -1230,8 +1230,8 @@ class ExistingVideoReached(DownloadCancelled):
class RejectedVideoReached(DownloadCancelled):
""" --break-on-reject triggered """
msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
""" --break-match-filter triggered """
msg = 'Encountered a video that did not match filter, stopping due to --break-match-filter'
class MaxDownloadsReached(DownloadCancelled):
@ -3911,16 +3911,21 @@ def match_str(filter_str, dct, incomplete=False):
for filter_part in re.split(r'(?<!\\)&', filter_str))
def match_filter_func(filters):
if not filters:
def match_filter_func(filters, breaking_filters=None):
if not filters and not breaking_filters:
return None
filters = set(variadic(filters))
breaking_filters = match_filter_func(breaking_filters) or (lambda _, __: None)
filters = set(variadic(filters or []))
interactive = '-' in filters
if interactive:
filters.remove('-')
def _match_func(info_dict, incomplete=False):
ret = breaking_filters(info_dict, incomplete)
if ret is not None:
raise RejectedVideoReached(ret)
if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
return NO_DEFAULT if interactive and not incomplete else None
else: