Add slicing notation to --playlist-items

* Adds support for negative indices and step
* Adds `-I` as an alias for `--playlist-items`
* Deprecates `--playlist-start`, `--playlist-end`, `--playlist-reverse`, `--no-playlist-reverse`

Closes #2951, Closes #2853
This commit is contained in:
pukkandan 2022-06-17 10:18:21 +05:30
parent f0c9fb9682
commit 7e88d7d78f
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39
6 changed files with 306 additions and 175 deletions

View file

@ -427,16 +427,15 @@ You can also fork the project on github and run your fork's [build workflow](.gi
explicitly provided IP block in CIDR notation
## Video Selection:
--playlist-start NUMBER Playlist video to start at (default is 1)
--playlist-end NUMBER Playlist video to end at (default is last)
--playlist-items ITEM_SPEC Playlist video items to download. Specify
indices of the videos in the playlist
separated by commas like: "--playlist-items
1,2,5,8" if you want to download videos
indexed 1, 2, 5, 8 in the playlist. You can
specify range: "--playlist-items
1-3,7,10-13", it will download the videos at
index 1, 2, 3, 7, 10, 11, 12 and 13
-I, --playlist-items ITEM_SPEC Comma separated playlist_index of the videos
to download. You can specify a range using
"[START]:[STOP][:STEP]". For backward
compatibility, START-STOP is also supported.
Use negative indices to count from the right
and negative STEP to download in reverse
order. Eg: "-I 1:3,7,-5::2" used on a
playlist of size 15 will download the videos
at index 1,2,3,7,11,13,15
--min-filesize SIZE Do not download any videos smaller than SIZE
(e.g. 50k or 44.6m)
--max-filesize SIZE Do not download any videos larger than SIZE
@ -540,9 +539,6 @@ You can also fork the project on github and run your fork's [build workflow](.gi
is disabled). May be useful for bypassing
bandwidth throttling imposed by a webserver
(experimental)
--playlist-reverse Download playlist videos in reverse order
--no-playlist-reverse Download playlist videos in default order
(default)
--playlist-random Download playlist videos in random order
--xattr-set-filesize Set file xattribute ytdl.filesize with
expected file size
@ -2000,6 +1996,10 @@ While these options are redundant, they are still expected to be used due to the
--max-views COUNT --match-filter "view_count <=? COUNT"
--user-agent UA --add-header "User-Agent:UA"
--referer URL --add-header "Referer:URL"
--playlist-start NUMBER -I NUMBER:
--playlist-end NUMBER -I :NUMBER
--playlist-reverse -I ::-1
--no-playlist-reverse Default
#### Not recommended

View file

@ -23,6 +23,7 @@ from yt_dlp.postprocessor.common import PostProcessor
from yt_dlp.utils import (
ExtractorError,
LazyList,
OnDemandPagedList,
int_or_none,
match_filter_func,
)
@ -989,41 +990,79 @@ class TestYoutubeDL(unittest.TestCase):
self.assertEqual(res, [])
def test_playlist_items_selection(self):
entries = [{
'id': compat_str(i),
'title': compat_str(i),
INDICES, PAGE_SIZE = list(range(1, 11)), 3
def entry(i, evaluated):
evaluated.append(i)
return {
'id': str(i),
'title': str(i),
'url': TEST_URL,
} for i in range(1, 5)]
playlist = {
}
def pagedlist_entries(evaluated):
def page_func(n):
start = PAGE_SIZE * n
for i in INDICES[start: start + PAGE_SIZE]:
yield entry(i, evaluated)
return OnDemandPagedList(page_func, PAGE_SIZE)
def page_num(i):
return (i + PAGE_SIZE - 1) // PAGE_SIZE
def generator_entries(evaluated):
for i in INDICES:
yield entry(i, evaluated)
def list_entries(evaluated):
return list(generator_entries(evaluated))
def lazylist_entries(evaluated):
return LazyList(generator_entries(evaluated))
def get_downloaded_info_dicts(params, entries):
ydl = YDL(params)
ydl.process_ie_result({
'_type': 'playlist',
'id': 'test',
'entries': entries,
'extractor': 'test:playlist',
'extractor_key': 'test:playlist',
'webpage_url': 'http://example.com',
}
def get_downloaded_info_dicts(params):
ydl = YDL(params)
# make a deep copy because the dictionary and nested entries
# can be modified
ydl.process_ie_result(copy.deepcopy(playlist))
'entries': entries,
})
return ydl.downloaded_info_dicts
def test_selection(params, expected_ids):
results = [
(v['playlist_autonumber'] - 1, (int(v['id']), v['playlist_index']))
for v in get_downloaded_info_dicts(params)]
self.assertEqual(results, list(enumerate(zip(expected_ids, expected_ids))))
def test_selection(params, expected_ids, evaluate_all=False):
expected_ids = list(expected_ids)
if evaluate_all:
generator_eval = pagedlist_eval = INDICES
elif not expected_ids:
generator_eval = pagedlist_eval = []
else:
generator_eval = INDICES[0: max(expected_ids)]
pagedlist_eval = INDICES[PAGE_SIZE * page_num(min(expected_ids)) - PAGE_SIZE:
PAGE_SIZE * page_num(max(expected_ids))]
test_selection({}, [1, 2, 3, 4])
test_selection({'playlistend': 10}, [1, 2, 3, 4])
test_selection({'playlistend': 2}, [1, 2])
test_selection({'playliststart': 10}, [])
test_selection({'playliststart': 2}, [2, 3, 4])
test_selection({'playlist_items': '2-4'}, [2, 3, 4])
for name, func, expected_eval in (
('list', list_entries, INDICES),
('Generator', generator_entries, generator_eval),
('LazyList', lazylist_entries, generator_eval),
('PagedList', pagedlist_entries, pagedlist_eval),
):
evaluated = []
entries = func(evaluated)
results = [(v['playlist_autonumber'] - 1, (int(v['id']), v['playlist_index']))
for v in get_downloaded_info_dicts(params, entries)]
self.assertEqual(results, list(enumerate(zip(expected_ids, expected_ids))), f'Entries of {name} for {params}')
self.assertEqual(sorted(evaluated), expected_eval, f'Evaluation of {name} for {params}')
test_selection({}, INDICES)
test_selection({'playlistend': 20}, INDICES, True)
test_selection({'playlistend': 2}, INDICES[:2])
test_selection({'playliststart': 11}, [], True)
test_selection({'playliststart': 2}, INDICES[1:])
test_selection({'playlist_items': '2-4'}, INDICES[1:4])
test_selection({'playlist_items': '2,4'}, [2, 4])
test_selection({'playlist_items': '10'}, [])
test_selection({'playlist_items': '20'}, [], True)
test_selection({'playlist_items': '0'}, [])
# Tests for https://github.com/ytdl-org/youtube-dl/issues/10591
@ -1032,11 +1071,33 @@ class TestYoutubeDL(unittest.TestCase):
# Tests for https://github.com/yt-dlp/yt-dlp/issues/720
# https://github.com/yt-dlp/yt-dlp/issues/302
test_selection({'playlistreverse': True}, [4, 3, 2, 1])
test_selection({'playliststart': 2, 'playlistreverse': True}, [4, 3, 2])
test_selection({'playlistreverse': True}, INDICES[::-1])
test_selection({'playliststart': 2, 'playlistreverse': True}, INDICES[:0:-1])
test_selection({'playlist_items': '2,4', 'playlistreverse': True}, [4, 2])
test_selection({'playlist_items': '4,2'}, [4, 2])
# Tests for --playlist-items start:end:step
test_selection({'playlist_items': ':'}, INDICES, True)
test_selection({'playlist_items': '::1'}, INDICES, True)
test_selection({'playlist_items': '::-1'}, INDICES[::-1], True)
test_selection({'playlist_items': ':6'}, INDICES[:6])
test_selection({'playlist_items': ':-6'}, INDICES[:-5], True)
test_selection({'playlist_items': '-1:6:-2'}, INDICES[:4:-2], True)
test_selection({'playlist_items': '9:-6:-2'}, INDICES[8:3:-2], True)
test_selection({'playlist_items': '1:inf:2'}, INDICES[::2], True)
test_selection({'playlist_items': '-2:inf'}, INDICES[-2:], True)
test_selection({'playlist_items': ':inf:-1'}, [], True)
test_selection({'playlist_items': '0-2:2'}, [2])
test_selection({'playlist_items': '1-:2'}, INDICES[::2], True)
test_selection({'playlist_items': '0--2:2'}, INDICES[1:-1:2], True)
test_selection({'playlist_items': '10::3'}, [10], True)
test_selection({'playlist_items': '-1::3'}, [10], True)
test_selection({'playlist_items': '11::3'}, [], True)
test_selection({'playlist_items': '-15::2'}, INDICES[1::2], True)
test_selection({'playlist_items': '-15::15'}, [], True)
def test_urlopen_no_file_protocol(self):
# see https://github.com/ytdl-org/youtube-dl/issues/8227
ydl = YDL()

View file

@ -74,13 +74,13 @@ from .utils import (
ExtractorError,
GeoRestrictedError,
HEADRequest,
InAdvancePagedList,
ISO3166Utils,
LazyList,
MaxDownloadsReached,
Namespace,
PagedList,
PerRequestProxyHandler,
PlaylistEntries,
Popen,
PostProcessingError,
ReExtractInfo,
@ -1410,7 +1410,7 @@ class YoutubeDL:
else:
self.report_error('no suitable InfoExtractor for URL %s' % url)
def __handle_extraction_exceptions(func):
def _handle_extraction_exceptions(func):
@functools.wraps(func)
def wrapper(self, *args, **kwargs):
while True:
@ -1483,7 +1483,7 @@ class YoutubeDL:
self.to_screen('')
raise
@__handle_extraction_exceptions
@_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
ie_result = ie.extract(url)
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
@ -1666,105 +1666,14 @@ class YoutubeDL:
}
def __process_playlist(self, ie_result, download):
# We process each entry in the playlist
playlist = ie_result.get('title') or ie_result.get('id')
self.to_screen('[download] Downloading playlist: %s' % playlist)
"""Process each entry in the playlist"""
title = ie_result.get('title') or ie_result.get('id') or '<Untitled>'
self.to_screen(f'[download] Downloading playlist: {title}')
if 'entries' not in ie_result:
raise EntryNotInPlaylist('There are no entries')
MissingEntry = object()
incomplete_entries = bool(ie_result.get('requested_entries'))
if incomplete_entries:
def fill_missing_entries(entries, indices):
ret = [MissingEntry] * max(indices)
for i, entry in zip(indices, entries):
ret[i - 1] = entry
return ret
ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
playlist_results = []
playliststart = self.params.get('playliststart', 1)
playlistend = self.params.get('playlistend')
# For backwards compatibility, interpret -1 as whole list
if playlistend == -1:
playlistend = None
playlistitems_str = self.params.get('playlist_items')
playlistitems = None
if playlistitems_str is not None:
def iter_playlistitems(format):
for string_segment in format.split(','):
if '-' in string_segment:
start, end = string_segment.split('-')
for item in range(int(start), int(end) + 1):
yield int(item)
else:
yield int(string_segment)
playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
ie_entries = ie_result['entries']
if isinstance(ie_entries, list):
playlist_count = len(ie_entries)
msg = f'Collected {playlist_count} videos; downloading %d of them'
ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count
def get_entry(i):
return ie_entries[i - 1]
else:
msg = 'Downloading %d videos'
if not isinstance(ie_entries, (PagedList, LazyList)):
ie_entries = LazyList(ie_entries)
elif isinstance(ie_entries, InAdvancePagedList):
if ie_entries._pagesize == 1:
playlist_count = ie_entries._pagecount
def get_entry(i):
return YoutubeDL.__handle_extraction_exceptions(
lambda self, i: ie_entries[i - 1]
)(self, i)
entries, broken = [], False
items = playlistitems if playlistitems is not None else itertools.count(playliststart)
for i in items:
if i == 0:
continue
if playlistitems is None and playlistend is not None and playlistend < i:
break
entry = None
try:
entry = get_entry(i)
if entry is MissingEntry:
raise EntryNotInPlaylist()
except (IndexError, EntryNotInPlaylist):
if incomplete_entries:
raise EntryNotInPlaylist(f'Entry {i} cannot be found')
elif not playlistitems:
break
entries.append(entry)
try:
if entry is not None:
# TODO: Add auto-generated fields
self._match_entry(entry, incomplete=True, silent=True)
except (ExistingVideoReached, RejectedVideoReached):
broken = True
break
ie_result['entries'] = entries
# Save playlist_index before re-ordering
entries = [
((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
for i, entry in enumerate(entries, 1)
if entry is not None]
n_entries = len(entries)
if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
ie_result['playlist_count'] = n_entries
if not playlistitems and (playliststart != 1 or playlistend):
playlistitems = list(range(playliststart, playliststart + n_entries))
ie_result['requested_entries'] = playlistitems
all_entries = PlaylistEntries(self, ie_result)
entries = orderedSet(all_entries.get_requested_items())
ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*entries)) or ([], [])
n_entries, ie_result['playlist_count'] = len(entries), all_entries.full_count
_infojson_written = False
write_playlist_files = self.params.get('allow_playlist_files', True)
@ -1787,28 +1696,29 @@ class YoutubeDL:
if self.params.get('playlistrandom', False):
random.shuffle(entries)
x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
f'{format_field(ie_result, "playlist_count", " of %s")}')
self.to_screen(f'[{ie_result["extractor"]}] playlist {playlist}: {msg % n_entries}')
failures = 0
max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
for i, entry_tuple in enumerate(entries, 1):
playlist_index, entry = entry_tuple
if 'playlist-index' in self.params['compat_opts']:
playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
for i, (playlist_index, entry) in enumerate(entries, 1):
# TODO: Add auto-generated fields
if self._match_entry(entry, incomplete=True) is not None:
continue
if 'playlist-index' in self.params.get('compat_opts', []):
playlist_index = ie_result['requested_entries'][i - 1]
self.to_screen('[download] Downloading video %s of %s' % (
self._format_screen(i, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
# This __x_forwarded_for_ip thing is a bit ugly but requires
# minimal changes
if x_forwarded_for:
entry['__x_forwarded_for_ip'] = x_forwarded_for
extra = {
entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
entry_result = self.__process_iterable_entry(entry, download, {
'n_entries': n_entries,
'__last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
'__last_playlist_index': max(ie_result['requested_entries']),
'playlist_count': ie_result.get('playlist_count'),
'playlist_index': playlist_index,
'playlist_autonumber': i,
'playlist': playlist,
'playlist': title,
'playlist_id': ie_result.get('id'),
'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'),
@ -1818,20 +1728,17 @@ class YoutubeDL:
'webpage_url_basename': url_basename(ie_result['webpage_url']),
'webpage_url_domain': get_domain(ie_result['webpage_url']),
'extractor_key': ie_result['extractor_key'],
}
if self._match_entry(entry, incomplete=True) is not None:
continue
entry_result = self.__process_iterable_entry(entry, download, extra)
})
if not entry_result:
failures += 1
if failures >= max_failures:
self.report_error(
'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
break
playlist_results.append(entry_result)
ie_result['entries'] = playlist_results
entries[i - 1] = (playlist_index, entry_result)
# Update with processed data
ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*entries)) or ([], [])
# Write the updated info to json
if _infojson_written is True and self._write_info_json(
@ -1840,10 +1747,10 @@ class YoutubeDL:
return
ie_result = self.run_all_pps('playlist', ie_result)
self.to_screen(f'[download] Finished downloading playlist: {playlist}')
self.to_screen(f'[download] Finished downloading playlist: {title}')
return ie_result
@__handle_extraction_exceptions
@_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
return self.process_ie_result(
entry, download=download, extra_info=extra_info)

View file

@ -33,6 +33,7 @@ from .utils import (
DownloadCancelled,
DownloadError,
GeoUtils,
PlaylistEntries,
SameFileError,
decodeOption,
download_range_func,
@ -372,6 +373,12 @@ def validate_options(opts):
opts.parse_metadata = list(itertools.chain(*map(metadataparser_actions, parse_metadata)))
# Other options
if opts.playlist_items is not None:
try:
tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items))
except Exception as err:
raise ValueError(f'Invalid playlist-items {opts.playlist_items!r}: {err}')
geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country
if geo_bypass_code is not None:
try:

View file

@ -500,15 +500,19 @@ def create_parser():
selection.add_option(
'--playlist-start',
dest='playliststart', metavar='NUMBER', default=1, type=int,
help='Playlist video to start at (default is %default)')
help=optparse.SUPPRESS_HELP)
selection.add_option(
'--playlist-end',
dest='playlistend', metavar='NUMBER', default=None, type=int,
help='Playlist video to end at (default is last)')
help=optparse.SUPPRESS_HELP)
selection.add_option(
'--playlist-items',
'-I', '--playlist-items',
dest='playlist_items', metavar='ITEM_SPEC', default=None,
help='Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13')
help=(
'Comma seperated playlist_index of the videos to download. '
'You can specify a range using "[START]:[STOP][:STEP]". For backward compatibility, START-STOP is also supported. '
'Use negative indices to count from the right and negative STEP to download in reverse order. '
'Eg: "-I 1:3,7,-5::2" used on a playlist of size 15 will download the videos at index 1,2,3,7,11,13,15'))
selection.add_option(
'--match-title',
dest='matchtitle', metavar='REGEX',
@ -885,11 +889,11 @@ def create_parser():
downloader.add_option(
'--playlist-reverse',
action='store_true',
help='Download playlist videos in reverse order')
help=optparse.SUPPRESS_HELP)
downloader.add_option(
'--no-playlist-reverse',
action='store_false', dest='playlist_reverse',
help='Download playlist videos in default order (default)')
help=optparse.SUPPRESS_HELP)
downloader.add_option(
'--playlist-random',
action='store_true',

View file

@ -2609,6 +2609,16 @@ def get_exe_version(exe, args=['--version'],
return detect_exe_version(out, version_re, unrecognized) if out else False
def frange(start=0, stop=None, step=1):
    """Generate an arithmetic progression like range(), but allow float bounds.

    Unlike range(), the bounds may be floats, including infinity, in which
    case the generator is endless and must be bounded by the consumer.

    @param start    First value yielded. When "stop" is omitted, this value is
                    used as the stop and the progression starts at 0
    @param stop     Exclusive bound of the progression
    @param step     Increment; its sign selects the direction of the progression.
                    A zero step yields nothing
    """
    if stop is None:
        start, stop = 0, start
    if not step:
        # A zero step has no direction; stop explicitly instead of depending
        # on how "0 * bound" compares (that product is NaN for infinite bounds)
        return
    direction = 1 if step > 0 else -1
    current = start
    # Multiplying both sides by the direction lets one comparison serve both
    # ascending and descending progressions
    while direction * current < direction * stop:
        yield current
        current += step
class LazyList(collections.abc.Sequence):
"""Lazy immutable list from an iterable
Note that slices of a LazyList are lists and not LazyList"""
@ -2805,6 +2815,148 @@ class InAdvancePagedList(PagedList):
yield from page_results
class PlaylistEntries:
    """View over the entries of a playlist info dict, addressed by playlist index.

    Indexing and slicing use 1-based playlist indices with an inclusive stop and
    produce (playlist_index, entry) pairs. Entries are fetched one at a time
    through the underlying container so that lazy containers are evaluated only
    as far as the requested indices need.
    """

    # Placeholder stored at positions that a partially saved playlist does not contain
    MissingEntry = object()
    # Set once the underlying entries are a list or an out-of-range access was observed
    is_exhausted = False
    # Whether info_dict holds only the "requested_entries"; recomputed by _entries.
    # The class-level default keeps full_count safe if it is read first
    is_incomplete = False

    def __init__(self, ydl, info_dict):
        """
        @param ydl          Object providing "params", "report_warning", "_match_entry"
                            and (on its type) "_handle_extraction_exceptions"
        @param info_dict    Playlist info dict; "entries" is read on first access
        """
        self.ydl, self.info_dict = ydl, info_dict

    PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
        (?P<start>[+-]?\d+)?
        (?P<range>[:-]
            (?P<end>[+-]?\d+|inf(?:inite)?)?
            (?::(?P<step>[+-]?\d+))?
        )?''')

    @classmethod
    def parse_playlist_items(cls, string):
        """Parse a comma-separated item specification.

        Yields an int for a plain index and a slice(start, stop, step) for a
        range written as "[START]:[STOP][:STEP]" or "START-STOP". In a slice,
        "start" and "step" are int or None, and "stop" is float or None.

        @raises ValueError  On an empty segment, an unparsable segment or a zero step
        """
        for segment in string.split(','):
            if not segment:
                raise ValueError('There is two or more consecutive commas')
            mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
            if not mobj:
                raise ValueError(f'{segment!r} is not a valid specification')

            start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
            # The regex guarantees each matched group is a valid integer literal
            # (or an "inf" spelling for the end), so plain int()/float() suffice
            step = None if step is None else int(step)
            if step == 0:
                raise ValueError(f'Step in {segment!r} cannot be zero')
            if not has_range:
                yield int(start)
                continue

            if end is None:
                stop = None
            elif end.startswith('inf'):
                # float() understands "inf" but not the "infinite" spelling
                # that the regex also accepts
                stop = float('inf')
            else:
                stop = float(end)
            yield slice(None if start is None else int(start), stop, step)

    def get_requested_items(self):
        """Yield (playlist_index, entry) for every item selected by the params.

        Uses "playlist_items" when given, otherwise builds the specification from
        "playliststart"/"playlistend". Iteration ends early when the match
        filter signals ExistingVideoReached or RejectedVideoReached.
        """
        params = self.ydl.params
        playlist_items = params.get('playlist_items')
        playlist_start = params.get('playliststart', 1)
        playlist_end = params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlist_end in (-1, None):
            playlist_end = ''
        if not playlist_items:
            playlist_items = f'{playlist_start}:{playlist_end}'
        elif playlist_start != 1 or playlist_end:
            self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)

        for index in self.parse_playlist_items(playlist_items):
            for i, entry in self[index]:
                yield i, entry
                if entry is None:
                    # An entry can be None; there is nothing to match against
                    continue
                try:
                    # TODO: Add auto-generated fields
                    self.ydl._match_entry(entry, incomplete=True, silent=True)
                except (ExistingVideoReached, RejectedVideoReached):
                    return

    @property
    def full_count(self):
        """Total number of entries in the playlist, or None when it is not known"""
        if self.info_dict.get('playlist_count'):
            return self.info_dict['playlist_count']
        if self.is_exhausted and not self.is_incomplete:
            return len(self)
        if isinstance(self._entries, InAdvancePagedList) and self._entries._pagesize == 1:
            # With one entry per page, the page count is the entry count
            return self._entries._pagecount
        return None

    @functools.cached_property
    def _entries(self):
        """The entries container, computed once; also sets is_exhausted/is_incomplete.

        @raises EntryNotInPlaylist  When info_dict has no "entries"
        """
        entries = self.info_dict.get('entries')
        if entries is None:
            raise EntryNotInPlaylist('There are no entries')
        elif isinstance(entries, list):
            self.is_exhausted = True

        indices = self.info_dict.get('requested_entries')
        self.is_incomplete = bool(indices)
        if self.is_incomplete:
            assert self.is_exhausted
            # Rebuild a list addressable by playlist position, with a
            # placeholder wherever no entry was stored
            ret = [self.MissingEntry] * max(indices)
            for i, entry in zip(indices, entries):
                ret[i - 1] = entry
            return ret

        if isinstance(entries, (list, PagedList, LazyList)):
            return entries
        return LazyList(entries)

    @functools.cached_property
    def _getter(self):
        """Function mapping a 0-based position to its entry.

        The returned function raises PlaylistEntries.IndexError when the position
        is past the end, and EntryNotInPlaylist when an incomplete playlist
        does not contain the position.
        """
        if isinstance(self._entries, list):
            def get_entry(i):
                try:
                    entry = self._entries[i]
                except IndexError:
                    entry = self.MissingEntry
                    if not self.is_incomplete:
                        raise self.IndexError()
                if entry is self.MissingEntry:
                    # "i" is a 0-based position; report the 1-based playlist index
                    raise EntryNotInPlaylist(f'Entry {i + 1} cannot be found')
                return entry
        else:
            def get_entry(i):
                try:
                    return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
                except (LazyList.IndexError, PagedList.IndexError):
                    raise self.IndexError()
        return get_entry

    def __getitem__(self, idx):
        """Generate (playlist_index, entry) for an int or slice of playlist indices.

        Indices are 1-based and the stop is inclusive; negative values count from
        the end (which requires evaluating the length). Positions before the
        first entry are skipped silently.
        """
        if isinstance(idx, int):
            idx = slice(idx, idx)

        # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
        step = 1 if idx.step is None else idx.step
        if idx.start is None:
            start = 0 if step > 0 else len(self) - 1
        else:
            start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start

        # NB: Do not call len(self) when idx == [:]
        if idx.stop is None:
            stop = 0 if step < 0 else float('inf')
        else:
            stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
        # frange() excludes its bound; move it one position further to include the stop
        stop += 1 if step > 0 else -1

        for i in frange(start, stop, step):
            if i < 0:
                continue
            try:
                entry = self._getter(i)
            except self.IndexError:
                # Ran past the end: nothing further exists when ascending, while a
                # descending range may still reach valid positions
                self.is_exhausted = True
                if step > 0:
                    break
                continue
            except IndexError:
                # Any other IndexError is tolerated only once the end is known
                if self.is_exhausted:
                    break
                raise
            yield i + 1, entry

    def __len__(self):
        """Number of entries; evaluates the whole container"""
        return len(tuple(self[:]))

    class IndexError(IndexError):
        """Raised internally when a position is past the end of the entries"""
def uppercase_escape(s):
unicode_escape = codecs.getdecoder('unicode_escape')
return re.sub(