[ie] Handle fragmented formats in _remove_duplicate_formats (#11637)

Authored by: Grub4K
This commit is contained in:
Simon Sawicki 2024-11-27 00:05:07 +01:00 committed by GitHub
parent 4b5eec0aaa
commit e0500cbf79
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -1854,12 +1854,26 @@ class InfoExtractor:
@staticmethod
def _remove_duplicate_formats(formats):
    """Remove duplicate entries from ``formats`` in place, keeping first occurrences.

    Each format dict is classified by its ``'fragments'`` value:

    * callable ``fragments``: the format is always kept; the callable is
      never invoked or inspected here.
    * non-empty ``fragments``: formats are compared by the set of their
      fragment URLs. A fragment contributes its ``'url'`` if that is truthy,
      otherwise ``urljoin(f['fragment_base_url'], fragment['path'])``.
    * anything else: formats are compared by ``f['url']``.

    The list object itself is mutated (``formats[:] = ...``), so callers
    holding a reference to it observe the result. Nothing is returned.

    Raises:
        KeyError: if a non-fragmented format has no ``'url'``, or if a
            fragment without a truthy ``'url'`` lacks ``'path'`` or its
            format lacks ``'fragment_base_url'``.
    """
    seen_urls = set()
    seen_fragment_urls = set()
    unique_formats = []
    for f in formats:
        fragments = f.get('fragments')
        if callable(fragments):
            # Callable fragments are not resolved here, so there is
            # nothing to compare; such formats are always kept
            unique_formats.append(f)

        elif fragments:
            # The top-level URL is ignored for fragmented formats; only the
            # fragment URLs decide identity.
            # NOTE: frozenset makes the comparison insensitive to fragment
            # order and to repeated fragment URLs
            fragment_urls = frozenset(
                fragment.get('url') or urljoin(f['fragment_base_url'], fragment['path'])
                for fragment in fragments)
            if fragment_urls not in seen_fragment_urls:
                seen_fragment_urls.add(fragment_urls)
                unique_formats.append(f)

        elif f['url'] not in seen_urls:
            seen_urls.add(f['url'])
            unique_formats.append(f)

    # Slice assignment keeps the caller's list object and replaces its contents
    formats[:] = unique_formats
def _is_valid_url(self, url, video_id, item='video', headers={}): def _is_valid_url(self, url, video_id, item='video', headers={}):