From c36513f1be2ef3d3cec864accbffda1afaa06ffd Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Tue, 21 May 2024 09:44:41 +1200 Subject: [PATCH 01/48] [rh:requests] Update to `requests` 2.32.0 (#9980) Authored by: coletdjnz --- pyproject.toml | 2 +- yt_dlp/networking/_requests.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5fadd1449..74d7ff323 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ dependencies = [ "certifi", "mutagen", "pycryptodomex", - "requests>=2.31.0,<3", + "requests>=2.32.0,<3", "urllib3>=1.26.17,<3", "websockets>=12.0", ] diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py index e3edc77f3..75eee8824 100644 --- a/yt_dlp/networking/_requests.py +++ b/yt_dlp/networking/_requests.py @@ -21,8 +21,8 @@ urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.s if urllib3_version < (1, 26, 17): raise ImportError('Only urllib3 >= 1.26.17 is supported') -if requests.__build__ < 0x023100: - raise ImportError('Only requests >= 2.31.0 is supported') +if requests.__build__ < 0x023200: + raise ImportError('Only requests >= 2.32.0 is supported') import requests.adapters import requests.utils @@ -181,9 +181,13 @@ class RequestsHTTPAdapter(requests.adapters.HTTPAdapter): return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs) def cert_verify(*args, **kwargs): - # lean on SSLContext for cert verification + # Lean on our SSLContext for cert verification pass + def _get_connection(self, request, *_, proxies=None, **__): + # Lean on our SSLContext for cert verification + return self.get_connection(request.url, proxies) + class RequestsSession(requests.sessions.Session): """ From 6e36d17f404556f0e3a43f441c477a71a91877d9 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 20 May 2024 18:01:17 -0500 Subject: [PATCH 02/48] [build] Exclude `requests` from `py2exe` 
(#9982) Authored by: bashonly --- README.md | 2 +- bundle/py2exe.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index cdd57b024..ad98af7c4 100644 --- a/README.md +++ b/README.md @@ -263,7 +263,7 @@ You can also run `make yt-dlp` instead to compile only the binary without updati ### Standalone Py2Exe Builds (Windows) -While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and needs VC++14** on the target computer to run. +While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi`/`requests` and need VC++14** on the target computer to run. If you wish to build it anyway, install Python (if it is not already installed) and you can run the following commands: diff --git a/bundle/py2exe.py b/bundle/py2exe.py index 281167492..403de0024 100755 --- a/bundle/py2exe.py +++ b/bundle/py2exe.py @@ -42,9 +42,9 @@ def main(): # py2exe cannot import Crypto 'Crypto', 'Cryptodome', - # py2exe appears to confuse this with our socks library. - # We don't use pysocks and urllib3.contrib.socks would fail to import if tried. 
- 'urllib3.contrib.socks' + # py2exe builds fail to run with requests >=2.32.0 + 'requests', + 'urllib3' ], 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], # Modules that are only imported dynamically must be added here From 3584b8390bd21c0393a3079eeee71aed56a1c1d8 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 20 May 2024 18:09:28 -0500 Subject: [PATCH 03/48] [ie/tiktok] Add `device_id` extractor-arg (#9951) Authored by: bashonly --- README.md | 1 + yt_dlp/extractor/tiktok.py | 31 +++++++++++++++++++++---------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index ad98af7c4..1029d1a6d 100644 --- a/README.md +++ b/README.md @@ -1815,6 +1815,7 @@ The following extractors use this feature: * `manifest_app_version`: Default numeric app version to use with mobile API calls, e.g. `2023401020` * `aid`: Default app ID to use with mobile API calls, e.g. `1180` * `app_info`: Enable mobile API extraction with one or more app info strings in the format of `<iid>/[app_name]/[app_version]/[manifest_app_version]/[aid]`, where `iid` is the unique app install ID. `iid` is the only required value; all other values and their `/` separators can be omitted, e.g. `tiktok:app_info=1234567890123456789` or `tiktok:app_info=123,456/trill///1180,789//34.0.1/340001` +* `device_id`: Enable mobile API extraction with a genuine device ID to be used with mobile API calls.
Default is a random 19-digit string #### rokfinchannel * `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks` diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 2fb41ba79..6d0d7eea3 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -1,8 +1,8 @@ +import functools import itertools import json import random import re -import string import time import uuid @@ -15,6 +15,7 @@ from ..utils import ( UnsupportedError, UserNotLive, determine_ext, + filter_dict, format_field, int_or_none, join_nonempty, @@ -49,11 +50,21 @@ class TikTokBaseIE(InfoExtractor): _APP_INFO = None _APP_USER_AGENT = None - @property + @functools.cached_property def _KNOWN_APP_INFO(self): - return self._configuration_arg('app_info', ie_key=TikTokIE) + # If we have a genuine device ID, we may not need any IID + default = [''] if self._KNOWN_DEVICE_ID else [] + return self._configuration_arg('app_info', default, ie_key=TikTokIE) - @property + @functools.cached_property + def _KNOWN_DEVICE_ID(self): + return self._configuration_arg('device_id', [None], ie_key=TikTokIE)[0] + + @functools.cached_property + def _DEVICE_ID(self): + return self._KNOWN_DEVICE_ID or str(random.randint(7250000000000000000, 7351147085025500000)) + + @functools.cached_property def _API_HOSTNAME(self): return self._configuration_arg( 'api_hostname', ['api16-normal-c-useast1a.tiktokv.com'], ie_key=TikTokIE)[0] @@ -115,7 +126,7 @@ class TikTokBaseIE(InfoExtractor): }, query=query) def _build_api_query(self, query): - return { + return filter_dict({ **query, 'device_platform': 'android', 'os': 'android', @@ -156,10 +167,10 @@ class TikTokBaseIE(InfoExtractor): 'build_number': self._APP_INFO['app_version'], 'region': 'US', 'ts': int(time.time()), - 'iid': self._APP_INFO['iid'], - 'device_id': random.randint(7250000000000000000, 7351147085025500000), + 'iid': self._APP_INFO.get('iid'), + 'device_id': self._DEVICE_ID, 'openudid': 
''.join(random.choices('0123456789abcdef', k=16)), - } + }) def _call_api(self, ep, query, video_id, fatal=True, note='Downloading API JSON', errnote='Unable to download API page'): @@ -848,7 +859,7 @@ class TikTokUserIE(TikTokBaseIE): 'max_cursor': 0, 'min_cursor': 0, 'retry_type': 'no_retry', - 'device_id': ''.join(random.choices(string.digits, k=19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. + 'device_id': self._DEVICE_ID, # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. } for page in itertools.count(1): @@ -896,7 +907,7 @@ class TikTokBaseListIE(TikTokBaseIE): # XXX: Conventionally, base classes shoul 'cursor': 0, 'count': 20, 'type': 5, - 'device_id': ''.join(random.choices(string.digits, k=19)) + 'device_id': self._DEVICE_ID, } for page in itertools.count(1): From 4ccd73fea0f6f4be343e1ec7f22dd03799addcf8 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 20 May 2024 18:11:24 -0500 Subject: [PATCH 04/48] [ie/tiktok] Extract all web formats (#9960) Closes #9506 Authored by: bashonly --- yt_dlp/extractor/tiktok.py | 122 ++++++++++++++++++++++++++++--------- 1 file changed, 94 insertions(+), 28 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 6d0d7eea3..c96fa5038 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -20,6 +20,8 @@ from ..utils import ( int_or_none, join_nonempty, merge_dicts, + mimetype2ext, + parse_qs, qualities, remove_start, srt_subtitles_timecode, @@ -250,23 +252,22 @@ class TikTokBaseIE(InfoExtractor): }) return subtitles + def _parse_url_key(self, url_key): + format_id, codec, res, bitrate = self._search_regex( + r'v[^_]+_(?P<id>(?P<codec>[^_]+)_(?P<res>\d+p)_(?P<bitrate>\d+))', url_key, + 'url key', default=(None, None, None, None), group=('id', 'codec', 'res', 'bitrate')) + if not format_id: + return {}, None + return { + 'format_id': format_id, + 'vcodec': 'h265' if codec
== 'bytevc1' else codec, + 'tbr': int_or_none(bitrate, scale=1000) or None, + 'quality': qualities(self.QUALITIES)(res), + }, res + def _parse_aweme_video_app(self, aweme_detail): aweme_id = aweme_detail['aweme_id'] video_info = aweme_detail['video'] - - def parse_url_key(url_key): - format_id, codec, res, bitrate = self._search_regex( - r'v[^_]+_(?P<id>(?P<codec>[^_]+)_(?P<res>\d+p)_(?P<bitrate>\d+))', url_key, - 'url key', default=(None, None, None, None), group=('id', 'codec', 'res', 'bitrate')) - if not format_id: - return {}, None - return { - 'format_id': format_id, - 'vcodec': 'h265' if codec == 'bytevc1' else codec, - 'tbr': int_or_none(bitrate, scale=1000) or None, - 'quality': qualities(self.QUALITIES)(res), - }, res - known_resolutions = {} def audio_meta(url): @@ -281,7 +282,7 @@ class TikTokBaseIE(InfoExtractor): } if ext == 'mp3' or '-music-' in url else {} def extract_addr(addr, add_meta={}): - parsed_meta, res = parse_url_key(addr.get('url_key', '')) + parsed_meta, res = self._parse_url_key(addr.get('url_key', '')) is_bytevc2 = parsed_meta.get('vcodec') == 'bytevc2' if res: known_resolutions.setdefault(res, {}).setdefault('height', int_or_none(addr.get('height'))) @@ -295,7 +296,7 @@ class TikTokBaseIE(InfoExtractor): 'acodec': 'aac', 'source_preference': -2 if 'aweme/v1' in url else -1, # Downloads from API might get blocked **add_meta, **parsed_meta, - # bytevc2 is bytedance's proprietary (unplayable) video codec + # bytevc2 is bytedance's own custom h266/vvc codec, as-of-yet unplayable 'preference': -100 if is_bytevc2 else -1, 'format_note': join_nonempty( add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None, @@ -307,6 +308,7 @@ class TikTokBaseIE(InfoExtractor): formats = [] width = int_or_none(video_info.get('width')) height = int_or_none(video_info.get('height')) + ratio = try_call(lambda: width / height) or 0.5625 if video_info.get('play_addr'): formats.extend(extract_addr(video_info['play_addr'], { 'format_id': 'play_addr', @@ -323,8 +325,8 @@ class
TikTokBaseIE(InfoExtractor): 'format_id': 'download_addr', 'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''), 'vcodec': 'h264', - 'width': dl_width or width, - 'height': try_call(lambda: int(dl_width / 0.5625)) or height, # download_addr['height'] is wrong + 'width': dl_width, + 'height': try_call(lambda: int(dl_width / ratio)), # download_addr['height'] is wrong 'preference': -2 if video_info.get('has_watermark') else -1, })) if video_info.get('play_addr_h264'): @@ -431,26 +433,88 @@ class TikTokBaseIE(InfoExtractor): formats = [] width = int_or_none(video_info.get('width')) height = int_or_none(video_info.get('height')) + ratio = try_call(lambda: width / height) or 0.5625 + COMMON_FORMAT_INFO = { + 'ext': 'mp4', + 'vcodec': 'h264', + 'acodec': 'aac', + } + + for bitrate_info in traverse_obj(video_info, ('bitrateInfo', lambda _, v: v['PlayAddr']['UrlList'])): + format_info, res = self._parse_url_key( + traverse_obj(bitrate_info, ('PlayAddr', 'UrlKey', {str})) or '') + # bytevc2 is bytedance's own custom h266/vvc codec, as-of-yet unplayable + is_bytevc2 = format_info.get('vcodec') == 'bytevc2' + format_info.update({ + 'format_note': 'UNPLAYABLE' if is_bytevc2 else None, + 'preference': -100 if is_bytevc2 else -1, + 'filesize': traverse_obj(bitrate_info, ('PlayAddr', 'DataSize', {int_or_none})), + }) + + if dimension := (res and int(res[:-1])): + if dimension == 540: # '540p' is actually 576p + dimension = 576 + if ratio < 1: # portrait: res/dimension is width + y = int(dimension / ratio) + format_info.update({ + 'width': dimension, + 'height': y - (y % 2), + }) + else: # landscape: res/dimension is height + x = int(dimension * ratio) + format_info.update({ + 'width': x - (x % 2), + 'height': dimension, + }) + + for video_url in traverse_obj(bitrate_info, ('PlayAddr', 'UrlList', ..., {url_or_none})): + formats.append({ + **COMMON_FORMAT_INFO, + **format_info, + 'url': self._proto_relative_url(video_url), + }) + + # We 
don't have res string for play formats, but need quality for sorting & de-duplication + play_quality = traverse_obj(formats, (lambda _, v: v['width'] == width, 'quality', any)) for play_url in traverse_obj(video_info, ('playAddr', ((..., 'src'), None), {url_or_none})): formats.append({ + **COMMON_FORMAT_INFO, + 'format_id': 'play', 'url': self._proto_relative_url(play_url), - 'ext': 'mp4', 'width': width, 'height': height, + 'quality': play_quality, }) for download_url in traverse_obj(video_info, (('downloadAddr', ('download', 'url')), {url_or_none})): formats.append({ + **COMMON_FORMAT_INFO, 'format_id': 'download', 'url': self._proto_relative_url(download_url), - 'ext': 'mp4', - 'width': width, - 'height': height, }) self._remove_duplicate_formats(formats) + for f in traverse_obj(formats, lambda _, v: 'unwatermarked' not in v['url']): + f.update({ + 'format_note': join_nonempty(f.get('format_note'), 'watermarked', delim=', '), + 'preference': f.get('preference') or -2, + }) + + # Is it a slideshow with only audio for download? 
+ if not formats and traverse_obj(music_info, ('playUrl', {url_or_none})): + audio_url = music_info['playUrl'] + ext = traverse_obj(parse_qs(audio_url), ( + 'mime_type', -1, {lambda x: x.replace('_', '/')}, {mimetype2ext})) or 'm4a' + formats.append({ + 'format_id': 'audio', + 'url': self._proto_relative_url(audio_url), + 'ext': ext, + 'acodec': 'aac' if ext == 'm4a' else ext, + 'vcodec': 'none', + }) + thumbnails = [] for thumb_url in traverse_obj(aweme_detail, ( (None, 'video'), ('thumbnail', 'cover', 'dynamicCover', 'originCover'), {url_or_none})): @@ -462,10 +526,17 @@ class TikTokBaseIE(InfoExtractor): return { 'id': video_id, + **traverse_obj(music_info, { + 'track': ('title', {str}), + 'album': ('album', {str}, {lambda x: x or None}), + 'artists': ('authorName', {str}, {lambda x: [x] if x else None}), + 'duration': ('duration', {int_or_none}), + }), **traverse_obj(aweme_detail, { 'title': ('desc', {str}), 'description': ('desc', {str}), - 'duration': ('video', 'duration', {int_or_none}), + # audio-only slideshows have a video duration of 0 and an actual audio duration + 'duration': ('video', 'duration', {int_or_none}, {lambda x: x or None}), 'timestamp': ('createTime', {int_or_none}), }), **traverse_obj(author_info or aweme_detail, { @@ -480,11 +551,6 @@ class TikTokBaseIE(InfoExtractor): 'repost_count': 'shareCount', 'comment_count': 'commentCount', }, expected_type=int_or_none), - **traverse_obj(music_info, { - 'track': ('title', {str}), - 'album': ('album', {str}, {lambda x: x or None}), - 'artists': ('authorName', {str}, {lambda x: [x] if x else None}), - }), 'channel_id': channel_id, 'uploader_url': user_url, 'formats': formats, From 3f7999533ebe41c2a579d91b4e4cb211cfcd3bc0 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Wed, 22 May 2024 16:22:25 +0200 Subject: [PATCH 05/48] [rh:requests] Patch support for `requests` 2.32.2+ (#9992) Authored by: Grub4K --- .github/workflows/build.yml | 14 +++++++++++--- README.md | 2 +- bundle/py2exe.py | 6 +++--- 
pyproject.toml | 7 +++++-- yt_dlp/networking/_requests.py | 20 ++++++++++++++++---- 5 files changed, 36 insertions(+), 13 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d9352fedd..55cf3b3a2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -360,7 +360,7 @@ jobs: - name: Install Requirements run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds python devscripts/install_deps.py -o --include build - python devscripts/install_deps.py --include py2exe --include curl-cffi + python devscripts/install_deps.py --include curl-cffi python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl" - name: Prepare @@ -369,12 +369,20 @@ jobs: python devscripts/make_lazy_extractors.py - name: Build run: | - python -m bundle.py2exe - Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe python -m bundle.pyinstaller python -m bundle.pyinstaller --onedir + Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_real.exe Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip + - name: Install Requirements (py2exe) + run: | + python devscripts/install_deps.py --include py2exe + - name: Build (py2exe) + run: | + python -m bundle.py2exe + Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe + Move-Item ./dist/yt-dlp_real.exe ./dist/yt-dlp.exe + - name: Verify --update-to if: vars.UPDATE_TO_VERIFICATION run: | diff --git a/README.md b/README.md index 1029d1a6d..2c909976a 100644 --- a/README.md +++ b/README.md @@ -263,7 +263,7 @@ You can also run `make yt-dlp` instead to compile only the binary without updati ### Standalone Py2Exe Builds (Windows) -While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi`/`requests` and need VC++14** on the target computer to run. 
+While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and need VC++14** on the target computer to run. If you wish to build it anyway, install Python (if it is not already installed) and you can run the following commands: diff --git a/bundle/py2exe.py b/bundle/py2exe.py index 403de0024..281167492 100755 --- a/bundle/py2exe.py +++ b/bundle/py2exe.py @@ -42,9 +42,9 @@ def main(): # py2exe cannot import Crypto 'Crypto', 'Cryptodome', - # py2exe builds fail to run with requests >=2.32.0 - 'requests', - 'urllib3' + # py2exe appears to confuse this with our socks library. + # We don't use pysocks and urllib3.contrib.socks would fail to import if tried. + 'urllib3.contrib.socks' ], 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], # Modules that are only imported dynamically must be added here diff --git a/pyproject.toml b/pyproject.toml index 74d7ff323..b9a36ba6d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ dependencies = [ "certifi", "mutagen", "pycryptodomex", - "requests>=2.32.0,<3", + "requests>=2.31.0,<3", "urllib3>=1.26.17,<3", "websockets>=12.0", ] @@ -73,7 +73,10 @@ pyinstaller = [ "pyinstaller>=6.3; sys_platform!='darwin'", "pyinstaller==5.13.2; sys_platform=='darwin'", # needed for curl_cffi ] -py2exe = ["py2exe>=0.12"] +py2exe = [ + "py2exe>=0.12", + "requests==2.31.*", +] [project.urls] Documentation = "https://github.com/yt-dlp/yt-dlp#readme" diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py index 75eee8824..6397a2c0c 100644 --- a/yt_dlp/networking/_requests.py +++ b/yt_dlp/networking/_requests.py @@ -21,13 +21,14 @@ urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.s if urllib3_version < (1, 26, 17): raise ImportError('Only urllib3 >= 1.26.17 is supported') -if requests.__build__ < 0x023200: - raise 
ImportError('Only requests >= 2.32.0 is supported') +if requests.__build__ < 0x023100: + raise ImportError('Only requests >= 2.31.0 is supported') import requests.adapters import requests.utils import urllib3.connection import urllib3.exceptions +import urllib3.util from ._helper import ( InstanceStoreMixin, @@ -180,14 +181,25 @@ class RequestsHTTPAdapter(requests.adapters.HTTPAdapter): extra_kwargs['proxy_ssl_context'] = self._proxy_ssl_context return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs) + # Skip `requests` internal verification; we use our own SSLContext + # requests 2.31.0+ def cert_verify(*args, **kwargs): - # Lean on our SSLContext for cert verification pass + # requests 2.31.0-2.32.1 def _get_connection(self, request, *_, proxies=None, **__): - # Lean on our SSLContext for cert verification return self.get_connection(request.url, proxies) + # requests 2.32.2+: Reimplementation without `_urllib3_request_context` + def get_connection_with_tls_context(self, request, verify, proxies=None, cert=None): + url = urllib3.util.parse_url(request.url).url + + manager = self.poolmanager + if proxy := select_proxy(url, proxies): + manager = self.proxy_manager_for(proxy) + + return manager.connection_from_url(url) + class RequestsSession(requests.sessions.Session): """ From 78c57cc0e0998b8ed90e4306f410aa4be4115cd7 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 22 May 2024 09:30:25 -0500 Subject: [PATCH 06/48] [build] `macos` job requires `setuptools<70` (#9993) Authored by: bashonly --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index b9a36ba6d..8e3bce4bf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,6 +62,7 @@ build = [ "build", "hatchling", "pip", + "setuptools>=66.1.0,<70", "wheel", ] dev = [ From eef1e9f44ff14c5e65b759bb1eafa3946cdaf719 Mon Sep 17 00:00:00 2001 From: bashonly 
<88596187+bashonly@users.noreply.github.com> Date: Wed, 22 May 2024 17:17:10 -0500 Subject: [PATCH 07/48] [ie/tiktok] Fix subtitles extraction (#9961) Authored by: bashonly --- yt_dlp/extractor/tiktok.py | 56 ++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index c96fa5038..7772dd1f2 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -212,7 +212,31 @@ class TikTokBaseIE(InfoExtractor): raise ExtractorError('Unable to find video in feed', video_id=aweme_id) return self._parse_aweme_video_app(aweme_detail) - def _get_subtitles(self, aweme_detail, aweme_id): + def _extract_web_data_and_status(self, url, video_id, fatal=True): + webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'}, fatal=fatal) or '' + video_data, status = {}, None + + if universal_data := self._get_universal_data(webpage, video_id): + self.write_debug('Found universal data for rehydration') + status = traverse_obj(universal_data, ('webapp.video-detail', 'statusCode', {int})) or 0 + video_data = traverse_obj(universal_data, ('webapp.video-detail', 'itemInfo', 'itemStruct', {dict})) + + elif sigi_data := self._get_sigi_state(webpage, video_id): + self.write_debug('Found sigi state data') + status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0 + video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict})) + + elif next_data := self._search_nextjs_data(webpage, video_id, default={}): + self.write_debug('Found next.js data') + status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0 + video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict})) + + elif fatal: + raise ExtractorError('Unable to extract webpage video data') + + return video_data, status + + def _get_subtitles(self, aweme_detail, aweme_id, user_url): # TODO: Extract text positioning info 
subtitles = {} # aweme/detail endpoint subs @@ -243,9 +267,10 @@ class TikTokBaseIE(InfoExtractor): }) # webpage subs if not subtitles: - for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', ...), expected_type=dict): - if not caption.get('Url'): - continue + if user_url: # only _parse_aweme_video_app needs to extract the webpage here + aweme_detail, _ = self._extract_web_data_and_status( + f'{user_url}/video/{aweme_id}', aweme_id, fatal=False) + for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])): subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({ 'ext': remove_start(caption.get('Format'), 'web'), 'url': caption['Url'], @@ -412,7 +437,7 @@ class TikTokBaseIE(InfoExtractor): 'album': str_or_none(music_info.get('album')) or None, 'artists': re.split(r'(?:, | & )', music_author) if music_author else None, 'formats': formats, - 'subtitles': self.extract_subtitles(aweme_detail, aweme_id), + 'subtitles': self.extract_subtitles(aweme_detail, aweme_id, user_url), 'thumbnails': thumbnails, 'duration': int_or_none(traverse_obj(video_info, 'duration', ('download_addr', 'duration')), scale=1000), 'availability': self._availability( @@ -554,6 +579,7 @@ class TikTokBaseIE(InfoExtractor): 'channel_id': channel_id, 'uploader_url': user_url, 'formats': formats, + 'subtitles': self.extract_subtitles(aweme_detail, video_id, None), 'thumbnails': thumbnails, 'http_headers': { 'Referer': webpage_url, @@ -839,25 +865,7 @@ class TikTokIE(TikTokBaseIE): self.report_warning(f'{e}; trying with webpage') url = self._create_url(user_id, video_id) - webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'}) - - if universal_data := self._get_universal_data(webpage, video_id): - self.write_debug('Found universal data for rehydration') - status = traverse_obj(universal_data, ('webapp.video-detail', 'statusCode', {int})) or 0 - video_data = traverse_obj(universal_data, 
('webapp.video-detail', 'itemInfo', 'itemStruct', {dict})) - - elif sigi_data := self._get_sigi_state(webpage, video_id): - self.write_debug('Found sigi state data') - status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0 - video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict})) - - elif next_data := self._search_nextjs_data(webpage, video_id, default={}): - self.write_debug('Found next.js data') - status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0 - video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict})) - - else: - raise ExtractorError('Unable to extract webpage video data') + video_data, status = self._extract_web_data_and_status(url, video_id) if video_data and status == 0: return self._parse_aweme_video_web(video_data, url, video_id) From beaf832c7a9d57833f365ce18f6115b88071b296 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 22 May 2024 17:20:29 -0500 Subject: [PATCH 08/48] [ie/soundcloud] Add `formats` extractor-arg (#10004) Authored by: bashonly --- README.md | 3 ++ yt_dlp/extractor/soundcloud.py | 58 +++++++++++++++++++++++----------- 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 2c909976a..887cfde23 100644 --- a/README.md +++ b/README.md @@ -1841,6 +1841,9 @@ The following extractors use this feature: #### afreecatvlive * `cdn`: One or more CDN IDs to use with the API call for stream URLs, e.g. `gcp_cdn`, `gs_cdn_pc_app`, `gs_cdn_mobile_web`, `gs_cdn_pc_web` +#### soundcloud +* `formats`: Formats to request from the API. Requested values should be in the format of `{protocol}_{extension}` (omitting the bitrate), e.g. `hls_opus,http_aac`. The `*` character functions as a wildcard, e.g. `*_mp3`, and can be passed by itself to request all formats. Known protocols include `http`, `hls` and `hls-aes`; known extensions include `aac`, `opus` and `mp3`.
Original `download` formats are always extracted. Default is `http_aac,hls_aac,http_opus,hls_opus,http_mp3,hls_mp3` + **Note**: These options may be changed/removed in the future without concern for backward compatibility diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index c9ca41a5c..358146171 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -1,3 +1,4 @@ +import functools import itertools import json import re @@ -12,6 +13,7 @@ from ..utils import ( error_to_compat_str, float_or_none, int_or_none, + join_nonempty, mimetype2ext, parse_qs, str_or_none, @@ -68,6 +70,16 @@ class SoundcloudBaseIE(InfoExtractor): 'original': 0, } + _DEFAULT_FORMATS = ['http_aac', 'hls_aac', 'http_opus', 'hls_opus', 'http_mp3', 'hls_mp3'] + + @functools.cached_property + def _is_requested(self): + return re.compile(r'|'.join(set( + re.escape(pattern).replace(r'\*', r'.*') if pattern != 'default' + else '|'.join(map(re.escape, self._DEFAULT_FORMATS)) + for pattern in self._configuration_arg('formats', ['default'], ie_key=SoundcloudIE) + ))).fullmatch + def _store_client_id(self, client_id): self.cache.store('soundcloud', 'client_id', client_id) @@ -216,7 +228,7 @@ class SoundcloudBaseIE(InfoExtractor): redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri') if redirect_url: urlh = self._request_webpage( - HEADRequest(redirect_url), track_id, fatal=False) + HEADRequest(redirect_url), track_id, 'Checking for original download format', fatal=False) if urlh: format_url = urlh.url format_urls.add(format_url) @@ -258,7 +270,7 @@ class SoundcloudBaseIE(InfoExtractor): abr = f.get('abr') if abr: f['abr'] = int(abr) - if protocol == 'hls': + if protocol in ('hls', 'hls-aes'): protocol = 'm3u8' if ext == 'aac' else 'm3u8_native' else: protocol = 'http' @@ -274,11 +286,32 @@ class SoundcloudBaseIE(InfoExtractor): if extract_flat: break format_url = t['url'] - stream = None + protocol = 
traverse_obj(t, ('format', 'protocol', {str})) + if protocol == 'progressive': + protocol = 'http' + if protocol != 'hls' and '/hls' in format_url: + protocol = 'hls' + if protocol == 'encrypted-hls' or '/encrypted-hls' in format_url: + protocol = 'hls-aes' + + ext = None + if preset := traverse_obj(t, ('preset', {str_or_none})): + ext = preset.split('_')[0] + if ext not in KNOWN_EXTENSIONS: + ext = mimetype2ext(traverse_obj(t, ('format', 'mime_type', {str}))) + + identifier = join_nonempty(protocol, ext, delim='_') + if not self._is_requested(identifier): + self.write_debug(f'"{identifier}" is not a requested format, skipping') + continue + + stream = None for retry in self.RetryManager(fatal=False): try: - stream = self._download_json(format_url, track_id, query=query, headers=self._HEADERS) + stream = self._download_json( + format_url, track_id, f'Downloading {identifier} format info JSON', + query=query, headers=self._HEADERS) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 429: self.report_warning( @@ -289,27 +322,14 @@ class SoundcloudBaseIE(InfoExtractor): else: self.report_warning(e.msg) - if not isinstance(stream, dict): - continue - stream_url = url_or_none(stream.get('url')) + stream_url = traverse_obj(stream, ('url', {url_or_none})) if invalid_url(stream_url): continue format_urls.add(stream_url) - stream_format = t.get('format') or {} - protocol = stream_format.get('protocol') - if protocol != 'hls' and '/hls' in format_url: - protocol = 'hls' - ext = None - preset = str_or_none(t.get('preset')) - if preset: - ext = preset.split('_')[0] - if ext not in KNOWN_EXTENSIONS: - ext = mimetype2ext(stream_format.get('mime_type')) add_format({ 'url': stream_url, 'ext': ext, - }, 'http' if protocol == 'progressive' else protocol, - t.get('snipped') or '/preview/' in format_url) + }, protocol, t.get('snipped') or '/preview/' in format_url) for f in formats: f['vcodec'] = 'none' From f2816634e3be88fe158b342ee33918de3c272a54 Mon 
Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 22 May 2024 17:25:07 -0500 Subject: [PATCH 09/48] [ie/crunchyroll] Fix stream extraction (#10005) Closes #9994 Authored by: bashonly --- yt_dlp/extractor/crunchyroll.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index 90967c160..ea54f0195 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -2,6 +2,7 @@ import base64 import uuid from .common import InfoExtractor +from ..networking import Request from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -24,6 +25,7 @@ class CrunchyrollBaseIE(InfoExtractor): _BASE_URL = 'https://www.crunchyroll.com' _API_BASE = 'https://api.crunchyroll.com' _NETRC_MACHINE = 'crunchyroll' + _SWITCH_USER_AGENT = 'Crunchyroll/1.8.0 Nintendo Switch/12.3.12.0 UE4/4.27' _REFRESH_TOKEN = None _AUTH_HEADERS = None _AUTH_EXPIRY = None @@ -179,10 +181,19 @@ class CrunchyrollBaseIE(InfoExtractor): display_id = identifier self._update_auth() - stream_response = self._download_json( - f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play', - display_id, note='Downloading stream info', errnote='Failed to download stream info', - headers=CrunchyrollBaseIE._AUTH_HEADERS) + headers = {**CrunchyrollBaseIE._AUTH_HEADERS, 'User-Agent': self._SWITCH_USER_AGENT} + try: + stream_response = self._download_json( + f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play', + display_id, note='Downloading stream info', errnote='Failed to download stream info', headers=headers) + except ExtractorError as error: + if self.get_param('ignore_no_formats_error'): + self.report_warning(error.orig_msg) + return [], {} + elif isinstance(error.cause, HTTPError) and error.cause.status == 420: + raise ExtractorError( + 'You have reached the 
rate-limit for active streams; try again later', expected=True) + raise available_formats = {'': ('', '', stream_response['url'])} for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])): @@ -211,7 +222,7 @@ class CrunchyrollBaseIE(InfoExtractor): fatal=False, note=f'Downloading {f"{format_id} " if hardsub_lang else ""}MPD manifest') self._merge_subtitles(dash_subs, target=subtitles) else: - continue # XXX: Update this if/when meta mpd formats are working + continue # XXX: Update this if meta mpd formats work; will be tricky with token invalidation for f in adaptive_formats: if f.get('acodec') != 'none': f['language'] = audio_locale @@ -221,6 +232,15 @@ class CrunchyrollBaseIE(InfoExtractor): for locale, subtitle in traverse_obj(stream_response, (('subtitles', 'captions'), {dict.items}, ...)): subtitles.setdefault(locale, []).append(traverse_obj(subtitle, {'url': 'url', 'ext': 'format'})) + # Invalidate stream token to avoid rate-limit + error_msg = 'Unable to invalidate stream token; you may experience rate-limiting' + if stream_token := stream_response.get('token'): + self._request_webpage(Request( + f'https://cr-play-service.prd.crunchyrollsvc.com/v1/token/{identifier}/{stream_token}/inactive', + headers=headers, method='PATCH'), display_id, 'Invalidating stream token', error_msg, fatal=False) + else: + self.report_warning(error_msg) + return formats, subtitles From 7b5674949fd03a33b47b67b31d56a5adf1c48c91 Mon Sep 17 00:00:00 2001 From: vtexier Date: Thu, 23 May 2024 01:09:58 +0200 Subject: [PATCH 10/48] [ie/ArteTV] Label forced subtitles (#9945) Authored by: vtexier --- yt_dlp/extractor/arte.py | 64 +++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 37 deletions(-) diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index 1c180b1fd..46fe006cc 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -5,6 +5,7 @@ from ..utils import ( ExtractorError, 
GeoRestrictedError, int_or_none, + join_nonempty, parse_iso8601, parse_qs, strip_or_none, @@ -31,20 +32,6 @@ class ArteTVIE(ArteTVBaseIE): _TESTS = [{ 'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/', 'only_matching': True, - }, { - 'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/', - 'info_dict': { - 'id': '100103-000-A', - 'title': 'USA: Dyskryminacja na porodówce', - 'description': 'md5:242017b7cce59ffae340a54baefcafb1', - 'alt_title': 'ARTE Reportage', - 'upload_date': '20201103', - 'duration': 554, - 'thumbnail': r're:https://api-cdn\.arte\.tv/.+940x530', - 'timestamp': 1604417980, - 'ext': 'mp4', - }, - 'params': {'skip_download': 'm3u8'} }, { 'note': 'No alt_title', 'url': 'https://www.arte.tv/fr/videos/110371-000-A/la-chaleur-supplice-des-arbres-de-rue/', @@ -58,6 +45,23 @@ class ArteTVIE(ArteTVBaseIE): }, { 'url': 'https://www.arte.tv/de/videos/110203-006-A/zaz/', 'only_matching': True, + }, { + 'url': 'https://www.arte.tv/fr/videos/109067-000-A/la-loi-de-teheran/', + 'info_dict': { + 'id': '109067-000-A', + 'ext': 'mp4', + 'description': 'md5:d2ca367b8ecee028dddaa8bd1aebc739', + 'timestamp': 1713927600, + 'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/3rR6PLzfbigSkkeHtkCZNF/940x530', + 'duration': 7599, + 'title': 'La loi de Téhéran', + 'upload_date': '20240424', + 'subtitles': { + 'fr': 'mincount:1', + 'fr-acc': 'mincount:1', + 'fr-forced': 'mincount:1', + }, + }, }, { 'note': 'age-restricted', 'url': 'https://www.arte.tv/de/videos/006785-000-A/the-element-of-crime/', @@ -71,23 +75,7 @@ class ArteTVIE(ArteTVBaseIE): 'upload_date': '20230930', 'ext': 'mp4', }, - }, { - 'url': 'https://www.arte.tv/de/videos/085374-003-A/im-hohen-norden-geboren/', - 'info_dict': { - 'id': '085374-003-A', - 'ext': 'mp4', - 'description': 'md5:ab79ec7cc472a93164415b4e4916abf9', - 'timestamp': 1702872000, - 'thumbnail': 'https://api-cdn.arte.tv/img/v2/image/TnyHBfPxv3v2GEY3suXGZP/940x530', - 
'duration': 2594, - 'title': 'Die kurze Zeit der Jugend', - 'alt_title': 'Im hohen Norden geboren', - 'upload_date': '20231218', - 'subtitles': { - 'fr': 'mincount:1', - 'fr-acc': 'mincount:1', - }, - }, + 'skip': '404 Not Found', }] _GEO_BYPASS = True @@ -143,16 +131,18 @@ class ArteTVIE(ArteTVBaseIE): updated_subs = {} for lang, sub_formats in subs.items(): for fmt in sub_formats: - if fmt.get('url', '').endswith('-MAL.m3u8'): - lang += '-acc' - updated_subs.setdefault(lang, []).append(fmt) + url = fmt.get('url') or '' + suffix = ('acc' if url.endswith('-MAL.m3u8') + else 'forced' if '_VO' not in url + else None) + updated_subs.setdefault(join_nonempty(lang, suffix), []).append(fmt) return updated_subs def _real_extract(self, url): mobj = self._match_valid_url(url) video_id = mobj.group('id') lang = mobj.group('lang') or mobj.group('lang_2') - langauge_code = self._LANG_MAP.get(lang) + language_code = self._LANG_MAP.get(lang) config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={ 'x-validated-age': '18' @@ -180,10 +170,10 @@ class ArteTVIE(ArteTVBaseIE): m = self._VERSION_CODE_RE.match(stream_version_code) if m: lang_pref = int(''.join('01'[x] for x in ( - m.group('vlang') == langauge_code, # we prefer voice in the requested language + m.group('vlang') == language_code, # we prefer voice in the requested language not m.group('audio_desc'), # and not the audio description version bool(m.group('original_voice')), # but if voice is not in the requested language, at least choose the original voice - m.group('sub_lang') == langauge_code, # if subtitles are present, we prefer them in the requested language + m.group('sub_lang') == language_code, # if subtitles are present, we prefer them in the requested language not m.group('has_sub'), # but we prefer no subtitles otherwise not m.group('sdh_sub'), # and we prefer not the hard-of-hearing subtitles if there are subtitles ))) From 296df0da1d38a44d34c99b60a18066c301774537 Mon Sep 17 
00:00:00 2001 From: panatexxa <91012623+panatexxa@users.noreply.github.com> Date: Thu, 23 May 2024 06:03:55 +0200 Subject: [PATCH 11/48] [ie/Moviepilot] Fix extractor (#9366) Authored by: panatexxa --- yt_dlp/extractor/moviepilot.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/moviepilot.py b/yt_dlp/extractor/moviepilot.py index 668c0984e..35c57bc70 100644 --- a/yt_dlp/extractor/moviepilot.py +++ b/yt_dlp/extractor/moviepilot.py @@ -14,7 +14,7 @@ class MoviepilotIE(InfoExtractor): 'display_id': 'interstellar-2', 'ext': 'mp4', 'title': 'Interstellar', - 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaV-q1ZganMw4HVXg/x1080', + 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaV-q1.*/x1080', 'timestamp': 1605010596, 'description': 'md5:0ae9cb452af52610c9ffc60f2fd0474c', 'uploader': 'Moviepilot', @@ -71,7 +71,7 @@ class MoviepilotIE(InfoExtractor): 'age_limit': 0, 'duration': 82, 'upload_date': '20201109', - 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaMes1Zg3lxLv9j5u/x1080', + 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaMes1Z.*/x1080', 'uploader': 'Moviepilot', 'like_count': int, 'view_count': int, @@ -92,6 +92,6 @@ class MoviepilotIE(InfoExtractor): 'ie_key': DailymotionIE.ie_key(), 'display_id': video_id, 'title': clip.get('title'), - 'url': f'https://www.dailymotion.com/video/{clip["videoRemoteId"]}', + 'url': f'https://www.dailymotion.com/video/{clip["video"]["remoteId"]}', 'description': clip.get('summary'), } From 06cb0638392b607b47d3c2ac48eb2ebecb0f060d Mon Sep 17 00:00:00 2001 From: "Amir Y. 
Perehodnik" Date: Thu, 23 May 2024 07:07:20 +0300 Subject: [PATCH 12/48] [ie/Instagram] Support `/reels/` URLs (#9539) Closes #6689 Authored by: amir16yp --- yt_dlp/extractor/instagram.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index f7f21505e..46f9cd681 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -255,7 +255,7 @@ class InstagramIOSIE(InfoExtractor): class InstagramIE(InstagramBaseIE): - _VALID_URL = r'(?Phttps?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reel)/(?P[^/?#&]+))' + _VALID_URL = r'(?Phttps?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reels?(?!/audio/))/(?P[^/?#&]+))' _EMBED_REGEX = [r']+src=(["\'])(?P(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1'] _TESTS = [{ 'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc', @@ -379,6 +379,9 @@ class InstagramIE(InstagramBaseIE): }, { 'url': 'https://www.instagram.com/marvelskies.fc/reel/CWqAgUZgCku/', 'only_matching': True, + }, { + 'url': 'https://www.instagram.com/reels/Cop84x6u7CP/', + 'only_matching': True, }] @classmethod From 65e709d23530959075816e966c42179ad46e8e3b Mon Sep 17 00:00:00 2001 From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> Date: Thu, 23 May 2024 12:09:21 +0800 Subject: [PATCH 13/48] [ie/GodResource] Add extractor (#9629) Closes #9551 Authored by: HobbyistDev --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/godresource.py | 79 +++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 yt_dlp/extractor/godresource.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index cf408b682..91a876b22 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -715,6 +715,7 @@ from .globo import ( from .gmanetwork import GMANetworkVideoIE from .go import GoIE from .godtube import GodTubeIE +from .godresource import GodResourceIE from .gofile 
import GofileIE from .golem import GolemIE from .goodgame import GoodGameIE diff --git a/yt_dlp/extractor/godresource.py b/yt_dlp/extractor/godresource.py new file mode 100644 index 000000000..f010fff36 --- /dev/null +++ b/yt_dlp/extractor/godresource.py @@ -0,0 +1,79 @@ +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + determine_ext, + str_or_none, + unified_timestamp, + url_or_none +) +from ..utils.traversal import traverse_obj + + +class GodResourceIE(InfoExtractor): + _VALID_URL = r'https?://new\.godresource\.com/video/(?P\w+)' + _TESTS = [{ + # hls stream + 'url': 'https://new.godresource.com/video/A01mTKjyf6w', + 'info_dict': { + 'id': 'A01mTKjyf6w', + 'ext': 'mp4', + 'view_count': int, + 'timestamp': 1710978666, + 'channel_id': '5', + 'thumbnail': 'https://cdn-02.godresource.com/e42968ac-9e8b-4231-ab86-f4f9d775841f/thumbnail.jpg', + 'channel': 'Stedfast Baptist Church', + 'upload_date': '20240320', + 'title': 'GodResource video #A01mTKjyf6w', + } + }, { + # mp4 link + 'url': 'https://new.godresource.com/video/01DXmBbQv_X', + 'md5': '0e8f72aa89a106b9d5c011ba6f8717b7', + 'info_dict': { + 'id': '01DXmBbQv_X', + 'ext': 'mp4', + 'channel_id': '12', + 'view_count': int, + 'timestamp': 1687996800, + 'thumbnail': 'https://cdn-02.godresource.com/sodomitedeception/thumbnail.jpg', + 'channel': 'Documentaries', + 'title': 'The Sodomite Deception', + 'upload_date': '20230629', + } + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + api_data = self._download_json( + f'https://api.godresource.com/api/Streams/{display_id}', display_id) + + video_url = api_data['streamUrl'] + is_live = api_data.get('isLive') or False + if (ext := determine_ext(video_url)) == 'm3u8': + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + video_url, display_id, live=is_live) + elif ext == 'mp4': + formats, subtitles = [{ + 'url': video_url, + 'ext': ext + }], {} + else: + raise ExtractorError(f'Unexpected video format {ext}') + 
+ return { + 'id': display_id, + 'formats': formats, + 'subtitles': subtitles, + 'title': '', + 'is_live': is_live, + **traverse_obj(api_data, { + 'title': ('title', {str}), + 'thumbnail': ('thumbnail', {url_or_none}), + 'view_count': ('views', {int}), + 'channel': ('channelName', {str}), + 'channel_id': ('channelId', {str_or_none}), + 'timestamp': ('streamDateCreated', {unified_timestamp}), + 'modified_timestamp': ('streamDataModified', {unified_timestamp}) + }) + } From be7db1a5a8c483726c511c30ea4689cbb8b27962 Mon Sep 17 00:00:00 2001 From: six Date: Thu, 23 May 2024 00:13:00 -0400 Subject: [PATCH 14/48] [ie/NTSLive] Add extractor (#9641) Closes #9640 Authored by: lostfictions --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/nts.py | 76 +++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 yt_dlp/extractor/nts.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 91a876b22..9dfa28c4b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1333,6 +1333,7 @@ from .nrk import ( NRKTVSeriesIE, ) from .nrl import NRLTVIE +from .nts import NTSLiveIE from .ntvcojp import NTVCoJpCUIE from .ntvde import NTVDeIE from .ntvru import NTVRuIE diff --git a/yt_dlp/extractor/nts.py b/yt_dlp/extractor/nts.py new file mode 100644 index 000000000..a801740fa --- /dev/null +++ b/yt_dlp/extractor/nts.py @@ -0,0 +1,76 @@ +from .common import InfoExtractor +from ..utils import parse_iso8601, url_or_none +from ..utils.traversal import traverse_obj + + +class NTSLiveIE(InfoExtractor): + IE_NAME = 'nts.live' + _VALID_URL = r'https?://(?:www\.)?nts\.live/shows/[^/?#]+/episodes/(?P[^/?#]+)' + _TESTS = [ + { + # embedded soundcloud + 'url': 'https://www.nts.live/shows/yu-su/episodes/yu-su-2nd-april-2024', + 'md5': 'b5444c04888c869d68758982de1a27d8', + 'info_dict': { + 'id': '1791563518', + 'ext': 'opus', + 'uploader_id': '995579326', + 'title': 'Pender Street Steppers & YU SU', + 
'timestamp': 1712073600, + 'upload_date': '20240402', + 'thumbnail': 'https://i1.sndcdn.com/artworks-qKcNO0z0AQGGbv9s-GljJCw-original.jpg', + 'license': 'all-rights-reserved', + 'repost_count': int, + 'uploader_url': 'https://soundcloud.com/user-643553014', + 'uploader': 'NTS Latest', + 'description': 'md5:cd00ac535a63caaad722483ae3ff802a', + 'duration': 10784.157, + 'genres': ['Deep House', 'House', 'Leftfield Disco', 'Jazz Fusion', 'Dream Pop'], + 'modified_timestamp': 1712564687, + 'modified_date': '20240408', + }, + }, + { + # embedded mixcloud + 'url': 'https://www.nts.live/shows/absolute-fiction/episodes/absolute-fiction-23rd-july-2022', + 'info_dict': { + 'id': 'NTSRadio_absolute-fiction-23rd-july-2022', + 'ext': 'webm', + 'like_count': int, + 'title': 'Absolute Fiction', + 'comment_count': int, + 'uploader_url': 'https://www.mixcloud.com/NTSRadio/', + 'description': 'md5:ba49da971ae8d71ee45813c52c5e2a04', + 'tags': [], + 'duration': 3529, + 'timestamp': 1658588400, + 'repost_count': int, + 'upload_date': '20220723', + 'uploader_id': 'NTSRadio', + 'thumbnail': 'https://thumbnailer.mixcloud.com/unsafe/1024x1024/extaudio/5/1/a/d/ae3e-1be9-4fd4-983e-9c3294226eac', + 'uploader': 'Mixcloud NTS Radio', + 'genres': ['Minimal Synth', 'Post Punk', 'Industrial '], + 'modified_timestamp': 1658842165, + 'modified_date': '20220726', + }, + 'params': {'skip_download': 'm3u8'}, + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + data = self._search_json(r'window\._REACT_STATE_\s*=', webpage, 'react state', video_id) + + return { + '_type': 'url_transparent', + **traverse_obj(data, ('episode', { + 'url': ('audio_sources', ..., 'url', {url_or_none}, any), + 'title': ('name', {str}), + 'description': ('description', {str}), + 'genres': ('genres', ..., 'value', {str}), + 'timestamp': ('broadcast', {parse_iso8601}), + 'modified_timestamp': ('updated', {parse_iso8601}), + })), + } From 
0dd53faeca2ba0ce138e4092d07b5f2dbf2422f9 Mon Sep 17 00:00:00 2001 From: TuxCoder Date: Thu, 23 May 2024 06:25:16 +0200 Subject: [PATCH 15/48] [ie/orf:on] Improve extraction (#9677) Closes #9652 Authored by: TuxCoder --- yt_dlp/extractor/orf.py | 42 ++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py index 526e9acaf..13561202c 100644 --- a/yt_dlp/extractor/orf.py +++ b/yt_dlp/extractor/orf.py @@ -14,6 +14,7 @@ from ..utils import ( make_archive_id, mimetype2ext, orderedSet, + parse_age_limit, remove_end, smuggle_url, strip_jsonp, @@ -569,7 +570,7 @@ class ORFFM4StoryIE(InfoExtractor): class ORFONIE(InfoExtractor): IE_NAME = 'orf:on' - _VALID_URL = r'https?://on\.orf\.at/video/(?P\d{8})/(?P[\w-]+)' + _VALID_URL = r'https?://on\.orf\.at/video/(?P\d+)' _TESTS = [{ 'url': 'https://on.orf.at/video/14210000/school-of-champions-48', 'info_dict': { @@ -583,32 +584,55 @@ class ORFONIE(InfoExtractor): 'timestamp': 1706472362, 'upload_date': '20240128', } + }, { + 'url': 'https://on.orf.at/video/3220355', + 'md5': 'f94d98e667cf9a3851317efb4e136662', + 'info_dict': { + 'id': '3220355', + 'ext': 'mp4', + 'duration': 445.04, + 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0002/60/thumb_159573_segments_highlight_teaser.png', + 'title': '50 Jahre Burgenland: Der Festumzug', + 'description': 'md5:1560bf855119544ee8c4fa5376a2a6b0', + 'media_type': 'episode', + 'timestamp': 52916400, + 'upload_date': '19710905', + } }] - def _extract_video(self, video_id, display_id): + def _extract_video(self, video_id): encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode() api_json = self._download_json( - f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', display_id) + f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', video_id) + + if traverse_obj(api_json, 'is_drm_protected'): + 
self.report_drm(video_id) formats, subtitles = [], {} for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)): for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})): if manifest_type == 'hls': fmts, subs = self._extract_m3u8_formats_and_subtitles( - manifest_url, display_id, fatal=False, m3u8_id='hls') + manifest_url, video_id, fatal=False, m3u8_id='hls') elif manifest_type == 'dash': fmts, subs = self._extract_mpd_formats_and_subtitles( - manifest_url, display_id, fatal=False, mpd_id='dash') + manifest_url, video_id, fatal=False, mpd_id='dash') else: continue formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) + for sub_url in traverse_obj(api_json, ( + '_embedded', 'subtitle', + ('xml_url', 'sami_url', 'stl_url', 'ttml_url', 'srt_url', 'vtt_url'), {url_or_none})): + self._merge_subtitles({'de': [{'url': sub_url}]}, target=subtitles) + return { 'id': video_id, 'formats': formats, 'subtitles': subtitles, **traverse_obj(api_json, { + 'age_limit': ('age_classification', {parse_age_limit}), 'duration': ('duration_second', {float_or_none}), 'title': (('title', 'headline'), {str}), 'description': (('description', 'teaser_text'), {str}), @@ -617,14 +641,14 @@ class ORFONIE(InfoExtractor): } def _real_extract(self, url): - video_id, display_id = self._match_valid_url(url).group('id', 'slug') - webpage = self._download_webpage(url, display_id) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) return { 'id': video_id, 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None), 'description': self._html_search_meta( ['description', 'og:description', 'twitter:description'], webpage, default=None), - **self._search_json_ld(webpage, display_id, fatal=False), - **self._extract_video(video_id, display_id), + **self._search_json_ld(webpage, video_id, fatal=False), + **self._extract_video(video_id), } From 5bbfdb7c999b22f1aeca0c3489c167d6eb73013b 
Mon Sep 17 00:00:00 2001 From: BohwaZ Date: Thu, 23 May 2024 06:30:21 +0200 Subject: [PATCH 16/48] [ie/HearThisAt] Improve `_VALID_URL` (#9949) Closes #9755 Authored by: bohwaz, seproDev Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com> --- yt_dlp/extractor/hearthisat.py | 44 ++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/hearthisat.py b/yt_dlp/extractor/hearthisat.py index d1a400d8c..c7da8f97d 100644 --- a/yt_dlp/extractor/hearthisat.py +++ b/yt_dlp/extractor/hearthisat.py @@ -7,13 +7,14 @@ from ..utils import ( class HearThisAtIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P[^/]+)/(?P[A-Za-z0-9\-]+)/?$' + _VALID_URL = r'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/?#]+)/(?P<title>[\w.-]+)' _PLAYLIST_URL = 'https://hearthis.at/playlist.php' _TESTS = [{ 'url': 'https://hearthis.at/moofi/dr-kreep', 'md5': 'ab6ec33c8fed6556029337c7885eb4e0', 'info_dict': { 'id': '150939', + 'display_id': 'moofi - dr-kreep', 'ext': 'wav', 'title': 'Moofi - Dr. 
Kreep', 'thumbnail': r're:^https?://.*\.jpg$', @@ -21,15 +22,16 @@ class HearThisAtIE(InfoExtractor): 'description': 'md5:1adb0667b01499f9d27e97ddfd53852a', 'upload_date': '20150118', 'view_count': int, - 'duration': 71, - 'genre': 'Experimental', - } + 'duration': 70, + 'genres': ['Experimental'], + }, }, { # 'download' link redirects to the original webpage 'url': 'https://hearthis.at/twitchsf/dj-jim-hopkins-totally-bitchin-80s-dance-mix/', 'md5': '5980ceb7c461605d30f1f039df160c6e', 'info_dict': { 'id': '811296', + 'display_id': 'twitchsf - dj-jim-hopkins-totally-bitchin-80s-dance-mix', 'ext': 'mp3', 'title': 'TwitchSF - DJ Jim Hopkins - Totally Bitchin\' 80\'s Dance Mix!', 'description': 'md5:ef26815ca8f483272a87b137ff175be2', @@ -38,7 +40,39 @@ class HearThisAtIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'view_count': int, 'duration': 4360, - 'genre': 'Dance', + 'genres': ['Dance'], + }, + }, { + 'url': 'https://hearthis.at/tindalos/0001-tindalos-gnrique/eQd/', + 'md5': 'cd08e51911f147f6da2d9678905b0bd9', + 'info_dict': { + 'id': '2685222', + 'ext': 'mp3', + 'duration': 86, + 'view_count': int, + 'timestamp': 1545471670, + 'display_id': 'tindalos - 0001-tindalos-gnrique', + 'thumbnail': r're:^https?://.*\.jpg$', + 'genres': ['Other'], + 'title': 'Tindalos - Tindalos - générique n°1', + 'description': '', + 'upload_date': '20181222', + }, + }, { + 'url': 'https://hearthis.at/sithi2/biochip-c-classics-set-wolle-xdp-tresor.core-special-tresor-globus-berlin-13.07.20011/', + 'md5': 'b45ac60f0c8111eef6ddc10ec232e312', + 'info_dict': { + 'id': '7145959', + 'ext': 'mp3', + 'description': 'md5:d7ae36a453d78903f6b7ed6eb2fce1f2', + 'duration': 8986, + 'thumbnail': r're:^https?://.*\.jpg$', + 'title': 'md5:62669ce5b1b67f45c6f846033f37d3b9', + 'timestamp': 1588699409, + 'display_id': 'sithi2 - biochip-c-classics-set-wolle-xdp-tresor.core-special-tresor-globus-berlin-13.07.20011', + 'view_count': int, + 'upload_date': '20200505', + 'genres': ['Other'], }, }] 
From eead3bbc01f6529862bdad1f0b2adeabda4f006e Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Thu, 23 May 2024 16:25:16 +0000 Subject: [PATCH 17/48] [ie/brilliantpala] Fix login (#9788) Closes #9771 Authored by: pzhlkj6612 --- yt_dlp/extractor/brilliantpala.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/brilliantpala.py b/yt_dlp/extractor/brilliantpala.py index 0bf8622c1..950a70a5e 100644 --- a/yt_dlp/extractor/brilliantpala.py +++ b/yt_dlp/extractor/brilliantpala.py @@ -27,8 +27,17 @@ class BrilliantpalaBaseIE(InfoExtractor): r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'logged-in username') def _perform_login(self, username, password): - login_form = self._hidden_inputs(self._download_webpage( - self._LOGIN_API, None, 'Downloading login page')) + login_page, urlh = self._download_webpage_handle( + self._LOGIN_API, None, 'Downloading login page', expected_status=401) + if urlh.status != 401 and not urlh.url.startswith(self._LOGIN_API): + self.write_debug('Cookies are valid, no login required.') + return + + if urlh.status == 401: + self.write_debug('Got HTTP Error 401; cookies have been invalidated') + login_page = self._download_webpage(self._LOGIN_API, None, 'Re-downloading login page') + + login_form = self._hidden_inputs(login_page) login_form.update({ 'username': username, 'password': password, From 82f4f4444e26daf35b7302c406fe2312f78f619e Mon Sep 17 00:00:00 2001 From: kclauhk <78251477+kclauhk@users.noreply.github.com> Date: Fri, 24 May 2024 00:26:24 +0800 Subject: [PATCH 18/48] [ie/reddit] Fix subtitles extraction (#10006) Authored by: kclauhk --- yt_dlp/extractor/reddit.py | 61 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index 62f669f35..44c0353da 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -5,9 +5,11 @@ from 
..utils import ( ExtractorError, float_or_none, int_or_none, + parse_qs, traverse_obj, try_get, unescapeHTML, + update_url_query, urlencode_postdata, url_or_none, ) @@ -76,7 +78,7 @@ class RedditIE(InfoExtractor): 'like_count': int, 'dislike_count': int, 'comment_count': int, - 'age_limit': 0, + 'age_limit': 18, 'channel_id': 'u_creepyt0es', }, 'params': { @@ -150,6 +152,51 @@ class RedditIE(InfoExtractor): 'like_count': int, }, 'skip': 'Requires account that has opted-in to the GenZedong subreddit', + }, { + # subtitles in HLS manifest + 'url': 'https://www.reddit.com/r/Unexpected/comments/1cl9h0u/the_insurance_claim_will_be_interesting/', + 'info_dict': { + 'id': 'a2mdj5d57qyc1', + 'ext': 'mp4', + 'display_id': '1cl9h0u', + 'title': 'The insurance claim will be interesting', + 'uploader': 'darrenpauli', + 'channel_id': 'Unexpected', + 'duration': 53, + 'upload_date': '20240506', + 'timestamp': 1714966382, + 'age_limit': 0, + 'comment_count': int, + 'dislike_count': int, + 'like_count': int, + 'subtitles': {'en': 'mincount:1'}, + }, + 'params': { + 'skip_download': True, + }, + }, { + # subtitles from caption-url + 'url': 'https://www.reddit.com/r/soccer/comments/1cxwzso/tottenham_1_0_newcastle_united_james_maddison_31/', + 'info_dict': { + 'id': 'xbmj4t3igy1d1', + 'ext': 'mp4', + 'display_id': '1cxwzso', + 'title': 'Tottenham [1] - 0 Newcastle United - James Maddison 31\'', + 'uploader': 'Woodstovia', + 'channel_id': 'soccer', + 'duration': 30, + 'upload_date': '20240522', + 'timestamp': 1716373798, + 'age_limit': 0, + 'comment_count': int, + 'dislike_count': int, + 'like_count': int, + 'subtitles': {'en': 'mincount:1'}, + }, + 'params': { + 'skip_download': True, + 'writesubtitles': True, + }, }, { 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj', 'only_matching': True, @@ -197,6 +244,12 @@ class RedditIE(InfoExtractor): elif not traverse_obj(login, ('json', 'data', 'cookie', {str})): raise ExtractorError('Unable to login, no cookie was returned') + def 
_get_subtitles(self, video_id): + # Fallback if there were no subtitles provided by DASH or HLS manifests + caption_url = f'https://v.redd.it/{video_id}/wh_ben_en.vtt' + if self._is_valid_url(caption_url, video_id, item='subtitles'): + return {'en': [{'url': caption_url}]} + def _real_extract(self, url): host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id') @@ -307,6 +360,10 @@ class RedditIE(InfoExtractor): dash_playlist_url = playlist_urls[0] or f'https://v.redd.it/{video_id}/DASHPlaylist.mpd' hls_playlist_url = playlist_urls[1] or f'https://v.redd.it/{video_id}/HLSPlaylist.m3u8' + qs = traverse_obj(parse_qs(hls_playlist_url), { + 'f': ('f', 0, {lambda x: ','.join([x, 'subsAll']) if x else 'hd,subsAll'}), + }) + hls_playlist_url = update_url_query(hls_playlist_url, qs) formats = [{ 'url': unescapeHTML(reddit_video['fallback_url']), @@ -332,7 +389,7 @@ class RedditIE(InfoExtractor): 'id': video_id, 'display_id': display_id, 'formats': formats, - 'subtitles': subtitles, + 'subtitles': subtitles or self.extract_subtitles(video_id), 'duration': int_or_none(reddit_video.get('duration')), } From 63b569bc5e7d461753637a20ad84a575adee4c0a Mon Sep 17 00:00:00 2001 From: c-basalt <117849907+c-basalt@users.noreply.github.com> Date: Thu, 23 May 2024 14:15:56 -0400 Subject: [PATCH 19/48] [ie/taptap] Add extractors (#9776) Closes #9643 Authored by: c-basalt --- yt_dlp/extractor/_extractors.py | 6 + yt_dlp/extractor/taptap.py | 275 ++++++++++++++++++++++++++++++++ 2 files changed, 281 insertions(+) create mode 100644 yt_dlp/extractor/taptap.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 9dfa28c4b..dcdd24ce5 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1905,6 +1905,12 @@ from .syvdk import SYVDKIE from .syfy import SyfyIE from .sztvhu import SztvHuIE from .tagesschau import TagesschauIE +from .taptap import ( + TapTapMomentIE, + TapTapAppIE, + TapTapAppIntlIE, + 
TapTapPostIntlIE, +) from .tass import TassIE from .tbs import TBSIE from .tbsjp import ( diff --git a/yt_dlp/extractor/taptap.py b/yt_dlp/extractor/taptap.py new file mode 100644 index 000000000..56f2f0ef4 --- /dev/null +++ b/yt_dlp/extractor/taptap.py @@ -0,0 +1,275 @@ +import re +import uuid + +from .common import InfoExtractor +from ..utils import ( + clean_html, + int_or_none, + join_nonempty, + str_or_none, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class TapTapBaseIE(InfoExtractor): + _X_UA = 'V=1&PN=WebApp&LANG=zh_CN&VN_CODE=102&LOC=CN&PLT=PC&DS=Android&UID={uuid}&OS=Windows&OSV=10&DT=PC' + _VIDEO_API = 'https://www.taptap.cn/webapiv2/video-resource/v1/multi-get' + _INFO_API = None + _INFO_QUERY_KEY = 'id' + _DATA_PATH = None + _ID_PATH = None + _META_PATH = None + + def _get_api(self, url, video_id, query, **kwargs): + query = {**query, 'X-UA': self._X_UA.format(uuid=uuid.uuid4())} + return self._download_json(url, video_id, query=query, **kwargs)['data'] + + def _extract_video(self, video_id): + video_data = self._get_api(self._VIDEO_API, video_id, query={'video_ids': video_id})['list'][0] + + # h265 playlist contains both h265 and h264 formats + video_url = traverse_obj(video_data, ('play_url', ('url_h265', 'url'), {url_or_none}, any)) + formats = self._extract_m3u8_formats(video_url, video_id, fatal=False) + for format in formats: + if re.search(r'^(hev|hvc|hvt)\d', format.get('vcodec', '')): + format['format_id'] = join_nonempty(format.get('format_id'), 'h265', delim='_') + + return { + 'id': str(video_id), + 'formats': formats, + **traverse_obj(video_data, ({ + 'duration': ('info', 'duration', {int_or_none}), + 'thumbnail': ('thumbnail', ('original_url', 'url'), {url_or_none}), + }), get_all=False) + } + + def _real_extract(self, url): + video_id = self._match_id(url) + query = {self._INFO_QUERY_KEY: video_id} + + data = traverse_obj( + self._get_api(self._INFO_API, video_id, query=query), self._DATA_PATH) + + metainfo = 
traverse_obj(data, self._META_PATH) + entries = [{ + **metainfo, + **self._extract_video(id) + } for id in set(traverse_obj(data, self._ID_PATH))] + + return self.playlist_result(entries, **metainfo, id=video_id) + + +class TapTapMomentIE(TapTapBaseIE): + _VALID_URL = r'https?://www\.taptap\.cn/moment/(?P<id>\d+)' + _INFO_API = 'https://www.taptap.cn/webapiv2/moment/v3/detail' + _ID_PATH = ('moment', 'topic', (('videos', ...), 'pin_video'), 'video_id') + _META_PATH = ('moment', { + 'timestamp': ('created_time', {int_or_none}), + 'modified_timestamp': ('edited_time', {int_or_none}), + 'uploader': ('author', 'user', 'name', {str}), + 'uploader_id': ('author', 'user', 'id', {int}, {str_or_none}), + 'title': ('topic', 'title', {str}), + 'description': ('topic', 'summary', {str}), + }) + _TESTS = [{ + 'url': 'https://www.taptap.cn/moment/194618230982052443', + 'info_dict': { + 'id': '194618230982052443', + 'title': '《崩坏3》开放世界「后崩坏书」新篇章 于淹没之地仰视辰星', + 'description': 'md5:cf66f7819d413641b8b28c8543f4ecda', + 'timestamp': 1633453402, + 'upload_date': '20211005', + 'modified_timestamp': 1633453402, + 'modified_date': '20211005', + 'uploader': '乌酱', + 'uploader_id': '532896', + }, + 'playlist_count': 1, + 'playlist': [{ + 'info_dict': { + 'id': '2202584', + 'ext': 'mp4', + 'title': '《崩坏3》开放世界「后崩坏书」新篇章 于淹没之地仰视辰星', + 'description': 'md5:cf66f7819d413641b8b28c8543f4ecda', + 'duration': 66, + 'timestamp': 1633453402, + 'upload_date': '20211005', + 'modified_timestamp': 1633453402, + 'modified_date': '20211005', + 'uploader': '乌酱', + 'uploader_id': '532896', + 'thumbnail': r're:^https?://.*\.(png|jpg)', + } + }], + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.taptap.cn/moment/521630629209573493', + 'info_dict': { + 'id': '521630629209573493', + 'title': '《崩坏:星穹铁道》黄泉角色PV——「你的颜色」', + 'description': 'md5:2c81245da864428c904d53ae4ad2182b', + 'timestamp': 1711425600, + 'upload_date': '20240326', + 'modified_timestamp': 1711425600, + 'modified_date': '20240326', + 
'uploader': '崩坏:星穹铁道', + 'uploader_id': '414732580', + }, + 'playlist_count': 1, + 'playlist': [{ + 'info_dict': { + 'id': '4006511', + 'ext': 'mp4', + 'title': '《崩坏:星穹铁道》黄泉角色PV——「你的颜色」', + 'description': 'md5:2c81245da864428c904d53ae4ad2182b', + 'duration': 173, + 'timestamp': 1711425600, + 'upload_date': '20240326', + 'modified_timestamp': 1711425600, + 'modified_date': '20240326', + 'uploader': '崩坏:星穹铁道', + 'uploader_id': '414732580', + 'thumbnail': r're:^https?://.*\.(png|jpg)', + } + }], + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.taptap.cn/moment/540493587511511299', + 'playlist_count': 2, + 'info_dict': { + 'id': '540493587511511299', + 'title': '中式民俗解谜《纸嫁衣7》、新系列《纸不语》公布!', + 'description': 'md5:d60842350e686ddb242291ddfb8e39c9', + 'timestamp': 1715920200, + 'upload_date': '20240517', + 'modified_timestamp': 1715942225, + 'modified_date': '20240517', + 'uploader': 'TapTap 编辑', + 'uploader_id': '7159244', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + +class TapTapAppIE(TapTapBaseIE): + _VALID_URL = r'https?://www\.taptap\.cn/app/(?P<id>\d+)' + _INFO_API = 'https://www.taptap.cn/webapiv2/app/v4/detail' + _ID_PATH = (('app_videos', 'videos'), ..., 'video_id') + _META_PATH = { + 'title': ('title', {str}), + 'description': ('description', 'text', {str}, {clean_html}), + } + _TESTS = [{ + 'url': 'https://www.taptap.cn/app/168332', + 'info_dict': { + 'id': '168332', + 'title': '原神', + 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab', + }, + 'playlist_count': 2, + 'playlist': [{ + 'info_dict': { + 'id': '4058443', + 'ext': 'mp4', + 'title': '原神', + 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab', + 'duration': 26, + 'thumbnail': r're:^https?://.*\.(png|jpg)', + } + }, { + 'info_dict': { + 'id': '4058462', + 'ext': 'mp4', + 'title': '原神', + 'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab', + 'duration': 295, + 'thumbnail': r're:^https?://.*\.(png|jpg)', + } + }], + 'params': {'skip_download': 'm3u8'}, + }] + + +class 
TapTapIntlBase(TapTapBaseIE): + _X_UA = 'V=1&PN=WebAppIntl2&LANG=zh_TW&VN_CODE=115&VN=0.1.0&LOC=CN&PLT=PC&DS=Android&UID={uuid}&CURR=&DT=PC&OS=Windows&OSV=NT%208.0.0' + _VIDEO_API = 'https://www.taptap.io/webapiv2/video-resource/v1/multi-get' + + +class TapTapAppIntlIE(TapTapIntlBase): + _VALID_URL = r'https?://www\.taptap\.io/app/(?P<id>\d+)' + _INFO_API = 'https://www.taptap.io/webapiv2/i/app/v5/detail' + _DATA_PATH = 'app' + _ID_PATH = (('app_videos', 'videos'), ..., 'video_id') + _META_PATH = { + 'title': ('title', {str}), + 'description': ('description', 'text', {str}, {clean_html}), + } + _TESTS = [{ + 'url': 'https://www.taptap.io/app/233287', + 'info_dict': { + 'id': '233287', + 'title': '《虹彩六號 M》', + 'description': 'md5:418285f9c15347fc3cf3e3a3c649f182', + }, + 'playlist_count': 1, + 'playlist': [{ + 'info_dict': { + 'id': '2149708997', + 'ext': 'mp4', + 'title': '《虹彩六號 M》', + 'description': 'md5:418285f9c15347fc3cf3e3a3c649f182', + 'duration': 78, + 'thumbnail': r're:^https?://.*\.(png|jpg)', + } + }], + 'params': {'skip_download': 'm3u8'}, + }] + + +class TapTapPostIntlIE(TapTapIntlBase): + _VALID_URL = r'https?://www\.taptap\.io/post/(?P<id>\d+)' + _INFO_API = 'https://www.taptap.io/webapiv2/creation/post/v1/detail' + _INFO_QUERY_KEY = 'id_str' + _DATA_PATH = 'post' + _ID_PATH = ((('videos', ...), 'pin_video'), 'video_id') + _META_PATH = { + 'timestamp': ('published_time', {int_or_none}), + 'modified_timestamp': ('edited_time', {int_or_none}), + 'uploader': ('user', 'name', {str}), + 'uploader_id': ('user', 'id', {int}, {str_or_none}), + 'title': ('title', {str}), + 'description': ('list_fields', 'summary', {str}), + } + _TESTS = [{ + 'url': 'https://www.taptap.io/post/571785', + 'info_dict': { + 'id': '571785', + 'title': 'Arknights x Rainbow Six Siege | Event PV', + 'description': 'md5:f7717c13f6d3108e22db7303e6690bf7', + 'timestamp': 1614664951, + 'upload_date': '20210302', + 'modified_timestamp': 1614664951, + 'modified_date': '20210302', + 
'uploader': 'TapTap Editor', + 'uploader_id': '80224473', + }, + 'playlist_count': 1, + 'playlist': [{ + 'info_dict': { + 'id': '2149491903', + 'ext': 'mp4', + 'title': 'Arknights x Rainbow Six Siege | Event PV', + 'description': 'md5:f7717c13f6d3108e22db7303e6690bf7', + 'duration': 122, + 'timestamp': 1614664951, + 'upload_date': '20210302', + 'modified_timestamp': 1614664951, + 'modified_date': '20210302', + 'uploader': 'TapTap Editor', + 'uploader_id': '80224473', + 'thumbnail': r're:^https?://.*\.(png|jpg)', + } + }], + 'params': {'skip_download': 'm3u8'}, + }] From 3779f2a307ba3ef1d28e107cdd71b221dfb4eb36 Mon Sep 17 00:00:00 2001 From: sepro <4618135+seproDev@users.noreply.github.com> Date: Thu, 23 May 2024 22:18:20 +0200 Subject: [PATCH 20/48] [ie/ORFTVthek] Remove extractor (#10011) Authored by: seproDev --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/orf.py | 183 +------------------------------- 2 files changed, 3 insertions(+), 181 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index dcdd24ce5..6f0656e0c 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1387,7 +1387,6 @@ from .openrec import ( ) from .ora import OraTVIE from .orf import ( - ORFTVthekIE, ORFFM4StoryIE, ORFONIE, ORFRadioIE, diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py index 13561202c..3c837becd 100644 --- a/yt_dlp/extractor/orf.py +++ b/yt_dlp/extractor/orf.py @@ -3,204 +3,24 @@ import functools import re from .common import InfoExtractor -from ..networking import HEADRequest from ..utils import ( - InAdvancePagedList, clean_html, determine_ext, float_or_none, int_or_none, - join_nonempty, make_archive_id, mimetype2ext, orderedSet, parse_age_limit, remove_end, - smuggle_url, strip_jsonp, try_call, - unescapeHTML, unified_strdate, - unsmuggle_url, url_or_none, ) from ..utils.traversal import traverse_obj -class ORFTVthekIE(InfoExtractor): - IE_NAME = 'orf:tvthek' - IE_DESC = 'ORF 
TVthek' - _VALID_URL = r'(?P<url>https?://tvthek\.orf\.at/(?:(?:[^/]+/){2}){1,2}(?P<id>\d+))(/[^/]+/(?P<vid>\d+))?(?:$|[?#])' - - _TESTS = [{ - 'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079', - 'info_dict': { - 'id': '14121079', - }, - 'playlist_count': 11, - 'params': {'noplaylist': True} - }, { - 'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079/Umfrage-Welches-Tier-ist-Sebastian-Kurz/15083150', - 'info_dict': { - 'id': '14121079', - }, - 'playlist_count': 1, - 'params': {'playlist_items': '5'} - }, { - 'url': 'https://tvthek.orf.at/profile/ZIB-2/1211/ZIB-2/14121079/Umfrage-Welches-Tier-ist-Sebastian-Kurz/15083150', - 'info_dict': { - 'id': '14121079', - }, - 'playlist': [{ - 'info_dict': { - 'id': '15083150', - 'ext': 'mp4', - 'description': 'md5:7be1c485425f5f255a5e4e4815e77d04', - 'thumbnail': 'https://api-tvthek.orf.at/uploads/media/segments/0130/59/824271ea35cd8931a0fb08ab316a5b0a1562342c.jpeg', - 'title': 'Umfrage: Welches Tier ist Sebastian Kurz?', - } - }], - 'playlist_count': 1, - 'params': {'noplaylist': True, 'skip_download': 'm3u8'} - }, { - 'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389', - 'playlist': [{ - 'md5': '2942210346ed779588f428a92db88712', - 'info_dict': { - 'id': '8896777', - 'ext': 'mp4', - 'title': 'Aufgetischt: Mit der Steirischen Tafelrunde', - 'description': 'md5:c1272f0245537812d4e36419c207b67d', - 'duration': 2668, - 'upload_date': '20141208', - }, - }], - 'skip': 'Blocked outside of Austria / Germany', - }, { - 'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256', - 'info_dict': { - 'id': '7982259', - 'ext': 'mp4', - 'title': 'Best of Ingrid Thurnher', - 'upload_date': '20140527', - 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. 
Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".', - }, - 'params': { - 'skip_download': True, # rtsp downloads - }, - 'skip': 'Blocked outside of Austria / Germany', - }, { - 'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141', - 'only_matching': True, - }, { - 'url': 'http://tvthek.orf.at/profile/Universum/35429', - 'only_matching': True, - }] - - def _pagefunc(self, url, data_jsb, n, *, image=None): - sd = data_jsb[n] - video_id, title = str(sd['id']), sd['title'] - formats = [] - for fd in sd['sources']: - src = url_or_none(fd.get('src')) - if not src: - continue - format_id = join_nonempty('delivery', 'quality', 'quality_string', from_dict=fd) - ext = determine_ext(src) - if ext == 'm3u8': - m3u8_formats = self._extract_m3u8_formats( - src, video_id, 'mp4', m3u8_id=format_id, fatal=False, note=f'Downloading {format_id} m3u8 manifest') - if any('/geoprotection' in f['url'] for f in m3u8_formats): - self.raise_geo_restricted() - formats.extend(m3u8_formats) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - src, video_id, f4m_id=format_id, fatal=False)) - elif ext == 'mpd': - formats.extend(self._extract_mpd_formats( - src, video_id, mpd_id=format_id, fatal=False, note=f'Downloading {format_id} mpd manifest')) - else: - formats.append({ - 'format_id': format_id, - 'url': src, - 'protocol': fd.get('protocol'), - }) - - # Check for geoblocking. - # There is a property is_geoprotection, but that's always false - geo_str = sd.get('geoprotection_string') - http_url = next( - (f['url'] for f in formats if re.match(r'^https?://.*\.mp4$', f['url'])), - None) if geo_str else None - if http_url: - self._request_webpage( - HEADRequest(http_url), video_id, fatal=False, note='Testing for geoblocking', - errnote=f'This video seems to be blocked outside of {geo_str}. 
You may want to try the streaming-* formats') - - subtitles = {} - for sub in sd.get('subtitles', []): - sub_src = sub.get('src') - if not sub_src: - continue - subtitles.setdefault(sub.get('lang', 'de-AT'), []).append({ - 'url': sub_src, - }) - - upload_date = unified_strdate(sd.get('created_date')) - - thumbnails = [] - preview = sd.get('preview_image_url') - if preview: - thumbnails.append({ - 'id': 'preview', - 'url': preview, - 'preference': 0, - }) - image = sd.get('image_full_url') or image - if image: - thumbnails.append({ - 'id': 'full', - 'url': image, - 'preference': 1, - }) - - yield { - 'id': video_id, - 'title': title, - 'webpage_url': smuggle_url(f'{url}/part/{video_id}', {'force_noplaylist': True}), - 'formats': formats, - 'subtitles': subtitles, - 'description': sd.get('description'), - 'duration': int_or_none(sd.get('duration_in_seconds')), - 'upload_date': upload_date, - 'thumbnails': thumbnails, - } - - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url) - playlist_id, video_id, base_url = self._match_valid_url(url).group('id', 'vid', 'url') - webpage = self._download_webpage(url, playlist_id) - - data_jsb = self._parse_json( - self._search_regex( - r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2', - webpage, 'playlist', group='json'), - playlist_id, transform_source=unescapeHTML)['playlist']['videos'] - - if not self._yes_playlist(playlist_id, video_id, smuggled_data): - data_jsb = [sd for sd in data_jsb if str(sd.get('id')) == video_id] - - playlist_count = len(data_jsb) - image = self._og_search_thumbnail(webpage) if playlist_count == 1 else None - - page_func = functools.partial(self._pagefunc, base_url, data_jsb, image=image) - return { - '_type': 'playlist', - 'entries': InAdvancePagedList(page_func, playlist_count, 1), - 'id': playlist_id, - } - - class ORFRadioIE(InfoExtractor): IE_NAME = 'orf:radio' @@ -583,6 +403,7 @@ class ORFONIE(InfoExtractor): 'media_type': 'episode', 
'timestamp': 1706472362, 'upload_date': '20240128', + '_old_archive_ids': ['orftvthek 14210000'], } }, { 'url': 'https://on.orf.at/video/3220355', @@ -597,6 +418,7 @@ class ORFONIE(InfoExtractor): 'media_type': 'episode', 'timestamp': 52916400, 'upload_date': '19710905', + '_old_archive_ids': ['orftvthek 3220355'], } }] @@ -631,6 +453,7 @@ class ORFONIE(InfoExtractor): 'id': video_id, 'formats': formats, 'subtitles': subtitles, + '_old_archive_ids': [make_archive_id('ORFTVthek', video_id)], **traverse_obj(api_json, { 'age_limit': ('age_classification', {parse_age_limit}), 'duration': ('duration_second', {float_or_none}), From 90d2da311bbb5dc06f385ee428c7e4590936e995 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 25 May 2024 10:01:40 -0500 Subject: [PATCH 21/48] [ie/DiscoveryPlus] Fix dmax.de and related extractors (#10020) Closes #7530 Authored by: bashonly --- yt_dlp/extractor/dplay.py | 43 ++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py index 363b4bec9..1ecc4baf6 100644 --- a/yt_dlp/extractor/dplay.py +++ b/yt_dlp/extractor/dplay.py @@ -355,12 +355,10 @@ class DiscoveryPlusBaseIE(DPlayBaseIE): video_id, headers=headers, data=json.dumps({ 'deviceInfo': { 'adBlocker': False, + 'drmSupported': False, }, 'videoId': video_id, - 'wisteriaProperties': { - 'platform': 'desktop', - 'product': self._PRODUCT, - }, + 'wisteriaProperties': {}, }).encode('utf-8'))['data']['attributes']['streaming'] def _real_extract(self, url): @@ -878,10 +876,31 @@ class DiscoveryPlusIndiaIE(DiscoveryPlusBaseIE): }) -class DiscoveryNetworksDeIE(DPlayBaseIE): +class DiscoveryNetworksDeIE(DiscoveryPlusBaseIE): _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)' _TESTS = [{ + 'url': 
'https://dmax.de/sendungen/goldrausch-in-australien/german-gold', + 'info_dict': { + 'id': '4756322', + 'ext': 'mp4', + 'title': 'German Gold', + 'description': 'md5:f3073306553a8d9b40e6ac4cdbf09fc6', + 'display_id': 'goldrausch-in-australien/german-gold', + 'episode': 'Episode 1', + 'episode_number': 1, + 'season': 'Season 5', + 'season_number': 5, + 'series': 'Goldrausch in Australien', + 'duration': 2648.0, + 'upload_date': '20230517', + 'timestamp': 1684357500, + 'creators': ['DMAX'], + 'thumbnail': 'https://eu1-prod-images.disco-api.com/2023/05/09/f72fb510-7992-3b12-af7f-f16a2c22d1e3.jpeg', + 'tags': ['schatzsucher', 'schatz', 'nugget', 'bodenschätze', 'down under', 'australien', 'goldrausch'], + }, + 'params': {'skip_download': 'm3u8'}, + }, { 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100', 'info_dict': { 'id': '78867', @@ -901,9 +920,7 @@ class DiscoveryNetworksDeIE(DPlayBaseIE): 'season_number': 1, 'thumbnail': r're:https://.+\.jpg', }, - 'params': { - 'skip_download': True, - }, + 'skip': '404 Not Found', }, { 'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316', 'only_matching': True, @@ -920,8 +937,14 @@ class DiscoveryNetworksDeIE(DPlayBaseIE): country = 'GB' if domain == 'dplay.co.uk' else 'DE' realm = 'questuk' if country == 'GB' else domain.replace('.', '') return self._get_disco_api_info( - url, '%s/%s' % (programme, alternate_id), - 'sonic-eu1-prod.disco-api.com', realm, country) + url, f'{programme}/{alternate_id}', 'eu1-prod.disco-api.com', realm, country) + + def _update_disco_api_headers(self, headers, disco_base, display_id, realm): + headers.update({ + 'x-disco-params': f'realm={realm}', + 'x-disco-client': 'Alps:HyogaPlayer:0.0.0', + 'Authorization': self._get_auth(disco_base, display_id, realm), + }) class DiscoveryPlusShowBaseIE(DPlayBaseIE): From c92e4e625e9e6bbbbf8e3b20c3e7ebe57c16072d Mon Sep 17 00:00:00 2001 From: 
bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 25 May 2024 18:00:33 -0500 Subject: [PATCH 22/48] [ie/tele5] Overhaul extractor (#10024) Closes #3051, Closes #7955, Closes #8501, Closes #9792 Authored by: bashonly --- yt_dlp/extractor/tele5.py | 134 +++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 73 deletions(-) diff --git a/yt_dlp/extractor/tele5.py b/yt_dlp/extractor/tele5.py index 72f67e402..a45537541 100644 --- a/yt_dlp/extractor/tele5.py +++ b/yt_dlp/extractor/tele5.py @@ -1,89 +1,77 @@ -from .dplay import DPlayIE -from ..compat import compat_urlparse -from ..utils import ( - ExtractorError, - extract_attributes, -) +import functools + +from .dplay import DiscoveryPlusBaseIE +from ..utils import join_nonempty +from ..utils.traversal import traverse_obj -class Tele5IE(DPlayIE): # XXX: Do not subclass from concrete IE - _WORKING = False - _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)' - _GEO_COUNTRIES = ['DE'] +class Tele5IE(DiscoveryPlusBaseIE): + _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?P<parent_slug>[\w-]+)/(?P<slug_a>[\w-]+)(?:/(?P<slug_b>[\w-]+))?' 
_TESTS = [{ - 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416', + # slug_a and slug_b + 'url': 'https://tele5.de/mediathek/stargate-atlantis/quarantane', 'info_dict': { - 'id': '1549416', + 'id': '6852024', 'ext': 'mp4', - 'upload_date': '20180814', - 'timestamp': 1534290623, - 'title': 'Pandorum', + 'title': 'Quarantäne', + 'description': 'md5:6af0373bd0fcc4f13e5d47701903d675', + 'episode': 'Episode 73', + 'episode_number': 73, + 'season': 'Season 4', + 'season_number': 4, + 'series': 'Stargate Atlantis', + 'upload_date': '20240525', + 'timestamp': 1716643200, + 'duration': 2503.2, + 'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/21/c81fcb45-8902-309b-badb-4e6d546b575d.jpeg', + 'creators': ['Tele5'], + 'tags': [], }, - 'params': { - 'skip_download': True, - }, - 'skip': 'No longer available: "404 Seite nicht gefunden"', }, { - # jwplatform, nexx unavailable - 'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/', + # only slug_a + 'url': 'https://tele5.de/mediathek/inside-out', 'info_dict': { - 'id': 'WJuiOlUp', + 'id': '6819502', 'ext': 'mp4', - 'upload_date': '20200603', - 'timestamp': 1591214400, - 'title': 'Ghoul - Das Geheimnis des Friedhofmonsters', - 'description': 'md5:42002af1d887ff3d5b2b3ca1f8137d97', + 'title': 'Inside out', + 'description': 'md5:7e5f32ed0be5ddbd27713a34b9293bfd', + 'series': 'Inside out', + 'upload_date': '20240523', + 'timestamp': 1716494400, + 'duration': 5343.4, + 'thumbnail': 'https://eu1-prod-images.disco-api.com/2024/05/15/181eba3c-f9f0-3faf-b14d-0097050a3aa4.jpeg', + 'creators': ['Tele5'], + 'tags': [], }, - 'params': { - 'skip_download': True, - }, - 'skip': 'No longer available, redirects to Filme page', }, { - 'url': 'https://tele5.de/mediathek/angel-of-mine/', + # playlist + 'url': 'https://tele5.de/mediathek/schlefaz', 'info_dict': { - 'id': '1252360', - 'ext': 'mp4', - 'upload_date': '20220109', - 'timestamp': 1641762000, - 'title': 'Angel of Mine', - 
'description': 'md5:a72546a175e1286eb3251843a52d1ad7', + 'id': 'mediathek-schlefaz', }, - 'params': { - 'format': 'bestvideo', - }, - }, { - 'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191', - 'only_matching': True, - }, { - 'url': 'https://www.tele5.de/video-clip/?ve_id=1609440', - 'only_matching': True, - }, { - 'url': 'https://www.tele5.de/filme/schlefaz-dragon-crusaders/', - 'only_matching': True, - }, { - 'url': 'https://www.tele5.de/filme/making-of/avengers-endgame/', - 'only_matching': True, - }, { - 'url': 'https://www.tele5.de/star-trek/raumschiff-voyager/ganze-folge/das-vinculum/', - 'only_matching': True, - }, { - 'url': 'https://www.tele5.de/anders-ist-sevda/', - 'only_matching': True, + 'playlist_mincount': 3, }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - player_element = self._search_regex(r'(<hyoga-player\b[^>]+?>)', webpage, 'video player') - player_info = extract_attributes(player_element) - asset_id, country, realm = (player_info[x] for x in ('assetid', 'locale', 'realm', )) - endpoint = compat_urlparse.urlparse(player_info['endpoint']).hostname - source_type = player_info.get('sourcetype') - if source_type: - endpoint = '%s-%s' % (source_type, endpoint) - try: - return self._get_disco_api_info(url, asset_id, endpoint, realm, country) - except ExtractorError as e: - if getattr(e, 'message', '') == 'Missing deviceId in context': - self.report_drm(video_id) - raise + parent_slug, slug_a, slug_b = self._match_valid_url(url).group('parent_slug', 'slug_a', 'slug_b') + playlist_id = join_nonempty(parent_slug, slug_a, slug_b, delim='-') + + query = {'environment': 'tele5', 'v': '2'} + if not slug_b: + endpoint = f'page/{slug_a}' + query['parent_slug'] = parent_slug + else: + endpoint = f'videos/{slug_b}' + query['filter[show.slug]'] = slug_a + cms_data = self._download_json(f'https://de-api.loma-cms.com/feloma/{endpoint}/', 
playlist_id, query=query) + + return self.playlist_result(map( + functools.partial(self._get_disco_api_info, url, disco_host='eu1-prod.disco-api.com', realm='dmaxde', country='DE'), + traverse_obj(cms_data, ('blocks', ..., 'videoId', {str}))), playlist_id) + + def _update_disco_api_headers(self, headers, disco_base, display_id, realm): + headers.update({ + 'x-disco-params': f'realm={realm}', + 'x-disco-client': 'Alps:HyogaPlayer:0.0.0', + 'Authorization': self._get_auth(disco_base, display_id, realm), + }) From 1463945ae5fb05986a0bd1aa02e41d1a08d93a02 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 25 May 2024 18:03:05 -0500 Subject: [PATCH 23/48] [ie/jiocinema] Add extractors (#10026) Closes #5563, Closes #7759, Closes #8679, Closes #9349 Authored by: bashonly --- README.md | 3 + yt_dlp/extractor/_extractors.py | 8 +- yt_dlp/extractor/jiocinema.py | 403 ++++++++++++++++++++++++++++++++ yt_dlp/extractor/voot.py | 212 ----------------- 4 files changed, 410 insertions(+), 216 deletions(-) create mode 100644 yt_dlp/extractor/jiocinema.py delete mode 100644 yt_dlp/extractor/voot.py diff --git a/README.md b/README.md index 887cfde23..0636d2f6e 100644 --- a/README.md +++ b/README.md @@ -1835,6 +1835,9 @@ The following extractors use this feature: #### nflplusreplay * `type`: Type(s) of game replays to extract. Valid types are: `full_game`, `full_game_spanish`, `condensed_game` and `all_22`. You can use `all` to extract all available replay types, which is the default +#### jiocinema +* `refresh_token`: The `refreshToken` UUID from browser local storage can be passed to extend the life of your login session when logging in with `token` as username and the `accessToken` from browser local storage as password + #### jiosaavn * `bitrate`: Audio bitrates to request. One or more of `16`, `32`, `64`, `128`, `320`. 
Default is `128,320` diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 6f0656e0c..b807728ee 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -872,6 +872,10 @@ from .japandiet import ( SangiinIE, ) from .jeuxvideo import JeuxVideoIE +from .jiocinema import ( + JioCinemaIE, + JioCinemaSeriesIE, +) from .jiosaavn import ( JioSaavnSongIE, JioSaavnAlbumIE, @@ -2282,10 +2286,6 @@ from .voicy import ( VoicyChannelIE, ) from .volejtv import VolejTVIE -from .voot import ( - VootIE, - VootSeriesIE, -) from .voxmedia import ( VoxMediaVolumeIE, VoxMediaIE, diff --git a/yt_dlp/extractor/jiocinema.py b/yt_dlp/extractor/jiocinema.py new file mode 100644 index 000000000..e7186d75c --- /dev/null +++ b/yt_dlp/extractor/jiocinema.py @@ -0,0 +1,403 @@ +import base64 +import itertools +import json +import random +import re +import string +import time + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + jwt_decode_hs256, + parse_age_limit, + try_call, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class JioCinemaBaseIE(InfoExtractor): + _NETRC_MACHINE = 'jiocinema' + _GEO_BYPASS = False + _ACCESS_TOKEN = None + _REFRESH_TOKEN = None + _GUEST_TOKEN = None + _USER_ID = None + _DEVICE_ID = None + _API_HEADERS = {'Origin': 'https://www.jiocinema.com', 'Referer': 'https://www.jiocinema.com/'} + _APP_NAME = {'appName': 'RJIL_JioCinema'} + _APP_VERSION = {'appVersion': '5.0.0'} + _API_SIGNATURES = 'o668nxgzwff' + _METADATA_API_BASE = 'https://content-jiovoot.voot.com/psapi' + _ACCESS_HINT = 'the `accessToken` from your browser local storage' + _LOGIN_HINT = ( + 'Log in with "-u phone -p <PHONE_NUMBER>" to authenticate with OTP, ' + f'or use "-u token -p <ACCESS_TOKEN>" to log in with {_ACCESS_HINT}. 
' + 'If you have previously logged in with yt-dlp and your session ' + 'has been cached, you can use "-u device -p <DEVICE_ID>"') + + def _cache_token(self, token_type): + assert token_type in ('access', 'refresh', 'all') + if token_type in ('access', 'all'): + self.cache.store( + JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-access', JioCinemaBaseIE._ACCESS_TOKEN) + if token_type in ('refresh', 'all'): + self.cache.store( + JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh', JioCinemaBaseIE._REFRESH_TOKEN) + + def _call_api(self, url, video_id, note='Downloading API JSON', headers={}, data={}): + return self._download_json( + url, video_id, note, data=json.dumps(data, separators=(',', ':')).encode(), headers={ + 'Content-Type': 'application/json', + 'Accept': 'application/json', + **self._API_HEADERS, + **headers, + }, expected_status=(400, 403, 474)) + + def _call_auth_api(self, service, endpoint, note, headers={}, data={}): + return self._call_api( + f'https://auth-jiocinema.voot.com/{service}service/apis/v4/{endpoint}', + None, note=note, headers=headers, data=data) + + def _refresh_token(self): + if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._DEVICE_ID: + raise ExtractorError('User token has expired', expected=True) + response = self._call_auth_api( + 'token', 'refreshtoken', 'Refreshing token', + headers={'accesstoken': self._ACCESS_TOKEN}, data={ + **self._APP_NAME, + 'deviceId': self._DEVICE_ID, + 'refreshToken': self._REFRESH_TOKEN, + **self._APP_VERSION, + }) + refresh_token = response.get('refreshTokenId') + if refresh_token and refresh_token != JioCinemaBaseIE._REFRESH_TOKEN: + JioCinemaBaseIE._REFRESH_TOKEN = refresh_token + self._cache_token('refresh') + JioCinemaBaseIE._ACCESS_TOKEN = response['authToken'] + self._cache_token('access') + + def _fetch_guest_token(self): + JioCinemaBaseIE._DEVICE_ID = ''.join(random.choices(string.digits, k=10)) + guest_token = self._call_auth_api( + 'token', 
'guest', 'Downloading guest token', data={ + **self._APP_NAME, + 'deviceType': 'phone', + 'os': 'ios', + 'deviceId': self._DEVICE_ID, + 'freshLaunch': False, + 'adId': self._DEVICE_ID, + **self._APP_VERSION, + }) + self._GUEST_TOKEN = guest_token['authToken'] + self._USER_ID = guest_token['userId'] + + def _call_login_api(self, endpoint, guest_token, data, note): + return self._call_auth_api( + 'user', f'loginotp/{endpoint}', note, headers={ + **self.geo_verification_headers(), + 'accesstoken': self._GUEST_TOKEN, + **self._APP_NAME, + **traverse_obj(guest_token, 'data', { + 'deviceType': ('deviceType', {str}), + 'os': ('os', {str}), + })}, data=data) + + def _is_token_expired(self, token): + return (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 180) + + def _perform_login(self, username, password): + if self._ACCESS_TOKEN and not self._is_token_expired(self._ACCESS_TOKEN): + return + + UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}' + + if username.lower() == 'token': + if try_call(lambda: jwt_decode_hs256(password)): + JioCinemaBaseIE._ACCESS_TOKEN = password + refresh_hint = 'the `refreshToken` UUID from your browser local storage' + refresh_token = self._configuration_arg('refresh_token', [''], ie_key=JioCinemaIE)[0] + if not refresh_token: + self.to_screen( + 'To extend the life of your login session, in addition to your access token, ' + 'you can pass --extractor-args "jiocinema:refresh_token=REFRESH_TOKEN" ' + f'where REFRESH_TOKEN is {refresh_hint}') + elif re.fullmatch(UUID_RE, refresh_token): + JioCinemaBaseIE._REFRESH_TOKEN = refresh_token + else: + self.report_warning(f'Invalid refresh_token value. 
Use {refresh_hint}') + else: + raise ExtractorError( + f'The password given could not be decoded as a token; use {self._ACCESS_HINT}', expected=True) + + elif username.lower() == 'device' and re.fullmatch(rf'(?:{UUID_RE}|\d+)', password): + JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-refresh') + JioCinemaBaseIE._ACCESS_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-access') + if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._ACCESS_TOKEN: + raise ExtractorError(f'Failed to load cached tokens for device ID "{password}"', expected=True) + + elif username.lower() == 'phone' and re.fullmatch(r'\+?\d+', password): + self._fetch_guest_token() + guest_token = jwt_decode_hs256(self._GUEST_TOKEN) + initial_data = { + 'number': base64.b64encode(password.encode()).decode(), + **self._APP_VERSION, + } + response = self._call_login_api('send', guest_token, initial_data, 'Requesting OTP') + if not traverse_obj(response, ('OTPInfo', {dict})): + raise ExtractorError('There was a problem with the phone number login attempt') + + is_iphone = guest_token.get('os') == 'ios' + response = self._call_login_api('verify', guest_token, { + 'deviceInfo': { + 'consumptionDeviceName': 'iPhone' if is_iphone else 'Android', + 'info': { + 'platform': {'name': 'iPhone OS' if is_iphone else 'Android'}, + 'androidId': self._DEVICE_ID, + 'type': 'iOS' if is_iphone else 'Android' + } + }, + **initial_data, + 'otp': self._get_tfa_info('the one-time password sent to your phone') + }, 'Submitting OTP') + if traverse_obj(response, 'code') == 1043: + raise ExtractorError('Wrong OTP', expected=True) + JioCinemaBaseIE._REFRESH_TOKEN = response['refreshToken'] + JioCinemaBaseIE._ACCESS_TOKEN = response['authToken'] + + else: + raise ExtractorError(self._LOGIN_HINT, expected=True) + + user_token = jwt_decode_hs256(JioCinemaBaseIE._ACCESS_TOKEN)['data'] + JioCinemaBaseIE._USER_ID = user_token['userId'] + 
JioCinemaBaseIE._DEVICE_ID = user_token['deviceId'] + if JioCinemaBaseIE._REFRESH_TOKEN and username != 'device': + self._cache_token('all') + if self.get_param('cachedir') is not False: + self.to_screen( + f'NOTE: For subsequent logins you can use "-u device -p {JioCinemaBaseIE._DEVICE_ID}"') + elif not JioCinemaBaseIE._REFRESH_TOKEN: + JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load( + JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh') + if JioCinemaBaseIE._REFRESH_TOKEN: + self._cache_token('access') + self.to_screen(f'Logging in as device ID "{JioCinemaBaseIE._DEVICE_ID}"') + if self._is_token_expired(JioCinemaBaseIE._ACCESS_TOKEN): + self._refresh_token() + + +class JioCinemaIE(JioCinemaBaseIE): + IE_NAME = 'jiocinema' + _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/?(?:movies?/[^/?#]+/|tv-shows/(?:[^/?#]+/){3})(?P<id>\d{3,})' + _TESTS = [{ + 'url': 'https://www.jiocinema.com/tv-shows/agnisakshi-ek-samjhauta/1/pradeep-to-stop-the-wedding/3759931', + 'info_dict': { + 'id': '3759931', + 'ext': 'mp4', + 'title': 'Pradeep to stop the wedding?', + 'description': 'md5:75f72d1d1a66976633345a3de6d672b1', + 'episode': 'Pradeep to stop the wedding?', + 'episode_number': 89, + 'season': 'Agnisakshi…Ek Samjhauta-S1', + 'season_number': 1, + 'series': 'Agnisakshi Ek Samjhauta', + 'duration': 1238.0, + 'thumbnail': r're:https?://.+\.jpg', + 'age_limit': 13, + 'season_id': '3698031', + 'upload_date': '20230606', + 'timestamp': 1686009600, + 'release_date': '20230607', + 'genres': ['Drama'], + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.jiocinema.com/movies/bhediya/3754021/watch', + 'info_dict': { + 'id': '3754021', + 'ext': 'mp4', + 'title': 'Bhediya', + 'description': 'md5:a6bf2900371ac2fc3f1447401a9f7bb0', + 'episode': 'Bhediya', + 'duration': 8500.0, + 'thumbnail': r're:https?://.+\.jpg', + 'age_limit': 13, + 'upload_date': '20230525', + 'timestamp': 1685026200, + 'release_date': '20230524', + 'genres': ['Comedy'], + }, 
+ 'params': {'skip_download': 'm3u8'}, + }] + + def _extract_formats_and_subtitles(self, playback, video_id): + m3u8_url = traverse_obj(playback, ( + 'data', 'playbackUrls', lambda _, v: v['streamtype'] == 'hls', 'url', {url_or_none}, any)) + if not m3u8_url: # DRM-only content only serves dash urls + self.report_drm(video_id) + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, m3u8_id='hls') + self._remove_duplicate_formats(formats) + + return { + # '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p + 'formats': traverse_obj(formats, ( + lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)), + 'subtitles': subtitles, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + if not self._ACCESS_TOKEN and self._is_token_expired(self._GUEST_TOKEN): + self._fetch_guest_token() + elif self._ACCESS_TOKEN and self._is_token_expired(self._ACCESS_TOKEN): + self._refresh_token() + + playback = self._call_api( + f'https://apis-jiovoot.voot.com/playbackjv/v3/{video_id}', video_id, + 'Downloading playback JSON', headers={ + **self.geo_verification_headers(), + 'accesstoken': self._ACCESS_TOKEN or self._GUEST_TOKEN, + **self._APP_NAME, + 'deviceid': self._DEVICE_ID, + 'uniqueid': self._USER_ID, + 'x-apisignatures': self._API_SIGNATURES, + 'x-platform': 'androidweb', + 'x-platform-token': 'web', + }, data={ + '4k': False, + 'ageGroup': '18+', + 'appVersion': '3.4.0', + 'bitrateProfile': 'xhdpi', + 'capability': { + 'drmCapability': { + 'aesSupport': 'yes', + 'fairPlayDrmSupport': 'none', + 'playreadyDrmSupport': 'none', + 'widevineDRMSupport': 'none' + }, + 'frameRateCapability': [{ + 'frameRateSupport': '30fps', + 'videoQuality': '1440p' + }] + }, + 'continueWatchingRequired': False, + 'dolby': False, + 'downloadRequest': False, + 'hevc': False, + 'kidsSafe': False, + 'manufacturer': 'Windows', + 'model': 'Windows', + 'multiAudioRequired': True, + 'osVersion': '10', + 
'parentalPinValid': True, + 'x-apisignatures': self._API_SIGNATURES + }) + + status_code = traverse_obj(playback, ('code', {int})) + if status_code == 474: + self.raise_geo_restricted(countries=['IN']) + elif status_code == 1008: + error_msg = 'This content is only available for premium users' + if self._ACCESS_TOKEN: + raise ExtractorError(error_msg, expected=True) + self.raise_login_required(f'{error_msg}. {self._LOGIN_HINT}', method=None) + elif status_code == 400: + raise ExtractorError('The requested content is not available', expected=True) + elif status_code is not None and status_code != 200: + raise ExtractorError( + f'JioCinema says: {traverse_obj(playback, ("message", {str})) or status_code}') + + metadata = self._download_json( + f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/query/asset-details', + video_id, fatal=False, query={ + 'ids': f'include:{video_id}', + 'responseType': 'common', + 'devicePlatformType': 'desktop', + }) + + return { + 'id': video_id, + 'http_headers': self._API_HEADERS, + **self._extract_formats_and_subtitles(playback, video_id), + **traverse_obj(playback, ('data', { + # fallback metadata + 'title': ('name', {str}), + 'description': ('fullSynopsis', {str}), + 'series': ('show', 'name', {str}, {lambda x: x or None}), + 'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}), + 'season_number': ('episode', 'season', {int_or_none}, {lambda x: x or None}), + 'episode': ('fullTitle', {str}), + 'episode_number': ('episode', 'episodeNo', {int_or_none}, {lambda x: x or None}), + 'age_limit': ('ageNemonic', {parse_age_limit}), + 'duration': ('totalDuration', {float_or_none}), + 'thumbnail': ('images', {url_or_none}), + })), + **traverse_obj(metadata, ('result', 0, { + 'title': ('fullTitle', {str}), + 'description': ('fullSynopsis', {str}), + 'series': ('showName', {str}, {lambda x: x or None}), + 'season': ('seasonName', {str}, {lambda x: x or None}), + 'season_number': ('season', {int_or_none}), + 
'season_id': ('seasonId', {str}, {lambda x: x or None}), + 'episode': ('fullTitle', {str}), + 'episode_number': ('episode', {int_or_none}), + 'timestamp': ('uploadTime', {int_or_none}), + 'release_date': ('telecastDate', {str}), + 'age_limit': ('ageNemonic', {parse_age_limit}), + 'duration': ('duration', {float_or_none}), + 'genres': ('genres', ..., {str}), + 'thumbnail': ('seo', 'ogImage', {url_or_none}), + })), + } + + +class JioCinemaSeriesIE(JioCinemaBaseIE): + IE_NAME = 'jiocinema:series' + _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/tv-shows/(?P<slug>[\w-]+)/(?P<id>\d{3,})' + _TESTS = [{ + 'url': 'https://www.jiocinema.com/tv-shows/naagin/3499917', + 'info_dict': { + 'id': '3499917', + 'title': 'naagin', + }, + 'playlist_mincount': 120, + }] + + def _entries(self, series_id): + seasons = self._download_json( + f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/season-by-show', series_id, + 'Downloading series metadata JSON', query={ + 'sort': 'season:asc', + 'id': series_id, + 'responseType': 'common', + }) + + for season_num, season in enumerate(traverse_obj(seasons, ('result', lambda _, v: v['id'])), 1): + season_id = season['id'] + label = season.get('season') or season_num + for page_num in itertools.count(1): + episodes = traverse_obj(self._download_json( + f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode', + season_id, f'Downloading season {label} page {page_num} JSON', query={ + 'sort': 'episode:asc', + 'id': season_id, + 'responseType': 'common', + 'page': page_num, + }), ('result', lambda _, v: v['id'] and url_or_none(v['slug']))) + if not episodes: + break + for episode in episodes: + yield self.url_result( + episode['slug'], JioCinemaIE, **traverse_obj(episode, { + 'video_id': 'id', + 'video_title': ('fullTitle', {str}), + 'season_number': ('season', {int_or_none}), + 'episode_number': ('episode', {int_or_none}), + })) + + def _real_extract(self, url): + slug, series_id = 
self._match_valid_url(url).group('slug', 'id') + return self.playlist_result(self._entries(series_id), series_id, slug) diff --git a/yt_dlp/extractor/voot.py b/yt_dlp/extractor/voot.py deleted file mode 100644 index ef77bedd2..000000000 --- a/yt_dlp/extractor/voot.py +++ /dev/null @@ -1,212 +0,0 @@ -import json -import time -import uuid - -from .common import InfoExtractor -from ..compat import compat_str -from ..networking.exceptions import HTTPError -from ..utils import ( - ExtractorError, - float_or_none, - int_or_none, - jwt_decode_hs256, - parse_age_limit, - traverse_obj, - try_call, - try_get, - unified_strdate, -) - - -class VootBaseIE(InfoExtractor): - _NETRC_MACHINE = 'voot' - _GEO_BYPASS = False - _LOGIN_HINT = 'Log in with "-u <email_address> -p <password>", or use "-u token -p <auth_token>" to login with auth token.' - _TOKEN = None - _EXPIRY = 0 - _API_HEADERS = {'Origin': 'https://www.voot.com', 'Referer': 'https://www.voot.com/'} - - def _perform_login(self, username, password): - if self._TOKEN and self._EXPIRY: - return - - if username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)): - VootBaseIE._TOKEN = password - VootBaseIE._EXPIRY = jwt_decode_hs256(password)['exp'] - self.report_login() - - # Mobile number as username is not supported - elif not username.isdigit(): - check_username = self._download_json( - 'https://userauth.voot.com/usersV3/v3/checkUser', None, data=json.dumps({ - 'type': 'email', - 'email': username - }, separators=(',', ':')).encode(), headers={ - **self._API_HEADERS, - 'Content-Type': 'application/json;charset=utf-8', - }, note='Checking username', expected_status=403) - if not traverse_obj(check_username, ('isExist', {bool})): - if traverse_obj(check_username, ('status', 'code', {int})) == 9999: - self.raise_geo_restricted(countries=['IN']) - raise ExtractorError('Incorrect username', expected=True) - auth_token = traverse_obj(self._download_json( - 'https://userauth.voot.com/usersV3/v3/login', None, 
data=json.dumps({ - 'type': 'traditional', - 'deviceId': str(uuid.uuid4()), - 'deviceBrand': 'PC/MAC', - 'data': { - 'email': username, - 'password': password - } - }, separators=(',', ':')).encode(), headers={ - **self._API_HEADERS, - 'Content-Type': 'application/json;charset=utf-8', - }, note='Logging in', expected_status=400), ('data', 'authToken', {dict})) - if not auth_token: - raise ExtractorError('Incorrect password', expected=True) - VootBaseIE._TOKEN = auth_token['accessToken'] - VootBaseIE._EXPIRY = auth_token['expirationTime'] - - else: - raise ExtractorError(self._LOGIN_HINT, expected=True) - - def _check_token_expiry(self): - if int(time.time()) >= self._EXPIRY: - raise ExtractorError('Access token has expired', expected=True) - - def _real_initialize(self): - if not self._TOKEN: - self.raise_login_required(self._LOGIN_HINT, method=None) - self._check_token_expiry() - - -class VootIE(VootBaseIE): - _WORKING = False - _VALID_URL = r'''(?x) - (?: - voot:| - https?://(?:www\.)?voot\.com/? 
- (?: - movies?/[^/]+/| - (?:shows|kids)/(?:[^/]+/){4} - ) - ) - (?P<id>\d{3,}) - ''' - _TESTS = [{ - 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/1/360558/is-this-the-end-of-kamini-/441353', - 'info_dict': { - 'id': '441353', - 'ext': 'mp4', - 'title': 'Is this the end of Kamini?', - 'description': 'md5:06291fbbbc4dcbe21235c40c262507c1', - 'timestamp': 1472103000, - 'upload_date': '20160825', - 'series': 'Ishq Ka Rang Safed', - 'season_number': 1, - 'episode': 'Is this the end of Kamini?', - 'episode_number': 340, - 'release_date': '20160825', - 'season': 'Season 1', - 'age_limit': 13, - 'duration': 1146.0, - }, - 'params': {'skip_download': 'm3u8'}, - }, { - 'url': 'https://www.voot.com/kids/characters/mighty-cat-masked-niyander-e-/400478/school-bag-disappears/440925', - 'only_matching': True, - }, { - 'url': 'https://www.voot.com/movies/pandavas-5/424627', - 'only_matching': True, - }, { - 'url': 'https://www.voot.com/movie/fight-club/621842', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - media_info = self._download_json( - 'https://psapi.voot.com/jio/voot/v1/voot-web/content/query/asset-details', video_id, - query={'ids': f'include:{video_id}', 'responseType': 'common'}, headers={'accesstoken': self._TOKEN}) - - try: - m3u8_url = self._download_json( - 'https://vootapi.media.jio.com/playback/v1/playbackrights', video_id, - 'Downloading playback JSON', data=b'{}', headers={ - **self.geo_verification_headers(), - **self._API_HEADERS, - 'Content-Type': 'application/json;charset=utf-8', - 'platform': 'androidwebdesktop', - 'vootid': video_id, - 'voottoken': self._TOKEN, - })['m3u8'] - except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 400: - self._check_token_expiry() - raise - - formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls') - self._remove_duplicate_formats(formats) - - return { - 'id': video_id, - # '/_definst_/smil:vod/' m3u8 manifests 
claim to have 720p+ formats but max out at 480p - 'formats': traverse_obj(formats, ( - lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)), - 'http_headers': self._API_HEADERS, - **traverse_obj(media_info, ('result', 0, { - 'title': ('fullTitle', {str}), - 'description': ('fullSynopsis', {str}), - 'series': ('showName', {str}), - 'season_number': ('season', {int_or_none}), - 'episode': ('fullTitle', {str}), - 'episode_number': ('episode', {int_or_none}), - 'timestamp': ('uploadTime', {int_or_none}), - 'release_date': ('telecastDate', {unified_strdate}), - 'age_limit': ('ageNemonic', {parse_age_limit}), - 'duration': ('duration', {float_or_none}), - })), - } - - -class VootSeriesIE(VootBaseIE): - _WORKING = False - _VALID_URL = r'https?://(?:www\.)?voot\.com/shows/[^/]+/(?P<id>\d{3,})' - _TESTS = [{ - 'url': 'https://www.voot.com/shows/chakravartin-ashoka-samrat/100002', - 'playlist_mincount': 442, - 'info_dict': { - 'id': '100002', - }, - }, { - 'url': 'https://www.voot.com/shows/ishq-ka-rang-safed/100003', - 'playlist_mincount': 341, - 'info_dict': { - 'id': '100003', - }, - }] - _SHOW_API = 'https://psapi.voot.com/media/voot/v1/voot-web/content/generic/season-by-show?sort=season%3Aasc&id={}&responseType=common' - _SEASON_API = 'https://psapi.voot.com/media/voot/v1/voot-web/content/generic/series-wise-episode?sort=episode%3Aasc&id={}&responseType=common&page={:d}' - - def _entries(self, show_id): - show_json = self._download_json(self._SHOW_API.format(show_id), video_id=show_id) - for season in show_json.get('result', []): - page_num = 1 - season_id = try_get(season, lambda x: x['id'], compat_str) - season_json = self._download_json(self._SEASON_API.format(season_id, page_num), - video_id=season_id, - note='Downloading JSON metadata page %d' % page_num) - episodes_json = season_json.get('result', []) - while episodes_json: - page_num += 1 - for episode in episodes_json: - video_id = episode.get('id') - yield self.url_result( - 'voot:%s' % 
video_id, ie=VootIE.ie_key(), video_id=video_id) - episodes_json = self._download_json(self._SEASON_API.format(season_id, page_num), - video_id=season_id, - note='Downloading JSON metadata page %d' % page_num)['result'] - - def _real_extract(self, url): - show_id = self._match_id(url) - return self.playlist_result(self._entries(show_id), playlist_id=show_id) From 0d067e77c3f5527946fb0c22ee1c7011994cba40 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 25 May 2024 18:16:17 -0500 Subject: [PATCH 24/48] [ie/dangalplay] Add extractors (#10021) Closes #8258 Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 4 + yt_dlp/extractor/dangalplay.py | 197 ++++++++++++++++++++++++++++++++ 2 files changed, 201 insertions(+) create mode 100644 yt_dlp/extractor/dangalplay.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index b807728ee..973f8c321 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -453,6 +453,10 @@ from .damtomo import ( DamtomoRecordIE, DamtomoVideoIE, ) +from .dangalplay import ( + DangalPlayIE, + DangalPlaySeasonIE, +) from .daum import ( DaumIE, DaumClipIE, diff --git a/yt_dlp/extractor/dangalplay.py b/yt_dlp/extractor/dangalplay.py new file mode 100644 index 000000000..50e4136b5 --- /dev/null +++ b/yt_dlp/extractor/dangalplay.py @@ -0,0 +1,197 @@ +import hashlib +import json +import re +import time + +from .common import InfoExtractor +from ..networking.exceptions import HTTPError +from ..utils import ExtractorError, int_or_none, join_nonempty, url_or_none +from ..utils.traversal import traverse_obj + + +class DangalPlayBaseIE(InfoExtractor): + _NETRC_MACHINE = 'dangalplay' + _OTV_USER_ID = None + _LOGIN_HINT = 'Pass credentials as -u "token" -p "USER_ID" where USER_ID is the `otv_user_id` in browser local storage' + _API_BASE = 'https://ottapi.dangalplay.com' + _AUTH_TOKEN = 'jqeGWxRKK7FK5zEk3xCM' # from 
https://www.dangalplay.com/main.48ad19e24eb46acccef3.js + _SECRET_KEY = 'f53d31a4377e4ef31fa0' # same as above + + def _perform_login(self, username, password): + if self._OTV_USER_ID: + return + if username != 'token' or not re.fullmatch(r'[\da-f]{32}', password): + raise ExtractorError(self._LOGIN_HINT, expected=True) + self._OTV_USER_ID = password + + def _real_initialize(self): + if not self._OTV_USER_ID: + self.raise_login_required(f'Login required. {self._LOGIN_HINT}', method=None) + + def _extract_episode_info(self, metadata, episode_slug, series_slug): + return { + 'display_id': episode_slug, + 'episode_number': int_or_none(self._search_regex( + r'ep-(?:number-)?(\d+)', episode_slug, 'episode number', default=None)), + 'season_number': int_or_none(self._search_regex( + r'season-(\d+)', series_slug, 'season number', default='1')), + 'series': series_slug, + **traverse_obj(metadata, { + 'id': ('content_id', {str}), + 'title': ('display_title', {str}), + 'episode': ('title', {str}), + 'series': ('show_name', {str}, {lambda x: x or None}), + 'series_id': ('catalog_id', {str}), + 'duration': ('duration', {int_or_none}), + 'release_timestamp': ('release_date_uts', {int_or_none}), + }), + } + + def _call_api(self, path, display_id, note='Downloading JSON metadata', fatal=True, query={}): + return self._download_json( + f'{self._API_BASE}/{path}', display_id, note, fatal=fatal, + headers={'Accept': 'application/json'}, query={ + 'auth_token': self._AUTH_TOKEN, + 'region': 'IN', + **query, + }) + + +class DangalPlayIE(DangalPlayBaseIE): + IE_NAME = 'dangalplay' + _VALID_URL = r'https?://(?:www\.)?dangalplay.com/shows/(?P<series>[^/?#]+)/(?P<id>(?!episodes)[^/?#]+)/?(?:$|[?#])' + _TESTS = [{ + 'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-2/kitani-mohabbat-hai-season-2-ep-number-01', + 'info_dict': { + 'id': '647c61dc1e7171310dcd49b4', + 'ext': 'mp4', + 'release_timestamp': 1262304000, + 'episode_number': 1, + 'episode': 'EP 1 | KITANI MOHABBAT 
HAI SEASON 2', + 'series': 'kitani-mohabbat-hai-season-2', + 'season_number': 2, + 'title': 'EP 1 | KITANI MOHABBAT HAI SEASON 2', + 'release_date': '20100101', + 'duration': 2325, + 'season': 'Season 2', + 'display_id': 'kitani-mohabbat-hai-season-2-ep-number-01', + 'series_id': '645c9ea41e717158ca574966', + }, + }, { + 'url': 'https://www.dangalplay.com/shows/milke-bhi-hum-na-mile/milke-bhi-hum-na-mile-ep-number-01', + 'info_dict': { + 'id': '65d31d9ba73b9c3abd14a7f3', + 'ext': 'mp4', + 'episode': 'EP 1 | MILKE BHI HUM NA MILE', + 'release_timestamp': 1708367411, + 'episode_number': 1, + 'season': 'Season 1', + 'title': 'EP 1 | MILKE BHI HUM NA MILE', + 'duration': 156048, + 'release_date': '20240219', + 'season_number': 1, + 'series': 'MILKE BHI HUM NA MILE', + 'series_id': '645c9ea41e717158ca574966', + 'display_id': 'milke-bhi-hum-na-mile-ep-number-01', + }, + }] + + def _generate_api_data(self, data): + catalog_id = data['catalog_id'] + content_id = data['content_id'] + timestamp = str(int(time.time())) + unhashed = ''.join((catalog_id, content_id, self._OTV_USER_ID, timestamp, self._SECRET_KEY)) + + return json.dumps({ + 'catalog_id': catalog_id, + 'content_id': content_id, + 'category': '', + 'region': 'IN', + 'auth_token': self._AUTH_TOKEN, + 'id': self._OTV_USER_ID, + 'md5': hashlib.md5(unhashed.encode()).hexdigest(), + 'ts': timestamp, + }, separators=(',', ':')).encode() + + def _real_extract(self, url): + series_slug, episode_slug = self._match_valid_url(url).group('series', 'id') + metadata = self._call_api( + f'catalogs/shows/{series_slug}/episodes/{episode_slug}.gzip', + episode_slug, query={'item_language': ''})['data'] + + try: + details = self._download_json( + f'{self._API_BASE}/v2/users/get_all_details.gzip', episode_slug, + 'Downloading playback details JSON', headers={ + 'Accept': 'application/json', + 'Content-Type': 'application/json', + }, data=self._generate_api_data(metadata))['data'] + except ExtractorError as e: + if isinstance(e.cause, 
HTTPError) and e.cause.status == 422: + error_info = traverse_obj(e.cause.response.read().decode(), ({json.loads}, 'error', {dict})) or {} + if error_info.get('code') == '1016': + self.raise_login_required( + f'Your token has expired or is invalid. {self._LOGIN_HINT}', method=None) + elif msg := error_info.get('message'): + raise ExtractorError(msg) + raise + + m3u8_url = traverse_obj(details, ( + ('adaptive_url', ('adaptive_urls', 'hd', 'hls', ..., 'playback_url')), {url_or_none}, any)) + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, episode_slug, 'mp4') + + return { + 'formats': formats, + 'subtitles': subtitles, + **self._extract_episode_info(metadata, episode_slug, series_slug), + } + + +class DangalPlaySeasonIE(DangalPlayBaseIE): + IE_NAME = 'dangalplay:season' + _VALID_URL = r'https?://(?:www\.)?dangalplay.com/shows/(?P<id>[^/?#]+)(?:/(?P<sub>ep-[^/?#]+)/episodes)?/?(?:$|[?#])' + _TESTS = [{ + 'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-1', + 'playlist_mincount': 170, + 'info_dict': { + 'id': 'kitani-mohabbat-hai-season-1', + }, + }, { + 'url': 'https://www.dangalplay.com/shows/kitani-mohabbat-hai-season-1/ep-01-30-1/episodes', + 'playlist_count': 30, + 'info_dict': { + 'id': 'kitani-mohabbat-hai-season-1-ep-01-30-1', + }, + }, { + # 1 season only, series page is season page + 'url': 'https://www.dangalplay.com/shows/milke-bhi-hum-na-mile', + 'playlist_mincount': 15, + 'info_dict': { + 'id': 'milke-bhi-hum-na-mile', + }, + }] + + def _entries(self, subcategories, series_slug): + for subcategory in subcategories: + data = self._call_api( + f'catalogs/shows/items/{series_slug}/subcategories/{subcategory}/episodes.gzip', + series_slug, f'Downloading episodes JSON for {subcategory}', fatal=False, query={ + 'order_by': 'asc', + 'status': 'published', + }) + for ep in traverse_obj(data, ('data', 'items', lambda _, v: v['friendly_id'])): + episode_slug = ep['friendly_id'] + yield self.url_result( + 
f'https://www.dangalplay.com/shows/{series_slug}/{episode_slug}', + DangalPlayIE, **self._extract_episode_info(ep, episode_slug, series_slug)) + + def _real_extract(self, url): + series_slug, subcategory = self._match_valid_url(url).group('id', 'sub') + subcategories = [subcategory] if subcategory else traverse_obj( + self._call_api( + f'catalogs/shows/items/{series_slug}.gzip', series_slug, + 'Downloading season info JSON', query={'item_language': ''}), + ('data', 'subcategories', ..., 'friendly_id', {str})) + + return self.playlist_result( + self._entries(subcategories, series_slug), join_nonempty(series_slug, subcategory)) From 3ba8de62d61d782256f5c1e9939a0762039657de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Finn=20R=2E=20G=C3=A4rtner?= <65015656+FinnRG@users.noreply.github.com> Date: Sun, 26 May 2024 01:40:35 +0200 Subject: [PATCH 25/48] [ie/Piapro] Fix extractor (#9311) Closes #9884 Authored by: FinnRG, seproDev --- yt_dlp/extractor/piapro.py | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py index 3ae985da2..87d912d56 100644 --- a/yt_dlp/extractor/piapro.py +++ b/yt_dlp/extractor/piapro.py @@ -2,6 +2,8 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( ExtractorError, + clean_html, + get_element_by_class, parse_duration, parse_filesize, str_to_int, @@ -88,34 +90,22 @@ class PiaproIE(InfoExtractor): if category_id not in ('1', '2', '21', '22', '23', '24', '25'): raise ExtractorError('The URL does not contain audio.', expected=True) - str_duration, str_filesize = self._search_regex( - r'サイズ:</span>(.+?)/\(([0-9,]+?[KMG]?B))', webpage, 'duration and size', - group=(1, 2), default=(None, None)) - str_viewcount = self._search_regex(r'閲覧数:</span>([0-9,]+)\s+', webpage, 'view count', fatal=False) - - uploader_id, uploader = self._search_regex( - r'<a\s+class="cd_user-name"\s+href="/(.*)">([^<]+)さん<', webpage, 
'uploader', - group=(1, 2), default=(None, None)) - content_id = self._search_regex(r'contentId\:\'(.+)\'', webpage, 'content ID') - create_date = self._search_regex(r'createDate\:\'(.+)\'', webpage, 'timestamp') - - player_webpage = self._download_webpage( - f'https://piapro.jp/html5_player_popup/?id={content_id}&cdate={create_date}', - video_id, note='Downloading player webpage') + def extract_info(name, description): + return self._search_regex(rf'{name}[::]\s*([\d\s,:/]+)\s*</p>', webpage, description, default=None) return { 'id': video_id, - 'title': self._html_search_regex(r'<h1\s+class="cd_works-title">(.+?)</h1>', webpage, 'title', fatal=False), - 'description': self._html_search_regex(r'(?s)<p\s+class="cd_dtl_cap">(.+?)</p>\s*<div', webpage, 'description', fatal=False), - 'uploader': uploader, - 'uploader_id': uploader_id, - 'timestamp': unified_timestamp(create_date, False), - 'duration': parse_duration(str_duration), - 'view_count': str_to_int(str_viewcount), + 'title': clean_html(get_element_by_class('contents_title', webpage)), + 'description': clean_html(get_element_by_class('contents_description', webpage)), + 'uploader': clean_html(get_element_by_class('contents_creator_txt', webpage)), + 'uploader_id': self._search_regex( + r'<a\s+href="/([^"]+)"', get_element_by_class('contents_creator', webpage), 'uploader id', default=None), + 'timestamp': unified_timestamp(extract_info('投稿日', 'timestamp'), False), + 'duration': parse_duration(extract_info('長さ', 'duration')), + 'view_count': str_to_int(extract_info('閲覧数', 'view count')), 'thumbnail': self._html_search_meta('twitter:image', webpage), - - 'filesize_approx': parse_filesize(str_filesize.replace(',', '')), - 'url': self._search_regex(r'mp3:\s*\'(.*?)\'\}', player_webpage, 'url'), + 'filesize_approx': parse_filesize((extract_info('サイズ', 'size') or '').replace(',', '')), + 'url': self._search_regex(r'\"url\":\s*\"(.*?)\"', webpage, 'url'), 'ext': 'mp3', 'vcodec': 'none', } From 
a2e9031605d87c469be9ce98dbbdf4960b727338 Mon Sep 17 00:00:00 2001 From: HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> Date: Sun, 26 May 2024 07:54:17 +0800 Subject: [PATCH 26/48] [ie/XiaoHongShu] Add extractor (#9646) Closes #9529 Authored by: HobbyistDev --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/xiaohongshu.py | 83 +++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 yt_dlp/extractor/xiaohongshu.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 973f8c321..fc18ead3a 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2386,6 +2386,7 @@ from .xhamster import ( XHamsterEmbedIE, XHamsterUserIE, ) +from .xiaohongshu import XiaoHongShuIE from .ximalaya import ( XimalayaIE, XimalayaAlbumIE diff --git a/yt_dlp/extractor/xiaohongshu.py b/yt_dlp/extractor/xiaohongshu.py new file mode 100644 index 000000000..faad9d923 --- /dev/null +++ b/yt_dlp/extractor/xiaohongshu.py @@ -0,0 +1,83 @@ +import functools + +from .common import InfoExtractor +from ..utils import ( + float_or_none, + int_or_none, + js_to_json, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class XiaoHongShuIE(InfoExtractor): + _VALID_URL = r'https?://www\.xiaohongshu\.com/explore/(?P<id>[\da-f]+)' + IE_DESC = '小红书' + _TESTS = [{ + 'url': 'https://www.xiaohongshu.com/explore/6411cf99000000001300b6d9', + 'md5': '2a87a77ddbedcaeeda8d7eae61b61228', + 'info_dict': { + 'id': '6411cf99000000001300b6d9', + 'ext': 'mp4', + 'uploader_id': '5c31698d0000000007018a31', + 'description': '#今日快乐今日发[话题]# #吃货薯看这里[话题]# #香妃蛋糕[话题]# #小五卷蛋糕[话题]# #新手蛋糕卷[话题]#', + 'title': '香妃蛋糕也太香了吧🔥不需要卷❗️绝对的友好', + 'tags': ['今日快乐今日发', '吃货薯看这里', '香妃蛋糕', '小五卷蛋糕', '新手蛋糕卷'], + 'duration': 101.726, + 'thumbnail': r're:https?://sns-webpic-qc\.xhscdn\.com/\d+/[a-z0-9]+/[\w]+', + } + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) 
+ initial_state = self._search_json( + r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', display_id, transform_source=js_to_json) + + note_info = traverse_obj(initial_state, ('note', 'noteDetailMap', display_id, 'note')) + video_info = traverse_obj(note_info, ('video', 'media', 'stream', ('h264', 'av1', 'h265'), ...)) + + formats = [] + for info in video_info: + format_info = traverse_obj(info, { + 'fps': ('fps', {int_or_none}), + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + 'vcodec': ('videoCodec', {str}), + 'acodec': ('audioCodec', {str}), + 'abr': ('audioBitrate', {int_or_none}), + 'vbr': ('videoBitrate', {int_or_none}), + 'audio_channels': ('audioChannels', {int_or_none}), + 'tbr': ('avgBitrate', {int_or_none}), + 'format': ('qualityType', {str}), + 'filesize': ('size', {int_or_none}), + 'duration': ('duration', {functools.partial(float_or_none, scale=1000)}) + }) + + formats.extend(traverse_obj(info, (('mediaUrl', ('backupUrls', ...)), { + lambda u: url_or_none(u) and {'url': u, **format_info}}))) + + thumbnails = [] + for image_info in traverse_obj(note_info, ('imageList', ...)): + thumbnail_info = traverse_obj(image_info, { + 'height': ('height', {int_or_none}), + 'width': ('width', {int_or_none}), + }) + for thumb_url in traverse_obj(image_info, (('urlDefault', 'urlPre'), {url_or_none})): + thumbnails.append({ + 'url': thumb_url, + **thumbnail_info, + }) + + return { + 'id': display_id, + 'formats': formats, + 'thumbnails': thumbnails, + 'title': self._html_search_meta(['og:title'], webpage, default=None), + **traverse_obj(note_info, { + 'title': ('title', {str}), + 'description': ('desc', {str}), + 'tags': ('tagList', ..., 'name', {str}), + 'uploader_id': ('user', 'userId', {str}), + }), + } From e897bd8292a41999cf51dba91b390db5643c72db Mon Sep 17 00:00:00 2001 From: Simon Sawicki <contact@grub4k.xyz> Date: Sun, 26 May 2024 21:27:21 +0200 Subject: [PATCH 27/48] [misc] Add `hatch`, `ruff`, `pre-commit` and improve 
dev docs (#7409) Authored by: bashonly, seproDev, Grub4K Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com> --- .github/PULL_REQUEST_TEMPLATE.md | 1 - .github/workflows/core.yml | 2 +- .github/workflows/quick-test.yml | 16 +- .gitignore | 2 +- .pre-commit-config.yaml | 14 + .pre-commit-hatch.yaml | 9 + CONTRIBUTING.md | 77 +- Makefile | 7 +- devscripts/install_deps.py | 12 +- devscripts/run_tests.py | 14 +- pyproject.toml | 156 +++- setup.cfg | 6 - test/test_http_proxy.py | 1 + yt_dlp/extractor/_extractors.py | 1065 ++++++++++++----------- yt_dlp/extractor/abc.py | 4 +- yt_dlp/extractor/abematv.py | 5 +- yt_dlp/extractor/acfun.py | 4 +- yt_dlp/extractor/adn.py | 4 +- yt_dlp/extractor/adobetv.py | 4 +- yt_dlp/extractor/airtv.py | 2 +- yt_dlp/extractor/allstar.py | 1 - yt_dlp/extractor/alphaporno.py | 4 +- yt_dlp/extractor/alura.py | 12 +- yt_dlp/extractor/amara.py | 2 +- yt_dlp/extractor/amp.py | 2 +- yt_dlp/extractor/anchorfm.py | 2 +- yt_dlp/extractor/angel.py | 2 +- yt_dlp/extractor/appleconnect.py | 5 +- yt_dlp/extractor/appletrailers.py | 2 +- yt_dlp/extractor/arnes.py | 2 +- yt_dlp/extractor/atvat.py | 2 +- yt_dlp/extractor/awaan.py | 2 +- yt_dlp/extractor/banbye.py | 4 +- yt_dlp/extractor/bannedvideo.py | 6 +- yt_dlp/extractor/beeg.py | 1 - yt_dlp/extractor/bleacherreport.py | 2 +- yt_dlp/extractor/blogger.py | 2 +- yt_dlp/extractor/bostonglobe.py | 1 - yt_dlp/extractor/boxcast.py | 6 +- yt_dlp/extractor/brainpop.py | 2 +- yt_dlp/extractor/brightcove.py | 4 +- yt_dlp/extractor/cbs.py | 6 +- yt_dlp/extractor/cinetecamilano.py | 1 + yt_dlp/extractor/clippit.py | 4 +- yt_dlp/extractor/common.py | 2 +- yt_dlp/extractor/corus.py | 2 +- yt_dlp/extractor/crackle.py | 2 +- yt_dlp/extractor/cspan.py | 6 +- yt_dlp/extractor/ctsnews.py | 2 +- yt_dlp/extractor/dailymail.py | 2 +- yt_dlp/extractor/damtomo.py | 2 +- yt_dlp/extractor/democracynow.py | 4 +- 
yt_dlp/extractor/digitalconcerthall.py | 1 - yt_dlp/extractor/discoverygo.py | 2 +- yt_dlp/extractor/disney.py | 4 +- yt_dlp/extractor/douyutv.py | 2 +- yt_dlp/extractor/dplay.py | 2 +- yt_dlp/extractor/drtuber.py | 2 +- yt_dlp/extractor/duboku.py | 2 +- yt_dlp/extractor/dvtv.py | 4 +- yt_dlp/extractor/dw.py | 2 +- yt_dlp/extractor/ertgr.py | 4 +- yt_dlp/extractor/europa.py | 2 +- yt_dlp/extractor/euscreen.py | 3 +- yt_dlp/extractor/eyedotv.py | 4 +- yt_dlp/extractor/fancode.py | 8 +- yt_dlp/extractor/faz.py | 2 +- yt_dlp/extractor/fczenit.py | 2 +- yt_dlp/extractor/fifa.py | 1 - yt_dlp/extractor/filmon.py | 4 +- yt_dlp/extractor/gab.py | 2 +- yt_dlp/extractor/gamejolt.py | 2 +- yt_dlp/extractor/gaskrank.py | 1 + yt_dlp/extractor/generic.py | 2 +- yt_dlp/extractor/gettr.py | 2 +- yt_dlp/extractor/gigya.py | 1 - yt_dlp/extractor/glomex.py | 2 +- yt_dlp/extractor/go.py | 16 +- yt_dlp/extractor/godresource.py | 2 +- yt_dlp/extractor/gofile.py | 5 +- yt_dlp/extractor/gotostage.py | 9 +- yt_dlp/extractor/hbo.py | 4 +- yt_dlp/extractor/hearthisat.py | 2 +- yt_dlp/extractor/hketv.py | 2 +- yt_dlp/extractor/hrti.py | 2 +- yt_dlp/extractor/huya.py | 6 +- yt_dlp/extractor/ichinanalive.py | 2 +- yt_dlp/extractor/infoq.py | 4 +- yt_dlp/extractor/iprima.py | 6 +- yt_dlp/extractor/iqiyi.py | 10 +- yt_dlp/extractor/itprotv.py | 3 +- yt_dlp/extractor/itv.py | 9 +- yt_dlp/extractor/iwara.py | 4 +- yt_dlp/extractor/jamendo.py | 2 +- yt_dlp/extractor/japandiet.py | 4 +- yt_dlp/extractor/jove.py | 5 +- yt_dlp/extractor/jstream.py | 2 +- yt_dlp/extractor/kakao.py | 2 +- yt_dlp/extractor/kaltura.py | 8 +- yt_dlp/extractor/kankanews.py | 4 +- yt_dlp/extractor/kuwo.py | 4 +- yt_dlp/extractor/lcp.py | 2 +- yt_dlp/extractor/lecture2go.py | 2 +- yt_dlp/extractor/lecturio.py | 2 +- yt_dlp/extractor/leeco.py | 2 +- yt_dlp/extractor/libraryofcongress.py | 1 - yt_dlp/extractor/lifenews.py | 2 +- yt_dlp/extractor/limelight.py | 2 +- yt_dlp/extractor/linkedin.py | 2 +- 
yt_dlp/extractor/mainstreaming.py | 3 +- yt_dlp/extractor/manoto.py | 7 +- yt_dlp/extractor/medaltv.py | 2 +- yt_dlp/extractor/mediaklikk.py | 7 +- yt_dlp/extractor/mediaset.py | 4 +- yt_dlp/extractor/mediasite.py | 5 +- yt_dlp/extractor/microsoftstream.py | 2 +- yt_dlp/extractor/mildom.py | 4 +- yt_dlp/extractor/mit.py | 4 +- yt_dlp/extractor/monstercat.py | 2 +- yt_dlp/extractor/moviepilot.py | 2 +- yt_dlp/extractor/movingimage.py | 2 +- yt_dlp/extractor/msn.py | 2 +- yt_dlp/extractor/n1.py | 2 +- yt_dlp/extractor/naver.py | 2 +- yt_dlp/extractor/nba.py | 2 +- yt_dlp/extractor/nbc.py | 2 +- yt_dlp/extractor/ndr.py | 2 +- yt_dlp/extractor/nfhsnetwork.py | 8 +- yt_dlp/extractor/nhl.py | 2 +- yt_dlp/extractor/ninenews.py | 2 +- yt_dlp/extractor/ninenow.py | 2 +- yt_dlp/extractor/nitter.py | 11 +- yt_dlp/extractor/nobelprize.py | 6 +- yt_dlp/extractor/noz.py | 12 +- yt_dlp/extractor/nuevo.py | 6 +- yt_dlp/extractor/nuvid.py | 2 +- yt_dlp/extractor/nzherald.py | 5 +- yt_dlp/extractor/odkmedia.py | 2 +- yt_dlp/extractor/olympics.py | 5 +- yt_dlp/extractor/onenewsnz.py | 6 +- yt_dlp/extractor/onet.py | 4 +- yt_dlp/extractor/opencast.py | 2 +- yt_dlp/extractor/openrec.py | 2 +- yt_dlp/extractor/ora.py | 1 + yt_dlp/extractor/packtpub.py | 3 +- yt_dlp/extractor/panopto.py | 10 +- yt_dlp/extractor/paramountplus.py | 2 +- yt_dlp/extractor/pbs.py | 4 +- yt_dlp/extractor/pearvideo.py | 2 +- yt_dlp/extractor/peertube.py | 2 +- yt_dlp/extractor/piksel.py | 2 +- yt_dlp/extractor/pladform.py | 4 +- yt_dlp/extractor/platzi.py | 2 +- yt_dlp/extractor/playtvak.py | 2 +- yt_dlp/extractor/pluralsight.py | 2 +- yt_dlp/extractor/polsatgo.py | 2 +- yt_dlp/extractor/pornflip.py | 6 +- yt_dlp/extractor/pornovoisines.py | 2 +- yt_dlp/extractor/prx.py | 11 +- yt_dlp/extractor/puhutv.py | 2 +- yt_dlp/extractor/qingting.py | 1 - yt_dlp/extractor/qqmusic.py | 2 +- yt_dlp/extractor/radiocanada.py | 2 +- yt_dlp/extractor/radiocomercial.py | 2 +- yt_dlp/extractor/radiozet.py | 2 +- 
yt_dlp/extractor/radlive.py | 4 +- yt_dlp/extractor/rai.py | 4 +- yt_dlp/extractor/rbgtum.py | 2 +- yt_dlp/extractor/rcti.py | 4 +- yt_dlp/extractor/rds.py | 4 +- yt_dlp/extractor/redbulltv.py | 2 +- yt_dlp/extractor/reddit.py | 2 +- yt_dlp/extractor/redgifs.py | 2 +- yt_dlp/extractor/redtube.py | 2 +- yt_dlp/extractor/reuters.py | 2 +- yt_dlp/extractor/rmcdecouverte.py | 2 +- yt_dlp/extractor/rte.py | 2 +- yt_dlp/extractor/rtp.py | 9 +- yt_dlp/extractor/rtvcplay.py | 7 +- yt_dlp/extractor/rtvs.py | 1 - yt_dlp/extractor/rutube.py | 2 +- yt_dlp/extractor/rutv.py | 6 +- yt_dlp/extractor/ruutu.py | 2 +- yt_dlp/extractor/safari.py | 1 - yt_dlp/extractor/scrippsnetworks.py | 4 +- yt_dlp/extractor/scte.py | 2 +- yt_dlp/extractor/sendtonews.py | 8 +- yt_dlp/extractor/seznamzpravy.py | 2 +- yt_dlp/extractor/shahid.py | 2 +- yt_dlp/extractor/shemaroome.py | 2 +- yt_dlp/extractor/sixplay.py | 2 +- yt_dlp/extractor/skynewsarabia.py | 2 +- yt_dlp/extractor/sohu.py | 10 +- yt_dlp/extractor/sovietscloset.py | 5 +- yt_dlp/extractor/spankbang.py | 2 +- yt_dlp/extractor/springboardplatform.py | 6 +- yt_dlp/extractor/startv.py | 4 +- yt_dlp/extractor/stitcher.py | 2 +- yt_dlp/extractor/storyfire.py | 2 +- yt_dlp/extractor/streamable.py | 2 +- yt_dlp/extractor/stripchat.py | 2 +- yt_dlp/extractor/sunporno.py | 6 +- yt_dlp/extractor/syfy.py | 2 +- yt_dlp/extractor/tbs.py | 2 +- yt_dlp/extractor/teachable.py | 4 +- yt_dlp/extractor/teachertube.py | 2 +- yt_dlp/extractor/teamcoco.py | 2 +- yt_dlp/extractor/teamtreehouse.py | 2 +- yt_dlp/extractor/ted.py | 5 +- yt_dlp/extractor/tele13.py | 2 +- yt_dlp/extractor/telewebion.py | 1 + yt_dlp/extractor/tempo.py | 2 +- yt_dlp/extractor/tencent.py | 2 +- yt_dlp/extractor/theguardian.py | 2 +- yt_dlp/extractor/theintercept.py | 4 +- yt_dlp/extractor/theplatform.py | 24 +- yt_dlp/extractor/threeqsdn.py | 2 +- yt_dlp/extractor/toypics.py | 3 +- yt_dlp/extractor/triller.py | 2 +- yt_dlp/extractor/trueid.py | 4 +- yt_dlp/extractor/tumblr.py | 2 +- 
yt_dlp/extractor/turner.py | 16 +- yt_dlp/extractor/tv2.py | 4 +- yt_dlp/extractor/tv2hu.py | 2 +- yt_dlp/extractor/tvanouvelles.py | 2 +- yt_dlp/extractor/tvn24.py | 2 +- yt_dlp/extractor/tvp.py | 2 +- yt_dlp/extractor/tvplay.py | 2 +- yt_dlp/extractor/tvplayer.py | 2 +- yt_dlp/extractor/tweakers.py | 2 +- yt_dlp/extractor/twitter.py | 2 +- yt_dlp/extractor/udn.py | 2 +- yt_dlp/extractor/ukcolumn.py | 10 +- yt_dlp/extractor/urplay.py | 4 +- yt_dlp/extractor/usatoday.py | 2 +- yt_dlp/extractor/ustream.py | 4 +- yt_dlp/extractor/ustudio.py | 2 +- yt_dlp/extractor/veo.py | 1 - yt_dlp/extractor/vesti.py | 2 +- yt_dlp/extractor/vevo.py | 2 +- yt_dlp/extractor/vice.py | 4 +- yt_dlp/extractor/vidio.py | 2 +- yt_dlp/extractor/vidlii.py | 2 +- yt_dlp/extractor/vimeo.py | 10 +- yt_dlp/extractor/viu.py | 6 +- yt_dlp/extractor/vk.py | 2 +- yt_dlp/extractor/walla.py | 2 +- yt_dlp/extractor/washingtonpost.py | 1 - yt_dlp/extractor/wdr.py | 4 +- yt_dlp/extractor/weibo.py | 2 +- yt_dlp/extractor/whowatch.py | 4 +- yt_dlp/extractor/wimtv.py | 2 +- yt_dlp/extractor/wppilot.py | 12 +- yt_dlp/extractor/wsj.py | 2 +- yt_dlp/extractor/xhamster.py | 2 +- yt_dlp/extractor/xnxx.py | 2 +- yt_dlp/extractor/xstream.py | 4 +- yt_dlp/extractor/xvideos.py | 2 +- yt_dlp/extractor/xxxymovies.py | 2 +- yt_dlp/extractor/yandexmusic.py | 2 +- yt_dlp/extractor/zapiks.py | 4 +- yt_dlp/extractor/zhihu.py | 2 +- yt_dlp/extractor/zingmp3.py | 2 +- yt_dlp/extractor/zype.py | 2 +- 264 files changed, 1224 insertions(+), 1014 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 .pre-commit-hatch.yaml diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index c4d3e812e..4deee572f 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -28,7 +28,6 @@ Fixes # ### Before submitting a *pull request* make sure you have: - [ ] At least skimmed through [contributing 
guidelines](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) including [yt-dlp coding conventions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#yt-dlp-coding-conventions) - [ ] [Searched](https://github.com/yt-dlp/yt-dlp/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests -- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) and [ran relevant tests](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) ### In order to be accepted and merged into yt-dlp each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check all of the following options that apply: - [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/) diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index 70769f967..fdfdebc65 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -53,7 +53,7 @@ jobs: with: python-version: ${{ matrix.python-version }} - name: Install test requirements - run: python3 ./devscripts/install_deps.py --include dev --include curl-cffi + run: python3 ./devscripts/install_deps.py --include test --include curl-cffi - name: Run tests continue-on-error: False run: | diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml index 24b34911f..3afb51a30 100644 --- a/.github/workflows/quick-test.yml +++ b/.github/workflows/quick-test.yml @@ -15,13 +15,13 @@ jobs: with: python-version: '3.8' - name: Install test requirements - run: python3 ./devscripts/install_deps.py --include dev + run: python3 ./devscripts/install_deps.py --include test - name: Run tests run: | python3 -m yt_dlp -v || true python3 ./devscripts/run_tests.py core - flake8: - name: Linter + check: + name: Code check if: "!contains(github.event.head_commit.message, 'ci skip all')" runs-on: ubuntu-latest steps: @@ -29,9 +29,11 @@ jobs: - uses: 
actions/setup-python@v5 with: python-version: '3.8' - - name: Install flake8 - run: python3 ./devscripts/install_deps.py -o --include dev + - name: Install dev dependencies + run: python3 ./devscripts/install_deps.py -o --include static-analysis - name: Make lazy extractors run: python3 ./devscripts/make_lazy_extractors.py - - name: Run flake8 - run: flake8 . + - name: Run ruff + run: ruff check --output-format github . + - name: Run autopep8 + run: autopep8 --diff . diff --git a/.gitignore b/.gitignore index 630c2e01f..db322c4f0 100644 --- a/.gitignore +++ b/.gitignore @@ -67,7 +67,7 @@ cookies # Python *.pyc *.pyo -.pytest_cache +.*_cache wine-py2exe/ py2exe.log build/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..a821eeefb --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,14 @@ +repos: +- repo: local + hooks: + - id: linter + name: Apply linter fixes + entry: ruff check --fix . + language: system + types: [python] + require_serial: true + - id: format + name: Apply formatting fixes + entry: autopep8 --in-place . + language: system + types: [python] diff --git a/.pre-commit-hatch.yaml b/.pre-commit-hatch.yaml new file mode 100644 index 000000000..fb7d25e1d --- /dev/null +++ b/.pre-commit-hatch.yaml @@ -0,0 +1,9 @@ +repos: +- repo: local + hooks: + - id: fix + name: Apply code fixes + entry: hatch fmt + language: system + types: [python] + require_serial: true diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c94ec55a6..837b600e3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -134,18 +134,53 @@ We follow [youtube-dl's policy](https://github.com/ytdl-org/youtube-dl#can-you-a # DEVELOPER INSTRUCTIONS -Most users do not need to build yt-dlp and can [download the builds](https://github.com/yt-dlp/yt-dlp/releases) or get them via [the other installation methods](README.md#installation). 
+Most users do not need to build yt-dlp and can [download the builds](https://github.com/yt-dlp/yt-dlp/releases), get them via [the other installation methods](README.md#installation) or directly run it using `python -m yt_dlp`. -To run yt-dlp as a developer, you don't need to build anything either. Simply execute +`yt-dlp` uses [`hatch`](<https://hatch.pypa.io>) as a project management tool. +You can easily install it using [`pipx`](<https://pipx.pypa.io>) via `pipx install hatch`, or else via `pip` or your package manager of choice. Make sure you are using at least version `1.10.0`, otherwise some functionality might not work as expected. - python3 -m yt_dlp +If you plan on contributing to `yt-dlp`, best practice is to start by running the following command: -To run all the available core tests, use: +```shell +$ hatch run setup +``` - python3 devscripts/run_tests.py +The above command will install a `pre-commit` hook so that required checks/fixes (linting, formatting) will run automatically before each commit. If any code needs to be linted or formatted, then the commit will be blocked and the necessary changes will be made; you should review all edits and re-commit the fixed version. + +After this you can use `hatch shell` to enable a virtual environment that has `yt-dlp` and its development dependencies installed. + +In addition, the following script commands can be used to run simple tasks such as linting or testing (without having to run `hatch shell` first): +* `hatch fmt`: Automatically fix linter violations and apply required code formatting changes + * See `hatch fmt --help` for more info +* `hatch test`: Run extractor or core tests + * See `hatch test --help` for more info See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases. 
+While it is strongly recommended to use `hatch` for yt-dlp development, if you are unable to do so, alternatively you can manually create a virtual environment and use the following commands: + +```shell +# To only install development dependencies: +$ python -m devscripts.install_deps --include dev + +# Or, for an editable install plus dev dependencies: +$ python -m pip install -e ".[default,dev]" + +# To setup the pre-commit hook: +$ pre-commit install + +# To be used in place of `hatch test`: +$ python -m devscripts.run_tests + +# To be used in place of `hatch fmt`: +$ ruff check --fix . +$ autopep8 --in-place . + +# To only check code instead of applying fixes: +$ ruff check . +$ autopep8 --diff . +``` + If you want to create a build of yt-dlp yourself, you can follow the instructions [here](README.md#compile). @@ -165,12 +200,16 @@ After you have ensured this site is distributing its content legally, you can fo 1. [Fork this repository](https://github.com/yt-dlp/yt-dlp/fork) 1. Check out the source code with: - git clone git@github.com:YOUR_GITHUB_USERNAME/yt-dlp.git + ```shell + $ git clone git@github.com:YOUR_GITHUB_USERNAME/yt-dlp.git + ``` 1. Start a new git branch with - cd yt-dlp - git checkout -b yourextractor + ```shell + $ cd yt-dlp + $ git checkout -b yourextractor + ``` 1. Start with this simple template and save it to `yt_dlp/extractor/yourextractor.py`: @@ -217,21 +256,27 @@ After you have ensured this site is distributing its content legally, you can fo # TODO more properties (see yt_dlp/extractor/common.py) } ``` -1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`. -1. Run `python3 devscripts/run_tests.py YourExtractor`. This *may fail* at first, but you can continually re-run it until you're done. Upon failure, it will output the missing fields and/or correct values which you can copy. 
If you decide to add more than one test, the tests will then be named `YourExtractor`, `YourExtractor_1`, `YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not included in the count. You can also run all the tests in one go with `YourExtractor_all` +1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`. Also note that when adding a parenthesized import group, the last import in the group must have a trailing comma in order for this formatting to be respected by our code formatter. +1. Run `hatch test YourExtractor`. This *may fail* at first, but you can continually re-run it until you're done. Upon failure, it will output the missing fields and/or correct values which you can copy. If you decide to add more than one test, the tests will then be named `YourExtractor`, `YourExtractor_1`, `YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not included in the count. You can also run all the tests in one go with `YourExtractor_all` 1. Make sure you have at least one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running. 1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L119-L440). Add tests and code for as many as you want. -1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart): +1. 
Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions), passes [ruff](https://docs.astral.sh/ruff/tutorial/#getting-started) code checks and is properly formatted: - $ flake8 yt_dlp/extractor/yourextractor.py + ```shell + $ hatch fmt --check + ``` + + You can use `hatch fmt` to automatically fix problems. 1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython and PyPy for Python 3.8 and above. Backward compatibility is not required for even older versions of Python. 1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: - $ git add yt_dlp/extractor/_extractors.py - $ git add yt_dlp/extractor/yourextractor.py - $ git commit -m '[yourextractor] Add extractor' - $ git push origin yourextractor + ```shell + $ git add yt_dlp/extractor/_extractors.py + $ git add yt_dlp/extractor/yourextractor.py + $ git commit -m '[yourextractor] Add extractor' + $ git push origin yourextractor + ``` 1. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it. diff --git a/Makefile b/Makefile index cef4bc6cb..b8f010086 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ clean-dist: yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS clean-cache: find . \( \ - -type d -name .pytest_cache -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \ + -type d -name ".*_cache" -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \ \) -prune -exec rm -rf {} \; completion-bash: completions/bash/yt-dlp @@ -70,7 +70,8 @@ uninstall: rm -f $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d/yt-dlp.fish codetest: - flake8 . + ruff check . + autopep8 --diff . 
test: $(PYTHON) -m pytest @@ -151,7 +152,7 @@ yt-dlp.tar.gz: all --exclude '*.pyo' \ --exclude '*~' \ --exclude '__pycache__' \ - --exclude '.pytest_cache' \ + --exclude '.*_cache' \ --exclude '.git' \ -- \ README.md supportedsites.md Changelog.md LICENSE \ diff --git a/devscripts/install_deps.py b/devscripts/install_deps.py index d33fc637c..d29250545 100755 --- a/devscripts/install_deps.py +++ b/devscripts/install_deps.py @@ -42,17 +42,25 @@ def parse_args(): def main(): args = parse_args() project_table = parse_toml(read_file(args.input))['project'] + recursive_pattern = re.compile(rf'{project_table["name"]}\[(?P<group_name>[\w-]+)\]') optional_groups = project_table['optional-dependencies'] excludes = args.exclude or [] + def yield_deps(group): + for dep in group: + if mobj := recursive_pattern.fullmatch(dep): + yield from optional_groups.get(mobj.group('group_name'), []) + else: + yield dep + targets = [] if not args.only_optional: # `-o` should exclude 'dependencies' and the 'default' group targets.extend(project_table['dependencies']) if 'default' not in excludes: # `--exclude default` should exclude entire 'default' group - targets.extend(optional_groups['default']) + targets.extend(yield_deps(optional_groups['default'])) for include in filter(None, map(optional_groups.get, args.include or [])): - targets.extend(include) + targets.extend(yield_deps(include)) targets = [t for t in targets if re.match(r'[\w-]+', t).group(0).lower() not in excludes] diff --git a/devscripts/run_tests.py b/devscripts/run_tests.py index 6d638a974..c605aa62c 100755 --- a/devscripts/run_tests.py +++ b/devscripts/run_tests.py @@ -4,6 +4,7 @@ import argparse import functools import os import re +import shlex import subprocess import sys from pathlib import Path @@ -18,6 +19,8 @@ def parse_args(): 'test', help='a extractor tests, or one of "core" or "download"', nargs='*') parser.add_argument( '-k', help='run a test matching EXPRESSION. 
Same as "pytest -k"', metavar='EXPRESSION') + parser.add_argument( + '--pytest-args', help='arguments to passthrough to pytest') return parser.parse_args() @@ -26,15 +29,16 @@ def run_tests(*tests, pattern=None, ci=False): run_download = 'download' in tests tests = list(map(fix_test_name, tests)) - arguments = ['pytest', '-Werror', '--tb=short'] + pytest_args = args.pytest_args or os.getenv('HATCH_TEST_ARGS', '') + arguments = ['pytest', '-Werror', '--tb=short', *shlex.split(pytest_args)] if ci: arguments.append('--color=yes') + if pattern: + arguments.extend(['-k', pattern]) if run_core: arguments.extend(['-m', 'not download']) elif run_download: arguments.extend(['-m', 'download']) - elif pattern: - arguments.extend(['-k', pattern]) else: arguments.extend( f'test/test_download.py::TestDownload::test_{test}' for test in tests) @@ -46,13 +50,13 @@ def run_tests(*tests, pattern=None, ci=False): pass arguments = [sys.executable, '-Werror', '-m', 'unittest'] + if pattern: + arguments.extend(['-k', pattern]) if run_core: print('"pytest" needs to be installed to run core tests', file=sys.stderr, flush=True) return 1 elif run_download: arguments.append('test.test_download') - elif pattern: - arguments.extend(['-k', pattern]) else: arguments.extend( f'test.test_download.TestDownload.test_{test}' for test in tests) diff --git a/pyproject.toml b/pyproject.toml index 8e3bce4bf..96cb368b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,9 +66,16 @@ build = [ "wheel", ] dev = [ - "flake8", - "isort", - "pytest", + "pre-commit", + "yt-dlp[static-analysis]", + "yt-dlp[test]", +] +static-analysis = [ + "autopep8~=2.0", + "ruff~=0.4.4", +] +test = [ + "pytest~=8.1", ] pyinstaller = [ "pyinstaller>=6.3; sys_platform!='darwin'", @@ -126,3 +133,146 @@ artifacts = ["/yt_dlp/extractor/lazy_extractors.py"] [tool.hatch.version] path = "yt_dlp/version.py" pattern = "_pkg_version = '(?P<version>[^']+)'" + +[tool.hatch.envs.default] +features = ["curl-cffi", "default"] +dependencies 
= ["pre-commit"] +path = ".venv" +installer = "uv" + +[tool.hatch.envs.default.scripts] +setup = "pre-commit install --config .pre-commit-hatch.yaml" +yt-dlp = "python -Werror -Xdev -m yt_dlp {args}" + +[tool.hatch.envs.hatch-static-analysis] +detached = true +features = ["static-analysis"] +dependencies = [] # override hatch ruff version +config-path = "pyproject.toml" + +[tool.hatch.envs.hatch-static-analysis.scripts] +format-check = "autopep8 --diff {args:.}" +format-fix = "autopep8 --in-place {args:.}" +lint-check = "ruff check {args:.}" +lint-fix = "ruff check --fix {args:.}" + +[tool.hatch.envs.hatch-test] +features = ["test"] +dependencies = [ + "pytest-randomly~=3.15", + "pytest-rerunfailures~=14.0", + "pytest-xdist[psutil]~=3.5", +] + +[tool.hatch.envs.hatch-test.scripts] +run = "python -m devscripts.run_tests {args}" +run-cov = "echo Code coverage not implemented && exit 1" + +[[tool.hatch.envs.hatch-test.matrix]] +python = [ + "3.8", + "3.9", + "3.10", + "3.11", + "3.12", + "pypy3.8", + "pypy3.9", + "pypy3.10", +] + +[tool.ruff] +line-length = 120 + +[tool.ruff.lint] +ignore = [ + "E402", # module level import not at top of file + "E501", # line too long + "E731", # do not assign a lambda expression, use a def + "E741", # ambiguous variable name +] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "I", # import order +] + +[tool.ruff.lint.per-file-ignores] +"devscripts/lazy_load_template.py" = ["F401"] +"!yt_dlp/extractor/**.py" = ["I"] + +[tool.ruff.lint.isort] +known-first-party = [ + "bundle", + "devscripts", + "test", +] +relative-imports-order = "closest-to-furthest" + +[tool.autopep8] +max_line_length = 120 +recursive = true +exit-code = true +jobs = 0 +select = [ + "E101", + "E112", + "E113", + "E115", + "E116", + "E117", + "E121", + "E122", + "E123", + "E124", + "E125", + "E126", + "E127", + "E128", + "E129", + "E131", + "E201", + "E202", + "E203", + "E211", + "E221", + "E222", + "E223", + "E224", + "E225", 
+ "E226", + "E227", + "E228", + "E231", + "E241", + "E242", + "E251", + "E252", + "E261", + "E262", + "E265", + "E266", + "E271", + "E272", + "E273", + "E274", + "E275", + "E301", + "E302", + "E303", + "E304", + "E305", + "E306", + "E502", + "E701", + "E702", + "E704", + "W391", + "W504", +] + +[tool.pytest.ini_options] +addopts = "-ra -v --strict-markers" +markers = [ + "download", +] diff --git a/setup.cfg b/setup.cfg index aeb4cee58..340cc3b4d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -14,12 +14,6 @@ remove-duplicate-keys = true remove-unused-variables = true -[tool:pytest] -addopts = -ra -v --strict-markers -markers = - download - - [tox:tox] skipsdist = true envlist = py{38,39,310,311,312},pypy{38,39,310} diff --git a/test/test_http_proxy.py b/test/test_http_proxy.py index c1d7c53f5..1b21fe78e 100644 --- a/test/test_http_proxy.py +++ b/test/test_http_proxy.py @@ -93,6 +93,7 @@ if urllib3: This allows us to chain multiple TLS connections. """ + def __init__(self, socket, ssl_context, server_hostname=None, suppress_ragged_eofs=True, server_side=False): self.incoming = ssl.MemoryBIO() self.outgoing = ssl.MemoryBIO() diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index fc18ead3a..e287e04bc 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1,4 +1,5 @@ # flake8: noqa: F401 +# isort: off from .youtube import ( # Youtube is moved to the top to improve performance YoutubeIE, @@ -24,6 +25,8 @@ from .youtube import ( # Youtube is moved to the top to improve performance YoutubeConsentRedirectIE, ) +# isort: on + from .abc import ( ABCIE, ABCIViewIE, @@ -43,27 +46,33 @@ from .abematv import ( ) from .academicearth import AcademicEarthCourseIE from .acast import ( - ACastIE, ACastChannelIE, + ACastIE, +) +from .acfun import ( + AcFunBangumiIE, + AcFunVideoIE, +) +from .adn import ( + ADNIE, + ADNSeasonIE, ) -from .acfun import AcFunVideoIE, AcFunBangumiIE -from .adn import ADNIE, ADNSeasonIE from 
.adobeconnect import AdobeConnectIE from .adobetv import ( + AdobeTVChannelIE, AdobeTVEmbedIE, AdobeTVIE, AdobeTVShowIE, - AdobeTVChannelIE, AdobeTVVideoIE, ) from .adultswim import AdultSwimIE from .aenetworks import ( - AENetworksIE, AENetworksCollectionIE, + AENetworksIE, AENetworksShowIE, - HistoryTopicIE, - HistoryPlayerIE, BiographyIE, + HistoryPlayerIE, + HistoryTopicIE, ) from .aeonco import AeonCoIE from .afreecatv import ( @@ -79,77 +88,85 @@ from .agora import ( ) from .airtv import AirTVIE from .aitube import AitubeKZVideoIE +from .aliexpress import AliExpressLiveIE from .aljazeera import AlJazeeraIE +from .allocine import AllocineIE from .allstar import ( AllstarIE, AllstarProfileIE, ) from .alphaporno import AlphaPornoIE +from .alsace20tv import ( + Alsace20TVEmbedIE, + Alsace20TVIE, +) from .altcensored import ( - AltCensoredIE, AltCensoredChannelIE, + AltCensoredIE, ) from .alura import ( + AluraCourseIE, AluraIE, - AluraCourseIE ) from .amadeustv import AmadeusTVIE from .amara import AmaraIE -from .amcnetworks import AMCNetworksIE from .amazon import ( - AmazonStoreIE, AmazonReviewsIE, + AmazonStoreIE, ) from .amazonminitv import ( AmazonMiniTVIE, AmazonMiniTVSeasonIE, AmazonMiniTVSeriesIE, ) +from .amcnetworks import AMCNetworksIE from .americastestkitchen import ( AmericasTestKitchenIE, AmericasTestKitchenSeasonIE, ) from .anchorfm import AnchorFMEpisodeIE from .angel import AngelIE +from .antenna import ( + Ant1NewsGrArticleIE, + Ant1NewsGrEmbedIE, + AntennaGrWatchIE, +) from .anvato import AnvatoIE from .aol import AolIE -from .allocine import AllocineIE -from .aliexpress import AliExpressLiveIE -from .alsace20tv import ( - Alsace20TVIE, - Alsace20TVEmbedIE, -) from .apa import APAIE from .aparat import AparatIE from .appleconnect import AppleConnectIE +from .applepodcasts import ApplePodcastsIE from .appletrailers import ( AppleTrailersIE, AppleTrailersSectionIE, ) -from .applepodcasts import ApplePodcastsIE from .archiveorg import ( 
ArchiveOrgIE, YoutubeWebArchiveIE, ) from .arcpublishing import ArcPublishingIE -from .arkena import ArkenaIE from .ard import ( + ARDIE, ARDBetaMediathekIE, ARDMediathekCollectionIE, - ARDIE, ) +from .arkena import ArkenaIE +from .arnes import ArnesIE from .art19 import ( Art19IE, Art19ShowIE, ) from .arte import ( - ArteTVIE, - ArteTVEmbedIE, - ArteTVPlaylistIE, ArteTVCategoryIE, + ArteTVEmbedIE, + ArteTVIE, + ArteTVPlaylistIE, +) +from .asobichannel import ( + AsobiChannelIE, + AsobiChannelTagURLIE, ) -from .arnes import ArnesIE -from .asobichannel import AsobiChannelIE, AsobiChannelTagURLIE from .asobistage import AsobiStageIE from .atresplayer import AtresPlayerIE from .atscaleconf import AtScaleConfEventIE @@ -160,57 +177,60 @@ from .audiodraft import ( AudiodraftCustomIE, AudiodraftGenericIE, ) -from .audiomack import AudiomackIE, AudiomackAlbumIE +from .audiomack import ( + AudiomackAlbumIE, + AudiomackIE, +) from .audius import ( AudiusIE, - AudiusTrackIE, AudiusPlaylistIE, AudiusProfileIE, + AudiusTrackIE, ) from .awaan import ( AWAANIE, - AWAANVideoIE, AWAANLiveIE, AWAANSeasonIE, + AWAANVideoIE, ) from .axs import AxsIE from .azmedien import AZMedienIE from .baidu import BaiduVideoIE from .banbye import ( - BanByeIE, BanByeChannelIE, + BanByeIE, ) from .bandaichannel import BandaiChannelIE from .bandcamp import ( - BandcampIE, BandcampAlbumIE, - BandcampWeeklyIE, + BandcampIE, BandcampUserIE, + BandcampWeeklyIE, ) from .bannedvideo import BannedVideoIE from .bbc import ( - BBCCoUkIE, + BBCIE, BBCCoUkArticleIE, + BBCCoUkIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE, - BBCIE, ) +from .beatbump import ( + BeatBumpPlaylistIE, + BeatBumpVideoIE, +) +from .beatport import BeatportIE from .beeg import BeegIE from .behindkink import BehindKinkIE from .bellmedia import BellMediaIE -from .beatbump import ( - BeatBumpVideoIE, - BeatBumpPlaylistIE, -) -from .beatport import BeatportIE from .berufetv import BerufeTVIE from .bet import BetIE 
from .bfi import BFIPlayerIE from .bfmtv import ( BFMTVIE, - BFMTVLiveIE, BFMTVArticleIE, + BFMTVLiveIE, ) from .bibeltv import ( BibelTVLiveIE, @@ -221,37 +241,37 @@ from .bigflix import BigflixIE from .bigo import BigoIE from .bild import BildIE from .bilibili import ( - BiliBiliIE, + BilibiliAudioAlbumIE, + BilibiliAudioIE, BiliBiliBangumiIE, - BiliBiliBangumiSeasonIE, BiliBiliBangumiMediaIE, + BiliBiliBangumiSeasonIE, + BilibiliCategoryIE, BilibiliCheeseIE, BilibiliCheeseSeasonIE, - BiliBiliSearchIE, - BilibiliCategoryIE, - BilibiliAudioIE, - BilibiliAudioAlbumIE, - BiliBiliPlayerIE, - BilibiliSpaceVideoIE, - BilibiliSpaceAudioIE, BilibiliCollectionListIE, - BilibiliSeriesListIE, BilibiliFavoritesListIE, - BilibiliWatchlaterIE, + BiliBiliIE, + BiliBiliPlayerIE, BilibiliPlaylistIE, + BiliBiliSearchIE, + BilibiliSeriesListIE, + BilibiliSpaceAudioIE, + BilibiliSpaceVideoIE, + BilibiliWatchlaterIE, BiliIntlIE, BiliIntlSeriesIE, BiliLiveIE, ) from .biobiochiletv import BioBioChileTVIE from .bitchute import ( - BitChuteIE, BitChuteChannelIE, + BitChuteIE, ) from .blackboardcollaborate import BlackboardCollaborateIE from .bleacherreport import ( - BleacherReportIE, BleacherReportCMSIE, + BleacherReportIE, ) from .blerp import BlerpIE from .blogger import BloggerIE @@ -264,27 +284,27 @@ from .box import BoxIE from .boxcast import BoxCastVideoIE from .bpb import BpbIE from .br import BRIE -from .bravotv import BravoTVIE from .brainpop import ( - BrainPOPIE, - BrainPOPJrIE, BrainPOPELLIE, BrainPOPEspIE, BrainPOPFrIE, + BrainPOPIE, BrainPOPIlIE, + BrainPOPJrIE, ) +from .bravotv import BravoTVIE from .breitbart import BreitBartIE from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, ) from .brilliantpala import ( - BrilliantpalaElearnIE, BrilliantpalaClassesIE, + BrilliantpalaElearnIE, ) -from .businessinsider import BusinessInsiderIE from .bundesliga import BundesligaIE from .bundestag import BundestagIE +from .businessinsider import BusinessInsiderIE from 
.buzzfeed import BuzzFeedIE from .byutv import BYUtvIE from .c56 import C56IE @@ -292,40 +312,40 @@ from .callin import CallinIE from .caltrans import CaltransIE from .cam4 import CAM4IE from .camdemy import ( + CamdemyFolderIE, CamdemyIE, - CamdemyFolderIE ) from .camfm import ( CamFMEpisodeIE, - CamFMShowIE + CamFMShowIE, ) from .cammodels import CamModelsIE from .camsoda import CamsodaIE from .camtasia import CamtasiaEmbedIE from .canal1 import Canal1IE from .canalalpha import CanalAlphaIE -from .canalplus import CanalplusIE from .canalc2 import Canalc2IE +from .canalplus import CanalplusIE from .caracoltv import CaracolTvPlayIE from .cartoonnetwork import CartoonNetworkIE from .cbc import ( CBCIE, + CBCGemIE, + CBCGemLiveIE, + CBCGemPlaylistIE, CBCPlayerIE, CBCPlayerPlaylistIE, - CBCGemIE, - CBCGemPlaylistIE, - CBCGemLiveIE, ) from .cbs import ( CBSIE, ParamountPressExpressIE, ) from .cbsnews import ( + CBSLocalArticleIE, + CBSLocalIE, + CBSLocalLiveIE, CBSNewsEmbedIE, CBSNewsIE, - CBSLocalIE, - CBSLocalArticleIE, - CBSLocalLiveIE, CBSNewsLiveIE, CBSNewsLiveVideoIE, ) @@ -354,12 +374,12 @@ from .chzzk import ( from .cinemax import CinemaxIE from .cinetecamilano import CinetecaMilanoIE from .cineverse import ( - CineverseIE, CineverseDetailsIE, + CineverseIE, ) from .ciscolive import ( - CiscoLiveSessionIE, CiscoLiveSearchIE, + CiscoLiveSessionIE, ) from .ciscowebex import CiscoWebexIE from .cjsw import CJSWIE @@ -372,16 +392,13 @@ from .cloudycdn import CloudyCDNIE from .clubic import ClubicIE from .clyp import ClypIE from .cmt import CMTIE -from .cnbc import ( - CNBCVideoIE, -) +from .cnbc import CNBCVideoIE from .cnn import ( CNNIE, - CNNBlogsIE, CNNArticleIE, + CNNBlogsIE, CNNIndonesiaIE, ) -from .coub import CoubIE from .comedycentral import ( ComedyCentralIE, ComedyCentralTVIE, @@ -399,44 +416,48 @@ from .commonprotocols import ( from .condenast import CondeNastIE from .contv import CONtvIE from .corus import CorusIE +from .coub import CoubIE +from .cozytv 
import CozyTVIE from .cpac import ( CPACIE, CPACPlaylistIE, ) -from .cozytv import CozyTVIE from .cracked import CrackedIE from .crackle import CrackleIE from .craftsy import CraftsyIE from .crooksandliars import CrooksAndLiarsIE from .crowdbunker import ( - CrowdBunkerIE, CrowdBunkerChannelIE, + CrowdBunkerIE, ) from .crtvg import CrtvgIE from .crunchyroll import ( + CrunchyrollArtistIE, CrunchyrollBetaIE, CrunchyrollBetaShowIE, CrunchyrollMusicIE, - CrunchyrollArtistIE, ) -from .cspan import CSpanIE, CSpanCongressIE +from .cspan import ( + CSpanCongressIE, + CSpanIE, +) from .ctsnews import CtsNewsIE from .ctv import CTVIE from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE from .curiositystream import ( - CuriosityStreamIE, CuriosityStreamCollectionsIE, + CuriosityStreamIE, CuriosityStreamSeriesIE, ) from .cwtv import CWTVIE from .cybrary import ( + CybraryCourseIE, CybraryIE, - CybraryCourseIE ) from .dacast import ( - DacastVODIE, DacastPlaylistIE, + DacastVODIE, ) from .dailymail import DailyMailIE from .dailymotion import ( @@ -458,8 +479,8 @@ from .dangalplay import ( DangalPlaySeasonIE, ) from .daum import ( - DaumIE, DaumClipIE, + DaumIE, DaumPlaylistIE, DaumUserIE, ) @@ -467,49 +488,69 @@ from .daystar import DaystarClipIE from .dbtv import DBTVIE from .dctp import DctpTvIE from .deezer import ( - DeezerPlaylistIE, DeezerAlbumIE, + DeezerPlaylistIE, ) from .democracynow import DemocracynowIE from .detik import DetikEmbedIE +from .deuxm import ( + DeuxMIE, + DeuxMNewsIE, +) +from .dfb import DFBIE +from .dhm import DHMIE +from .digitalconcerthall import DigitalConcertHallIE +from .digiteka import DigitekaIE +from .discogs import DiscogsReleasePlaylistIE +from .discovery import DiscoveryIE +from .disney import DisneyIE +from .dispeak import DigitallySpeakingIE from .dlf import ( DLFIE, DLFCorpusIE, ) -from .dfb import DFBIE -from .dhm import DHMIE +from .dlive import ( + DLiveStreamIE, + DLiveVODIE, +) from .douyutv import ( 
DouyuShowIE, DouyuTVIE, ) from .dplay import ( - DPlayIE, - DiscoveryPlusIE, - HGTVDeIE, - GoDiscoveryIE, - TravelChannelIE, - CookingChannelIE, - HGTVUsaIE, - FoodNetworkIE, - InvestigationDiscoveryIE, - DestinationAmericaIE, - AmHistoryChannelIE, - ScienceChannelIE, - DIYNetworkIE, - DiscoveryLifeIE, - AnimalPlanetIE, TLCIE, - MotorTrendIE, - MotorTrendOnDemandIE, - DiscoveryPlusIndiaIE, + AmHistoryChannelIE, + AnimalPlanetIE, + CookingChannelIE, + DestinationAmericaIE, + DiscoveryLifeIE, DiscoveryNetworksDeIE, + DiscoveryPlusIE, + DiscoveryPlusIndiaIE, + DiscoveryPlusIndiaShowIE, DiscoveryPlusItalyIE, DiscoveryPlusItalyShowIE, - DiscoveryPlusIndiaShowIE, + DIYNetworkIE, + DPlayIE, + FoodNetworkIE, GlobalCyclingNetworkPlusIE, + GoDiscoveryIE, + HGTVDeIE, + HGTVUsaIE, + InvestigationDiscoveryIE, + MotorTrendIE, + MotorTrendOnDemandIE, + ScienceChannelIE, + TravelChannelIE, ) -from .dreisat import DreiSatIE from .drbonanza import DRBonanzaIE +from .dreisat import DreiSatIE +from .drooble import DroobleIE +from .dropbox import DropboxIE +from .dropout import ( + DropoutIE, + DropoutSeasonIE, +) from .drtuber import DrTuberIE from .drtv import ( DRTVIE, @@ -518,32 +559,21 @@ from .drtv import ( DRTVSeriesIE, ) from .dtube import DTubeIE -from .dvtv import DVTVIE from .duboku import ( DubokuIE, - DubokuPlaylistIE + DubokuPlaylistIE, ) from .dumpert import DumpertIE -from .deuxm import ( - DeuxMIE, - DeuxMNewsIE -) -from .digitalconcerthall import DigitalConcertHallIE -from .discogs import DiscogsReleasePlaylistIE -from .discovery import DiscoveryIE -from .disney import DisneyIE -from .dispeak import DigitallySpeakingIE -from .dropbox import DropboxIE -from .dropout import ( - DropoutSeasonIE, - DropoutIE -) from .duoplay import DuoplayIE +from .dvtv import DVTVIE from .dw import ( DWIE, DWArticleIE, ) -from .eagleplatform import EaglePlatformIE, ClipYouEmbedIE +from .eagleplatform import ( + ClipYouEmbedIE, + EaglePlatformIE, +) from .ebaumsworld import EbaumsWorldIE 
from .ebay import EbayIE from .egghead import ( @@ -567,8 +597,8 @@ from .epoch import EpochIE from .eporner import EpornerIE from .erocast import ErocastIE from .eroprofile import ( - EroProfileIE, EroProfileAlbumIE, + EroProfileIE, ) from .err import ERRJupiterIE from .ertgr import ( @@ -578,31 +608,33 @@ from .ertgr import ( ) from .espn import ( ESPNIE, - WatchESPNIE, ESPNArticleIE, - FiveThirtyEightIE, ESPNCricInfoIE, + FiveThirtyEightIE, + WatchESPNIE, ) from .ettutv import EttuTvIE -from .europa import EuropaIE, EuroParlWebstreamIE +from .europa import ( + EuropaIE, + EuroParlWebstreamIE, +) from .europeantour import EuropeanTourIE from .eurosport import EurosportIE from .euscreen import EUScreenIE from .expressen import ExpressenIE from .eyedotv import EyedoTVIE from .facebook import ( + FacebookAdsIE, FacebookIE, FacebookPluginsVideoIE, FacebookRedirectURLIE, FacebookReelIE, - FacebookAdsIE, +) +from .fancode import ( + FancodeLiveIE, + FancodeVodIE, ) from .fathom import FathomIE -from .fancode import ( - FancodeVodIE, - FancodeLiveIE -) - from .faz import FazIE from .fc2 import ( FC2IE, @@ -612,8 +644,8 @@ from .fc2 import ( from .fczenit import FczenitIE from .fifa import FifaIE from .filmon import ( - FilmOnIE, FilmOnChannelIE, + FilmOnIE, ) from .filmweb import FilmwebIE from .firsttv import FirstTVIE @@ -621,17 +653,17 @@ from .fivetv import FiveTVIE from .flextv import FlexTVIE from .flickr import FlickrIE from .floatplane import ( - FloatplaneIE, FloatplaneChannelIE, + FloatplaneIE, ) from .folketinget import FolketingetIE from .footyroom import FootyRoomIE from .formula1 import Formula1IE from .fourtube import ( FourTubeIE, - PornTubeIE, - PornerBrosIE, FuxIE, + PornerBrosIE, + PornTubeIE, ) from .fox import FOXIE from .fox9 import ( @@ -639,8 +671,8 @@ from .fox9 import ( FOX9NewsIE, ) from .foxnews import ( - FoxNewsIE, FoxNewsArticleIE, + FoxNewsIE, FoxNewsVideoIE, ) from .foxsports import FoxSportsIE @@ -648,20 +680,20 @@ from .fptplay import 
FptplayIE from .franceinter import FranceInterIE from .francetv import ( FranceTVIE, - FranceTVSiteIE, FranceTVInfoIE, + FranceTVSiteIE, ) from .freesound import FreesoundIE from .freespeech import FreespeechIE -from .frontendmasters import ( - FrontendMastersIE, - FrontendMastersLessonIE, - FrontendMastersCourseIE -) from .freetv import ( FreeTvIE, FreeTvMoviesIE, ) +from .frontendmasters import ( + FrontendMastersCourseIE, + FrontendMastersIE, + FrontendMastersLessonIE, +) from .fujitv import FujiTVFODPlus7IE from .funimation import ( FunimationIE, @@ -672,17 +704,17 @@ from .funk import FunkIE from .funker530 import Funker530IE from .fuyintv import FuyinTVIE from .gab import ( - GabTVIE, GabIE, + GabTVIE, ) from .gaia import GaiaIE from .gamejolt import ( - GameJoltIE, - GameJoltUserIE, + GameJoltCommunityIE, GameJoltGameIE, GameJoltGameSoundtrackIE, - GameJoltCommunityIE, + GameJoltIE, GameJoltSearchIE, + GameJoltUserIE, ) from .gamespot import GameSpotIE from .gamestar import GameStarIE @@ -691,13 +723,17 @@ from .gazeta import GazetaIE from .gdcvault import GDCVaultIE from .gedidigital import GediDigitalIE from .generic import GenericIE +from .genericembeds import ( + HTML5MediaEmbedIE, + QuotedHTMLIE, +) from .genius import ( GeniusIE, GeniusLyricsIE, ) from .getcourseru import ( + GetCourseRuIE, GetCourseRuPlayerIE, - GetCourseRuIE ) from .gettr import ( GettrIE, @@ -706,41 +742,45 @@ from .gettr import ( from .giantbomb import GiantBombIE from .glide import GlideIE from .globalplayer import ( + GlobalPlayerAudioEpisodeIE, + GlobalPlayerAudioIE, GlobalPlayerLiveIE, GlobalPlayerLivePlaylistIE, - GlobalPlayerAudioIE, - GlobalPlayerAudioEpisodeIE, - GlobalPlayerVideoIE + GlobalPlayerVideoIE, ) from .globo import ( - GloboIE, GloboArticleIE, + GloboIE, +) +from .glomex import ( + GlomexEmbedIE, + GlomexIE, ) from .gmanetwork import GMANetworkVideoIE from .go import GoIE -from .godtube import GodTubeIE from .godresource import GodResourceIE +from .godtube import 
GodTubeIE from .gofile import GofileIE from .golem import GolemIE from .goodgame import GoodGameIE from .googledrive import ( - GoogleDriveIE, GoogleDriveFolderIE, + GoogleDriveIE, ) from .googlepodcasts import ( - GooglePodcastsIE, GooglePodcastsFeedIE, + GooglePodcastsIE, ) from .googlesearch import GoogleSearchIE -from .gopro import GoProIE from .goplay import GoPlayIE +from .gopro import GoProIE from .goshgay import GoshgayIE from .gotostage import GoToStageIE from .gputechconf import GPUTechConfIE from .gronkh import ( - GronkhIE, GronkhFeedIE, - GronkhVodsIE + GronkhIE, + GronkhVodsIE, ) from .groupon import GrouponIE from .harpodeon import HarpodeonIE @@ -749,10 +789,10 @@ from .hearthisat import HearThisAtIE from .heise import HeiseIE from .hellporno import HellPornoIE from .hgtv import HGTVComShowIE -from .hketv import HKETVIE from .hidive import HiDiveIE from .historicfilms import HistoricFilmsIE from .hitrecord import HitRecordIE +from .hketv import HKETVIE from .hollywoodreporter import ( HollywoodReporterIE, HollywoodReporterPlaylistIE, @@ -761,8 +801,8 @@ from .holodex import HolodexIE from .hotnewhiphop import HotNewHipHopIE from .hotstar import ( HotStarIE, - HotStarPrefixIE, HotStarPlaylistIE, + HotStarPrefixIE, HotStarSeasonIE, HotStarSeriesIE, ) @@ -773,34 +813,30 @@ from .hrti import ( HRTiPlaylistIE, ) from .hse import ( - HSEShowIE, HSEProductIE, -) -from .genericembeds import ( - HTML5MediaEmbedIE, - QuotedHTMLIE, + HSEShowIE, ) from .huajiao import HuajiaoIE -from .huya import HuyaLiveIE from .huffpost import HuffPostIE from .hungama import ( + HungamaAlbumPlaylistIE, HungamaIE, HungamaSongIE, - HungamaAlbumPlaylistIE, ) +from .huya import HuyaLiveIE from .hypem import HypemIE from .hypergryph import MonsterSirenHypergryphMusicIE from .hytale import HytaleIE from .icareus import IcareusIE from .ichinanalive import ( - IchinanaLiveIE, IchinanaLiveClipIE, + IchinanaLiveIE, ) from .idolplus import IdolPlusIE from .ign import ( IGNIE, - 
IGNVideoIE, IGNArticleIE, + IGNVideoIE, ) from .iheart import ( IHeartRadioIE, @@ -810,12 +846,12 @@ from .ilpost import IlPostIE from .iltalehti import IltalehtiIE from .imdb import ( ImdbIE, - ImdbListIE + ImdbListIE, ) from .imgur import ( - ImgurIE, ImgurAlbumIE, ImgurGalleryIE, + ImgurIE, ) from .ina import InaIE from .inc import IncIE @@ -824,20 +860,20 @@ from .infoq import InfoQIE from .instagram import ( InstagramIE, InstagramIOSIE, - InstagramUserIE, - InstagramTagIE, InstagramStoryIE, + InstagramTagIE, + InstagramUserIE, ) from .internazionale import InternazionaleIE from .internetvideoarchive import InternetVideoArchiveIE from .iprima import ( + IPrimaCNNIE, IPrimaIE, - IPrimaCNNIE ) from .iqiyi import ( - IqiyiIE, + IqAlbumIE, IqIE, - IqAlbumIE + IqiyiIE, ) from .islamchannel import ( IslamChannelIE, @@ -845,16 +881,16 @@ from .islamchannel import ( ) from .israelnationalnews import IsraelNationalNewsIE from .itprotv import ( + ITProTVCourseIE, ITProTVIE, - ITProTVCourseIE ) from .itv import ( - ITVIE, ITVBTCCIE, + ITVIE, ) from .ivi import ( + IviCompilationIE, IviIE, - IviCompilationIE ) from .ivideon import IvideonIE from .iwara import ( @@ -865,15 +901,15 @@ from .iwara import ( from .ixigua import IxiguaIE from .izlesene import IzleseneIE from .jamendo import ( - JamendoIE, JamendoAlbumIE, + JamendoIE, ) from .japandiet import ( + SangiinIE, + SangiinInstructionIE, ShugiinItvLiveIE, ShugiinItvLiveRoomIE, ShugiinItvVodIE, - SangiinInstructionIE, - SangiinIE, ) from .jeuxvideo import JeuxVideoIE from .jiocinema import ( @@ -881,13 +917,13 @@ from .jiocinema import ( JioCinemaSeriesIE, ) from .jiosaavn import ( - JioSaavnSongIE, JioSaavnAlbumIE, JioSaavnPlaylistIE, + JioSaavnSongIE, ) -from .jove import JoveIE from .joj import JojIE from .joqrag import JoqrAgIE +from .jove import JoveIE from .jstream import JStreamIE from .jtbc import ( JTBCIE, @@ -914,17 +950,17 @@ from .kinopoisk import KinoPoiskIE from .kommunetv import KommunetvIE from .kompas 
import KompasVideoIE from .koo import KooIE -from .kth import KTHIE from .krasview import KrasViewIE +from .kth import KTHIE from .ku6 import Ku6IE from .kukululive import KukuluLiveIE from .kuwo import ( - KuwoIE, KuwoAlbumIE, - KuwoChartIE, - KuwoSingerIE, KuwoCategoryIE, + KuwoChartIE, + KuwoIE, KuwoMvIE, + KuwoSingerIE, ) from .la7 import ( LA7IE, @@ -944,14 +980,14 @@ from .lbry import ( ) from .lci import LCIIE from .lcp import ( - LcpPlayIE, LcpIE, + LcpPlayIE, ) from .lecture2go import Lecture2GoIE from .lecturio import ( - LecturioIE, LecturioCourseIE, LecturioDeCourseIE, + LecturioIE, ) from .leeco import ( LeIE, @@ -968,22 +1004,22 @@ from .lenta import LentaIE from .libraryofcongress import LibraryOfCongressIE from .libsyn import LibsynIE from .lifenews import ( - LifeNewsIE, LifeEmbedIE, + LifeNewsIE, ) from .likee import ( LikeeIE, - LikeeUserIE + LikeeUserIE, ) from .limelight import ( - LimelightMediaIE, LimelightChannelIE, LimelightChannelListIE, + LimelightMediaIE, ) from .linkedin import ( LinkedInIE, - LinkedInLearningIE, LinkedInLearningCourseIE, + LinkedInLearningIE, ) from .liputan6 import Liputan6IE from .listennotes import ListenNotesIE @@ -1000,25 +1036,23 @@ from .lnkgo import ( LnkIE, ) from .loom import ( - LoomIE, LoomFolderIE, + LoomIE, ) from .lovehomeporn import LoveHomePornIE from .lrt import ( LRTVODIE, - LRTStreamIE + LRTStreamIE, ) from .lsm import ( LSMLREmbedIE, LSMLTVEmbedIE, - LSMReplayIE -) -from .lumni import ( - LumniIE + LSMReplayIE, ) +from .lumni import LumniIE from .lynda import ( + LyndaCourseIE, LyndaIE, - LyndaCourseIE ) from .maariv import MaarivIE from .magellantv import MagellanTVIE @@ -1030,13 +1064,13 @@ from .mailru import ( ) from .mainstreaming import MainStreamingIE from .mangomolo import ( - MangomoloVideoIE, MangomoloLiveIE, + MangomoloVideoIE, ) from .manoto import ( ManotoTVIE, - ManotoTVShowIE, ManotoTVLiveIE, + ManotoTVShowIE, ) from .manyvids import ManyVidsIE from .maoritv import MaoriTVIE @@ 
-1052,13 +1086,14 @@ from .mdr import MDRIE from .medaltv import MedalTVIE from .mediaite import MediaiteIE from .mediaklikk import MediaKlikkIE +from .medialaan import MedialaanIE from .mediaset import ( MediasetIE, MediasetShowIE, ) from .mediasite import ( - MediasiteIE, MediasiteCatalogIE, + MediasiteIE, MediasiteNamedCatalogIE, ) from .mediastream import ( @@ -1068,26 +1103,30 @@ from .mediastream import ( from .mediaworksnz import MediaWorksNZVODIE from .medici import MediciIE from .megaphone import MegaphoneIE +from .megatvcom import ( + MegaTVComEmbedIE, + MegaTVComIE, +) from .meipai import MeipaiIE from .melonvod import MelonVODIE from .metacritic import MetacriticIE from .mgtv import MGTVIE +from .microsoftembed import MicrosoftEmbedIE from .microsoftstream import MicrosoftStreamIE from .microsoftvirtualacademy import ( - MicrosoftVirtualAcademyIE, MicrosoftVirtualAcademyCourseIE, + MicrosoftVirtualAcademyIE, ) -from .microsoftembed import MicrosoftEmbedIE from .mildom import ( - MildomIE, - MildomVodIE, MildomClipIE, + MildomIE, MildomUserVodIE, + MildomVodIE, ) from .minds import ( - MindsIE, MindsChannelIE, MindsGroupIE, + MindsIE, ) from .minoto import MinotoIE from .mirrativ import ( @@ -1095,31 +1134,34 @@ from .mirrativ import ( MirrativUserIE, ) from .mirrorcouk import MirrorCoUKIE -from .mit import TechTVMITIE, OCWMITIE +from .mit import ( + OCWMITIE, + TechTVMITIE, +) from .mitele import MiTeleIE from .mixch import ( - MixchIE, MixchArchiveIE, + MixchIE, ) from .mixcloud import ( MixcloudIE, - MixcloudUserIE, MixcloudPlaylistIE, + MixcloudUserIE, ) from .mlb import ( MLBIE, - MLBVideoIE, MLBTVIE, MLBArticleIE, + MLBVideoIE, ) from .mlssoccer import MLSSoccerIE from .mocha import MochaVideoIE from .mojvideo import MojvideoIE from .monstercat import MonstercatIE from .motherless import ( - MotherlessIE, - MotherlessGroupIE, MotherlessGalleryIE, + MotherlessGroupIE, + MotherlessIE, MotherlessUploaderIE, ) from .motorsport import MotorsportIE @@ 
-1129,23 +1171,26 @@ from .moviezine import MoviezineIE from .movingimage import MovingImageIE from .msn import MSNIE from .mtv import ( - MTVIE, - MTVVideoIE, - MTVServicesEmbeddedIE, MTVDEIE, - MTVJapanIE, + MTVIE, MTVItaliaIE, MTVItaliaProgrammaIE, + MTVJapanIE, + MTVServicesEmbeddedIE, + MTVVideoIE, ) from .muenchentv import MuenchenTVIE -from .murrtube import MurrtubeIE, MurrtubeUserIE +from .murrtube import ( + MurrtubeIE, + MurrtubeUserIE, +) from .museai import MuseAIIE from .musescore import MuseScoreIE from .musicdex import ( - MusicdexSongIE, MusicdexAlbumIE, MusicdexArtistIE, MusicdexPlaylistIE, + MusicdexSongIE, ) from .mx3 import ( Mx3IE, @@ -1156,7 +1201,10 @@ from .mxplayer import ( MxplayerIE, MxplayerShowIE, ) -from .myspace import MySpaceIE, MySpaceAlbumIE +from .myspace import ( + MySpaceAlbumIE, + MySpaceIE, +) from .myspass import MySpassIE from .myvideoge import MyVideoGeIE from .myvidster import MyVidsterIE @@ -1170,8 +1218,8 @@ from .nate import ( NateProgramIE, ) from .nationalgeographic import ( - NationalGeographicVideoIE, NationalGeographicTVIE, + NationalGeographicVideoIE, ) from .naver import ( NaverIE, @@ -1179,12 +1227,12 @@ from .naver import ( NaverNowIE, ) from .nba import ( - NBAWatchEmbedIE, - NBAWatchIE, - NBAWatchCollectionIE, - NBAEmbedIE, NBAIE, NBAChannelIE, + NBAEmbedIE, + NBAWatchCollectionIE, + NBAWatchEmbedIE, + NBAWatchIE, ) from .nbc import ( NBCIE, @@ -1198,35 +1246,35 @@ from .nbc import ( ) from .ndr import ( NDRIE, - NJoyIE, NDREmbedBaseIE, NDREmbedIE, NJoyEmbedIE, + NJoyIE, ) from .ndtv import NDTVIE from .nebula import ( - NebulaIE, - NebulaClassIE, - NebulaSubscriptionsIE, NebulaChannelIE, + NebulaClassIE, + NebulaIE, + NebulaSubscriptionsIE, ) from .nekohacker import NekoHackerIE from .nerdcubed import NerdCubedFeedIE -from .netzkino import NetzkinoIE from .neteasemusic import ( - NetEaseMusicIE, NetEaseMusicAlbumIE, - NetEaseMusicSingerIE, + NetEaseMusicDjRadioIE, + NetEaseMusicIE, NetEaseMusicListIE, 
NetEaseMusicMvIE, NetEaseMusicProgramIE, - NetEaseMusicDjRadioIE, + NetEaseMusicSingerIE, ) from .netverse import ( NetverseIE, NetversePlaylistIE, NetverseSearchIE, ) +from .netzkino import NetzkinoIE from .newgrounds import ( NewgroundsIE, NewgroundsPlaylistIE, @@ -1235,14 +1283,14 @@ from .newgrounds import ( from .newspicks import NewsPicksIE from .newsy import NewsyIE from .nextmedia import ( - NextMediaIE, - NextMediaActionNewsIE, AppleDailyIE, + NextMediaActionNewsIE, + NextMediaIE, NextTVIE, ) from .nexx import ( - NexxIE, NexxEmbedIE, + NexxIE, ) from .nfb import ( NFBIE, @@ -1256,43 +1304,43 @@ from .nfl import ( NFLPlusReplayIE, ) from .nhk import ( - NhkVodIE, - NhkVodProgramIE, NhkForSchoolBangumiIE, - NhkForSchoolSubjectIE, NhkForSchoolProgramListIE, + NhkForSchoolSubjectIE, NhkRadioNewsPageIE, NhkRadiruIE, NhkRadiruLiveIE, + NhkVodIE, + NhkVodProgramIE, ) from .nhl import NHLIE from .nick import ( - NickIE, NickBrIE, NickDeIE, + NickIE, NickRuIE, ) from .niconico import ( - NiconicoIE, - NiconicoPlaylistIE, - NiconicoUserIE, - NiconicoSeriesIE, NiconicoHistoryIE, + NiconicoIE, + NiconicoLiveIE, + NiconicoPlaylistIE, + NiconicoSeriesIE, + NiconicoUserIE, NicovideoSearchDateIE, NicovideoSearchIE, NicovideoSearchURLIE, NicovideoTagURLIE, - NiconicoLiveIE, +) +from .niconicochannelplus import ( + NiconicoChannelPlusChannelLivesIE, + NiconicoChannelPlusChannelVideosIE, + NiconicoChannelPlusIE, ) from .ninaprotocol import NinaProtocolIE from .ninecninemedia import ( - NineCNineMediaIE, CPTwentyFourIE, -) -from .niconicochannelplus import ( - NiconicoChannelPlusIE, - NiconicoChannelPlusChannelVideosIE, - NiconicoChannelPlusChannelLivesIE, + NineCNineMediaIE, ) from .ninegag import NineGagIE from .ninenews import NineNewsIE @@ -1317,24 +1365,24 @@ from .nowness import ( ) from .noz import NozIE from .npo import ( - AndereTijdenIE, NPOIE, - NPOLiveIE, - NPORadioIE, - NPORadioFragmentIE, - SchoolTVIE, - HetKlokhuisIE, VPROIE, WNLIE, + AndereTijdenIE, + 
HetKlokhuisIE, + NPOLiveIE, + NPORadioFragmentIE, + NPORadioIE, + SchoolTVIE, ) from .npr import NprIE from .nrk import ( NRKIE, - NRKPlaylistIE, - NRKSkoleIE, NRKTVIE, - NRKTVDirekteIE, + NRKPlaylistIE, NRKRadioPodkastIE, + NRKSkoleIE, + NRKTVDirekteIE, NRKTVEpisodeIE, NRKTVEpisodesIE, NRKTVSeasonIE, @@ -1346,18 +1394,18 @@ from .ntvcojp import NTVCoJpCUIE from .ntvde import NTVDeIE from .ntvru import NTVRuIE from .nubilesporn import NubilesPornIE +from .nuum import ( + NuumLiveIE, + NuumMediaIE, + NuumTabIE, +) +from .nuvid import NuvidIE from .nytimes import ( - NYTimesIE, NYTimesArticleIE, NYTimesCookingIE, NYTimesCookingRecipeIE, + NYTimesIE, ) -from .nuum import ( - NuumLiveIE, - NuumTabIE, - NuumMediaIE, -) -from .nuvid import NuvidIE from .nzherald import NZHeraldIE from .nzonscreen import NZOnScreenIE from .nzz import NZZIE @@ -1365,7 +1413,7 @@ from .odkmedia import OnDemandChinaEpisodeIE from .odnoklassniki import OdnoklassnikiIE from .oftv import ( OfTVIE, - OfTVPlaylistIE + OfTVPlaylistIE, ) from .oktoberfesttv import OktoberfestTVIE from .olympics import OlympicsReplayIE @@ -1378,8 +1426,8 @@ from .onefootball import OneFootballIE from .onenewsnz import OneNewsNZIE from .oneplace import OnePlacePodcastIE from .onet import ( - OnetIE, OnetChannelIE, + OnetIE, OnetMVPIE, OnetPlIE, ) @@ -1389,33 +1437,33 @@ from .opencast import ( OpencastPlaylistIE, ) from .openrec import ( - OpenRecIE, OpenRecCaptureIE, + OpenRecIE, OpenRecMovieIE, ) from .ora import OraTVIE from .orf import ( - ORFFM4StoryIE, - ORFONIE, - ORFRadioIE, - ORFPodcastIE, ORFIPTVIE, + ORFONIE, + ORFFM4StoryIE, + ORFPodcastIE, + ORFRadioIE, ) from .outsidetv import OutsideTVIE from .owncloud import OwnCloudIE from .packtpub import ( - PacktPubIE, PacktPubCourseIE, + PacktPubIE, ) from .palcomp3 import ( - PalcoMP3IE, PalcoMP3ArtistIE, + PalcoMP3IE, PalcoMP3VideoIE, ) from .panopto import ( PanoptoIE, PanoptoListIE, - PanoptoPlaylistIE + PanoptoPlaylistIE, ) from .paramountplus import ( 
ParamountPlusIE, @@ -1424,12 +1472,18 @@ from .paramountplus import ( from .parler import ParlerIE from .parlview import ParlviewIE from .patreon import ( + PatreonCampaignIE, PatreonIE, - PatreonCampaignIE ) -from .pbs import PBSIE, PBSKidsIE +from .pbs import ( + PBSIE, + PBSKidsIE, +) from .pearvideo import PearVideoIE -from .peekvids import PeekVidsIE, PlayVidsIE +from .peekvids import ( + PeekVidsIE, + PlayVidsIE, +) from .peertube import ( PeerTubeIE, PeerTubePlaylistIE, @@ -1437,7 +1491,7 @@ from .peertube import ( from .peertv import PeerTVIE from .peloton import ( PelotonIE, - PelotonLiveIE + PelotonLiveIE, ) from .performgroup import PerformGroupIE from .periscope import ( @@ -1457,8 +1511,8 @@ from .picarto import ( from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pinterest import ( - PinterestIE, PinterestCollectionIE, + PinterestIE, ) from .pixivsketch import ( PixivSketchIE, @@ -1467,19 +1521,22 @@ from .pixivsketch import ( from .pladform import PladformIE from .planetmarathi import PlanetMarathiIE from .platzi import ( - PlatziIE, PlatziCourseIE, + PlatziIE, ) from .playplustv import PlayPlusTVIE from .playsuisse import PlaySuisseIE from .playtvak import PlaytvakIE from .playwire import PlaywireIE -from .plutotv import PlutoTVIE from .pluralsight import ( - PluralsightIE, PluralsightCourseIE, + PluralsightIE, +) +from .plutotv import PlutoTVIE +from .podbayfm import ( + PodbayFMChannelIE, + PodbayFMIE, ) -from .podbayfm import PodbayFMIE, PodbayFMChannelIE from .podchaser import PodchaserIE from .podomatic import PodomaticIE from .pokemon import ( @@ -1487,15 +1544,15 @@ from .pokemon import ( PokemonWatchIE, ) from .pokergo import ( - PokerGoIE, PokerGoCollectionIE, + PokerGoIE, ) from .polsatgo import PolsatGoIE from .polskieradio import ( - PolskieRadioIE, - PolskieRadioLegacyIE, PolskieRadioAuditionIE, PolskieRadioCategoryIE, + PolskieRadioIE, + PolskieRadioLegacyIE, PolskieRadioPlayerIE, PolskieRadioPodcastIE, 
PolskieRadioPodcastListIE, @@ -1506,57 +1563,62 @@ from .pornbox import PornboxIE from .pornflip import PornFlipIE from .pornhub import ( PornHubIE, - PornHubUserIE, - PornHubPlaylistIE, PornHubPagedVideoListIE, + PornHubPlaylistIE, + PornHubUserIE, PornHubUserVideosUploadIE, ) from .pornotube import PornotubeIE from .pornovoisines import PornoVoisinesIE from .pornoxo import PornoXOIE -from .puhutv import ( - PuhuTVIE, - PuhuTVSerieIE, -) from .pr0gramm import Pr0grammIE -from .prankcast import PrankCastIE, PrankCastPostIE +from .prankcast import ( + PrankCastIE, + PrankCastPostIE, +) from .premiershiprugby import PremiershipRugbyIE from .presstv import PressTVIE from .projectveritas import ProjectVeritasIE from .prosiebensat1 import ProSiebenSat1IE from .prx import ( - PRXStoryIE, - PRXSeriesIE, PRXAccountIE, + PRXSeriesIE, + PRXSeriesSearchIE, PRXStoriesSearchIE, - PRXSeriesSearchIE + PRXStoryIE, +) +from .puhutv import ( + PuhuTVIE, + PuhuTVSerieIE, ) from .puls4 import Puls4IE from .pyvideo import PyvideoIE from .qdance import QDanceIE from .qingting import QingTingIE from .qqmusic import ( - QQMusicIE, - QQMusicSingerIE, QQMusicAlbumIE, - QQMusicToplistIE, + QQMusicIE, QQMusicPlaylistIE, + QQMusicSingerIE, + QQMusicToplistIE, ) from .r7 import ( R7IE, R7ArticleIE, ) -from .radiko import RadikoIE, RadikoRadioIE +from .radiko import ( + RadikoIE, + RadikoRadioIE, +) from .radiocanada import ( - RadioCanadaIE, RadioCanadaAudioVideoIE, + RadioCanadaIE, ) from .radiocomercial import ( RadioComercialIE, RadioComercialPlaylistIE, ) from .radiode import RadioDeIE -from .radiojavan import RadioJavanIE from .radiofrance import ( FranceCultureIE, RadioFranceIE, @@ -1565,35 +1627,36 @@ from .radiofrance import ( RadioFranceProfileIE, RadioFranceProgramScheduleIE, ) -from .radiozet import RadioZetPodcastIE +from .radiojavan import RadioJavanIE from .radiokapital import ( RadioKapitalIE, RadioKapitalShowIE, ) +from .radiozet import RadioZetPodcastIE from .radlive import ( - 
RadLiveIE, RadLiveChannelIE, + RadLiveIE, RadLiveSeasonIE, ) from .rai import ( - RaiIE, RaiCulturaIE, + RaiIE, + RaiNewsIE, RaiPlayIE, RaiPlayLiveIE, RaiPlayPlaylistIE, RaiPlaySoundIE, RaiPlaySoundLiveIE, RaiPlaySoundPlaylistIE, - RaiNewsIE, RaiSudtirolIE, ) from .raywenderlich import ( - RayWenderlichIE, RayWenderlichCourseIE, + RayWenderlichIE, ) from .rbgtum import ( - RbgTumIE, RbgTumCourseIE, + RbgTumIE, RbgTumNewCourseIE, ) from .rcs import ( @@ -1607,12 +1670,15 @@ from .rcti import ( RCTIPlusTVIE, ) from .rds import RDSIE -from .redbee import ParliamentLiveUKIE, RTBFIE +from .redbee import ( + RTBFIE, + ParliamentLiveUKIE, +) from .redbulltv import ( - RedBullTVIE, RedBullEmbedIE, - RedBullTVRrnContentIE, RedBullIE, + RedBullTVIE, + RedBullTVRrnContentIE, ) from .reddit import RedditIE from .redge import RedCDNLivxIE @@ -1632,107 +1698,100 @@ from .reverbnation import ReverbNationIE from .rheinmaintv import RheinMainTVIE from .ridehome import RideHomeIE from .rinsefm import ( - RinseFMIE, RinseFMArtistPlaylistIE, + RinseFMIE, ) from .rmcdecouverte import RMCDecouverteIE from .rockstargames import RockstarGamesIE from .rokfin import ( - RokfinIE, - RokfinStackIE, RokfinChannelIE, + RokfinIE, RokfinSearchIE, + RokfinStackIE, +) +from .roosterteeth import ( + RoosterTeethIE, + RoosterTeethSeriesIE, ) -from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE from .rottentomatoes import RottenTomatoesIE from .rozhlas import ( + MujRozhlasIE, RozhlasIE, RozhlasVltavaIE, - MujRozhlasIE, ) -from .rte import RteIE, RteRadioIE +from .rte import ( + RteIE, + RteRadioIE, +) +from .rtl2 import RTL2IE from .rtlnl import ( - RtlNlIE, - RTLLuTeleVODIE, RTLLuArticleIE, RTLLuLiveIE, RTLLuRadioIE, + RTLLuTeleVODIE, + RtlNlIE, ) -from .rtl2 import RTL2IE from .rtnews import ( - RTNewsIE, RTDocumentryIE, RTDocumentryPlaylistIE, + RTNewsIE, RuptlyIE, ) from .rtp import RTPIE from .rtrfm import RTRFMIE from .rts import RTSIE from .rtvcplay import ( - RTVCPlayIE, - 
RTVCPlayEmbedIE, RTVCKalturaIE, + RTVCPlayEmbedIE, + RTVCPlayIE, ) from .rtve import ( RTVEALaCartaIE, RTVEAudioIE, - RTVELiveIE, RTVEInfantilIE, + RTVELiveIE, RTVETelevisionIE, ) from .rtvs import RTVSIE from .rtvslo import RTVSLOIE +from .rudovideo import RudoVideoIE from .rule34video import Rule34VideoIE from .rumble import ( + RumbleChannelIE, RumbleEmbedIE, RumbleIE, - RumbleChannelIE, ) -from .rudovideo import RudoVideoIE from .rutube import ( - RutubeIE, RutubeChannelIE, RutubeEmbedIE, + RutubeIE, RutubeMovieIE, RutubePersonIE, RutubePlaylistIE, RutubeTagsIE, ) -from .glomex import ( - GlomexIE, - GlomexEmbedIE, -) -from .megatvcom import ( - MegaTVComIE, - MegaTVComEmbedIE, -) -from .antenna import ( - AntennaGrWatchIE, - Ant1NewsGrArticleIE, - Ant1NewsGrEmbedIE, -) from .rutv import RUTVIE from .ruutu import RuutuIE from .ruv import ( RuvIE, - RuvSpilaIE + RuvSpilaIE, ) from .s4c import ( S4CIE, - S4CSeriesIE + S4CSeriesIE, ) from .safari import ( - SafariIE, SafariApiIE, SafariCourseIE, + SafariIE, ) from .saitosan import SaitosanIE from .samplefocus import SampleFocusIE from .sapo import SapoIE from .sbs import SBSIE from .sbscokr import ( - SBSCoKrIE, SBSCoKrAllvodProgramIE, + SBSCoKrIE, SBSCoKrProgramsVodIE, ) from .screen9 import Screen9IE @@ -1740,24 +1799,27 @@ from .screencast import ScreencastIE from .screencastify import ScreencastifyIE from .screencastomatic import ScreencastOMaticIE from .scrippsnetworks import ( - ScrippsNetworksWatchIE, ScrippsNetworksIE, + ScrippsNetworksWatchIE, ) +from .scrolller import ScrolllerIE from .scte import ( SCTEIE, SCTECourseIE, ) -from .scrolller import ScrolllerIE from .sejmpl import SejmIE from .senalcolombia import SenalColombiaLiveIE -from .senategov import SenateISVPIE, SenateGovIE +from .senategov import ( + SenateGovIE, + SenateISVPIE, +) from .sendtonews import SendtoNewsIE from .servus import ServusIE from .sevenplus import SevenPlusIE from .sexu import SexuIE from .seznamzpravy import ( - 
SeznamZpravyIE, SeznamZpravyArticleIE, + SeznamZpravyIE, ) from .shahid import ( ShahidIE, @@ -1765,38 +1827,38 @@ from .shahid import ( ) from .sharepoint import SharePointIE from .sharevideos import ShareVideosEmbedIE -from .sibnet import SibnetEmbedIE from .shemaroome import ShemarooMeIE from .showroomlive import ShowRoomLiveIE +from .sibnet import SibnetEmbedIE from .simplecast import ( - SimplecastIE, SimplecastEpisodeIE, + SimplecastIE, SimplecastPodcastIE, ) from .sina import SinaIE from .sixplay import SixPlayIE from .skeb import SkebIE -from .skyit import ( - SkyItPlayerIE, - SkyItVideoIE, - SkyItVideoLiveIE, - SkyItIE, - SkyItArteIE, - CieloTVItIE, - TV8ItIE, -) -from .skylinewebcams import SkylineWebcamsIE -from .skynewsarabia import ( - SkyNewsArabiaIE, - SkyNewsArabiaArticleIE, -) -from .skynewsau import SkyNewsAUIE from .sky import ( SkyNewsIE, SkyNewsStoryIE, SkySportsIE, SkySportsNewsIE, ) +from .skyit import ( + CieloTVItIE, + SkyItArteIE, + SkyItIE, + SkyItPlayerIE, + SkyItVideoIE, + SkyItVideoLiveIE, + TV8ItIE, +) +from .skylinewebcams import SkylineWebcamsIE +from .skynewsarabia import ( + SkyNewsArabiaArticleIE, + SkyNewsArabiaIE, +) +from .skynewsau import SkyNewsAUIE from .slideshare import SlideshareIE from .slideslive import SlidesLiveIE from .slutload import SlutloadIE @@ -1813,29 +1875,29 @@ from .sonyliv import ( from .soundcloud import ( SoundcloudEmbedIE, SoundcloudIE, - SoundcloudSetIE, + SoundcloudPlaylistIE, SoundcloudRelatedIE, + SoundcloudSearchIE, + SoundcloudSetIE, + SoundcloudTrackStationIE, SoundcloudUserIE, SoundcloudUserPermalinkIE, - SoundcloudTrackStationIE, - SoundcloudPlaylistIE, - SoundcloudSearchIE, ) from .soundgasm import ( SoundgasmIE, - SoundgasmProfileIE + SoundgasmProfileIE, ) from .southpark import ( - SouthParkIE, SouthParkDeIE, SouthParkDkIE, SouthParkEsIE, + SouthParkIE, SouthParkLatIE, - SouthParkNlIE + SouthParkNlIE, ) from .sovietscloset import ( SovietsClosetIE, - SovietsClosetPlaylistIE + 
SovietsClosetPlaylistIE, ) from .spankbang import ( SpankBangIE, @@ -1846,12 +1908,6 @@ from .spike import ( BellatorIE, ParamountNetworkIE, ) -from .stageplus import StagePlusVODConcertIE -from .startrek import StarTrekIE -from .stitcher import ( - StitcherIE, - StitcherShowIE, -) from .sport5 import Sport5IE from .sportbox import SportBoxIE from .sportdeutschland import SportDeutschlandIE @@ -1875,19 +1931,25 @@ from .srmediathek import SRMediathekIE from .stacommu import ( StacommuLiveIE, StacommuVODIE, - TheaterComplexTownVODIE, TheaterComplexTownPPVIE, + TheaterComplexTownVODIE, ) +from .stageplus import StagePlusVODConcertIE from .stanfordoc import StanfordOpenClassroomIE +from .startrek import StarTrekIE from .startv import StarTVIE from .steam import ( - SteamIE, SteamCommunityBroadcastIE, + SteamIE, +) +from .stitcher import ( + StitcherIE, + StitcherShowIE, ) from .storyfire import ( StoryFireIE, - StoryFireUserIE, StoryFireSeriesIE, + StoryFireUserIE, ) from .streamable import StreamableIE from .streamcz import StreamCZIE @@ -1908,26 +1970,26 @@ from .svt import ( SVTSeriesIE, ) from .swearnet import SwearnetEpisodeIE -from .syvdk import SYVDKIE from .syfy import SyfyIE +from .syvdk import SYVDKIE from .sztvhu import SztvHuIE from .tagesschau import TagesschauIE from .taptap import ( - TapTapMomentIE, TapTapAppIE, TapTapAppIntlIE, + TapTapMomentIE, TapTapPostIntlIE, ) from .tass import TassIE from .tbs import TBSIE from .tbsjp import ( TBSJPEpisodeIE, - TBSJPProgramIE, TBSJPPlaylistIE, + TBSJPProgramIE, ) from .teachable import ( - TeachableIE, TeachableCourseIE, + TeachableIE, ) from .teachertube import ( TeacherTubeIE, @@ -1935,8 +1997,8 @@ from .teachertube import ( ) from .teachingchannel import TeachingChannelIE from .teamcoco import ( - TeamcocoIE, ConanClassicIE, + TeamcocoIE, ) from .teamtreehouse import TeamTreeHouseIE from .ted import ( @@ -1955,15 +2017,18 @@ from .telegram import TelegramEmbedIE from .telemb import TeleMBIE from .telemundo 
import TelemundoIE from .telequebec import ( - TeleQuebecIE, - TeleQuebecSquatIE, TeleQuebecEmissionIE, + TeleQuebecIE, TeleQuebecLiveIE, + TeleQuebecSquatIE, TeleQuebecVideoIE, ) from .teletask import TeleTaskIE from .telewebion import TelewebionIE -from .tempo import TempoIE, IVXPlayerIE +from .tempo import ( + IVXPlayerIE, + TempoIE, +) from .tencent import ( IflixEpisodeIE, IflixSeriesIE, @@ -1987,8 +2052,8 @@ from .theguardian import ( from .theholetv import TheHoleTvIE from .theintercept import TheInterceptIE from .theplatform import ( - ThePlatformIE, ThePlatformFeedIE, + ThePlatformIE, ) from .thestar import TheStarIE from .thesun import TheSunIE @@ -2000,50 +2065,51 @@ from .thisvid import ( ThisVidMemberIE, ThisVidPlaylistIE, ) +from .threeqsdn import ThreeQSDNIE from .threespeak import ( ThreeSpeakIE, ThreeSpeakUserIE, ) -from .threeqsdn import ThreeQSDNIE from .tiktok import ( - TikTokIE, - TikTokUserIE, - TikTokSoundIE, - TikTokEffectIE, - TikTokTagIE, - TikTokVMIE, - TikTokLiveIE, DouyinIE, + TikTokEffectIE, + TikTokIE, + TikTokLiveIE, + TikTokSoundIE, + TikTokTagIE, + TikTokUserIE, + TikTokVMIE, ) from .tmz import TMZIE from .tnaflix import ( - TNAFlixNetworkEmbedIE, - TNAFlixIE, EMPFlixIE, MovieFapIE, + TNAFlixIE, + TNAFlixNetworkEmbedIE, ) from .toggle import ( - ToggleIE, MeWatchIE, + ToggleIE, ) -from .toggo import ( - ToggoIE, -) +from .toggo import ToggoIE from .tonline import TOnlineIE from .toongoggles import ToonGogglesIE from .toutv import TouTvIE -from .toypics import ToypicsUserIE, ToypicsIE +from .toypics import ( + ToypicsIE, + ToypicsUserIE, +) from .traileraddict import TrailerAddictIE from .triller import ( TrillerIE, - TrillerUserIE, TrillerShortIE, + TrillerUserIE, ) from .trovo import ( + TrovoChannelClipIE, + TrovoChannelVodIE, TrovoIE, TrovoVodIE, - TrovoChannelVodIE, - TrovoChannelClipIE, ) from .trtcocuk import TrtCocukVideoIE from .trtworld import TrtWorldIE @@ -2052,26 +2118,26 @@ from .trunews import TruNewsIE from .truth 
import TruthIE from .trutv import TruTVIE from .tube8 import Tube8IE -from .tubetugraz import TubeTuGrazIE, TubeTuGrazSeriesIE +from .tubetugraz import ( + TubeTuGrazIE, + TubeTuGrazSeriesIE, +) from .tubitv import ( TubiTvIE, TubiTvShowIE, ) from .tumblr import TumblrIE from .tunein import ( - TuneInStationIE, - TuneInPodcastIE, TuneInPodcastEpisodeIE, + TuneInPodcastIE, TuneInShortenerIE, + TuneInStationIE, ) from .tv2 import ( TV2IE, - TV2ArticleIE, KatsomoIE, MTVUutisetArticleIE, -) -from .tv24ua import ( - TV24UAVideoIE, + TV2ArticleIE, ) from .tv2dk import ( TV2DKIE, @@ -2084,16 +2150,17 @@ from .tv2hu import ( from .tv4 import TV4IE from .tv5mondeplus import TV5MondePlusIE from .tv5unis import ( - TV5UnisVideoIE, TV5UnisIE, + TV5UnisVideoIE, ) +from .tv24ua import TV24UAVideoIE from .tva import ( TVAIE, QubIE, ) from .tvanouvelles import ( - TVANouvellesIE, TVANouvellesArticleIE, + TVANouvellesIE, ) from .tvc import ( TVCIE, @@ -2106,19 +2173,19 @@ from .tvland import TVLandIE from .tvn24 import TVN24IE from .tvnoe import TVNoeIE from .tvopengr import ( - TVOpenGrWatchIE, TVOpenGrEmbedIE, + TVOpenGrWatchIE, ) from .tvp import ( - TVPEmbedIE, TVPIE, + TVPEmbedIE, TVPStreamIE, TVPVODSeriesIE, TVPVODVideoIE, ) from .tvplay import ( - TVPlayIE, TVPlayHomeIE, + TVPlayIE, ) from .tvplayer import TVPlayerIE from .tweakers import TweakersIE @@ -2130,29 +2197,29 @@ from .twitcasting import ( TwitCastingUserIE, ) from .twitch import ( - TwitchVodIE, + TwitchClipsIE, TwitchCollectionIE, - TwitchVideosIE, + TwitchStreamIE, TwitchVideosClipsIE, TwitchVideosCollectionsIE, - TwitchStreamIE, - TwitchClipsIE, + TwitchVideosIE, + TwitchVodIE, ) from .twitter import ( - TwitterCardIE, - TwitterIE, TwitterAmplifyIE, TwitterBroadcastIE, - TwitterSpacesIE, + TwitterCardIE, + TwitterIE, TwitterShortenerIE, + TwitterSpacesIE, ) from .txxx import ( - TxxxIE, PornTopIE, + TxxxIE, ) from .udemy import ( + UdemyCourseIE, UdemyIE, - UdemyCourseIE ) from .udn import UDNEmbedIE from 
.ufctv import ( @@ -2161,16 +2228,13 @@ from .ufctv import ( ) from .ukcolumn import UkColumnIE from .uktvplay import UKTVPlayIE -from .digiteka import DigitekaIE -from .dlive import ( - DLiveVODIE, - DLiveStreamIE, -) -from .drooble import DroobleIE from .umg import UMGDeIE from .unistra import UnistraIE from .unity import UnityIE -from .unsupported import KnownDRMIE, KnownPiracyIE +from .unsupported import ( + KnownDRMIE, + KnownPiracyIE, +) from .uol import UOLIE from .uplynk import ( UplynkIE, @@ -2180,10 +2244,13 @@ from .urort import UrortIE from .urplay import URPlayIE from .usanetwork import USANetworkIE from .usatoday import USATodayIE -from .ustream import UstreamIE, UstreamChannelIE +from .ustream import ( + UstreamChannelIE, + UstreamIE, +) from .ustudio import ( - UstudioIE, UstudioEmbedIE, + UstudioIE, ) from .utreon import UtreonIE from .varzesh3 import Varzesh3IE @@ -2191,7 +2258,7 @@ from .vbox7 import Vbox7IE from .veo import VeoIE from .veoh import ( VeohIE, - VeohUserIE + VeohUserIE, ) from .vesti import VestiIE from .vevo import ( @@ -2199,14 +2266,14 @@ from .vevo import ( VevoPlaylistIE, ) from .vgtv import ( + VGTVIE, BTArticleIE, BTVestlendingenIE, - VGTVIE, ) from .vh1 import VH1IE from .vice import ( - ViceIE, ViceArticleIE, + ViceIE, ViceShowIE, ) from .viddler import ViddlerIE @@ -2218,42 +2285,46 @@ from .videocampus_sachsen import ( from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE from .videoken import ( + VideoKenCategoryIE, VideoKenIE, VideoKenPlayerIE, VideoKenPlaylistIE, - VideoKenCategoryIE, VideoKenTopicIE, ) from .videomore import ( VideomoreIE, - VideomoreVideoIE, VideomoreSeasonIE, + VideomoreVideoIE, ) from .videopress import VideoPressIE from .vidio import ( VidioIE, + VidioLiveIE, VidioPremierIE, - VidioLiveIE ) from .vidlii import VidLiiIE from .vidly import VidlyIE from .viewlift import ( - ViewLiftIE, ViewLiftEmbedIE, + ViewLiftIE, ) from .viidea import ViideaIE +from .viki import ( + 
VikiChannelIE, + VikiIE, +) from .vimeo import ( - VimeoIE, + VHXEmbedIE, VimeoAlbumIE, VimeoChannelIE, VimeoGroupsIE, + VimeoIE, VimeoLikesIE, VimeoOndemandIE, VimeoProIE, VimeoReviewIE, VimeoUserIE, VimeoWatchLaterIE, - VHXEmbedIE, ) from .vimm import ( VimmIE, @@ -2263,46 +2334,41 @@ from .vine import ( VineIE, VineUserIE, ) -from .viki import ( - VikiIE, - VikiChannelIE, -) from .viously import ViouslyIE from .viqeo import ViqeoIE from .viu import ( ViuIE, - ViuPlaylistIE, ViuOTTIE, ViuOTTIndonesiaIE, + ViuPlaylistIE, ) from .vk import ( VKIE, - VKUserVideosIE, - VKWallPostIE, VKPlayIE, VKPlayLiveIE, + VKUserVideosIE, + VKWallPostIE, ) from .vocaroo import VocarooIE from .vodpl import VODPlIE from .vodplatform import VODPlatformIE from .voicy import ( - VoicyIE, VoicyChannelIE, + VoicyIE, ) from .volejtv import VolejTVIE from .voxmedia import ( - VoxMediaVolumeIE, VoxMediaIE, + VoxMediaVolumeIE, ) from .vrt import ( VRTIE, - VrtNUIE, - KetnetIE, DagelijkseKostIE, + KetnetIE, Radio1BeIE, + VrtNUIE, ) from .vtm import VTMIE -from .medialaan import MedialaanIE from .vuclip import VuClipIE from .vvvvid import ( VVVVIDIE, @@ -2310,20 +2376,20 @@ from .vvvvid import ( ) from .walla import WallaIE from .washingtonpost import ( - WashingtonPostIE, WashingtonPostArticleIE, + WashingtonPostIE, ) from .wat import WatIE from .wdr import ( WDRIE, - WDRPageIE, WDRElefantIE, WDRMobileIE, + WDRPageIE, ) from .webcamerapl import WebcameraplIE from .webcaster import ( - WebcasterIE, WebcasterFeedIE, + WebcasterIE, ) from .webofstories import ( WebOfStoriesIE, @@ -2331,42 +2397,42 @@ from .webofstories import ( ) from .weibo import ( WeiboIE, - WeiboVideoIE, WeiboUserIE, + WeiboVideoIE, ) from .weiqitv import WeiqiTVIE from .weverse import ( WeverseIE, - WeverseMediaIE, - WeverseMomentIE, - WeverseLiveTabIE, - WeverseMediaTabIE, WeverseLiveIE, + WeverseLiveTabIE, + WeverseMediaIE, + WeverseMediaTabIE, + WeverseMomentIE, ) from .wevidi import WeVidiIE from .weyyak import WeyyakIE 
+from .whowatch import WhoWatchIE from .whyp import WhypIE from .wikimedia import WikimediaIE from .wimbledon import WimbledonIE from .wimtv import WimTVIE -from .whowatch import WhoWatchIE from .wistia import ( + WistiaChannelIE, WistiaIE, WistiaPlaylistIE, - WistiaChannelIE, ) from .wordpress import ( - WordpressPlaylistEmbedIE, WordpressMiniAudioPlayerEmbedIE, + WordpressPlaylistEmbedIE, ) from .worldstarhiphop import WorldStarHipHopIE from .wppilot import ( - WPPilotIE, WPPilotChannelsIE, + WPPilotIE, ) from .wrestleuniverse import ( - WrestleUniverseVODIE, WrestleUniversePPVIE, + WrestleUniverseVODIE, ) from .wsj import ( WSJIE, @@ -2374,22 +2440,22 @@ from .wsj import ( ) from .wwe import WWEIE from .wykop import ( - WykopDigIE, WykopDigCommentIE, - WykopPostIE, + WykopDigIE, WykopPostCommentIE, + WykopPostIE, ) from .xanimu import XanimuIE from .xboxclips import XboxClipsIE from .xhamster import ( - XHamsterIE, XHamsterEmbedIE, + XHamsterIE, XHamsterUserIE, ) from .xiaohongshu import XiaoHongShuIE from .ximalaya import ( + XimalayaAlbumIE, XimalayaIE, - XimalayaAlbumIE ) from .xinpianchang import XinpianchangIE from .xminus import XMinusIE @@ -2397,27 +2463,27 @@ from .xnxx import XNXXIE from .xstream import XstreamIE from .xvideos import ( XVideosIE, - XVideosQuickiesIE + XVideosQuickiesIE, ) from .xxxymovies import XXXYMoviesIE from .yahoo import ( YahooIE, - YahooSearchIE, YahooJapanNewsIE, + YahooSearchIE, ) from .yandexdisk import YandexDiskIE from .yandexmusic import ( - YandexMusicTrackIE, YandexMusicAlbumIE, - YandexMusicPlaylistIE, - YandexMusicArtistTracksIE, YandexMusicArtistAlbumsIE, + YandexMusicArtistTracksIE, + YandexMusicPlaylistIE, + YandexMusicTrackIE, ) from .yandexvideo import ( YandexVideoIE, YandexVideoPreviewIE, - ZenYandexIE, ZenYandexChannelIE, + ZenYandexIE, ) from .yapfiles import YapFilesIE from .yappy import ( @@ -2431,24 +2497,26 @@ from .youku import ( YoukuShowIE, ) from .younow import ( - YouNowLiveIE, YouNowChannelIE, + 
YouNowLiveIE, YouNowMomentIE, ) from .youporn import YouPornIE from .zaiko import ( - ZaikoIE, ZaikoETicketIE, + ZaikoIE, ) from .zapiks import ZapiksIE from .zattoo import ( BBVTVIE, + EWETVIE, + SAKTVIE, + VTXTVIE, BBVTVLiveIE, BBVTVRecordingsIE, EinsUndEinsTVIE, EinsUndEinsTVLiveIE, EinsUndEinsTVRecordingsIE, - EWETVIE, EWETVLiveIE, EWETVRecordingsIE, GlattvisionTVIE, @@ -2466,13 +2534,11 @@ from .zattoo import ( QuantumTVIE, QuantumTVLiveIE, QuantumTVRecordingsIE, + SAKTVLiveIE, + SAKTVRecordingsIE, SaltTVIE, SaltTVLiveIE, SaltTVRecordingsIE, - SAKTVIE, - SAKTVLiveIE, - SAKTVRecordingsIE, - VTXTVIE, VTXTVLiveIE, VTXTVRecordingsIE, WalyTVIE, @@ -2483,7 +2549,10 @@ from .zattoo import ( ZattooMoviesIE, ZattooRecordingsIE, ) -from .zdf import ZDFIE, ZDFChannelIE +from .zdf import ( + ZDFIE, + ZDFChannelIE, +) from .zee5 import ( Zee5IE, Zee5SeriesIE, @@ -2493,16 +2562,16 @@ from .zenporn import ZenPornIE from .zetland import ZetlandDKArticleIE from .zhihu import ZhihuIE from .zingmp3 import ( - ZingMp3IE, ZingMp3AlbumIE, ZingMp3ChartHomeIE, - ZingMp3WeekChartIE, ZingMp3ChartMusicVideoIE, - ZingMp3UserIE, ZingMp3HubIE, + ZingMp3IE, ZingMp3LiveRadioIE, ZingMp3PodcastEpisodeIE, ZingMp3PodcastIE, + ZingMp3UserIE, + ZingMp3WeekChartIE, ) from .zoom import ZoomIE from .zype import ZypeIE diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index b21742281..2c0d296fd 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -6,10 +6,10 @@ import time from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - dict_get, ExtractorError, - js_to_json, + dict_get, int_or_none, + js_to_json, parse_iso8601, str_or_none, traverse_obj, diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index fee7375ea..b8c79b912 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -12,20 +12,21 @@ import urllib.parse import urllib.request import urllib.response import uuid -from ..utils.networking 
import clean_proxies + from .common import InfoExtractor from ..aes import aes_ecb_decrypt from ..utils import ( ExtractorError, + OnDemandPagedList, bytes_to_intlist, decode_base_n, int_or_none, intlist_to_bytes, - OnDemandPagedList, time_seconds, traverse_obj, update_url_query, ) +from ..utils.networking import clean_proxies def add_opener(ydl, handler): # FIXME: Create proper API in .networking diff --git a/yt_dlp/extractor/acfun.py b/yt_dlp/extractor/acfun.py index c3b4f432e..07933192f 100644 --- a/yt_dlp/extractor/acfun.py +++ b/yt_dlp/extractor/acfun.py @@ -3,10 +3,10 @@ from ..utils import ( float_or_none, format_field, int_or_none, - str_or_none, - traverse_obj, parse_codecs, parse_qs, + str_or_none, + traverse_obj, ) diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index 898d37298..2f3b67dad 100644 --- a/yt_dlp/extractor/adn.py +++ b/yt_dlp/extractor/adn.py @@ -10,18 +10,18 @@ from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 from ..compat import compat_b64decode from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, ass_subtitles_timecode, bytes_to_intlist, bytes_to_long, - ExtractorError, float_or_none, int_or_none, intlist_to_bytes, long_to_bytes, parse_iso8601, pkcs1pad, - strip_or_none, str_or_none, + strip_or_none, try_get, unified_strdate, urlencode_postdata, diff --git a/yt_dlp/extractor/adobetv.py b/yt_dlp/extractor/adobetv.py index d1525a1af..08e9e5182 100644 --- a/yt_dlp/extractor/adobetv.py +++ b/yt_dlp/extractor/adobetv.py @@ -4,11 +4,11 @@ import re from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + ISO639Utils, + OnDemandPagedList, float_or_none, int_or_none, - ISO639Utils, join_nonempty, - OnDemandPagedList, parse_duration, str_or_none, str_to_int, diff --git a/yt_dlp/extractor/airtv.py b/yt_dlp/extractor/airtv.py index 0b73a966e..6cc63cd7f 100644 --- a/yt_dlp/extractor/airtv.py +++ b/yt_dlp/extractor/airtv.py @@ -5,7 +5,7 @@ from ..utils import ( 
int_or_none, mimetype2ext, parse_iso8601, - traverse_obj + traverse_obj, ) diff --git a/yt_dlp/extractor/allstar.py b/yt_dlp/extractor/allstar.py index 87219f2f8..49df4bf3a 100644 --- a/yt_dlp/extractor/allstar.py +++ b/yt_dlp/extractor/allstar.py @@ -12,7 +12,6 @@ from ..utils import ( ) from ..utils.traversal import traverse_obj - _FIELDS = ''' _id clipImageSource diff --git a/yt_dlp/extractor/alphaporno.py b/yt_dlp/extractor/alphaporno.py index 8d5b472d3..f927965de 100644 --- a/yt_dlp/extractor/alphaporno.py +++ b/yt_dlp/extractor/alphaporno.py @@ -1,9 +1,9 @@ from .common import InfoExtractor from ..utils import ( - parse_iso8601, + int_or_none, parse_duration, parse_filesize, - int_or_none, + parse_iso8601, ) diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py index b785c62c3..cb2b9891e 100644 --- a/yt_dlp/extractor/alura.py +++ b/yt_dlp/extractor/alura.py @@ -1,17 +1,13 @@ import re from .common import InfoExtractor - -from ..compat import ( - compat_urlparse, -) - +from ..compat import compat_urlparse from ..utils import ( + ExtractorError, + clean_html, + int_or_none, urlencode_postdata, urljoin, - int_or_none, - clean_html, - ExtractorError ) diff --git a/yt_dlp/extractor/amara.py b/yt_dlp/extractor/amara.py index 5018710e0..509b21a53 100644 --- a/yt_dlp/extractor/amara.py +++ b/yt_dlp/extractor/amara.py @@ -1,6 +1,6 @@ from .common import InfoExtractor -from .youtube import YoutubeIE from .vimeo import VimeoIE +from .youtube import YoutubeIE from ..utils import ( int_or_none, parse_iso8601, diff --git a/yt_dlp/extractor/amp.py b/yt_dlp/extractor/amp.py index 0d259c549..6b2bf2db2 100644 --- a/yt_dlp/extractor/amp.py +++ b/yt_dlp/extractor/amp.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, mimetype2ext, parse_iso8601, diff --git a/yt_dlp/extractor/anchorfm.py b/yt_dlp/extractor/anchorfm.py index 52f2ad057..5e78f372e 100644 --- 
a/yt_dlp/extractor/anchorfm.py +++ b/yt_dlp/extractor/anchorfm.py @@ -5,7 +5,7 @@ from ..utils import ( int_or_none, str_or_none, traverse_obj, - unified_timestamp + unified_timestamp, ) diff --git a/yt_dlp/extractor/angel.py b/yt_dlp/extractor/angel.py index 306b3651e..9f5b9b523 100644 --- a/yt_dlp/extractor/angel.py +++ b/yt_dlp/extractor/angel.py @@ -1,7 +1,7 @@ import re from .common import InfoExtractor -from ..utils import url_or_none, merge_dicts +from ..utils import merge_dicts, url_or_none class AngelIE(InfoExtractor): diff --git a/yt_dlp/extractor/appleconnect.py b/yt_dlp/extractor/appleconnect.py index d00b0f906..433eb4ed8 100644 --- a/yt_dlp/extractor/appleconnect.py +++ b/yt_dlp/extractor/appleconnect.py @@ -1,8 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - str_to_int, - ExtractorError -) +from ..utils import ExtractorError, str_to_int class AppleConnectIE(InfoExtractor): diff --git a/yt_dlp/extractor/appletrailers.py b/yt_dlp/extractor/appletrailers.py index 2e0b0a8c9..21103aee5 100644 --- a/yt_dlp/extractor/appletrailers.py +++ b/yt_dlp/extractor/appletrailers.py @@ -1,5 +1,5 @@ -import re import json +import re from .common import InfoExtractor from ..compat import compat_urlparse diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py index a493714d1..9a5524aab 100644 --- a/yt_dlp/extractor/arnes.py +++ b/yt_dlp/extractor/arnes.py @@ -4,8 +4,8 @@ from ..compat import ( compat_urllib_parse_urlparse, ) from ..utils import ( - format_field, float_or_none, + format_field, int_or_none, parse_iso8601, remove_start, diff --git a/yt_dlp/extractor/atvat.py b/yt_dlp/extractor/atvat.py index d60feba31..20ee34cca 100644 --- a/yt_dlp/extractor/atvat.py +++ b/yt_dlp/extractor/atvat.py @@ -2,10 +2,10 @@ import datetime as dt from .common import InfoExtractor from ..utils import ( + ExtractorError, float_or_none, jwt_encode_hs256, try_get, - ExtractorError, ) diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py index 
6fc938de9..a8dfb3efc 100644 --- a/yt_dlp/extractor/awaan.py +++ b/yt_dlp/extractor/awaan.py @@ -2,8 +2,8 @@ import base64 from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_urlencode, compat_str, + compat_urllib_parse_urlencode, ) from ..utils import ( format_field, diff --git a/yt_dlp/extractor/banbye.py b/yt_dlp/extractor/banbye.py index 67af29a96..c4e07a79a 100644 --- a/yt_dlp/extractor/banbye.py +++ b/yt_dlp/extractor/banbye.py @@ -2,12 +2,12 @@ import math from .common import InfoExtractor from ..compat import ( - compat_urllib_parse_urlparse, compat_parse_qs, + compat_urllib_parse_urlparse, ) from ..utils import ( - format_field, InAdvancePagedList, + format_field, traverse_obj, unified_timestamp, ) diff --git a/yt_dlp/extractor/bannedvideo.py b/yt_dlp/extractor/bannedvideo.py index 51e722057..82dc9ab02 100644 --- a/yt_dlp/extractor/bannedvideo.py +++ b/yt_dlp/extractor/bannedvideo.py @@ -2,11 +2,11 @@ import json from .common import InfoExtractor from ..utils import ( - try_get, - int_or_none, - url_or_none, float_or_none, + int_or_none, + try_get, unified_timestamp, + url_or_none, ) diff --git a/yt_dlp/extractor/beeg.py b/yt_dlp/extractor/beeg.py index 042b3220b..da98ac314 100644 --- a/yt_dlp/extractor/beeg.py +++ b/yt_dlp/extractor/beeg.py @@ -1,5 +1,4 @@ from .common import InfoExtractor - from ..utils import ( int_or_none, str_or_none, diff --git a/yt_dlp/extractor/bleacherreport.py b/yt_dlp/extractor/bleacherreport.py index e875957cf..aa3d63ee7 100644 --- a/yt_dlp/extractor/bleacherreport.py +++ b/yt_dlp/extractor/bleacherreport.py @@ -1,5 +1,5 @@ -from .common import InfoExtractor from .amp import AMPIE +from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, diff --git a/yt_dlp/extractor/blogger.py b/yt_dlp/extractor/blogger.py index 3d6e03304..ef0151de6 100644 --- a/yt_dlp/extractor/blogger.py +++ b/yt_dlp/extractor/blogger.py @@ -1,3 +1,4 @@ +from .common import InfoExtractor from ..utils 
import ( mimetype2ext, parse_duration, @@ -5,7 +6,6 @@ from ..utils import ( str_or_none, traverse_obj, ) -from .common import InfoExtractor class BloggerIE(InfoExtractor): diff --git a/yt_dlp/extractor/bostonglobe.py b/yt_dlp/extractor/bostonglobe.py index 92f8ea2cb..267586687 100644 --- a/yt_dlp/extractor/bostonglobe.py +++ b/yt_dlp/extractor/bostonglobe.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor - from ..utils import ( extract_attributes, ) diff --git a/yt_dlp/extractor/boxcast.py b/yt_dlp/extractor/boxcast.py index 51f9eb787..da06cc3f8 100644 --- a/yt_dlp/extractor/boxcast.py +++ b/yt_dlp/extractor/boxcast.py @@ -1,9 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - js_to_json, - traverse_obj, - unified_timestamp -) +from ..utils import js_to_json, traverse_obj, unified_timestamp class BoxCastVideoIE(InfoExtractor): diff --git a/yt_dlp/extractor/brainpop.py b/yt_dlp/extractor/brainpop.py index 1200437e6..04b1dd80c 100644 --- a/yt_dlp/extractor/brainpop.py +++ b/yt_dlp/extractor/brainpop.py @@ -6,7 +6,7 @@ from ..utils import ( classproperty, int_or_none, traverse_obj, - urljoin + urljoin, ) diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index 61b18412d..4190e1a09 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -12,10 +12,11 @@ from ..compat import ( ) from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, + UnsupportedError, clean_html, dict_get, extract_attributes, - ExtractorError, find_xpath_attr, fix_xml_ampersands, float_or_none, @@ -29,7 +30,6 @@ from ..utils import ( try_get, unescapeHTML, unsmuggle_url, - UnsupportedError, update_url_query, url_or_none, ) diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py index cf830210f..aca9782c7 100644 --- a/yt_dlp/extractor/cbs.py +++ b/yt_dlp/extractor/cbs.py @@ -5,14 +5,14 @@ from .youtube import YoutubeIE from ..utils import ( ExtractorError, extract_attributes, + 
find_xpath_attr, get_element_html_by_id, int_or_none, - find_xpath_attr, smuggle_url, - xpath_element, - xpath_text, update_url_query, url_or_none, + xpath_element, + xpath_text, ) diff --git a/yt_dlp/extractor/cinetecamilano.py b/yt_dlp/extractor/cinetecamilano.py index 9cffa11e8..745b71f24 100644 --- a/yt_dlp/extractor/cinetecamilano.py +++ b/yt_dlp/extractor/cinetecamilano.py @@ -1,4 +1,5 @@ import json + from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( diff --git a/yt_dlp/extractor/clippit.py b/yt_dlp/extractor/clippit.py index 006a713b2..67b56e00d 100644 --- a/yt_dlp/extractor/clippit.py +++ b/yt_dlp/extractor/clippit.py @@ -1,11 +1,11 @@ +import re + from .common import InfoExtractor from ..utils import ( parse_iso8601, qualities, ) -import re - class ClippitIE(InfoExtractor): diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index a952828fb..a33cef354 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1,5 +1,6 @@ import base64 import collections +import functools import getpass import hashlib import http.client @@ -21,7 +22,6 @@ import urllib.parse import urllib.request import xml.etree.ElementTree -from ..compat import functools # isort: split from ..compat import ( compat_etree_fromstring, compat_expanduser, diff --git a/yt_dlp/extractor/corus.py b/yt_dlp/extractor/corus.py index bcc34ddd8..0a98c980f 100644 --- a/yt_dlp/extractor/corus.py +++ b/yt_dlp/extractor/corus.py @@ -1,7 +1,7 @@ from .theplatform import ThePlatformFeedIE from ..utils import ( - dict_get, ExtractorError, + dict_get, float_or_none, int_or_none, ) diff --git a/yt_dlp/extractor/crackle.py b/yt_dlp/extractor/crackle.py index 1ef90b5a0..0cb7d940c 100644 --- a/yt_dlp/extractor/crackle.py +++ b/yt_dlp/extractor/crackle.py @@ -6,6 +6,7 @@ import time from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, determine_ext, 
float_or_none, int_or_none, @@ -13,7 +14,6 @@ from ..utils import ( parse_age_limit, parse_duration, url_or_none, - ExtractorError ) diff --git a/yt_dlp/extractor/cspan.py b/yt_dlp/extractor/cspan.py index 0075680e8..e56584e4e 100644 --- a/yt_dlp/extractor/cspan.py +++ b/yt_dlp/extractor/cspan.py @@ -1,10 +1,12 @@ import re from .common import InfoExtractor +from .senategov import SenateISVPIE +from .ustream import UstreamIE from ..compat import compat_HTMLParseError from ..utils import ( - determine_ext, ExtractorError, + determine_ext, extract_attributes, find_xpath_attr, get_element_by_attribute, @@ -19,8 +21,6 @@ from ..utils import ( str_to_int, unescapeHTML, ) -from .senategov import SenateISVPIE -from .ustream import UstreamIE class CSpanIE(InfoExtractor): diff --git a/yt_dlp/extractor/ctsnews.py b/yt_dlp/extractor/ctsnews.py index cec178f03..1817bd2ff 100644 --- a/yt_dlp/extractor/ctsnews.py +++ b/yt_dlp/extractor/ctsnews.py @@ -1,6 +1,6 @@ from .common import InfoExtractor -from ..utils import unified_timestamp from .youtube import YoutubeIE +from ..utils import unified_timestamp class CtsNewsIE(InfoExtractor): diff --git a/yt_dlp/extractor/dailymail.py b/yt_dlp/extractor/dailymail.py index 43401e111..4c25bea11 100644 --- a/yt_dlp/extractor/dailymail.py +++ b/yt_dlp/extractor/dailymail.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - int_or_none, determine_protocol, + int_or_none, try_get, unescapeHTML, ) diff --git a/yt_dlp/extractor/damtomo.py b/yt_dlp/extractor/damtomo.py index 5e14d6aff..2e0f6f0d3 100644 --- a/yt_dlp/extractor/damtomo.py +++ b/yt_dlp/extractor/damtomo.py @@ -1,8 +1,8 @@ import re from .common import InfoExtractor -from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate from ..compat import compat_str +from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate class DamtomoBaseIE(InfoExtractor): diff --git 
a/yt_dlp/extractor/democracynow.py b/yt_dlp/extractor/democracynow.py index 1624d085c..177424937 100644 --- a/yt_dlp/extractor/democracynow.py +++ b/yt_dlp/extractor/democracynow.py @@ -1,11 +1,11 @@ -import re import os.path +import re from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( - url_basename, remove_start, + url_basename, ) diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py index c11cd790b..4380c414e 100644 --- a/yt_dlp/extractor/digitalconcerthall.py +++ b/yt_dlp/extractor/digitalconcerthall.py @@ -1,5 +1,4 @@ from .common import InfoExtractor - from ..utils import ( ExtractorError, parse_resolution, diff --git a/yt_dlp/extractor/discoverygo.py b/yt_dlp/extractor/discoverygo.py index 1f3d8e31c..b2663a63d 100644 --- a/yt_dlp/extractor/discoverygo.py +++ b/yt_dlp/extractor/discoverygo.py @@ -2,9 +2,9 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, determine_ext, extract_attributes, - ExtractorError, int_or_none, parse_age_limit, remove_end, diff --git a/yt_dlp/extractor/disney.py b/yt_dlp/extractor/disney.py index 430de326f..d8dde0ca7 100644 --- a/yt_dlp/extractor/disney.py +++ b/yt_dlp/extractor/disney.py @@ -2,10 +2,10 @@ import re from .common import InfoExtractor from ..utils import ( - int_or_none, - unified_strdate, determine_ext, + int_or_none, join_nonempty, + unified_strdate, update_url_query, ) diff --git a/yt_dlp/extractor/douyutv.py b/yt_dlp/extractor/douyutv.py index ee8893d5a..244ffdf1c 100644 --- a/yt_dlp/extractor/douyutv.py +++ b/yt_dlp/extractor/douyutv.py @@ -1,5 +1,5 @@ -import time import hashlib +import time import urllib import uuid diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py index 1ecc4baf6..ddf2128b0 100644 --- a/yt_dlp/extractor/dplay.py +++ b/yt_dlp/extractor/dplay.py @@ -4,8 +4,8 @@ import uuid from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils 
import ( - determine_ext, ExtractorError, + determine_ext, float_or_none, int_or_none, remove_start, diff --git a/yt_dlp/extractor/drtuber.py b/yt_dlp/extractor/drtuber.py index e5dab6ac0..a9247edc0 100644 --- a/yt_dlp/extractor/drtuber.py +++ b/yt_dlp/extractor/drtuber.py @@ -2,8 +2,8 @@ import re from .common import InfoExtractor from ..utils import ( - int_or_none, NO_DEFAULT, + int_or_none, parse_duration, str_to_int, ) diff --git a/yt_dlp/extractor/duboku.py b/yt_dlp/extractor/duboku.py index 626e577e7..adc7705bc 100644 --- a/yt_dlp/extractor/duboku.py +++ b/yt_dlp/extractor/duboku.py @@ -5,9 +5,9 @@ import urllib.parse from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( + ExtractorError, clean_html, extract_attributes, - ExtractorError, get_elements_by_class, int_or_none, js_to_json, diff --git a/yt_dlp/extractor/dvtv.py b/yt_dlp/extractor/dvtv.py index e67143370..e6660dcd9 100644 --- a/yt_dlp/extractor/dvtv.py +++ b/yt_dlp/extractor/dvtv.py @@ -2,15 +2,15 @@ import re from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, join_nonempty, js_to_json, mimetype2ext, + parse_iso8601, try_get, unescapeHTML, - parse_iso8601, ) diff --git a/yt_dlp/extractor/dw.py b/yt_dlp/extractor/dw.py index f7b852076..feab804af 100644 --- a/yt_dlp/extractor/dw.py +++ b/yt_dlp/extractor/dw.py @@ -1,10 +1,10 @@ from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( int_or_none, unified_strdate, url_or_none, ) -from ..compat import compat_urlparse class DWIE(InfoExtractor): diff --git a/yt_dlp/extractor/ertgr.py b/yt_dlp/extractor/ertgr.py index 9ecdf5d3b..19c6933e7 100644 --- a/yt_dlp/extractor/ertgr.py +++ b/yt_dlp/extractor/ertgr.py @@ -4,15 +4,15 @@ import re from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + ExtractorError, clean_html, determine_ext, - ExtractorError, dict_get, int_or_none, 
merge_dicts, - parse_qs, parse_age_limit, parse_iso8601, + parse_qs, str_or_none, try_get, url_or_none, diff --git a/yt_dlp/extractor/europa.py b/yt_dlp/extractor/europa.py index 29dfc8ae9..0cf889a1e 100644 --- a/yt_dlp/extractor/europa.py +++ b/yt_dlp/extractor/europa.py @@ -8,7 +8,7 @@ from ..utils import ( qualities, traverse_obj, unified_strdate, - xpath_text + xpath_text, ) diff --git a/yt_dlp/extractor/euscreen.py b/yt_dlp/extractor/euscreen.py index 65a1dc7c5..66fa42fa1 100644 --- a/yt_dlp/extractor/euscreen.py +++ b/yt_dlp/extractor/euscreen.py @@ -1,8 +1,7 @@ from .common import InfoExtractor - from ..utils import ( - parse_duration, js_to_json, + parse_duration, ) diff --git a/yt_dlp/extractor/eyedotv.py b/yt_dlp/extractor/eyedotv.py index d8b068e9c..4a13ab08d 100644 --- a/yt_dlp/extractor/eyedotv.py +++ b/yt_dlp/extractor/eyedotv.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..utils import ( - xpath_text, - parse_duration, ExtractorError, + parse_duration, + xpath_text, ) diff --git a/yt_dlp/extractor/fancode.py b/yt_dlp/extractor/fancode.py index cddf25497..1e80f9a37 100644 --- a/yt_dlp/extractor/fancode.py +++ b/yt_dlp/extractor/fancode.py @@ -1,12 +1,6 @@ from .common import InfoExtractor - from ..compat import compat_str -from ..utils import ( - parse_iso8601, - ExtractorError, - try_get, - mimetype2ext -) +from ..utils import ExtractorError, mimetype2ext, parse_iso8601, try_get class FancodeVodIE(InfoExtractor): diff --git a/yt_dlp/extractor/faz.py b/yt_dlp/extractor/faz.py index bca62add9..796bac3c3 100644 --- a/yt_dlp/extractor/faz.py +++ b/yt_dlp/extractor/faz.py @@ -3,9 +3,9 @@ import re from .common import InfoExtractor from ..compat import compat_etree_fromstring from ..utils import ( + int_or_none, xpath_element, xpath_text, - int_or_none, ) diff --git a/yt_dlp/extractor/fczenit.py b/yt_dlp/extractor/fczenit.py index 8175b6b0f..b2dbb92d5 100644 --- a/yt_dlp/extractor/fczenit.py +++ b/yt_dlp/extractor/fczenit.py @@ -1,7 +1,7 @@ 
from .common import InfoExtractor from ..utils import ( - int_or_none, float_or_none, + int_or_none, ) diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py index f604cbd40..ae837f6a0 100644 --- a/yt_dlp/extractor/fifa.py +++ b/yt_dlp/extractor/fifa.py @@ -1,5 +1,4 @@ from .common import InfoExtractor - from ..utils import ( int_or_none, traverse_obj, diff --git a/yt_dlp/extractor/filmon.py b/yt_dlp/extractor/filmon.py index 0cd18f494..69ca87c84 100644 --- a/yt_dlp/extractor/filmon.py +++ b/yt_dlp/extractor/filmon.py @@ -2,10 +2,10 @@ from .common import InfoExtractor from ..compat import compat_str from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, + int_or_none, qualities, strip_or_none, - int_or_none, - ExtractorError, ) diff --git a/yt_dlp/extractor/gab.py b/yt_dlp/extractor/gab.py index f9d22fd33..c10d290dc 100644 --- a/yt_dlp/extractor/gab.py +++ b/yt_dlp/extractor/gab.py @@ -7,7 +7,7 @@ from ..utils import ( parse_codecs, parse_duration, str_to_int, - unified_timestamp + unified_timestamp, ) diff --git a/yt_dlp/extractor/gamejolt.py b/yt_dlp/extractor/gamejolt.py index 1d3c0b110..b284e1e28 100644 --- a/yt_dlp/extractor/gamejolt.py +++ b/yt_dlp/extractor/gamejolt.py @@ -10,7 +10,7 @@ from ..utils import ( int_or_none, str_or_none, traverse_obj, - try_get + try_get, ) diff --git a/yt_dlp/extractor/gaskrank.py b/yt_dlp/extractor/gaskrank.py index bc56b03e3..6403be8cf 100644 --- a/yt_dlp/extractor/gaskrank.py +++ b/yt_dlp/extractor/gaskrank.py @@ -1,4 +1,5 @@ import re + from .common import InfoExtractor from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 2cfed0fd0..2818c718d 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -4,7 +4,7 @@ import types import urllib.parse import xml.etree.ElementTree -from .common import InfoExtractor # isort: split +from .common import InfoExtractor from .commonprotocols import RtmpIE from 
.youtube import YoutubeIE from ..compat import compat_etree_fromstring diff --git a/yt_dlp/extractor/gettr.py b/yt_dlp/extractor/gettr.py index 7795dc56f..b9dc7c63c 100644 --- a/yt_dlp/extractor/gettr.py +++ b/yt_dlp/extractor/gettr.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - bool_or_none, ExtractorError, + bool_or_none, dict_get, float_or_none, int_or_none, diff --git a/yt_dlp/extractor/gigya.py b/yt_dlp/extractor/gigya.py index c5bc86bb4..7baf8de8d 100644 --- a/yt_dlp/extractor/gigya.py +++ b/yt_dlp/extractor/gigya.py @@ -1,5 +1,4 @@ from .common import InfoExtractor - from ..utils import ( ExtractorError, urlencode_postdata, diff --git a/yt_dlp/extractor/glomex.py b/yt_dlp/extractor/glomex.py index 22aac0db9..515f3c567 100644 --- a/yt_dlp/extractor/glomex.py +++ b/yt_dlp/extractor/glomex.py @@ -3,9 +3,9 @@ import urllib.parse from .common import InfoExtractor from ..utils import ( + ExtractorError, determine_ext, extract_attributes, - ExtractorError, int_or_none, parse_qs, smuggle_url, diff --git a/yt_dlp/extractor/go.py b/yt_dlp/extractor/go.py index b075a02e0..fba98d79f 100644 --- a/yt_dlp/extractor/go.py +++ b/yt_dlp/extractor/go.py @@ -3,16 +3,16 @@ import re from .adobepass import AdobePassIE from ..compat import compat_str from ..utils import ( - int_or_none, - determine_ext, - parse_age_limit, - remove_start, - remove_end, - try_get, - urlencode_postdata, ExtractorError, - unified_timestamp, + determine_ext, + int_or_none, + parse_age_limit, + remove_end, + remove_start, traverse_obj, + try_get, + unified_timestamp, + urlencode_postdata, ) diff --git a/yt_dlp/extractor/godresource.py b/yt_dlp/extractor/godresource.py index f010fff36..276a6c7fe 100644 --- a/yt_dlp/extractor/godresource.py +++ b/yt_dlp/extractor/godresource.py @@ -4,7 +4,7 @@ from ..utils import ( determine_ext, str_or_none, unified_timestamp, - url_or_none + url_or_none, ) from ..utils.traversal import traverse_obj diff --git a/yt_dlp/extractor/gofile.py 
b/yt_dlp/extractor/gofile.py index c6eca0c4d..fac088462 100644 --- a/yt_dlp/extractor/gofile.py +++ b/yt_dlp/extractor/gofile.py @@ -1,10 +1,7 @@ import hashlib from .common import InfoExtractor -from ..utils import ( - ExtractorError, - try_get -) +from ..utils import ExtractorError, try_get class GofileIE(InfoExtractor): diff --git a/yt_dlp/extractor/gotostage.py b/yt_dlp/extractor/gotostage.py index 112293bef..9c1a6cb91 100644 --- a/yt_dlp/extractor/gotostage.py +++ b/yt_dlp/extractor/gotostage.py @@ -1,11 +1,8 @@ +import json + from .common import InfoExtractor from ..compat import compat_str -from ..utils import ( - try_get, - url_or_none -) - -import json +from ..utils import try_get, url_or_none class GoToStageIE(InfoExtractor): diff --git a/yt_dlp/extractor/hbo.py b/yt_dlp/extractor/hbo.py index 530bdb727..2551cfffd 100644 --- a/yt_dlp/extractor/hbo.py +++ b/yt_dlp/extractor/hbo.py @@ -2,11 +2,11 @@ import re from .common import InfoExtractor from ..utils import ( - xpath_text, - xpath_element, int_or_none, parse_duration, urljoin, + xpath_element, + xpath_text, ) diff --git a/yt_dlp/extractor/hearthisat.py b/yt_dlp/extractor/hearthisat.py index c7da8f97d..eb0a77952 100644 --- a/yt_dlp/extractor/hearthisat.py +++ b/yt_dlp/extractor/hearthisat.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, KNOWN_EXTENSIONS, + determine_ext, str_to_int, ) diff --git a/yt_dlp/extractor/hketv.py b/yt_dlp/extractor/hketv.py index e026996da..099c2a175 100644 --- a/yt_dlp/extractor/hketv.py +++ b/yt_dlp/extractor/hketv.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - clean_html, ExtractorError, + clean_html, int_or_none, merge_dicts, parse_count, diff --git a/yt_dlp/extractor/hrti.py b/yt_dlp/extractor/hrti.py index 57b76e46b..41d50d000 100644 --- a/yt_dlp/extractor/hrti.py +++ b/yt_dlp/extractor/hrti.py @@ -4,8 +4,8 @@ from .common import InfoExtractor from ..networking import 
Request from ..networking.exceptions import HTTPError from ..utils import ( - clean_html, ExtractorError, + clean_html, int_or_none, parse_age_limit, try_get, diff --git a/yt_dlp/extractor/huya.py b/yt_dlp/extractor/huya.py index c4965f9bc..5379b5410 100644 --- a/yt_dlp/extractor/huya.py +++ b/yt_dlp/extractor/huya.py @@ -2,8 +2,8 @@ import hashlib import random import re -from ..compat import compat_urlparse, compat_b64decode - +from .common import InfoExtractor +from ..compat import compat_b64decode, compat_urlparse from ..utils import ( ExtractorError, int_or_none, @@ -13,8 +13,6 @@ from ..utils import ( update_url_query, ) -from .common import InfoExtractor - class HuyaLiveIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P<id>[^/#?&]+)(?:\D|$)' diff --git a/yt_dlp/extractor/ichinanalive.py b/yt_dlp/extractor/ichinanalive.py index 9d55ddc02..c28d09f34 100644 --- a/yt_dlp/extractor/ichinanalive.py +++ b/yt_dlp/extractor/ichinanalive.py @@ -1,6 +1,6 @@ from .common import InfoExtractor -from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate from ..compat import compat_str +from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate class IchinanaLiveIE(InfoExtractor): diff --git a/yt_dlp/extractor/infoq.py b/yt_dlp/extractor/infoq.py index 192bcfe35..2bb48508c 100644 --- a/yt_dlp/extractor/infoq.py +++ b/yt_dlp/extractor/infoq.py @@ -1,3 +1,4 @@ +from .bokecc import BokeCCBaseIE from ..compat import ( compat_b64decode, compat_urllib_parse_unquote, @@ -6,10 +7,9 @@ from ..compat import ( from ..utils import ( ExtractorError, determine_ext, - update_url_query, traverse_obj, + update_url_query, ) -from .bokecc import BokeCCBaseIE class InfoQIE(BokeCCBaseIE): diff --git a/yt_dlp/extractor/iprima.py b/yt_dlp/extractor/iprima.py index f7aa579b3..d5a3d8095 100644 --- a/yt_dlp/extractor/iprima.py +++ b/yt_dlp/extractor/iprima.py @@ -3,12 +3,12 @@ import time from .common import InfoExtractor from ..utils 
import ( + ExtractorError, determine_ext, js_to_json, - urlencode_postdata, - ExtractorError, parse_qs, - traverse_obj + traverse_obj, + urlencode_postdata, ) diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index 3368ab1d9..85ed549de 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -4,20 +4,16 @@ import re import time from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, - compat_urllib_parse_unquote -) from .openload import PhantomJSwrapper +from ..compat import compat_str, compat_urllib_parse_unquote, compat_urllib_parse_urlencode from ..utils import ( + ExtractorError, clean_html, decode_packed_codes, - ExtractorError, float_or_none, format_field, - get_element_by_id, get_element_by_attribute, + get_element_by_id, int_or_none, js_to_json, ohdave_rsa_encrypt, diff --git a/yt_dlp/extractor/itprotv.py b/yt_dlp/extractor/itprotv.py index 713fd4ec5..5d6fbaa01 100644 --- a/yt_dlp/extractor/itprotv.py +++ b/yt_dlp/extractor/itprotv.py @@ -1,12 +1,11 @@ import re from .common import InfoExtractor - from ..utils import ( int_or_none, str_or_none, traverse_obj, - urljoin + urljoin, ) diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py index 9ac7be307..55c416521 100644 --- a/yt_dlp/extractor/itv.py +++ b/yt_dlp/extractor/itv.py @@ -1,23 +1,22 @@ import json -from .common import InfoExtractor from .brightcove import BrightcoveNewIE - +from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + JSON_LD_RE, + ExtractorError, base_url, clean_html, determine_ext, extract_attributes, - ExtractorError, get_element_by_class, - JSON_LD_RE, merge_dicts, parse_duration, smuggle_url, try_get, - url_or_none, url_basename, + url_or_none, urljoin, ) diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py index e23fdfd6a..a11f3f11d 100644 --- a/yt_dlp/extractor/iwara.py +++ b/yt_dlp/extractor/iwara.py @@ -1,9 +1,9 @@ import functools -import 
urllib.parse -import urllib.error import hashlib import json import time +import urllib.error +import urllib.parse from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/jamendo.py b/yt_dlp/extractor/jamendo.py index a2bbba397..8557a81ad 100644 --- a/yt_dlp/extractor/jamendo.py +++ b/yt_dlp/extractor/jamendo.py @@ -1,8 +1,8 @@ import hashlib import random -from ..compat import compat_str from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( clean_html, int_or_none, diff --git a/yt_dlp/extractor/japandiet.py b/yt_dlp/extractor/japandiet.py index 6c650568a..19d2b923b 100644 --- a/yt_dlp/extractor/japandiet.py +++ b/yt_dlp/extractor/japandiet.py @@ -1,5 +1,6 @@ import re +from .common import InfoExtractor from ..utils import ( ExtractorError, clean_html, @@ -9,9 +10,8 @@ from ..utils import ( smuggle_url, traverse_obj, try_call, - unsmuggle_url + unsmuggle_url, ) -from .common import InfoExtractor def _parse_japanese_date(text): diff --git a/yt_dlp/extractor/jove.py b/yt_dlp/extractor/jove.py index 245fe73d4..8069fea4c 100644 --- a/yt_dlp/extractor/jove.py +++ b/yt_dlp/extractor/jove.py @@ -1,8 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - ExtractorError, - unified_strdate -) +from ..utils import ExtractorError, unified_strdate class JoveIE(InfoExtractor): diff --git a/yt_dlp/extractor/jstream.py b/yt_dlp/extractor/jstream.py index 3e2e62712..00ac7ccca 100644 --- a/yt_dlp/extractor/jstream.py +++ b/yt_dlp/extractor/jstream.py @@ -1,6 +1,6 @@ import base64 -import re import json +import re from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/kakao.py b/yt_dlp/extractor/kakao.py index 43055e89d..563aa2d72 100644 --- a/yt_dlp/extractor/kakao.py +++ b/yt_dlp/extractor/kakao.py @@ -3,8 +3,8 @@ from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, int_or_none, - strip_or_none, str_or_none, + strip_or_none, traverse_obj, 
unified_timestamp, ) diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py index 95e2deea5..4752d5a55 100644 --- a/yt_dlp/extractor/kaltura.py +++ b/yt_dlp/extractor/kaltura.py @@ -4,18 +4,18 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urlparse, compat_parse_qs, + compat_urlparse, ) from ..utils import ( - clean_html, ExtractorError, + clean_html, format_field, int_or_none, - unsmuggle_url, + remove_start, smuggle_url, traverse_obj, - remove_start + unsmuggle_url, ) diff --git a/yt_dlp/extractor/kankanews.py b/yt_dlp/extractor/kankanews.py index 8f247b305..3d74c745c 100644 --- a/yt_dlp/extractor/kankanews.py +++ b/yt_dlp/extractor/kankanews.py @@ -1,7 +1,7 @@ -import time +import hashlib import random import string -import hashlib +import time import urllib.parse from .common import InfoExtractor diff --git a/yt_dlp/extractor/kuwo.py b/yt_dlp/extractor/kuwo.py index 3c93dedac..b77667160 100644 --- a/yt_dlp/extractor/kuwo.py +++ b/yt_dlp/extractor/kuwo.py @@ -3,10 +3,10 @@ import re from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( - get_element_by_id, - clean_html, ExtractorError, InAdvancePagedList, + clean_html, + get_element_by_id, remove_start, ) diff --git a/yt_dlp/extractor/lcp.py b/yt_dlp/extractor/lcp.py index 9846319e0..62874195f 100644 --- a/yt_dlp/extractor/lcp.py +++ b/yt_dlp/extractor/lcp.py @@ -1,5 +1,5 @@ -from .common import InfoExtractor from .arkena import ArkenaIE +from .common import InfoExtractor class LcpPlayIE(ArkenaIE): # XXX: Do not subclass from concrete IE diff --git a/yt_dlp/extractor/lecture2go.py b/yt_dlp/extractor/lecture2go.py index 10fb5d479..1a3ada1e5 100644 --- a/yt_dlp/extractor/lecture2go.py +++ b/yt_dlp/extractor/lecture2go.py @@ -4,8 +4,8 @@ from .common import InfoExtractor from ..utils import ( determine_ext, determine_protocol, - parse_duration, int_or_none, + parse_duration, ) diff --git a/yt_dlp/extractor/lecturio.py 
b/yt_dlp/extractor/lecturio.py index 629d208fc..90f0268d7 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -2,9 +2,9 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, clean_html, determine_ext, - ExtractorError, float_or_none, int_or_none, str_or_none, diff --git a/yt_dlp/extractor/leeco.py b/yt_dlp/extractor/leeco.py index 5d61a607f..a113b3d0d 100644 --- a/yt_dlp/extractor/leeco.py +++ b/yt_dlp/extractor/leeco.py @@ -11,9 +11,9 @@ from ..compat import ( compat_urllib_parse_urlencode, ) from ..utils import ( + ExtractorError, determine_ext, encode_data_uri, - ExtractorError, int_or_none, orderedSet, parse_iso8601, diff --git a/yt_dlp/extractor/libraryofcongress.py b/yt_dlp/extractor/libraryofcongress.py index b76ca0908..297993939 100644 --- a/yt_dlp/extractor/libraryofcongress.py +++ b/yt_dlp/extractor/libraryofcongress.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor - from ..utils import ( determine_ext, float_or_none, diff --git a/yt_dlp/extractor/lifenews.py b/yt_dlp/extractor/lifenews.py index 919cfcb37..ea150a58b 100644 --- a/yt_dlp/extractor/lifenews.py +++ b/yt_dlp/extractor/lifenews.py @@ -6,8 +6,8 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, parse_iso8601, remove_end, diff --git a/yt_dlp/extractor/limelight.py b/yt_dlp/extractor/limelight.py index 4e50f106f..1ff091ddb 100644 --- a/yt_dlp/extractor/limelight.py +++ b/yt_dlp/extractor/limelight.py @@ -3,13 +3,13 @@ import re from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, determine_ext, float_or_none, int_or_none, smuggle_url, try_get, unsmuggle_url, - ExtractorError, ) diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py index e12f467ef..2a7c6f0e0 100644 --- a/yt_dlp/extractor/linkedin.py +++ b/yt_dlp/extractor/linkedin.py @@ -7,8 +7,8 @@ from ..utils 
import ( extract_attributes, float_or_none, int_or_none, - srt_subtitles_timecode, mimetype2ext, + srt_subtitles_timecode, traverse_obj, try_get, url_or_none, diff --git a/yt_dlp/extractor/mainstreaming.py b/yt_dlp/extractor/mainstreaming.py index fd9bba8bc..fa12a6a8d 100644 --- a/yt_dlp/extractor/mainstreaming.py +++ b/yt_dlp/extractor/mainstreaming.py @@ -1,14 +1,13 @@ import re from .common import InfoExtractor - from ..utils import ( int_or_none, js_to_json, parse_duration, traverse_obj, try_get, - urljoin + urljoin, ) diff --git a/yt_dlp/extractor/manoto.py b/yt_dlp/extractor/manoto.py index 2792e6e70..44c321c26 100644 --- a/yt_dlp/extractor/manoto.py +++ b/yt_dlp/extractor/manoto.py @@ -1,10 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - clean_html, - int_or_none, - traverse_obj -) - +from ..utils import clean_html, int_or_none, traverse_obj _API_URL = 'https://dak1vd5vmi7x6.cloudfront.net/api/v1/publicrole/{}/{}?id={}' diff --git a/yt_dlp/extractor/medaltv.py b/yt_dlp/extractor/medaltv.py index 675ad8ccc..d040fb48f 100644 --- a/yt_dlp/extractor/medaltv.py +++ b/yt_dlp/extractor/medaltv.py @@ -4,8 +4,8 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( ExtractorError, - format_field, float_or_none, + format_field, int_or_none, str_or_none, traverse_obj, diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py index fcc4827b5..c01597762 100644 --- a/yt_dlp/extractor/mediaklikk.py +++ b/yt_dlp/extractor/mediaklikk.py @@ -1,14 +1,11 @@ +from .common import InfoExtractor +from ..compat import compat_str, compat_urllib_parse_unquote from ..utils import ( ExtractorError, traverse_obj, unified_strdate, url_or_none, ) -from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_unquote, - compat_str -) class MediaKlikkIE(InfoExtractor): diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py index e04a1ce90..b7df5c75a 100644 --- 
a/yt_dlp/extractor/mediaset.py +++ b/yt_dlp/extractor/mediaset.py @@ -5,11 +5,11 @@ from .theplatform import ThePlatformBaseIE from ..utils import ( ExtractorError, GeoRestrictedError, - int_or_none, OnDemandPagedList, + int_or_none, try_get, - urljoin, update_url_query, + urljoin, ) diff --git a/yt_dlp/extractor/mediasite.py b/yt_dlp/extractor/mediasite.py index 7ea78ab69..d3fec4ec2 100644 --- a/yt_dlp/extractor/mediasite.py +++ b/yt_dlp/extractor/mediasite.py @@ -1,5 +1,5 @@ -import re import json +import re from .common import InfoExtractor from ..compat import ( @@ -10,16 +10,15 @@ from ..utils import ( ExtractorError, float_or_none, mimetype2ext, + smuggle_url, str_or_none, try_call, try_get, - smuggle_url, unsmuggle_url, url_or_none, urljoin, ) - _ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})' diff --git a/yt_dlp/extractor/microsoftstream.py b/yt_dlp/extractor/microsoftstream.py index 5f5f16087..f6a0b416d 100644 --- a/yt_dlp/extractor/microsoftstream.py +++ b/yt_dlp/extractor/microsoftstream.py @@ -3,8 +3,8 @@ import base64 from .common import InfoExtractor from ..utils import ( merge_dicts, - parse_iso8601, parse_duration, + parse_iso8601, parse_resolution, try_get, url_basename, diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py index f64d575dc..caf60c805 100644 --- a/yt_dlp/extractor/mildom.py +++ b/yt_dlp/extractor/mildom.py @@ -4,11 +4,11 @@ import uuid from .common import InfoExtractor from ..utils import ( + ExtractorError, + OnDemandPagedList, determine_ext, dict_get, - ExtractorError, float_or_none, - OnDemandPagedList, traverse_obj, ) diff --git a/yt_dlp/extractor/mit.py b/yt_dlp/extractor/mit.py index 38cc0c274..979584ed6 100644 --- a/yt_dlp/extractor/mit.py +++ b/yt_dlp/extractor/mit.py @@ -1,11 +1,11 @@ -import re import json +import re from .common import InfoExtractor from .youtube import YoutubeIE from ..utils import ( - clean_html, ExtractorError, + clean_html, 
get_element_by_id, ) diff --git a/yt_dlp/extractor/monstercat.py b/yt_dlp/extractor/monstercat.py index a69a12e18..411d41cb0 100644 --- a/yt_dlp/extractor/monstercat.py +++ b/yt_dlp/extractor/monstercat.py @@ -8,10 +8,10 @@ from ..utils import ( get_element_html_by_class, get_element_text_and_html_by_tag, int_or_none, - unified_strdate, strip_or_none, traverse_obj, try_call, + unified_strdate, ) diff --git a/yt_dlp/extractor/moviepilot.py b/yt_dlp/extractor/moviepilot.py index 35c57bc70..ed5be4fa6 100644 --- a/yt_dlp/extractor/moviepilot.py +++ b/yt_dlp/extractor/moviepilot.py @@ -1,5 +1,5 @@ -from .dailymotion import DailymotionIE from .common import InfoExtractor +from .dailymotion import DailymotionIE class MoviepilotIE(InfoExtractor): diff --git a/yt_dlp/extractor/movingimage.py b/yt_dlp/extractor/movingimage.py index cdd8ba4dc..6e0ea2652 100644 --- a/yt_dlp/extractor/movingimage.py +++ b/yt_dlp/extractor/movingimage.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - unescapeHTML, parse_duration, + unescapeHTML, ) diff --git a/yt_dlp/extractor/msn.py b/yt_dlp/extractor/msn.py index 77d1806a3..79728e106 100644 --- a/yt_dlp/extractor/msn.py +++ b/yt_dlp/extractor/msn.py @@ -3,8 +3,8 @@ import re from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, unescapeHTML, ) diff --git a/yt_dlp/extractor/n1.py b/yt_dlp/extractor/n1.py index edc41443a..8a8a5fec7 100644 --- a/yt_dlp/extractor/n1.py +++ b/yt_dlp/extractor/n1.py @@ -2,8 +2,8 @@ import re from .common import InfoExtractor from ..utils import ( - unified_timestamp, extract_attributes, + unified_timestamp, ) diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py index 885557e91..26400e383 100644 --- a/yt_dlp/extractor/naver.py +++ b/yt_dlp/extractor/naver.py @@ -4,8 +4,8 @@ import hmac import itertools import json import re -import urllib.parse import time +import urllib.parse from 
.common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/nba.py b/yt_dlp/extractor/nba.py index 81d11e3a5..ec4d6368e 100644 --- a/yt_dlp/extractor/nba.py +++ b/yt_dlp/extractor/nba.py @@ -7,9 +7,9 @@ from ..compat import ( compat_urllib_parse_unquote, ) from ..utils import ( + OnDemandPagedList, int_or_none, merge_dicts, - OnDemandPagedList, parse_duration, parse_iso8601, parse_qs, diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py index 267fa8353..e88f98abf 100644 --- a/yt_dlp/extractor/nbc.py +++ b/yt_dlp/extractor/nbc.py @@ -3,9 +3,9 @@ import json import re import xml.etree.ElementTree +from .adobepass import AdobePassIE from .common import InfoExtractor from .theplatform import ThePlatformIE, default_ns -from .adobepass import AdobePassIE from ..compat import compat_urllib_parse_unquote from ..networking import HEADRequest from ..utils import ( diff --git a/yt_dlp/extractor/ndr.py b/yt_dlp/extractor/ndr.py index 41ea3629a..243221d46 100644 --- a/yt_dlp/extractor/ndr.py +++ b/yt_dlp/extractor/ndr.py @@ -3,8 +3,8 @@ import re from .common import InfoExtractor from ..compat import compat_urllib_parse_urlparse from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, merge_dicts, parse_iso8601, diff --git a/yt_dlp/extractor/nfhsnetwork.py b/yt_dlp/extractor/nfhsnetwork.py index febad8fdf..be732a32f 100644 --- a/yt_dlp/extractor/nfhsnetwork.py +++ b/yt_dlp/extractor/nfhsnetwork.py @@ -1,11 +1,5 @@ from .common import InfoExtractor - - -from ..utils import ( - try_get, - unified_strdate, - unified_timestamp -) +from ..utils import try_get, unified_strdate, unified_timestamp class NFHSNetworkIE(InfoExtractor): diff --git a/yt_dlp/extractor/nhl.py b/yt_dlp/extractor/nhl.py index 2521c40e0..64cddb408 100644 --- a/yt_dlp/extractor/nhl.py +++ b/yt_dlp/extractor/nhl.py @@ -3,8 +3,8 @@ from ..compat import compat_str from ..utils import ( determine_ext, int_or_none, - parse_iso8601, parse_duration, + 
parse_iso8601, ) diff --git a/yt_dlp/extractor/ninenews.py b/yt_dlp/extractor/ninenews.py index 900d9ba60..0b4f47b48 100644 --- a/yt_dlp/extractor/ninenews.py +++ b/yt_dlp/extractor/ninenews.py @@ -1,5 +1,5 @@ -from .common import InfoExtractor from .brightcove import BrightcoveNewIE +from .common import InfoExtractor from ..utils import ExtractorError from ..utils.traversal import traverse_obj diff --git a/yt_dlp/extractor/ninenow.py b/yt_dlp/extractor/ninenow.py index c655b75f4..b7170b0e7 100644 --- a/yt_dlp/extractor/ninenow.py +++ b/yt_dlp/extractor/ninenow.py @@ -2,8 +2,8 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( ExtractorError, - int_or_none, float_or_none, + int_or_none, smuggle_url, str_or_none, try_get, diff --git a/yt_dlp/extractor/nitter.py b/yt_dlp/extractor/nitter.py index 35d1311dc..249e7cd33 100644 --- a/yt_dlp/extractor/nitter.py +++ b/yt_dlp/extractor/nitter.py @@ -1,13 +1,14 @@ +import random +import re + from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( - parse_count, - unified_timestamp, - remove_end, determine_ext, + parse_count, + remove_end, + unified_timestamp, ) -import re -import random class NitterIE(InfoExtractor): diff --git a/yt_dlp/extractor/nobelprize.py b/yt_dlp/extractor/nobelprize.py index cddc72f71..513529bea 100644 --- a/yt_dlp/extractor/nobelprize.py +++ b/yt_dlp/extractor/nobelprize.py @@ -1,11 +1,11 @@ from .common import InfoExtractor from ..utils import ( - js_to_json, - mimetype2ext, determine_ext, - update_url_query, get_element_by_attribute, int_or_none, + js_to_json, + mimetype2ext, + update_url_query, ) diff --git a/yt_dlp/extractor/noz.py b/yt_dlp/extractor/noz.py index c7b803803..19cb972c0 100644 --- a/yt_dlp/extractor/noz.py +++ b/yt_dlp/extractor/noz.py @@ -1,11 +1,11 @@ from .common import InfoExtractor -from ..utils import ( - int_or_none, - find_xpath_attr, - xpath_text, - update_url_query, -) from ..compat import 
compat_urllib_parse_unquote +from ..utils import ( + find_xpath_attr, + int_or_none, + update_url_query, + xpath_text, +) class NozIE(InfoExtractor): diff --git a/yt_dlp/extractor/nuevo.py b/yt_dlp/extractor/nuevo.py index ec54041f1..5670445aa 100644 --- a/yt_dlp/extractor/nuevo.py +++ b/yt_dlp/extractor/nuevo.py @@ -1,9 +1,5 @@ from .common import InfoExtractor - -from ..utils import ( - float_or_none, - xpath_text -) +from ..utils import float_or_none, xpath_text class NuevoBaseIE(InfoExtractor): diff --git a/yt_dlp/extractor/nuvid.py b/yt_dlp/extractor/nuvid.py index 6ac351cb0..0ef0ec70b 100644 --- a/yt_dlp/extractor/nuvid.py +++ b/yt_dlp/extractor/nuvid.py @@ -2,8 +2,8 @@ import re from .common import InfoExtractor from ..utils import ( - parse_duration, int_or_none, + parse_duration, strip_or_none, traverse_obj, url_or_none, diff --git a/yt_dlp/extractor/nzherald.py b/yt_dlp/extractor/nzherald.py index 062f9a875..0a12aea71 100644 --- a/yt_dlp/extractor/nzherald.py +++ b/yt_dlp/extractor/nzherald.py @@ -3,10 +3,7 @@ import json from .brightcove import BrightcoveNewIE from .common import InfoExtractor from ..compat import compat_str -from ..utils import ( - ExtractorError, - traverse_obj -) +from ..utils import ExtractorError, traverse_obj class NZHeraldIE(InfoExtractor): diff --git a/yt_dlp/extractor/odkmedia.py b/yt_dlp/extractor/odkmedia.py index b852160b9..8321b0741 100644 --- a/yt_dlp/extractor/odkmedia.py +++ b/yt_dlp/extractor/odkmedia.py @@ -7,7 +7,7 @@ from ..utils import ( GeoRestrictedError, float_or_none, traverse_obj, - try_call + try_call, ) diff --git a/yt_dlp/extractor/olympics.py b/yt_dlp/extractor/olympics.py index 61d1f4048..5507d2fda 100644 --- a/yt_dlp/extractor/olympics.py +++ b/yt_dlp/extractor/olympics.py @@ -1,8 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - int_or_none, - try_get -) +from ..utils import int_or_none, try_get class OlympicsReplayIE(InfoExtractor): diff --git a/yt_dlp/extractor/onenewsnz.py 
b/yt_dlp/extractor/onenewsnz.py index a46211e77..351b397de 100644 --- a/yt_dlp/extractor/onenewsnz.py +++ b/yt_dlp/extractor/onenewsnz.py @@ -1,10 +1,6 @@ from .brightcove import BrightcoveNewIE from .common import InfoExtractor - -from ..utils import ( - ExtractorError, - traverse_obj -) +from ..utils import ExtractorError, traverse_obj class OneNewsNZIE(InfoExtractor): diff --git a/yt_dlp/extractor/onet.py b/yt_dlp/extractor/onet.py index 0d59e8cb4..da10f3779 100644 --- a/yt_dlp/extractor/onet.py +++ b/yt_dlp/extractor/onet.py @@ -2,13 +2,13 @@ import re from .common import InfoExtractor from ..utils import ( - determine_ext, + NO_DEFAULT, ExtractorError, + determine_ext, float_or_none, get_element_by_class, int_or_none, js_to_json, - NO_DEFAULT, parse_iso8601, remove_start, strip_or_none, diff --git a/yt_dlp/extractor/opencast.py b/yt_dlp/extractor/opencast.py index 1fafd9afb..12bf55704 100644 --- a/yt_dlp/extractor/opencast.py +++ b/yt_dlp/extractor/opencast.py @@ -2,8 +2,8 @@ import re from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, parse_iso8601, traverse_obj, diff --git a/yt_dlp/extractor/openrec.py b/yt_dlp/extractor/openrec.py index 82a81c6c2..c9a96aeb4 100644 --- a/yt_dlp/extractor/openrec.py +++ b/yt_dlp/extractor/openrec.py @@ -1,4 +1,5 @@ from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, get_first, @@ -8,7 +9,6 @@ from ..utils import ( unified_strdate, unified_timestamp, ) -from ..compat import compat_str class OpenRecBaseIE(InfoExtractor): diff --git a/yt_dlp/extractor/ora.py b/yt_dlp/extractor/ora.py index d49909d52..0e7a8484e 100644 --- a/yt_dlp/extractor/ora.py +++ b/yt_dlp/extractor/ora.py @@ -1,4 +1,5 @@ import re + from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/packtpub.py b/yt_dlp/extractor/packtpub.py index 56203306f..3e969c846 100644 --- 
a/yt_dlp/extractor/packtpub.py +++ b/yt_dlp/extractor/packtpub.py @@ -3,13 +3,12 @@ import json from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - clean_html, ExtractorError, + clean_html, # remove_end, str_or_none, strip_or_none, unified_timestamp, - # urljoin, ) diff --git a/yt_dlp/extractor/panopto.py b/yt_dlp/extractor/panopto.py index 63c5fd68f..6b2596236 100644 --- a/yt_dlp/extractor/panopto.py +++ b/yt_dlp/extractor/panopto.py @@ -5,17 +5,13 @@ import json import random from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlparse, - compat_urlparse -) - +from ..compat import compat_urllib_parse_urlparse, compat_urlparse from ..utils import ( - bug_reports_message, ExtractorError, + OnDemandPagedList, + bug_reports_message, get_first, int_or_none, - OnDemandPagedList, parse_qs, srt_subtitles_timecode, traverse_obj, diff --git a/yt_dlp/extractor/paramountplus.py b/yt_dlp/extractor/paramountplus.py index 7e472a63e..3f19803c0 100644 --- a/yt_dlp/extractor/paramountplus.py +++ b/yt_dlp/extractor/paramountplus.py @@ -1,7 +1,7 @@ import itertools -from .common import InfoExtractor from .cbs import CBSBaseIE +from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, diff --git a/yt_dlp/extractor/pbs.py b/yt_dlp/extractor/pbs.py index 2bb2ea9f1..f6f5a5c3e 100644 --- a/yt_dlp/extractor/pbs.py +++ b/yt_dlp/extractor/pbs.py @@ -3,10 +3,11 @@ import re from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + US_RATINGS, ExtractorError, determine_ext, - int_or_none, float_or_none, + int_or_none, js_to_json, orderedSet, strip_jsonp, @@ -14,7 +15,6 @@ from ..utils import ( traverse_obj, unified_strdate, url_or_none, - US_RATINGS, ) diff --git a/yt_dlp/extractor/pearvideo.py b/yt_dlp/extractor/pearvideo.py index e27e5a7ba..086eaaf00 100644 --- a/yt_dlp/extractor/pearvideo.py +++ b/yt_dlp/extractor/pearvideo.py @@ -3,8 +3,8 @@ import re 
from .common import InfoExtractor from ..utils import ( qualities, - unified_timestamp, traverse_obj, + unified_timestamp, ) diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py index 730b2393e..b7919c073 100644 --- a/yt_dlp/extractor/peertube.py +++ b/yt_dlp/extractor/peertube.py @@ -4,6 +4,7 @@ import re from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + OnDemandPagedList, format_field, int_or_none, parse_resolution, @@ -12,7 +13,6 @@ from ..utils import ( unified_timestamp, url_or_none, urljoin, - OnDemandPagedList, ) diff --git a/yt_dlp/extractor/piksel.py b/yt_dlp/extractor/piksel.py index 97a9bf574..8870d7b99 100644 --- a/yt_dlp/extractor/piksel.py +++ b/yt_dlp/extractor/piksel.py @@ -2,8 +2,8 @@ import re from .common import InfoExtractor from ..utils import ( - dict_get, ExtractorError, + dict_get, int_or_none, join_nonempty, parse_iso8601, diff --git a/yt_dlp/extractor/pladform.py b/yt_dlp/extractor/pladform.py index d67f6005c..c72a3876c 100644 --- a/yt_dlp/extractor/pladform.py +++ b/yt_dlp/extractor/pladform.py @@ -1,11 +1,11 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, parse_qs, - xpath_text, qualities, + xpath_text, ) diff --git a/yt_dlp/extractor/platzi.py b/yt_dlp/extractor/platzi.py index 166b98c4a..d978c080b 100644 --- a/yt_dlp/extractor/platzi.py +++ b/yt_dlp/extractor/platzi.py @@ -4,8 +4,8 @@ from ..compat import ( compat_str, ) from ..utils import ( - clean_html, ExtractorError, + clean_html, int_or_none, str_or_none, try_get, diff --git a/yt_dlp/extractor/playtvak.py b/yt_dlp/extractor/playtvak.py index c418f88cb..a01b42290 100644 --- a/yt_dlp/extractor/playtvak.py +++ b/yt_dlp/extractor/playtvak.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..compat import ( - compat_urlparse, compat_urllib_parse_urlencode, + compat_urlparse, ) from ..utils import ( ExtractorError, diff --git 
a/yt_dlp/extractor/pluralsight.py b/yt_dlp/extractor/pluralsight.py index 809b65608..60c9efffe 100644 --- a/yt_dlp/extractor/pluralsight.py +++ b/yt_dlp/extractor/pluralsight.py @@ -10,8 +10,8 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( - dict_get, ExtractorError, + dict_get, float_or_none, int_or_none, parse_duration, diff --git a/yt_dlp/extractor/polsatgo.py b/yt_dlp/extractor/polsatgo.py index 1cebb365e..ecf2132b4 100644 --- a/yt_dlp/extractor/polsatgo.py +++ b/yt_dlp/extractor/polsatgo.py @@ -3,10 +3,10 @@ import uuid from .common import InfoExtractor from ..utils import ( + ExtractorError, int_or_none, try_get, url_or_none, - ExtractorError, ) diff --git a/yt_dlp/extractor/pornflip.py b/yt_dlp/extractor/pornflip.py index 51a9cf38f..d711d3e67 100644 --- a/yt_dlp/extractor/pornflip.py +++ b/yt_dlp/extractor/pornflip.py @@ -1,9 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - int_or_none, - parse_duration, - parse_iso8601 -) +from ..utils import int_or_none, parse_duration, parse_iso8601 class PornFlipIE(InfoExtractor): diff --git a/yt_dlp/extractor/pornovoisines.py b/yt_dlp/extractor/pornovoisines.py index 2e51b4f6b..b8e8701a8 100644 --- a/yt_dlp/extractor/pornovoisines.py +++ b/yt_dlp/extractor/pornovoisines.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - int_or_none, float_or_none, + int_or_none, unified_strdate, ) diff --git a/yt_dlp/extractor/prx.py b/yt_dlp/extractor/prx.py index 5bb183270..338794ed5 100644 --- a/yt_dlp/extractor/prx.py +++ b/yt_dlp/extractor/prx.py @@ -1,14 +1,15 @@ import itertools + from .common import InfoExtractor, SearchInfoExtractor from ..utils import ( - urljoin, - traverse_obj, + clean_html, int_or_none, mimetype2ext, - clean_html, - url_or_none, - unified_timestamp, str_or_none, + traverse_obj, + unified_timestamp, + url_or_none, + urljoin, ) diff --git a/yt_dlp/extractor/puhutv.py b/yt_dlp/extractor/puhutv.py index 4b8e5e90d..fc4c29e95 100644 --- 
a/yt_dlp/extractor/puhutv.py +++ b/yt_dlp/extractor/puhutv.py @@ -3,8 +3,8 @@ from ..compat import compat_str from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, - int_or_none, float_or_none, + int_or_none, parse_resolution, str_or_none, try_get, diff --git a/yt_dlp/extractor/qingting.py b/yt_dlp/extractor/qingting.py index aa690d492..cb00de2d5 100644 --- a/yt_dlp/extractor/qingting.py +++ b/yt_dlp/extractor/qingting.py @@ -1,5 +1,4 @@ from .common import InfoExtractor - from ..utils import traverse_obj diff --git a/yt_dlp/extractor/qqmusic.py b/yt_dlp/extractor/qqmusic.py index 92858259a..90141e63b 100644 --- a/yt_dlp/extractor/qqmusic.py +++ b/yt_dlp/extractor/qqmusic.py @@ -4,8 +4,8 @@ import time from .common import InfoExtractor from ..utils import ( - clean_html, ExtractorError, + clean_html, strip_jsonp, unescapeHTML, ) diff --git a/yt_dlp/extractor/radiocanada.py b/yt_dlp/extractor/radiocanada.py index 1a5a6355a..4a09dcdfc 100644 --- a/yt_dlp/extractor/radiocanada.py +++ b/yt_dlp/extractor/radiocanada.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, unified_strdate, ) diff --git a/yt_dlp/extractor/radiocomercial.py b/yt_dlp/extractor/radiocomercial.py index 38f8cf786..0c219778f 100644 --- a/yt_dlp/extractor/radiocomercial.py +++ b/yt_dlp/extractor/radiocomercial.py @@ -14,7 +14,7 @@ from ..utils import ( try_call, unified_strdate, update_url, - urljoin + urljoin, ) from ..utils.traversal import traverse_obj diff --git a/yt_dlp/extractor/radiozet.py b/yt_dlp/extractor/radiozet.py index 67520172e..632c8c281 100644 --- a/yt_dlp/extractor/radiozet.py +++ b/yt_dlp/extractor/radiozet.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - traverse_obj, strip_or_none, + traverse_obj, ) diff --git a/yt_dlp/extractor/radlive.py b/yt_dlp/extractor/radlive.py index 
3c00183be..325e278fc 100644 --- a/yt_dlp/extractor/radlive.py +++ b/yt_dlp/extractor/radlive.py @@ -1,13 +1,13 @@ import json +from .common import InfoExtractor from ..utils import ( ExtractorError, format_field, traverse_obj, try_get, - unified_timestamp + unified_timestamp, ) -from .common import InfoExtractor class RadLiveIE(InfoExtractor): diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py index c1fc65c81..c2e7a6fb8 100644 --- a/yt_dlp/extractor/rai.py +++ b/yt_dlp/extractor/rai.py @@ -3,11 +3,11 @@ import re from .common import InfoExtractor from ..networking import HEADRequest from ..utils import ( + ExtractorError, + GeoRestrictedError, clean_html, determine_ext, - ExtractorError, filter_dict, - GeoRestrictedError, int_or_none, join_nonempty, parse_duration, diff --git a/yt_dlp/extractor/rbgtum.py b/yt_dlp/extractor/rbgtum.py index 54f194cbd..5f2d0c103 100644 --- a/yt_dlp/extractor/rbgtum.py +++ b/yt_dlp/extractor/rbgtum.py @@ -1,7 +1,7 @@ import re from .common import InfoExtractor -from ..utils import parse_qs, remove_start, traverse_obj, ExtractorError +from ..utils import ExtractorError, parse_qs, remove_start, traverse_obj class RbgTumIE(InfoExtractor): diff --git a/yt_dlp/extractor/rcti.py b/yt_dlp/extractor/rcti.py index 6a7c7f399..9c382e257 100644 --- a/yt_dlp/extractor/rcti.py +++ b/yt_dlp/extractor/rcti.py @@ -5,11 +5,11 @@ import time from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - dict_get, ExtractorError, + dict_get, strip_or_none, traverse_obj, - try_get + try_get, ) diff --git a/yt_dlp/extractor/rds.py b/yt_dlp/extractor/rds.py index 1a1c6634e..cc76b898a 100644 --- a/yt_dlp/extractor/rds.py +++ b/yt_dlp/extractor/rds.py @@ -1,10 +1,10 @@ from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( + js_to_json, parse_duration, parse_iso8601, - js_to_json, ) -from ..compat import compat_str class RDSIE(InfoExtractor): diff --git 
a/yt_dlp/extractor/redbulltv.py b/yt_dlp/extractor/redbulltv.py index d1de2490f..fac51b9ef 100644 --- a/yt_dlp/extractor/redbulltv.py +++ b/yt_dlp/extractor/redbulltv.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - float_or_none, ExtractorError, + float_or_none, ) diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index 44c0353da..bc3e5f7ee 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -10,8 +10,8 @@ from ..utils import ( try_get, unescapeHTML, update_url_query, - urlencode_postdata, url_or_none, + urlencode_postdata, ) diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py index f9453202b..d0546bbfa 100644 --- a/yt_dlp/extractor/redgifs.py +++ b/yt_dlp/extractor/redgifs.py @@ -5,10 +5,10 @@ from ..compat import compat_parse_qs from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, + OnDemandPagedList, int_or_none, qualities, try_get, - OnDemandPagedList, ) diff --git a/yt_dlp/extractor/redtube.py b/yt_dlp/extractor/redtube.py index 965abbee8..14ed0edab 100644 --- a/yt_dlp/extractor/redtube.py +++ b/yt_dlp/extractor/redtube.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, merge_dicts, str_to_int, diff --git a/yt_dlp/extractor/reuters.py b/yt_dlp/extractor/reuters.py index 0a8f13b9f..9c9bac6af 100644 --- a/yt_dlp/extractor/reuters.py +++ b/yt_dlp/extractor/reuters.py @@ -2,8 +2,8 @@ import re from .common import InfoExtractor from ..utils import ( - js_to_json, int_or_none, + js_to_json, unescapeHTML, ) diff --git a/yt_dlp/extractor/rmcdecouverte.py b/yt_dlp/extractor/rmcdecouverte.py index 8d29b302b..bc59ed07e 100644 --- a/yt_dlp/extractor/rmcdecouverte.py +++ b/yt_dlp/extractor/rmcdecouverte.py @@ -1,5 +1,5 @@ -from .common import InfoExtractor from .brightcove import BrightcoveLegacyIE +from .common import 
InfoExtractor from ..compat import ( compat_parse_qs, compat_urlparse, diff --git a/yt_dlp/extractor/rte.py b/yt_dlp/extractor/rte.py index 7ba80d4ba..729804d23 100644 --- a/yt_dlp/extractor/rte.py +++ b/yt_dlp/extractor/rte.py @@ -3,13 +3,13 @@ import re from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, float_or_none, parse_iso8601, str_or_none, try_get, unescapeHTML, url_or_none, - ExtractorError, ) diff --git a/yt_dlp/extractor/rtp.py b/yt_dlp/extractor/rtp.py index 5928a207a..ec78d0a66 100644 --- a/yt_dlp/extractor/rtp.py +++ b/yt_dlp/extractor/rtp.py @@ -1,9 +1,10 @@ +import base64 +import json +import re +import urllib.parse + from .common import InfoExtractor from ..utils import js_to_json -import re -import json -import urllib.parse -import base64 class RTPIE(InfoExtractor): diff --git a/yt_dlp/extractor/rtvcplay.py b/yt_dlp/extractor/rtvcplay.py index 741c47262..e7dcd5fd6 100644 --- a/yt_dlp/extractor/rtvcplay.py +++ b/yt_dlp/extractor/rtvcplay.py @@ -1,16 +1,17 @@ import re -from .common import InfoExtractor, ExtractorError +from .common import InfoExtractor from ..utils import ( + ExtractorError, clean_html, determine_ext, - int_or_none, float_or_none, + int_or_none, js_to_json, mimetype2ext, traverse_obj, - urljoin, url_or_none, + urljoin, ) diff --git a/yt_dlp/extractor/rtvs.py b/yt_dlp/extractor/rtvs.py index a84a78da8..defb8d741 100644 --- a/yt_dlp/extractor/rtvs.py +++ b/yt_dlp/extractor/rtvs.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor - from ..utils import ( parse_duration, traverse_obj, diff --git a/yt_dlp/extractor/rutube.py b/yt_dlp/extractor/rutube.py index 287824d08..eb12f32fa 100644 --- a/yt_dlp/extractor/rutube.py +++ b/yt_dlp/extractor/rutube.py @@ -5,8 +5,8 @@ from ..compat import ( compat_str, ) from ..utils import ( - determine_ext, bool_or_none, + determine_ext, int_or_none, parse_qs, try_get, diff --git a/yt_dlp/extractor/rutv.py 
b/yt_dlp/extractor/rutv.py index d7f9a7337..726d49111 100644 --- a/yt_dlp/extractor/rutv.py +++ b/yt_dlp/extractor/rutv.py @@ -1,11 +1,7 @@ import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, - int_or_none, - str_to_int -) +from ..utils import ExtractorError, int_or_none, str_to_int class RUTVIE(InfoExtractor): diff --git a/yt_dlp/extractor/ruutu.py b/yt_dlp/extractor/ruutu.py index 33f6652df..dc61387be 100644 --- a/yt_dlp/extractor/ruutu.py +++ b/yt_dlp/extractor/ruutu.py @@ -4,8 +4,8 @@ import re from .common import InfoExtractor from ..compat import compat_urllib_parse_urlparse from ..utils import ( - determine_ext, ExtractorError, + determine_ext, find_xpath_attr, int_or_none, traverse_obj, diff --git a/yt_dlp/extractor/safari.py b/yt_dlp/extractor/safari.py index 8d322d710..17dff0afa 100644 --- a/yt_dlp/extractor/safari.py +++ b/yt_dlp/extractor/safari.py @@ -2,7 +2,6 @@ import json import re from .common import InfoExtractor - from ..compat import ( compat_parse_qs, compat_urlparse, diff --git a/yt_dlp/extractor/scrippsnetworks.py b/yt_dlp/extractor/scrippsnetworks.py index 3912f7786..85d51cd59 100644 --- a/yt_dlp/extractor/scrippsnetworks.py +++ b/yt_dlp/extractor/scrippsnetworks.py @@ -1,8 +1,8 @@ -import json import hashlib +import json -from .aws import AWSIE from .anvato import AnvatoIE +from .aws import AWSIE from .common import InfoExtractor from ..utils import ( smuggle_url, diff --git a/yt_dlp/extractor/scte.py b/yt_dlp/extractor/scte.py index 9c2ca8c51..fc91d60e1 100644 --- a/yt_dlp/extractor/scte.py +++ b/yt_dlp/extractor/scte.py @@ -2,8 +2,8 @@ import re from .common import InfoExtractor from ..utils import ( - decode_packed_codes, ExtractorError, + decode_packed_codes, urlencode_postdata, ) diff --git a/yt_dlp/extractor/sendtonews.py b/yt_dlp/extractor/sendtonews.py index 1ecea71fc..99fcf51f1 100644 --- a/yt_dlp/extractor/sendtonews.py +++ b/yt_dlp/extractor/sendtonews.py @@ -2,12 +2,12 @@ import re from .common 
import InfoExtractor from ..utils import ( - float_or_none, - parse_iso8601, - update_url_query, - int_or_none, determine_protocol, + float_or_none, + int_or_none, + parse_iso8601, unescapeHTML, + update_url_query, ) diff --git a/yt_dlp/extractor/seznamzpravy.py b/yt_dlp/extractor/seznamzpravy.py index 79e888583..b31d566df 100644 --- a/yt_dlp/extractor/seznamzpravy.py +++ b/yt_dlp/extractor/seznamzpravy.py @@ -4,11 +4,11 @@ from ..compat import ( compat_urllib_parse_urlparse, ) from ..utils import ( - urljoin, int_or_none, parse_codecs, parse_qs, try_get, + urljoin, ) diff --git a/yt_dlp/extractor/shahid.py b/yt_dlp/extractor/shahid.py index d509e8879..89aee2728 100644 --- a/yt_dlp/extractor/shahid.py +++ b/yt_dlp/extractor/shahid.py @@ -5,9 +5,9 @@ import re from .aws import AWSIE from ..networking.exceptions import HTTPError from ..utils import ( - clean_html, ExtractorError, InAdvancePagedList, + clean_html, int_or_none, parse_iso8601, str_or_none, diff --git a/yt_dlp/extractor/shemaroome.py b/yt_dlp/extractor/shemaroome.py index ec9938b8c..cca86ed6c 100644 --- a/yt_dlp/extractor/shemaroome.py +++ b/yt_dlp/extractor/shemaroome.py @@ -4,8 +4,8 @@ from ..compat import ( compat_b64decode, ) from ..utils import ( - bytes_to_intlist, ExtractorError, + bytes_to_intlist, intlist_to_bytes, unified_strdate, ) diff --git a/yt_dlp/extractor/sixplay.py b/yt_dlp/extractor/sixplay.py index ef93b9276..44619a16c 100644 --- a/yt_dlp/extractor/sixplay.py +++ b/yt_dlp/extractor/sixplay.py @@ -6,8 +6,8 @@ from ..utils import ( determine_ext, int_or_none, parse_qs, - try_get, qualities, + try_get, ) diff --git a/yt_dlp/extractor/skynewsarabia.py b/yt_dlp/extractor/skynewsarabia.py index 867782778..234703cf7 100644 --- a/yt_dlp/extractor/skynewsarabia.py +++ b/yt_dlp/extractor/skynewsarabia.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - parse_iso8601, parse_duration, + parse_iso8601, ) diff --git a/yt_dlp/extractor/sohu.py 
b/yt_dlp/extractor/sohu.py index c0ff4f9aa..a41ad303a 100644 --- a/yt_dlp/extractor/sohu.py +++ b/yt_dlp/extractor/sohu.py @@ -8,13 +8,13 @@ from ..compat import ( ) from ..utils import ( ExtractorError, - int_or_none, float_or_none, - url_or_none, - unified_timestamp, - try_get, - urljoin, + int_or_none, traverse_obj, + try_get, + unified_timestamp, + url_or_none, + urljoin, ) diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py index 493eea2a6..773ddd344 100644 --- a/yt_dlp/extractor/sovietscloset.py +++ b/yt_dlp/extractor/sovietscloset.py @@ -1,8 +1,5 @@ from .common import InfoExtractor -from ..utils import ( - try_get, - unified_timestamp -) +from ..utils import try_get, unified_timestamp class SovietsClosetBaseIE(InfoExtractor): diff --git a/yt_dlp/extractor/spankbang.py b/yt_dlp/extractor/spankbang.py index 43da34a32..c73f7971d 100644 --- a/yt_dlp/extractor/spankbang.py +++ b/yt_dlp/extractor/spankbang.py @@ -2,8 +2,8 @@ import re from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, merge_dicts, parse_duration, parse_resolution, diff --git a/yt_dlp/extractor/springboardplatform.py b/yt_dlp/extractor/springboardplatform.py index a98584a27..bdb8ef496 100644 --- a/yt_dlp/extractor/springboardplatform.py +++ b/yt_dlp/extractor/springboardplatform.py @@ -4,11 +4,11 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, - xpath_attr, - xpath_text, - xpath_element, unescapeHTML, unified_timestamp, + xpath_attr, + xpath_element, + xpath_text, ) diff --git a/yt_dlp/extractor/startv.py b/yt_dlp/extractor/startv.py index bb6e8f1ea..312a4fde0 100644 --- a/yt_dlp/extractor/startv.py +++ b/yt_dlp/extractor/startv.py @@ -3,10 +3,10 @@ from ..compat import ( compat_str, ) from ..utils import ( - clean_html, ExtractorError, - traverse_obj, + clean_html, int_or_none, + traverse_obj, ) diff --git a/yt_dlp/extractor/stitcher.py b/yt_dlp/extractor/stitcher.py index 
2fd200f87..46a15e6a1 100644 --- a/yt_dlp/extractor/stitcher.py +++ b/yt_dlp/extractor/stitcher.py @@ -1,9 +1,9 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + ExtractorError, clean_html, clean_podcast_url, - ExtractorError, int_or_none, str_or_none, try_get, diff --git a/yt_dlp/extractor/storyfire.py b/yt_dlp/extractor/storyfire.py index 566f77782..20a70a7bc 100644 --- a/yt_dlp/extractor/storyfire.py +++ b/yt_dlp/extractor/storyfire.py @@ -2,9 +2,9 @@ import functools from .common import InfoExtractor from ..utils import ( + OnDemandPagedList, format_field, int_or_none, - OnDemandPagedList, smuggle_url, ) diff --git a/yt_dlp/extractor/streamable.py b/yt_dlp/extractor/streamable.py index 462861e0e..c303ac53a 100644 --- a/yt_dlp/extractor/streamable.py +++ b/yt_dlp/extractor/streamable.py @@ -3,8 +3,8 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, - try_get, parse_codecs, + try_get, ) diff --git a/yt_dlp/extractor/stripchat.py b/yt_dlp/extractor/stripchat.py index b9523c865..a847925e4 100644 --- a/yt_dlp/extractor/stripchat.py +++ b/yt_dlp/extractor/stripchat.py @@ -3,7 +3,7 @@ from ..utils import ( ExtractorError, UserNotLive, lowercase_escape, - traverse_obj + traverse_obj, ) diff --git a/yt_dlp/extractor/sunporno.py b/yt_dlp/extractor/sunporno.py index 708873a95..501156e51 100644 --- a/yt_dlp/extractor/sunporno.py +++ b/yt_dlp/extractor/sunporno.py @@ -2,10 +2,10 @@ import re from .common import InfoExtractor from ..utils import ( - parse_duration, - int_or_none, - qualities, determine_ext, + int_or_none, + parse_duration, + qualities, ) diff --git a/yt_dlp/extractor/syfy.py b/yt_dlp/extractor/syfy.py index bd2d73842..29e5e573f 100644 --- a/yt_dlp/extractor/syfy.py +++ b/yt_dlp/extractor/syfy.py @@ -1,7 +1,7 @@ from .adobepass import AdobePassIE from ..utils import ( - update_url_query, smuggle_url, + update_url_query, ) diff --git a/yt_dlp/extractor/tbs.py b/yt_dlp/extractor/tbs.py index 
808c6c73d..4e178593f 100644 --- a/yt_dlp/extractor/tbs.py +++ b/yt_dlp/extractor/tbs.py @@ -2,8 +2,8 @@ import re from .turner import TurnerBaseIE from ..compat import ( - compat_urllib_parse_urlparse, compat_parse_qs, + compat_urllib_parse_urlparse, ) from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py index 5eac9aa3f..778fa1263 100644 --- a/yt_dlp/extractor/teachable.py +++ b/yt_dlp/extractor/teachable.py @@ -3,10 +3,10 @@ import re from .common import InfoExtractor from .wistia import WistiaIE from ..utils import ( - clean_html, ExtractorError, - int_or_none, + clean_html, get_element_by_class, + int_or_none, strip_or_none, urlencode_postdata, urljoin, diff --git a/yt_dlp/extractor/teachertube.py b/yt_dlp/extractor/teachertube.py index 90a976297..740240993 100644 --- a/yt_dlp/extractor/teachertube.py +++ b/yt_dlp/extractor/teachertube.py @@ -2,8 +2,8 @@ import re from .common import InfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, qualities, ) diff --git a/yt_dlp/extractor/teamcoco.py b/yt_dlp/extractor/teamcoco.py index d32f81262..3fb899cac 100644 --- a/yt_dlp/extractor/teamcoco.py +++ b/yt_dlp/extractor/teamcoco.py @@ -13,8 +13,8 @@ from ..utils import ( parse_qs, traverse_obj, unified_timestamp, - urljoin, url_or_none, + urljoin, ) diff --git a/yt_dlp/extractor/teamtreehouse.py b/yt_dlp/extractor/teamtreehouse.py index dd802db5b..ba25cdcf6 100644 --- a/yt_dlp/extractor/teamtreehouse.py +++ b/yt_dlp/extractor/teamtreehouse.py @@ -2,9 +2,9 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, clean_html, determine_ext, - ExtractorError, float_or_none, get_element_by_class, get_element_by_id, diff --git a/yt_dlp/extractor/ted.py b/yt_dlp/extractor/ted.py index c28a15498..0969bbb03 100644 --- a/yt_dlp/extractor/ted.py +++ b/yt_dlp/extractor/ted.py @@ -2,14 +2,13 @@ import itertools import re from .common import InfoExtractor - from 
..utils import ( int_or_none, + parse_duration, str_to_int, try_get, - url_or_none, unified_strdate, - parse_duration, + url_or_none, ) diff --git a/yt_dlp/extractor/tele13.py b/yt_dlp/extractor/tele13.py index 212af3785..1705c2d55 100644 --- a/yt_dlp/extractor/tele13.py +++ b/yt_dlp/extractor/tele13.py @@ -1,9 +1,9 @@ from .common import InfoExtractor from .youtube import YoutubeIE from ..utils import ( + determine_ext, js_to_json, qualities, - determine_ext, ) diff --git a/yt_dlp/extractor/telewebion.py b/yt_dlp/extractor/telewebion.py index 5fdcddd8b..380c84d98 100644 --- a/yt_dlp/extractor/telewebion.py +++ b/yt_dlp/extractor/telewebion.py @@ -1,4 +1,5 @@ from __future__ import annotations + import functools import json import textwrap diff --git a/yt_dlp/extractor/tempo.py b/yt_dlp/extractor/tempo.py index 9318d6f9a..71e54eb0c 100644 --- a/yt_dlp/extractor/tempo.py +++ b/yt_dlp/extractor/tempo.py @@ -5,7 +5,7 @@ from ..utils import ( int_or_none, parse_iso8601, traverse_obj, - try_call + try_call, ) diff --git a/yt_dlp/extractor/tencent.py b/yt_dlp/extractor/tencent.py index 6618ea4e6..ae2cb483f 100644 --- a/yt_dlp/extractor/tencent.py +++ b/yt_dlp/extractor/tencent.py @@ -8,8 +8,8 @@ from .common import InfoExtractor from ..aes import aes_cbc_encrypt_bytes from ..utils import ( ExtractorError, - float_or_none, determine_ext, + float_or_none, int_or_none, js_to_json, traverse_obj, diff --git a/yt_dlp/extractor/theguardian.py b/yt_dlp/extractor/theguardian.py index a231eccf4..fb6407715 100644 --- a/yt_dlp/extractor/theguardian.py +++ b/yt_dlp/extractor/theguardian.py @@ -10,7 +10,7 @@ from ..utils import ( parse_qs, traverse_obj, unified_strdate, - urljoin + urljoin, ) diff --git a/yt_dlp/extractor/theintercept.py b/yt_dlp/extractor/theintercept.py index a991a4dfd..99f0d42ef 100644 --- a/yt_dlp/extractor/theintercept.py +++ b/yt_dlp/extractor/theintercept.py @@ -1,9 +1,9 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( 
- parse_iso8601, - int_or_none, ExtractorError, + int_or_none, + parse_iso8601, ) diff --git a/yt_dlp/extractor/theplatform.py b/yt_dlp/extractor/theplatform.py index 9160f5ec6..eeb33a660 100644 --- a/yt_dlp/extractor/theplatform.py +++ b/yt_dlp/extractor/theplatform.py @@ -1,29 +1,27 @@ -import re -import time -import hmac import binascii import hashlib +import hmac +import re +import time - -from .once import OnceIE from .adobepass import AdobePassIE -from ..networking import Request +from .once import OnceIE +from ..networking import HEADRequest, Request from ..utils import ( - determine_ext, ExtractorError, + determine_ext, + find_xpath_attr, float_or_none, int_or_none, - parse_qs, - unsmuggle_url, - update_url_query, - xpath_with_ns, mimetype2ext, - find_xpath_attr, + parse_qs, traverse_obj, + unsmuggle_url, update_url, + update_url_query, urlhandle_detect_ext, + xpath_with_ns, ) -from ..networking import HEADRequest default_ns = 'http://www.w3.org/2005/SMIL21/Language' _x = lambda p: xpath_with_ns(p, {'smil': default_ns}) diff --git a/yt_dlp/extractor/threeqsdn.py b/yt_dlp/extractor/threeqsdn.py index 7841f8da6..f7a13d2c3 100644 --- a/yt_dlp/extractor/threeqsdn.py +++ b/yt_dlp/extractor/threeqsdn.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - determine_ext, ExtractorError, + determine_ext, float_or_none, int_or_none, join_nonempty, diff --git a/yt_dlp/extractor/toypics.py b/yt_dlp/extractor/toypics.py index aa7ee6c48..ccb2ef816 100644 --- a/yt_dlp/extractor/toypics.py +++ b/yt_dlp/extractor/toypics.py @@ -1,6 +1,7 @@ -from .common import InfoExtractor import re +from .common import InfoExtractor + class ToypicsIE(InfoExtractor): _WORKING = False diff --git a/yt_dlp/extractor/triller.py b/yt_dlp/extractor/triller.py index 56e51fea8..3bdeedd43 100644 --- a/yt_dlp/extractor/triller.py +++ b/yt_dlp/extractor/triller.py @@ -14,8 +14,8 @@ from ..utils import ( traverse_obj, 
unified_timestamp, url_basename, - urljoin, url_or_none, + urljoin, ) diff --git a/yt_dlp/extractor/trueid.py b/yt_dlp/extractor/trueid.py index 86f0990e8..efedac180 100644 --- a/yt_dlp/extractor/trueid.py +++ b/yt_dlp/extractor/trueid.py @@ -1,13 +1,13 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, parse_age_limit, traverse_obj, unified_timestamp, - url_or_none + url_or_none, ) diff --git a/yt_dlp/extractor/tumblr.py b/yt_dlp/extractor/tumblr.py index a26bdcaae..f2d0c5901 100644 --- a/yt_dlp/extractor/tumblr.py +++ b/yt_dlp/extractor/tumblr.py @@ -3,7 +3,7 @@ from ..utils import ( ExtractorError, int_or_none, traverse_obj, - urlencode_postdata + urlencode_postdata, ) diff --git a/yt_dlp/extractor/turner.py b/yt_dlp/extractor/turner.py index 630d84bdc..b27db87bf 100644 --- a/yt_dlp/extractor/turner.py +++ b/yt_dlp/extractor/turner.py @@ -3,17 +3,17 @@ import re from .adobepass import AdobePassIE from ..compat import compat_str from ..utils import ( - fix_xml_ampersands, - xpath_text, - int_or_none, - determine_ext, - float_or_none, - parse_duration, - xpath_attr, - update_url_query, ExtractorError, + determine_ext, + fix_xml_ampersands, + float_or_none, + int_or_none, + parse_duration, strip_or_none, + update_url_query, url_or_none, + xpath_attr, + xpath_text, ) diff --git a/yt_dlp/extractor/tv2.py b/yt_dlp/extractor/tv2.py index 7756aa3f5..9b19e7995 100644 --- a/yt_dlp/extractor/tv2.py +++ b/yt_dlp/extractor/tv2.py @@ -3,10 +3,10 @@ import re from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - determine_ext, ExtractorError, - int_or_none, + determine_ext, float_or_none, + int_or_none, js_to_json, parse_iso8601, remove_end, diff --git a/yt_dlp/extractor/tv2hu.py b/yt_dlp/extractor/tv2hu.py index 9c0a111c0..cd35ff5fb 100644 --- a/yt_dlp/extractor/tv2hu.py +++ b/yt_dlp/extractor/tv2hu.py @@ -1,8 
+1,8 @@ # encoding: utf-8 from .common import InfoExtractor from ..utils import ( - traverse_obj, UnsupportedError, + traverse_obj, ) diff --git a/yt_dlp/extractor/tvanouvelles.py b/yt_dlp/extractor/tvanouvelles.py index b9f5e110e..dbebda4f4 100644 --- a/yt_dlp/extractor/tvanouvelles.py +++ b/yt_dlp/extractor/tvanouvelles.py @@ -1,7 +1,7 @@ import re -from .common import InfoExtractor from .brightcove import BrightcoveNewIE +from .common import InfoExtractor class TVANouvellesIE(InfoExtractor): diff --git a/yt_dlp/extractor/tvn24.py b/yt_dlp/extractor/tvn24.py index 527681315..ac480580a 100644 --- a/yt_dlp/extractor/tvn24.py +++ b/yt_dlp/extractor/tvn24.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - int_or_none, NO_DEFAULT, + int_or_none, unescapeHTML, ) diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py index a8d00e243..f1ebf027a 100644 --- a/yt_dlp/extractor/tvp.py +++ b/yt_dlp/extractor/tvp.py @@ -4,10 +4,10 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, clean_html, determine_ext, dict_get, - ExtractorError, int_or_none, js_to_json, str_or_none, diff --git a/yt_dlp/extractor/tvplay.py b/yt_dlp/extractor/tvplay.py index 48a6efe1c..29185d34b 100644 --- a/yt_dlp/extractor/tvplay.py +++ b/yt_dlp/extractor/tvplay.py @@ -4,8 +4,8 @@ from .common import InfoExtractor from ..compat import compat_urlparse from ..networking.exceptions import HTTPError from ..utils import ( - determine_ext, ExtractorError, + determine_ext, int_or_none, parse_iso8601, qualities, diff --git a/yt_dlp/extractor/tvplayer.py b/yt_dlp/extractor/tvplayer.py index 228c2366e..d43bdc2ff 100644 --- a/yt_dlp/extractor/tvplayer.py +++ b/yt_dlp/extractor/tvplayer.py @@ -2,10 +2,10 @@ from .common import InfoExtractor from ..compat import compat_str from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, extract_attributes, try_get, urlencode_postdata, - ExtractorError, ) diff --git 
a/yt_dlp/extractor/tweakers.py b/yt_dlp/extractor/tweakers.py index e8e1fc666..9249550c9 100644 --- a/yt_dlp/extractor/tweakers.py +++ b/yt_dlp/extractor/tweakers.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - int_or_none, determine_ext, + int_or_none, mimetype2ext, ) diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index fc80dade8..1a11162a0 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -1,10 +1,10 @@ +import functools import json import random import re from .common import InfoExtractor from .periscope import PeriscopeBaseIE, PeriscopeIE -from ..compat import functools # isort: split from ..compat import ( compat_parse_qs, compat_urllib_parse_unquote, diff --git a/yt_dlp/extractor/udn.py b/yt_dlp/extractor/udn.py index 10668ac4b..d5849d29b 100644 --- a/yt_dlp/extractor/udn.py +++ b/yt_dlp/extractor/udn.py @@ -1,12 +1,12 @@ import re from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( determine_ext, int_or_none, js_to_json, ) -from ..compat import compat_urlparse class UDNEmbedIE(InfoExtractor): diff --git a/yt_dlp/extractor/ukcolumn.py b/yt_dlp/extractor/ukcolumn.py index f914613c0..f141804c8 100644 --- a/yt_dlp/extractor/ukcolumn.py +++ b/yt_dlp/extractor/ukcolumn.py @@ -1,11 +1,11 @@ -from ..utils import ( - unescapeHTML, - urljoin, - ExtractorError, -) from .common import InfoExtractor from .vimeo import VimeoIE from .youtube import YoutubeIE +from ..utils import ( + ExtractorError, + unescapeHTML, + urljoin, +) class UkColumnIE(InfoExtractor): diff --git a/yt_dlp/extractor/urplay.py b/yt_dlp/extractor/urplay.py index 7f97fc95f..928e6e1c2 100644 --- a/yt_dlp/extractor/urplay.py +++ b/yt_dlp/extractor/urplay.py @@ -1,9 +1,9 @@ from .common import InfoExtractor from ..utils import ( - dict_get, ExtractorError, - int_or_none, ISO639Utils, + dict_get, + int_or_none, parse_age_limit, try_get, unified_timestamp, diff --git 
a/yt_dlp/extractor/usatoday.py b/yt_dlp/extractor/usatoday.py index 3243f3e3b..42a28c509 100644 --- a/yt_dlp/extractor/usatoday.py +++ b/yt_dlp/extractor/usatoday.py @@ -1,4 +1,5 @@ from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( ExtractorError, get_element_by_attribute, @@ -6,7 +7,6 @@ from ..utils import ( try_get, update_url_query, ) -from ..compat import compat_str class USATodayIE(InfoExtractor): diff --git a/yt_dlp/extractor/ustream.py b/yt_dlp/extractor/ustream.py index 5df241653..046e3d768 100644 --- a/yt_dlp/extractor/ustream.py +++ b/yt_dlp/extractor/ustream.py @@ -7,10 +7,10 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( - encode_data_uri, ExtractorError, - int_or_none, + encode_data_uri, float_or_none, + int_or_none, join_nonempty, mimetype2ext, str_or_none, diff --git a/yt_dlp/extractor/ustudio.py b/yt_dlp/extractor/ustudio.py index c3aeeb961..f6ce5b357 100644 --- a/yt_dlp/extractor/ustudio.py +++ b/yt_dlp/extractor/ustudio.py @@ -1,8 +1,8 @@ from .common import InfoExtractor from ..utils import ( int_or_none, - unified_strdate, unescapeHTML, + unified_strdate, ) diff --git a/yt_dlp/extractor/veo.py b/yt_dlp/extractor/veo.py index ef44d421e..205f8ea63 100644 --- a/yt_dlp/extractor/veo.py +++ b/yt_dlp/extractor/veo.py @@ -1,5 +1,4 @@ from .common import InfoExtractor - from ..utils import ( int_or_none, mimetype2ext, diff --git a/yt_dlp/extractor/vesti.py b/yt_dlp/extractor/vesti.py index 3f2dddbe9..a2e90226a 100644 --- a/yt_dlp/extractor/vesti.py +++ b/yt_dlp/extractor/vesti.py @@ -1,8 +1,8 @@ import re from .common import InfoExtractor -from ..utils import ExtractorError from .rutv import RUTVIE +from ..utils import ExtractorError class VestiIE(InfoExtractor): diff --git a/yt_dlp/extractor/vevo.py b/yt_dlp/extractor/vevo.py index aa40227a7..7715d6839 100644 --- a/yt_dlp/extractor/vevo.py +++ b/yt_dlp/extractor/vevo.py @@ -1,5 +1,5 @@ -import re import json +import re from .common import 
InfoExtractor from ..compat import compat_str diff --git a/yt_dlp/extractor/vice.py b/yt_dlp/extractor/vice.py index d31908fb1..b072d9d73 100644 --- a/yt_dlp/extractor/vice.py +++ b/yt_dlp/extractor/vice.py @@ -10,10 +10,10 @@ from .youtube import YoutubeIE from ..compat import compat_str from ..networking.exceptions import HTTPError from ..utils import ( - clean_html, ExtractorError, - int_or_none, OnDemandPagedList, + clean_html, + int_or_none, parse_age_limit, str_or_none, try_get, diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index 770aa284d..6322bb04b 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - clean_html, ExtractorError, + clean_html, format_field, get_element_by_class, int_or_none, diff --git a/yt_dlp/extractor/vidlii.py b/yt_dlp/extractor/vidlii.py index 44353b7fc..e1219a8a0 100644 --- a/yt_dlp/extractor/vidlii.py +++ b/yt_dlp/extractor/vidlii.py @@ -3,8 +3,8 @@ import re from .common import InfoExtractor from ..networking import HEADRequest from ..utils import ( - format_field, float_or_none, + format_field, get_element_by_id, int_or_none, str_to_int, diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 91b976403..ac96ade18 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -1,21 +1,21 @@ import base64 import functools -import re import itertools +import re from .common import InfoExtractor from ..compat import compat_str, compat_urlparse from ..networking import HEADRequest, Request from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, + OnDemandPagedList, clean_html, determine_ext, - ExtractorError, get_element_by_class, - js_to_json, int_or_none, + js_to_json, merge_dicts, - OnDemandPagedList, parse_filesize, parse_iso8601, parse_qs, @@ -26,8 +26,8 @@ from ..utils import ( unified_timestamp, unsmuggle_url, urlencode_postdata, - urljoin, urlhandle_detect_ext, + 
urljoin, ) diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py index 6f9af9f64..480f49b7b 100644 --- a/yt_dlp/extractor/viu.py +++ b/yt_dlp/extractor/viu.py @@ -1,8 +1,8 @@ -import re import json -import uuid import random +import re import urllib.parse +import uuid from .common import InfoExtractor from ..compat import compat_str @@ -10,10 +10,10 @@ from ..utils import ( ExtractorError, int_or_none, remove_end, + smuggle_url, strip_or_none, traverse_obj, try_get, - smuggle_url, unified_timestamp, unsmuggle_url, url_or_none, diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index 28d502685..132d65bca 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -20,6 +20,7 @@ from ..utils import ( parse_resolution, str_or_none, str_to_int, + traverse_obj, try_call, unescapeHTML, unified_timestamp, @@ -27,7 +28,6 @@ from ..utils import ( url_or_none, urlencode_postdata, urljoin, - traverse_obj, ) diff --git a/yt_dlp/extractor/walla.py b/yt_dlp/extractor/walla.py index a1a9c1708..3ac0f8387 100644 --- a/yt_dlp/extractor/walla.py +++ b/yt_dlp/extractor/walla.py @@ -2,8 +2,8 @@ import re from .common import InfoExtractor from ..utils import ( - xpath_text, int_or_none, + xpath_text, ) diff --git a/yt_dlp/extractor/washingtonpost.py b/yt_dlp/extractor/washingtonpost.py index 74501b1d2..1cfed2da5 100644 --- a/yt_dlp/extractor/washingtonpost.py +++ b/yt_dlp/extractor/washingtonpost.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor - from ..utils import traverse_obj diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py index f80f140ed..0b7ddd239 100644 --- a/yt_dlp/extractor/wdr.py +++ b/yt_dlp/extractor/wdr.py @@ -6,16 +6,16 @@ from ..compat import ( compat_urlparse, ) from ..utils import ( + ExtractorError, determine_ext, dict_get, - ExtractorError, js_to_json, strip_jsonp, try_get, unified_strdate, update_url_query, - urlhandle_detect_ext, url_or_none, + urlhandle_detect_ext, ) diff --git a/yt_dlp/extractor/weibo.py 
b/yt_dlp/extractor/weibo.py index 2fca745aa..b6a659385 100644 --- a/yt_dlp/extractor/weibo.py +++ b/yt_dlp/extractor/weibo.py @@ -1,6 +1,6 @@ +import itertools import json import random -import itertools import urllib.parse from .common import InfoExtractor diff --git a/yt_dlp/extractor/whowatch.py b/yt_dlp/extractor/whowatch.py index f2808cd9f..492891d78 100644 --- a/yt_dlp/extractor/whowatch.py +++ b/yt_dlp/extractor/whowatch.py @@ -1,12 +1,12 @@ from .common import InfoExtractor +from ..compat import compat_str from ..utils import ( + ExtractorError, int_or_none, qualities, try_call, try_get, - ExtractorError, ) -from ..compat import compat_str class WhoWatchIE(InfoExtractor): diff --git a/yt_dlp/extractor/wimtv.py b/yt_dlp/extractor/wimtv.py index f9bf092df..d7d77c0db 100644 --- a/yt_dlp/extractor/wimtv.py +++ b/yt_dlp/extractor/wimtv.py @@ -1,9 +1,9 @@ from .common import InfoExtractor from ..utils import ( + ExtractorError, determine_ext, parse_duration, urlencode_postdata, - ExtractorError, ) diff --git a/yt_dlp/extractor/wppilot.py b/yt_dlp/extractor/wppilot.py index 5e590e2f4..0ef4e8e53 100644 --- a/yt_dlp/extractor/wppilot.py +++ b/yt_dlp/extractor/wppilot.py @@ -1,13 +1,13 @@ -from .common import InfoExtractor -from ..utils import ( - try_get, - ExtractorError, -) - import json import random import re +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + try_get, +) + class WPPilotBaseIE(InfoExtractor): _VIDEO_URL = 'https://pilot.wp.pl/api/v1/channel/%s' diff --git a/yt_dlp/extractor/wsj.py b/yt_dlp/extractor/wsj.py index 86e264679..35fe30362 100644 --- a/yt_dlp/extractor/wsj.py +++ b/yt_dlp/extractor/wsj.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - int_or_none, float_or_none, + int_or_none, unified_strdate, ) diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py index 01ac5ddb6..0b3a620ec 100644 --- a/yt_dlp/extractor/xhamster.py +++ b/yt_dlp/extractor/xhamster.py @@ -4,11 +4,11 
@@ import re from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + ExtractorError, clean_html, determine_ext, dict_get, extract_attributes, - ExtractorError, float_or_none, int_or_none, parse_duration, diff --git a/yt_dlp/extractor/xnxx.py b/yt_dlp/extractor/xnxx.py index 1452aaec3..74d4f0419 100644 --- a/yt_dlp/extractor/xnxx.py +++ b/yt_dlp/extractor/xnxx.py @@ -2,9 +2,9 @@ import re from .common import InfoExtractor from ..utils import ( + NO_DEFAULT, determine_ext, int_or_none, - NO_DEFAULT, str_to_int, ) diff --git a/yt_dlp/extractor/xstream.py b/yt_dlp/extractor/xstream.py index 8dd1cd9ef..322e86570 100644 --- a/yt_dlp/extractor/xstream.py +++ b/yt_dlp/extractor/xstream.py @@ -2,11 +2,11 @@ import re from .common import InfoExtractor from ..utils import ( + find_xpath_attr, int_or_none, parse_iso8601, - xpath_with_ns, xpath_text, - find_xpath_attr, + xpath_with_ns, ) diff --git a/yt_dlp/extractor/xvideos.py b/yt_dlp/extractor/xvideos.py index a489033ab..6b16ac291 100644 --- a/yt_dlp/extractor/xvideos.py +++ b/yt_dlp/extractor/xvideos.py @@ -3,9 +3,9 @@ import re from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote from ..utils import ( + ExtractorError, clean_html, determine_ext, - ExtractorError, int_or_none, parse_duration, ) diff --git a/yt_dlp/extractor/xxxymovies.py b/yt_dlp/extractor/xxxymovies.py index e3e3a9fe6..aa6c84d09 100644 --- a/yt_dlp/extractor/xxxymovies.py +++ b/yt_dlp/extractor/xxxymovies.py @@ -1,7 +1,7 @@ from .common import InfoExtractor from ..utils import ( - parse_duration, int_or_none, + parse_duration, ) diff --git a/yt_dlp/extractor/yandexmusic.py b/yt_dlp/extractor/yandexmusic.py index 794dc3eae..acfe69bf4 100644 --- a/yt_dlp/extractor/yandexmusic.py +++ b/yt_dlp/extractor/yandexmusic.py @@ -5,8 +5,8 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( ExtractorError, - int_or_none, float_or_none, + int_or_none, try_get, ) diff 
--git a/yt_dlp/extractor/zapiks.py b/yt_dlp/extractor/zapiks.py index 88f526bbc..2a12aa509 100644 --- a/yt_dlp/extractor/zapiks.py +++ b/yt_dlp/extractor/zapiks.py @@ -2,11 +2,11 @@ import re from .common import InfoExtractor from ..utils import ( + int_or_none, parse_duration, parse_iso8601, - xpath_with_ns, xpath_text, - int_or_none, + xpath_with_ns, ) diff --git a/yt_dlp/extractor/zhihu.py b/yt_dlp/extractor/zhihu.py index c24b33874..18b22a5c7 100644 --- a/yt_dlp/extractor/zhihu.py +++ b/yt_dlp/extractor/zhihu.py @@ -1,5 +1,5 @@ from .common import InfoExtractor -from ..utils import format_field, float_or_none, int_or_none +from ..utils import float_or_none, format_field, int_or_none class ZhihuIE(InfoExtractor): diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py index ff5eac89a..909a7a3ae 100644 --- a/yt_dlp/extractor/zingmp3.py +++ b/yt_dlp/extractor/zingmp3.py @@ -10,8 +10,8 @@ from ..utils import ( int_or_none, join_nonempty, try_call, + url_or_none, urljoin, - url_or_none ) from ..utils.traversal import traverse_obj diff --git a/yt_dlp/extractor/zype.py b/yt_dlp/extractor/zype.py index 2f3b4c47f..8d3156d64 100644 --- a/yt_dlp/extractor/zype.py +++ b/yt_dlp/extractor/zype.py @@ -3,8 +3,8 @@ import re from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( - dict_get, ExtractorError, + dict_get, int_or_none, js_to_json, parse_iso8601, From a4da9db87b6486b270c15dfa07ab5bfedc83f6bd Mon Sep 17 00:00:00 2001 From: Simon Sawicki <contact@grub4k.xyz> Date: Sun, 26 May 2024 23:09:53 +0200 Subject: [PATCH 28/48] Update to ytdl-commit-a08f2b7 (#10012) [ie] Rework JWPlayer extraction - https://github.com/ytdl-org/youtube-dl/commit/f66372403fd9e1661199fea100ba2600fa9697b2 [ie/gbnews] Add extractor - https://github.com/ytdl-org/youtube-dl/commit/70f230f9cf28e948662599b6257cb7d1262870e3 [ie/caffeinetv] Add extractor - https://github.com/ytdl-org/youtube-dl/commit/40bd5c18153afe765caa6726302ee1dd8a9a2ce6 
[ie/youporn] Improve extraction - https://github.com/ytdl-org/youtube-dl/commit/0b2ce3685e02ea1a3ccee1026572e081b8f6ac83 [ie/youporn] Add playlist extractors - https://github.com/ytdl-org/youtube-dl/commit/668332b9733023ca2e927eeb2208725022248af8 Closes #9188, Closes #9523 Authored by: Grub4K, bashonly --- README.md | 2 +- yt_dlp/extractor/_extractors.py | 12 +- yt_dlp/extractor/caffeinetv.py | 74 ++++++ yt_dlp/extractor/common.py | 47 ++-- yt_dlp/extractor/gbnews.py | 107 +++++++++ yt_dlp/extractor/youporn.py | 391 +++++++++++++++++++++++++++++++- 6 files changed, 588 insertions(+), 45 deletions(-) create mode 100644 yt_dlp/extractor/caffeinetv.py create mode 100644 yt_dlp/extractor/gbnews.py diff --git a/README.md b/README.md index 0636d2f6e..5965d600e 100644 --- a/README.md +++ b/README.md @@ -2123,7 +2123,7 @@ with yt_dlp.YoutubeDL(ydl_opts) as ydl: ### New features -* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@be008e6**](https://github.com/ytdl-org/youtube-dl/commit/be008e657d79832642e2158557c899249c9e31cd) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) +* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@a08f2b7**](https://github.com/ytdl-org/youtube-dl/commit/a08f2b7e4567cdc50c0614ee0a4ffdff49b8b6e6) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index e287e04bc..37e6fc318 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -308,6 +308,7 @@ from .businessinsider import BusinessInsiderIE from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE 
from .c56 import C56IE +from .caffeinetv import CaffeineTVIE from .callin import CallinIE from .caltrans import CaltransIE from .cam4 import CAM4IE @@ -720,6 +721,7 @@ from .gamespot import GameSpotIE from .gamestar import GameStarIE from .gaskrank import GaskrankIE from .gazeta import GazetaIE +from .gbnews import GBNewsIE from .gdcvault import GDCVaultIE from .gedidigital import GediDigitalIE from .generic import GenericIE @@ -2501,7 +2503,15 @@ from .younow import ( YouNowLiveIE, YouNowMomentIE, ) -from .youporn import YouPornIE +from .youporn import ( + YouPornCategoryIE, + YouPornChannelIE, + YouPornCollectionIE, + YouPornIE, + YouPornStarIE, + YouPornTagIE, + YouPornVideosIE, +) from .zaiko import ( ZaikoETicketIE, ZaikoIE, diff --git a/yt_dlp/extractor/caffeinetv.py b/yt_dlp/extractor/caffeinetv.py new file mode 100644 index 000000000..aa107f858 --- /dev/null +++ b/yt_dlp/extractor/caffeinetv.py @@ -0,0 +1,74 @@ +from .common import InfoExtractor +from ..utils import ( + determine_ext, + int_or_none, + parse_iso8601, + traverse_obj, + urljoin, +) + + +class CaffeineTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?caffeine\.tv/[^/?#]+/video/(?P<id>[\da-f-]+)' + _TESTS = [{ + 'url': 'https://www.caffeine.tv/TsuSurf/video/cffc0a00-e73f-11ec-8080-80017d29f26e', + 'info_dict': { + 'id': 'cffc0a00-e73f-11ec-8080-80017d29f26e', + 'ext': 'mp4', + 'title': 'GOOOOD MORNINNNNN #highlights', + 'timestamp': 1654702180, + 'upload_date': '20220608', + 'uploader': 'RahJON Wicc', + 'uploader_id': 'TsuSurf', + 'duration': 3145, + 'age_limit': 17, + 'thumbnail': 'https://www.caffeine.tv/broadcasts/776b6f84-9cd5-42e3-af1d-4a776eeed697/replay/lobby.jpg', + 'comment_count': int, + 'view_count': int, + 'like_count': int, + 'tags': ['highlights', 'battlerap'], + }, + 'params': { + 'skip_download': 'm3u8', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + json_data = self._download_json( + 
f'https://api.caffeine.tv/social/public/activity/{video_id}', video_id) + broadcast_info = traverse_obj(json_data, ('broadcast_info', {dict})) or {} + + video_url = broadcast_info['video_url'] + ext = determine_ext(video_url) + if ext == 'm3u8': + formats = self._extract_m3u8_formats(video_url, video_id, 'mp4') + else: + formats = [{'url': video_url}] + + return { + 'id': video_id, + 'formats': formats, + **traverse_obj(json_data, { + 'like_count': ('like_count', {int_or_none}), + 'view_count': ('view_count', {int_or_none}), + 'comment_count': ('comment_count', {int_or_none}), + 'tags': ('tags', ..., {str}, {lambda x: x or None}), + 'uploader': ('user', 'name', {str}), + 'uploader_id': (((None, 'user'), 'username'), {str}, any), + 'is_live': ('is_live', {bool}), + }), + **traverse_obj(broadcast_info, { + 'title': ('broadcast_title', {str}), + 'duration': ('content_duration', {int_or_none}), + 'timestamp': ('broadcast_start_time', {parse_iso8601}), + 'thumbnail': ('preview_image_path', {lambda x: urljoin(url, x)}), + }), + 'age_limit': { + # assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system + 'FOUR_PLUS': 0, + 'NINE_PLUS': 9, + 'TWELVE_PLUS': 12, + 'SEVENTEEN_PLUS': 17, + }.get(broadcast_info.get('content_rating'), 17), + } diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index a33cef354..38daad72e 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3384,23 +3384,16 @@ class InfoExtractor: return formats def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json): - mobj = re.search( - r'''(?s)jwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?!</script>).*?\.\s*setup\s*\(\s*(?P<options>(?:\([^)]*\)|[^)])+)\s*\)''', - webpage) - if mobj: - try: - jwplayer_data = self._parse_json(mobj.group('options'), - video_id=video_id, - transform_source=transform_source) - except ExtractorError: - pass - else: - if isinstance(jwplayer_data, dict): - return 
jwplayer_data + return self._search_json( + r'''(?<!-)\bjwplayer\s*\(\s*(?P<q>'|")(?!(?P=q)).+(?P=q)\s*\)(?:(?!</script>).)*?\.\s*(?:setup\s*\(|(?P<load>load)\s*\(\s*\[)''', + webpage, 'JWPlayer data', video_id, + # must be a {...} or sequence, ending + contains_pattern=r'\{(?s:.*)}(?(load)(?:\s*,\s*\{(?s:.*)})*)', end_pattern=r'(?(load)\]|\))', + transform_source=transform_source, default=None) - def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): + def _extract_jwplayer_data(self, webpage, video_id, *args, transform_source=js_to_json, **kwargs): jwplayer_data = self._find_jwplayer_data( - webpage, video_id, transform_source=js_to_json) + webpage, video_id, transform_source=transform_source) return self._parse_jwplayer_data( jwplayer_data, video_id, *args, **kwargs) @@ -3432,22 +3425,14 @@ class InfoExtractor: mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url) subtitles = {} - tracks = video_data.get('tracks') - if tracks and isinstance(tracks, list): - for track in tracks: - if not isinstance(track, dict): - continue - track_kind = track.get('kind') - if not track_kind or not isinstance(track_kind, str): - continue - if track_kind.lower() not in ('captions', 'subtitles'): - continue - track_url = urljoin(base_url, track.get('file')) - if not track_url: - continue - subtitles.setdefault(track.get('label') or 'en', []).append({ - 'url': self._proto_relative_url(track_url) - }) + for track in traverse_obj(video_data, ( + 'tracks', lambda _, v: v['kind'].lower() in ('captions', 'subtitles'))): + track_url = urljoin(base_url, track.get('file')) + if not track_url: + continue + subtitles.setdefault(track.get('label') or 'en', []).append({ + 'url': self._proto_relative_url(track_url) + }) entry = { 'id': this_video_id, diff --git a/yt_dlp/extractor/gbnews.py b/yt_dlp/extractor/gbnews.py new file mode 100644 index 000000000..bb1554eea --- /dev/null +++ b/yt_dlp/extractor/gbnews.py @@ -0,0 +1,107 @@ +import functools + +from .common import 
InfoExtractor +from ..utils import ( + ExtractorError, + extract_attributes, + get_elements_html_by_class, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class GBNewsIE(InfoExtractor): + IE_DESC = 'GB News clips, features and live streams' + _VALID_URL = r'https?://(?:www\.)?gbnews\.(?:uk|com)/(?:\w+/)?(?P<id>[^#?]+)' + + _PLATFORM = 'safari' + _SSMP_URL = 'https://mm-v2.simplestream.com/ssmp/api.php' + _TESTS = [{ + 'url': 'https://www.gbnews.com/news/bbc-claudine-gay-harvard-university-antisemitism-row', + 'info_dict': { + 'id': '52264136', + 'ext': 'mp4', + 'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)', + 'display_id': 'bbc-claudine-gay-harvard-university-antisemitism-row', + 'description': 'The post was criticised by former employers of the broadcaster', + 'title': 'BBC deletes post after furious backlash over headline downplaying antisemitism', + }, + }, { + 'url': 'https://www.gbnews.com/royal/prince-harry-in-love-with-kate-meghan-markle-jealous-royal', + 'info_dict': { + 'id': '52328390', + 'ext': 'mp4', + 'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)', + 'display_id': 'prince-harry-in-love-with-kate-meghan-markle-jealous-royal', + 'description': 'Ingrid Seward has published 17 books documenting the highs and lows of the Royal Family', + 'title': 'Royal author claims Prince Harry was \'in love\' with Kate - Meghan was \'jealous\'', + } + }, { + 'url': 'https://www.gbnews.uk/watchlive', + 'info_dict': { + 'id': '1069', + 'ext': 'mp4', + 'thumbnail': r're:https?://www\.gbnews\.\w+/.+\.(?:jpe?g|png|webp)', + 'display_id': 'watchlive', + 'live_status': 'is_live', + 'title': r're:^GB News Live', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + @functools.lru_cache + def _get_ss_endpoint(self, data_id, data_env): + if not data_id: + data_id = 'GB003' + if not data_env: + data_env = 'production' + + json_data = self._download_json( + self._SSMP_URL, None, 'Downloading Simplestream JSON metadata', 
query={ + 'id': data_id, + 'env': data_env, + }) + meta_url = traverse_obj(json_data, ('response', 'api_hostname', {url_or_none})) + if not meta_url: + raise ExtractorError('No API host found') + + return meta_url + + def _real_extract(self, url): + display_id = self._match_id(url).rpartition('/')[2] + webpage = self._download_webpage(url, display_id) + + video_data = None + elements = get_elements_html_by_class('simplestream', webpage) + for html_tag in elements: + attributes = extract_attributes(html_tag) + if 'sidebar' not in (attributes.get('class') or ''): + video_data = attributes + if not video_data: + raise ExtractorError('Could not find video element', expected=True) + + endpoint_url = self._get_ss_endpoint(video_data.get('data-id'), video_data.get('data-env')) + + uvid = video_data['data-uvid'] + video_type = video_data.get('data-type') + if not video_type or video_type == 'vod': + video_type = 'show' + stream_data = self._download_json( + f'{endpoint_url}/api/{video_type}/stream/{uvid}', + uvid, 'Downloading stream JSON', query={ + 'key': video_data.get('data-key'), + 'platform': self._PLATFORM, + }) + if traverse_obj(stream_data, 'drm'): + self.report_drm(uvid) + + return { + 'id': uvid, + 'display_id': display_id, + 'title': self._og_search_title(webpage, default=None), + 'description': self._og_search_description(webpage, default=None), + 'formats': self._extract_m3u8_formats(traverse_obj(stream_data, ( + 'response', 'stream', {url_or_none})), uvid, 'mp4'), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), + 'is_live': video_type == 'live', + } diff --git a/yt_dlp/extractor/youporn.py b/yt_dlp/extractor/youporn.py index 6d4e31bf3..0e047aa16 100644 --- a/yt_dlp/extractor/youporn.py +++ b/yt_dlp/extractor/youporn.py @@ -1,19 +1,27 @@ +import itertools import re from .common import InfoExtractor from ..utils import ( + ExtractorError, + clean_html, extract_attributes, + get_element_by_class, + get_element_by_id, + 
get_elements_html_by_class, int_or_none, merge_dicts, - str_to_int, + parse_count, + parse_qs, traverse_obj, unified_strdate, url_or_none, + urljoin, ) class YouPornIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?' + _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?/?(?:[#?]|$)' _EMBED_REGEX = [r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)'] _TESTS = [{ 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', @@ -34,7 +42,7 @@ class YouPornIE(InfoExtractor): 'tags': list, 'age_limit': 18, }, - 'skip': 'This video has been disabled', + 'skip': 'This video has been deactivated', }, { # Unknown uploader 'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4', @@ -72,7 +80,6 @@ class YouPornIE(InfoExtractor): 'id': '16290308', 'age_limit': 18, 'categories': [], - 'description': str, # TODO: detect/remove SEO spam description in ytdl backport 'display_id': 'tinderspecial-trailer1', 'duration': 298.0, 'ext': 'mp4', @@ -90,7 +97,17 @@ class YouPornIE(InfoExtractor): video_id, display_id = self._match_valid_url(url).group('id', 'display_id') self._set_cookie('.youporn.com', 'age_verified', '1') webpage = self._download_webpage(f'https://www.youporn.com/watch/{video_id}', video_id) - definitions = self._search_json(r'\bplayervars\s*:', webpage, 'player vars', video_id)['mediaDefinitions'] + + watchable = self._search_regex( + r'''(<div\s[^>]*\bid\s*=\s*('|")?watch-container(?(2)\2|(?!-)\b)[^>]*>)''', + webpage, 'watchability', default=None) + if not watchable: + msg = re.split(r'\s{2}', clean_html(get_element_by_id('mainContent', webpage)) or '')[0] + raise ExtractorError( + f'{self.IE_NAME} says: {msg}' if msg else 'Video unavailable', expected=True) + + player_vars = 
self._search_json(r'\bplayervars\s*:', webpage, 'player vars', video_id) + definitions = player_vars['mediaDefinitions'] def get_format_data(data, stream_type): info_url = traverse_obj(data, (lambda _, v: v['format'] == stream_type, 'videoUrl', {url_or_none}, any)) @@ -143,8 +160,10 @@ class YouPornIE(InfoExtractor): thumbnail = self._search_regex( r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1', webpage, 'thumbnail', fatal=False, group='thumbnail') - duration = int_or_none(self._html_search_meta( - 'video:duration', webpage, 'duration', fatal=False)) + duration = traverse_obj(player_vars, ('duration', {int_or_none})) + if duration is None: + duration = int_or_none(self._html_search_meta( + 'video:duration', webpage, 'duration', fatal=False)) uploader = self._html_search_regex( r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>', @@ -160,11 +179,11 @@ class YouPornIE(InfoExtractor): view_count = None views = self._search_regex( - r'(<div[^>]+\bclass=["\']js_videoInfoViews["\']>)', webpage, - 'views', default=None) + r'(<div [^>]*\bdata-value\s*=[^>]+>)\s*<label>Views:</label>', + webpage, 'views', default=None) if views: - view_count = str_to_int(extract_attributes(views).get('data-value')) - comment_count = str_to_int(self._search_regex( + view_count = parse_count(extract_attributes(views).get('data-value')) + comment_count = parse_count(self._search_regex( r'>All [Cc]omments? 
\(([\d,.]+)\)', webpage, 'comment count', default=None)) @@ -182,7 +201,8 @@ class YouPornIE(InfoExtractor): data = self._search_json_ld(webpage, video_id, expected_type='VideoObject', fatal=False) data.pop('url', None) - return merge_dicts(data, { + + result = merge_dicts(data, { 'id': video_id, 'display_id': display_id, 'title': title, @@ -198,3 +218,350 @@ class YouPornIE(InfoExtractor): 'age_limit': age_limit, 'formats': formats, }) + + # Remove SEO spam "description" + description = result.get('description') + if description and description.startswith(f'Watch {result.get("title")} online'): + del result['description'] + + return result + + +class YouPornListBase(InfoExtractor): + def _get_next_url(self, url, pl_id, html): + return urljoin(url, self._search_regex( + r'''<a [^>]*?\bhref\s*=\s*("|')(?P<url>(?:(?!\1)[^>])+)\1''', + get_element_by_id('next', html) or '', 'next page', + group='url', default=None)) + + @classmethod + def _get_title_from_slug(cls, title_slug): + return re.sub(r'[_-]', ' ', title_slug) + + def _entries(self, url, pl_id, html=None, page_num=None): + start = page_num or 1 + for page in itertools.count(start): + if not html: + html = self._download_webpage( + url, pl_id, note=f'Downloading page {page}', fatal=page == start) + if not html: + return + for element in get_elements_html_by_class('video-title', html): + if video_url := traverse_obj(element, ({extract_attributes}, 'href', {lambda x: urljoin(url, x)})): + yield self.url_result(video_url) + + if page_num is not None: + return + next_url = self._get_next_url(url, pl_id, html) + if not next_url or next_url == url: + return + url = next_url + html = None + + def _real_extract(self, url, html=None): + m_dict = self._match_valid_url(url).groupdict() + pl_id, page_type, sort = (m_dict.get(k) for k in ('id', 'type', 'sort')) + qs = {k: v[-1] for k, v in parse_qs(url).items() if v} + + base_id = pl_id or 'YouPorn' + title = self._get_title_from_slug(base_id) + if page_type: + title = 
f'{page_type.capitalize()} {title}' + base_id = [base_id.lower()] + if sort is None: + title += ' videos' + else: + title = f'{title} videos by {re.sub(r"[_-]", " ", sort)}' + base_id.append(sort) + if qs: + filters = list(map('='.join, sorted(qs.items()))) + title += f' ({",".join(filters)})' + base_id.extend(filters) + pl_id = '/'.join(base_id) + + return self.playlist_result( + self._entries(url, pl_id, html=html, page_num=int_or_none(qs.get('page'))), + playlist_id=pl_id, playlist_title=title) + + +class YouPornCategoryIE(YouPornListBase): + IE_DESC = 'YouPorn category, with sorting, filtering and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + (?P<type>category)/(?P<id>[^/?#&]+) + (?:/(?P<sort>popular|views|rating|time|duration))?/?(?:[#?]|$) + ''' + _TESTS = [{ + 'note': 'Full list with pagination', + 'url': 'https://www.youporn.com/category/popular-with-women/popular/', + 'info_dict': { + 'id': 'popular-with-women/popular', + 'title': 'Category popular with women videos by popular', + }, + 'playlist_mincount': 39, + }, { + 'note': 'Filtered paginated list with single page result', + 'url': 'https://www.youporn.com/category/popular-with-women/duration/?min_minutes=10', + 'info_dict': { + 'id': 'popular-with-women/duration/min_minutes=10', + 'title': 'Category popular with women videos by duration (min_minutes=10)', + }, + 'playlist_mincount': 2, + # 'playlist_maxcount': 30, + }, { + 'note': 'Single page of full list', + 'url': 'https://www.youporn.com/category/popular-with-women/popular?page=1', + 'info_dict': { + 'id': 'popular-with-women/popular/page=1', + 'title': 'Category popular with women videos by popular (page=1)', + }, + 'playlist_count': 36, + }] + + +class YouPornChannelIE(YouPornListBase): + IE_DESC = 'YouPorn channel, with sorting and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + (?P<type>channel)/(?P<id>[^/?#&]+) + (?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$) + ''' + _TESTS = [{ + 'note': 
'Full list with pagination', + 'url': 'https://www.youporn.com/channel/x-feeds/', + 'info_dict': { + 'id': 'x-feeds', + 'title': 'Channel X-Feeds videos', + }, + 'playlist_mincount': 37, + }, { + 'note': 'Single page of full list (no filters here)', + 'url': 'https://www.youporn.com/channel/x-feeds/duration?page=1', + 'info_dict': { + 'id': 'x-feeds/duration/page=1', + 'title': 'Channel X-Feeds videos by duration (page=1)', + }, + 'playlist_count': 24, + }] + + @staticmethod + def _get_title_from_slug(title_slug): + return re.sub(r'_', ' ', title_slug).title() + + +class YouPornCollectionIE(YouPornListBase): + IE_DESC = 'YouPorn collection (user playlist), with sorting and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + (?P<type>collection)s/videos/(?P<id>\d+) + (?:/(?P<sort>rating|views|time|duration))?/?(?:[#?]|$) + ''' + _TESTS = [{ + 'note': 'Full list with pagination', + 'url': 'https://www.youporn.com/collections/videos/33044251/', + 'info_dict': { + 'id': '33044251', + 'title': 'Collection Sexy Lips videos', + 'uploader': 'ph-littlewillyb', + }, + 'playlist_mincount': 50, + }, { + 'note': 'Single page of full list (no filters here)', + 'url': 'https://www.youporn.com/collections/videos/33044251/time?page=1', + 'info_dict': { + 'id': '33044251/time/page=1', + 'title': 'Collection Sexy Lips videos by time (page=1)', + 'uploader': 'ph-littlewillyb', + }, + 'playlist_count': 20, + }] + + def _real_extract(self, url): + pl_id = self._match_id(url) + html = self._download_webpage(url, pl_id) + playlist = super()._real_extract(url, html=html) + infos = re.sub(r'\s+', ' ', clean_html(get_element_by_class( + 'collection-infos', html)) or '') + title, uploader = self._search_regex( + r'^\s*Collection: (?P<title>.+?) 
\d+ VIDEOS \d+ VIEWS \d+ days LAST UPDATED From: (?P<uploader>[\w_-]+)', + infos, 'title/uploader', group=('title', 'uploader'), default=(None, None)) + if title: + playlist.update({ + 'title': playlist['title'].replace(playlist['id'].split('/')[0], title), + 'uploader': uploader, + }) + + return playlist + + +class YouPornTagIE(YouPornListBase): + IE_DESC = 'YouPorn tag (porntags), with sorting, filtering and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + porn(?P<type>tag)s/(?P<id>[^/?#&]+) + (?:/(?P<sort>views|rating|time|duration))?/?(?:[#?]|$) + ''' + _TESTS = [{ + 'note': 'Full list with pagination', + 'url': 'https://www.youporn.com/porntags/austrian', + 'info_dict': { + 'id': 'austrian', + 'title': 'Tag austrian videos', + }, + 'playlist_mincount': 33, + 'expected_warnings': ['YouPorn tag pages are not correctly cached'], + }, { + 'note': 'Filtered paginated list with single page result', + 'url': 'https://www.youporn.com/porntags/austrian/duration/?min_minutes=10', + 'info_dict': { + 'id': 'austrian/duration/min_minutes=10', + 'title': 'Tag austrian videos by duration (min_minutes=10)', + }, + 'playlist_mincount': 10, + # number of videos per page is (row x col) 2x3 + 6x4 + 2, or + 3, + # or more, varying with number of ads; let's set max as 9x4 + # NB col 1 may not be shown in non-JS page with site CSS and zoom 100% + # 'playlist_maxcount': 32, + 'expected_warnings': ['YouPorn tag pages are not correctly cached'], + }, { + 'note': 'Single page of full list', + 'url': 'https://www.youporn.com/porntags/austrian/?page=1', + 'info_dict': { + 'id': 'austrian/page=1', + 'title': 'Tag austrian videos (page=1)', + }, + 'playlist_mincount': 32, + # 'playlist_maxcount': 34, + 'expected_warnings': ['YouPorn tag pages are not correctly cached'], + }] + + def _real_extract(self, url): + self.report_warning( + 'YouPorn tag pages are not correctly cached and ' + 'often return incorrect results', only_once=True) + return 
super()._real_extract(url) + + +class YouPornStarIE(YouPornListBase): + IE_DESC = 'YouPorn Pornstar, with description, sorting and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + (?P<type>pornstar)/(?P<id>[^/?#&]+) + (?:/(?P<sort>rating|views|duration))?/?(?:[#?]|$) + ''' + _TESTS = [{ + 'note': 'Full list with pagination', + 'url': 'https://www.youporn.com/pornstar/daynia/', + 'info_dict': { + 'id': 'daynia', + 'title': 'Pornstar Daynia videos', + 'description': r're:Daynia Rank \d+ Videos \d+ Views [\d,.]+ .+ Subscribers \d+', + }, + 'playlist_mincount': 40, + }, { + 'note': 'Single page of full list (no filters here)', + 'url': 'https://www.youporn.com/pornstar/daynia/?page=1', + 'info_dict': { + 'id': 'daynia/page=1', + 'title': 'Pornstar Daynia videos (page=1)', + 'description': 're:.{180,}', + }, + 'playlist_count': 26, + }] + + @staticmethod + def _get_title_from_slug(title_slug): + return re.sub(r'_', ' ', title_slug).title() + + def _real_extract(self, url): + pl_id = self._match_id(url) + html = self._download_webpage(url, pl_id) + playlist = super()._real_extract(url, html=html) + INFO_ELEMENT_RE = r'''(?x) + <div [^>]*\bclass\s*=\s*('|")(?:[\w$-]+\s+|\s)*?pornstar-info-wrapper(?:\s+[\w$-]+|\s)*\1[^>]*> + (?P<info>[\s\S]+?)(?:</div>\s*){6,} + ''' + + if infos := self._search_regex(INFO_ELEMENT_RE, html, 'infos', group='info', default=''): + infos = re.sub( + r'(?:\s*nl=nl)+\s*', ' ', + re.sub(r'(?u)\s+', ' ', clean_html(re.sub('\n', 'nl=nl', infos)))).replace('ribe Subsc', '') + + return { + **playlist, + 'description': infos.strip() or None, + } + + +class YouPornVideosIE(YouPornListBase): + IE_DESC = 'YouPorn video (browse) playlists, with sorting, filtering and pagination' + _VALID_URL = r'''(?x) + https?://(?:www\.)?youporn\.com/ + (?:(?P<id>browse)/)? 
+ (?P<sort>(?(id) + (?:duration|rating|time|views)| + (?:most_(?:favou?rit|view)ed|recommended|top_rated)?)) + (?:[/#?]|$) + ''' + _TESTS = [{ + 'note': 'Full list with pagination (too long for test)', + 'url': 'https://www.youporn.com/', + 'info_dict': { + 'id': 'youporn', + 'title': 'YouPorn videos', + }, + 'only_matching': True, + }, { + 'note': 'Full list with pagination (too long for test)', + 'url': 'https://www.youporn.com/recommended', + 'info_dict': { + 'id': 'youporn/recommended', + 'title': 'YouPorn videos by recommended', + }, + 'only_matching': True, + }, { + 'note': 'Full list with pagination (too long for test)', + 'url': 'https://www.youporn.com/top_rated', + 'info_dict': { + 'id': 'youporn/top_rated', + 'title': 'YouPorn videos by top rated', + }, + 'only_matching': True, + }, { + 'note': 'Full list with pagination (too long for test)', + 'url': 'https://www.youporn.com/browse/time', + 'info_dict': { + 'id': 'browse/time', + 'title': 'YouPorn videos by time', + }, + 'only_matching': True, + }, { + 'note': 'Filtered paginated list with single page result', + 'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=2', + 'info_dict': { + 'id': 'youporn/most_favorited/max_minutes=2/res=VR', + 'title': 'YouPorn videos by most favorited (max_minutes=2,res=VR)', + }, + 'playlist_mincount': 10, + # 'playlist_maxcount': 28, + }, { + 'note': 'Filtered paginated list with several pages', + 'url': 'https://www.youporn.com/most_favorited/?res=VR&max_minutes=5', + 'info_dict': { + 'id': 'youporn/most_favorited/max_minutes=5/res=VR', + 'title': 'YouPorn videos by most favorited (max_minutes=5,res=VR)', + }, + 'playlist_mincount': 45, + }, { + 'note': 'Single page of full list', + 'url': 'https://www.youporn.com/browse/time?page=1', + 'info_dict': { + 'id': 'browse/time/page=1', + 'title': 'YouPorn videos by time (page=1)', + }, + 'playlist_count': 36, + }] + + @staticmethod + def _get_title_from_slug(title_slug): + return 'YouPorn' if title_slug == 
'browse' else title_slug From 96a134dea6397a5f2131947c427aac52c8b4e677 Mon Sep 17 00:00:00 2001 From: coletdjnz <coletdjnz@protonmail.com> Date: Mon, 27 May 2024 09:13:12 +1200 Subject: [PATCH 29/48] [ie/youtube] Extract upload timestamp if available (#9856) Closes #4962, Closes #9829 Authored by: coletdjnz --- README.md | 1 + test/test_utils.py | 7 ++ yt_dlp/extractor/youtube.py | 136 +++++++++++++++++++----------------- yt_dlp/options.py | 2 +- yt_dlp/utils/_utils.py | 19 ++--- 5 files changed, 92 insertions(+), 73 deletions(-) diff --git a/README.md b/README.md index 5965d600e..1b4071132 100644 --- a/README.md +++ b/README.md @@ -2333,6 +2333,7 @@ These options may no longer work as intended --write-annotations No supported site has annotations now --no-write-annotations Default --compat-options seperate-video-versions No longer needed + --compat-options no-youtube-prefer-utc-upload-date No longer supported #### Removed These options were deprecated since 2014 and have now been entirely removed diff --git a/test/test_utils.py b/test/test_utils.py index 816cf03f6..77fadbbea 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -5,6 +5,7 @@ import os import sys import unittest import warnings +import datetime as dt sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -27,6 +28,7 @@ from yt_dlp.utils import ( ExtractorError, InAdvancePagedList, LazyList, + NO_DEFAULT, OnDemandPagedList, Popen, age_restricted, @@ -768,6 +770,11 @@ class TestUtil(unittest.TestCase): def test_parse_iso8601(self): self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266) + self.assertEqual(parse_iso8601('2014-03-23T23:04:26-07:00'), 1395641066) + self.assertEqual(parse_iso8601('2014-03-23T23:04:26', timezone=dt.timedelta(hours=-7)), 1395641066) + self.assertEqual(parse_iso8601('2014-03-23T23:04:26', timezone=NO_DEFAULT), None) + # default does not override timezone in date_str + 
self.assertEqual(parse_iso8601('2014-03-23T23:04:26-07:00', timezone=dt.timedelta(hours=-10)), 1395641066) self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266) self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266) self.assertEqual(parse_iso8601('2014-03-23T22:04:26.1234Z'), 1395612266) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index e676c5cde..54da4e362 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -1325,6 +1325,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister', 'uploader_id': '@PhilippHagemeister', 'heatmap': 'count:100', + 'timestamp': 1349198244, } }, { @@ -1368,6 +1369,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister', 'uploader_id': '@PhilippHagemeister', 'heatmap': 'count:100', + 'timestamp': 1349198244, }, 'params': { 'skip_download': True, @@ -1454,6 +1456,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'comment_count': int, 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1401991663, }, }, { @@ -1513,6 +1516,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Projekt Melody', 'uploader_url': 'https://www.youtube.com/@ProjektMelody', 'uploader_id': '@ProjektMelody', + 'timestamp': 1577508724, }, }, { @@ -1618,6 +1622,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@Olympics', 'uploader_id': '@Olympics', 'channel_is_verified': True, + 'timestamp': 1440707674, }, 'params': { 'skip_download': 'requires avconv', @@ -1651,6 +1656,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': '孫ᄋᄅ', 'uploader_url': 'https://www.youtube.com/@AllenMeow', 'uploader_id': '@AllenMeow', + 'timestamp': 1299776999, }, }, # url_encoded_fmt_stream_map is empty string @@ -1794,6 +1800,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, }], 'params': {'skip_download': True}, + 'skip': 'Not multifeed 
anymore', }, { # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536) @@ -1902,6 +1909,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'The Berkman Klein Center for Internet & Society', 'uploader_id': '@BKCHarvard', 'uploader_url': 'https://www.youtube.com/@BKCHarvard', + 'timestamp': 1422422076, }, 'params': { 'skip_download': True, @@ -1937,6 +1945,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@BernieSanders', 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1447987198, }, 'params': { 'skip_download': True, @@ -2000,6 +2009,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@Vsauce', 'comment_count': int, 'channel_is_verified': True, + 'timestamp': 1484761047, }, 'params': { 'skip_download': True, @@ -2155,6 +2165,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'l\'Or Vert asbl', 'uploader_url': 'https://www.youtube.com/@ElevageOrVert', 'uploader_id': '@ElevageOrVert', + 'timestamp': 1497343210, }, 'params': { 'skip_download': True, @@ -2193,6 +2204,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@Csharp-video-tutorialsBlogspot', 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1377976349, }, 'params': { 'skip_download': True, @@ -2275,6 +2287,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@CBSMornings', 'comment_count': int, 'channel_is_verified': True, + 'timestamp': 1405513526, } }, { @@ -2292,7 +2305,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'view_count': int, 'channel': 'Walk around Japan', 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'], - 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp', + 'thumbnail': 'https://i.ytimg.com/vi/cBvYw8_A0vQ/hqdefault.jpg', 'age_limit': 0, 'availability': 'public', 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw', @@ -2302,6 +2315,7 @@ class 
YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Walk around Japan', 'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124', 'uploader_id': '@walkaroundjapan7124', + 'timestamp': 1605884416, }, 'params': { 'skip_download': True, @@ -2397,6 +2411,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'comment_count': int, 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1395685455, }, 'params': {'format': 'mhtml', 'skip_download': True} }, { # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939) @@ -2426,37 +2441,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@LeonNguyen', 'uploader_id': '@LeonNguyen', 'heatmap': 'count:100', + 'timestamp': 1641170939, } - }, { - # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date - 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4', - 'info_dict': { - 'id': '2NUZ8W2llS4', - 'ext': 'mp4', - 'title': 'The NP that test your phone performance 🙂', - 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d', - 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA', - 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA', - 'duration': 21, - 'view_count': int, - 'age_limit': 0, - 'categories': ['Gaming'], - 'tags': 'count:23', - 'playable_in_embed': True, - 'live_status': 'not_live', - 'upload_date': '20220102', - 'like_count': int, - 'availability': 'public', - 'channel': 'Leon Nguyen', - 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp', - 'comment_count': int, - 'channel_follower_count': int, - 'uploader': 'Leon Nguyen', - 'uploader_url': 'https://www.youtube.com/@LeonNguyen', - 'uploader_id': '@LeonNguyen', - 'heatmap': 'count:100', - }, - 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']} }, { # date text is premiered video, ensure upload date in UTC (published 1641172509) 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM', @@ -2488,38 +2474,41 @@ class 
YoutubeIE(YoutubeBaseInfoExtractor): 'comment_count': int, 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1641172509, } }, - { # continuous livestream. Microformat upload date should be preferred. - # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27 - 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU', + { # continuous livestream. + # Upload date was 2022-07-12T05:12:29-07:00, while stream start is 2022-07-12T15:59:30+00:00 + 'url': 'https://www.youtube.com/watch?v=jfKfPfyJRdk', 'info_dict': { - 'id': 'kgx4WGK0oNU', - 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}', + 'id': 'jfKfPfyJRdk', 'ext': 'mp4', - 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA', - 'availability': 'public', - 'age_limit': 0, - 'release_timestamp': 1637975704, - 'upload_date': '20210619', - 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA', - 'live_status': 'is_live', - 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg', - 'channel': 'Abao in Tokyo', - 'channel_follower_count': int, - 'release_date': '20211127', - 'tags': 'count:39', - 'categories': ['People & Blogs'], + 'channel_id': 'UCSJ4gkVC6NrvII8umztf0Ow', 'like_count': int, - 'view_count': int, - 'playable_in_embed': True, - 'description': 'md5:2ef1d002cad520f65825346e2084e49d', + 'uploader': 'Lofi Girl', + 'categories': ['Music'], 'concurrent_view_count': int, - 'uploader': 'Abao in Tokyo', - 'uploader_url': 'https://www.youtube.com/@abaointokyo', - 'uploader_id': '@abaointokyo', + 'playable_in_embed': True, + 'timestamp': 1657627949, + 'release_date': '20220712', + 'channel_url': 'https://www.youtube.com/channel/UCSJ4gkVC6NrvII8umztf0Ow', + 'description': 'md5:13a6f76df898f5674f9127139f3df6f7', + 'age_limit': 0, + 'thumbnail': 'https://i.ytimg.com/vi/jfKfPfyJRdk/maxresdefault.jpg', + 'release_timestamp': 1657641570, + 'uploader_url': 'https://www.youtube.com/@LofiGirl', + 
'channel_follower_count': int, + 'channel_is_verified': True, + 'title': r're:^lofi hip hop radio 📚 - beats to relax/study to', + 'view_count': int, + 'live_status': 'is_live', + 'tags': 'count:32', + 'channel': 'Lofi Girl', + 'availability': 'public', + 'upload_date': '20220712', + 'uploader_id': '@LofiGirl', }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA', 'info_dict': { @@ -2545,6 +2534,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@lesmiscore', 'uploader': 'Lesmiscore', 'uploader_url': 'https://www.youtube.com/@lesmiscore', + 'timestamp': 1648005313, } }, { # Prefer primary title+description language metadata by default @@ -2572,6 +2562,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@coletdjnz', 'uploader_id': '@coletdjnz', 'uploader': 'cole-dlp-test-acc', + 'timestamp': 1662677394, }, 'params': {'skip_download': True} }, { @@ -2585,7 +2576,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'duration': 5, 'live_status': 'not_live', 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA', - 'upload_date': '20220728', + 'upload_date': '20220729', 'view_count': int, 'categories': ['People & Blogs'], 'thumbnail': r're:^https?://.*\.jpg', @@ -2598,6 +2589,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@coletdjnz', 'uploader_id': '@coletdjnz', 'uploader': 'cole-dlp-test-acc', + 'timestamp': 1659073275, + 'like_count': int, }, 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}}, 'expected_warnings': [r'Preferring "fr" translated fields'], @@ -2663,6 +2656,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Projekt Melody', 'uploader_id': '@ProjektMelody', 'uploader_url': 'https://www.youtube.com/@ProjektMelody', + 'timestamp': 1577508724, }, 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'}, }, @@ -2697,6 +2691,7 
@@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@sana_natori', 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1671798112, }, }, { @@ -2766,6 +2761,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries', 'uploader_id': '@ChristopherSykesDocumentaries', 'heatmap': 'count:100', + 'timestamp': 1211825920, }, 'params': { 'skip_download': True, @@ -4622,19 +4618,31 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': channel_handle, 'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None), }) + + # We only want timestamp IF it has time precision AND a timezone + # Currently the uploadDate in microformats appears to be in US/Pacific timezone. + timestamp = ( + parse_iso8601(get_first(microformats, 'uploadDate'), timezone=NO_DEFAULT) + or parse_iso8601(search_meta('uploadDate'), timezone=NO_DEFAULT) + ) + upload_date = ( + dt.datetime.fromtimestamp(timestamp, dt.timezone.utc).strftime('%Y%m%d') if timestamp else + ( + unified_strdate(get_first(microformats, 'uploadDate')) + or unified_strdate(search_meta('uploadDate')) + )) + + # In the case we cannot get the timestamp: # The upload date for scheduled, live and past live streams / premieres in microformats # may be different from the stream date. Although not in UTC, we will prefer it in this case. 
# See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139 - upload_date = ( - unified_strdate(get_first(microformats, 'uploadDate')) - or unified_strdate(search_meta('uploadDate'))) - if not upload_date or ( - live_status in ('not_live', None) - and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', []) - ): + if not upload_date or (not timestamp and live_status in ('not_live', None)): + # this should be in UTC, as configured in the cookie/client context upload_date = strftime_or_none( self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date + info['upload_date'] = upload_date + info['timestamp'] = timestamp if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'): # Newly uploaded videos' HLS formats are potentially problematic and need to be checked diff --git a/yt_dlp/options.py b/yt_dlp/options.py index faa1ee563..997b575cd 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -478,7 +478,7 @@ def create_parser(): }, 'aliases': { 'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'], 'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'], - '2021': ['2022', 'no-certifi', 'filename-sanitization', 'no-youtube-prefer-utc-upload-date'], + '2021': ['2022', 'no-certifi', 'filename-sanitization'], '2022': ['2023', 'no-external-downloader-progress', 'playlist-match-filter', 'prefer-legacy-http-handler', 'manifest-filesize-approx'], '2023': [], } diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index b63766912..5f458ea45 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -1134,7 +1134,7 @@ def is_path_like(f): return isinstance(f, (str, bytes, os.PathLike)) -def extract_timezone(date_str): +def extract_timezone(date_str, default=None): m = re.search( r'''(?x) ^.{8,}? 
# >=8 char non-TZ prefix, if present @@ -1146,21 +1146,25 @@ def extract_timezone(date_str): (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm $) ''', date_str) + timezone = None + if not m: m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str) timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip()) if timezone is not None: date_str = date_str[:-len(m.group('tz'))] - timezone = dt.timedelta(hours=timezone or 0) + timezone = dt.timedelta(hours=timezone) else: date_str = date_str[:-len(m.group('tz'))] - if not m.group('sign'): - timezone = dt.timedelta() - else: + if m.group('sign'): sign = 1 if m.group('sign') == '+' else -1 timezone = dt.timedelta( hours=sign * int(m.group('hours')), minutes=sign * int(m.group('minutes'))) + + if timezone is None and default is not NO_DEFAULT: + timezone = default or dt.timedelta() + return timezone, date_str @@ -1172,10 +1176,9 @@ def parse_iso8601(date_str, delimiter='T', timezone=None): date_str = re.sub(r'\.[0-9]+', '', date_str) - if timezone is None: - timezone, date_str = extract_timezone(date_str) + timezone, date_str = extract_timezone(date_str, timezone) - with contextlib.suppress(ValueError): + with contextlib.suppress(ValueError, TypeError): date_format = f'%Y-%m-%d{delimiter}%H:%M:%S' dt_ = dt.datetime.strptime(date_str, date_format) - timezone return calendar.timegm(dt_.timetuple()) From 347f13dd9bccc2b4db3ea25689410d45d8370ed4 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 26 May 2024 16:16:36 -0500 Subject: [PATCH 30/48] [ie/tiktok:user] Fix extractor (#9661) Closes #3776, Closes #4996 Authored by: bashonly --- yt_dlp/extractor/tiktok.py | 348 ++++++++++++++++++++----------------- 1 file changed, 189 insertions(+), 159 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 7772dd1f2..4113660a5 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -3,6 +3,7 @@ import itertools import json 
import random import re +import string import time import uuid @@ -11,7 +12,6 @@ from ..compat import compat_urllib_parse_urlparse from ..networking import HEADRequest from ..utils import ( ExtractorError, - LazyList, UnsupportedError, UserNotLive, determine_ext, @@ -236,7 +236,7 @@ class TikTokBaseIE(InfoExtractor): return video_data, status - def _get_subtitles(self, aweme_detail, aweme_id, user_url): + def _get_subtitles(self, aweme_detail, aweme_id, user_name): # TODO: Extract text positioning info subtitles = {} # aweme/detail endpoint subs @@ -267,9 +267,9 @@ class TikTokBaseIE(InfoExtractor): }) # webpage subs if not subtitles: - if user_url: # only _parse_aweme_video_app needs to extract the webpage here + if user_name: # only _parse_aweme_video_app needs to extract the webpage here aweme_detail, _ = self._extract_web_data_and_status( - f'{user_url}/video/{aweme_id}', aweme_id, fatal=False) + self._create_url(user_name, aweme_id), aweme_id, fatal=False) for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])): subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({ 'ext': remove_start(caption.get('Format'), 'web'), @@ -394,11 +394,7 @@ class TikTokBaseIE(InfoExtractor): }) stats_info = aweme_detail.get('statistics') or {} - author_info = aweme_detail.get('author') or {} music_info = aweme_detail.get('music') or {} - user_url = self._UPLOADER_URL_FORMAT % (traverse_obj(author_info, - 'sec_uid', 'id', 'uid', 'unique_id', - expected_type=str_or_none, get_all=False)) labels = traverse_obj(aweme_detail, ('hybrid_label', ..., 'text'), expected_type=str) contained_music_track = traverse_obj( @@ -412,6 +408,13 @@ class TikTokBaseIE(InfoExtractor): else: music_track, music_author = music_info.get('title'), traverse_obj(music_info, ('author', {str})) + author_info = traverse_obj(aweme_detail, ('author', { + 'uploader': ('unique_id', {str}), + 'uploader_id': ('uid', {str_or_none}), + 'channel': ('nickname', 
{str}), + 'channel_id': ('sec_uid', {str}), + })) + return { 'id': aweme_id, **traverse_obj(aweme_detail, { @@ -425,21 +428,20 @@ class TikTokBaseIE(InfoExtractor): 'repost_count': 'share_count', 'comment_count': 'comment_count', }, expected_type=int_or_none), - **traverse_obj(author_info, { - 'uploader': ('unique_id', {str}), - 'uploader_id': ('uid', {str_or_none}), - 'creators': ('nickname', {str}, {lambda x: [x] if x else None}), # for compat - 'channel': ('nickname', {str}), - 'channel_id': ('sec_uid', {str}), - }), - 'uploader_url': user_url, + **author_info, + 'channel_url': format_field(author_info, 'channel_id', self._UPLOADER_URL_FORMAT, default=None), + 'uploader_url': format_field( + author_info, ['uploader', 'uploader_id'], self._UPLOADER_URL_FORMAT, default=None), 'track': music_track, 'album': str_or_none(music_info.get('album')) or None, 'artists': re.split(r'(?:, | & )', music_author) if music_author else None, 'formats': formats, - 'subtitles': self.extract_subtitles(aweme_detail, aweme_id, user_url), + 'subtitles': self.extract_subtitles( + aweme_detail, aweme_id, traverse_obj(author_info, 'uploader', 'uploader_id', 'channel_id')), 'thumbnails': thumbnails, - 'duration': int_or_none(traverse_obj(video_info, 'duration', ('download_addr', 'duration')), scale=1000), + 'duration': (traverse_obj(video_info, ( + (None, 'download_addr'), 'duration', {functools.partial(int_or_none, scale=1000)}, any)) + or traverse_obj(music_info, ('duration', {int_or_none}))), 'availability': self._availability( is_private='Private' in labels, needs_subscription='Friends only' in labels, @@ -447,23 +449,17 @@ class TikTokBaseIE(InfoExtractor): '_format_sort_fields': ('quality', 'codec', 'size', 'br'), } - def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id): - video_info = aweme_detail['video'] - author_info = traverse_obj(aweme_detail, 'authorInfo', 'author', expected_type=dict, default={}) - music_info = aweme_detail.get('music') or {} - stats_info = 
aweme_detail.get('stats') or {} - channel_id = traverse_obj(author_info or aweme_detail, (('authorSecId', 'secUid'), {str}), get_all=False) - user_url = self._UPLOADER_URL_FORMAT % channel_id if channel_id else None - - formats = [] - width = int_or_none(video_info.get('width')) - height = int_or_none(video_info.get('height')) - ratio = try_call(lambda: width / height) or 0.5625 + def _extract_web_formats(self, aweme_detail): COMMON_FORMAT_INFO = { 'ext': 'mp4', 'vcodec': 'h264', 'acodec': 'aac', } + video_info = traverse_obj(aweme_detail, ('video', {dict})) or {} + play_width = int_or_none(video_info.get('width')) + play_height = int_or_none(video_info.get('height')) + ratio = try_call(lambda: play_width / play_height) or 0.5625 + formats = [] for bitrate_info in traverse_obj(video_info, ('bitrateInfo', lambda _, v: v['PlayAddr']['UrlList'])): format_info, res = self._parse_url_key( @@ -488,7 +484,7 @@ class TikTokBaseIE(InfoExtractor): else: # landscape: res/dimension is height x = int(dimension * ratio) format_info.update({ - 'width': x - (x % 2), + 'width': x + (x % 2), 'height': dimension, }) @@ -500,15 +496,15 @@ class TikTokBaseIE(InfoExtractor): }) # We don't have res string for play formats, but need quality for sorting & de-duplication - play_quality = traverse_obj(formats, (lambda _, v: v['width'] == width, 'quality', any)) + play_quality = traverse_obj(formats, (lambda _, v: v['width'] == play_width, 'quality', any)) for play_url in traverse_obj(video_info, ('playAddr', ((..., 'src'), None), {url_or_none})): formats.append({ **COMMON_FORMAT_INFO, 'format_id': 'play', 'url': self._proto_relative_url(play_url), - 'width': width, - 'height': height, + 'width': play_width, + 'height': play_height, 'quality': play_quality, }) @@ -528,8 +524,8 @@ class TikTokBaseIE(InfoExtractor): }) # Is it a slideshow with only audio for download? 
- if not formats and traverse_obj(music_info, ('playUrl', {url_or_none})): - audio_url = music_info['playUrl'] + if not formats and traverse_obj(aweme_detail, ('music', 'playUrl', {url_or_none})): + audio_url = aweme_detail['music']['playUrl'] ext = traverse_obj(parse_qs(audio_url), ( 'mime_type', -1, {lambda x: x.replace('_', '/')}, {mimetype2ext})) or 'm4a' formats.append({ @@ -540,23 +536,31 @@ class TikTokBaseIE(InfoExtractor): 'vcodec': 'none', }) - thumbnails = [] - for thumb_url in traverse_obj(aweme_detail, ( - (None, 'video'), ('thumbnail', 'cover', 'dynamicCover', 'originCover'), {url_or_none})): - thumbnails.append({ - 'url': self._proto_relative_url(thumb_url), - 'width': width, - 'height': height, - }) + return formats + + def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id, extract_flat=False): + author_info = traverse_obj(aweme_detail, (('authorInfo', 'author', None), { + 'channel': ('nickname', {str}), + 'channel_id': (('authorSecId', 'secUid'), {str}), + 'uploader': (('uniqueId', 'author'), {str}), + 'uploader_id': (('authorId', 'uid', 'id'), {str_or_none}), + }), get_all=False) return { 'id': video_id, - **traverse_obj(music_info, { + 'formats': None if extract_flat else self._extract_web_formats(aweme_detail), + 'subtitles': None if extract_flat else self.extract_subtitles(aweme_detail, video_id, None), + 'http_headers': {'Referer': webpage_url}, + **author_info, + 'channel_url': format_field(author_info, 'channel_id', self._UPLOADER_URL_FORMAT, default=None), + 'uploader_url': format_field( + author_info, ['uploader', 'uploader_id'], self._UPLOADER_URL_FORMAT, default=None), + **traverse_obj(aweme_detail, ('music', { 'track': ('title', {str}), 'album': ('album', {str}, {lambda x: x or None}), - 'artists': ('authorName', {str}, {lambda x: [x] if x else None}), + 'artists': ('authorName', {str}, {lambda x: re.split(r'(?:, | & )', x) if x else None}), 'duration': ('duration', {int_or_none}), - }), + })), 
**traverse_obj(aweme_detail, { 'title': ('desc', {str}), 'description': ('desc', {str}), @@ -564,26 +568,17 @@ class TikTokBaseIE(InfoExtractor): 'duration': ('video', 'duration', {int_or_none}, {lambda x: x or None}), 'timestamp': ('createTime', {int_or_none}), }), - **traverse_obj(author_info or aweme_detail, { - 'creators': ('nickname', {str}, {lambda x: [x] if x else None}), # for compat - 'channel': ('nickname', {str}), - 'uploader': (('uniqueId', 'author'), {str}), - 'uploader_id': (('authorId', 'uid', 'id'), {str_or_none}), - }, get_all=False), - **traverse_obj(stats_info, { + **traverse_obj(aweme_detail, ('stats', { 'view_count': 'playCount', 'like_count': 'diggCount', 'repost_count': 'shareCount', 'comment_count': 'commentCount', - }, expected_type=int_or_none), - 'channel_id': channel_id, - 'uploader_url': user_url, - 'formats': formats, - 'subtitles': self.extract_subtitles(aweme_detail, video_id, None), - 'thumbnails': thumbnails, - 'http_headers': { - 'Referer': webpage_url, - } + }), expected_type=int_or_none), + 'thumbnails': traverse_obj(aweme_detail, ( + (None, 'video'), ('thumbnail', 'cover', 'dynamicCover', 'originCover'), { + 'url': ({url_or_none}, {self._proto_relative_url}), + }, + )), } @@ -620,21 +615,21 @@ class TikTokIE(TikTokBaseIE): 'skip': '404 Not Found', }, { 'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en', - 'md5': '6f3cf8cdd9b28cb8363fe0a9a160695b', + 'md5': 'f21112672ee4ce05ca390fb6522e1b6f', 'info_dict': { 'id': '6742501081818877190', 'ext': 'mp4', 'title': 'md5:5e2a23877420bb85ce6521dbee39ba94', 'description': 'md5:5e2a23877420bb85ce6521dbee39ba94', 'duration': 27, - 'height': 960, - 'width': 540, + 'height': 1024, + 'width': 576, 'uploader': 'patrox', 'uploader_id': '18702747', - 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws', + 'uploader_url': 'https://www.tiktok.com/@patrox', + 'channel_url': 
'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws', 'channel_id': 'MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws', 'channel': 'patroX', - 'creators': ['patroX'], 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', 'upload_date': '20190930', 'timestamp': 1569860870, @@ -646,7 +641,7 @@ class TikTokIE(TikTokBaseIE): 'track': 'Big Fun', }, }, { - # Banned audio, only available on the app + # Banned audio, was available on the app, now works with web too 'url': 'https://www.tiktok.com/@barudakhb_/video/6984138651336838402', 'info_dict': { 'id': '6984138651336838402', @@ -655,9 +650,9 @@ class TikTokIE(TikTokBaseIE): 'description': 'Balas @yolaaftwsr hayu yu ? #SquadRandom_ 🔥', 'uploader': 'barudakhb_', 'channel': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6', - 'creators': ['md5:29f238c49bc0c176cb3cef1a9cea9fa6'], 'uploader_id': '6974687867511718913', - 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d', + 'uploader_url': 'https://www.tiktok.com/@barudakhb_', + 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d', 'channel_id': 'MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d', 'track': 'Boka Dance', 'artists': ['md5:29f238c49bc0c176cb3cef1a9cea9fa6'], @@ -680,7 +675,6 @@ class TikTokIE(TikTokBaseIE): 'description': 'Slap and Run!', 'uploader': 'user440922249', 'channel': 'Slap And Run', - 'creators': ['Slap And Run'], 'uploader_id': '7036055384943690754', 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_', 'channel_id': 'MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_', @@ -694,7 +688,7 @@ class TikTokIE(TikTokBaseIE): 'repost_count': int, 'comment_count': int, }, - 'params': {'skip_download': True}, # XXX: unable to download video data: HTTP Error 403: Forbidden + 'skip': 
'This video is unavailable', }, { # Video without title and description 'url': 'https://www.tiktok.com/@pokemonlife22/video/7059698374567611694', @@ -705,9 +699,9 @@ class TikTokIE(TikTokBaseIE): 'description': '', 'uploader': 'pokemonlife22', 'channel': 'Pokemon', - 'creators': ['Pokemon'], 'uploader_id': '6820838815978423302', - 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W', + 'uploader_url': 'https://www.tiktok.com/@pokemonlife22', + 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W', 'channel_id': 'MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W', 'track': 'original sound', 'timestamp': 1643714123, @@ -752,13 +746,14 @@ class TikTokIE(TikTokBaseIE): 'title': 'TikTok video #7139980461132074283', 'description': '', 'channel': 'Antaura', - 'creators': ['Antaura'], 'uploader': '_le_cannibale_', 'uploader_id': '6604511138619654149', - 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP', + 'uploader_url': 'https://www.tiktok.com/@_le_cannibale_', + 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP', 'channel_id': 'MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP', 'artists': ['nathan !'], 'track': 'grahamscott canon', + 'duration': 10, 'upload_date': '20220905', 'timestamp': 1662406249, 'view_count': int, @@ -769,18 +764,18 @@ class TikTokIE(TikTokBaseIE): }, }, { # only available via web - 'url': 'https://www.tiktok.com/@moxypatch/video/7206382937372134662', # FIXME - 'md5': '6aba7fad816e8709ff2c149679ace165', + 'url': 'https://www.tiktok.com/@moxypatch/video/7206382937372134662', + 'md5': '4cdefa501ac8ac20bf04986e10916fea', 'info_dict': { 'id': '7206382937372134662', 'ext': 'mp4', 'title': 'md5:1d95c0b96560ca0e8a231af4172b2c0a', 
'description': 'md5:1d95c0b96560ca0e8a231af4172b2c0a', 'channel': 'MoxyPatch', - 'creators': ['MoxyPatch'], 'uploader': 'moxypatch', 'uploader_id': '7039142049363379205', - 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V', + 'uploader_url': 'https://www.tiktok.com/@moxypatch', + 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V', 'channel_id': 'MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V', 'artists': ['your worst nightmare'], 'track': 'original sound', @@ -809,7 +804,6 @@ class TikTokIE(TikTokBaseIE): 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd', 'channel_id': 'MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd', 'channel': 'tate mcrae', - 'creators': ['tate mcrae'], 'artists': ['tate mcrae'], 'track': 'original sound', 'upload_date': '20220609', @@ -821,7 +815,7 @@ class TikTokIE(TikTokBaseIE): 'comment_count': int, 'thumbnail': r're:^https://.+\.webp', }, - 'skip': 'Unavailable via feed API, no formats available via web', + 'skip': 'Unavailable via feed API, only audio available via web', }, { # Slideshow, audio-only m4a format 'url': 'https://www.tiktok.com/@hara_yoimiya/video/7253412088251534594', @@ -833,13 +827,14 @@ class TikTokIE(TikTokBaseIE): 'description': 'я ред флаг простите #переписка #щитпост #тревожныйтиппривязанности #рекомендации ', 'uploader': 'hara_yoimiya', 'uploader_id': '6582536342634676230', - 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB', + 'uploader_url': 'https://www.tiktok.com/@hara_yoimiya', + 'channel_url': 'https://www.tiktok.com/@MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB', 'channel_id': 
'MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB', - 'channel': 'лампочка', - 'creators': ['лампочка'], + 'channel': 'лампочка(!)', 'artists': ['Øneheart'], 'album': 'watching the stars', 'track': 'watching the stars', + 'duration': 60, 'upload_date': '20230708', 'timestamp': 1688816612, 'view_count': int, @@ -876,102 +871,141 @@ class TikTokIE(TikTokBaseIE): class TikTokUserIE(TikTokBaseIE): IE_NAME = 'tiktok:user' - _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/?(?:$|[#?])' - _WORKING = False + _VALID_URL = r'(?:tiktokuser:|https?://(?:www\.)?tiktok\.com/@)(?P<id>[\w.-]+)/?(?:$|[#?])' _TESTS = [{ 'url': 'https://tiktok.com/@corgibobaa?lang=en', 'playlist_mincount': 45, 'info_dict': { - 'id': '6935371178089399301', + 'id': 'MS4wLjABAAAAepiJKgwWhulvCpSuUVsp7sgVVsFJbbNaLeQ6OQ0oAJERGDUIXhb2yxxHZedsItgT', 'title': 'corgibobaa', - 'thumbnail': r're:https://.+_1080x1080\.webp' }, - 'expected_warnings': ['Retrying'] }, { 'url': 'https://www.tiktok.com/@6820838815978423302', 'playlist_mincount': 5, 'info_dict': { - 'id': '6820838815978423302', + 'id': 'MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W', 'title': '6820838815978423302', - 'thumbnail': r're:https://.+_1080x1080\.webp' }, - 'expected_warnings': ['Retrying'] }, { 'url': 'https://www.tiktok.com/@meme', 'playlist_mincount': 593, 'info_dict': { - 'id': '79005827461758976', + 'id': 'MS4wLjABAAAAiKfaDWeCsT3IHwY77zqWGtVRIy9v4ws1HbVi7auP1Vx7dJysU_hc5yRiGywojRD6', 'title': 'meme', - 'thumbnail': r're:https://.+_1080x1080\.webp' }, - 'expected_warnings': ['Retrying'] + }, { + 'url': 'tiktokuser:MS4wLjABAAAAM3R2BtjzVT-uAtstkl2iugMzC6AtnpkojJbjiOdDDrdsTiTR75-8lyWJCY5VvDrZ', + 'playlist_mincount': 31, + 'info_dict': { + 'id': 'MS4wLjABAAAAM3R2BtjzVT-uAtstkl2iugMzC6AtnpkojJbjiOdDDrdsTiTR75-8lyWJCY5VvDrZ', + }, }] + _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:115.0) Gecko/20100101 Firefox/115.0' + _API_BASE_URL = 
'https://www.tiktok.com/api/creator/item_list/' - r''' # TODO: Fix by adding _signature to api_url - def _entries(self, webpage, user_id, username): - secuid = self._search_regex(r'\"secUid\":\"(?P<secUid>[^\"]+)', webpage, username) - verifyfp_cookie = self._get_cookies('https://www.tiktok.com').get('s_v_web_id') - if not verifyfp_cookie: - raise ExtractorError('Improper cookies (missing s_v_web_id).', expected=True) - api_url = f'https://m.tiktok.com/api/post/item_list/?aid=1988&cookie_enabled=true&count=30&verifyFp={verifyfp_cookie.value}&secUid={secuid}&cursor=' - cursor = '0' - for page in itertools.count(): - data_json = self._download_json(api_url + cursor, username, note='Downloading Page %d' % page) - for video in data_json.get('itemList', []): - video_id = video['id'] - video_url = f'https://www.tiktok.com/@{user_id}/video/{video_id}' - yield self._url_result(video_url, 'TikTok', video_id, str_or_none(video.get('desc'))) - if not data_json.get('hasMore'): - break - cursor = data_json['cursor'] - ''' - - def _video_entries_api(self, webpage, user_id, username): - query = { - 'user_id': user_id, - 'count': 21, - 'max_cursor': 0, - 'min_cursor': 0, - 'retry_type': 'no_retry', - 'device_id': self._DEVICE_ID, # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. 
+ def _build_web_query(self, sec_uid, cursor): + return { + 'aid': '1988', + 'app_language': 'en', + 'app_name': 'tiktok_web', + 'browser_language': 'en-US', + 'browser_name': 'Mozilla', + 'browser_online': 'true', + 'browser_platform': 'Win32', + 'browser_version': '5.0 (Windows)', + 'channel': 'tiktok_web', + 'cookie_enabled': 'true', + 'count': '15', + 'cursor': cursor, + 'device_id': self._DEVICE_ID, + 'device_platform': 'web_pc', + 'focus_state': 'true', + 'from_page': 'user', + 'history_len': '2', + 'is_fullscreen': 'false', + 'is_page_visible': 'true', + 'language': 'en', + 'os': 'windows', + 'priority_region': '', + 'referer': '', + 'region': 'US', + 'screen_height': '1080', + 'screen_width': '1920', + 'secUid': sec_uid, + 'type': '1', # pagination type: 0 == oldest-to-newest, 1 == newest-to-oldest + 'tz_name': 'UTC', + 'verifyFp': f'verify_{"".join(random.choices(string.hexdigits, k=7))}', + 'webcast_language': 'en', } - for page in itertools.count(1): - for retry in self.RetryManager(): - try: - post_list = self._call_api( - 'aweme/post', query, username, note=f'Downloading user video list page {page}', - errnote='Unable to download user video list') - except ExtractorError as e: - if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0: - retry.error = e - continue - raise - yield from post_list.get('aweme_list', []) - if not post_list.get('has_more'): - break - query['max_cursor'] = post_list['max_cursor'] + def _entries(self, sec_uid, user_name): + display_id = user_name or sec_uid - def _entries_api(self, user_id, videos): - for video in videos: - yield { - **self._parse_aweme_video_app(video), - 'extractor_key': TikTokIE.ie_key(), - 'extractor': 'TikTok', - 'webpage_url': f'https://tiktok.com/@{user_id}/video/{video["aweme_id"]}', - } + cursor = int(time.time() * 1E3) + for page in itertools.count(1): + response = self._download_json( + self._API_BASE_URL, display_id, f'Downloading page {page}', + query=self._build_web_query(sec_uid, 
cursor), headers={'User-Agent': self._USER_AGENT}) + + for video in traverse_obj(response, ('itemList', lambda _, v: v['id'])): + video_id = video['id'] + webpage_url = self._create_url(display_id, video_id) + yield self.url_result( + webpage_url, TikTokIE, + **self._parse_aweme_video_web(video, webpage_url, video_id, extract_flat=True)) + + old_cursor = cursor + cursor = traverse_obj( + response, ('itemList', -1, 'createTime', {functools.partial(int_or_none, invscale=1E3)})) + if not cursor: + # User may not have posted within this ~1 week lookback, so manually adjust cursor + cursor = old_cursor - 7 * 86_400_000 + # In case 'hasMorePrevious' is wrong, break if we have gone back before TikTok existed + if cursor < 1472706000000 or not traverse_obj(response, 'hasMorePrevious'): + break + + def _get_sec_uid(self, user_url, user_name, msg): + webpage = self._download_webpage( + user_url, user_name, fatal=False, headers={'User-Agent': 'Mozilla/5.0'}, + note=f'Downloading {msg} webpage', errnote=f'Unable to download {msg} webpage') or '' + return (traverse_obj(self._get_universal_data(webpage, user_name), + ('webapp.user-detail', 'userInfo', 'user', 'secUid', {str})) + or traverse_obj(self._get_sigi_state(webpage, user_name), + ('LiveRoom', 'liveRoomUserInfo', 'user', 'secUid', {str}), + ('UserModule', 'users', ..., 'secUid', {str}, any))) def _real_extract(self, url): - user_name = self._match_id(url) - webpage = self._download_webpage(url, user_name, headers={ - 'User-Agent': 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)' - }) - user_id = self._html_search_regex(r'snssdk\d*://user/profile/(\d+)', webpage, 'user ID', default=None) or user_name + user_name, sec_uid = self._match_id(url), None + if mobj := re.fullmatch(r'MS4wLjABAAAA[\w-]{64}', user_name): + user_name, sec_uid = None, mobj.group(0) + else: + sec_uid = (self._get_sec_uid(self._UPLOADER_URL_FORMAT % user_name, user_name, 'user') + or self._get_sec_uid(self._UPLOADER_URL_FORMAT 
% f'{user_name}/live', user_name, 'live')) - videos = LazyList(self._video_entries_api(webpage, user_id, user_name)) - thumbnail = traverse_obj(videos, (0, 'author', 'avatar_larger', 'url_list', 0)) + if not sec_uid: + webpage = self._download_webpage( + f'https://www.tiktok.com/embed/@{user_name}', user_name, + note='Downloading user embed page', fatal=False) or '' + data = traverse_obj(self._search_json( + r'<script[^>]+\bid=[\'"]__FRONTITY_CONNECT_STATE__[\'"][^>]*>', + webpage, 'data', user_name, default={}), + ('source', 'data', f'/embed/@{user_name}', {dict})) - return self.playlist_result(self._entries_api(user_id, videos), user_id, user_name, thumbnail=thumbnail) + for aweme_id in traverse_obj(data, ('videoList', ..., 'id', {str})): + webpage_url = self._create_url(user_name, aweme_id) + video_data, _ = self._extract_web_data_and_status(webpage_url, aweme_id, fatal=False) + sec_uid = self._parse_aweme_video_web( + video_data, webpage_url, aweme_id, extract_flat=True).get('channel_id') + if sec_uid: + break + + if not sec_uid: + raise ExtractorError( + 'Unable to extract secondary user ID. 
If you are able to get the channel_id ' + 'from a video posted by this user, try using "tiktokuser:channel_id" as the ' + 'input URL (replacing `channel_id` with its actual value)', expected=True) + + return self.playlist_result(self._entries(sec_uid, user_name), sec_uid, user_name) class TikTokBaseListIE(TikTokBaseIE): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor @@ -1098,7 +1132,6 @@ class DouyinIE(TikTokBaseIE): 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel': '杨超越', - 'creators': ['杨超越'], 'duration': 19, 'timestamp': 1620905839, 'upload_date': '20210513', @@ -1123,7 +1156,6 @@ class DouyinIE(TikTokBaseIE): 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA', 'channel_id': 'MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA', 'channel': '杨超越工作室', - 'creators': ['杨超越工作室'], 'duration': 42, 'timestamp': 1625739481, 'upload_date': '20210708', @@ -1148,7 +1180,6 @@ class DouyinIE(TikTokBaseIE): 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel': '杨超越', - 'creators': ['杨超越'], 'duration': 17, 'timestamp': 1619098692, 'upload_date': '20210422', @@ -1190,7 +1221,6 @@ class DouyinIE(TikTokBaseIE): 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel': '杨超越', - 'creators': ['杨超越'], 'duration': 15, 'timestamp': 1621261163, 'upload_date': '20210517', From 119d41f27061d220d276a2d38cfc8d873437452a Mon Sep 17 00:00:00 2001 From: imanoreotwe <4606611+imanoreotwe@users.noreply.github.com> Date: Sun, 26 May 2024 15:26:30 -0600 Subject: [PATCH 31/48] [ie/tiktok:collection] Add extractor (#9986) Closes 
#9984 Authored by: imanoreotwe, bashonly --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/tiktok.py | 58 +++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 37e6fc318..e9cd38a65 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -2074,6 +2074,7 @@ from .threespeak import ( ) from .tiktok import ( DouyinIE, + TikTokCollectionIE, TikTokEffectIE, TikTokIE, TikTokLiveIE, diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 4113660a5..ab8efc19e 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -1117,6 +1117,64 @@ class TikTokTagIE(TikTokBaseListIE): return self.playlist_result(self._entries(tag_id, display_id), tag_id, display_id) +class TikTokCollectionIE(TikTokBaseIE): + IE_NAME = 'tiktok:collection' + _VALID_URL = r'https?://www\.tiktok\.com/@(?P<user_id>[\w.-]+)/collection/(?P<title>[^/?#]+)-(?P<id>\d+)/?(?:[?#]|$)' + _TESTS = [{ + # playlist should have exactly 9 videos + 'url': 'https://www.tiktok.com/@imanoreotwe/collection/count-test-7371330159376370462', + 'info_dict': { + 'id': '7371330159376370462', + 'title': 'imanoreotwe-count-test' + }, + 'playlist_count': 9 + }, { + # tests returning multiple pages of a large collection + 'url': 'https://www.tiktok.com/@imanoreotwe/collection/%F0%9F%98%82-7111887189571160875', + 'info_dict': { + 'id': '7111887189571160875', + 'title': 'imanoreotwe-%F0%9F%98%82' + }, + 'playlist_mincount': 100 + }] + _API_BASE_URL = 'https://www.tiktok.com/api/collection/item_list/' + _PAGE_COUNT = 30 + + def _build_web_query(self, collection_id, cursor): + return { + 'aid': '1988', + 'collectionId': collection_id, + 'count': self._PAGE_COUNT, + 'cursor': cursor, + 'sourceType': '113', + } + + def _entries(self, collection_id): + cursor = 0 + for page in itertools.count(1): + response = self._download_json( + self._API_BASE_URL, collection_id, 
f'Downloading page {page}', + query=self._build_web_query(collection_id, cursor)) + + for video in traverse_obj(response, ('itemList', lambda _, v: v['id'])): + video_id = video['id'] + author = traverse_obj(video, ('author', ('uniqueId', 'secUid', 'id'), {str}, any)) or '_' + webpage_url = self._create_url(author, video_id) + yield self.url_result( + webpage_url, TikTokIE, + **self._parse_aweme_video_web(video, webpage_url, video_id, extract_flat=True)) + + if not traverse_obj(response, 'hasMore'): + break + cursor += self._PAGE_COUNT + + def _real_extract(self, url): + collection_id, title, user_name = self._match_valid_url(url).group('id', 'title', 'user_id') + + return self.playlist_result( + self._entries(collection_id), collection_id, '-'.join((user_name, title))) + + class DouyinIE(TikTokBaseIE): _VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)' _TESTS = [{ From 5a2eebc76770fca91ffabeff658d560f716fec80 Mon Sep 17 00:00:00 2001 From: ocococococ <104170215+ocococococ@users.noreply.github.com> Date: Sun, 26 May 2024 23:33:15 +0200 Subject: [PATCH 32/48] [ie/LCI] Fix extractor (#10025) Authored by: ocococococ --- yt_dlp/extractor/lci.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/lci.py b/yt_dlp/extractor/lci.py index e7d2f8a24..708cb548d 100644 --- a/yt_dlp/extractor/lci.py +++ b/yt_dlp/extractor/lci.py @@ -1,9 +1,25 @@ from .common import InfoExtractor +from .wat import WatIE +from ..utils import ExtractorError, int_or_none +from ..utils.traversal import traverse_obj class LCIIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html' + _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/(?:[^/?#]+/)+[\w-]+-(?P<id>\d+)\.html' _TESTS = [{ + 'url': 'https://www.tf1info.fr/replay-lci/videos/video-24h-pujadas-du-vendredi-24-mai-6708-2300831.html', + 'info_dict': { + 'id': '14113788', + 'ext': 'mp4', + 'title': '24H Pujadas du 
vendredi 24 mai 2024', + 'thumbnail': 'https://photos.tf1.fr/1280/720/24h-pujadas-du-24-mai-2024-55bf2d-0@1x.jpg', + 'upload_date': '20240524', + 'duration': 6158, + }, + 'params': { + 'skip_download': True, + }, + }, { 'url': 'https://www.tf1info.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html', 'info_dict': { 'id': '13875948', @@ -24,5 +40,10 @@ class LCIIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - wat_id = self._search_regex(r'watId["\']?\s*:\s*["\']?(\d+)', webpage, 'wat id') - return self.url_result('wat:' + wat_id, 'Wat', wat_id) + next_data = self._search_nextjs_data(webpage, video_id) + wat_id = traverse_obj(next_data, ( + 'props', 'pageProps', 'page', 'tms', 'videos', {dict.keys}, ..., {int_or_none}, any)) + if wat_id is None: + raise ExtractorError('Could not find wat_id') + + return self.url_result(f'wat:{wat_id}', WatIE, str(wat_id)) From 5c019f6328ad40d66561eac3c4de0b3cd070d0f6 Mon Sep 17 00:00:00 2001 From: Simon Sawicki <contact@grub4k.xyz> Date: Sun, 26 May 2024 23:37:49 +0200 Subject: [PATCH 33/48] [misc] Cleanup (#9765) Closes #9763 Authored by: bashonly, seproDev, Grub4K Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com> --- Makefile | 4 ++-- README.md | 7 +++--- devscripts/changelog_override.json | 16 +++++++++++++ devscripts/run_tests.bat | 4 ---- devscripts/run_tests.sh | 4 ---- pyinst.py | 17 -------------- setup.py | 36 ------------------------------ test/test_InfoExtractor.py | 2 +- yt_dlp/extractor/ceskatelevize.py | 2 +- yt_dlp/extractor/common.py | 2 +- yt_dlp/extractor/thisvid.py | 2 +- yt_dlp/extractor/vk.py | 4 ++-- yt_dlp/utils/_utils.py | 2 +- 13 files changed, 28 insertions(+), 74 deletions(-) delete mode 100644 devscripts/run_tests.bat delete mode 100755 
devscripts/run_tests.sh delete mode 100755 pyinst.py delete mode 100755 setup.py diff --git a/Makefile b/Makefile index b8f010086..e1de7f3e9 100644 --- a/Makefile +++ b/Makefile @@ -74,11 +74,11 @@ codetest: autopep8 --diff . test: - $(PYTHON) -m pytest + $(PYTHON) -m pytest -Werror $(MAKE) codetest offlinetest: codetest - $(PYTHON) -m pytest -k "not download" + $(PYTHON) -m pytest -Werror -m "not download" CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's,/__init__.py,,' | grep -v '/__' | sort CODE_FOLDERS != $(CODE_FOLDERS_CMD) diff --git a/README.md b/README.md index 1b4071132..52c80f26e 100644 --- a/README.md +++ b/README.md @@ -108,7 +108,6 @@ File|Description [yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win7 SP1+) standalone x86 (32-bit) binary [yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`<br/> ([Not recommended](#standalone-py2exe-builds-windows)) [yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary -[yt-dlp_linux.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux.zip)|Unpackaged Linux executable (no auto-update) [yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary [yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary [yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged Windows executable (no auto-update) @@ -170,7 +169,7 @@ Example usage: yt-dlp --update-to nightly # To install nightly with pip: -python3 -m pip install -U --pre yt-dlp[default] +python3 -m pip install -U --pre "yt-dlp[default]" ``` ## DEPENDENCIES @@ -202,7 +201,7 @@ While all the other dependencies are 
optional, `ffmpeg` and `ffprobe` are highly The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting. * [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE) - * Can be installed with the `curl-cffi` group, e.g. `pip install yt-dlp[default,curl-cffi]` + * Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"` * Currently only included in `yt-dlp.exe` and `yt-dlp_macos` builds @@ -1751,7 +1750,7 @@ $ yt-dlp --replace-in-metadata "title,uploader" "[ _]" "-" # EXTRACTOR ARGUMENTS -Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=android_embedded,web;include_live_dash" --extractor-args "funimation:version=uncut"` +Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=android_embedded,web;formats=incomplete" --extractor-args "funimation:version=uncut"` Note: In CLI, `ARG` can use `-` instead of `_`; e.g. 
`youtube:player-client"` becomes `youtube:player_client"` diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index 046060cb2..4be1e58d4 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -147,5 +147,21 @@ "action": "add", "when": "9590cc6b4768e190183d7d071a6c78170889116a", "short": "[priority] Security: [[CVE-2024-22423](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-22423)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-hjq6-52gw-2g7p)\n - The shell escape function now properly escapes `%`, `\\` and `\\n`.\n - `utils.Popen` has been patched accordingly." + }, + { + "action": "change", + "when": "41ba4a808b597a3afed78c89675a30deb6844450", + "short": "[ie/tiktok] Extract via mobile API only if extractor-arg is passed (#9938)", + "authors": ["bashonly"] + }, + { + "action": "remove", + "when": "6e36d17f404556f0e3a43f441c477a71a91877d9" + }, + { + "action": "change", + "when": "beaf832c7a9d57833f365ce18f6115b88071b296", + "short": "[ie/soundcloud] Add `formats` extractor-arg (#10004)", + "authors": ["bashonly", "Grub4K"] } ] diff --git a/devscripts/run_tests.bat b/devscripts/run_tests.bat deleted file mode 100644 index 57b1f4bf4..000000000 --- a/devscripts/run_tests.bat +++ /dev/null @@ -1,4 +0,0 @@ -@echo off - ->&2 echo run_tests.bat is deprecated. Please use `devscripts/run_tests.py` instead -python %~dp0run_tests.py %~1 diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh deleted file mode 100755 index 123ceb1ee..000000000 --- a/devscripts/run_tests.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env sh - ->&2 echo 'run_tests.sh is deprecated. 
Please use `devscripts/run_tests.py` instead' -python3 devscripts/run_tests.py "$1" diff --git a/pyinst.py b/pyinst.py deleted file mode 100755 index 4a8ed2d34..000000000 --- a/pyinst.py +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env python3 - -# Allow execution from anywhere -import os -import sys - -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) - -import warnings - -from bundle.pyinstaller import main - -warnings.warn(DeprecationWarning('`pyinst.py` is deprecated and will be removed in a future version. ' - 'Use `bundle.pyinstaller` instead')) - -if __name__ == '__main__': - main() diff --git a/setup.py b/setup.py deleted file mode 100755 index 8d1e6d10b..000000000 --- a/setup.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python3 - -# Allow execution from anywhere -import os -import sys - -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) - -import warnings - - -if sys.argv[1:2] == ['py2exe']: - warnings.warn(DeprecationWarning('`setup.py py2exe` is deprecated and will be removed in a future version. ' - 'Use `bundle.py2exe` instead')) - - import bundle.py2exe - - bundle.py2exe.main() - -elif 'build_lazy_extractors' in sys.argv: - warnings.warn(DeprecationWarning('`setup.py build_lazy_extractors` is deprecated and will be removed in a future version. ' - 'Use `devscripts.make_lazy_extractors` instead')) - - import subprocess - - os.chdir(sys.path[0]) - print('running build_lazy_extractors') - subprocess.run([sys.executable, 'devscripts/make_lazy_extractors.py']) - -else: - - print( - 'ERROR: Building by calling `setup.py` is deprecated. ' - 'Use a build frontend like `build` instead. 
', - 'Refer to https://build.pypa.io for more info', file=sys.stderr) - sys.exit(1) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index c633ce3e4..744587e45 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -1912,7 +1912,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ self.assertEqual(self.ie._search_nextjs_data('', None, fatal=False), {}) self.assertEqual(self.ie._search_nextjs_data('', None, default=None), None) self.assertEqual(self.ie._search_nextjs_data('', None, default={}), {}) - with self.assertRaises(DeprecationWarning): + with self.assertWarns(DeprecationWarning): self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {}) diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py index 156b6a324..5d6335729 100644 --- a/yt_dlp/extractor/ceskatelevize.py +++ b/yt_dlp/extractor/ceskatelevize.py @@ -101,7 +101,7 @@ class CeskaTelevizeIE(InfoExtractor): site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize') playlist_title = self._og_search_title(webpage, default=None) if site_name and playlist_title: - playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0] + playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, maxsplit=1)[0] playlist_description = self._og_search_description(webpage, default=None) if playlist_description: playlist_description = playlist_description.replace('\xa0', ' ') diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 38daad72e..b99b7e5ab 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3517,7 +3517,7 @@ class InfoExtractor: # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as # of jwplayer.flash.swf rtmp_url_parts = re.split( - r'((?:mp4|mp3|flv):)', source_url, 1) + r'((?:mp4|mp3|flv):)', source_url, maxsplit=1) if len(rtmp_url_parts) == 3: rtmp_url, prefix, play_path = 
rtmp_url_parts a_format.update({ diff --git a/yt_dlp/extractor/thisvid.py b/yt_dlp/extractor/thisvid.py index 9d3368ed7..04b083811 100644 --- a/yt_dlp/extractor/thisvid.py +++ b/yt_dlp/extractor/thisvid.py @@ -134,7 +134,7 @@ class ThisVidPlaylistBaseIE(InfoExtractor): title = re.split( r'(?i)\s*\|\s*ThisVid\.com\s*$', self._og_search_title(webpage, default=None) - or self._html_search_regex(r'(?s)<title\b[^>]*>(.+?)</title', webpage, 'title', fatal=False) or '', 1)[0] or None + or self._html_search_regex(r'(?s)<title\b[^>]*>(.+?)</title', webpage, 'title', fatal=False) or '', maxsplit=1)[0] or None return self.playlist_from_matches( self._generate_playlist_entries(url, playlist_id, webpage), diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index 132d65bca..9a3c75b62 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -467,13 +467,13 @@ class VKIE(VKBaseIE): 'source_preference': 1, 'height': height, }) - elif format_id == 'hls': + elif format_id.startswith('hls') and format_id != 'hls_live_playback': fmts, subs = self._extract_m3u8_formats_and_subtitles( format_url, video_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False, live=is_live) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) - elif format_id.startswith('dash_'): + elif format_id.startswith('dash') and format_id not in ('dash_live_playback', 'dash_uni'): fmts, subs = self._extract_mpd_formats_and_subtitles( format_url, video_id, mpd_id=format_id, fatal=False) formats.extend(fmts) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 5f458ea45..42803bb6d 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -2525,7 +2525,7 @@ def read_batch_urls(batch_fd): return False # "#" cannot be stripped out since it is part of the URI # However, it can be safely stripped out if following a whitespace - return re.split(r'\s#', url, 1)[0].rstrip() + return re.split(r'\s#', url, maxsplit=1)[0].rstrip() with contextlib.closing(batch_fd) as fd: 
return [url for url in map(fixup, fd) if url] From ae2af1104f80caf2f47544763a33db2c17a3e1de Mon Sep 17 00:00:00 2001 From: bashonly <bashonly@protonmail.com> Date: Sun, 26 May 2024 16:46:31 -0500 Subject: [PATCH 34/48] [cleanup] Misc Authored by: bashonly, seproDev, Grub4K --- devscripts/changelog_override.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index 4be1e58d4..86e8ec2f9 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -163,5 +163,11 @@ "when": "beaf832c7a9d57833f365ce18f6115b88071b296", "short": "[ie/soundcloud] Add `formats` extractor-arg (#10004)", "authors": ["bashonly", "Grub4K"] + }, + { + "action": "change", + "when": "5c019f6328ad40d66561eac3c4de0b3cd070d0f6", + "short": "[cleanup] Misc (#9765)", + "authors": ["bashonly", "Grub4K", "seproDev"] } ] From ed274b60b1ad0193fcf8f4ebb6189b4b865525c6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Sun, 26 May 2024 21:55:43 +0000 Subject: [PATCH 35/48] Release 2024.05.26 Created by: bashonly :ci skip all :ci run dl --- CONTRIBUTORS | 20 +++++++++ Changelog.md | 110 ++++++++++++++++++++++++++++++++++++++++++++++ README.md | 21 ++++----- supportedsites.md | 37 ++++++++++------ yt_dlp/version.py | 6 +-- 5 files changed, 167 insertions(+), 27 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 8b5d19a64..b2a476bea 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -610,3 +610,23 @@ Offert4324 sta1us Tomoka1 trwstin +alexhuot1 +clienthax +DaPotato69 +emqi +hugohaa +imanoreotwe +JakeFinley96 +lostfictions +minamotorin +ocococococ +Podiumnoche +RasmusAntons +roeniss +shoxie007 +Szpachlarz +The-MAGI +TuxCoder +voidful +vtexier +WyohKnott diff --git a/Changelog.md b/Changelog.md index 6cf08beab..0d27f1a92 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,116 @@ # To create a release, dispatch the 
https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2024.05.26 + +#### Core changes +- [Better warning when requested subs format not found](https://github.com/yt-dlp/yt-dlp/commit/7e4259dff0b681a3f0e8a930799ce0394328c86e) ([#9873](https://github.com/yt-dlp/yt-dlp/issues/9873)) by [DaPotato69](https://github.com/DaPotato69) +- [Merged with youtube-dl a08f2b7](https://github.com/yt-dlp/yt-dlp/commit/a4da9db87b6486b270c15dfa07ab5bfedc83f6bd) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) +- [Warn if lack of ffmpeg alters format selection](https://github.com/yt-dlp/yt-dlp/commit/96da9525043f78aca4544d01761b13b2140e9ae6) ([#9805](https://github.com/yt-dlp/yt-dlp/issues/9805)) by [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev) +- **cookies** + - [Add `--cookies-from-browser` support for Whale](https://github.com/yt-dlp/yt-dlp/commit/dd9ad97b1fbdd36c086b8ba82328a4d954f78f8e) ([#9649](https://github.com/yt-dlp/yt-dlp/issues/9649)) by [roeniss](https://github.com/roeniss) + - [Get chrome session cookies with `--cookies-from-browser`](https://github.com/yt-dlp/yt-dlp/commit/f1f158976e38d38a260762accafe7bbe6d451151) ([#9747](https://github.com/yt-dlp/yt-dlp/issues/9747)) by [StefanLobbenmeier](https://github.com/StefanLobbenmeier) +- **windows**: [Improve shell quoting and tests](https://github.com/yt-dlp/yt-dlp/commit/64766459e37451b665c1464073c28361fbcf1c25) ([#9802](https://github.com/yt-dlp/yt-dlp/issues/9802)) by [Grub4K](https://github.com/Grub4K) (With fixes in [7e26bd5](https://github.com/yt-dlp/yt-dlp/commit/7e26bd53f9c5893518fde81dfd0079ec08dd841e)) + +#### Extractor changes +- [Add POST data hash to `--write-pages` filenames](https://github.com/yt-dlp/yt-dlp/commit/61b17437dc14a1c7e90ff48a6198df77828c6df4) ([#9879](https://github.com/yt-dlp/yt-dlp/issues/9879)) by [minamotorin](https://github.com/minamotorin) (With fixes in 
[c999bac](https://github.com/yt-dlp/yt-dlp/commit/c999bac02c5a4f755b2a82488a975e91c988ffd8) by [bashonly](https://github.com/bashonly)) +- [Make `_search_nextjs_data` non fatal](https://github.com/yt-dlp/yt-dlp/commit/3ee1194288981c4f2c4abd8315326de0c424d2ce) ([#8937](https://github.com/yt-dlp/yt-dlp/issues/8937)) by [Grub4K](https://github.com/Grub4K) +- **afreecatv**: live: [Add `cdn` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/315b3544296bb83012e20ee3af9d3cbf5600dd1c) ([#9666](https://github.com/yt-dlp/yt-dlp/issues/9666)) by [bashonly](https://github.com/bashonly) +- **alura**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/fc2879ecb05aaad36869609d154e4321362c1f63) ([#9658](https://github.com/yt-dlp/yt-dlp/issues/9658)) by [hugohaa](https://github.com/hugohaa) +- **artetv**: [Label forced subtitles](https://github.com/yt-dlp/yt-dlp/commit/7b5674949fd03a33b47b67b31d56a5adf1c48c91) ([#9945](https://github.com/yt-dlp/yt-dlp/issues/9945)) by [vtexier](https://github.com/vtexier) +- **bbc**: [Fix and extend extraction](https://github.com/yt-dlp/yt-dlp/commit/7975ddf245d22af034d5b983eeb1c5ec6c2ce053) ([#9705](https://github.com/yt-dlp/yt-dlp/issues/9705)) by [dirkf](https://github.com/dirkf), [kylegustavo](https://github.com/kylegustavo), [pukkandan](https://github.com/pukkandan) +- **bilibili**: [Fix `--geo-verification-proxy` support](https://github.com/yt-dlp/yt-dlp/commit/2338827072dacab0f15348b70aec8685feefc8d1) ([#9817](https://github.com/yt-dlp/yt-dlp/issues/9817)) by [fireattack](https://github.com/fireattack) +- **bilibilispacevideo** + - [Better error message](https://github.com/yt-dlp/yt-dlp/commit/06d52c87314e0bbc16c43c405090843885577b88) ([#9839](https://github.com/yt-dlp/yt-dlp/issues/9839)) by [fireattack](https://github.com/fireattack) + - [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/4cc99d7b6cce8b39506ead01407445d576b63ee4) ([#9905](https://github.com/yt-dlp/yt-dlp/issues/9905)) by 
[c-basalt](https://github.com/c-basalt) +- **boosty**: [Add cookies support](https://github.com/yt-dlp/yt-dlp/commit/145dc6f6563e80d2da1b3e9aea2ffa795b71622c) ([#9522](https://github.com/yt-dlp/yt-dlp/issues/9522)) by [RasmusAntons](https://github.com/RasmusAntons) +- **brilliantpala**: [Fix login](https://github.com/yt-dlp/yt-dlp/commit/eead3bbc01f6529862bdad1f0b2adeabda4f006e) ([#9788](https://github.com/yt-dlp/yt-dlp/issues/9788)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **canalalpha**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/00a9f2e1f7fa69499221f2e8dd73a08efeef79bc) ([#9675](https://github.com/yt-dlp/yt-dlp/issues/9675)) by [kclauhk](https://github.com/kclauhk) +- **cbc.ca**: player: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c8bf48f3a8fa29587e7c73ef5a7710385a5ea725) ([#9866](https://github.com/yt-dlp/yt-dlp/issues/9866)) by [carusocr](https://github.com/carusocr) +- **cda**: [Fix age-gated web extraction](https://github.com/yt-dlp/yt-dlp/commit/6d8a53d870ff6795f509085bfbf3981417999038) ([#9939](https://github.com/yt-dlp/yt-dlp/issues/9939)) by [dirkf](https://github.com/dirkf), [emqi](https://github.com/emqi), [Podiumnoche](https://github.com/Podiumnoche), [Szpachlarz](https://github.com/Szpachlarz) +- **commonmistakes**: [Raise error on blob URLs](https://github.com/yt-dlp/yt-dlp/commit/98d71d8c5e5dab08b561ee6f137e968d2a004262) ([#9897](https://github.com/yt-dlp/yt-dlp/issues/9897)) by [seproDev](https://github.com/seproDev) +- **crunchyroll** + - [Always make metadata available](https://github.com/yt-dlp/yt-dlp/commit/cb2fb4a643949322adba561ca73bcba3221ec0c5) ([#9772](https://github.com/yt-dlp/yt-dlp/issues/9772)) by [bashonly](https://github.com/bashonly) + - [Fix auth and remove cookies support](https://github.com/yt-dlp/yt-dlp/commit/ff38a011d57b763f3a69bebd25a5dc9044a717ce) ([#9749](https://github.com/yt-dlp/yt-dlp/issues/9749)) by [bashonly](https://github.com/bashonly) + - [Fix stream 
extraction](https://github.com/yt-dlp/yt-dlp/commit/f2816634e3be88fe158b342ee33918de3c272a54) ([#10005](https://github.com/yt-dlp/yt-dlp/issues/10005)) by [bashonly](https://github.com/bashonly) + - [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/5904853ae5788509fdc4892cb7ecdfa9ae7f78e6) ([#9857](https://github.com/yt-dlp/yt-dlp/issues/9857)) by [bashonly](https://github.com/bashonly) +- **dangalplay**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/0d067e77c3f5527946fb0c22ee1c7011994cba40) ([#10021](https://github.com/yt-dlp/yt-dlp/issues/10021)) by [bashonly](https://github.com/bashonly) +- **discoveryplus**: [Fix dmax.de and related extractors](https://github.com/yt-dlp/yt-dlp/commit/90d2da311bbb5dc06f385ee428c7e4590936e995) ([#10020](https://github.com/yt-dlp/yt-dlp/issues/10020)) by [bashonly](https://github.com/bashonly) +- **eplus**: [Handle URLs without videos](https://github.com/yt-dlp/yt-dlp/commit/351dc0bc334c4e1b5f00c152818c3ec0ed71f788) ([#9855](https://github.com/yt-dlp/yt-dlp/issues/9855)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **europarlwebstream**: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/800a43983e5fb719526ce4cb3956216085c63268) ([#9647](https://github.com/yt-dlp/yt-dlp/issues/9647)) by [seproDev](https://github.com/seproDev), [voidful](https://github.com/voidful) +- **facebook**: [Fix DASH formats extraction](https://github.com/yt-dlp/yt-dlp/commit/e3b42d8b1b8bcfff7ba146c19fc3f6f6ba843cea) ([#9734](https://github.com/yt-dlp/yt-dlp/issues/9734)) by [bashonly](https://github.com/bashonly) +- **godresource**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/65e709d23530959075816e966c42179ad46e8e3b) ([#9629](https://github.com/yt-dlp/yt-dlp/issues/9629)) by [HobbyistDev](https://github.com/HobbyistDev) +- **googledrive**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/85ec2a337ac325cf6427cbafd56f0a034c1a5218) 
([#9908](https://github.com/yt-dlp/yt-dlp/issues/9908)) by [WyohKnott](https://github.com/WyohKnott) +- **hearthisat**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/5bbfdb7c999b22f1aeca0c3489c167d6eb73013b) ([#9949](https://github.com/yt-dlp/yt-dlp/issues/9949)) by [bohwaz](https://github.com/bohwaz), [seproDev](https://github.com/seproDev) +- **hytale**: [Use `CloudflareStreamIE` explicitly](https://github.com/yt-dlp/yt-dlp/commit/31b417e1d1ccc67d5c027bf8878f483dc34cb118) ([#9672](https://github.com/yt-dlp/yt-dlp/issues/9672)) by [llamasblade](https://github.com/llamasblade) +- **instagram**: [Support `/reels/` URLs](https://github.com/yt-dlp/yt-dlp/commit/06cb0638392b607b47d3c2ac48eb2ebecb0f060d) ([#9539](https://github.com/yt-dlp/yt-dlp/issues/9539)) by [amir16yp](https://github.com/amir16yp) +- **jiocinema**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/1463945ae5fb05986a0bd1aa02e41d1a08d93a02) ([#10026](https://github.com/yt-dlp/yt-dlp/issues/10026)) by [bashonly](https://github.com/bashonly) +- **jiosaavn**: [Extract via API and fix playlists](https://github.com/yt-dlp/yt-dlp/commit/0c21c53885cf03f4040467ae8c44d7ff51016116) ([#9656](https://github.com/yt-dlp/yt-dlp/issues/9656)) by [bashonly](https://github.com/bashonly) +- **lci**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5a2eebc76770fca91ffabeff658d560f716fec80) ([#10025](https://github.com/yt-dlp/yt-dlp/issues/10025)) by [ocococococ](https://github.com/ocococococ) +- **mixch**: [Extract comments](https://github.com/yt-dlp/yt-dlp/commit/b38018b781b062d5169d104ab430489aef8e7f1e) ([#9860](https://github.com/yt-dlp/yt-dlp/issues/9860)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **moviepilot**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/296df0da1d38a44d34c99b60a18066c301774537) ([#9366](https://github.com/yt-dlp/yt-dlp/issues/9366)) by [panatexxa](https://github.com/panatexxa) +- **netease**: program: [Improve `--no-playlist` 
message](https://github.com/yt-dlp/yt-dlp/commit/73f12119b52d98281804b0c072b2ed6aa841ec88) ([#9488](https://github.com/yt-dlp/yt-dlp/issues/9488)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **nfb**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/0a1a8e3005f66c44bf67633dccd4df19c3fccd1a) ([#9650](https://github.com/yt-dlp/yt-dlp/issues/9650)) by [rrgomes](https://github.com/rrgomes) +- **ntslive**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/be7db1a5a8c483726c511c30ea4689cbb8b27962) ([#9641](https://github.com/yt-dlp/yt-dlp/issues/9641)) by [lostfictions](https://github.com/lostfictions) +- **orf**: on: [Improve extraction](https://github.com/yt-dlp/yt-dlp/commit/0dd53faeca2ba0ce138e4092d07b5f2dbf2422f9) ([#9677](https://github.com/yt-dlp/yt-dlp/issues/9677)) by [TuxCoder](https://github.com/TuxCoder) +- **orftvthek**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/3779f2a307ba3ef1d28e107cdd71b221dfb4eb36) ([#10011](https://github.com/yt-dlp/yt-dlp/issues/10011)) by [seproDev](https://github.com/seproDev) +- **patreon** + - [Extract multiple embeds](https://github.com/yt-dlp/yt-dlp/commit/036e0d92c6052465673d459678322ea03e61483d) ([#9850](https://github.com/yt-dlp/yt-dlp/issues/9850)) by [bashonly](https://github.com/bashonly) + - [Fix Vimeo embed extraction](https://github.com/yt-dlp/yt-dlp/commit/c9ce57d9bf51541da2381d99bc096a9d0ddf1f27) ([#9712](https://github.com/yt-dlp/yt-dlp/issues/9712)) by [bashonly](https://github.com/bashonly) +- **piapro**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3ba8de62d61d782256f5c1e9939a0762039657de) ([#9311](https://github.com/yt-dlp/yt-dlp/issues/9311)) by [FinnRG](https://github.com/FinnRG), [seproDev](https://github.com/seproDev) +- **pornhub**: [Fix login by email address](https://github.com/yt-dlp/yt-dlp/commit/518c1afc1592cae3e4eb39dc646b5bc059333112) ([#9914](https://github.com/yt-dlp/yt-dlp/issues/9914)) by [feederbox826](https://github.com/feederbox826) +- **qub**: 
[Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6b54cccdcb892bca3e55993480d8b86f1c7e6da6) ([#7019](https://github.com/yt-dlp/yt-dlp/issues/7019)) by [alexhuot1](https://github.com/alexhuot1), [dirkf](https://github.com/dirkf) +- **reddit**: [Fix subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/82f4f4444e26daf35b7302c406fe2312f78f619e) ([#10006](https://github.com/yt-dlp/yt-dlp/issues/10006)) by [kclauhk](https://github.com/kclauhk) +- **soundcloud** + - [Add `formats` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/beaf832c7a9d57833f365ce18f6115b88071b296) ([#10004](https://github.com/yt-dlp/yt-dlp/issues/10004)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) + - [Extract `genres`](https://github.com/yt-dlp/yt-dlp/commit/231c2eacc41b06b65c63edf94c0d04768a5da607) ([#9821](https://github.com/yt-dlp/yt-dlp/issues/9821)) by [bashonly](https://github.com/bashonly) +- **taptap**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/63b569bc5e7d461753637a20ad84a575adee4c0a) ([#9776](https://github.com/yt-dlp/yt-dlp/issues/9776)) by [c-basalt](https://github.com/c-basalt) +- **tele5**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/c92e4e625e9e6bbbbf8e3b20c3e7ebe57c16072d) ([#10024](https://github.com/yt-dlp/yt-dlp/issues/10024)) by [bashonly](https://github.com/bashonly) +- **theatercomplextown**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/8056a3026ed6ec6a6d0ed56fdd7ebcd16e928341) ([#9754](https://github.com/yt-dlp/yt-dlp/issues/9754)) by [bashonly](https://github.com/bashonly) +- **tiktok** + - [Add `device_id` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/3584b8390bd21c0393a3079eeee71aed56a1c1d8) ([#9951](https://github.com/yt-dlp/yt-dlp/issues/9951)) by [bashonly](https://github.com/bashonly) + - [Extract all web formats](https://github.com/yt-dlp/yt-dlp/commit/4ccd73fea0f6f4be343e1ec7f22dd03799addcf8) ([#9960](https://github.com/yt-dlp/yt-dlp/issues/9960)) by 
[bashonly](https://github.com/bashonly) + - [Extract via mobile API only if extractor-arg is passed](https://github.com/yt-dlp/yt-dlp/commit/41ba4a808b597a3afed78c89675a30deb6844450) ([#9938](https://github.com/yt-dlp/yt-dlp/issues/9938)) by [bashonly](https://github.com/bashonly) + - [Fix subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/eef1e9f44ff14c5e65b759bb1eafa3946cdaf719) ([#9961](https://github.com/yt-dlp/yt-dlp/issues/9961)) by [bashonly](https://github.com/bashonly) + - collection: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/119d41f27061d220d276a2d38cfc8d873437452a) ([#9986](https://github.com/yt-dlp/yt-dlp/issues/9986)) by [bashonly](https://github.com/bashonly), [imanoreotwe](https://github.com/imanoreotwe) + - user: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/347f13dd9bccc2b4db3ea25689410d45d8370ed4) ([#9661](https://github.com/yt-dlp/yt-dlp/issues/9661)) by [bashonly](https://github.com/bashonly) +- **tv5monde**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6db96268c521e945d42649607db1574f5d92e082) ([#9143](https://github.com/yt-dlp/yt-dlp/issues/9143)) by [alard](https://github.com/alard), [seproDev](https://github.com/seproDev) +- **twitter** + - [Fix auth for x.com migration](https://github.com/yt-dlp/yt-dlp/commit/3e35aa32c74bc108375be8c8b6b3bfc90dfff1b4) ([#9952](https://github.com/yt-dlp/yt-dlp/issues/9952)) by [bashonly](https://github.com/bashonly) + - [Support x.com URLs](https://github.com/yt-dlp/yt-dlp/commit/4813173e4544f125d6f2afc31e600727d761b8dd) ([#9926](https://github.com/yt-dlp/yt-dlp/issues/9926)) by [bashonly](https://github.com/bashonly) +- **vk**: [Improve format extraction](https://github.com/yt-dlp/yt-dlp/commit/df5c9e733aaba703cf285c0372b6d61629330c82) ([#9885](https://github.com/yt-dlp/yt-dlp/issues/9885)) by [seproDev](https://github.com/seproDev) +- **wrestleuniverse**: [Avoid partial stream 
formats](https://github.com/yt-dlp/yt-dlp/commit/c4853655cb9a793129280806af643de43c48f4d5) ([#9800](https://github.com/yt-dlp/yt-dlp/issues/9800)) by [bashonly](https://github.com/bashonly) +- **xiaohongshu**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a2e9031605d87c469be9ce98dbbdf4960b727338) ([#9646](https://github.com/yt-dlp/yt-dlp/issues/9646)) by [HobbyistDev](https://github.com/HobbyistDev) +- **xvideos**: quickies: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b207d26f83fb8ab0ce56df74dff43ff583a3264f) ([#9834](https://github.com/yt-dlp/yt-dlp/issues/9834)) by [JakeFinley96](https://github.com/JakeFinley96) +- **youporn**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/351368cb9a6731b886a58f5a10fd6b302bbe47be) ([#8827](https://github.com/yt-dlp/yt-dlp/issues/8827)) by [The-MAGI](https://github.com/The-MAGI) +- **youtube** + - [Add `mediaconnect` client](https://github.com/yt-dlp/yt-dlp/commit/cf212d0a331aba05c32117573f760cdf3af8c62f) ([#9546](https://github.com/yt-dlp/yt-dlp/issues/9546)) by [clienthax](https://github.com/clienthax) + - [Extract upload timestamp if available](https://github.com/yt-dlp/yt-dlp/commit/96a134dea6397a5f2131947c427aac52c8b4e677) ([#9856](https://github.com/yt-dlp/yt-dlp/issues/9856)) by [coletdjnz](https://github.com/coletdjnz) + - [Fix comments extraction](https://github.com/yt-dlp/yt-dlp/commit/8e15177b4113c355989881e4e030f695a9b59c3a) ([#9775](https://github.com/yt-dlp/yt-dlp/issues/9775)) by [bbilly1](https://github.com/bbilly1), [jakeogh](https://github.com/jakeogh), [minamotorin](https://github.com/minamotorin), [shoxie007](https://github.com/shoxie007) + - [Remove `android` from default clients](https://github.com/yt-dlp/yt-dlp/commit/12d8ea8246fa901de302ff5cc748caddadc82f41) ([#9553](https://github.com/yt-dlp/yt-dlp/issues/9553)) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz) +- **zenyandex**: [Fix 
extractor](https://github.com/yt-dlp/yt-dlp/commit/c4b87dd885ee5391e5f481e7c8bd550a7c543623) ([#9813](https://github.com/yt-dlp/yt-dlp/issues/9813)) by [src-tinkerer](https://github.com/src-tinkerer) + +#### Networking changes +- [Add `extensions` attribute to `Response`](https://github.com/yt-dlp/yt-dlp/commit/bec9a59e8ec82c18e3bf9268eaa436793dd52e35) ([#9756](https://github.com/yt-dlp/yt-dlp/issues/9756)) by [bashonly](https://github.com/bashonly) +- **Request Handler** + - requests + - [Patch support for `requests` 2.32.2+](https://github.com/yt-dlp/yt-dlp/commit/3f7999533ebe41c2a579d91b4e4cb211cfcd3bc0) ([#9992](https://github.com/yt-dlp/yt-dlp/issues/9992)) by [Grub4K](https://github.com/Grub4K) + - [Update to `requests` 2.32.0](https://github.com/yt-dlp/yt-dlp/commit/c36513f1be2ef3d3cec864accbffda1afaa06ffd) ([#9980](https://github.com/yt-dlp/yt-dlp/issues/9980)) by [coletdjnz](https://github.com/coletdjnz) + +#### Misc. changes +- [Add `hatch`, `ruff`, `pre-commit` and improve dev docs](https://github.com/yt-dlp/yt-dlp/commit/e897bd8292a41999cf51dba91b390db5643c72db) ([#7409](https://github.com/yt-dlp/yt-dlp/issues/7409)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev) +- **build** + - [Migrate `linux_exe` to static musl builds](https://github.com/yt-dlp/yt-dlp/commit/ac817bc83efd939dca3e40c4b527d0ccfc77172b) ([#9811](https://github.com/yt-dlp/yt-dlp/issues/9811)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) + - [Normalize `curl_cffi` group to `curl-cffi`](https://github.com/yt-dlp/yt-dlp/commit/02483bea1c4dbe1bace8ca4d19700104fbb8a00f) ([#9698](https://github.com/yt-dlp/yt-dlp/issues/9698)) by [bashonly](https://github.com/bashonly) (With fixes in [89f535e](https://github.com/yt-dlp/yt-dlp/commit/89f535e2656964b4061c25a7739d4d6ba0a30568)) + - [Run `macos_legacy` job on 
`macos-12`](https://github.com/yt-dlp/yt-dlp/commit/1a366403d9c26b992faa77e00f4d02ead57559e3) ([#9804](https://github.com/yt-dlp/yt-dlp/issues/9804)) by [bashonly](https://github.com/bashonly) + - [`macos` job requires `setuptools<70`](https://github.com/yt-dlp/yt-dlp/commit/78c57cc0e0998b8ed90e4306f410aa4be4115cd7) ([#9993](https://github.com/yt-dlp/yt-dlp/issues/9993)) by [bashonly](https://github.com/bashonly) +- **cleanup** + - [Remove questionable extractors](https://github.com/yt-dlp/yt-dlp/commit/01395a34345d1c6ba1b73ca92f94dd200dc45341) ([#9911](https://github.com/yt-dlp/yt-dlp/issues/9911)) by [seproDev](https://github.com/seproDev) + - Miscellaneous: [5c019f6](https://github.com/yt-dlp/yt-dlp/commit/5c019f6328ad40d66561eac3c4de0b3cd070d0f6), [ae2af11](https://github.com/yt-dlp/yt-dlp/commit/ae2af1104f80caf2f47544763a33db2c17a3e1de) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev) +- **test** + - [Add HTTP proxy tests](https://github.com/yt-dlp/yt-dlp/commit/3c7a287e281d9f9a353dce8902ff78a84c24a040) ([#9578](https://github.com/yt-dlp/yt-dlp/issues/9578)) by [coletdjnz](https://github.com/coletdjnz) + - [Fix connect timeout test](https://github.com/yt-dlp/yt-dlp/commit/53b4d44f55cca66ac33dab092ef2a30b1164b684) ([#9906](https://github.com/yt-dlp/yt-dlp/issues/9906)) by [coletdjnz](https://github.com/coletdjnz) + ### 2024.04.09 #### Important changes diff --git a/README.md b/README.md index 52c80f26e..e757567b5 100644 --- a/README.md +++ b/README.md @@ -665,16 +665,17 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git The name of the browser to load cookies from. Currently supported browsers are: brave, chrome, chromium, edge, firefox, - opera, safari, vivaldi, whale. 
Optionally, the - KEYRING used for decrypting Chromium cookies - on Linux, the name/path of the PROFILE to - load cookies from, and the CONTAINER name - (if Firefox) ("none" for no container) can - be given with their respective seperators. - By default, all containers of the most - recently accessed profile are used. - Currently supported keyrings are: basictext, - gnomekeyring, kwallet, kwallet5, kwallet6 + opera, safari, vivaldi, whale. Optionally, + the KEYRING used for decrypting Chromium + cookies on Linux, the name/path of the + PROFILE to load cookies from, and the + CONTAINER name (if Firefox) ("none" for no + container) can be given with their + respective seperators. By default, all + containers of the most recently accessed + profile are used. Currently supported + keyrings are: basictext, gnomekeyring, + kwallet, kwallet5, kwallet6 --no-cookies-from-browser Do not load cookies from browser (default) --cache-dir DIR Location in the filesystem where yt-dlp can store some downloaded information (such as diff --git a/supportedsites.md b/supportedsites.md index ba77c0feb..387395613 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -14,7 +14,6 @@ - **6play** - **7plus** - **8tracks** - - **91porn** - **9c9media** - **9gag**: 9GAG - **9News** @@ -220,7 +219,7 @@ - **BusinessInsider** - **BuzzFeed** - **BYUtv**: (**Currently broken**) - - **CableAV** + - **CaffeineTV** - **Callin** - **Caltrans** - **CAM4** @@ -333,6 +332,8 @@ - **DailyWirePodcast** - **damtomo:record** - **damtomo:video** + - **dangalplay**: [*dangalplay*](## "netrc machine") + - **dangalplay:season**: [*dangalplay*](## "netrc machine") - **daum.net** - **daum.net:clip** - **daum.net:playlist** @@ -396,7 +397,6 @@ - **EinsUndEinsTV**: [*1und1tv*](## "netrc machine") - **EinsUndEinsTVLive**: [*1und1tv*](## "netrc machine") - **EinsUndEinsTVRecordings**: [*1und1tv*](## "netrc machine") - - **Einthusan** - **eitb.tv** - **ElementorEmbed** - **Elonet** @@ -498,6 +498,7 @@ - **GameStar** - 
**Gaskrank** - **Gazeta**: (**Currently broken**) + - **GBNews**: GB News clips, features and live streams - **GDCVault**: [*gdcvault*](## "netrc machine") (**Currently broken**) - **GediDigital** - **gem.cbc.ca**: [*cbcgem*](## "netrc machine") @@ -527,6 +528,7 @@ - **GMANetworkVideo** - **Go** - **GoDiscovery** + - **GodResource** - **GodTube**: (**Currently broken**) - **Gofile** - **Golem** @@ -630,11 +632,11 @@ - **iwara:user**: [*iwara*](## "netrc machine") - **Ixigua** - **Izlesene** - - **Jable** - - **JablePlaylist** - **Jamendo** - **JamendoAlbum** - **JeuxVideo**: (**Currently broken**) + - **jiocinema**: [*jiocinema*](## "netrc machine") + - **jiocinema:series**: [*jiocinema*](## "netrc machine") - **jiosaavn:album** - **jiosaavn:playlist** - **jiosaavn:song** @@ -974,6 +976,7 @@ - **NRKTVSeason** - **NRKTVSeries** - **NRLTV**: (**Currently broken**) + - **nts.live** - **ntv.ru** - **NubilesPorn**: [*nubiles-porn*](## "netrc machine") - **nuum:live** @@ -1015,7 +1018,6 @@ - **orf:on** - **orf:podcast** - **orf:radio** - - **orf:tvthek**: ORF TVthek - **OsnatelTV**: [*osnateltv*](## "netrc machine") - **OsnatelTVLive**: [*osnateltv*](## "netrc machine") - **OsnatelTVRecordings**: [*osnateltv*](## "netrc machine") @@ -1394,6 +1396,10 @@ - **SztvHu** - **t-online.de**: (**Currently broken**) - **Tagesschau**: (**Currently broken**) + - **TapTapApp** + - **TapTapAppIntl** + - **TapTapMoment** + - **TapTapPostIntl** - **Tass**: (**Currently broken**) - **TBS** - **TBSJPEpisode** @@ -1412,7 +1418,7 @@ - **TedSeries** - **TedTalk** - **Tele13** - - **Tele5**: (**Currently broken**) + - **Tele5** - **TeleBruxelles** - **TelecaribePlay** - **Telecinco**: telecinco.es, cuatro.com and mediaset.es @@ -1452,11 +1458,12 @@ - **ThreeSpeak** - **ThreeSpeakUser** - **TikTok** + - **tiktok:collection** - **tiktok:effect**: (**Currently broken**) - **tiktok:live** - **tiktok:sound**: (**Currently broken**) - **tiktok:tag**: (**Currently broken**) - - **tiktok:user**: 
(**Currently broken**) + - **tiktok:user** - **TLC** - **TMZ** - **TNAFlix** @@ -1501,7 +1508,7 @@ - **tv2play.hu** - **tv2playseries.hu** - **TV4**: tv4.se and tv4play.se - - **TV5MondePlus**: TV5MONDE+ + - **TV5MONDE** - **tv5unis** - **tv5unis:video** - **tv8.it** @@ -1639,8 +1646,6 @@ - **voicy**: (**Currently broken**) - **voicy:channel**: (**Currently broken**) - **VolejTV** - - **Voot**: [*voot*](## "netrc machine") (**Currently broken**) - - **VootSeries**: [*voot*](## "netrc machine") (**Currently broken**) - **VoxMedia** - **VoxMediaVolume** - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl @@ -1715,10 +1720,10 @@ - **wykop:​post:comment** - **Xanimu** - **XboxClips** - - **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, WolfStream, XVideoSharing - **XHamster** - **XHamsterEmbed** - **XHamsterUser** + - **XiaoHongShu**: 小红书 - **ximalaya**: 喜马拉雅FM - **ximalaya:album**: 喜马拉雅FM 专辑 - **xinpianchang**: xinpianchang.com (**Currently broken**) @@ -1749,8 +1754,12 @@ - **YouNowLive** - **YouNowMoment** - **YouPorn** - - **YourPorn** - - **YourUpload** + - **YouPornCategory**: YouPorn category, with sorting, filtering and pagination + - **YouPornChannel**: YouPorn channel, with sorting and pagination + - **YouPornCollection**: YouPorn collection (user playlist), with sorting and pagination + - **YouPornStar**: YouPorn Pornstar, with description, sorting and pagination + - **YouPornTag**: YouPorn tag (porntags), with sorting, filtering and pagination + - **YouPornVideos**: YouPorn video (browse) playlists, with sorting, filtering and pagination - **youtube**: YouTube - **youtube:clip** - **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies) diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 22c2c048d..415dc0eaf 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by 
devscripts/update-version.py -__version__ = '2024.04.09' +__version__ = '2024.05.26' -RELEASE_GIT_HEAD = 'ff07792676f404ffff6ee61b5638c9dc1a33a37a' +RELEASE_GIT_HEAD = 'ae2af1104f80caf2f47544763a33db2c17a3e1de' VARIANT = None @@ -12,4 +12,4 @@ CHANNEL = 'stable' ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2024.04.09' +_pkg_version = '2024.05.26' From 26603d0b34898818992bee4598e0607c07059511 Mon Sep 17 00:00:00 2001 From: sepro <4618135+seproDev@users.noreply.github.com> Date: Mon, 27 May 2024 00:06:34 +0200 Subject: [PATCH 36/48] [ie] Fix parsing of base URL in SMIL manifest (#9225) Authored by: seproDev --- yt_dlp/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index b99b7e5ab..1d2c443c0 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2451,7 +2451,7 @@ class InfoExtractor: }) continue - src_url = src if src.startswith('http') else urllib.parse.urljoin(base, src) + src_url = src if src.startswith('http') else urllib.parse.urljoin(f'{base}/', src) src_url = src_url.strip() if proto == 'm3u8' or src_ext == 'm3u8': From ae2194e1dd4a99d32eb3cab7c48a0ff03101ef3b Mon Sep 17 00:00:00 2001 From: sepro <4618135+seproDev@users.noreply.github.com> Date: Mon, 27 May 2024 01:24:03 +0200 Subject: [PATCH 37/48] [ie/Piksel] Update domain (#9223) Authored by: seproDev --- yt_dlp/extractor/piksel.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/piksel.py b/yt_dlp/extractor/piksel.py index 8870d7b99..02ae2fe1a 100644 --- a/yt_dlp/extractor/piksel.py +++ b/yt_dlp/extractor/piksel.py @@ -25,29 +25,31 @@ class PikselIE(InfoExtractor): )| (?:api|player)\.multicastmedia| (?:api-ovp|player)\.piksel - )\.com| + )\.(?:com|tech)| (?: mz-edge\.stream\.co| movie-s\.nhk\.or )\.jp| vidego\.baltimorecity\.gov )/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)''' - _EMBED_REGEX = 
[r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)'] + _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.(?:com|tech)/v/[a-z0-9]+)'] _TESTS = [ { - 'url': 'http://player.piksel.com/v/ums2867l', + 'url': 'http://player.piksel.tech/v/ums2867l', 'md5': '34e34c8d89dc2559976a6079db531e85', 'info_dict': { 'id': 'ums2867l', 'ext': 'mp4', 'title': 'GX-005 with Caption', 'timestamp': 1481335659, - 'upload_date': '20161210' + 'upload_date': '20161210', + 'description': '', + 'thumbnail': 'https://thumbs.piksel.tech/thumbs/aid/t1488331553/3238987.jpg?w=640&h=480', } }, { # Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al - 'url': 'https://player.piksel.com/v/v80kqp41', + 'url': 'https://player.piksel.tech/v/v80kqp41', 'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d', 'info_dict': { 'id': 'v80kqp41', @@ -55,7 +57,8 @@ class PikselIE(InfoExtractor): 'title': 'WAW- State of Washington vs. Donald J. Trump, et al', 'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. 
Robart presiding.', 'timestamp': 1486171129, - 'upload_date': '20170204' + 'upload_date': '20170204', + 'thumbnail': 'https://thumbs.piksel.tech/thumbs/aid/t1495569155/3279887.jpg?w=640&h=360', } }, { @@ -65,7 +68,7 @@ class PikselIE(InfoExtractor): } ] - def _call_api(self, app_token, resource, display_id, query, host='https://player.piksel.com', fatal=True): + def _call_api(self, app_token, resource, display_id, query, host='https://player.piksel.tech', fatal=True): url = urljoin(host, f'/ws/ws_{resource}/api/{app_token}/mode/json/apiv/5') response = traverse_obj( self._download_json(url, display_id, query=query, fatal=fatal), ('response', {dict})) or {} @@ -146,7 +149,7 @@ class PikselIE(InfoExtractor): smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil']) if smil_url: - transform_source = None + transform_source = lambda x: x.replace('src="/', 'src="') if ref_id == 'nhkworld': # TODO: figure out if this is something to be fixed in urljoin, # _parse_smil_formats or keep it here From c53c2e40fde8f2e15c7c62f8ca1a5d9e90ddc079 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 26 May 2024 23:22:46 -0500 Subject: [PATCH 38/48] [ie/tiktok:user] Fix extraction loop (#10035) Closes #10033 Authored by: bashonly --- yt_dlp/extractor/tiktok.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index ab8efc19e..7bcfdedbe 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -940,6 +940,7 @@ class TikTokUserIE(TikTokBaseIE): def _entries(self, sec_uid, user_name): display_id = user_name or sec_uid + seen_ids = set() cursor = int(time.time() * 1E3) for page in itertools.count(1): @@ -949,6 +950,9 @@ class TikTokUserIE(TikTokBaseIE): for video in traverse_obj(response, ('itemList', lambda _, v: v['id'])): video_id = video['id'] + if video_id in seen_ids: + continue + seen_ids.add(video_id) webpage_url = 
self._create_url(display_id, video_id) yield self.url_result( webpage_url, TikTokIE, @@ -956,8 +960,8 @@ class TikTokUserIE(TikTokBaseIE): old_cursor = cursor cursor = traverse_obj( - response, ('itemList', -1, 'createTime', {functools.partial(int_or_none, invscale=1E3)})) - if not cursor: + response, ('itemList', -1, 'createTime', {lambda x: int(x * 1E3)})) + if not cursor or old_cursor == cursor: # User may not have posted within this ~1 week lookback, so manually adjust cursor cursor = old_cursor - 7 * 86_400_000 # In case 'hasMorePrevious' is wrong, break if we have gone back before TikTok existed From 5e3e19c93c52830da98d9d1ed84ea7a559efefbd Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 27 May 2024 16:46:07 -0500 Subject: [PATCH 39/48] [cleanup] Misc (#10043) Authored by: bashonly --- README.md | 3 +++ yt_dlp/options.py | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e757567b5..e8cd6d3a0 100644 --- a/README.md +++ b/README.md @@ -401,6 +401,9 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git --impersonate CLIENT[:OS] Client to impersonate for requests. E.g. chrome, chrome-110, chrome:windows-10. Pass --impersonate="" to impersonate any client. + Note that forcing impersonation for all + requests may have a detrimental impact on + download speed and stability --list-impersonate-targets List available clients to impersonate. -4, --force-ipv4 Make all connections via IPv4 -6, --force-ipv6 Make all connections via IPv6 diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 997b575cd..9615bfbaa 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -520,7 +520,8 @@ def create_parser(): metavar='CLIENT[:OS]', dest='impersonate', default=None, help=( 'Client to impersonate for requests. E.g. chrome, chrome-110, chrome:windows-10. 
' - 'Pass --impersonate="" to impersonate any client.'), + 'Pass --impersonate="" to impersonate any client. Note that forcing impersonation ' + 'for all requests may have a detrimental impact on download speed and stability'), ) network.add_option( '--list-impersonate-targets', From 12b248ce60be1aa1362edd839d915bba70dbee4b Mon Sep 17 00:00:00 2001 From: trueauracoral <87541524+trueauracoral@users.noreply.github.com> Date: Mon, 27 May 2024 17:24:01 -0500 Subject: [PATCH 40/48] [ie/peertube] Support livestreams (#10044) Closes #2055 Authored by: trueauracoral, bashonly --- yt_dlp/extractor/peertube.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py index b7919c073..fb4d02562 100644 --- a/yt_dlp/extractor/peertube.py +++ b/yt_dlp/extractor/peertube.py @@ -1470,11 +1470,15 @@ class PeerTubeIE(InfoExtractor): title = video['name'] - formats = [] + formats, is_live = [], False files = video.get('files') or [] for playlist in (video.get('streamingPlaylists') or []): if not isinstance(playlist, dict): continue + if playlist_url := url_or_none(playlist.get('playlistUrl')): + is_live = True + formats.extend(self._extract_m3u8_formats( + playlist_url, video_id, fatal=False, live=True)) playlist_files = playlist.get('files') if not (playlist_files and isinstance(playlist_files, list)): continue @@ -1498,6 +1502,7 @@ class PeerTubeIE(InfoExtractor): f['vcodec'] = 'none' else: f['fps'] = int_or_none(file_.get('fps')) + is_live = False formats.append(f) description = video.get('description') @@ -1555,6 +1560,7 @@ class PeerTubeIE(InfoExtractor): 'categories': categories, 'formats': formats, 'subtitles': subtitles, + 'is_live': is_live, 'webpage_url': webpage_url, } From 111b61ddef305584d45a48e7b7c73ffcedf062a2 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 27 May 2024 22:35:55 +0000 Subject: [PATCH 41/48] Release 2024.05.27 
Created by: bashonly :ci skip all :ci run dl --- CONTRIBUTORS | 1 + Changelog.md | 11 +++++++++++ yt_dlp/version.py | 6 +++--- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index b2a476bea..e0d1668ee 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -630,3 +630,4 @@ TuxCoder voidful vtexier WyohKnott +trueauracoral diff --git a/Changelog.md b/Changelog.md index 0d27f1a92..267330208 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,17 @@ # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2024.05.27 + +#### Extractor changes +- [Fix parsing of base URL in SMIL manifest](https://github.com/yt-dlp/yt-dlp/commit/26603d0b34898818992bee4598e0607c07059511) ([#9225](https://github.com/yt-dlp/yt-dlp/issues/9225)) by [seproDev](https://github.com/seproDev) +- **peertube**: [Support livestreams](https://github.com/yt-dlp/yt-dlp/commit/12b248ce60be1aa1362edd839d915bba70dbee4b) ([#10044](https://github.com/yt-dlp/yt-dlp/issues/10044)) by [bashonly](https://github.com/bashonly), [trueauracoral](https://github.com/trueauracoral) +- **piksel**: [Update domain](https://github.com/yt-dlp/yt-dlp/commit/ae2194e1dd4a99d32eb3cab7c48a0ff03101ef3b) ([#9223](https://github.com/yt-dlp/yt-dlp/issues/9223)) by [seproDev](https://github.com/seproDev) +- **tiktok**: user: [Fix extraction loop](https://github.com/yt-dlp/yt-dlp/commit/c53c2e40fde8f2e15c7c62f8ca1a5d9e90ddc079) ([#10035](https://github.com/yt-dlp/yt-dlp/issues/10035)) by [bashonly](https://github.com/bashonly) + +#### Misc. 
changes +- **cleanup**: Miscellaneous: [5e3e19c](https://github.com/yt-dlp/yt-dlp/commit/5e3e19c93c52830da98d9d1ed84ea7a559efefbd) by [bashonly](https://github.com/bashonly) + ### 2024.05.26 #### Core changes diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 415dc0eaf..a90b288c9 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2024.05.26' +__version__ = '2024.05.27' -RELEASE_GIT_HEAD = 'ae2af1104f80caf2f47544763a33db2c17a3e1de' +RELEASE_GIT_HEAD = '12b248ce60be1aa1362edd839d915bba70dbee4b' VARIANT = None @@ -12,4 +12,4 @@ CHANNEL = 'stable' ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2024.05.26' +_pkg_version = '2024.05.27' From bef9a9e5361fd7a72e21d0f1a8c8afb70d89e8c5 Mon Sep 17 00:00:00 2001 From: Ben Galliart <bgallia@gmail.com> Date: Tue, 28 May 2024 23:25:05 -0500 Subject: [PATCH 42/48] [ie/TubiTv] Fix extractor (#9975) Closes #9937 Authored by: chilinux --- yt_dlp/extractor/tubitv.py | 101 ++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 46 deletions(-) diff --git a/yt_dlp/extractor/tubitv.py b/yt_dlp/extractor/tubitv.py index bd46bc363..78be86d58 100644 --- a/yt_dlp/extractor/tubitv.py +++ b/yt_dlp/extractor/tubitv.py @@ -7,33 +7,45 @@ from ..utils import ( int_or_none, js_to_json, traverse_obj, + url_or_none, urlencode_postdata, ) class TubiTvIE(InfoExtractor): - _VALID_URL = r'''(?x) - (?: - tubitv:| - https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/ - ) - (?P<id>[0-9]+)''' + _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?P<type>video|movies|tv-shows)/(?P<id>\d+)' _LOGIN_URL = 'http://tubitv.com/login' _NETRC_MACHINE = 'tubitv' - _GEO_COUNTRIES = ['US'] _TESTS = [{ - 'url': 'https://tubitv.com/movies/383676/tracker', - 'md5': '566fa0f76870302d11af0de89511d3f0', + 'url': 'https://tubitv.com/movies/100004539/the-39-steps', 'info_dict': { - 'id': '383676', + 'id': '100004539', 'ext': 'mp4', - 'title': 'Tracker', - 'description': 
'md5:ff320baf43d0ad2655e538c1d5cd9706', - 'uploader_id': 'f866e2677ea2f0dff719788e4f7f9195', - 'release_year': 2010, + 'title': 'The 39 Steps', + 'description': 'md5:bb2f2dd337f0dc58c06cb509943f54c8', + 'uploader_id': 'abc2558d54505d4f0f32be94f2e7108c', + 'release_year': 1935, 'thumbnail': r're:^https?://.+\.(jpe?g|png)$', - 'duration': 6122, + 'duration': 5187, }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://tubitv.com/tv-shows/554628/s01-e01-rise-of-the-snakes', + 'info_dict': { + 'id': '554628', + 'ext': 'mp4', + 'title': 'S01:E01 - Rise of the Snakes', + 'description': 'md5:ba136f586de53af0372811e783a3f57d', + 'episode': 'Rise of the Snakes', + 'episode_number': 1, + 'season': 'Season 1', + 'season_number': 1, + 'uploader_id': '2a9273e728c510d22aa5c57d0646810b', + 'release_year': 2011, + 'thumbnail': r're:^https?://.+\.(jpe?g|png)$', + 'duration': 1376, + }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday', 'md5': '43ac06be9326f41912dc64ccf7a80320', @@ -81,45 +93,39 @@ class TubiTvIE(InfoExtractor): 'Login failed (invalid username/password)', expected=True) def _real_extract(self, url): - video_id = self._match_id(url) - video_data = self._download_json(f'https://tubitv.com/oz/videos/{video_id}/content', video_id, query={ - 'video_resources': ['dash', 'hlsv3', 'hlsv6', *self._UNPLAYABLE_FORMATS], - }) - title = video_data['title'] + video_id, video_type = self._match_valid_url(url).group('id', 'type') + webpage = self._download_webpage(f'https://tubitv.com/{video_type}/{video_id}/', video_id) + video_data = self._search_json( + r'window\.__data\s*=', webpage, 'data', video_id, + transform_source=js_to_json)['video']['byId'][video_id] formats = [] drm_formats = False - for resource in video_data['video_resources']: - if resource['type'] in ('dash', ): - formats += self._extract_mpd_formats(resource['manifest']['url'], video_id, mpd_id=resource['type'], fatal=False) - elif 
resource['type'] in ('hlsv3', 'hlsv6'): - formats += self._extract_m3u8_formats(resource['manifest']['url'], video_id, 'mp4', m3u8_id=resource['type'], fatal=False) - elif resource['type'] in self._UNPLAYABLE_FORMATS: + for resource in traverse_obj(video_data, ('video_resources', lambda _, v: url_or_none(v['manifest']['url']))): + resource_type = resource.get('type') + manifest_url = resource['manifest']['url'] + if resource_type == 'dash': + formats.extend(self._extract_mpd_formats(manifest_url, video_id, mpd_id=resource_type, fatal=False)) + elif resource_type in ('hlsv3', 'hlsv6'): + formats.extend(self._extract_m3u8_formats(manifest_url, video_id, 'mp4', m3u8_id=resource_type, fatal=False)) + elif resource_type in self._UNPLAYABLE_FORMATS: drm_formats = True + else: + self.report_warning(f'Skipping unknown resource type "{resource_type}"') if not formats and drm_formats: self.report_drm(video_id) elif not formats and not video_data.get('policy_match'): # policy_match is False if content was removed raise ExtractorError('This content is currently unavailable', expected=True) - thumbnails = [] - for thumbnail_url in video_data.get('thumbnails', []): - if not thumbnail_url: - continue - thumbnails.append({ - 'url': self._proto_relative_url(thumbnail_url), - }) - subtitles = {} - for sub in video_data.get('subtitles', []): - sub_url = sub.get('url') - if not sub_url: - continue + for sub in traverse_obj(video_data, ('subtitles', lambda _, v: url_or_none(v['url']))): subtitles.setdefault(sub.get('lang', 'English'), []).append({ - 'url': self._proto_relative_url(sub_url), + 'url': self._proto_relative_url(sub['url']), }) + title = traverse_obj(video_data, ('title', {str})) season_number, episode_number, episode_title = self._search_regex( r'^S(\d+):E(\d+) - (.+)', title, 'episode info', fatal=False, group=(1, 2, 3), default=(None, None, None)) @@ -128,18 +134,21 @@ class TubiTvIE(InfoExtractor): 'title': title, 'formats': formats, 'subtitles': subtitles, - 
'thumbnails': thumbnails, - 'description': video_data.get('description'), - 'duration': int_or_none(video_data.get('duration')), - 'uploader_id': video_data.get('publisher_id'), - 'release_year': int_or_none(video_data.get('year')), 'season_number': int_or_none(season_number), 'episode_number': int_or_none(episode_number), - 'episode_title': episode_title + 'episode': episode_title, + **traverse_obj(video_data, { + 'description': ('description', {str}), + 'duration': ('duration', {int_or_none}), + 'uploader_id': ('publisher_id', {str}), + 'release_year': ('year', {int_or_none}), + 'thumbnails': ('thumbnails', ..., {url_or_none}, {'url': {self._proto_relative_url}}), + }), } class TubiTvShowIE(InfoExtractor): + _WORKING = False _VALID_URL = r'https?://(?:www\.)?tubitv\.com/series/[0-9]+/(?P<show_name>[^/?#]+)' _TESTS = [{ 'url': 'https://tubitv.com/series/3936/the-joy-of-painting-with-bob-ross?start=true', @@ -160,7 +169,7 @@ class TubiTvShowIE(InfoExtractor): if traverse_obj(show_json, ('byId', episode_id, 'type')) == 's': continue yield self.url_result( - 'tubitv:%s' % episode_id, + f'https://tubitv.com/tv-shows/{episode_id}/', ie=TubiTvIE.ie_key(), video_id=episode_id) def _real_extract(self, url): From 8b46ad4d8b8ee8c5472af0cde863baa89ca3f425 Mon Sep 17 00:00:00 2001 From: sepro <4618135+seproDev@users.noreply.github.com> Date: Wed, 29 May 2024 23:16:57 +0200 Subject: [PATCH 43/48] [ie/orf:on] Support segmented episodes (#10053) Closes #9930 Authored by: seproDev --- yt_dlp/extractor/orf.py | 139 +++++++++++++++++++++++++++++++--------- 1 file changed, 110 insertions(+), 29 deletions(-) diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py index 3c837becd..039f33bd6 100644 --- a/yt_dlp/extractor/orf.py +++ b/yt_dlp/extractor/orf.py @@ -12,7 +12,9 @@ from ..utils import ( mimetype2ext, orderedSet, parse_age_limit, + parse_iso8601, remove_end, + str_or_none, strip_jsonp, try_call, unified_strdate, @@ -390,7 +392,7 @@ class ORFFM4StoryIE(InfoExtractor): 
class ORFONIE(InfoExtractor): IE_NAME = 'orf:on' - _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d+)' + _VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d+)(?:/(?P<segment>\d+))?' _TESTS = [{ 'url': 'https://on.orf.at/video/14210000/school-of-champions-48', 'info_dict': { @@ -401,10 +403,14 @@ class ORFONIE(InfoExtractor): 'title': 'School of Champions (4/8)', 'description': 'md5:d09ad279fc2e8502611e7648484b6afd', 'media_type': 'episode', - 'timestamp': 1706472362, - 'upload_date': '20240128', + 'timestamp': 1706558922, + 'upload_date': '20240129', + 'release_timestamp': 1706472362, + 'release_date': '20240128', + 'modified_timestamp': 1712756663, + 'modified_date': '20240410', '_old_archive_ids': ['orftvthek 14210000'], - } + }, }, { 'url': 'https://on.orf.at/video/3220355', 'md5': 'f94d98e667cf9a3851317efb4e136662', @@ -418,18 +424,87 @@ class ORFONIE(InfoExtractor): 'media_type': 'episode', 'timestamp': 52916400, 'upload_date': '19710905', + 'release_timestamp': 52916400, + 'release_date': '19710905', + 'modified_timestamp': 1498536049, + 'modified_date': '20170627', '_old_archive_ids': ['orftvthek 3220355'], - } + }, + }, { + # Video with multiple segments selecting the second segment + 'url': 'https://on.orf.at/video/14226549/15639808/jugendbande-einbrueche-aus-langeweile', + 'md5': '90f4ebff86b4580837b8a361d0232a9e', + 'info_dict': { + 'id': '15639808', + 'ext': 'mp4', + 'duration': 97.707, + 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0175/43/thumb_17442704_segments_highlight_teaser.jpg', + 'title': 'Jugendbande: Einbrüche aus Langeweile', + 'description': 'md5:193df0bf0d91cf16830c211078097120', + 'media_type': 'segment', + 'timestamp': 1715792400, + 'upload_date': '20240515', + 'modified_timestamp': 1715794394, + 'modified_date': '20240515', + '_old_archive_ids': ['orftvthek 15639808'], + }, + 'params': {'noplaylist': True}, + }, { + # Video with multiple segments and no combined version + 'url': 
'https://on.orf.at/video/14227864/formel-1-grosser-preis-von-monaco-2024', + 'info_dict': { + '_type': 'multi_video', + 'id': '14227864', + 'duration': 18410.52, + 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0176/04/thumb_17503881_segments_highlight_teaser.jpg', + 'title': 'Formel 1: Großer Preis von Monaco 2024', + 'description': 'md5:aeeb010710ccf70ce28ccb4482243d4f', + 'media_type': 'episode', + 'timestamp': 1716721200, + 'upload_date': '20240526', + 'release_timestamp': 1716721802, + 'release_date': '20240526', + 'modified_timestamp': 1716967501, + 'modified_date': '20240529', + }, + 'playlist_count': 42, + }, { + # Video with multiple segments, but with combined version + 'url': 'https://on.orf.at/video/14228172', + 'info_dict': { + 'id': '14228172', + 'ext': 'mp4', + 'duration': 3294.878, + 'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0176/17/thumb_17516455_segments_highlight_teaser.jpg', + 'title': 'Willkommen Österreich mit Stermann & Grissemann', + 'description': 'md5:5de034d033a9c27f989343be3bbd4839', + 'media_type': 'episode', + 'timestamp': 1716926584, + 'upload_date': '20240528', + 'release_timestamp': 1716919202, + 'release_date': '20240528', + 'modified_timestamp': 1716968045, + 'modified_date': '20240529', + '_old_archive_ids': ['orftvthek 14228172'], + }, }] - def _extract_video(self, video_id): - encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode() - api_json = self._download_json( - f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', video_id) - - if traverse_obj(api_json, 'is_drm_protected'): - self.report_drm(video_id) + @staticmethod + def _parse_metadata(api_json): + return traverse_obj(api_json, { + 'id': ('id', {int}, {str_or_none}), + 'age_limit': ('age_classification', {parse_age_limit}), + 'duration': ('exact_duration', {functools.partial(float_or_none, scale=1000)}), + 'title': (('title', 'headline'), {str}), + 'description': (('description', 
'teaser_text'), {str}), + 'media_type': ('video_type', {str}), + 'thumbnail': ('_embedded', 'image', 'public_urls', 'highlight_teaser', 'url', {url_or_none}), + 'timestamp': (('date', 'episode_date'), {parse_iso8601}), + 'release_timestamp': ('release_date', {parse_iso8601}), + 'modified_timestamp': ('updated_at', {parse_iso8601}), + }, get_all=False) + def _extract_video_info(self, video_id, api_json): formats, subtitles = [], {} for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)): for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})): @@ -454,24 +529,30 @@ class ORFONIE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, '_old_archive_ids': [make_archive_id('ORFTVthek', video_id)], - **traverse_obj(api_json, { - 'age_limit': ('age_classification', {parse_age_limit}), - 'duration': ('duration_second', {float_or_none}), - 'title': (('title', 'headline'), {str}), - 'description': (('description', 'teaser_text'), {str}), - 'media_type': ('video_type', {str}), - }, get_all=False), + **self._parse_metadata(api_json), } def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + video_id, segment_id = self._match_valid_url(url).group('id', 'segment') - return { - 'id': video_id, - 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None), - 'description': self._html_search_meta( - ['description', 'og:description', 'twitter:description'], webpage, default=None), - **self._search_json_ld(webpage, video_id, fatal=False), - **self._extract_video(video_id), - } + encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode() + api_json = self._download_json( + f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', video_id) + + if traverse_obj(api_json, 'is_drm_protected'): + self.report_drm(video_id) + + segments = traverse_obj(api_json, ('_embedded', 'segments', lambda _, 
v: v['id'])) + selected_segment = traverse_obj(segments, (lambda _, v: str(v['id']) == segment_id, any)) + + # selected_segment will be falsy if input URL did not include a valid segment_id + if selected_segment and not self._yes_playlist(video_id, segment_id, playlist_label='episode', video_label='segment'): + return self._extract_video_info(segment_id, selected_segment) + + # Even some segmented videos have an unsegmented version available in API response root + if not traverse_obj(api_json, ('sources', ..., ..., 'src', {url_or_none})): + return self.playlist_result( + (self._extract_video_info(str(segment['id']), segment) for segment in segments), + video_id, **self._parse_metadata(api_json), multi_video=True) + + return self._extract_video_info(video_id, api_json) From 03334d639d5282cd4107edb32c623ba400262fc4 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 30 May 2024 13:53:37 -0500 Subject: [PATCH 44/48] [build] Use `macos-12` image for `yt-dlp_macos` (#10063) Ref: https://github.blog/changelog/2024-05-20-actions-upcoming-changes-to-github-hosted-macos-runners/ Authored by: bashonly --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 55cf3b3a2..e3896e9c9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -237,7 +237,7 @@ jobs: macos: needs: process if: inputs.macos - runs-on: macos-11 + runs-on: macos-12 steps: - uses: actions/checkout@v4 From 5fdd13006a1c5d78642c8d3c4c7df0448273c2ae Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 30 May 2024 17:34:02 -0500 Subject: [PATCH 45/48] [build] Bump Pyinstaller to `>=6.7.0` for all builds (#10069) Ref: https://github.com/pyinstaller/pyinstaller/issues/8554 Authored by: bashonly, seproDev Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com> --- .github/workflows/build.yml | 26 
+++++++++++++++++++------- pyproject.toml | 5 ++--- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e3896e9c9..9a1a22e8f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -260,11 +260,23 @@ jobs: --pre -d curl_cffi_whls \ -r requirements.txt done + ( # Overwrite x86_64-only libs with fat/universal2 libs or else Pyinstaller will do the opposite + # See https://github.com/yt-dlp/yt-dlp/pull/10069 + cd curl_cffi_whls + mkdir -p curl_cffi/.dylibs + python_libdir=$(python3 -c 'import sys; from pathlib import Path; print(Path(sys.path[1]).parent)') + for dylib in lib{ssl,crypto}.3.dylib; do + cp "${python_libdir}/${dylib}" "curl_cffi/.dylibs/${dylib}" + for wheel in curl_cffi*macos*x86_64.whl; do + zip "${wheel}" "curl_cffi/.dylibs/${dylib}" + done + done + ) python3 -m delocate.cmd.delocate_fuse curl_cffi_whls/curl_cffi*.whl -w curl_cffi_universal2 python3 -m delocate.cmd.delocate_fuse curl_cffi_whls/cffi*.whl -w curl_cffi_universal2 cd curl_cffi_universal2 - for wheel in *cffi*.whl; do mv -n -- "${wheel}" "${wheel/x86_64/universal2}"; done - python3 -m pip install -U --user *cffi*.whl + for wheel in ./*cffi*.whl; do mv -n -- "${wheel}" "${wheel/x86_64/universal2}"; done + python3 -m pip install -U --user ./*cffi*.whl - name: Prepare run: | @@ -311,7 +323,7 @@ jobs: # Hack to get the latest patch version. 
Uncomment if needed #brew install python@3.10 #export PYTHON_VERSION=$( $(brew --prefix)/opt/python@3.10/bin/python3 --version | cut -d ' ' -f 2 ) - curl https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-macos11.pkg -o "python.pkg" + curl "https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-macos11.pkg" -o "python.pkg" sudo installer -pkg python.pkg -target / python3 --version - name: Install Requirements @@ -361,7 +373,7 @@ jobs: run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds python devscripts/install_deps.py -o --include build python devscripts/install_deps.py --include curl-cffi - python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl" + python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.7.0-py3-none-any.whl" - name: Prepare run: | @@ -421,7 +433,7 @@ jobs: run: | python devscripts/install_deps.py -o --include build python devscripts/install_deps.py - python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.8.0-py3-none-any.whl" + python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.7.0-py3-none-any.whl" - name: Prepare run: | @@ -475,8 +487,8 @@ jobs: run: | cd ./artifact/ # make sure SHA sums are also printed to stdout - sha256sum * | tee ../SHA2-256SUMS - sha512sum * | tee ../SHA2-512SUMS + sha256sum -- * | tee ../SHA2-256SUMS + sha512sum -- * | tee ../SHA2-512SUMS - name: Make Update spec run: | diff --git a/pyproject.toml b/pyproject.toml index 96cb368b6..b746fbc96 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,7 +62,7 @@ build = [ "build", "hatchling", "pip", - "setuptools>=66.1.0,<70", + "setuptools", "wheel", ] dev = [ @@ -78,8 +78,7 @@ test = [ "pytest~=8.1", ] pyinstaller = [ - "pyinstaller>=6.3; sys_platform!='darwin'", - "pyinstaller==5.13.2; sys_platform=='darwin'", # needed for curl_cffi + 
"pyinstaller>=6.7.0", # for compat with setuptools>=70 ] py2exe = [ "py2exe>=0.12", From 2e5a47da400b645aadbda6afd1156bd89c744f48 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 30 May 2024 18:04:27 -0500 Subject: [PATCH 46/48] [ie/PatreonCampaign] Fix `campaign_id` extraction (#10070) Closes #10013 Authored by: bashonly --- yt_dlp/extractor/patreon.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 6c441ff34..efbface4b 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -486,7 +486,8 @@ class PatreonCampaignIE(PatreonBaseIE): campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity') if campaign_id is None: webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.USER_AGENT}) - campaign_id = self._search_regex(r'https://www.patreon.com/api/campaigns/(\d+)/?', webpage, 'Campaign ID') + campaign_id = self._search_nextjs_data( + webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id'] params = { 'json-api-use-default-includes': 'false', From db50f19d76c6870a5a13d0cab9287d684fd7449a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 1 Jun 2024 13:57:23 -0500 Subject: [PATCH 47/48] [rh:requests] Bump minimum `requests` version to 2.32.2 (#10079) Closes #10078 Authored by: bashonly --- README.md | 2 +- bundle/py2exe.py | 6 +++--- pyproject.toml | 3 +-- yt_dlp/networking/_requests.py | 9 ++------- 4 files changed, 7 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index e8cd6d3a0..42ffd9b52 100644 --- a/README.md +++ b/README.md @@ -262,7 +262,7 @@ You can also run `make yt-dlp` instead to compile only the binary without updati ### Standalone Py2Exe Builds (Windows) -While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build 
[using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi` and need VC++14** on the target computer to run. +While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi`/`requests` and need VC++14** on the target computer to run. If you wish to build it anyway, install Python (if it is not already installed) and you can run the following commands: diff --git a/bundle/py2exe.py b/bundle/py2exe.py index 281167492..5fbe55e46 100755 --- a/bundle/py2exe.py +++ b/bundle/py2exe.py @@ -42,9 +42,9 @@ def main(): # py2exe cannot import Crypto 'Crypto', 'Cryptodome', - # py2exe appears to confuse this with our socks library. - # We don't use pysocks and urllib3.contrib.socks would fail to import if tried. - 'urllib3.contrib.socks' + # requests >=2.32.0 breaks py2exe builds due to certifi dependency + 'requests', + 'urllib3' ], 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], # Modules that are only imported dynamically must be added here diff --git a/pyproject.toml b/pyproject.toml index b746fbc96..da6403ec7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ dependencies = [ "certifi", "mutagen", "pycryptodomex", - "requests>=2.31.0,<3", + "requests>=2.32.2,<3", "urllib3>=1.26.17,<3", "websockets>=12.0", ] @@ -82,7 +82,6 @@ pyinstaller = [ ] py2exe = [ "py2exe>=0.12", - "requests==2.31.*", ] [project.urls] diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py index 6397a2c0c..bf6fa634d 100644 --- a/yt_dlp/networking/_requests.py +++ b/yt_dlp/networking/_requests.py @@ -21,8 +21,8 @@ urllib3_version = tuple(int_or_none(x, default=0) for x in urllib3.__version__.s if urllib3_version < (1, 26, 17): raise ImportError('Only urllib3 >= 1.26.17 is supported') -if requests.__build__ < 0x023100: - raise 
ImportError('Only requests >= 2.31.0 is supported') +if requests.__build__ < 0x023202: + raise ImportError('Only requests >= 2.32.2 is supported') import requests.adapters import requests.utils @@ -182,14 +182,9 @@ class RequestsHTTPAdapter(requests.adapters.HTTPAdapter): return super().proxy_manager_for(proxy, **proxy_kwargs, **self._pm_args, **extra_kwargs) # Skip `requests` internal verification; we use our own SSLContext - # requests 2.31.0+ def cert_verify(*args, **kwargs): pass - # requests 2.31.0-2.32.1 - def _get_connection(self, request, *_, proxies=None, **__): - return self.get_connection(request.url, proxies) - # requests 2.32.2+: Reimplementation without `_urllib3_request_context` def get_connection_with_tls_context(self, request, verify, proxies=None, cert=None): url = urllib3.util.parse_url(request.url).url From add96eb9f84cfffe85682bf2fb85135746994ee8 Mon Sep 17 00:00:00 2001 From: sepro <4618135+seproDev@users.noreply.github.com> Date: Wed, 12 Jun 2024 01:09:58 +0200 Subject: [PATCH 48/48] [cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> --- CONTRIBUTING.md | 2 +- bundle/py2exe.py | 2 +- bundle/pyinstaller.py | 8 +- devscripts/bash-completion.py | 8 +- devscripts/make_changelog.py | 10 +- devscripts/make_readme.py | 12 +- devscripts/set-variant.py | 2 +- devscripts/update-version.py | 2 +- devscripts/zsh-completion.py | 16 +- pyproject.toml | 127 ++- test/conftest.py | 10 +- test/helper.py | 32 +- test/test_InfoExtractor.py | 138 +-- test/test_YoutubeDL.py | 45 +- test/test_aes.py | 12 +- test/test_compat.py | 10 +- test/test_config.py | 2 +- test/test_cookies.py | 132 +-- test/test_download.py | 18 +- test/test_downloader_http.py | 6 +- test/test_http_proxy.py | 4 +- test/test_iqiyi_sdk_interpreter.py | 4 +- test/test_netrc.py | 2 +- test/test_networking.py | 56 +- test/test_networking_utils.py | 12 +- 
test/test_overwrites.py | 4 +- test/test_plugins.py | 2 +- test/test_post_hooks.py | 2 +- test/test_postprocessors.py | 137 +-- test/test_socks.py | 8 +- test/test_subtitles.py | 11 +- test/test_traversal.py | 6 +- test/test_update.py | 8 +- test/test_utils.py | 64 +- test/test_websockets.py | 4 +- test/test_youtube_misc.py | 2 +- test/test_youtube_signature.py | 8 +- yt_dlp/YoutubeDL.py | 304 +++--- yt_dlp/__init__.py | 39 +- yt_dlp/aes.py | 36 +- yt_dlp/cache.py | 4 +- yt_dlp/compat/_legacy.py | 2 +- yt_dlp/compat/functools.py | 2 +- yt_dlp/cookies.py | 55 +- yt_dlp/downloader/common.py | 2 +- yt_dlp/downloader/external.py | 24 +- yt_dlp/downloader/f4m.py | 22 +- yt_dlp/downloader/fragment.py | 8 +- yt_dlp/downloader/hls.py | 9 +- yt_dlp/downloader/http.py | 14 +- yt_dlp/downloader/ism.py | 2 +- yt_dlp/downloader/mhtml.py | 53 +- yt_dlp/downloader/niconico.py | 14 +- yt_dlp/downloader/rtmp.py | 6 +- yt_dlp/downloader/youtube_live_chat.py | 2 +- yt_dlp/extractor/abc.py | 28 +- yt_dlp/extractor/abcnews.py | 2 +- yt_dlp/extractor/abcotvs.py | 5 +- yt_dlp/extractor/abematv.py | 22 +- yt_dlp/extractor/acast.py | 8 +- yt_dlp/extractor/acfun.py | 4 +- yt_dlp/extractor/adn.py | 19 +- yt_dlp/extractor/adobeconnect.py | 10 +- yt_dlp/extractor/adobepass.py | 866 +++++++++--------- yt_dlp/extractor/adobetv.py | 5 +- yt_dlp/extractor/adultswim.py | 6 +- yt_dlp/extractor/aenetworks.py | 34 +- yt_dlp/extractor/aeonco.py | 8 +- yt_dlp/extractor/afreecatv.py | 4 +- yt_dlp/extractor/agora.py | 6 +- yt_dlp/extractor/airtv.py | 4 +- yt_dlp/extractor/aitube.py | 2 +- yt_dlp/extractor/aliexpress.py | 3 +- yt_dlp/extractor/aljazeera.py | 14 +- yt_dlp/extractor/allocine.py | 5 +- yt_dlp/extractor/allstar.py | 26 +- yt_dlp/extractor/alphaporno.py | 2 +- yt_dlp/extractor/alsace20tv.py | 6 +- yt_dlp/extractor/altcensored.py | 2 +- yt_dlp/extractor/alura.py | 16 +- yt_dlp/extractor/amadeustv.py | 2 +- yt_dlp/extractor/amara.py | 10 +- yt_dlp/extractor/amazon.py | 8 +- 
yt_dlp/extractor/amazonminitv.py | 2 +- yt_dlp/extractor/amcnetworks.py | 12 +- yt_dlp/extractor/americastestkitchen.py | 14 +- yt_dlp/extractor/amp.py | 4 +- yt_dlp/extractor/anchorfm.py | 6 +- yt_dlp/extractor/angel.py | 10 +- yt_dlp/extractor/antenna.py | 2 +- yt_dlp/extractor/anvato.py | 8 +- yt_dlp/extractor/aol.py | 8 +- yt_dlp/extractor/apa.py | 4 +- yt_dlp/extractor/applepodcasts.py | 2 +- yt_dlp/extractor/appletrailers.py | 31 +- yt_dlp/extractor/archiveorg.py | 105 +-- yt_dlp/extractor/arcpublishing.py | 10 +- yt_dlp/extractor/ard.py | 6 +- yt_dlp/extractor/arkena.py | 2 +- yt_dlp/extractor/arnes.py | 12 +- yt_dlp/extractor/art19.py | 2 +- yt_dlp/extractor/arte.py | 24 +- yt_dlp/extractor/atresplayer.py | 4 +- yt_dlp/extractor/atscaleconf.py | 10 +- yt_dlp/extractor/atvat.py | 16 +- yt_dlp/extractor/audimedia.py | 4 +- yt_dlp/extractor/audioboom.py | 4 +- yt_dlp/extractor/audiodraft.py | 13 +- yt_dlp/extractor/audiomack.py | 27 +- yt_dlp/extractor/audius.py | 46 +- yt_dlp/extractor/awaan.py | 27 +- yt_dlp/extractor/aws.py | 24 +- yt_dlp/extractor/azmedien.py | 6 +- yt_dlp/extractor/baidu.py | 7 +- yt_dlp/extractor/banbye.py | 9 +- yt_dlp/extractor/bandcamp.py | 31 +- yt_dlp/extractor/bannedvideo.py | 12 +- yt_dlp/extractor/bbc.py | 110 +-- yt_dlp/extractor/beatport.py | 7 +- yt_dlp/extractor/beeg.py | 8 +- yt_dlp/extractor/behindkink.py | 2 +- yt_dlp/extractor/bellmedia.py | 2 +- yt_dlp/extractor/berufetv.py | 4 +- yt_dlp/extractor/bet.py | 8 +- yt_dlp/extractor/bfmtv.py | 4 +- yt_dlp/extractor/bigflix.py | 14 +- yt_dlp/extractor/bigo.py | 2 +- yt_dlp/extractor/bild.py | 4 +- yt_dlp/extractor/bilibili.py | 138 ++- yt_dlp/extractor/bitchute.py | 14 +- yt_dlp/extractor/blackboardcollaborate.py | 2 +- yt_dlp/extractor/bleacherreport.py | 12 +- yt_dlp/extractor/blerp.py | 25 +- yt_dlp/extractor/blogger.py | 4 +- yt_dlp/extractor/bloomberg.py | 2 +- yt_dlp/extractor/bokecc.py | 15 +- yt_dlp/extractor/bongacams.py | 9 +- yt_dlp/extractor/bostonglobe.py | 3 +- 
yt_dlp/extractor/box.py | 6 +- yt_dlp/extractor/boxcast.py | 10 +- yt_dlp/extractor/br.py | 8 +- yt_dlp/extractor/brainpop.py | 14 +- yt_dlp/extractor/bravotv.py | 2 +- yt_dlp/extractor/breitbart.py | 4 +- yt_dlp/extractor/brightcove.py | 74 +- yt_dlp/extractor/bundesliga.py | 10 +- yt_dlp/extractor/businessinsider.py | 4 +- yt_dlp/extractor/buzzfeed.py | 6 +- yt_dlp/extractor/byutv.py | 2 +- yt_dlp/extractor/c56.py | 4 +- yt_dlp/extractor/callin.py | 16 +- yt_dlp/extractor/caltrans.py | 2 +- yt_dlp/extractor/cam4.py | 4 +- yt_dlp/extractor/camdemy.py | 33 +- yt_dlp/extractor/camfm.py | 4 +- yt_dlp/extractor/cammodels.py | 8 +- yt_dlp/extractor/camtasia.py | 6 +- yt_dlp/extractor/canalalpha.py | 8 +- yt_dlp/extractor/canalc2.py | 2 +- yt_dlp/extractor/canalplus.py | 5 +- yt_dlp/extractor/caracoltv.py | 4 +- yt_dlp/extractor/cartoonnetwork.py | 2 +- yt_dlp/extractor/cbc.py | 51 +- yt_dlp/extractor/cbs.py | 6 +- yt_dlp/extractor/ccc.py | 6 +- yt_dlp/extractor/ccma.py | 6 +- yt_dlp/extractor/cctv.py | 7 +- yt_dlp/extractor/cda.py | 29 +- yt_dlp/extractor/cellebrite.py | 4 +- yt_dlp/extractor/ceskatelevize.py | 24 +- yt_dlp/extractor/cgtn.py | 10 +- yt_dlp/extractor/chaturbate.py | 6 +- yt_dlp/extractor/cinemax.py | 2 +- yt_dlp/extractor/cinetecamilano.py | 8 +- yt_dlp/extractor/cineverse.py | 10 +- yt_dlp/extractor/ciscolive.py | 4 +- yt_dlp/extractor/ciscowebex.py | 4 +- yt_dlp/extractor/cjsw.py | 2 +- yt_dlp/extractor/clippit.py | 4 +- yt_dlp/extractor/cliprs.py | 2 +- yt_dlp/extractor/closertotruth.py | 10 +- yt_dlp/extractor/cloudflarestream.py | 2 +- yt_dlp/extractor/cloudycdn.py | 6 +- yt_dlp/extractor/clubic.py | 4 +- yt_dlp/extractor/clyp.py | 6 +- yt_dlp/extractor/cmt.py | 4 +- yt_dlp/extractor/cnn.py | 8 +- yt_dlp/extractor/common.py | 134 ++- yt_dlp/extractor/commonmistakes.py | 6 +- yt_dlp/extractor/commonprotocols.py | 2 +- yt_dlp/extractor/condenast.py | 33 +- yt_dlp/extractor/contv.py | 2 +- yt_dlp/extractor/corus.py | 12 +- yt_dlp/extractor/coub.py | 8 
+- yt_dlp/extractor/cozytv.py | 10 +- yt_dlp/extractor/cpac.py | 24 +- yt_dlp/extractor/cracked.py | 4 +- yt_dlp/extractor/crackle.py | 14 +- yt_dlp/extractor/craftsy.py | 2 +- yt_dlp/extractor/crooksandliars.py | 4 +- yt_dlp/extractor/crowdbunker.py | 28 +- yt_dlp/extractor/crtvg.py | 4 +- yt_dlp/extractor/crunchyroll.py | 6 +- yt_dlp/extractor/cspan.py | 26 +- yt_dlp/extractor/ctsnews.py | 4 +- yt_dlp/extractor/ctv.py | 4 +- yt_dlp/extractor/ctvnews.py | 6 +- yt_dlp/extractor/cultureunplugged.py | 8 +- yt_dlp/extractor/curiositystream.py | 9 +- yt_dlp/extractor/cwtv.py | 4 +- yt_dlp/extractor/cybrary.py | 20 +- yt_dlp/extractor/dailymail.py | 9 +- yt_dlp/extractor/dailymotion.py | 16 +- yt_dlp/extractor/dailywire.py | 6 +- yt_dlp/extractor/damtomo.py | 9 +- yt_dlp/extractor/daum.py | 28 +- yt_dlp/extractor/dbtv.py | 2 +- yt_dlp/extractor/dctp.py | 11 +- yt_dlp/extractor/deezer.py | 6 +- yt_dlp/extractor/democracynow.py | 8 +- yt_dlp/extractor/detik.py | 20 +- yt_dlp/extractor/deuxm.py | 16 +- yt_dlp/extractor/dfb.py | 4 +- yt_dlp/extractor/digitalconcerthall.py | 8 +- yt_dlp/extractor/digiteka.py | 2 +- yt_dlp/extractor/discovery.py | 10 +- yt_dlp/extractor/discoverygo.py | 5 +- yt_dlp/extractor/disney.py | 8 +- yt_dlp/extractor/dispeak.py | 10 +- yt_dlp/extractor/dlf.py | 36 +- yt_dlp/extractor/dlive.py | 8 +- yt_dlp/extractor/douyutv.py | 10 +- yt_dlp/extractor/dplay.py | 16 +- yt_dlp/extractor/drbonanza.py | 2 +- yt_dlp/extractor/dreisat.py | 4 +- yt_dlp/extractor/drooble.py | 6 +- yt_dlp/extractor/dropbox.py | 14 +- yt_dlp/extractor/dropout.py | 34 +- yt_dlp/extractor/drtuber.py | 10 +- yt_dlp/extractor/drtv.py | 18 +- yt_dlp/extractor/dtube.py | 6 +- yt_dlp/extractor/duboku.py | 29 +- yt_dlp/extractor/dumpert.py | 4 +- yt_dlp/extractor/dvtv.py | 14 +- yt_dlp/extractor/dw.py | 15 +- yt_dlp/extractor/eagleplatform.py | 20 +- yt_dlp/extractor/ebaumsworld.py | 2 +- yt_dlp/extractor/ebay.py | 4 +- yt_dlp/extractor/egghead.py | 11 +- 
yt_dlp/extractor/eighttracks.py | 49 +- yt_dlp/extractor/eitb.py | 8 +- yt_dlp/extractor/elpais.py | 4 +- yt_dlp/extractor/eltrecetv.py | 4 +- yt_dlp/extractor/epicon.py | 29 +- yt_dlp/extractor/epoch.py | 10 +- yt_dlp/extractor/eporner.py | 14 +- yt_dlp/extractor/erocast.py | 2 +- yt_dlp/extractor/eroprofile.py | 6 +- yt_dlp/extractor/err.py | 2 +- yt_dlp/extractor/ertgr.py | 17 +- yt_dlp/extractor/espn.py | 41 +- yt_dlp/extractor/ettutv.py | 2 +- yt_dlp/extractor/europa.py | 24 +- yt_dlp/extractor/europeantour.py | 8 +- yt_dlp/extractor/eurosport.py | 10 +- yt_dlp/extractor/euscreen.py | 18 +- yt_dlp/extractor/expressen.py | 2 +- yt_dlp/extractor/eyedotv.py | 12 +- yt_dlp/extractor/facebook.py | 49 +- yt_dlp/extractor/fancode.py | 35 +- yt_dlp/extractor/fc2.py | 18 +- yt_dlp/extractor/filmon.py | 11 +- yt_dlp/extractor/filmweb.py | 2 +- yt_dlp/extractor/firsttv.py | 27 +- yt_dlp/extractor/flickr.py | 14 +- yt_dlp/extractor/floatplane.py | 2 +- yt_dlp/extractor/folketinget.py | 5 +- yt_dlp/extractor/footyroom.py | 2 +- yt_dlp/extractor/fourtube.py | 41 +- yt_dlp/extractor/fox.py | 15 +- yt_dlp/extractor/fptplay.py | 2 +- yt_dlp/extractor/francetv.py | 6 +- yt_dlp/extractor/freesound.py | 2 +- yt_dlp/extractor/freetv.py | 10 +- yt_dlp/extractor/frontendmasters.py | 31 +- yt_dlp/extractor/fujitv.py | 8 +- yt_dlp/extractor/funimation.py | 32 +- yt_dlp/extractor/funker530.py | 6 +- yt_dlp/extractor/fuyintv.py | 2 +- yt_dlp/extractor/gab.py | 22 +- yt_dlp/extractor/gaia.py | 14 +- yt_dlp/extractor/gamejolt.py | 34 +- yt_dlp/extractor/gamespot.py | 5 +- yt_dlp/extractor/gamestar.py | 6 +- yt_dlp/extractor/gaskrank.py | 4 +- yt_dlp/extractor/gazeta.py | 4 +- yt_dlp/extractor/gbnews.py | 14 +- yt_dlp/extractor/gdcvault.py | 10 +- yt_dlp/extractor/gedidigital.py | 4 +- yt_dlp/extractor/generic.py | 145 ++- yt_dlp/extractor/genericembeds.py | 10 +- yt_dlp/extractor/getcourseru.py | 22 +- yt_dlp/extractor/gettr.py | 14 +- yt_dlp/extractor/giantbomb.py | 2 +- 
yt_dlp/extractor/gigya.py | 2 +- yt_dlp/extractor/glide.py | 2 +- yt_dlp/extractor/globalplayer.py | 4 +- yt_dlp/extractor/globo.py | 29 +- yt_dlp/extractor/glomex.py | 6 +- yt_dlp/extractor/gmanetwork.py | 4 +- yt_dlp/extractor/go.py | 19 +- yt_dlp/extractor/godresource.py | 10 +- yt_dlp/extractor/godtube.py | 4 +- yt_dlp/extractor/gofile.py | 8 +- yt_dlp/extractor/golem.py | 16 +- yt_dlp/extractor/googledrive.py | 20 +- yt_dlp/extractor/googlepodcasts.py | 2 +- yt_dlp/extractor/goplay.py | 143 ++- yt_dlp/extractor/gopro.py | 8 +- yt_dlp/extractor/goshgay.py | 9 +- yt_dlp/extractor/gotostage.py | 19 +- yt_dlp/extractor/gputechconf.py | 4 +- yt_dlp/extractor/gronkh.py | 14 +- yt_dlp/extractor/groupon.py | 3 +- yt_dlp/extractor/harpodeon.py | 6 +- yt_dlp/extractor/hbo.py | 8 +- yt_dlp/extractor/heise.py | 6 +- yt_dlp/extractor/hidive.py | 6 +- yt_dlp/extractor/historicfilms.py | 2 +- yt_dlp/extractor/hitrecord.py | 11 +- yt_dlp/extractor/hketv.py | 3 +- yt_dlp/extractor/hollywoodreporter.py | 2 +- yt_dlp/extractor/holodex.py | 2 +- yt_dlp/extractor/hotnewhiphop.py | 9 +- yt_dlp/extractor/hotstar.py | 11 +- yt_dlp/extractor/hrfensehen.py | 12 +- yt_dlp/extractor/hrti.py | 20 +- yt_dlp/extractor/hse.py | 4 +- yt_dlp/extractor/huajiao.py | 2 +- yt_dlp/extractor/huffpost.py | 2 +- yt_dlp/extractor/hungama.py | 6 +- yt_dlp/extractor/huya.py | 13 +- yt_dlp/extractor/hypem.py | 6 +- yt_dlp/extractor/hypergryph.py | 4 +- yt_dlp/extractor/hytale.py | 4 +- yt_dlp/extractor/icareus.py | 12 +- yt_dlp/extractor/ichinanalive.py | 15 +- yt_dlp/extractor/ign.py | 21 +- yt_dlp/extractor/iheart.py | 2 +- yt_dlp/extractor/ilpost.py | 2 +- yt_dlp/extractor/iltalehti.py | 2 +- yt_dlp/extractor/imdb.py | 8 +- yt_dlp/extractor/imggaming.py | 2 +- yt_dlp/extractor/imgur.py | 21 +- yt_dlp/extractor/ina.py | 2 +- yt_dlp/extractor/inc.py | 2 +- yt_dlp/extractor/indavideo.py | 4 +- yt_dlp/extractor/infoq.py | 12 +- yt_dlp/extractor/instagram.py | 54 +- yt_dlp/extractor/internazionale.py | 4 +- 
yt_dlp/extractor/iprima.py | 4 +- yt_dlp/extractor/iqiyi.py | 77 +- yt_dlp/extractor/islamchannel.py | 2 +- yt_dlp/extractor/israelnationalnews.py | 6 +- yt_dlp/extractor/itprotv.py | 14 +- yt_dlp/extractor/itv.py | 33 +- yt_dlp/extractor/ivi.py | 26 +- yt_dlp/extractor/ivideon.py | 16 +- yt_dlp/extractor/iwara.py | 8 +- yt_dlp/extractor/ixigua.py | 2 +- yt_dlp/extractor/izlesene.py | 18 +- yt_dlp/extractor/jamendo.py | 32 +- yt_dlp/extractor/japandiet.py | 8 +- yt_dlp/extractor/jiocinema.py | 14 +- yt_dlp/extractor/jiosaavn.py | 2 +- yt_dlp/extractor/joj.py | 15 +- yt_dlp/extractor/jove.py | 4 +- yt_dlp/extractor/jwplatform.py | 4 +- yt_dlp/extractor/kakao.py | 10 +- yt_dlp/extractor/kaltura.py | 73 +- yt_dlp/extractor/kankanews.py | 2 +- yt_dlp/extractor/karaoketv.py | 4 +- yt_dlp/extractor/kelbyone.py | 2 +- yt_dlp/extractor/kicker.py | 6 +- yt_dlp/extractor/kinja.py | 18 +- yt_dlp/extractor/kommunetv.py | 10 +- yt_dlp/extractor/kompas.py | 2 +- yt_dlp/extractor/koo.py | 27 +- yt_dlp/extractor/kth.py | 7 +- yt_dlp/extractor/ku6.py | 10 +- yt_dlp/extractor/kuwo.py | 42 +- yt_dlp/extractor/la7.py | 2 +- yt_dlp/extractor/laxarxames.py | 2 +- yt_dlp/extractor/lbry.py | 16 +- yt_dlp/extractor/lcp.py | 2 +- yt_dlp/extractor/lecture2go.py | 2 +- yt_dlp/extractor/lecturio.py | 6 +- yt_dlp/extractor/leeco.py | 41 +- yt_dlp/extractor/lego.py | 6 +- yt_dlp/extractor/lenta.py | 2 +- yt_dlp/extractor/libraryofcongress.py | 2 +- yt_dlp/extractor/libsyn.py | 6 +- yt_dlp/extractor/lifenews.py | 27 +- yt_dlp/extractor/likee.py | 2 +- yt_dlp/extractor/limelight.py | 16 +- yt_dlp/extractor/linkedin.py | 27 +- yt_dlp/extractor/liputan6.py | 6 +- yt_dlp/extractor/listennotes.py | 6 +- yt_dlp/extractor/litv.py | 6 +- yt_dlp/extractor/livejournal.py | 5 +- yt_dlp/extractor/livestream.py | 42 +- yt_dlp/extractor/livestreamfails.py | 4 +- yt_dlp/extractor/lnkgo.py | 27 +- yt_dlp/extractor/lovehomeporn.py | 6 +- yt_dlp/extractor/lrt.py | 8 +- yt_dlp/extractor/lsm.py | 12 +- 
yt_dlp/extractor/lumni.py | 2 +- yt_dlp/extractor/lynda.py | 54 +- yt_dlp/extractor/magentamusik.py | 2 +- yt_dlp/extractor/mailru.py | 15 +- yt_dlp/extractor/mainstreaming.py | 28 +- yt_dlp/extractor/mangomolo.py | 13 +- yt_dlp/extractor/manoto.py | 12 +- yt_dlp/extractor/manyvids.py | 8 +- yt_dlp/extractor/markiza.py | 9 +- yt_dlp/extractor/massengeschmacktv.py | 2 +- yt_dlp/extractor/masters.py | 2 +- yt_dlp/extractor/mdr.py | 7 +- yt_dlp/extractor/medaltv.py | 13 +- yt_dlp/extractor/mediaite.py | 14 +- yt_dlp/extractor/mediaklikk.py | 39 +- yt_dlp/extractor/mediaset.py | 6 +- yt_dlp/extractor/mediasite.py | 87 +- yt_dlp/extractor/mediaworksnz.py | 10 +- yt_dlp/extractor/meipai.py | 4 +- yt_dlp/extractor/melonvod.py | 4 +- yt_dlp/extractor/metacritic.py | 6 +- yt_dlp/extractor/mgtv.py | 6 +- yt_dlp/extractor/microsoftembed.py | 4 +- yt_dlp/extractor/microsoftstream.py | 6 +- yt_dlp/extractor/microsoftvirtualacademy.py | 21 +- yt_dlp/extractor/mildom.py | 10 +- yt_dlp/extractor/minds.py | 11 +- yt_dlp/extractor/minoto.py | 2 +- yt_dlp/extractor/mirrativ.py | 6 +- yt_dlp/extractor/mit.py | 8 +- yt_dlp/extractor/mixch.py | 4 +- yt_dlp/extractor/mixcloud.py | 37 +- yt_dlp/extractor/mlb.py | 16 +- yt_dlp/extractor/mlssoccer.py | 69 +- yt_dlp/extractor/mocha.py | 4 +- yt_dlp/extractor/mojvideo.py | 6 +- yt_dlp/extractor/monstercat.py | 6 +- yt_dlp/extractor/motherless.py | 6 +- yt_dlp/extractor/motorsport.py | 11 +- yt_dlp/extractor/moview.py | 6 +- yt_dlp/extractor/moviezine.py | 2 +- yt_dlp/extractor/movingimage.py | 2 +- yt_dlp/extractor/msn.py | 5 +- yt_dlp/extractor/mtv.py | 50 +- yt_dlp/extractor/muenchentv.py | 8 +- yt_dlp/extractor/murrtube.py | 6 +- yt_dlp/extractor/musescore.py | 12 +- yt_dlp/extractor/musicdex.py | 50 +- yt_dlp/extractor/mx3.py | 10 +- yt_dlp/extractor/mxplayer.py | 25 +- yt_dlp/extractor/myspace.py | 14 +- yt_dlp/extractor/myspass.py | 3 +- yt_dlp/extractor/mzaalo.py | 6 +- yt_dlp/extractor/n1.py | 6 +- yt_dlp/extractor/nate.py | 24 +- 
yt_dlp/extractor/nationalgeographic.py | 2 +- yt_dlp/extractor/naver.py | 12 +- yt_dlp/extractor/nba.py | 23 +- yt_dlp/extractor/nbc.py | 26 +- yt_dlp/extractor/ndr.py | 18 +- yt_dlp/extractor/ndtv.py | 26 +- yt_dlp/extractor/nekohacker.py | 32 +- yt_dlp/extractor/neteasemusic.py | 10 +- yt_dlp/extractor/netverse.py | 14 +- yt_dlp/extractor/netzkino.py | 6 +- yt_dlp/extractor/newgrounds.py | 10 +- yt_dlp/extractor/newsy.py | 4 +- yt_dlp/extractor/nextmedia.py | 17 +- yt_dlp/extractor/nexx.py | 92 +- yt_dlp/extractor/nfhsnetwork.py | 52 +- yt_dlp/extractor/nfl.py | 4 +- yt_dlp/extractor/nhk.py | 24 +- yt_dlp/extractor/nhl.py | 9 +- yt_dlp/extractor/nick.py | 18 +- yt_dlp/extractor/niconico.py | 74 +- yt_dlp/extractor/niconicochannelplus.py | 4 +- yt_dlp/extractor/ninaprotocol.py | 10 +- yt_dlp/extractor/ninecninemedia.py | 10 +- yt_dlp/extractor/ninegag.py | 6 +- yt_dlp/extractor/ninenews.py | 4 +- yt_dlp/extractor/ninenow.py | 21 +- yt_dlp/extractor/nintendo.py | 2 +- yt_dlp/extractor/nitter.py | 18 +- yt_dlp/extractor/nobelprize.py | 2 +- yt_dlp/extractor/noice.py | 6 +- yt_dlp/extractor/nonktube.py | 2 +- yt_dlp/extractor/noodlemagazine.py | 6 +- yt_dlp/extractor/noovo.py | 7 +- yt_dlp/extractor/nosnl.py | 6 +- yt_dlp/extractor/nova.py | 8 +- yt_dlp/extractor/novaplay.py | 4 +- yt_dlp/extractor/nowness.py | 7 +- yt_dlp/extractor/noz.py | 9 +- yt_dlp/extractor/npo.py | 40 +- yt_dlp/extractor/npr.py | 4 +- yt_dlp/extractor/nrk.py | 72 +- yt_dlp/extractor/ntvru.py | 6 +- yt_dlp/extractor/nubilesporn.py | 6 +- yt_dlp/extractor/nuevo.py | 2 +- yt_dlp/extractor/nuvid.py | 8 +- yt_dlp/extractor/nytimes.py | 2 +- yt_dlp/extractor/nzherald.py | 21 +- yt_dlp/extractor/nzonscreen.py | 2 +- yt_dlp/extractor/odkmedia.py | 4 +- yt_dlp/extractor/odnoklassniki.py | 19 +- yt_dlp/extractor/oftv.py | 8 +- yt_dlp/extractor/oktoberfesttv.py | 2 +- yt_dlp/extractor/olympics.py | 8 +- yt_dlp/extractor/on24.py | 6 +- yt_dlp/extractor/onefootball.py | 2 +- yt_dlp/extractor/onenewsnz.py | 
10 +- yt_dlp/extractor/oneplace.py | 4 +- yt_dlp/extractor/onet.py | 6 +- yt_dlp/extractor/onionstudios.py | 3 +- yt_dlp/extractor/opencast.py | 2 +- yt_dlp/extractor/openload.py | 10 +- yt_dlp/extractor/openrec.py | 7 +- yt_dlp/extractor/ora.py | 8 +- yt_dlp/extractor/orf.py | 18 +- yt_dlp/extractor/outsidetv.py | 2 +- yt_dlp/extractor/packtpub.py | 9 +- yt_dlp/extractor/palcomp3.py | 11 +- yt_dlp/extractor/panopto.py | 66 +- yt_dlp/extractor/paramountplus.py | 8 +- yt_dlp/extractor/parler.py | 2 +- yt_dlp/extractor/parlview.py | 7 +- yt_dlp/extractor/patreon.py | 22 +- yt_dlp/extractor/pbs.py | 37 +- yt_dlp/extractor/pearvideo.py | 4 +- yt_dlp/extractor/peertube.py | 71 +- yt_dlp/extractor/peertv.py | 2 +- yt_dlp/extractor/peloton.py | 26 +- yt_dlp/extractor/performgroup.py | 6 +- yt_dlp/extractor/periscope.py | 12 +- yt_dlp/extractor/philharmoniedeparis.py | 7 +- yt_dlp/extractor/phoenix.py | 9 +- yt_dlp/extractor/photobucket.py | 6 +- yt_dlp/extractor/piapro.py | 15 +- yt_dlp/extractor/picarto.py | 14 +- yt_dlp/extractor/piksel.py | 8 +- yt_dlp/extractor/pinkbike.py | 8 +- yt_dlp/extractor/pinterest.py | 17 +- yt_dlp/extractor/pixivsketch.py | 4 +- yt_dlp/extractor/pladform.py | 12 +- yt_dlp/extractor/planetmarathi.py | 15 +- yt_dlp/extractor/platzi.py | 22 +- yt_dlp/extractor/playsuisse.py | 20 +- yt_dlp/extractor/playtvak.py | 24 +- yt_dlp/extractor/playwire.py | 2 +- yt_dlp/extractor/pluralsight.py | 69 +- yt_dlp/extractor/plutotv.py | 25 +- yt_dlp/extractor/podchaser.py | 14 +- yt_dlp/extractor/podomatic.py | 11 +- yt_dlp/extractor/pokemon.py | 14 +- yt_dlp/extractor/pokergo.py | 28 +- yt_dlp/extractor/polsatgo.py | 6 +- yt_dlp/extractor/polskieradio.py | 17 +- yt_dlp/extractor/popcorntimes.py | 5 +- yt_dlp/extractor/popcorntv.py | 2 +- yt_dlp/extractor/pornbox.py | 12 +- yt_dlp/extractor/pornflip.py | 2 +- yt_dlp/extractor/pornhub.py | 66 +- yt_dlp/extractor/pornotube.py | 11 +- yt_dlp/extractor/pornovoisines.py | 6 +- yt_dlp/extractor/pornoxo.py | 2 +- 
yt_dlp/extractor/pr0gramm.py | 2 +- yt_dlp/extractor/prankcast.py | 24 +- yt_dlp/extractor/premiershiprugby.py | 2 +- yt_dlp/extractor/presstv.py | 10 +- yt_dlp/extractor/projectveritas.py | 10 +- yt_dlp/extractor/prosiebensat1.py | 19 +- yt_dlp/extractor/prx.py | 68 +- yt_dlp/extractor/puhutv.py | 41 +- yt_dlp/extractor/puls4.py | 3 +- yt_dlp/extractor/pyvideo.py | 7 +- yt_dlp/extractor/qingting.py | 4 +- yt_dlp/extractor/qqmusic.py | 33 +- yt_dlp/extractor/r7.py | 6 +- yt_dlp/extractor/radiko.py | 8 +- yt_dlp/extractor/radiocanada.py | 8 +- yt_dlp/extractor/radiocomercial.py | 14 +- yt_dlp/extractor/radiode.py | 4 +- yt_dlp/extractor/radiofrance.py | 4 +- yt_dlp/extractor/radiojavan.py | 2 +- yt_dlp/extractor/radiokapital.py | 4 +- yt_dlp/extractor/radiozet.py | 2 +- yt_dlp/extractor/radlive.py | 10 +- yt_dlp/extractor/rai.py | 20 +- yt_dlp/extractor/raywenderlich.py | 16 +- yt_dlp/extractor/rbgtum.py | 10 +- yt_dlp/extractor/rcs.py | 30 +- yt_dlp/extractor/rcti.py | 30 +- yt_dlp/extractor/rds.py | 7 +- yt_dlp/extractor/redbee.py | 30 +- yt_dlp/extractor/redbulltv.py | 19 +- yt_dlp/extractor/redge.py | 4 +- yt_dlp/extractor/redgifs.py | 30 +- yt_dlp/extractor/redtube.py | 4 +- yt_dlp/extractor/rentv.py | 7 +- yt_dlp/extractor/restudy.py | 4 +- yt_dlp/extractor/reuters.py | 8 +- yt_dlp/extractor/reverbnation.py | 6 +- yt_dlp/extractor/ridehome.py | 4 +- yt_dlp/extractor/rinsefm.py | 14 +- yt_dlp/extractor/rmcdecouverte.py | 8 +- yt_dlp/extractor/rockstargames.py | 2 +- yt_dlp/extractor/rokfin.py | 18 +- yt_dlp/extractor/roosterteeth.py | 6 +- yt_dlp/extractor/rottentomatoes.py | 4 +- yt_dlp/extractor/rozhlas.py | 16 +- yt_dlp/extractor/rte.py | 2 +- yt_dlp/extractor/rtl2.py | 4 +- yt_dlp/extractor/rtlnl.py | 26 +- yt_dlp/extractor/rtnews.py | 60 +- yt_dlp/extractor/rtp.py | 2 +- yt_dlp/extractor/rtrfm.py | 4 +- yt_dlp/extractor/rts.py | 11 +- yt_dlp/extractor/rtvcplay.py | 6 +- yt_dlp/extractor/rtve.py | 20 +- yt_dlp/extractor/rtvs.py | 8 +- 
yt_dlp/extractor/rtvslo.py | 6 +- yt_dlp/extractor/rule34video.py | 8 +- yt_dlp/extractor/rumble.py | 26 +- yt_dlp/extractor/rutube.py | 15 +- yt_dlp/extractor/rutv.py | 10 +- yt_dlp/extractor/ruutu.py | 16 +- yt_dlp/extractor/ruv.py | 8 +- yt_dlp/extractor/s4c.py | 6 +- yt_dlp/extractor/safari.py | 29 +- yt_dlp/extractor/saitosan.py | 8 +- yt_dlp/extractor/samplefocus.py | 8 +- yt_dlp/extractor/sapo.py | 2 +- yt_dlp/extractor/sbscokr.py | 4 +- yt_dlp/extractor/screencast.py | 15 +- yt_dlp/extractor/screencastomatic.py | 2 +- yt_dlp/extractor/scrippsnetworks.py | 12 +- yt_dlp/extractor/scrolller.py | 14 +- yt_dlp/extractor/scte.py | 6 +- yt_dlp/extractor/senategov.py | 19 +- yt_dlp/extractor/sendtonews.py | 6 +- yt_dlp/extractor/servus.py | 2 +- yt_dlp/extractor/sevenplus.py | 7 +- yt_dlp/extractor/sexu.py | 2 +- yt_dlp/extractor/seznamzpravy.py | 12 +- yt_dlp/extractor/shahid.py | 18 +- yt_dlp/extractor/shemaroome.py | 21 +- yt_dlp/extractor/showroomlive.py | 9 +- yt_dlp/extractor/sibnet.py | 4 +- yt_dlp/extractor/simplecast.py | 6 +- yt_dlp/extractor/sina.py | 7 +- yt_dlp/extractor/sixplay.py | 11 +- yt_dlp/extractor/skeb.py | 16 +- yt_dlp/extractor/sky.py | 2 +- yt_dlp/extractor/skyit.py | 10 +- yt_dlp/extractor/skylinewebcams.py | 2 +- yt_dlp/extractor/skynewsarabia.py | 11 +- yt_dlp/extractor/skynewsau.py | 12 +- yt_dlp/extractor/slideshare.py | 8 +- yt_dlp/extractor/slideslive.py | 2 +- yt_dlp/extractor/slutload.py | 12 +- yt_dlp/extractor/snotr.py | 2 +- yt_dlp/extractor/sohu.py | 48 +- yt_dlp/extractor/sonyliv.py | 6 +- yt_dlp/extractor/soundcloud.py | 54 +- yt_dlp/extractor/soundgasm.py | 4 +- yt_dlp/extractor/southpark.py | 4 +- yt_dlp/extractor/spankbang.py | 11 +- yt_dlp/extractor/spiegel.py | 4 +- yt_dlp/extractor/sport5.py | 6 +- yt_dlp/extractor/sportdeutschland.py | 12 +- yt_dlp/extractor/spotify.py | 6 +- yt_dlp/extractor/spreaker.py | 23 +- yt_dlp/extractor/springboardplatform.py | 5 +- yt_dlp/extractor/srgssr.py | 15 +- 
yt_dlp/extractor/srmediathek.py | 2 +- yt_dlp/extractor/stageplus.py | 2 +- yt_dlp/extractor/stanfordoc.py | 18 +- yt_dlp/extractor/startrek.py | 4 +- yt_dlp/extractor/startv.py | 31 +- yt_dlp/extractor/steam.py | 24 +- yt_dlp/extractor/stitcher.py | 5 +- yt_dlp/extractor/storyfire.py | 8 +- yt_dlp/extractor/streamable.py | 10 +- yt_dlp/extractor/streamcz.py | 14 +- yt_dlp/extractor/streetvoice.py | 8 +- yt_dlp/extractor/stretchinternet.py | 2 +- yt_dlp/extractor/stripchat.py | 2 +- yt_dlp/extractor/stv.py | 9 +- yt_dlp/extractor/substack.py | 8 +- yt_dlp/extractor/sunporno.py | 4 +- yt_dlp/extractor/sverigesradio.py | 2 +- yt_dlp/extractor/svt.py | 43 +- yt_dlp/extractor/swearnet.py | 8 +- yt_dlp/extractor/syfy.py | 4 +- yt_dlp/extractor/syvdk.py | 4 +- yt_dlp/extractor/tagesschau.py | 4 +- yt_dlp/extractor/taptap.py | 24 +- yt_dlp/extractor/tbs.py | 19 +- yt_dlp/extractor/tbsjp.py | 6 +- yt_dlp/extractor/teachable.py | 35 +- yt_dlp/extractor/teachertube.py | 12 +- yt_dlp/extractor/ted.py | 22 +- yt_dlp/extractor/tele13.py | 2 +- yt_dlp/extractor/telecaribe.py | 2 +- yt_dlp/extractor/telecinco.py | 2 +- yt_dlp/extractor/telegraaf.py | 8 +- yt_dlp/extractor/telegram.py | 2 +- yt_dlp/extractor/telemb.py | 6 +- yt_dlp/extractor/telemundo.py | 4 +- yt_dlp/extractor/telequebec.py | 7 +- yt_dlp/extractor/teletask.py | 8 +- yt_dlp/extractor/telewebion.py | 2 +- yt_dlp/extractor/tempo.py | 18 +- yt_dlp/extractor/tencent.py | 2 +- yt_dlp/extractor/tennistv.py | 14 +- yt_dlp/extractor/tenplay.py | 6 +- yt_dlp/extractor/testurl.py | 2 +- yt_dlp/extractor/tf1.py | 4 +- yt_dlp/extractor/tfo.py | 4 +- yt_dlp/extractor/theguardian.py | 31 +- yt_dlp/extractor/theholetv.py | 6 +- yt_dlp/extractor/theintercept.py | 7 +- yt_dlp/extractor/theplatform.py | 32 +- yt_dlp/extractor/thestar.py | 2 +- yt_dlp/extractor/theweatherchannel.py | 8 +- yt_dlp/extractor/thisamericanlife.py | 4 +- yt_dlp/extractor/thisvid.py | 4 +- yt_dlp/extractor/threeqsdn.py | 6 +- yt_dlp/extractor/threespeak.py 
| 28 +- yt_dlp/extractor/tiktok.py | 42 +- yt_dlp/extractor/tmz.py | 6 +- yt_dlp/extractor/tnaflix.py | 13 +- yt_dlp/extractor/toggle.py | 24 +- yt_dlp/extractor/tonline.py | 4 +- yt_dlp/extractor/toongoggles.py | 2 +- yt_dlp/extractor/toutv.py | 2 +- yt_dlp/extractor/toypics.py | 8 +- yt_dlp/extractor/traileraddict.py | 4 +- yt_dlp/extractor/trovo.py | 4 +- yt_dlp/extractor/trtcocuk.py | 6 +- yt_dlp/extractor/trtworld.py | 10 +- yt_dlp/extractor/trueid.py | 6 +- yt_dlp/extractor/trutv.py | 2 +- yt_dlp/extractor/tube8.py | 10 +- yt_dlp/extractor/tubetugraz.py | 69 +- yt_dlp/extractor/tubitv.py | 8 +- yt_dlp/extractor/tumblr.py | 14 +- yt_dlp/extractor/tunein.py | 4 +- yt_dlp/extractor/turner.py | 9 +- yt_dlp/extractor/tv2.py | 18 +- yt_dlp/extractor/tv24ua.py | 6 +- yt_dlp/extractor/tv2dk.py | 2 +- yt_dlp/extractor/tv2hu.py | 19 +- yt_dlp/extractor/tv4.py | 2 +- yt_dlp/extractor/tv5unis.py | 12 +- yt_dlp/extractor/tvanouvelles.py | 4 +- yt_dlp/extractor/tvc.py | 2 +- yt_dlp/extractor/tver.py | 2 +- yt_dlp/extractor/tvigle.py | 8 +- yt_dlp/extractor/tviplayer.py | 8 +- yt_dlp/extractor/tvn24.py | 4 +- yt_dlp/extractor/tvnoe.py | 4 +- yt_dlp/extractor/tvp.py | 13 +- yt_dlp/extractor/tvplay.py | 14 +- yt_dlp/extractor/tvplayer.py | 7 +- yt_dlp/extractor/tweakers.py | 4 +- yt_dlp/extractor/twentymin.py | 6 +- yt_dlp/extractor/twentythreevideo.py | 4 +- yt_dlp/extractor/twitcasting.py | 14 +- yt_dlp/extractor/twitch.py | 114 ++- yt_dlp/extractor/twitter.py | 78 +- yt_dlp/extractor/txxx.py | 38 +- yt_dlp/extractor/udemy.py | 44 +- yt_dlp/extractor/udn.py | 8 +- yt_dlp/extractor/uktvplay.py | 2 +- yt_dlp/extractor/umg.py | 4 +- yt_dlp/extractor/unistra.py | 8 +- yt_dlp/extractor/unity.py | 2 +- yt_dlp/extractor/uol.py | 16 +- yt_dlp/extractor/urort.py | 12 +- yt_dlp/extractor/urplay.py | 8 +- yt_dlp/extractor/usatoday.py | 7 +- yt_dlp/extractor/ustream.py | 33 +- yt_dlp/extractor/ustudio.py | 12 +- yt_dlp/extractor/utreon.py | 10 +- yt_dlp/extractor/veo.py | 6 +- 
yt_dlp/extractor/veoh.py | 16 +- yt_dlp/extractor/vesti.py | 4 +- yt_dlp/extractor/vevo.py | 43 +- yt_dlp/extractor/vgtv.py | 19 +- yt_dlp/extractor/vh1.py | 2 +- yt_dlp/extractor/vice.py | 16 +- yt_dlp/extractor/viddler.py | 6 +- yt_dlp/extractor/videa.py | 7 +- yt_dlp/extractor/videocampus_sachsen.py | 34 +- yt_dlp/extractor/videofyme.py | 2 +- yt_dlp/extractor/videoken.py | 2 +- yt_dlp/extractor/videomore.py | 13 +- yt_dlp/extractor/videopress.py | 6 +- yt_dlp/extractor/vidio.py | 22 +- yt_dlp/extractor/vidlii.py | 4 +- yt_dlp/extractor/vidly.py | 2 +- yt_dlp/extractor/viewlift.py | 28 +- yt_dlp/extractor/viidea.py | 23 +- yt_dlp/extractor/viki.py | 28 +- yt_dlp/extractor/vimeo.py | 52 +- yt_dlp/extractor/vine.py | 15 +- yt_dlp/extractor/viously.py | 2 +- yt_dlp/extractor/viqeo.py | 2 +- yt_dlp/extractor/viu.py | 39 +- yt_dlp/extractor/vk.py | 10 +- yt_dlp/extractor/vodplatform.py | 2 +- yt_dlp/extractor/voicy.py | 19 +- yt_dlp/extractor/volejtv.py | 4 +- yt_dlp/extractor/voxmedia.py | 9 +- yt_dlp/extractor/vrt.py | 22 +- yt_dlp/extractor/vtm.py | 4 +- yt_dlp/extractor/vuclip.py | 10 +- yt_dlp/extractor/vvvvid.py | 22 +- yt_dlp/extractor/walla.py | 4 +- yt_dlp/extractor/washingtonpost.py | 6 +- yt_dlp/extractor/wat.py | 5 +- yt_dlp/extractor/wdr.py | 29 +- yt_dlp/extractor/webcamerapl.py | 4 +- yt_dlp/extractor/webcaster.py | 2 +- yt_dlp/extractor/webofstories.py | 18 +- yt_dlp/extractor/weibo.py | 10 +- yt_dlp/extractor/wevidi.py | 12 +- yt_dlp/extractor/whowatch.py | 21 +- yt_dlp/extractor/wikimedia.py | 4 +- yt_dlp/extractor/wimtv.py | 26 +- yt_dlp/extractor/wistia.py | 24 +- yt_dlp/extractor/wordpress.py | 14 +- yt_dlp/extractor/worldstarhiphop.py | 4 +- yt_dlp/extractor/wppilot.py | 4 +- yt_dlp/extractor/wsj.py | 6 +- yt_dlp/extractor/wwe.py | 7 +- yt_dlp/extractor/wykop.py | 2 +- yt_dlp/extractor/xanimu.py | 19 +- yt_dlp/extractor/xboxclips.py | 4 +- yt_dlp/extractor/xhamster.py | 27 +- yt_dlp/extractor/xiaohongshu.py | 4 +- yt_dlp/extractor/ximalaya.py | 
30 +- yt_dlp/extractor/xinpianchang.py | 4 +- yt_dlp/extractor/xminus.py | 2 +- yt_dlp/extractor/xnxx.py | 2 +- yt_dlp/extractor/xstream.py | 5 +- yt_dlp/extractor/xvideos.py | 48 +- yt_dlp/extractor/xxxymovies.py | 2 +- yt_dlp/extractor/yahoo.py | 16 +- yt_dlp/extractor/yandexdisk.py | 4 +- yt_dlp/extractor/yandexmusic.py | 73 +- yt_dlp/extractor/yandexvideo.py | 12 +- yt_dlp/extractor/yapfiles.py | 6 +- yt_dlp/extractor/yappy.py | 12 +- yt_dlp/extractor/yle_areena.py | 10 +- yt_dlp/extractor/youjizz.py | 2 +- yt_dlp/extractor/youku.py | 4 +- yt_dlp/extractor/younow.py | 45 +- yt_dlp/extractor/youporn.py | 6 +- yt_dlp/extractor/youtube.py | 337 ++++--- yt_dlp/extractor/zaiko.py | 2 +- yt_dlp/extractor/zapiks.py | 4 +- yt_dlp/extractor/zattoo.py | 49 +- yt_dlp/extractor/zdf.py | 31 +- yt_dlp/extractor/zee5.py | 33 +- yt_dlp/extractor/zeenews.py | 6 +- yt_dlp/extractor/zenporn.py | 8 +- yt_dlp/extractor/zetland.py | 4 +- yt_dlp/extractor/zhihu.py | 2 +- yt_dlp/extractor/zingmp3.py | 12 +- yt_dlp/extractor/zoom.py | 10 +- yt_dlp/extractor/zype.py | 6 +- yt_dlp/jsinterp.py | 20 +- yt_dlp/networking/__init__.py | 2 +- yt_dlp/networking/_curlcffi.py | 2 +- yt_dlp/networking/_helper.py | 4 +- yt_dlp/networking/_requests.py | 20 +- yt_dlp/networking/_urllib.py | 8 +- yt_dlp/networking/_websockets.py | 6 +- yt_dlp/networking/common.py | 28 +- yt_dlp/networking/exceptions.py | 2 +- yt_dlp/networking/impersonate.py | 6 +- yt_dlp/options.py | 42 +- yt_dlp/postprocessor/__init__.py | 2 +- yt_dlp/postprocessor/common.py | 6 +- yt_dlp/postprocessor/embedthumbnail.py | 19 +- yt_dlp/postprocessor/exec.py | 5 +- yt_dlp/postprocessor/ffmpeg.py | 57 +- yt_dlp/postprocessor/modify_chapters.py | 2 +- .../postprocessor/movefilesafterdownload.py | 7 +- yt_dlp/postprocessor/sponskrub.py | 4 +- yt_dlp/postprocessor/sponsorblock.py | 10 +- yt_dlp/socks.py | 8 +- yt_dlp/update.py | 10 +- yt_dlp/utils/_legacy.py | 10 +- yt_dlp/utils/_utils.py | 199 ++-- yt_dlp/utils/networking.py | 4 +- 
yt_dlp/webvtt.py | 17 +- 915 files changed, 7027 insertions(+), 7246 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 837b600e3..aeba3c44d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -266,7 +266,7 @@ After you have ensured this site is distributing its content legally, you can fo $ hatch fmt --check ``` - You can use `hatch fmt` to automatically fix problems. + You can use `hatch fmt` to automatically fix problems. Rules that the linter/formatter enforces should not be disabled with `# noqa` unless a maintainer requests it. The only exception allowed is for old/printf-style string formatting in GraphQL query templates (use `# noqa: UP031`). 1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython and PyPy for Python 3.8 and above. Backward compatibility is not required for even older versions of Python. 1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: diff --git a/bundle/py2exe.py b/bundle/py2exe.py index 5fbe55e46..5b7f4883b 100755 --- a/bundle/py2exe.py +++ b/bundle/py2exe.py @@ -44,7 +44,7 @@ def main(): 'Cryptodome', # requests >=2.32.0 breaks py2exe builds due to certifi dependency 'requests', - 'urllib3' + 'urllib3', ], 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], # Modules that are only imported dynamically must be added here diff --git a/bundle/pyinstaller.py b/bundle/pyinstaller.py index db9dbfde5..4184c4bc9 100755 --- a/bundle/pyinstaller.py +++ b/bundle/pyinstaller.py @@ -68,7 +68,7 @@ def exe(onedir): 'dist/', onedir and f'{name}/', name, - OS_NAME == 'win32' and '.exe' + OS_NAME == 'win32' and '.exe', ))) @@ -113,7 +113,7 @@ def windows_set_version(exe, version): ), kids=[ StringFileInfo([StringTable('040904B0', [ - StringStruct('Comments', 'yt-dlp%s Command Line Interface' % suffix), + StringStruct('Comments', 
f'yt-dlp{suffix} Command Line Interface'), StringStruct('CompanyName', 'https://github.com/yt-dlp'), StringStruct('FileDescription', 'yt-dlp%s' % (MACHINE and f' ({MACHINE})')), StringStruct('FileVersion', version), @@ -123,8 +123,8 @@ def windows_set_version(exe, version): StringStruct('ProductName', f'yt-dlp{suffix}'), StringStruct( 'ProductVersion', f'{version}{suffix} on Python {platform.python_version()}'), - ])]), VarFileInfo([VarStruct('Translation', [0, 1200])]) - ] + ])]), VarFileInfo([VarStruct('Translation', [0, 1200])]), + ], )) diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py index 9b4a9d4e2..3918ebde8 100755 --- a/devscripts/bash-completion.py +++ b/devscripts/bash-completion.py @@ -9,8 +9,8 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import yt_dlp -BASH_COMPLETION_FILE = "completions/bash/yt-dlp" -BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in" +BASH_COMPLETION_FILE = 'completions/bash/yt-dlp' +BASH_COMPLETION_TEMPLATE = 'devscripts/bash-completion.in' def build_completion(opt_parser): @@ -21,9 +21,9 @@ def build_completion(opt_parser): opts_flag.append(option.get_opt_string()) with open(BASH_COMPLETION_TEMPLATE) as f: template = f.read() - with open(BASH_COMPLETION_FILE, "w") as f: + with open(BASH_COMPLETION_FILE, 'w') as f: # just using the special char - filled_template = template.replace("{{flags}}", " ".join(opts_flag)) + filled_template = template.replace('{{flags}}', ' '.join(opts_flag)) f.write(filled_template) diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py index 8e199e7d0..00634fb91 100644 --- a/devscripts/make_changelog.py +++ b/devscripts/make_changelog.py @@ -223,10 +223,10 @@ class Changelog: return message if not sep else f'{message}{sep}{rest}' - def _format_message_link(self, message, hash): - assert message or hash, 'Improperly defined commit message or override' - message = message if message else hash[:HASH_LENGTH] - return 
f'[{message}]({self.repo_url}/commit/{hash})' if hash else message + def _format_message_link(self, message, commit_hash): + assert message or commit_hash, 'Improperly defined commit message or override' + message = message if message else commit_hash[:HASH_LENGTH] + return f'[{message}]({self.repo_url}/commit/{commit_hash})' if commit_hash else message def _format_issues(self, issues): return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues) @@ -356,7 +356,7 @@ class CommitRange: logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}') self._commits[commit.hash] = commit - self._commits = {key: value for key, value in reversed(self._commits.items())} + self._commits = dict(reversed(self._commits.items())) def groups(self): group_dict = defaultdict(list) diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py index 2270b31d3..cbb5859aa 100755 --- a/devscripts/make_readme.py +++ b/devscripts/make_readme.py @@ -51,7 +51,7 @@ PATCHES = ( ), ( # Headings r'(?m)^ (\w.+\n)( (?=\w))?', - r'## \1' + r'## \1', ), ( # Fixup `--date` formatting rf'(?m)( --date DATE.+({delim}[^\[]+)*)\[.+({delim}.+)*$', @@ -61,26 +61,26 @@ PATCHES = ( ), ( # Do not split URLs rf'({delim[:-1]})? 
(?P<label>\[\S+\] )?(?P<url>https?({delim})?:({delim})?/({delim})?/(({delim})?\S+)+)\s', - lambda mobj: ''.join((delim, mobj.group('label') or '', re.sub(r'\s+', '', mobj.group('url')), '\n')) + lambda mobj: ''.join((delim, mobj.group('label') or '', re.sub(r'\s+', '', mobj.group('url')), '\n')), ), ( # Do not split "words" rf'(?m)({delim}\S+)+$', - lambda mobj: ''.join((delim, mobj.group(0).replace(delim, ''))) + lambda mobj: ''.join((delim, mobj.group(0).replace(delim, ''))), ), ( # Allow overshooting last line rf'(?m)^(?P<prev>.+)${delim}(?P<current>.+)$(?!{delim})', lambda mobj: (mobj.group().replace(delim, ' ') if len(mobj.group()) - len(delim) + 1 <= max_width + ALLOWED_OVERSHOOT - else mobj.group()) + else mobj.group()), ), ( # Avoid newline when a space is available b/w switch and description DISABLE_PATCH, # This creates issues with prepare_manpage r'(?m)^(\s{4}-.{%d})(%s)' % (switch_col_width - 6, delim), - r'\1 ' + r'\1 ', ), ( # Replace brackets with a Markdown link r'SponsorBlock API \((http.+)\)', - r'[SponsorBlock API](\1)' + r'[SponsorBlock API](\1)', ), ) diff --git a/devscripts/set-variant.py b/devscripts/set-variant.py index 10341e744..24ce4552d 100644 --- a/devscripts/set-variant.py +++ b/devscripts/set-variant.py @@ -30,7 +30,7 @@ def property_setter(name, value): opts = parse_options() transform = compose_functions( property_setter('VARIANT', opts.variant), - property_setter('UPDATE_HINT', opts.update_message) + property_setter('UPDATE_HINT', opts.update_message), ) write_file(VERSION_FILE, transform(read_file(VERSION_FILE))) diff --git a/devscripts/update-version.py b/devscripts/update-version.py index 07a071745..2018ba844 100644 --- a/devscripts/update-version.py +++ b/devscripts/update-version.py @@ -24,7 +24,7 @@ def get_new_version(version, revision): else: old_version = read_version().split('.') if version.split('.') == old_version[:3]: - revision = str(int((old_version + [0])[3]) + 1) + revision = str(int(([*old_version, 0])[3]) + 1) 
return f'{version}.{revision}' if revision else version diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py index 267af5f6e..8e190c00c 100755 --- a/devscripts/zsh-completion.py +++ b/devscripts/zsh-completion.py @@ -9,15 +9,15 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import yt_dlp -ZSH_COMPLETION_FILE = "completions/zsh/_yt-dlp" -ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in" +ZSH_COMPLETION_FILE = 'completions/zsh/_yt-dlp' +ZSH_COMPLETION_TEMPLATE = 'devscripts/zsh-completion.in' def build_completion(opt_parser): opts = [opt for group in opt_parser.option_groups for opt in group.option_list] - opts_file = [opt for opt in opts if opt.metavar == "FILE"] - opts_dir = [opt for opt in opts if opt.metavar == "DIR"] + opts_file = [opt for opt in opts if opt.metavar == 'FILE'] + opts_dir = [opt for opt in opts if opt.metavar == 'DIR'] fileopts = [] for opt in opts_file: @@ -38,11 +38,11 @@ def build_completion(opt_parser): with open(ZSH_COMPLETION_TEMPLATE) as f: template = f.read() - template = template.replace("{{fileopts}}", "|".join(fileopts)) - template = template.replace("{{diropts}}", "|".join(diropts)) - template = template.replace("{{flags}}", " ".join(flags)) + template = template.replace('{{fileopts}}', '|'.join(fileopts)) + template = template.replace('{{diropts}}', '|'.join(diropts)) + template = template.replace('{{flags}}', ' '.join(flags)) - with open(ZSH_COMPLETION_FILE, "w") as f: + with open(ZSH_COMPLETION_FILE, 'w') as f: f.write(template) diff --git a/pyproject.toml b/pyproject.toml index da6403ec7..01162b794 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -183,21 +183,84 @@ line-length = 120 [tool.ruff.lint] ignore = [ - "E402", # module level import not at top of file - "E501", # line too long - "E731", # do not assign a lambda expression, use a def - "E741", # ambiguous variable name + "E402", # module-import-not-at-top-of-file + "E501", # line-too-long + "E731", # 
lambda-assignment + "E741", # ambiguous-variable-name + "UP036", # outdated-version-block + "B006", # mutable-argument-default + "B008", # function-call-in-default-argument + "B011", # assert-false + "B017", # assert-raises-exception + "B023", # function-uses-loop-variable (false positives) + "B028", # no-explicit-stacklevel + "B904", # raise-without-from-inside-except + "C401", # unnecessary-generator-set + "C402", # unnecessary-generator-dict + "PIE790", # unnecessary-placeholder + "SIM102", # collapsible-if + "SIM108", # if-else-block-instead-of-if-exp + "SIM112", # uncapitalized-environment-variables + "SIM113", # enumerate-for-loop + "SIM114", # if-with-same-arms + "SIM115", # open-file-with-context-handler + "SIM117", # multiple-with-statements + "SIM223", # expr-and-false + "SIM300", # yoda-conditions + "TD001", # invalid-todo-tag + "TD002", # missing-todo-author + "TD003", # missing-todo-link + "PLE0604", # invalid-all-object (false positives) + "PLW0603", # global-statement + "PLW1510", # subprocess-run-without-check + "PLW2901", # redefined-loop-name + "RUF001", # ambiguous-unicode-character-string + "RUF012", # mutable-class-default + "RUF100", # unused-noqa (flake8 has slightly different behavior) ] select = [ - "E", # pycodestyle errors - "W", # pycodestyle warnings - "F", # pyflakes - "I", # import order + "E", # pycodestyle Error + "W", # pycodestyle Warning + "F", # Pyflakes + "I", # isort + "Q", # flake8-quotes + "N803", # invalid-argument-name + "N804", # invalid-first-argument-name-for-class-method + "UP", # pyupgrade + "B", # flake8-bugbear + "A", # flake8-builtins + "COM", # flake8-commas + "C4", # flake8-comprehensions + "FA", # flake8-future-annotations + "ISC", # flake8-implicit-str-concat + "ICN003", # banned-import-from + "PIE", # flake8-pie + "T20", # flake8-print + "RSE", # flake8-raise + "RET504", # unnecessary-assign + "SIM", # flake8-simplify + "TID251", # banned-api + "TD", # flake8-todos + "PLC", # Pylint Convention + "PLE", # 
Pylint Error + "PLW", # Pylint Warning + "RUF", # Ruff-specific rules ] [tool.ruff.lint.per-file-ignores] -"devscripts/lazy_load_template.py" = ["F401"] -"!yt_dlp/extractor/**.py" = ["I"] +"devscripts/lazy_load_template.py" = [ + "F401", # unused-import +] +"!yt_dlp/extractor/**.py" = [ + "I", # isort + "ICN003", # banned-import-from + "T20", # flake8-print + "A002", # builtin-argument-shadowing + "C408", # unnecessary-collection-call +] +"yt_dlp/jsinterp.py" = [ + "UP031", # printf-string-formatting +] [tool.ruff.lint.isort] known-first-party = [ @@ -207,6 +270,50 @@ known-first-party = [ ] relative-imports-order = "closest-to-furthest" +[tool.ruff.lint.flake8-quotes] +docstring-quotes = "double" +multiline-quotes = "single" +inline-quotes = "single" +avoid-escape = false + +[tool.ruff.lint.pep8-naming] +classmethod-decorators = [ + "yt_dlp.utils.classproperty", +] + +[tool.ruff.lint.flake8-import-conventions] +banned-from = [ + "base64", + "datetime", + "functools", + "glob", + "hashlib", + "itertools", + "json", + "math", + "os", + "pathlib", + "random", + "re", + "string", + "sys", + "time", + "urllib", + "uuid", + "xml", +] + +[tool.ruff.lint.flake8-tidy-imports.banned-api] +"yt_dlp.compat.compat_str".msg = "Use `str` instead." +"yt_dlp.compat.compat_b64decode".msg = "Use `base64.b64decode` instead." +"yt_dlp.compat.compat_urlparse".msg = "Use `urllib.parse` instead." +"yt_dlp.compat.compat_parse_qs".msg = "Use `urllib.parse.parse_qs` instead." +"yt_dlp.compat.compat_urllib_parse_unquote".msg = "Use `urllib.parse.unquote` instead." +"yt_dlp.compat.compat_urllib_parse_urlencode".msg = "Use `urllib.parse.urlencode` instead." +"yt_dlp.compat.compat_urllib_parse_urlparse".msg = "Use `urllib.parse.urlparse` instead." +"yt_dlp.compat.compat_shlex_quote".msg = "Use `yt_dlp.utils.shell_quote` instead." +"yt_dlp.utils.error_to_compat_str".msg = "Use `str` instead." 
+ [tool.autopep8] max_line_length = 120 recursive = true diff --git a/test/conftest.py b/test/conftest.py index decd2c85c..a8b92f811 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -22,8 +22,8 @@ def handler(request): class HandlerWrapper(handler): RH_KEY = handler.RH_KEY - def __init__(self, *args, **kwargs): - super().__init__(logger=FakeLogger, *args, **kwargs) + def __init__(self, **kwargs): + super().__init__(logger=FakeLogger, **kwargs) return HandlerWrapper @@ -54,11 +54,11 @@ def skip_handlers_if(request, handler): def pytest_configure(config): config.addinivalue_line( - "markers", "skip_handler(handler): skip test for the given handler", + 'markers', 'skip_handler(handler): skip test for the given handler', ) config.addinivalue_line( - "markers", "skip_handler_if(handler): skip test for the given handler if condition is true" + 'markers', 'skip_handler_if(handler): skip test for the given handler if condition is true', ) config.addinivalue_line( - "markers", "skip_handlers_if(handler): skip test for handlers when the condition is true" + 'markers', 'skip_handlers_if(handler): skip test for handlers when the condition is true', ) diff --git a/test/helper.py b/test/helper.py index e7473120d..3b550d192 100644 --- a/test/helper.py +++ b/test/helper.py @@ -16,8 +16,8 @@ if 'pytest' in sys.modules: import pytest is_download_test = pytest.mark.download else: - def is_download_test(testClass): - return testClass + def is_download_test(test_class): + return test_class def get_params(override=None): @@ -45,10 +45,10 @@ def try_rm(filename): def report_warning(message, *args, **kwargs): - ''' + """ Print the message to stderr, it will be prefixed with 'WARNING:' If stderr is a tty file the 'WARNING:' will be colored - ''' + """ if sys.stderr.isatty() and compat_os_name != 'nt': _msg_header = '\033[0;33mWARNING:\033[0m' else: @@ -138,15 +138,14 @@ def expect_value(self, got, expected, field): elif isinstance(expected, list) and isinstance(got, list): 
self.assertEqual( len(expected), len(got), - 'Expect a list of length %d, but got a list of length %d for field %s' % ( - len(expected), len(got), field)) + f'Expect a list of length {len(expected)}, but got a list of length {len(got)} for field {field}') for index, (item_got, item_expected) in enumerate(zip(got, expected)): type_got = type(item_got) type_expected = type(item_expected) self.assertEqual( type_expected, type_got, - 'Type mismatch for list item at index %d for field %s, expected %r, got %r' % ( - index, field, type_expected, type_got)) + f'Type mismatch for list item at index {index} for field {field}, ' + f'expected {type_expected!r}, got {type_got!r}') expect_value(self, item_got, item_expected, field) else: if isinstance(expected, str) and expected.startswith('md5:'): @@ -224,7 +223,7 @@ def sanitize_got_info_dict(got_dict): test_info_dict.pop('display_id') # Remove deprecated fields - for old in YoutubeDL._deprecated_multivalue_fields.keys(): + for old in YoutubeDL._deprecated_multivalue_fields: test_info_dict.pop(old, None) # release_year may be generated from release_date @@ -246,11 +245,11 @@ def expect_info_dict(self, got_dict, expected_dict): if expected_dict.get('ext'): mandatory_fields.extend(('url', 'ext')) for key in mandatory_fields: - self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key) + self.assertTrue(got_dict.get(key), f'Missing mandatory field {key}') # Check for mandatory fields that are automatically set by YoutubeDL if got_dict.get('_type', 'video') == 'video': for key in ['webpage_url', 'extractor', 'extractor_key']: - self.assertTrue(got_dict.get(key), 'Missing field: %s' % key) + self.assertTrue(got_dict.get(key), f'Missing field: {key}') test_info_dict = sanitize_got_info_dict(got_dict) @@ -258,7 +257,7 @@ def expect_info_dict(self, got_dict, expected_dict): if missing_keys: def _repr(v): if isinstance(v, str): - return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n') + return 
"'{}'".format(v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')) elif isinstance(v, type): return v.__name__ else: @@ -275,8 +274,7 @@ def expect_info_dict(self, got_dict, expected_dict): write_string(info_dict_str.replace('\n', '\n '), out=sys.stderr) self.assertFalse( missing_keys, - 'Missing keys in test definition: %s' % ( - ', '.join(sorted(missing_keys)))) + 'Missing keys in test definition: {}'.format(', '.join(sorted(missing_keys)))) def assertRegexpMatches(self, text, regexp, msg=None): @@ -285,9 +283,9 @@ def assertRegexpMatches(self, text, regexp, msg=None): else: m = re.match(regexp, text) if not m: - note = 'Regexp didn\'t match: %r not found' % (regexp) + note = f'Regexp didn\'t match: {regexp!r} not found' if len(text) < 1000: - note += ' in %r' % text + note += f' in {text!r}' if msg is None: msg = note else: @@ -310,7 +308,7 @@ def assertLessEqual(self, got, expected, msg=None): def assertEqual(self, got, expected, msg=None): - if not (got == expected): + if got != expected: if msg is None: msg = f'{got!r} not equal to {expected!r}' self.assertTrue(got == expected, msg) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 744587e45..31e8f8244 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -262,19 +262,19 @@ class TestInfoExtractor(unittest.TestCase): ''', { 'chapters': [ - {"title": "Explosie Turnhout", "start_time": 70, "end_time": 440}, - {"title": "Jaarwisseling", "start_time": 440, "end_time": 1179}, - {"title": "Natuurbranden Colorado", "start_time": 1179, "end_time": 1263}, - {"title": "Klimaatverandering", "start_time": 1263, "end_time": 1367}, - {"title": "Zacht weer", "start_time": 1367, "end_time": 1383}, - {"title": "Financiële balans", "start_time": 1383, "end_time": 1484}, - {"title": "Club Brugge", "start_time": 1484, "end_time": 1575}, - {"title": "Mentale gezondheid bij topsporters", "start_time": 1575, "end_time": 1728}, - {"title": "Olympische Winterspelen", 
"start_time": 1728, "end_time": 1873}, - {"title": "Sober oudjaar in Nederland", "start_time": 1873, "end_time": 2079.23} + {'title': 'Explosie Turnhout', 'start_time': 70, 'end_time': 440}, + {'title': 'Jaarwisseling', 'start_time': 440, 'end_time': 1179}, + {'title': 'Natuurbranden Colorado', 'start_time': 1179, 'end_time': 1263}, + {'title': 'Klimaatverandering', 'start_time': 1263, 'end_time': 1367}, + {'title': 'Zacht weer', 'start_time': 1367, 'end_time': 1383}, + {'title': 'Financiële balans', 'start_time': 1383, 'end_time': 1484}, + {'title': 'Club Brugge', 'start_time': 1484, 'end_time': 1575}, + {'title': 'Mentale gezondheid bij topsporters', 'start_time': 1575, 'end_time': 1728}, + {'title': 'Olympische Winterspelen', 'start_time': 1728, 'end_time': 1873}, + {'title': 'Sober oudjaar in Nederland', 'start_time': 1873, 'end_time': 2079.23}, ], - 'title': 'Het journaal - Aflevering 365 (Seizoen 2021)' - }, {} + 'title': 'Het journaal - Aflevering 365 (Seizoen 2021)', + }, {}, ), ( # test multiple thumbnails in a list @@ -301,13 +301,13 @@ class TestInfoExtractor(unittest.TestCase): 'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}], }, {}, - ) + ), ] for html, expected_dict, search_json_ld_kwargs in _TESTS: expect_dict( self, self.ie._search_json_ld(html, None, **search_json_ld_kwargs), - expected_dict + expected_dict, ) def test_download_json(self): @@ -366,7 +366,7 @@ class TestInfoExtractor(unittest.TestCase): 'height': 740, 'tbr': 1500, }], - 'thumbnail': '//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg' + 'thumbnail': '//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg', }) # from https://www.csfd.cz/ @@ -419,9 +419,9 @@ class TestInfoExtractor(unittest.TestCase): 'height': 1080, }], 'subtitles': { - 'cs': [{'url': 'https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt'}] + 'cs': [{'url': 'https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt'}], }, - 
'thumbnail': 'https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360' + 'thumbnail': 'https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360', }) # from https://tamasha.com/v/Kkdjw @@ -452,7 +452,7 @@ class TestInfoExtractor(unittest.TestCase): 'ext': 'mp4', 'format_id': '144p', 'height': 144, - }] + }], }) # from https://www.directvnow.com @@ -470,7 +470,7 @@ class TestInfoExtractor(unittest.TestCase): 'formats': [{ 'ext': 'mp4', 'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4', - }] + }], }) # from https://www.directvnow.com @@ -488,7 +488,7 @@ class TestInfoExtractor(unittest.TestCase): 'formats': [{ 'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4', 'ext': 'mp4', - }] + }], }) # from https://www.klarna.com/uk/ @@ -547,8 +547,8 @@ class TestInfoExtractor(unittest.TestCase): 'id': 'XEgvuql4', 'formats': [{ 'url': 'rtmp://192.138.214.154/live/sjclive', - 'ext': 'flv' - }] + 'ext': 'flv', + }], }) # from https://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary/ @@ -588,8 +588,8 @@ class TestInfoExtractor(unittest.TestCase): 'thumbnail': 'https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg', 'formats': [{ 'url': 'https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv', - 'ext': 'flv' - }] + 'ext': 'flv', + }], }) # from http://www.indiedb.com/games/king-machine/videos @@ -610,12 +610,12 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'formats': [{ 'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode_mp4/king-machine-trailer.mp4', 'height': 360, - 'ext': 'mp4' + 'ext': 'mp4', }, { 'url': 
'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode720p_mp4/king-machine-trailer.mp4', 'height': 720, - 'ext': 'mp4' - }] + 'ext': 'mp4', + }], }) def test_parse_m3u8_formats(self): @@ -866,7 +866,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'height': 1080, 'vcodec': 'avc1.64002a', }], - {} + {}, ), ( 'bipbop_16x9', @@ -990,45 +990,45 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'en': [{ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/eng/prog_index.m3u8', 'ext': 'vtt', - 'protocol': 'm3u8_native' + 'protocol': 'm3u8_native', }, { 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/eng_forced/prog_index.m3u8', 'ext': 'vtt', - 'protocol': 'm3u8_native' + 'protocol': 'm3u8_native', }], 'fr': [{ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/fra/prog_index.m3u8', 'ext': 'vtt', - 'protocol': 'm3u8_native' + 'protocol': 'm3u8_native', }, { 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/fra_forced/prog_index.m3u8', 'ext': 'vtt', - 'protocol': 'm3u8_native' + 'protocol': 'm3u8_native', }], 'es': [{ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/spa/prog_index.m3u8', 'ext': 'vtt', - 'protocol': 'm3u8_native' + 'protocol': 'm3u8_native', }, { 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/spa_forced/prog_index.m3u8', 'ext': 'vtt', - 'protocol': 'm3u8_native' + 'protocol': 'm3u8_native', }], 'ja': [{ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/jpn/prog_index.m3u8', 'ext': 'vtt', - 'protocol': 'm3u8_native' + 'protocol': 'm3u8_native', }, { 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/jpn_forced/prog_index.m3u8', 'ext': 'vtt', - 'protocol': 
'm3u8_native' + 'protocol': 'm3u8_native', }], - } + }, ), ] for m3u8_file, m3u8_url, expected_formats, expected_subs in _TEST_CASES: - with open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, encoding='utf-8') as f: + with open(f'./test/testdata/m3u8/{m3u8_file}.m3u8', encoding='utf-8') as f: formats, subs = self.ie._parse_m3u8_formats_and_subtitles( f.read(), m3u8_url, ext='mp4') self.ie._sort_formats(formats) @@ -1366,14 +1366,14 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', 'protocol': 'http_dash_segments', - } - ] + }, + ], }, - ) + ), ] for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES: - with open('./test/testdata/mpd/%s.mpd' % mpd_file, encoding='utf-8') as f: + with open(f'./test/testdata/mpd/{mpd_file}.mpd', encoding='utf-8') as f: formats, subtitles = self.ie._parse_mpd_formats_and_subtitles( compat_etree_fromstring(f.read().encode()), mpd_base_url=mpd_base_url, mpd_url=mpd_url) @@ -1408,7 +1408,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'sampling_rate': 48000, 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video-100', @@ -1431,7 +1431,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'codec_private_data': '00000001674D401FDA0544EFFC2D002CBC40000003004000000C03C60CA80000000168EF32C8', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video-326', @@ -1454,7 +1454,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'codec_private_data': 
'00000001674D401FDA0241FE23FFC3BC83BA44000003000400000300C03C60CA800000000168EF32C8', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video-698', @@ -1477,7 +1477,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'codec_private_data': '00000001674D401FDA0350BFB97FF06AF06AD1000003000100000300300F1832A00000000168EF32C8', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video-1493', @@ -1500,7 +1500,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'codec_private_data': '00000001674D401FDA011C3DE6FFF0D890D871000003000100000300300F1832A00000000168EF32C8', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video-4482', @@ -1523,7 +1523,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'codec_private_data': '00000001674D401FDA01A816F97FFC1ABC1AB440000003004000000C03C60CA80000000168EF32C8', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }], { @@ -1538,10 +1538,10 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'duration': 8880746666, 'timescale': 10000000, 'fourcc': 'TTML', - 'codec_private_data': '' - } - } - ] + 'codec_private_data': '', + }, + }, + ], }, ), ( @@ -1571,7 +1571,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'sampling_rate': 48000, 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'audio_deu_1-224', @@ -1597,7 +1597,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'sampling_rate': 48000, 'channels': 6, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 
'video_deu-23', @@ -1622,7 +1622,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'codec_private_data': '000000016742C00CDB06077E5C05A808080A00000300020000030009C0C02EE0177CC6300F142AE00000000168CA8DC8', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-403', @@ -1647,7 +1647,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'codec_private_data': '00000001674D4014E98323B602D4040405000003000100000300320F1429380000000168EAECF2', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-680', @@ -1672,7 +1672,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'codec_private_data': '00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-1253', @@ -1698,7 +1698,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'codec_private_data': '00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-2121', @@ -1723,7 +1723,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'codec_private_data': '00000001674D401EECA0601BD80B50101014000003000400000300C83C58B6580000000168E93B3C80', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-3275', @@ -1748,7 +1748,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'codec_private_data': '00000001674D4020ECA02802DD80B501010140000003004000000C83C60C65800000000168E93B3C80', 'channels': 2, 
'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-5300', @@ -1773,7 +1773,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'codec_private_data': '00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-8079', @@ -1798,7 +1798,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'codec_private_data': '00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80', 'channels': 2, 'bits_per_sample': 16, - 'nal_unit_length_field': 4 + 'nal_unit_length_field': 4, }, }], {}, @@ -1806,7 +1806,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ ] for ism_file, ism_url, expected_formats, expected_subtitles in _TEST_CASES: - with open('./test/testdata/ism/%s.Manifest' % ism_file, encoding='utf-8') as f: + with open(f'./test/testdata/ism/{ism_file}.Manifest', encoding='utf-8') as f: formats, subtitles = self.ie._parse_ism_formats_and_subtitles( compat_etree_fromstring(f.read().encode()), ism_url=ism_url) self.ie._sort_formats(formats) @@ -1827,12 +1827,12 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ 'tbr': 2148, 'width': 1280, 'height': 720, - }] + }], ), ] for f4m_file, f4m_url, expected_formats in _TEST_CASES: - with open('./test/testdata/f4m/%s.f4m' % f4m_file, encoding='utf-8') as f: + with open(f'./test/testdata/f4m/{f4m_file}.f4m', encoding='utf-8') as f: formats = self.ie._parse_f4m_formats( compat_etree_fromstring(f.read().encode()), f4m_url, None) @@ -1873,13 +1873,13 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ }, { 'manifest_url': 'https://example.org/src/foo_xspf.xspf', 'url': 'https://example.com/track3.mp3', - }] - }] + }], + }], ), ] 
for xspf_file, xspf_url, expected_entries in _TEST_CASES: - with open('./test/testdata/xspf/%s.xspf' % xspf_file, encoding='utf-8') as f: + with open(f'./test/testdata/xspf/{xspf_file}.xspf', encoding='utf-8') as f: entries = self.ie._parse_xspf( compat_etree_fromstring(f.read().encode()), xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url) @@ -1902,7 +1902,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ server_thread.start() (content, urlh) = self.ie._download_webpage_handle( - 'http://127.0.0.1:%d/teapot' % port, None, + f'http://127.0.0.1:{port}/teapot', None, expected_status=TEAPOT_RESPONSE_STATUS) self.assertEqual(content, TEAPOT_RESPONSE_BODY) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 5242cf88f..841ce1af3 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -8,6 +8,7 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import contextlib import copy import json @@ -129,8 +130,8 @@ class TestFormatSelection(unittest.TestCase): 'allow_multiple_audio_streams': multi, }) ydl.process_ie_result(info_dict.copy()) - downloaded = map(lambda x: x['format_id'], ydl.downloaded_info_dicts) - self.assertEqual(list(downloaded), list(expected)) + downloaded = [x['format_id'] for x in ydl.downloaded_info_dicts] + self.assertEqual(downloaded, list(expected)) test('20/47', '47') test('20/71/worst', '35') @@ -515,10 +516,8 @@ class TestFormatSelection(unittest.TestCase): self.assertEqual(downloaded_ids, ['D', 'C', 'B']) ydl = YDL({'format': 'best[height<40]'}) - try: + with contextlib.suppress(ExtractorError): ydl.process_ie_result(info_dict) - except ExtractorError: - pass self.assertEqual(ydl.downloaded_info_dicts, []) def test_default_format_spec(self): @@ -652,8 +651,8 @@ class TestYoutubeDL(unittest.TestCase): 'formats': [ {'id': 'id 1', 'height': 1080, 'width': 1920}, {'id': 'id 2', 'height': 720}, - {'id': 'id 3'} - ] + {'id': 'id 3'}, + ], } def 
test_prepare_outtmpl_and_filename(self): @@ -773,7 +772,7 @@ class TestYoutubeDL(unittest.TestCase): test('%(formats)j', (json.dumps(FORMATS), None)) test('%(formats)#j', ( json.dumps(FORMATS, indent=4), - json.dumps(FORMATS, indent=4).replace(':', ':').replace('"', """).replace('\n', ' ') + json.dumps(FORMATS, indent=4).replace(':', ':').replace('"', '"').replace('\n', ' '), )) test('%(title5).3B', 'á') test('%(title5)U', 'áéí 𝐀') @@ -843,8 +842,8 @@ class TestYoutubeDL(unittest.TestCase): # Empty filename test('%(foo|)s-%(bar|)s.%(ext)s', '-.mp4') - # test('%(foo|)s.%(ext)s', ('.mp4', '_.mp4')) # fixme - # test('%(foo|)s', ('', '_')) # fixme + # test('%(foo|)s.%(ext)s', ('.mp4', '_.mp4')) # FIXME: ? + # test('%(foo|)s', ('', '_')) # FIXME: ? # Environment variable expansion for prepare_filename os.environ['__yt_dlp_var'] = 'expanded' @@ -861,7 +860,7 @@ class TestYoutubeDL(unittest.TestCase): test('Hello %(title1)s', 'Hello $PATH') test('Hello %(title2)s', 'Hello %PATH%') test('%(title3)s', ('foo/bar\\test', 'foo⧸bar⧹test')) - test('folder/%(title3)s', ('folder/foo/bar\\test', 'folder%sfoo⧸bar⧹test' % os.path.sep)) + test('folder/%(title3)s', ('folder/foo/bar\\test', f'folder{os.path.sep}foo⧸bar⧹test')) def test_format_note(self): ydl = YoutubeDL() @@ -883,22 +882,22 @@ class TestYoutubeDL(unittest.TestCase): f.write('EXAMPLE') return [info['filepath']], info - def run_pp(params, PP): + def run_pp(params, pp): with open(filename, 'w') as f: f.write('EXAMPLE') ydl = YoutubeDL(params) - ydl.add_post_processor(PP()) + ydl.add_post_processor(pp()) ydl.post_process(filename, {'filepath': filename}) run_pp({'keepvideo': True}, SimplePP) - self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename) - self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) + self.assertTrue(os.path.exists(filename), f'{filename} doesn\'t exist') + self.assertTrue(os.path.exists(audiofile), f'{audiofile} doesn\'t exist') os.unlink(filename) 
os.unlink(audiofile) run_pp({'keepvideo': False}, SimplePP) - self.assertFalse(os.path.exists(filename), '%s exists' % filename) - self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) + self.assertFalse(os.path.exists(filename), f'{filename} exists') + self.assertTrue(os.path.exists(audiofile), f'{audiofile} doesn\'t exist') os.unlink(audiofile) class ModifierPP(PostProcessor): @@ -908,7 +907,7 @@ class TestYoutubeDL(unittest.TestCase): return [], info run_pp({'keepvideo': False}, ModifierPP) - self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename) + self.assertTrue(os.path.exists(filename), f'{filename} doesn\'t exist') os.unlink(filename) def test_match_filter(self): @@ -920,7 +919,7 @@ class TestYoutubeDL(unittest.TestCase): 'duration': 30, 'filesize': 10 * 1024, 'playlist_id': '42', - 'uploader': "變態妍字幕版 太妍 тест", + 'uploader': '變態妍字幕版 太妍 тест', 'creator': "тест ' 123 ' тест--", 'webpage_url': 'http://example.com/watch?v=shenanigans', } @@ -933,7 +932,7 @@ class TestYoutubeDL(unittest.TestCase): 'description': 'foo', 'filesize': 5 * 1024, 'playlist_id': '43', - 'uploader': "тест 123", + 'uploader': 'тест 123', 'webpage_url': 'http://example.com/watch?v=SHENANIGANS', } videos = [first, second] @@ -1180,7 +1179,7 @@ class TestYoutubeDL(unittest.TestCase): }) return { 'id': video_id, - 'title': 'Video %s' % video_id, + 'title': f'Video {video_id}', 'formats': formats, } @@ -1194,8 +1193,8 @@ class TestYoutubeDL(unittest.TestCase): '_type': 'url_transparent', 'ie_key': VideoIE.ie_key(), 'id': video_id, - 'url': 'video:%s' % video_id, - 'title': 'Video Transparent %s' % video_id, + 'url': f'video:{video_id}', + 'title': f'Video Transparent {video_id}', } def _real_extract(self, url): diff --git a/test/test_aes.py b/test/test_aes.py index a26abfd7d..5f975efec 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -87,7 +87,7 @@ class TestAES(unittest.TestCase): password = intlist_to_bytes(self.key).decode() encrypted = 
base64.b64encode( intlist_to_bytes(self.iv[:8]) - + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae' + + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae', ).decode() decrypted = (aes_decrypt_text(encrypted, password, 16)) self.assertEqual(decrypted, self.secret_msg) @@ -95,7 +95,7 @@ class TestAES(unittest.TestCase): password = intlist_to_bytes(self.key).decode() encrypted = base64.b64encode( intlist_to_bytes(self.iv[:8]) - + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83' + + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83', ).decode() decrypted = (aes_decrypt_text(encrypted, password, 32)) self.assertEqual(decrypted, self.secret_msg) @@ -132,16 +132,16 @@ class TestAES(unittest.TestCase): block = [0x21, 0xA0, 0x43, 0xFF] self.assertEqual(pad_block(block, 'pkcs7'), - block + [0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C]) + [*block, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C]) self.assertEqual(pad_block(block, 'iso7816'), - block + [0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]) + [*block, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]) self.assertEqual(pad_block(block, 'whitespace'), - block + [0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20]) + [*block, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20]) self.assertEqual(pad_block(block, 'zero'), - block + [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]) + [*block, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]) block = list(range(16)) for mode in ('pkcs7', 'iso7816', 'whitespace', 'zero'): diff --git a/test/test_compat.py b/test/test_compat.py index 71ca7f99f..e7d97e3e9 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -15,8 +15,8 @@ from yt_dlp.compat import urllib # isort: split from 
yt_dlp.compat import ( compat_etree_fromstring, compat_expanduser, - compat_urllib_parse_unquote, - compat_urllib_parse_urlencode, + compat_urllib_parse_unquote, # noqa: TID251 + compat_urllib_parse_urlencode, # noqa: TID251 ) from yt_dlp.compat.urllib.request import getproxies @@ -24,15 +24,15 @@ from yt_dlp.compat.urllib.request import getproxies class TestCompat(unittest.TestCase): def test_compat_passthrough(self): with self.assertWarns(DeprecationWarning): - compat.compat_basestring + _ = compat.compat_basestring with self.assertWarns(DeprecationWarning): - compat.WINDOWS_VT_MODE + _ = compat.WINDOWS_VT_MODE self.assertEqual(urllib.request.getproxies, getproxies) with self.assertWarns(DeprecationWarning): - compat.compat_pycrypto_AES # Must not raise error + _ = compat.compat_pycrypto_AES # Must not raise error def test_compat_expanduser(self): old_home = os.environ.get('HOME') diff --git a/test/test_config.py b/test/test_config.py index a393b6534..238ca66d0 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -71,7 +71,7 @@ def _generate_expected_groups(): Path('/etc/yt-dlp.conf'), Path('/etc/yt-dlp/config'), Path('/etc/yt-dlp/config.txt'), - ] + ], } diff --git a/test/test_cookies.py b/test/test_cookies.py index bd61f30a6..a682fee1d 100644 --- a/test/test_cookies.py +++ b/test/test_cookies.py @@ -106,7 +106,7 @@ class TestCookies(unittest.TestCase): def test_chrome_cookie_decryptor_windows_v10(self): with MonkeyPatch(cookies, { - '_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2<z\x16]\n\xbb\xb8\xcb\xd7\x9bA\xc3\x14e\x99{\xd6\xf4&' + '_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2<z\x16]\n\xbb\xb8\xcb\xd7\x9bA\xc3\x14e\x99{\xd6\xf4&', }): encrypted_value = b'v10T\xb8\xf3\xb8\x01\xa7TtcV\xfc\x88\xb8\xb8\xef\x05\xb5\xfd\x18\xc90\x009\xab\xb1\x893\x85)\x87\xe1\xa9-\xa3\xad=' value = '32101439' @@ -121,17 +121,17 @@ class TestCookies(unittest.TestCase): 
self.assertEqual(decryptor.decrypt(encrypted_value), value) def test_safari_cookie_parsing(self): - cookies = \ - b'cook\x00\x00\x00\x01\x00\x00\x00i\x00\x00\x01\x00\x01\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00Y' \ - b'\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x008\x00\x00\x00B\x00\x00\x00F\x00\x00\x00H' \ - b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x03\xa5>\xc3A\x00\x00\x80\xc3\x07:\xc3A' \ - b'localhost\x00foo\x00/\x00test%20%3Bcookie\x00\x00\x00\x054\x07\x17 \x05\x00\x00\x00Kbplist00\xd1\x01' \ - b'\x02_\x10\x18NSHTTPCookieAcceptPolicy\x10\x02\x08\x0b&\x00\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00' \ - b'\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00(' + cookies = ( + b'cook\x00\x00\x00\x01\x00\x00\x00i\x00\x00\x01\x00\x01\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00Y' + b'\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x008\x00\x00\x00B\x00\x00\x00F\x00\x00\x00H' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x03\xa5>\xc3A\x00\x00\x80\xc3\x07:\xc3A' + b'localhost\x00foo\x00/\x00test%20%3Bcookie\x00\x00\x00\x054\x07\x17 \x05\x00\x00\x00Kbplist00\xd1\x01' + b'\x02_\x10\x18NSHTTPCookieAcceptPolicy\x10\x02\x08\x0b&\x00\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00' + b'\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00(') jar = parse_safari_cookies(cookies) self.assertEqual(len(jar), 1) - cookie = list(jar)[0] + cookie = next(iter(jar)) self.assertEqual(cookie.domain, 'localhost') self.assertEqual(cookie.port, None) self.assertEqual(cookie.path, '/') @@ -164,7 +164,7 @@ class TestLenientSimpleCookie(unittest.TestCase): attributes = { key: value for key, value in dict(morsel).items() - if value != "" + if value != '' } self.assertEqual(attributes, expected_attributes, message) @@ -174,133 +174,133 @@ class TestLenientSimpleCookie(unittest.TestCase): self._run_tests( # Copied from https://github.com/python/cpython/blob/v3.10.7/Lib/test/test_http_cookies.py ( - 
"Test basic cookie", - "chips=ahoy; vienna=finger", - {"chips": "ahoy", "vienna": "finger"}, + 'Test basic cookie', + 'chips=ahoy; vienna=finger', + {'chips': 'ahoy', 'vienna': 'finger'}, ), ( - "Test quoted cookie", + 'Test quoted cookie', 'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"', - {"keebler": 'E=mc2; L="Loves"; fudge=\012;'}, + {'keebler': 'E=mc2; L="Loves"; fudge=\012;'}, ), ( "Allow '=' in an unquoted value", - "keebler=E=mc2", - {"keebler": "E=mc2"}, + 'keebler=E=mc2', + {'keebler': 'E=mc2'}, ), ( "Allow cookies with ':' in their name", - "key:term=value:term", - {"key:term": "value:term"}, + 'key:term=value:term', + {'key:term': 'value:term'}, ), ( "Allow '[' and ']' in cookie values", - "a=b; c=[; d=r; f=h", - {"a": "b", "c": "[", "d": "r", "f": "h"}, + 'a=b; c=[; d=r; f=h', + {'a': 'b', 'c': '[', 'd': 'r', 'f': 'h'}, ), ( - "Test basic cookie attributes", + 'Test basic cookie attributes', 'Customer="WILE_E_COYOTE"; Version=1; Path=/acme', - {"Customer": ("WILE_E_COYOTE", {"version": "1", "path": "/acme"})}, + {'Customer': ('WILE_E_COYOTE', {'version': '1', 'path': '/acme'})}, ), ( - "Test flag only cookie attributes", + 'Test flag only cookie attributes', 'Customer="WILE_E_COYOTE"; HttpOnly; Secure', - {"Customer": ("WILE_E_COYOTE", {"httponly": True, "secure": True})}, + {'Customer': ('WILE_E_COYOTE', {'httponly': True, 'secure': True})}, ), ( - "Test flag only attribute with values", - "eggs=scrambled; httponly=foo; secure=bar; Path=/bacon", - {"eggs": ("scrambled", {"httponly": "foo", "secure": "bar", "path": "/bacon"})}, + 'Test flag only attribute with values', + 'eggs=scrambled; httponly=foo; secure=bar; Path=/bacon', + {'eggs': ('scrambled', {'httponly': 'foo', 'secure': 'bar', 'path': '/bacon'})}, ), ( "Test special case for 'expires' attribute, 4 digit year", 'Customer="W"; expires=Wed, 01 Jan 2010 00:00:00 GMT', - {"Customer": ("W", {"expires": "Wed, 01 Jan 2010 00:00:00 GMT"})}, + {'Customer': ('W', {'expires': 'Wed, 01 Jan 2010 00:00:00 
GMT'})}, ), ( "Test special case for 'expires' attribute, 2 digit year", 'Customer="W"; expires=Wed, 01 Jan 98 00:00:00 GMT', - {"Customer": ("W", {"expires": "Wed, 01 Jan 98 00:00:00 GMT"})}, + {'Customer': ('W', {'expires': 'Wed, 01 Jan 98 00:00:00 GMT'})}, ), ( - "Test extra spaces in keys and values", - "eggs = scrambled ; secure ; path = bar ; foo=foo ", - {"eggs": ("scrambled", {"secure": True, "path": "bar"}), "foo": "foo"}, + 'Test extra spaces in keys and values', + 'eggs = scrambled ; secure ; path = bar ; foo=foo ', + {'eggs': ('scrambled', {'secure': True, 'path': 'bar'}), 'foo': 'foo'}, ), ( - "Test quoted attributes", + 'Test quoted attributes', 'Customer="WILE_E_COYOTE"; Version="1"; Path="/acme"', - {"Customer": ("WILE_E_COYOTE", {"version": "1", "path": "/acme"})} + {'Customer': ('WILE_E_COYOTE', {'version': '1', 'path': '/acme'})}, ), # Our own tests that CPython passes ( "Allow ';' in quoted value", 'chips="a;hoy"; vienna=finger', - {"chips": "a;hoy", "vienna": "finger"}, + {'chips': 'a;hoy', 'vienna': 'finger'}, ), ( - "Keep only the last set value", - "a=c; a=b", - {"a": "b"}, + 'Keep only the last set value', + 'a=c; a=b', + {'a': 'b'}, ), ) def test_lenient_parsing(self): self._run_tests( ( - "Ignore and try to skip invalid cookies", + 'Ignore and try to skip invalid cookies', 'chips={"ahoy;": 1}; vienna="finger;"', - {"vienna": "finger;"}, + {'vienna': 'finger;'}, ), ( - "Ignore cookies without a name", - "a=b; unnamed; c=d", - {"a": "b", "c": "d"}, + 'Ignore cookies without a name', + 'a=b; unnamed; c=d', + {'a': 'b', 'c': 'd'}, ), ( "Ignore '\"' cookie without name", 'a=b; "; c=d', - {"a": "b", "c": "d"}, + {'a': 'b', 'c': 'd'}, ), ( - "Skip all space separated values", - "x a=b c=d x; e=f", - {"a": "b", "c": "d", "e": "f"}, + 'Skip all space separated values', + 'x a=b c=d x; e=f', + {'a': 'b', 'c': 'd', 'e': 'f'}, ), ( - "Skip all space separated values", + 'Skip all space separated values', 'x a=b; data={"complex": "json", "with": 
"key=value"}; x c=d x', - {"a": "b", "c": "d"}, + {'a': 'b', 'c': 'd'}, ), ( - "Expect quote mending", + 'Expect quote mending', 'a=b; invalid="; c=d', - {"a": "b", "c": "d"}, + {'a': 'b', 'c': 'd'}, ), ( - "Reset morsel after invalid to not capture attributes", - "a=b; invalid; Version=1; c=d", - {"a": "b", "c": "d"}, + 'Reset morsel after invalid to not capture attributes', + 'a=b; invalid; Version=1; c=d', + {'a': 'b', 'c': 'd'}, ), ( - "Reset morsel after invalid to not capture attributes", - "a=b; $invalid; $Version=1; c=d", - {"a": "b", "c": "d"}, + 'Reset morsel after invalid to not capture attributes', + 'a=b; $invalid; $Version=1; c=d', + {'a': 'b', 'c': 'd'}, ), ( - "Continue after non-flag attribute without value", - "a=b; path; Version=1; c=d", - {"a": "b", "c": "d"}, + 'Continue after non-flag attribute without value', + 'a=b; path; Version=1; c=d', + {'a': 'b', 'c': 'd'}, ), ( - "Allow cookie attributes with `$` prefix", + 'Allow cookie attributes with `$` prefix', 'Customer="WILE_E_COYOTE"; $Version=1; $Secure; $Path=/acme', - {"Customer": ("WILE_E_COYOTE", {"version": "1", "secure": True, "path": "/acme"})}, + {'Customer': ('WILE_E_COYOTE', {'version': '1', 'secure': True, 'path': '/acme'})}, ), ( - "Invalid Morsel keys should not result in an error", - "Key=Value; [Invalid]=Value; Another=Value", - {"Key": "Value", "Another": "Value"}, + 'Invalid Morsel keys should not result in an error', + 'Key=Value; [Invalid]=Value; Another=Value', + {'Key': 'Value', 'Another': 'Value'}, ), ) diff --git a/test/test_download.py b/test/test_download.py index 253079249..882d54565 100755 --- a/test/test_download.py +++ b/test/test_download.py @@ -94,7 +94,7 @@ def generator(test_case, tname): 'playlist', [] if is_playlist else [test_case]) def print_skipping(reason): - print('Skipping %s: %s' % (test_case['name'], reason)) + print('Skipping {}: {}'.format(test_case['name'], reason)) self.skipTest(reason) if not ie.working(): @@ -117,7 +117,7 @@ def 
generator(test_case, tname): for other_ie in other_ies: if not other_ie.working(): - print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key()) + print_skipping(f'test depends on {other_ie.ie_key()}IE, marked as not WORKING') params = get_params(test_case.get('params', {})) params['outtmpl'] = tname + '_' + params['outtmpl'] @@ -148,10 +148,7 @@ def generator(test_case, tname): return False if err.__class__.__name__ == expected_exception: return True - for exc in err.exc_info: - if exc.__class__.__name__ == expected_exception: - return True - return False + return any(exc.__class__.__name__ == expected_exception for exc in err.exc_info) def try_rm_tcs_files(tcs=None): if tcs is None: @@ -181,7 +178,7 @@ def generator(test_case, tname): raise if try_num == RETRIES: - report_warning('%s failed due to network errors, skipping...' % tname) + report_warning(f'{tname} failed due to network errors, skipping...') return print(f'Retrying: {try_num} failed tries\n\n##########\n\n') @@ -244,9 +241,8 @@ def generator(test_case, tname): got_fsize = os.path.getsize(tc_filename) assertGreaterEqual( self, got_fsize, expected_minsize, - 'Expected %s to be at least %s, but it\'s only %s ' % - (tc_filename, format_bytes(expected_minsize), - format_bytes(got_fsize))) + f'Expected {tc_filename} to be at least {format_bytes(expected_minsize)}, ' + f'but it\'s only {format_bytes(got_fsize)} ') if 'md5' in tc: md5_for_file = _file_md5(tc_filename) self.assertEqual(tc['md5'], md5_for_file) @@ -255,7 +251,7 @@ def generator(test_case, tname): info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json' self.assertTrue( os.path.exists(info_json_fn), - 'Missing info file %s' % info_json_fn) + f'Missing info file {info_json_fn}') with open(info_json_fn, encoding='utf-8') as infof: info_dict = json.load(infof) expect_info_dict(self, info_dict, tc.get('info_dict', {})) diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py index 099ec2fff..faba0bc9c 
100644 --- a/test/test_downloader_http.py +++ b/test/test_downloader_http.py @@ -38,9 +38,9 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): end = int(mobj.group(2)) valid_range = start is not None and end is not None if valid_range: - content_range = 'bytes %d-%d' % (start, end) + content_range = f'bytes {start}-{end}' if total: - content_range += '/%d' % total + content_range += f'/{total}' self.send_header('Content-Range', content_range) return (end - start + 1) if valid_range else total @@ -84,7 +84,7 @@ class TestHttpFD(unittest.TestCase): filename = 'testfile.mp4' try_rm(encodeFilename(filename)) self.assertTrue(downloader.real_download(filename, { - 'url': 'http://127.0.0.1:%d/%s' % (self.port, ep), + 'url': f'http://127.0.0.1:{self.port}/{ep}', }), ep) self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE, ep) try_rm(encodeFilename(filename)) diff --git a/test/test_http_proxy.py b/test/test_http_proxy.py index 1b21fe78e..2435c878a 100644 --- a/test/test_http_proxy.py +++ b/test/test_http_proxy.py @@ -105,7 +105,7 @@ if urllib3: self.incoming, self.outgoing, server_hostname=server_hostname, - server_side=server_side + server_side=server_side, ) self._ssl_io_loop(self.sslobj.do_handshake) @@ -333,7 +333,7 @@ class TestHTTPConnectProxy: @pytest.mark.skip_handler( 'Requests', - 'bug in urllib3 causes unclosed socket: https://github.com/urllib3/urllib3/issues/3374' + 'bug in urllib3 causes unclosed socket: https://github.com/urllib3/urllib3/issues/3374', ) def test_http_connect_bad_auth(self, handler, ctx): with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address: diff --git a/test/test_iqiyi_sdk_interpreter.py b/test/test_iqiyi_sdk_interpreter.py index 47c632a4e..4e41007c8 100644 --- a/test/test_iqiyi_sdk_interpreter.py +++ b/test/test_iqiyi_sdk_interpreter.py @@ -29,11 +29,11 @@ class WarningLogger: @is_download_test class TestIqiyiSDKInterpreter(unittest.TestCase): def 
test_iqiyi_sdk_interpreter(self): - ''' + """ Test the functionality of IqiyiSDKInterpreter by trying to log in If `sign` is incorrect, /validate call throws an HTTP 556 error - ''' + """ logger = WarningLogger() ie = IqiyiIE(FakeYDL({'logger': logger})) ie._perform_login('foo', 'bar') diff --git a/test/test_netrc.py b/test/test_netrc.py index dc708d974..1e0f4ee3b 100644 --- a/test/test_netrc.py +++ b/test/test_netrc.py @@ -21,7 +21,7 @@ class TestNetRc(unittest.TestCase): continue self.assertTrue( ie._NETRC_MACHINE, - 'Extractor %s supports login, but is missing a _NETRC_MACHINE property' % ie.IE_NAME) + f'Extractor {ie.IE_NAME} supports login, but is missing a _NETRC_MACHINE property') if __name__ == '__main__': diff --git a/test/test_networking.py b/test/test_networking.py index d127cbb94..af3ece3b4 100644 --- a/test/test_networking.py +++ b/test/test_networking.py @@ -375,10 +375,10 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): with handler() as rh: for bad_status in (400, 500, 599, 302): with pytest.raises(HTTPError): - validate_and_send(rh, Request('http://127.0.0.1:%d/gen_%d' % (self.http_port, bad_status))) + validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_{bad_status}')) # Should not raise an error - validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close() + validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200')).close() def test_response_url(self, handler): with handler() as rh: @@ -472,7 +472,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): def test_incompleteread(self, handler): with handler(timeout=2) as rh: with pytest.raises(IncompleteRead, match='13 bytes read, 234221 more expected'): - validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read() + validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/incompleteread')).read() def test_cookies(self, handler): cookiejar = YoutubeDLCookieJar() @@ -740,7 +740,7 @@ 
class TestRequestHandlerMisc: @pytest.mark.parametrize('handler,logger_name', [ ('Requests', 'urllib3'), ('Websockets', 'websockets.client'), - ('Websockets', 'websockets.server') + ('Websockets', 'websockets.server'), ], indirect=['handler']) def test_remove_logging_handler(self, handler, logger_name): # Ensure any logging handlers, which may contain a YoutubeDL instance, @@ -794,7 +794,7 @@ class TestUrllibRequestHandler(TestRequestHandlerBase): with handler() as rh: with pytest.raises( CertificateVerifyError, - match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate' + match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate', ): validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers')) @@ -804,14 +804,14 @@ class TestUrllibRequestHandler(TestRequestHandlerBase): ( Request('http://127.0.0.1', method='GET\n'), 'method can\'t contain control characters', - lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5) + lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5), ), # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265 # bpo-38576: Check implemented in 3.7.8+, 3.8.3+ ( Request('http://127.0.0. 
1', method='GET'), 'URL can\'t contain control characters', - lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3) + lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3), ), # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50 (Request('http://127.0.0.1', headers={'foo\n': 'bar'}), 'Invalid header name', None), @@ -840,7 +840,7 @@ class TestRequestsRequestHandler(TestRequestHandlerBase): (lambda: requests.exceptions.InvalidHeader(), RequestError), # catch-all: https://github.com/psf/requests/blob/main/src/requests/adapters.py#L535 (lambda: urllib3.exceptions.HTTPError(), TransportError), - (lambda: requests.exceptions.RequestException(), RequestError) + (lambda: requests.exceptions.RequestException(), RequestError), # (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object ]) def test_request_error_mapping(self, handler, monkeypatch, raised, expected): @@ -868,12 +868,12 @@ class TestRequestsRequestHandler(TestRequestHandlerBase): ( lambda: urllib3.exceptions.ProtocolError('error', http.client.IncompleteRead(partial=b'abc', expected=4)), IncompleteRead, - '3 bytes read, 4 more expected' + '3 bytes read, 4 more expected', ), ( lambda: urllib3.exceptions.ProtocolError('error', urllib3.exceptions.IncompleteRead(partial=3, expected=5)), IncompleteRead, - '3 bytes read, 5 more expected' + '3 bytes read, 5 more expected', ), ]) def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match): @@ -1125,7 +1125,7 @@ class TestRequestHandlerValidation: ('https', False, {}), ]), (NoCheckRH, [('http', False, {})]), - (ValidationRH, [('http', UnsupportedRequest, {})]) + (ValidationRH, [('http', UnsupportedRequest, {})]), ] PROXY_SCHEME_TESTS = [ @@ -1219,7 +1219,7 @@ class TestRequestHandlerValidation: ({'impersonate': ImpersonateTarget('chrome', None, None, None)}, False), ({'impersonate': ImpersonateTarget(None, None, None, None)}, False), ({'impersonate': 
ImpersonateTarget()}, False), - ({'impersonate': 'chrome'}, AssertionError) + ({'impersonate': 'chrome'}, AssertionError), ]), (NoCheckRH, 'http', [ ({'cookiejar': 'notacookiejar'}, False), @@ -1235,7 +1235,7 @@ class TestRequestHandlerValidation: ('Urllib', False, 'http'), ('Requests', False, 'http'), ('CurlCFFI', False, 'http'), - ('Websockets', False, 'ws') + ('Websockets', False, 'ws'), ], indirect=['handler']) def test_no_proxy(self, handler, fail, scheme): run_validation(handler, fail, Request(f'{scheme}://', proxies={'no': '127.0.0.1,github.com'})) @@ -1246,7 +1246,7 @@ class TestRequestHandlerValidation: (HTTPSupportedRH, 'http'), ('Requests', 'http'), ('CurlCFFI', 'http'), - ('Websockets', 'ws') + ('Websockets', 'ws'), ], indirect=['handler']) def test_empty_proxy(self, handler, scheme): run_validation(handler, False, Request(f'{scheme}://', proxies={scheme: None})) @@ -1258,7 +1258,7 @@ class TestRequestHandlerValidation: (HTTPSupportedRH, 'http'), ('Requests', 'http'), ('CurlCFFI', 'http'), - ('Websockets', 'ws') + ('Websockets', 'ws'), ], indirect=['handler']) def test_invalid_proxy_url(self, handler, scheme, proxy_url): run_validation(handler, UnsupportedRequest, Request(f'{scheme}://', proxies={scheme: proxy_url})) @@ -1474,7 +1474,7 @@ class TestYoutubeDLNetworking: @pytest.mark.parametrize('proxy,expected', [ ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}), ('', {'all': '__noproxy__'}), - (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'}) # env, set https + (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'}), # env, set https ]) def test_proxy(self, proxy, expected, monkeypatch): monkeypatch.setenv('HTTP_PROXY', 'http://127.0.0.1:8081') @@ -1546,7 +1546,7 @@ class TestYoutubeDLNetworking: with FakeImpersonationRHYDL() as ydl: with pytest.raises( RequestError, - match=r'Impersonate target "test" is not available' + match=r'Impersonate target "test" is not available', ): 
ydl.urlopen(Request('http://', extensions={'impersonate': ImpersonateTarget('test', None, None, None)})) @@ -1558,7 +1558,7 @@ class TestYoutubeDLNetworking: pass _SUPPORTED_URL_SCHEMES = ('http',) - _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget('abc',): 'test'} + _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget('abc'): 'test'} _SUPPORTED_PROXY_SCHEMES = None super().__init__(*args, **kwargs) @@ -1567,14 +1567,14 @@ class TestYoutubeDLNetworking: with FakeHTTPRHYDL() as ydl: with pytest.raises( RequestError, - match=r'Impersonate target "test" is not available' + match=r'Impersonate target "test" is not available', ): ydl.urlopen(Request('http://', extensions={'impersonate': ImpersonateTarget('test', None, None, None)})) def test_raise_impersonate_error(self): with pytest.raises( YoutubeDLError, - match=r'Impersonate target "test" is not available' + match=r'Impersonate target "test" is not available', ): FakeYDL({'impersonate': ImpersonateTarget('test', None, None, None)}) @@ -1592,7 +1592,7 @@ class TestYoutubeDLNetworking: monkeypatch.setattr(FakeYDL, 'build_request_director', lambda cls, handlers, preferences=None: brh(cls, handlers=[IRH])) with FakeYDL({ - 'impersonate': ImpersonateTarget('abc', None, None, None) + 'impersonate': ImpersonateTarget('abc', None, None, None), }) as ydl: rh = self.build_handler(ydl, IRH) assert rh.impersonate == ImpersonateTarget('abc', None, None, None) @@ -1604,7 +1604,7 @@ class TestYoutubeDLNetworking: def _send(self, request: Request): pass _SUPPORTED_URL_SCHEMES = ('http',) - _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget(target_client,): 'test'} + _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget(target_client): 'test'} RH_KEY = target_client RH_NAME = target_client handlers.append(TestRH) @@ -1614,7 +1614,7 @@ class TestYoutubeDLNetworking: assert set(ydl._get_available_impersonate_targets()) == { (ImpersonateTarget('xyz'), 'xyz'), (ImpersonateTarget('abc'), 'abc'), - (ImpersonateTarget('asd'), 
'asd') + (ImpersonateTarget('asd'), 'asd'), } assert ydl._impersonate_target_available(ImpersonateTarget('abc')) assert ydl._impersonate_target_available(ImpersonateTarget()) @@ -1837,7 +1837,7 @@ class TestRequest: extensions={'cookiejar': CookieJar()}, headers={'Accept-Encoding': 'br'}, proxies={'http': 'http://127.0.0.1'}, - data=[b'123'] + data=[b'123'], ) req_copy = req.copy() assert req_copy is not req @@ -1863,7 +1863,7 @@ class TestRequest: assert isinstance(req.copy(), AnotherRequest) def test_url(self): - req = Request(url='https://фtest.example.com/ some spaceв?ä=c',) + req = Request(url='https://фtest.example.com/ some spaceв?ä=c') assert req.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c' assert Request(url='//example.com').url == 'http://example.com' @@ -1878,7 +1878,7 @@ class TestResponse: ('custom', 200, 'custom'), (None, 404, 'Not Found'), # fallback status ('', 403, 'Forbidden'), - (None, 999, None) + (None, 999, None), ]) def test_reason(self, reason, status, expected): res = Response(io.BytesIO(b''), url='test://', headers={}, status=status, reason=reason) @@ -1933,7 +1933,7 @@ class TestImpersonateTarget: @pytest.mark.parametrize('target_str', [ '-120', ':-12.0', '-12:-12', '-:-', - '::', 'a-c-d:', 'a-c-d:e-f-g', 'a:b:' + '::', 'a-c-d:', 'a-c-d:e-f-g', 'a:b:', ]) def test_target_from_invalid_str(self, target_str): with pytest.raises(ValueError): @@ -1949,7 +1949,7 @@ class TestImpersonateTarget: (ImpersonateTarget('abc', '120', 'xyz', None), 'abc-120:xyz'), (ImpersonateTarget('abc', None, 'xyz'), 'abc:xyz'), (ImpersonateTarget(None, None, 'xyz', '6.5'), ':xyz-6.5'), - (ImpersonateTarget('abc', ), 'abc'), + (ImpersonateTarget('abc'), 'abc'), (ImpersonateTarget(None, None, None, None), ''), ]) def test_str(self, target, expected): diff --git a/test/test_networking_utils.py b/test/test_networking_utils.py index b7b71430e..204fe87bd 100644 --- a/test/test_networking_utils.py +++ b/test/test_networking_utils.py @@ -39,7 
+39,7 @@ class TestNetworkingUtils: proxies = { 'all': 'socks5://example.com', 'http': 'http://example.com:1080', - 'no': 'bypass.example.com,yt-dl.org' + 'no': 'bypass.example.com,yt-dl.org', } assert select_proxy('https://example.com', proxies) == proxies['all'] @@ -54,7 +54,7 @@ class TestNetworkingUtils: 'port': 1080, 'rdns': True, 'username': None, - 'password': None + 'password': None, }), ('socks5://user:@example.com:5555', { 'proxytype': ProxyType.SOCKS5, @@ -62,7 +62,7 @@ class TestNetworkingUtils: 'port': 5555, 'rdns': False, 'username': 'user', - 'password': '' + 'password': '', }), ('socks4://u%40ser:pa%20ss@127.0.0.1:1080', { 'proxytype': ProxyType.SOCKS4, @@ -70,7 +70,7 @@ class TestNetworkingUtils: 'port': 1080, 'rdns': False, 'username': 'u@ser', - 'password': 'pa ss' + 'password': 'pa ss', }), ('socks4a://:pa%20ss@127.0.0.1', { 'proxytype': ProxyType.SOCKS4A, @@ -78,8 +78,8 @@ class TestNetworkingUtils: 'port': 1080, 'rdns': True, 'username': '', - 'password': 'pa ss' - }) + 'password': 'pa ss', + }), ]) def test_make_socks_proxy_opts(self, socks_proxy, expected): assert make_socks_proxy_opts(socks_proxy) == expected diff --git a/test/test_overwrites.py b/test/test_overwrites.py index 6954c07f9..0beafdf12 100644 --- a/test/test_overwrites.py +++ b/test/test_overwrites.py @@ -27,7 +27,7 @@ class TestOverwrites(unittest.TestCase): [ sys.executable, 'yt_dlp/__main__.py', '-o', 'test.webm', - 'https://www.youtube.com/watch?v=jNQXAC9IVRw' + 'https://www.youtube.com/watch?v=jNQXAC9IVRw', ], cwd=root_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) sout, serr = outp.communicate() self.assertTrue(b'has already been downloaded' in sout) @@ -39,7 +39,7 @@ class TestOverwrites(unittest.TestCase): [ sys.executable, 'yt_dlp/__main__.py', '--yes-overwrites', '-o', 'test.webm', - 'https://www.youtube.com/watch?v=jNQXAC9IVRw' + 'https://www.youtube.com/watch?v=jNQXAC9IVRw', ], cwd=root_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) sout, serr = 
outp.communicate() self.assertTrue(b'has already been downloaded' not in sout) diff --git a/test/test_plugins.py b/test/test_plugins.py index 6cde579e1..c82158e9f 100644 --- a/test/test_plugins.py +++ b/test/test_plugins.py @@ -31,7 +31,7 @@ class TestPlugins(unittest.TestCase): # don't load modules with underscore prefix self.assertFalse( - f'{PACKAGE_NAME}.extractor._ignore' in sys.modules.keys(), + f'{PACKAGE_NAME}.extractor._ignore' in sys.modules, 'loaded module beginning with underscore') self.assertNotIn('IgnorePluginIE', plugins_ie.keys()) diff --git a/test/test_post_hooks.py b/test/test_post_hooks.py index 3778d1794..6500dd386 100644 --- a/test/test_post_hooks.py +++ b/test/test_post_hooks.py @@ -59,7 +59,7 @@ class TestPostHooks(unittest.TestCase): def hook_three(self, filename): self.files.append(filename) - raise Exception('Test exception for \'%s\'' % filename) + raise Exception(f'Test exception for \'{filename}\'') def tearDown(self): for f in self.files: diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py index 52e558772..603f85c65 100644 --- a/test/test_postprocessors.py +++ b/test/test_postprocessors.py @@ -9,7 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from yt_dlp import YoutubeDL -from yt_dlp.compat import compat_shlex_quote +from yt_dlp.utils import shell_quote from yt_dlp.postprocessor import ( ExecPP, FFmpegThumbnailsConvertorPP, @@ -65,7 +65,7 @@ class TestExec(unittest.TestCase): def test_parse_cmd(self): pp = ExecPP(YoutubeDL(), '') info = {'filepath': 'file name'} - cmd = 'echo %s' % compat_shlex_quote(info['filepath']) + cmd = 'echo {}'.format(shell_quote(info['filepath'])) self.assertEqual(pp.parse_cmd('echo', info), cmd) self.assertEqual(pp.parse_cmd('echo {}', info), cmd) @@ -125,7 +125,8 @@ class TestModifyChaptersPP(unittest.TestCase): self._remove_marked_arrange_sponsors_test_impl(chapters, chapters, []) def test_remove_marked_arrange_sponsors_ChapterWithSponsors(self): 
- chapters = self._chapters([70], ['c']) + [ + chapters = [ + *self._chapters([70], ['c']), self._sponsor_chapter(10, 20, 'sponsor'), self._sponsor_chapter(30, 40, 'preview'), self._sponsor_chapter(50, 60, 'filler')] @@ -136,7 +137,8 @@ class TestModifyChaptersPP(unittest.TestCase): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_SponsorBlockChapters(self): - chapters = self._chapters([70], ['c']) + [ + chapters = [ + *self._chapters([70], ['c']), self._sponsor_chapter(10, 20, 'chapter', title='sb c1'), self._sponsor_chapter(15, 16, 'chapter', title='sb c2'), self._sponsor_chapter(30, 40, 'preview'), @@ -149,10 +151,14 @@ class TestModifyChaptersPP(unittest.TestCase): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_UniqueNamesForOverlappingSponsors(self): - chapters = self._chapters([120], ['c']) + [ - self._sponsor_chapter(10, 45, 'sponsor'), self._sponsor_chapter(20, 40, 'selfpromo'), - self._sponsor_chapter(50, 70, 'sponsor'), self._sponsor_chapter(60, 85, 'selfpromo'), - self._sponsor_chapter(90, 120, 'selfpromo'), self._sponsor_chapter(100, 110, 'sponsor')] + chapters = [ + *self._chapters([120], ['c']), + self._sponsor_chapter(10, 45, 'sponsor'), + self._sponsor_chapter(20, 40, 'selfpromo'), + self._sponsor_chapter(50, 70, 'sponsor'), + self._sponsor_chapter(60, 85, 'selfpromo'), + self._sponsor_chapter(90, 120, 'selfpromo'), + self._sponsor_chapter(100, 110, 'sponsor')] expected = self._chapters( [10, 20, 40, 45, 50, 60, 70, 85, 90, 100, 110, 120], ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', @@ -172,7 +178,8 @@ class TestModifyChaptersPP(unittest.TestCase): chapters, self._chapters([40], ['c']), cuts) def test_remove_marked_arrange_sponsors_ChapterWithSponsorsAndCuts(self): - chapters = self._chapters([70], ['c']) + [ + chapters = [ + *self._chapters([70], ['c']), self._sponsor_chapter(10, 20, 
'sponsor'), self._sponsor_chapter(30, 40, 'selfpromo', remove=True), self._sponsor_chapter(50, 60, 'interaction')] @@ -185,24 +192,29 @@ class TestModifyChaptersPP(unittest.TestCase): def test_remove_marked_arrange_sponsors_ChapterWithSponsorCutInTheMiddle(self): cuts = [self._sponsor_chapter(20, 30, 'selfpromo', remove=True), self._chapter(40, 50, remove=True)] - chapters = self._chapters([70], ['c']) + [self._sponsor_chapter(10, 60, 'sponsor')] + cuts + chapters = [ + *self._chapters([70], ['c']), + self._sponsor_chapter(10, 60, 'sponsor'), + *cuts] expected = self._chapters( [10, 40, 50], ['c', '[SponsorBlock]: Sponsor', 'c']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_ChapterWithCutHidingSponsor(self): cuts = [self._sponsor_chapter(20, 50, 'selfpromo', remove=True)] - chapters = self._chapters([60], ['c']) + [ + chapters = [ + *self._chapters([60], ['c']), self._sponsor_chapter(10, 20, 'intro'), self._sponsor_chapter(30, 40, 'sponsor'), self._sponsor_chapter(50, 60, 'outro'), - ] + cuts + *cuts] expected = self._chapters( [10, 20, 30], ['c', '[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Endcards/Credits']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_ChapterWithAdjacentSponsors(self): - chapters = self._chapters([70], ['c']) + [ + chapters = [ + *self._chapters([70], ['c']), self._sponsor_chapter(10, 20, 'sponsor'), self._sponsor_chapter(20, 30, 'selfpromo'), self._sponsor_chapter(30, 40, 'interaction')] @@ -213,7 +225,8 @@ class TestModifyChaptersPP(unittest.TestCase): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_ChapterWithAdjacentCuts(self): - chapters = self._chapters([70], ['c']) + [ + chapters = [ + *self._chapters([70], ['c']), self._sponsor_chapter(10, 20, 'sponsor'), self._sponsor_chapter(20, 30, 'interaction', remove=True), 
self._chapter(30, 40, remove=True), @@ -226,7 +239,8 @@ class TestModifyChaptersPP(unittest.TestCase): chapters, expected, [self._chapter(20, 50, remove=True)]) def test_remove_marked_arrange_sponsors_ChapterWithOverlappingSponsors(self): - chapters = self._chapters([70], ['c']) + [ + chapters = [ + *self._chapters([70], ['c']), self._sponsor_chapter(10, 30, 'sponsor'), self._sponsor_chapter(20, 50, 'selfpromo'), self._sponsor_chapter(40, 60, 'interaction')] @@ -238,7 +252,8 @@ class TestModifyChaptersPP(unittest.TestCase): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_ChapterWithOverlappingCuts(self): - chapters = self._chapters([70], ['c']) + [ + chapters = [ + *self._chapters([70], ['c']), self._sponsor_chapter(10, 30, 'sponsor', remove=True), self._sponsor_chapter(20, 50, 'selfpromo', remove=True), self._sponsor_chapter(40, 60, 'interaction', remove=True)] @@ -246,7 +261,8 @@ class TestModifyChaptersPP(unittest.TestCase): chapters, self._chapters([20], ['c']), [self._chapter(10, 60, remove=True)]) def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingSponsors(self): - chapters = self._chapters([170], ['c']) + [ + chapters = [ + *self._chapters([170], ['c']), self._sponsor_chapter(0, 30, 'intro'), self._sponsor_chapter(20, 50, 'sponsor'), self._sponsor_chapter(40, 60, 'selfpromo'), @@ -267,7 +283,8 @@ class TestModifyChaptersPP(unittest.TestCase): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingCuts(self): - chapters = self._chapters([170], ['c']) + [ + chapters = [ + *self._chapters([170], ['c']), self._chapter(0, 30, remove=True), self._sponsor_chapter(20, 50, 'sponsor', remove=True), self._chapter(40, 60, remove=True), @@ -284,7 +301,8 @@ class TestModifyChaptersPP(unittest.TestCase): chapters, self._chapters([20], ['c']), expected_cuts) def 
test_remove_marked_arrange_sponsors_OverlappingSponsorsDifferentTitlesAfterCut(self): - chapters = self._chapters([60], ['c']) + [ + chapters = [ + *self._chapters([60], ['c']), self._sponsor_chapter(10, 60, 'sponsor'), self._sponsor_chapter(10, 40, 'intro'), self._sponsor_chapter(30, 50, 'interaction'), @@ -297,7 +315,8 @@ class TestModifyChaptersPP(unittest.TestCase): chapters, expected, [self._chapter(30, 50, remove=True)]) def test_remove_marked_arrange_sponsors_SponsorsNoLongerOverlapAfterCut(self): - chapters = self._chapters([70], ['c']) + [ + chapters = [ + *self._chapters([70], ['c']), self._sponsor_chapter(10, 30, 'sponsor'), self._sponsor_chapter(20, 50, 'interaction'), self._sponsor_chapter(30, 50, 'selfpromo', remove=True), @@ -310,7 +329,8 @@ class TestModifyChaptersPP(unittest.TestCase): chapters, expected, [self._chapter(30, 50, remove=True)]) def test_remove_marked_arrange_sponsors_SponsorsStillOverlapAfterCut(self): - chapters = self._chapters([70], ['c']) + [ + chapters = [ + *self._chapters([70], ['c']), self._sponsor_chapter(10, 60, 'sponsor'), self._sponsor_chapter(20, 60, 'interaction'), self._sponsor_chapter(30, 50, 'selfpromo', remove=True)] @@ -321,7 +341,8 @@ class TestModifyChaptersPP(unittest.TestCase): chapters, expected, [self._chapter(30, 50, remove=True)]) def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingSponsorsAndCuts(self): - chapters = self._chapters([200], ['c']) + [ + chapters = [ + *self._chapters([200], ['c']), self._sponsor_chapter(10, 40, 'sponsor'), self._sponsor_chapter(10, 30, 'intro'), self._chapter(20, 30, remove=True), @@ -347,8 +368,9 @@ class TestModifyChaptersPP(unittest.TestCase): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, expected_cuts) def test_remove_marked_arrange_sponsors_SponsorOverlapsMultipleChapters(self): - chapters = (self._chapters([20, 40, 60, 80, 100], ['c1', 'c2', 'c3', 'c4', 'c5']) - + [self._sponsor_chapter(10, 90, 'sponsor')]) + chapters = [ + 
*self._chapters([20, 40, 60, 80, 100], ['c1', 'c2', 'c3', 'c4', 'c5']), + self._sponsor_chapter(10, 90, 'sponsor')] expected = self._chapters([10, 90, 100], ['c1', '[SponsorBlock]: Sponsor', 'c5']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) @@ -359,9 +381,10 @@ class TestModifyChaptersPP(unittest.TestCase): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_SponsorsWithinSomeChaptersAndOverlappingOthers(self): - chapters = (self._chapters([10, 40, 60, 80], ['c1', 'c2', 'c3', 'c4']) - + [self._sponsor_chapter(20, 30, 'sponsor'), - self._sponsor_chapter(50, 70, 'selfpromo')]) + chapters = [ + *self._chapters([10, 40, 60, 80], ['c1', 'c2', 'c3', 'c4']), + self._sponsor_chapter(20, 30, 'sponsor'), + self._sponsor_chapter(50, 70, 'selfpromo')] expected = self._chapters([10, 20, 30, 40, 50, 70, 80], ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c2', 'c3', '[SponsorBlock]: Unpaid/Self Promotion', 'c4']) @@ -374,8 +397,9 @@ class TestModifyChaptersPP(unittest.TestCase): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_ChaptersAfterLastSponsor(self): - chapters = (self._chapters([20, 40, 50, 60], ['c1', 'c2', 'c3', 'c4']) - + [self._sponsor_chapter(10, 30, 'music_offtopic')]) + chapters = [ + *self._chapters([20, 40, 50, 60], ['c1', 'c2', 'c3', 'c4']), + self._sponsor_chapter(10, 30, 'music_offtopic')] expected = self._chapters( [10, 30, 40, 50, 60], ['c1', '[SponsorBlock]: Non-Music Section', 'c2', 'c3', 'c4']) @@ -388,8 +412,9 @@ class TestModifyChaptersPP(unittest.TestCase): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_SponsorStartsAtChapterStart(self): - chapters = (self._chapters([10, 20, 40], ['c1', 'c2', 'c3']) - + [self._sponsor_chapter(20, 30, 'sponsor')]) + chapters = [ + *self._chapters([10, 20, 40], ['c1', 'c2', 'c3']), + self._sponsor_chapter(20, 
30, 'sponsor')] expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c3']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) @@ -400,8 +425,9 @@ class TestModifyChaptersPP(unittest.TestCase): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_SponsorEndsAtChapterEnd(self): - chapters = (self._chapters([10, 30, 40], ['c1', 'c2', 'c3']) - + [self._sponsor_chapter(20, 30, 'sponsor')]) + chapters = [ + *self._chapters([10, 30, 40], ['c1', 'c2', 'c3']), + self._sponsor_chapter(20, 30, 'sponsor')] expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c3']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) @@ -412,8 +438,9 @@ class TestModifyChaptersPP(unittest.TestCase): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_SponsorCoincidesWithChapters(self): - chapters = (self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) - + [self._sponsor_chapter(10, 30, 'sponsor')]) + chapters = [ + *self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']), + self._sponsor_chapter(10, 30, 'sponsor')] expected = self._chapters([10, 30, 40], ['c1', '[SponsorBlock]: Sponsor', 'c4']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) @@ -424,8 +451,9 @@ class TestModifyChaptersPP(unittest.TestCase): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_SponsorsAtVideoBoundaries(self): - chapters = (self._chapters([20, 40, 60], ['c1', 'c2', 'c3']) - + [self._sponsor_chapter(0, 10, 'intro'), self._sponsor_chapter(50, 60, 'outro')]) + chapters = [ + *self._chapters([20, 40, 60], ['c1', 'c2', 'c3']), + self._sponsor_chapter(0, 10, 'intro'), self._sponsor_chapter(50, 60, 'outro')] expected = self._chapters( [10, 20, 40, 50, 60], ['[SponsorBlock]: Intermission/Intro Animation', 'c1', 
'c2', 'c3', '[SponsorBlock]: Endcards/Credits']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) @@ -437,8 +465,10 @@ class TestModifyChaptersPP(unittest.TestCase): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_SponsorsOverlapChaptersAtVideoBoundaries(self): - chapters = (self._chapters([10, 40, 50], ['c1', 'c2', 'c3']) - + [self._sponsor_chapter(0, 20, 'intro'), self._sponsor_chapter(30, 50, 'outro')]) + chapters = [ + *self._chapters([10, 40, 50], ['c1', 'c2', 'c3']), + self._sponsor_chapter(0, 20, 'intro'), + self._sponsor_chapter(30, 50, 'outro')] expected = self._chapters( [20, 30, 50], ['[SponsorBlock]: Intermission/Intro Animation', 'c2', '[SponsorBlock]: Endcards/Credits']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) @@ -450,8 +480,10 @@ class TestModifyChaptersPP(unittest.TestCase): self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_EverythingSponsored(self): - chapters = (self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) - + [self._sponsor_chapter(0, 20, 'intro'), self._sponsor_chapter(20, 40, 'outro')]) + chapters = [ + *self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']), + self._sponsor_chapter(0, 20, 'intro'), + self._sponsor_chapter(20, 40, 'outro')] expected = self._chapters([20, 40], ['[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Endcards/Credits']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) @@ -491,38 +523,39 @@ class TestModifyChaptersPP(unittest.TestCase): chapters, self._chapters([2.5], ['c2']), cuts) def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromSponsorOverlapAreIgnored(self): - chapters = self._chapters([1, 3, 4], ['c1', 'c2', 'c3']) + [ + chapters = [ + *self._chapters([1, 3, 4], ['c1', 'c2', 'c3']), self._sponsor_chapter(1.5, 2.5, 'sponsor')] 
self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([1.5, 2.5, 4], ['c1', '[SponsorBlock]: Sponsor', 'c3']), []) def test_remove_marked_arrange_sponsors_TinySponsorsOverlapsAreIgnored(self): - chapters = self._chapters([2, 3, 5], ['c1', 'c2', 'c3']) + [ + chapters = [ + *self._chapters([2, 3, 5], ['c1', 'c2', 'c3']), self._sponsor_chapter(1, 3, 'sponsor'), - self._sponsor_chapter(2.5, 4, 'selfpromo') - ] + self._sponsor_chapter(2.5, 4, 'selfpromo')] self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([1, 3, 4, 5], [ 'c1', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', 'c3']), []) def test_remove_marked_arrange_sponsors_TinySponsorsPrependedToTheNextSponsor(self): - chapters = self._chapters([4], ['c']) + [ + chapters = [ + *self._chapters([4], ['c']), self._sponsor_chapter(1.5, 2, 'sponsor'), - self._sponsor_chapter(2, 4, 'selfpromo') - ] + self._sponsor_chapter(2, 4, 'selfpromo')] self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([1.5, 4], ['c', '[SponsorBlock]: Unpaid/Self Promotion']), []) def test_remove_marked_arrange_sponsors_SmallestSponsorInTheOverlapGetsNamed(self): self._pp._sponsorblock_chapter_title = '[SponsorBlock]: %(name)s' - chapters = self._chapters([10], ['c']) + [ + chapters = [ + *self._chapters([10], ['c']), self._sponsor_chapter(2, 8, 'sponsor'), - self._sponsor_chapter(4, 6, 'selfpromo') - ] + self._sponsor_chapter(4, 6, 'selfpromo')] self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([2, 4, 6, 8, 10], [ 'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', - '[SponsorBlock]: Sponsor', 'c' + '[SponsorBlock]: Sponsor', 'c', ]), []) def test_make_concat_opts_CommonCase(self): diff --git a/test/test_socks.py b/test/test_socks.py index 43d612d85..68af19d0c 100644 --- a/test/test_socks.py +++ b/test/test_socks.py @@ -95,7 +95,7 @@ class Socks5ProxyHandler(StreamRequestHandler, SocksProxyHandler): return elif 
Socks5Auth.AUTH_USER_PASS in methods: - self.connection.sendall(struct.pack("!BB", SOCKS5_VERSION, Socks5Auth.AUTH_USER_PASS)) + self.connection.sendall(struct.pack('!BB', SOCKS5_VERSION, Socks5Auth.AUTH_USER_PASS)) _, user_len = struct.unpack('!BB', self.connection.recv(2)) username = self.connection.recv(user_len).decode() @@ -174,7 +174,7 @@ class Socks4ProxyHandler(StreamRequestHandler, SocksProxyHandler): if 0x0 < dest_ip <= 0xFF: use_remote_dns = True else: - socks_info['ipv4_address'] = socket.inet_ntoa(struct.pack("!I", dest_ip)) + socks_info['ipv4_address'] = socket.inet_ntoa(struct.pack('!I', dest_ip)) user_id = self._read_until_null().decode() if user_id != (self.socks_kwargs.get('user_id') or ''): @@ -291,7 +291,7 @@ def ctx(request): ('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws'), - ('CurlCFFI', 'http') + ('CurlCFFI', 'http'), ], indirect=True) class TestSocks4Proxy: def test_socks4_no_auth(self, handler, ctx): @@ -366,7 +366,7 @@ class TestSocks4Proxy: ('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws'), - ('CurlCFFI', 'http') + ('CurlCFFI', 'http'), ], indirect=True) class TestSocks5Proxy: diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 57362895f..f3b005617 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -40,12 +40,11 @@ class BaseTestSubtitles(unittest.TestCase): self.ie = self.IE() self.DL.add_info_extractor(self.ie) if not self.IE.working(): - print('Skipping: %s marked as not _WORKING' % self.IE.ie_key()) + print(f'Skipping: {self.IE.ie_key()} marked as not _WORKING') self.skipTest('IE marked as not _WORKING') def getInfoDict(self): - info_dict = self.DL.extract_info(self.url, download=False) - return info_dict + return self.DL.extract_info(self.url, download=False) def getSubtitles(self): info_dict = self.getInfoDict() @@ -87,7 +86,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles): self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d') 
self.assertEqual(md5(subtitles['it']), '0e0b667ba68411d88fd1c5f4f4eab2f9') for lang in ['fr', 'de']: - self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) + self.assertTrue(subtitles.get(lang) is not None, f'Subtitles for \'{lang}\' not extracted') def _test_subtitles_format(self, fmt, md5_hash, lang='en'): self.DL.params['writesubtitles'] = True @@ -157,7 +156,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles): self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f') self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792') for lang in ['es', 'fr', 'de']: - self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) + self.assertTrue(subtitles.get(lang) is not None, f'Subtitles for \'{lang}\' not extracted') def test_nosubtitles(self): self.DL.expect_warning('video doesn\'t have subtitles') @@ -182,7 +181,7 @@ class TestTedSubtitles(BaseTestSubtitles): self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14') self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5') for lang in ['es', 'fr', 'de']: - self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) + self.assertTrue(subtitles.get(lang) is not None, f'Subtitles for \'{lang}\' not extracted') @is_download_test diff --git a/test/test_traversal.py b/test/test_traversal.py index 9b2a27b08..5d9fbe1d1 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -31,7 +31,7 @@ class TestTraversal: 'allow tuple path' assert traverse_obj(_TEST_DATA, ['str']) == 'str', \ 'allow list path' - assert traverse_obj(_TEST_DATA, (value for value in ("str",))) == 'str', \ + assert traverse_obj(_TEST_DATA, (value for value in ('str',))) == 'str', \ 'allow iterable path' assert traverse_obj(_TEST_DATA, 'str') == 'str', \ 'single items should be treated as a path' @@ -70,7 +70,7 @@ class TestTraversal: def test_traversal_set(self): # 
transformation/type, like `expected_type` - assert traverse_obj(_TEST_DATA, (..., {str.upper}, )) == ['STR'], \ + assert traverse_obj(_TEST_DATA, (..., {str.upper})) == ['STR'], \ 'Function in set should be a transformation' assert traverse_obj(_TEST_DATA, (..., {str})) == ['str'], \ 'Type in set should be a type filter' @@ -276,7 +276,7 @@ class TestTraversal: '`...` should result in string (same value) if `traverse_string`' assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)), traverse_string=True) == 'sr', \ '`slice` should result in string if `traverse_string`' - assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == "s"), traverse_string=True) == 'str', \ + assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == 's'), traverse_string=True) == 'str', \ 'function should result in string if `traverse_string`' assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)), traverse_string=True) == ['s', 'r'], \ 'branching should result in list if `traverse_string`' diff --git a/test/test_update.py b/test/test_update.py index bc139562f..63a21e445 100644 --- a/test/test_update.py +++ b/test/test_update.py @@ -78,11 +78,11 @@ TEST_API_DATA = { TEST_LOCKFILE_COMMENT = '# This file is used for regulating self-update' -TEST_LOCKFILE_V1 = r'''%s +TEST_LOCKFILE_V1 = rf'''{TEST_LOCKFILE_COMMENT} lock 2022.08.18.36 .+ Python 3\.6 lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) -''' % TEST_LOCKFILE_COMMENT +''' TEST_LOCKFILE_V2_TMPL = r'''%s lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6 @@ -98,12 +98,12 @@ TEST_LOCKFILE_V2 = TEST_LOCKFILE_V2_TMPL % TEST_LOCKFILE_COMMENT TEST_LOCKFILE_ACTUAL = TEST_LOCKFILE_V2_TMPL % TEST_LOCKFILE_V1.rstrip('\n') -TEST_LOCKFILE_FORK = r'''%s# Test if a fork blocks updates to non-numeric tags +TEST_LOCKFILE_FORK = rf'''{TEST_LOCKFILE_ACTUAL}# Test if a fork blocks updates to non-numeric tags lockV2 fork/yt-dlp pr0000 .+ Python 3.6 
lockV2 fork/yt-dlp pr1234 (?!win_x86_exe).+ Python 3\.7 lockV2 fork/yt-dlp pr1234 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 fork/yt-dlp pr9999 .+ Python 3.11 -''' % TEST_LOCKFILE_ACTUAL +''' class FakeUpdater(Updater): diff --git a/test/test_utils.py b/test/test_utils.py index 77fadbbea..251739686 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -276,8 +276,8 @@ class TestUtil(unittest.TestCase): self.assertEqual(expand_path(env('HOME')), os.getenv('HOME')) self.assertEqual(expand_path('~'), os.getenv('HOME')) self.assertEqual( - expand_path('~/%s' % env('yt_dlp_EXPATH_PATH')), - '%s/expanded' % os.getenv('HOME')) + expand_path('~/{}'.format(env('yt_dlp_EXPATH_PATH'))), + '{}/expanded'.format(os.getenv('HOME'))) finally: os.environ['HOME'] = old_home or '' @@ -356,12 +356,12 @@ class TestUtil(unittest.TestCase): self.assertEqual(datetime_from_str('now+23hours', precision='hour'), datetime_from_str('now+23hours', precision='auto')) def test_daterange(self): - _20century = DateRange("19000101", "20000101") - self.assertFalse("17890714" in _20century) - _ac = DateRange("00010101") - self.assertTrue("19690721" in _ac) - _firstmilenium = DateRange(end="10000101") - self.assertTrue("07110427" in _firstmilenium) + _20century = DateRange('19000101', '20000101') + self.assertFalse('17890714' in _20century) + _ac = DateRange('00010101') + self.assertTrue('19690721' in _ac) + _firstmilenium = DateRange(end='10000101') + self.assertTrue('07110427' in _firstmilenium) def test_unified_dates(self): self.assertEqual(unified_strdate('December 21, 2010'), '20101221') @@ -506,7 +506,7 @@ class TestUtil(unittest.TestCase): self.assertRaises(ExtractorError, xpath_attr, doc, 'div/p', 'y', fatal=True) def test_smuggle_url(self): - data = {"ö": "ö", "abc": [3]} + data = {'ö': 'ö', 'abc': [3]} url = 'https://foo.bar/baz?x=y#a' smug_url = smuggle_url(url, data) unsmug_url, unsmug_data = unsmuggle_url(smug_url) @@ -784,7 +784,7 @@ class TestUtil(unittest.TestCase): def 
test_strip_jsonp(self): stripped = strip_jsonp('cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);') d = json.loads(stripped) - self.assertEqual(d, [{"id": "532cb", "x": 3}]) + self.assertEqual(d, [{'id': '532cb', 'x': 3}]) stripped = strip_jsonp('parseMetadata({"STATUS":"OK"})\n\n\n//epc') d = json.loads(stripped) @@ -922,19 +922,19 @@ class TestUtil(unittest.TestCase): def test_normalize_url(self): self.assertEqual( normalize_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'), - 'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4' + 'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4', ) self.assertEqual( normalize_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'), - 'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290' + 'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290', ) self.assertEqual( normalize_url('http://тест.рф/фрагмент'), - 'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82' + 'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82', ) self.assertEqual( normalize_url('http://тест.рф/абв?абв=абв#абв'), - 'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2' + 'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2', ) self.assertEqual(normalize_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') @@ -979,7 +979,7 @@ class TestUtil(unittest.TestCase): 'e': 'false', 'f': '"false"', 'g': 'var', - } + }, )), { 'null': None, @@ -988,8 +988,8 @@ class TestUtil(unittest.TestCase): 'trueStr': 'true', 'false': False, 'falseStr': 'false', - 
'unresolvedVar': 'var' - } + 'unresolvedVar': 'var', + }, ) self.assertDictEqual( @@ -1005,14 +1005,14 @@ class TestUtil(unittest.TestCase): 'b': '"123"', 'c': '1.23', 'd': '"1.23"', - } + }, )), { 'int': 123, 'intStr': '123', 'float': 1.23, 'floatStr': '1.23', - } + }, ) self.assertDictEqual( @@ -1028,14 +1028,14 @@ class TestUtil(unittest.TestCase): 'b': '"{}"', 'c': '[]', 'd': '"[]"', - } + }, )), { 'object': {}, 'objectStr': '{}', 'array': [], 'arrayStr': '[]', - } + }, ) def test_js_to_json_realworld(self): @@ -1081,7 +1081,7 @@ class TestUtil(unittest.TestCase): def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") - self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) + self.assertEqual(json.loads(on), {'abc_def': "1'\\2\\'3\"4"}) on = js_to_json('{"abc": true}') self.assertEqual(json.loads(on), {'abc': True}) @@ -1113,9 +1113,9 @@ class TestUtil(unittest.TestCase): 'c': 0, 'd': 42.42, 'e': [], - 'f': "abc", - 'g': "", - '42': 42 + 'f': 'abc', + 'g': '', + '42': 42, }) on = js_to_json('["abc", "def",]') @@ -1209,8 +1209,8 @@ class TestUtil(unittest.TestCase): self.assertEqual(json.loads(js_to_json('Array(5, 10)')), [5, 10]) self.assertEqual(json.loads(js_to_json('new Array(15,5)')), [15, 5]) self.assertEqual(json.loads(js_to_json('new Map([Array(5, 10),new Array(15,5)])')), {'5': 10, '15': 5}) - self.assertEqual(json.loads(js_to_json('new Date("123")')), "123") - self.assertEqual(json.loads(js_to_json('new Date(\'2023-10-19\')')), "2023-10-19") + self.assertEqual(json.loads(js_to_json('new Date("123")')), '123') + self.assertEqual(json.loads(js_to_json('new Date(\'2023-10-19\')')), '2023-10-19') def test_extract_attributes(self): self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'}) @@ -1265,7 +1265,7 @@ class TestUtil(unittest.TestCase): def test_args_to_str(self): self.assertEqual( args_to_str(['foo', 'ba/r', '-baz', '2 be', '']), - 'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r 
-baz "2 be" ""' + 'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""', ) def test_parse_filesize(self): @@ -1348,10 +1348,10 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') self.assertTrue(is_html( # UTF-8 with BOM b'\xef\xbb\xbf<!DOCTYPE foo>\xaaa')) self.assertTrue(is_html( # UTF-16-LE - b'\xff\xfe<\x00h\x00t\x00m\x00l\x00>\x00\xe4\x00' + b'\xff\xfe<\x00h\x00t\x00m\x00l\x00>\x00\xe4\x00', )) self.assertTrue(is_html( # UTF-16-BE - b'\xfe\xff\x00<\x00h\x00t\x00m\x00l\x00>\x00\xe4' + b'\xfe\xff\x00<\x00h\x00t\x00m\x00l\x00>\x00\xe4', )) self.assertTrue(is_html( # UTF-32-BE b'\x00\x00\xFE\xFF\x00\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4')) @@ -1935,7 +1935,7 @@ Line 1 with locked_file(FILE, test_mode, False): pass except (BlockingIOError, PermissionError): - if not testing_write: # FIXME + if not testing_write: # FIXME: blocked read access print(f'Known issue: Exclusive lock ({lock_mode}) blocks read access ({test_mode})') continue self.assertTrue(testing_write, f'{test_mode} is blocked by {lock_mode}') @@ -2003,7 +2003,7 @@ Line 1 msg='int fn with expected_type int should give int') self.assertEqual(try_call(lambda: 1, expected_type=dict), None, msg='int fn with wrong expected_type should give None') - self.assertEqual(try_call(total, args=(0, 1, 0, ), expected_type=int), 1, + self.assertEqual(try_call(total, args=(0, 1, 0), expected_type=int), 1, msg='fn should accept arglist') self.assertEqual(try_call(total, kwargs={'a': 0, 'b': 1, 'c': 0}, expected_type=int), 1, msg='fn should accept kwargs') diff --git a/test/test_websockets.py b/test/test_websockets.py index aa0dfa2d5..5f101abcc 100644 --- a/test/test_websockets.py +++ b/test/test_websockets.py @@ -297,14 +297,14 @@ class TestWebsSocketRequestHandlerConformance: 'client_certificate': os.path.join(MTLS_CERT_DIR, 'client.crt'), 'client_certificate_key': os.path.join(MTLS_CERT_DIR, 
'clientencrypted.key'), 'client_certificate_password': 'foobar', - } + }, )) def test_mtls(self, handler, client_cert): with handler( # Disable client-side validation of unacceptable self-signed testcert.pem # The test is of a check on the server side, so unaffected verify=False, - client_cert=client_cert + client_cert=client_cert, ) as rh: ws_validate_and_send(rh, Request(self.mtls_wss_base_url)).close() diff --git a/test/test_youtube_misc.py b/test/test_youtube_misc.py index 81be5d3c9..81b116217 100644 --- a/test/test_youtube_misc.py +++ b/test/test_youtube_misc.py @@ -13,7 +13,7 @@ from yt_dlp.extractor import YoutubeIE class TestYoutubeMisc(unittest.TestCase): def test_youtube_extract(self): - assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id) + assertExtractId = lambda url, video_id: self.assertEqual(YoutubeIE.extract_id(url), video_id) assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc') diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index c5592845b..bfaff83a0 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -46,17 +46,17 @@ _SIG_TESTS = [ ( 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js', 84, - '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>' + '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>', ), ( 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js', 83, - '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F' + '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F', ), ( 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js', 
'4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288', - '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B' + '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B', ), ( 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', @@ -207,7 +207,7 @@ class TestSignature(unittest.TestCase): def t_factory(name, sig_func, url_pattern): def make_tfunc(url, sig_input, expected_sig): m = url_pattern.match(url) - assert m, '%r should follow URL format' % url + assert m, f'{url!r} should follow URL format' test_id = m.group('id') def test_func(self): diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2c6f695d0..5abcb4635 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -109,7 +109,6 @@ from .utils import ( determine_protocol, encode_compat_str, encodeFilename, - error_to_compat_str, escapeHTML, expand_path, extract_basic_auth, @@ -583,7 +582,7 @@ class YoutubeDL: 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data', 'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies', 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options', - 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time' + 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time', } _deprecated_multivalue_fields = { 'album_artist': 'album_artists', @@ -594,7 +593,7 @@ class YoutubeDL: } _format_selection_exts = { 'audio': set(MEDIA_EXTENSIONS.common_audio), - 'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )), + 'video': {*MEDIA_EXTENSIONS.common_video, '3gp'}, 'storyboards': set(MEDIA_EXTENSIONS.storyboards), } @@ -628,7 +627,7 @@ class YoutubeDL: error=sys.stderr, screen=sys.stderr if 
self.params.get('quiet') else stdout, console=None if compat_os_name == 'nt' else next( - filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None) + filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None), ) try: @@ -679,9 +678,9 @@ class YoutubeDL: width_args = [] if width is None else ['-w', str(width)] sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error} try: - self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs) + self._output_process = Popen(['bidiv', *width_args], **sp_kwargs) except OSError: - self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) + self._output_process = Popen(['fribidi', '-c', 'UTF-8', *width_args], **sp_kwargs) self._output_channel = os.fdopen(master, 'rb') except OSError as ose: if ose.errno == errno.ENOENT: @@ -822,8 +821,7 @@ class YoutubeDL: ) self.report_warning( 'Long argument string detected. ' - 'Use -- to separate parameters and URLs, like this:\n%s' % - shell_quote(correct_argv)) + f'Use -- to separate parameters and URLs, like this:\n{shell_quote(correct_argv)}') def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" @@ -922,7 +920,7 @@ class YoutubeDL: if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'): return self._write_string( - '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')), + '{}{}'.format(self._bidi_workaround(message), ('' if skip_eol else '\n')), self._out_files.screen, only_once=only_once) def to_stderr(self, message, only_once=False): @@ -1045,10 +1043,10 @@ class YoutubeDL: return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs) def report_warning(self, message, only_once=False): - ''' + """ Print the message to stderr, it will be prefixed with 'WARNING:' If stderr is a tty file the 'WARNING:' will be colored - ''' + """ if self.params.get('logger') is not None: 
self.params['logger'].warning(message) else: @@ -1066,14 +1064,14 @@ class YoutubeDL: self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True) def report_error(self, message, *args, **kwargs): - ''' + """ Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. - ''' + """ self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs) def write_debug(self, message, only_once=False): - '''Log debug message or Print message to stderr''' + """Log debug message or Print message to stderr""" if not self.params.get('verbose', False): return message = f'[debug] {message}' @@ -1085,14 +1083,14 @@ class YoutubeDL: def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" try: - self.to_screen('[download] %s has already been downloaded' % file_name) + self.to_screen(f'[download] {file_name} has already been downloaded') except UnicodeEncodeError: self.to_screen('[download] The file has already been downloaded') def report_file_delete(self, file_name): """Report that existing file will be deleted.""" try: - self.to_screen('Deleting existing file %s' % file_name) + self.to_screen(f'Deleting existing file {file_name}') except UnicodeEncodeError: self.to_screen('Deleting existing file') @@ -1147,7 +1145,7 @@ class YoutubeDL: @staticmethod def escape_outtmpl(outtmpl): - ''' Escape any remaining strings like %s, %abc% etc. ''' + """ Escape any remaining strings like %s, %abc% etc. 
""" return re.sub( STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'), lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0), @@ -1155,7 +1153,7 @@ class YoutubeDL: @classmethod def validate_outtmpl(cls, outtmpl): - ''' @return None or Exception object ''' + """ @return None or Exception object """ outtmpl = re.sub( STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'), lambda mobj: f'{mobj.group(0)[:-1]}s', @@ -1208,13 +1206,13 @@ class YoutubeDL: } # Field is of the form key1.key2... # where keys (except first) can be string, int, slice or "{field, ...}" - FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'} - FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % { + FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'} # noqa: UP031 + FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % { # noqa: UP031 'inner': FIELD_INNER_RE, - 'field': rf'\w*(?:\.{FIELD_INNER_RE})*' + 'field': rf'\w*(?:\.{FIELD_INNER_RE})*', } MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})' - MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys())) + MATH_OPERATORS_RE = r'(?:{})'.format('|'.join(map(re.escape, MATH_FUNCTIONS.keys()))) INTERNAL_FORMAT_RE = re.compile(rf'''(?xs) (?P<negate>-)? 
(?P<fields>{FIELD_RE}) @@ -1337,7 +1335,7 @@ class YoutubeDL: value, default = None, na fmt = outer_mobj.group('format') - if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int): + if fmt == 's' and last_field in field_size_compat_map and isinstance(value, int): fmt = f'0{field_size_compat_map[last_field]:d}d' flags = outer_mobj.group('conversion') or '' @@ -1362,7 +1360,7 @@ class YoutubeDL: elif fmt[-1] == 'U': # unicode normalized value, fmt = unicodedata.normalize( # "+" = compatibility equivalence, "#" = NFD - 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'), + 'NF{}{}'.format('K' if '+' in flags else '', 'D' if '#' in flags else 'C'), value), str_fmt elif fmt[-1] == 'D': # decimal suffix num_fmt, fmt = fmt[:-1].replace('#', ''), 's' @@ -1390,7 +1388,7 @@ class YoutubeDL: if fmt[-1] in 'csra': value = sanitizer(last_field, value) - key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format')) + key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format')) TMPL_DICT[key] = value return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix')) @@ -1479,9 +1477,9 @@ class YoutubeDL: date = info_dict.get('upload_date') if date is not None: - dateRange = self.params.get('daterange', DateRange()) - if date not in dateRange: - return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}' + date_range = self.params.get('daterange', DateRange()) + if date not in date_range: + return f'{date_from_str(date).isoformat()} upload date is not in range {date_range}' view_count = info_dict.get('view_count') if view_count is not None: min_views = self.params.get('min_views') @@ -1491,7 +1489,7 @@ class YoutubeDL: if max_views is not None and view_count > max_views: return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): 
- return 'Skipping "%s" because it is age restricted' % video_title + return f'Skipping "{video_title}" because it is age restricted' match_filter = self.params.get('match_filter') if match_filter is None: @@ -1544,7 +1542,7 @@ class YoutubeDL: @staticmethod def add_extra_info(info_dict, extra_info): - '''Set the keys from extra_info in info dict if they are missing''' + """Set the keys from extra_info in info dict if they are missing""" for key, value in extra_info.items(): info_dict.setdefault(key, value) @@ -1590,7 +1588,7 @@ class YoutubeDL: self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: ' 'has already been recorded in the archive') if self.params.get('break_on_existing', False): - raise ExistingVideoReached() + raise ExistingVideoReached break return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process) else: @@ -1616,8 +1614,8 @@ class YoutubeDL: except GeoRestrictedError as e: msg = e.msg if e.countries: - msg += '\nThis video is available in %s.' % ', '.join( - map(ISO3166Utils.short2full, e.countries)) + msg += '\nThis video is available in {}.'.format(', '.join( + map(ISO3166Utils.short2full, e.countries))) msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.' 
self.report_error(msg) except ExtractorError as e: # An error we somewhat expected @@ -1826,8 +1824,8 @@ class YoutubeDL: if isinstance(additional_urls, str): additional_urls = [additional_urls] self.to_screen( - '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls))) - self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls)) + '[info] {}: {} additional URL(s) requested'.format(ie_result['id'], len(additional_urls))) + self.write_debug('Additional URLs: "{}"'.format('", "'.join(additional_urls))) ie_result['additional_entries'] = [ self.extract_info( url, download, extra_info=extra_info, @@ -1879,8 +1877,8 @@ class YoutubeDL: webpage_url = ie_result.get('webpage_url') # Playlists maynot have webpage_url if webpage_url and webpage_url in self._playlist_urls: self.to_screen( - '[download] Skipping already downloaded playlist: %s' - % ie_result.get('title') or ie_result.get('id')) + '[download] Skipping already downloaded playlist: {}'.format( + ie_result.get('title')) or ie_result.get('id')) return self._playlist_level += 1 @@ -1895,8 +1893,8 @@ class YoutubeDL: self._playlist_urls.clear() elif result_type == 'compat_list': self.report_warning( - 'Extractor %s returned a compat_list result. ' - 'It needs to be updated.' % ie_result.get('extractor')) + 'Extractor {} returned a compat_list result. 
' + 'It needs to be updated.'.format(ie_result.get('extractor'))) def _fixup(r): self.add_extra_info(r, { @@ -1913,7 +1911,7 @@ class YoutubeDL: ] return ie_result else: - raise Exception('Invalid result type: %s' % result_type) + raise Exception(f'Invalid result type: {result_type}') def _ensure_dir_exists(self, path): return make_dir(path, self.report_error) @@ -2029,8 +2027,9 @@ class YoutubeDL: resolved_entries[i] = (playlist_index, NO_DEFAULT) continue - self.to_screen('[download] Downloading item %s of %s' % ( - self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS))) + self.to_screen( + f'[download] Downloading item {self._format_screen(i + 1, self.Styles.ID)} ' + f'of {self._format_screen(n_entries, self.Styles.EMPHASIS)}') entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({ 'playlist_index': playlist_index, @@ -2080,9 +2079,9 @@ class YoutubeDL: } operator_rex = re.compile(r'''(?x)\s* (?P<key>[\w.-]+)\s* - (?P<op>%s)(?P<none_inclusive>\s*\?)?\s* + (?P<op>{})(?P<none_inclusive>\s*\?)?\s* (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s* - ''' % '|'.join(map(re.escape, OPERATORS.keys()))) + '''.format('|'.join(map(re.escape, OPERATORS.keys())))) m = operator_rex.fullmatch(filter_spec) if m: try: @@ -2093,7 +2092,7 @@ class YoutubeDL: comparison_value = parse_filesize(m.group('value') + 'B') if comparison_value is None: raise ValueError( - 'Invalid value %r in format specification %r' % ( + 'Invalid value {!r} in format specification {!r}'.format( m.group('value'), filter_spec)) op = OPERATORS[m.group('op')] @@ -2103,15 +2102,15 @@ class YoutubeDL: '^=': lambda attr, value: attr.startswith(value), '$=': lambda attr, value: attr.endswith(value), '*=': lambda attr, value: value in attr, - '~=': lambda attr, value: value.search(attr) is not None + '~=': lambda attr, value: value.search(attr) is not None, } str_operator_rex = re.compile(r'''(?x)\s* (?P<key>[a-zA-Z0-9._-]+)\s* - 
(?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)? + (?P<negation>!\s*)?(?P<op>{})\s*(?P<none_inclusive>\?\s*)? (?P<quote>["'])? (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+)) (?(quote)(?P=quote))\s* - ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) + '''.format('|'.join(map(re.escape, STR_OPERATORS.keys())))) m = str_operator_rex.fullmatch(filter_spec) if m: if m.group('op') == '~=': @@ -2125,7 +2124,7 @@ class YoutubeDL: op = str_op if not m: - raise SyntaxError('Invalid filter specification %r' % filter_spec) + raise SyntaxError(f'Invalid filter specification {filter_spec!r}') def _filter(f): actual_value = f.get(m.group('key')) @@ -2141,7 +2140,7 @@ class YoutubeDL: if working: yield f continue - self.to_screen('[info] Testing format %s' % f['format_id']) + self.to_screen('[info] Testing format {}'.format(f['format_id'])) path = self.get_output_path('temp') if not self._ensure_dir_exists(f'{path}/'): continue @@ -2149,19 +2148,19 @@ class YoutubeDL: temp_file.close() try: success, _ = self.dl(temp_file.name, f, test=True) - except (DownloadError, OSError, ValueError) + network_exceptions: + except (DownloadError, OSError, ValueError, *network_exceptions): success = False finally: if os.path.exists(temp_file.name): try: os.remove(temp_file.name) except OSError: - self.report_warning('Unable to delete temporary file "%s"' % temp_file.name) + self.report_warning(f'Unable to delete temporary file "{temp_file.name}"') f['__working'] = success if success: yield f else: - self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id']) + self.to_screen('[info] Unable to download format {}. 
Skipping...'.format(f['format_id'])) def _select_formats(self, formats, selector): return list(selector({ @@ -2214,8 +2213,8 @@ class YoutubeDL: def _parse_filter(tokens): filter_parts = [] - for type, string_, start, _, _ in tokens: - if type == tokenize.OP and string_ == ']': + for type_, string_, _start, _, _ in tokens: + if type_ == tokenize.OP and string_ == ']': return ''.join(filter_parts) else: filter_parts.append(string_) @@ -2225,23 +2224,23 @@ class YoutubeDL: # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9' ALLOWED_OPS = ('/', '+', ',', '(', ')') last_string, last_start, last_end, last_line = None, None, None, None - for type, string_, start, end, line in tokens: - if type == tokenize.OP and string_ == '[': + for type_, string_, start, end, line in tokens: + if type_ == tokenize.OP and string_ == '[': if last_string: yield tokenize.NAME, last_string, last_start, last_end, last_line last_string = None - yield type, string_, start, end, line + yield type_, string_, start, end, line # everything inside brackets will be handled by _parse_filter - for type, string_, start, end, line in tokens: - yield type, string_, start, end, line - if type == tokenize.OP and string_ == ']': + for type_, string_, start, end, line in tokens: + yield type_, string_, start, end, line + if type_ == tokenize.OP and string_ == ']': break - elif type == tokenize.OP and string_ in ALLOWED_OPS: + elif type_ == tokenize.OP and string_ in ALLOWED_OPS: if last_string: yield tokenize.NAME, last_string, last_start, last_end, last_line last_string = None - yield type, string_, start, end, line - elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]: + yield type_, string_, start, end, line + elif type_ in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]: if not last_string: last_string = string_ last_start = start @@ -2254,13 +2253,13 @@ class YoutubeDL: def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False): selectors 
= [] current_selector = None - for type, string_, start, _, _ in tokens: + for type_, string_, start, _, _ in tokens: # ENCODING is only defined in Python 3.x - if type == getattr(tokenize, 'ENCODING', None): + if type_ == getattr(tokenize, 'ENCODING', None): continue - elif type in [tokenize.NAME, tokenize.NUMBER]: + elif type_ in [tokenize.NAME, tokenize.NUMBER]: current_selector = FormatSelector(SINGLE, string_, []) - elif type == tokenize.OP: + elif type_ == tokenize.OP: if string_ == ')': if not inside_group: # ')' will be handled by the parentheses group @@ -2303,7 +2302,7 @@ class YoutubeDL: current_selector = FormatSelector(MERGE, (selector_1, selector_2), []) else: raise syntax_error(f'Operator not recognized: "{string_}"', start) - elif type == tokenize.ENDMARKER: + elif type_ == tokenize.ENDMARKER: break if current_selector: selectors.append(current_selector) @@ -2378,7 +2377,7 @@ class YoutubeDL: 'acodec': the_only_audio.get('acodec'), 'abr': the_only_audio.get('abr'), 'asr': the_only_audio.get('asr'), - 'audio_channels': the_only_audio.get('audio_channels') + 'audio_channels': the_only_audio.get('audio_channels'), }) return new_dict @@ -2459,9 +2458,9 @@ class YoutubeDL: format_fallback = not format_type and not format_modified # for b, w _filter_f = ( - (lambda f: f.get('%scodec' % format_type) != 'none') + (lambda f: f.get(f'{format_type}codec') != 'none') if format_type and format_modified # bv*, ba*, wv*, wa* - else (lambda f: f.get('%scodec' % not_format_type) == 'none') + else (lambda f: f.get(f'{not_format_type}codec') == 'none') if format_type # bv, ba, wv, wa else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none') if not format_modified # b, w @@ -2529,7 +2528,7 @@ class YoutubeDL: def __next__(self): if self.counter >= len(self.tokens): - raise StopIteration() + raise StopIteration value = self.tokens[self.counter] self.counter += 1 return value @@ -2612,7 +2611,7 @@ class YoutubeDL: self._sort_thumbnails(thumbnails) for i, t 
in enumerate(thumbnails): if t.get('id') is None: - t['id'] = '%d' % i + t['id'] = str(i) if t.get('width') and t.get('height'): t['resolution'] = '%dx%d' % (t['width'], t['height']) t['url'] = sanitize_url(t['url']) @@ -2673,8 +2672,8 @@ class YoutubeDL: # Auto generate title fields corresponding to the *_number fields when missing # in order to always have clean titles. This is very common for TV series. for field in ('chapter', 'season', 'episode'): - if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field): - info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field]) + if final and info_dict.get(f'{field}_number') is not None and not info_dict.get(field): + info_dict[field] = '%s %d' % (field.capitalize(), info_dict[f'{field}_number']) for old_key, new_key in self._deprecated_multivalue_fields.items(): if new_key in info_dict and old_key in info_dict: @@ -2706,8 +2705,8 @@ class YoutubeDL: def report_force_conversion(field, field_not, conversion): self.report_warning( - '"%s" field is not %s - forcing %s conversion, there is an error in extractor' - % (field, field_not, conversion)) + f'"{field}" field is not {field_not} - forcing {conversion} conversion, ' + 'there is an error in extractor') def sanitize_string_field(info, string_field): field = info.get(string_field) @@ -2824,28 +2823,28 @@ class YoutubeDL: if not formats: self.raise_no_formats(info_dict) - for format in formats: - sanitize_string_field(format, 'format_id') - sanitize_numeric_fields(format) - format['url'] = sanitize_url(format['url']) - if format.get('ext') is None: - format['ext'] = determine_ext(format['url']).lower() - if format['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'): - if format.get('acodec') is None: - format['acodec'] = format['ext'] - if format.get('protocol') is None: - format['protocol'] = determine_protocol(format) - if format.get('resolution') is None: - format['resolution'] = self.format_resolution(format, 
default=None) - if format.get('dynamic_range') is None and format.get('vcodec') != 'none': - format['dynamic_range'] = 'SDR' - if format.get('aspect_ratio') is None: - format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2)) + for fmt in formats: + sanitize_string_field(fmt, 'format_id') + sanitize_numeric_fields(fmt) + fmt['url'] = sanitize_url(fmt['url']) + if fmt.get('ext') is None: + fmt['ext'] = determine_ext(fmt['url']).lower() + if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'): + if fmt.get('acodec') is None: + fmt['acodec'] = fmt['ext'] + if fmt.get('protocol') is None: + fmt['protocol'] = determine_protocol(fmt) + if fmt.get('resolution') is None: + fmt['resolution'] = self.format_resolution(fmt, default=None) + if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none': + fmt['dynamic_range'] = 'SDR' + if fmt.get('aspect_ratio') is None: + fmt['aspect_ratio'] = try_call(lambda: round(fmt['width'] / fmt['height'], 2)) # For fragmented formats, "tbr" is often max bitrate and not average - if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url')) - and not format.get('filesize') and not format.get('filesize_approx')): - format['filesize_approx'] = filesize_from_tbr(format.get('tbr'), info_dict.get('duration')) - format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True) + if (('manifest-filesize-approx' in self.params['compat_opts'] or not fmt.get('manifest_url')) + and not fmt.get('filesize') and not fmt.get('filesize_approx')): + fmt['filesize_approx'] = filesize_from_tbr(fmt.get('tbr'), info_dict.get('duration')) + fmt['http_headers'] = self._calc_headers(collections.ChainMap(fmt, info_dict), load_cookies=True) # Safeguard against old/insecure infojson when using --load-info-json if info_dict.get('http_headers'): @@ -2858,36 +2857,36 @@ class YoutubeDL: self.sort_formats({ 'formats': formats, - '_format_sort_fields': 
info_dict.get('_format_sort_fields') + '_format_sort_fields': info_dict.get('_format_sort_fields'), }) # Sanitize and group by format_id formats_dict = {} - for i, format in enumerate(formats): - if not format.get('format_id'): - format['format_id'] = str(i) + for i, fmt in enumerate(formats): + if not fmt.get('format_id'): + fmt['format_id'] = str(i) else: # Sanitize format_id from characters used in format selector expression - format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id']) - formats_dict.setdefault(format['format_id'], []).append(format) + fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id']) + formats_dict.setdefault(fmt['format_id'], []).append(fmt) # Make sure all formats have unique format_id common_exts = set(itertools.chain(*self._format_selection_exts.values())) for format_id, ambiguous_formats in formats_dict.items(): ambigious_id = len(ambiguous_formats) > 1 - for i, format in enumerate(ambiguous_formats): + for i, fmt in enumerate(ambiguous_formats): if ambigious_id: - format['format_id'] = '%s-%d' % (format_id, i) + fmt['format_id'] = f'{format_id}-{i}' # Ensure there is no conflict between id and ext in format selection # See https://github.com/yt-dlp/yt-dlp/issues/1282 - if format['format_id'] != format['ext'] and format['format_id'] in common_exts: - format['format_id'] = 'f%s' % format['format_id'] + if fmt['format_id'] != fmt['ext'] and fmt['format_id'] in common_exts: + fmt['format_id'] = 'f{}'.format(fmt['format_id']) - if format.get('format') is None: - format['format'] = '{id} - {res}{note}'.format( - id=format['format_id'], - res=self.format_resolution(format), - note=format_field(format, 'format_note', ' (%s)'), + if fmt.get('format') is None: + fmt['format'] = '{id} - {res}{note}'.format( + id=fmt['format_id'], + res=self.format_resolution(fmt), + note=format_field(fmt, 'format_note', ' (%s)'), ) if self.params.get('check_formats') is True: @@ -3009,7 +3008,7 @@ class YoutubeDL: 
info_dict['requested_downloads'] = downloaded_formats info_dict = self.run_all_pps('after_video', info_dict) if max_downloads_reached: - raise MaxDownloadsReached() + raise MaxDownloadsReached # We update the info dict with the selected best quality format (backwards compatibility) info_dict.update(best_format) @@ -3070,8 +3069,8 @@ class YoutubeDL: else: f = formats[-1] self.report_warning( - 'No subtitle format found matching "%s" for language %s, ' - 'using %s. Use --list-subs for a list of available subtitles' % (formats_query, lang, f['ext'])) + 'No subtitle format found matching "{}" for language {}, ' + 'using {}. Use --list-subs for a list of available subtitles'.format(formats_query, lang, f['ext'])) subs[lang] = f return subs @@ -3226,7 +3225,7 @@ class YoutubeDL: def check_max_downloads(): if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'): - raise MaxDownloadsReached() + raise MaxDownloadsReached if self.params.get('simulate'): info_dict['__write_download_archive'] = self.params.get('force_write_download_archive') @@ -3400,7 +3399,7 @@ class YoutubeDL: for f in info_dict['requested_formats'] if fd != FFmpegFD else []: f['filepath'] = fname = prepend_extension( correct_ext(temp_filename, info_dict['ext']), - 'f%s' % f['format_id'], info_dict['ext']) + 'f{}'.format(f['format_id']), info_dict['ext']) downloaded.append(fname) info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats']) success, real_download = self.dl(temp_filename, info_dict) @@ -3433,7 +3432,7 @@ class YoutubeDL: if temp_filename != '-': fname = prepend_extension( correct_ext(temp_filename, new_info['ext']), - 'f%s' % f['format_id'], new_info['ext']) + 'f{}'.format(f['format_id']), new_info['ext']) if not self._ensure_dir_exists(fname): return f['filepath'] = fname @@ -3465,11 +3464,11 @@ class YoutubeDL: info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) except network_exceptions as err: - 
self.report_error('unable to download video data: %s' % error_to_compat_str(err)) + self.report_error(f'unable to download video data: {err}') return except OSError as err: raise UnavailableVideoError(err) - except (ContentTooShortError, ) as err: + except ContentTooShortError as err: self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})') return @@ -3536,13 +3535,13 @@ class YoutubeDL: try: replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move)) except PostProcessingError as err: - self.report_error('Postprocessing: %s' % str(err)) + self.report_error(f'Postprocessing: {err}') return try: for ph in self._post_hooks: ph(info_dict['filepath']) except Exception as err: - self.report_error('post hooks: %s' % str(err)) + self.report_error(f'post hooks: {err}') return info_dict['__write_download_archive'] = True @@ -3609,7 +3608,7 @@ class YoutubeDL: @staticmethod def sanitize_info(info_dict, remove_private_keys=False): - ''' Sanitize the infodict for converting to json ''' + """ Sanitize the infodict for converting to json """ if info_dict is None: return info_dict info_dict.setdefault('epoch', int(time.time())) @@ -3644,7 +3643,7 @@ class YoutubeDL: @staticmethod def filter_requested_info(info_dict, actually_filter=True): - ''' Alias of sanitize_info for backward compatibility ''' + """ Alias of sanitize_info for backward compatibility """ return YoutubeDL.sanitize_info(info_dict, actually_filter) def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None): @@ -3666,7 +3665,7 @@ class YoutubeDL: actual_post_extract(video_dict or {}) return - post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {}) + post_extractor = info_dict.pop('__post_extractor', None) or dict info_dict.update(post_extractor()) actual_post_extract(info_dict or {}) @@ -3771,7 +3770,7 @@ class YoutubeDL: if format.get('width') and format.get('height'): return '%dx%d' % (format['width'], format['height']) elif 
format.get('height'): - return '%sp' % format['height'] + return '{}p'.format(format['height']) elif format.get('width'): return '%dx?' % format['width'] return default @@ -3788,7 +3787,7 @@ class YoutubeDL: if fdict.get('language'): if res: res += ' ' - res += '[%s]' % fdict['language'] + res += '[{}]'.format(fdict['language']) if fdict.get('format_note') is not None: if res: res += ' ' @@ -3800,7 +3799,7 @@ class YoutubeDL: if fdict.get('container') is not None: if res: res += ', ' - res += '%s container' % fdict['container'] + res += '{} container'.format(fdict['container']) if (fdict.get('vcodec') is not None and fdict.get('vcodec') != 'none'): if res: @@ -3815,7 +3814,7 @@ class YoutubeDL: if fdict.get('fps') is not None: if res: res += ', ' - res += '%sfps' % fdict['fps'] + res += '{}fps'.format(fdict['fps']) if fdict.get('acodec') is not None: if res: res += ', ' @@ -3858,7 +3857,7 @@ class YoutubeDL: format_field(f, 'format_id'), format_field(f, 'ext'), self.format_resolution(f), - self._format_note(f) + self._format_note(f), ] for f in formats if (f.get('preference') or 0) >= -1000] return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1) @@ -3964,11 +3963,11 @@ class YoutubeDL: from .extractor.extractors import _LAZY_LOADER from .extractor.extractors import ( _PLUGIN_CLASSES as plugin_ies, - _PLUGIN_OVERRIDES as plugin_ie_overrides + _PLUGIN_OVERRIDES as plugin_ie_overrides, ) def get_encoding(stream): - ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)) + ret = str(getattr(stream, 'encoding', f'missing ({type(stream).__name__})')) additional_info = [] if os.environ.get('TERM', '').lower() == 'dumb': additional_info.append('dumb') @@ -3979,13 +3978,13 @@ class YoutubeDL: ret = f'{ret} ({",".join(additional_info)})' return ret - encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % ( + encoding_str = 'Encodings: locale {}, fs {}, pref {}, {}'.format( locale.getpreferredencoding(), 
sys.getfilesystemencoding(), self.get_encoding(), ', '.join( f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_ - if stream is not None and key != 'console') + if stream is not None and key != 'console'), ) logger = self.params.get('logger') @@ -4017,7 +4016,7 @@ class YoutubeDL: else: write_debug('Lazy loading extractors is disabled') if self.params['compat_opts']: - write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts'])) + write_debug('Compatibility options: {}'.format(', '.join(self.params['compat_opts']))) if current_git_head(): write_debug(f'Git HEAD: {current_git_head()}') @@ -4026,14 +4025,14 @@ class YoutubeDL: exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self) ffmpeg_features = {key for key, val in ffmpeg_features.items() if val} if ffmpeg_features: - exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features)) + exe_versions['ffmpeg'] += ' ({})'.format(','.join(sorted(ffmpeg_features))) exe_versions['rtmpdump'] = rtmpdump_version() exe_versions['phantomjs'] = PhantomJSwrapper._version() exe_str = ', '.join( f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v ) or 'none' - write_debug('exe versions: %s' % exe_str) + write_debug(f'exe versions: {exe_str}') from .compat.compat_utils import get_package_info from .dependencies import available_dependencies @@ -4045,7 +4044,7 @@ class YoutubeDL: write_debug(f'Proxy map: {self.proxies}') write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}') for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items(): - display_list = ['%s%s' % ( + display_list = ['{}{}'.format( klass.__name__, '' if klass.__name__ == name else f' as {name}') for name, klass in plugins.items()] if plugin_type == 'Extractor': @@ -4062,14 +4061,13 @@ class YoutubeDL: # Not implemented if False and self.params.get('call_home'): ipaddr = 
self.urlopen('https://yt-dl.org/ip').read().decode() - write_debug('Public IP address: %s' % ipaddr) + write_debug(f'Public IP address: {ipaddr}') latest_version = self.urlopen( 'https://yt-dl.org/latest/version').read().decode() if version_tuple(latest_version) > version_tuple(__version__): self.report_warning( - 'You are using an outdated version (newest version: %s)! ' - 'See https://yt-dl.org/update if you need help updating.' % - latest_version) + f'You are using an outdated version (newest version: {latest_version})! ' + 'See https://yt-dl.org/update if you need help updating.') @functools.cached_property def proxies(self): @@ -4103,7 +4101,7 @@ class YoutubeDL: return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies) def _get_available_impersonate_targets(self): - # todo(future): make available as public API + # TODO(future): make available as public API return [ (target, rh.RH_NAME) for rh in self._request_director.handlers.values() @@ -4112,7 +4110,7 @@ class YoutubeDL: ] def _impersonate_target_available(self, target): - # todo(future): make available as public API + # TODO(future): make available as public API return any( rh.is_supported_target(target) for rh in self._request_director.handlers.values() @@ -4238,7 +4236,7 @@ class YoutubeDL: return encoding def _write_info_json(self, label, ie_result, infofn, overwrite=None): - ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error ''' + """ Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error """ if overwrite is None: overwrite = self.params.get('overwrites', True) if not self.params.get('writeinfojson'): @@ -4261,7 +4259,7 @@ class YoutubeDL: return None def _write_description(self, label, ie_result, descfn): - ''' Write description and returns True = written, False = skip, None = error ''' + """ Write description and returns True = written, False = skip, None = error """ if not 
self.params.get('writedescription'): return False elif not descfn: @@ -4285,7 +4283,7 @@ class YoutubeDL: return True def _write_subtitles(self, info_dict, filename): - ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error''' + """ Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error""" ret = [] subtitles = info_dict.get('requested_subtitles') if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): @@ -4331,7 +4329,7 @@ class YoutubeDL: self.dl(sub_filename, sub_copy, subtitle=True) sub_info['filepath'] = sub_filename ret.append((sub_filename, sub_filename_final)) - except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err: + except (DownloadError, ExtractorError, OSError, ValueError, *network_exceptions) as err: msg = f'Unable to download video subtitles for {sub_lang!r}: {err}' if self.params.get('ignoreerrors') is not True: # False or 'only_download' if not self.params.get('ignoreerrors'): @@ -4341,7 +4339,7 @@ class YoutubeDL: return ret def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None): - ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error ''' + """ Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error """ write_all = self.params.get('write_all_thumbnails', False) thumbnails, ret = [], [] if write_all or self.params.get('writethumbnail', False): @@ -4368,8 +4366,8 @@ class YoutubeDL: existing_thumb = self.existing_file((thumb_filename_final, thumb_filename)) if existing_thumb: - self.to_screen('[info] %s is already present' % ( - thumb_display_id if multiple else f'{label} thumbnail').capitalize()) + self.to_screen('[info] {} is already present'.format(( + thumb_display_id if multiple else f'{label} thumbnail').capitalize())) t['filepath'] = existing_thumb 
ret.append((existing_thumb, thumb_filename_final)) else: diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 3d606bcba..c18af7589 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -14,7 +14,7 @@ import os import re import traceback -from .compat import compat_os_name, compat_shlex_quote +from .compat import compat_os_name from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS from .downloader.external import get_external_downloader from .extractor import list_extractor_classes @@ -58,6 +58,7 @@ from .utils import ( read_stdin, render_table, setproctitle, + shell_quote, traverse_obj, variadic, write_string, @@ -115,9 +116,9 @@ def print_extractor_information(opts, urls): ie.description(markdown=False, search_examples=_SEARCHES) for ie in list_extractor_classes(opts.age_limit) if ie.working() and ie.IE_DESC is not False) elif opts.ap_list_mso: - out = 'Supported TV Providers:\n%s\n' % render_table( + out = 'Supported TV Providers:\n{}\n'.format(render_table( ['mso', 'mso name'], - [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]) + [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()])) else: return False write_string(out, out=sys.stdout) @@ -129,7 +130,7 @@ def set_compat_opts(opts): if name not in opts.compat_opts: return False opts.compat_opts.discard(name) - opts.compat_opts.update(['*%s' % name]) + opts.compat_opts.update([f'*{name}']) return True def set_default_compat(compat_name, opt_name, default=True, remove_compat=True): @@ -222,7 +223,7 @@ def validate_options(opts): validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval') if opts.wait_for_video is not None: - min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None]) + min_wait, max_wait, *_ = map(parse_duration, [*opts.wait_for_video.split('-', 1), None]) validate(min_wait is not None and not (max_wait is None and '-' in opts.wait_for_video), 'time range to wait for video', 
opts.wait_for_video) validate_minmax(min_wait, max_wait, 'time range to wait for video') @@ -264,9 +265,9 @@ def validate_options(opts): # Retry sleep function def parse_sleep_func(expr): NUMBER_RE = r'\d+(?:\.\d+)?' - op, start, limit, step, *_ = tuple(re.fullmatch( + op, start, limit, step, *_ = (*tuple(re.fullmatch( rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?', - expr.strip()).groups()) + (None, None) + expr.strip()).groups()), None, None) if op == 'exp': return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf')) @@ -396,13 +397,13 @@ def validate_options(opts): # MetadataParser def metadataparser_actions(f): if isinstance(f, str): - cmd = '--parse-metadata %s' % compat_shlex_quote(f) + cmd = f'--parse-metadata {shell_quote(f)}' try: actions = [MetadataFromFieldPP.to_action(f)] except Exception as err: raise ValueError(f'{cmd} is invalid; {err}') else: - cmd = '--replace-in-metadata %s' % ' '.join(map(compat_shlex_quote, f)) + cmd = f'--replace-in-metadata {shell_quote(f)}' actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(',')) for action in actions: @@ -413,7 +414,7 @@ def validate_options(opts): yield action if opts.metafromtitle is not None: - opts.parse_metadata.setdefault('pre_process', []).append('title:%s' % opts.metafromtitle) + opts.parse_metadata.setdefault('pre_process', []).append(f'title:{opts.metafromtitle}') opts.parse_metadata = { k: list(itertools.chain(*map(metadataparser_actions, v))) for k, v in opts.parse_metadata.items() @@ -602,7 +603,7 @@ def get_postprocessors(opts): yield { 'key': 'MetadataParser', 'actions': actions, - 'when': when + 'when': when, } sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove if sponsorblock_query: @@ -610,19 +611,19 @@ def get_postprocessors(opts): 'key': 'SponsorBlock', 'categories': sponsorblock_query, 'api': opts.sponsorblock_api, - 'when': 'after_filter' + 'when': 'after_filter', } if 
opts.convertsubtitles: yield { 'key': 'FFmpegSubtitlesConvertor', 'format': opts.convertsubtitles, - 'when': 'before_dl' + 'when': 'before_dl', } if opts.convertthumbnails: yield { 'key': 'FFmpegThumbnailsConvertor', 'format': opts.convertthumbnails, - 'when': 'before_dl' + 'when': 'before_dl', } if opts.extractaudio: yield { @@ -647,7 +648,7 @@ def get_postprocessors(opts): yield { 'key': 'FFmpegEmbedSubtitle', # already_have_subtitle = True prevents the file from being deleted after embedding - 'already_have_subtitle': opts.writesubtitles and keep_subs + 'already_have_subtitle': opts.writesubtitles and keep_subs, } if not opts.writeautomaticsub and keep_subs: opts.writesubtitles = True @@ -660,7 +661,7 @@ def get_postprocessors(opts): 'remove_sponsor_segments': opts.sponsorblock_remove, 'remove_ranges': opts.remove_ranges, 'sponsorblock_chapter_title': opts.sponsorblock_chapter_title, - 'force_keyframes': opts.force_keyframes_at_cuts + 'force_keyframes': opts.force_keyframes_at_cuts, } # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and # FFmpegExtractAudioPP as containers before conversion may not support @@ -694,7 +695,7 @@ def get_postprocessors(opts): yield { 'key': 'EmbedThumbnail', # already_have_thumbnail = True prevents the file from being deleted after embedding - 'already_have_thumbnail': opts.writethumbnail + 'already_have_thumbnail': opts.writethumbnail, } if not opts.writethumbnail: opts.writethumbnail = True @@ -741,7 +742,7 @@ def parse_options(argv=None): print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:]) any_getting = any(getattr(opts, k) for k in ( 'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename', - 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl' + 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl', )) if opts.quiet is None: opts.quiet = any_getting or opts.print_json or bool(opts.forceprint) @@ -1002,7 +1003,7 @@ def 
_real_main(argv=None): def make_row(target, handler): return [ join_nonempty(target.client.title(), target.version, delim='-') or '-', - join_nonempty((target.os or "").title(), target.os_version, delim='-') or '-', + join_nonempty((target.os or '').title(), target.os_version, delim='-') or '-', handler, ] diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index b3a383cd9..abf54a998 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -68,7 +68,7 @@ def pad_block(block, padding_mode): raise NotImplementedError(f'Padding mode {padding_mode} is not implemented') if padding_mode == 'iso7816' and padding_size: - block = block + [0x80] # NB: += mutates list + block = [*block, 0x80] # NB: += mutates list padding_size -= 1 return block + [PADDING_BYTE[padding_mode]] * padding_size @@ -110,9 +110,7 @@ def aes_ecb_decrypt(data, key, iv=None): for i in range(block_count): block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] encrypted_data += aes_decrypt(block, expanded_key) - encrypted_data = encrypted_data[:len(data)] - - return encrypted_data + return encrypted_data[:len(data)] def aes_ctr_decrypt(data, key, iv): @@ -148,9 +146,7 @@ def aes_ctr_encrypt(data, key, iv): cipher_counter_block = aes_encrypt(counter_block, expanded_key) encrypted_data += xor(block, cipher_counter_block) - encrypted_data = encrypted_data[:len(data)] - - return encrypted_data + return encrypted_data[:len(data)] def aes_cbc_decrypt(data, key, iv): @@ -174,9 +170,7 @@ def aes_cbc_decrypt(data, key, iv): decrypted_block = aes_decrypt(block, expanded_key) decrypted_data += xor(decrypted_block, previous_cipher_block) previous_cipher_block = block - decrypted_data = decrypted_data[:len(data)] - - return decrypted_data + return decrypted_data[:len(data)] def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'): @@ -224,7 +218,7 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce): hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key)) if len(nonce) == 12: - j0 = nonce + [0, 0, 0, 1] 
+ j0 = [*nonce, 0, 0, 0, 1] else: fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8 ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big')) @@ -242,11 +236,11 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce): data + [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) # pad + bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data - + ((len(data) * 8).to_bytes(8, 'big'))) # length of data + + ((len(data) * 8).to_bytes(8, 'big'))), # length of data ) if tag != aes_ctr_encrypt(s_tag, key, j0): - raise ValueError("Mismatching authentication tag") + raise ValueError('Mismatching authentication tag') return decrypted_data @@ -288,9 +282,7 @@ def aes_decrypt(data, expanded_key): data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV)) data = shift_rows_inv(data) data = sub_bytes_inv(data) - data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) - - return data + return xor(data, expanded_key[:BLOCK_SIZE_BYTES]) def aes_decrypt_text(data, password, key_size_bytes): @@ -318,9 +310,7 @@ def aes_decrypt_text(data, password, key_size_bytes): cipher = data[NONCE_LENGTH_BYTES:] decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)) - plaintext = intlist_to_bytes(decrypted_data) - - return plaintext + return intlist_to_bytes(decrypted_data) RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) @@ -428,9 +418,7 @@ def key_expansion(data): for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0): temp = data[-4:] data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) - data = data[:expanded_key_size_bytes] - - return data + return data[:expanded_key_size_bytes] def iter_vector(iv): @@ -511,7 +499,7 @@ def block_product(block_x, block_y): # NIST SP 800-38D, Algorithm 1 if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES: - raise ValueError("Length of blocks need to be %d bytes" % 
BLOCK_SIZE_BYTES) + raise ValueError(f'Length of blocks need to be {BLOCK_SIZE_BYTES} bytes') block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1) block_v = block_y[:] @@ -534,7 +522,7 @@ def ghash(subkey, data): # NIST SP 800-38D, Algorithm 2 if len(data) % BLOCK_SIZE_BYTES: - raise ValueError("Length of data should be %d bytes" % BLOCK_SIZE_BYTES) + raise ValueError(f'Length of data should be {BLOCK_SIZE_BYTES} bytes') last_y = [0] * BLOCK_SIZE_BYTES for i in range(0, len(data), BLOCK_SIZE_BYTES): diff --git a/yt_dlp/cache.py b/yt_dlp/cache.py index 9dd4f2f25..71dca82b3 100644 --- a/yt_dlp/cache.py +++ b/yt_dlp/cache.py @@ -81,10 +81,10 @@ class Cache: cachedir = self._get_root_dir() if not any((term in cachedir) for term in ('cache', 'tmp')): - raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir) + raise Exception(f'Not removing directory {cachedir} - this does not look like a cache dir') self._ydl.to_screen( - 'Removing cache dir %s .' % cachedir, skip_eol=True) + f'Removing cache dir {cachedir} .', skip_eol=True) if os.path.exists(cachedir): self._ydl.to_screen('.', skip_eol=True) shutil.rmtree(cachedir) diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py index 7ea5d0812..dfc792eae 100644 --- a/yt_dlp/compat/_legacy.py +++ b/yt_dlp/compat/_legacy.py @@ -35,7 +35,7 @@ from .compat_utils import passthrough_module from ..dependencies import brotli as compat_brotli # noqa: F401 from ..dependencies import websockets as compat_websockets # noqa: F401 from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401 -from ..networking.exceptions import HTTPError as compat_HTTPError # noqa: F401 +from ..networking.exceptions import HTTPError as compat_HTTPError passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) diff --git a/yt_dlp/compat/functools.py b/yt_dlp/compat/functools.py index 36c983642..96689575f 100644 --- a/yt_dlp/compat/functools.py +++ 
b/yt_dlp/compat/functools.py @@ -7,6 +7,6 @@ passthrough_module(__name__, 'functools') del passthrough_module try: - cache # >= 3.9 + _ = cache # >= 3.9 except NameError: cache = lru_cache(maxsize=None) diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 815897d5a..0850ad260 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -146,7 +146,7 @@ def _extract_firefox_cookies(profile, container, logger): identities = json.load(containers).get('identities', []) container_id = next((context.get('userContextId') for context in identities if container in ( context.get('name'), - try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()) + try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()), )), None) if not isinstance(container_id, int): raise ValueError(f'could not find firefox container "{container}" in containers.json') @@ -263,7 +263,7 @@ def _get_chromium_based_browser_settings(browser_name): return { 'browser_dir': browser_dir, 'keyring_name': keyring_name, - 'supports_profiles': browser_name not in browsers_without_profiles + 'supports_profiles': browser_name not in browsers_without_profiles, } @@ -826,7 +826,7 @@ def _choose_linux_keyring(logger): elif desktop_environment == _LinuxDesktopEnvironment.KDE6: linux_keyring = _LinuxKeyring.KWALLET6 elif desktop_environment in ( - _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER + _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER, ): linux_keyring = _LinuxKeyring.BASICTEXT else: @@ -861,7 +861,7 @@ def _get_kwallet_network_wallet(keyring, logger): 'dbus-send', '--session', '--print-reply=literal', f'--dest={service_name}', wallet_path, - 'org.kde.KWallet.networkWallet' + 'org.kde.KWallet.networkWallet', ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) if returncode: @@ -891,7 +891,7 @@ def _get_kwallet_password(browser_keyring_name, keyring, 
logger): 'kwallet-query', '--read-password', f'{browser_keyring_name} Safe Storage', '--folder', f'{browser_keyring_name} Keys', - network_wallet + network_wallet, ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) if returncode: @@ -931,9 +931,8 @@ def _get_gnome_keyring_password(browser_keyring_name, logger): for item in col.get_all_items(): if item.get_label() == f'{browser_keyring_name} Safe Storage': return item.get_secret() - else: - logger.error('failed to read from keyring') - return b'' + logger.error('failed to read from keyring') + return b'' def _get_linux_keyring_password(browser_keyring_name, keyring, logger): @@ -1053,7 +1052,7 @@ def _decrypt_windows_dpapi(ciphertext, logger): None, # pvReserved: must be NULL None, # pPromptStruct: information about prompts to display 0, # dwFlags - ctypes.byref(blob_out) # pDataOut + ctypes.byref(blob_out), # pDataOut ) if not ret: logger.warning('failed to decrypt with DPAPI', only_once=True) @@ -1129,24 +1128,24 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}') _RESERVED = { - "expires", - "path", - "comment", - "domain", - "max-age", - "secure", - "httponly", - "version", - "samesite", + 'expires', + 'path', + 'comment', + 'domain', + 'max-age', + 'secure', + 'httponly', + 'version', + 'samesite', } - _FLAGS = {"secure", "httponly"} + _FLAGS = {'secure', 'httponly'} # Added 'bad' group to catch the remaining value - _COOKIE_PATTERN = re.compile(r""" + _COOKIE_PATTERN = re.compile(r''' \s* # Optional whitespace at start of cookie (?P<key> # Start of group 'key' - [""" + _LEGAL_KEY_CHARS + r"""]+?# Any word of at least one letter + [''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter ) # End of group 'key' ( # Optional group: there may not be a value. 
\s*=\s* # Equal Sign @@ -1156,7 +1155,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): | # or \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr | # or - [""" + _LEGAL_VALUE_CHARS + r"""]* # Any word or empty string + [''' + _LEGAL_VALUE_CHARS + r''']* # Any word or empty string ) # End of group 'val' | # or (?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values @@ -1164,7 +1163,7 @@ class LenientSimpleCookie(http.cookies.SimpleCookie): )? # End of optional value group \s* # Any number of spaces. (\s+|;|$) # Ending either at space, semicolon, or EOS. - """, re.ASCII | re.VERBOSE) + ''', re.ASCII | re.VERBOSE) def load(self, data): # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776 @@ -1260,14 +1259,14 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar): # with no name, whereas http.cookiejar regards it as a # cookie with no value. name, value = '', name - f.write('%s\n' % '\t'.join(( + f.write('{}\n'.format('\t'.join(( cookie.domain, self._true_or_false(cookie.domain.startswith('.')), cookie.path, self._true_or_false(cookie.secure), str_or_none(cookie.expires, default=''), - name, value - ))) + name, value, + )))) def save(self, filename=None, ignore_discard=True, ignore_expires=True): """ @@ -1306,10 +1305,10 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar): return line cookie_list = line.split('\t') if len(cookie_list) != self._ENTRY_LEN: - raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list)) + raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}') cookie = self._CookieFileEntry(*cookie_list) if cookie.expires_at and not cookie.expires_at.isdigit(): - raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at) + raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}') return line cf = io.StringIO() diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 65a0d6f23..2e3ea2fc4 100644 --- 
a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -404,7 +404,7 @@ class FileDownloader: def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" - self.to_screen('[download] Resuming download at byte %s' % resume_len) + self.to_screen(f'[download] Resuming download at byte {resume_len}') def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True): """Report retry""" diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 8b0b94e72..8b45c671a 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -55,7 +55,7 @@ class ExternalFD(FragmentFD): # correct and expected termination thus all postprocessing # should take place retval = 0 - self.to_screen('[%s] Interrupted by user' % self.get_basename()) + self.to_screen(f'[{self.get_basename()}] Interrupted by user') finally: if self._cookies_tempfile: self.try_remove(self._cookies_tempfile) @@ -172,7 +172,7 @@ class ExternalFD(FragmentFD): decrypt_fragment = self.decrypter(info_dict) dest, _ = self.sanitize_open(tmpfilename, 'wb') for frag_index, fragment in enumerate(info_dict['fragments']): - fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index) + fragment_filename = f'{tmpfilename}-Frag{frag_index}' try: src, _ = self.sanitize_open(fragment_filename, 'rb') except OSError as err: @@ -186,7 +186,7 @@ class ExternalFD(FragmentFD): if not self.params.get('keep_fragments', False): self.try_remove(encodeFilename(fragment_filename)) dest.close() - self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename)) + self.try_remove(encodeFilename(f'{tmpfilename}.frag.urls')) return 0 def _call_process(self, cmd, info_dict): @@ -336,11 +336,11 @@ class Aria2cFD(ExternalFD): if 'fragments' in info_dict: cmd += ['--uri-selector=inorder'] - url_list_file = '%s.frag.urls' % tmpfilename + url_list_file = f'{tmpfilename}.frag.urls' url_list = [] for frag_index, fragment in enumerate(info_dict['fragments']): - 
fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index) - url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename))) + fragment_filename = f'{os.path.basename(tmpfilename)}-Frag{frag_index}' + url_list.append('{}\n\tout={}'.format(fragment['url'], self._aria2c_filename(fragment_filename))) stream, _ = self.sanitize_open(url_list_file, 'wb') stream.write('\n'.join(url_list).encode()) stream.close() @@ -357,7 +357,7 @@ class Aria2cFD(ExternalFD): 'id': sanitycheck, 'method': method, 'params': [f'token:{rpc_secret}', *params], - }).encode('utf-8') + }).encode() request = Request( f'http://localhost:{rpc_port}/jsonrpc', data=d, headers={ @@ -416,7 +416,7 @@ class Aria2cFD(ExternalFD): 'total_bytes_estimate': total, 'eta': (total - downloaded) / (speed or 1), 'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None, - 'elapsed': time.time() - started + 'elapsed': time.time() - started, }) self._hook_progress(status, info_dict) @@ -509,12 +509,12 @@ class FFmpegFD(ExternalFD): proxy = self.params.get('proxy') if proxy: if not re.match(r'^[\da-zA-Z]+://', proxy): - proxy = 'http://%s' % proxy + proxy = f'http://{proxy}' if proxy.startswith('socks'): self.report_warning( - '%s does not support SOCKS proxies. Downloading is likely to fail. ' - 'Consider adding --hls-prefer-native to your command.' % self.get_basename()) + f'{self.get_basename()} does not support SOCKS proxies. Downloading is likely to fail. 
' + 'Consider adding --hls-prefer-native to your command.') # Since December 2015 ffmpeg supports -http_proxy option (see # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) @@ -575,7 +575,7 @@ class FFmpegFD(ExternalFD): if end_time: args += ['-t', str(end_time - start_time)] - args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', fmt['url']] + args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']] if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'): args += ['-c', 'copy'] diff --git a/yt_dlp/downloader/f4m.py b/yt_dlp/downloader/f4m.py index 28cbba016..22d0ebd26 100644 --- a/yt_dlp/downloader/f4m.py +++ b/yt_dlp/downloader/f4m.py @@ -67,12 +67,12 @@ class FlvReader(io.BytesIO): self.read_bytes(3) quality_entry_count = self.read_unsigned_char() # QualityEntryCount - for i in range(quality_entry_count): + for _ in range(quality_entry_count): self.read_string() segment_run_count = self.read_unsigned_int() segments = [] - for i in range(segment_run_count): + for _ in range(segment_run_count): first_segment = self.read_unsigned_int() fragments_per_segment = self.read_unsigned_int() segments.append((first_segment, fragments_per_segment)) @@ -91,12 +91,12 @@ class FlvReader(io.BytesIO): quality_entry_count = self.read_unsigned_char() # QualitySegmentUrlModifiers - for i in range(quality_entry_count): + for _ in range(quality_entry_count): self.read_string() fragments_count = self.read_unsigned_int() fragments = [] - for i in range(fragments_count): + for _ in range(fragments_count): first = self.read_unsigned_int() first_ts = self.read_unsigned_long_long() duration = self.read_unsigned_int() @@ -135,11 +135,11 @@ class FlvReader(io.BytesIO): self.read_string() # MovieIdentifier server_count = self.read_unsigned_char() # ServerEntryTable - for i in range(server_count): + for _ in range(server_count): self.read_string() quality_count = self.read_unsigned_char() # 
QualityEntryTable - for i in range(quality_count): + for _ in range(quality_count): self.read_string() # DrmData self.read_string() @@ -148,14 +148,14 @@ class FlvReader(io.BytesIO): segments_count = self.read_unsigned_char() segments = [] - for i in range(segments_count): + for _ in range(segments_count): box_size, box_type, box_data = self.read_box_info() assert box_type == b'asrt' segment = FlvReader(box_data).read_asrt() segments.append(segment) fragments_run_count = self.read_unsigned_char() fragments = [] - for i in range(fragments_run_count): + for _ in range(fragments_run_count): box_size, box_type, box_data = self.read_box_info() assert box_type == b'afrt' fragments.append(FlvReader(box_data).read_afrt()) @@ -309,7 +309,7 @@ class F4mFD(FragmentFD): def real_download(self, filename, info_dict): man_url = info_dict['url'] requested_bitrate = info_dict.get('tbr') - self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) + self.to_screen(f'[{self.FD_NAME}] Downloading f4m manifest') urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.url @@ -326,8 +326,8 @@ class F4mFD(FragmentFD): formats = sorted(formats, key=lambda f: f[0]) rate, media = formats[-1] else: - rate, media = list(filter( - lambda f: int(f[0]) == requested_bitrate, formats))[0] + rate, media = next(filter( + lambda f: int(f[0]) == requested_bitrate, formats)) # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec. man_base_url = get_base_url(doc) or man_url diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index b4f003d37..0d00196e2 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -199,7 +199,7 @@ class FragmentFD(FileDownloader): '.ytdl file is corrupt' if is_corrupt else 'Inconsistent state of incomplete fragment download') self.report_warning( - '%s. Restarting from the beginning ...' % message) + f'{message}. 
Restarting from the beginning ...') ctx['fragment_index'] = resume_len = 0 if 'ytdl_corrupt' in ctx: del ctx['ytdl_corrupt'] @@ -366,10 +366,10 @@ class FragmentFD(FileDownloader): return decrypt_fragment def download_and_append_fragments_multiple(self, *args, **kwargs): - ''' + """ @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ... all args must be either tuple or list - ''' + """ interrupt_trigger = [True] max_progress = len(args) if max_progress == 1: @@ -424,7 +424,7 @@ class FragmentFD(FileDownloader): finally: tpe.shutdown(wait=True) if not interrupt_trigger[0] and not is_live: - raise KeyboardInterrupt() + raise KeyboardInterrupt # we expect the user wants to stop and DO WANT the preceding postprocessors to run; # so returning a intermediate result here instead of KeyboardInterrupt on live return result diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 4ac5d99dc..9cb4f014c 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -72,7 +72,7 @@ class HlsFD(FragmentFD): def real_download(self, filename, info_dict): man_url = info_dict['url'] - self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) + self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest') urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.url @@ -228,7 +228,7 @@ class HlsFD(FragmentFD): 'url': frag_url, 'decrypt_info': decrypt_info, 'byte_range': byte_range, - 'media_sequence': media_sequence + 'media_sequence': media_sequence, }) media_sequence += 1 @@ -350,9 +350,8 @@ class HlsFD(FragmentFD): # XXX: this should probably be silent as well # or verify that all segments contain the same data self.report_warning(bug_reports_message( - 'Discarding a %s block found in the middle of the stream; ' - 'if the subtitles display incorrectly,' - % (type(block).__name__))) + f'Discarding a {type(block).__name__} block found in the middle of the stream; ' + 'if the subtitles display incorrectly,')) 
continue block.write_into(output) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 693828b6e..c0165790d 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -176,7 +176,7 @@ class HttpFD(FileDownloader): 'downloaded_bytes': ctx.resume_len, 'total_bytes': ctx.resume_len, }, info_dict) - raise SucceedDownload() + raise SucceedDownload else: # The length does not match, we start the download over self.report_unable_to_resume() @@ -194,7 +194,7 @@ class HttpFD(FileDownloader): def close_stream(): if ctx.stream is not None: - if not ctx.tmpfilename == '-': + if ctx.tmpfilename != '-': ctx.stream.close() ctx.stream = None @@ -268,20 +268,20 @@ class HttpFD(FileDownloader): ctx.filename = self.undo_temp_name(ctx.tmpfilename) self.report_destination(ctx.filename) except OSError as err: - self.report_error('unable to open for writing: %s' % str(err)) + self.report_error(f'unable to open for writing: {err}') return False if self.params.get('xattr_set_filesize', False) and data_len is not None: try: write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode()) except (XAttrUnavailableError, XAttrMetadataError) as err: - self.report_error('unable to set filesize xattr: %s' % str(err)) + self.report_error(f'unable to set filesize xattr: {err}') try: ctx.stream.write(data_block) except OSError as err: self.to_stderr('\n') - self.report_error('unable to write data: %s' % str(err)) + self.report_error(f'unable to write data: {err}') return False # Apply rate limit @@ -327,7 +327,7 @@ class HttpFD(FileDownloader): elif now - ctx.throttle_start > 3: if ctx.stream is not None and ctx.tmpfilename != '-': ctx.stream.close() - raise ThrottledDownload() + raise ThrottledDownload elif speed: ctx.throttle_start = None @@ -338,7 +338,7 @@ class HttpFD(FileDownloader): if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len: ctx.resume_len = byte_counter - raise NextFragment() + raise 
NextFragment if ctx.tmpfilename != '-': ctx.stream.close() diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py index dd688f586..62c3a3b7f 100644 --- a/yt_dlp/downloader/ism.py +++ b/yt_dlp/downloader/ism.py @@ -251,7 +251,7 @@ class IsmFD(FragmentFD): skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) frag_index = 0 - for i, segment in enumerate(segments): + for segment in segments: frag_index += 1 if frag_index <= ctx['fragment_index']: continue diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py index d977dcec3..3d4f2d763 100644 --- a/yt_dlp/downloader/mhtml.py +++ b/yt_dlp/downloader/mhtml.py @@ -10,7 +10,7 @@ from ..version import __version__ as YT_DLP_VERSION class MhtmlFD(FragmentFD): - _STYLESHEET = """\ + _STYLESHEET = '''\ html, body { margin: 0; padding: 0; @@ -45,7 +45,7 @@ body > figure > img { max-width: 100%; max-height: calc(100vh - 5em); } -""" +''' _STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET) _STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET) @@ -57,24 +57,19 @@ body > figure > img { )).decode('us-ascii') + '?=' def _gen_cid(self, i, fragment, frag_boundary): - return '%u.%s@yt-dlp.github.io.invalid' % (i, frag_boundary) + return f'{i}.{frag_boundary}@yt-dlp.github.io.invalid' def _gen_stub(self, *, fragments, frag_boundary, title): output = io.StringIO() - output.write(( + output.write( '<!DOCTYPE html>' '<html>' '<head>' - '' '<meta name="generator" content="yt-dlp {version}">' - '' '<title>{title}' - '' '' - '' - ).format( - version=escapeHTML(YT_DLP_VERSION), - styles=self._STYLESHEET, - title=escapeHTML(title) - )) + f'' + f'{escapeHTML(title)}' + f'' + '') t0 = 0 for i, frag in enumerate(fragments): @@ -87,15 +82,12 @@ body > figure > img { num=i + 1, t0=srt_subtitles_timecode(t0), t1=srt_subtitles_timecode(t1), - duration=formatSeconds(frag['duration'], msec=True) + duration=formatSeconds(frag['duration'], msec=True), )) except 
(KeyError, ValueError, TypeError): t1 = None - output.write(( - '
Slide #{num}
' - ).format(num=i + 1)) - output.write(''.format( - cid=self._gen_cid(i, frag, frag_boundary))) + output.write(f'
Slide #{i + 1}
') + output.write(f'') output.write('') t0 = t1 @@ -126,31 +118,24 @@ body > figure > img { stub = self._gen_stub( fragments=fragments, frag_boundary=frag_boundary, - title=title + title=title, ) ctx['dest_stream'].write(( 'MIME-Version: 1.0\r\n' 'From: \r\n' 'To: \r\n' - 'Subject: {title}\r\n' + f'Subject: {self._escape_mime(title)}\r\n' 'Content-type: multipart/related; ' - '' 'boundary="{boundary}"; ' - '' 'type="text/html"\r\n' - 'X.yt-dlp.Origin: {origin}\r\n' + f'boundary="{frag_boundary}"; ' + 'type="text/html"\r\n' + f'X.yt-dlp.Origin: {origin}\r\n' '\r\n' - '--{boundary}\r\n' + f'--{frag_boundary}\r\n' 'Content-Type: text/html; charset=utf-8\r\n' - 'Content-Length: {length}\r\n' + f'Content-Length: {len(stub)}\r\n' '\r\n' - '{stub}\r\n' - ).format( - origin=origin, - boundary=frag_boundary, - length=len(stub), - title=self._escape_mime(title), - stub=stub - ).encode()) + f'{stub}\r\n').encode()) extra_state['header_written'] = True for i, fragment in enumerate(fragments): diff --git a/yt_dlp/downloader/niconico.py b/yt_dlp/downloader/niconico.py index fef8bff73..462c6e2d6 100644 --- a/yt_dlp/downloader/niconico.py +++ b/yt_dlp/downloader/niconico.py @@ -15,7 +15,7 @@ class NiconicoDmcFD(FileDownloader): def real_download(self, filename, info_dict): from ..extractor.niconico import NiconicoIE - self.to_screen('[%s] Downloading from DMC' % self.FD_NAME) + self.to_screen(f'[{self.FD_NAME}] Downloading from DMC') ie = NiconicoIE(self.ydl) info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict) @@ -34,7 +34,7 @@ class NiconicoDmcFD(FileDownloader): try: self.ydl.urlopen(request).read() except Exception: - self.to_screen('[%s] Heartbeat failed' % self.FD_NAME) + self.to_screen(f'[{self.FD_NAME}] Heartbeat failed') with heartbeat_lock: if not download_complete: @@ -85,14 +85,14 @@ class NiconicoLiveFD(FileDownloader): 'quality': live_quality, 'protocol': 'hls+fmp4', 'latency': live_latency, - 'chasePlay': False + 'chasePlay': False, }, 'room': { 
'protocol': 'webSocket', - 'commentable': True + 'commentable': True, }, 'reconnect': True, - } + }, })) else: ws = ws_extractor @@ -118,7 +118,7 @@ class NiconicoLiveFD(FileDownloader): elif self.ydl.params.get('verbose', False): if len(recv) > 100: recv = recv[:100] + '...' - self.to_screen('[debug] Server said: %s' % recv) + self.to_screen(f'[debug] Server said: {recv}') def ws_main(): reconnect = False @@ -128,7 +128,7 @@ class NiconicoLiveFD(FileDownloader): if ret is True: return except BaseException as e: - self.to_screen('[%s] %s: Connection error occured, reconnecting after 10 seconds: %s' % ('niconico:live', video_id, str_or_none(e))) + self.to_screen('[{}] {}: Connection error occured, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e))) time.sleep(10) continue finally: diff --git a/yt_dlp/downloader/rtmp.py b/yt_dlp/downloader/rtmp.py index 0e0952599..d7ffb3b34 100644 --- a/yt_dlp/downloader/rtmp.py +++ b/yt_dlp/downloader/rtmp.py @@ -180,9 +180,9 @@ class RtmpFD(FileDownloader): while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live: prevsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize) + self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes') time.sleep(5.0) # This seems to be needed - args = basic_args + ['--resume'] + args = [*basic_args, '--resume'] if retval == RD_FAILED: args += ['--skip', '1'] args = [encodeArgument(a) for a in args] @@ -197,7 +197,7 @@ class RtmpFD(FileDownloader): break if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE): fsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize) + self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes') self.try_rename(tmpfilename, filename) self._hook_progress({ 'downloaded_bytes': fsize, diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py index c7a86374a..961938d44 100644 --- 
a/yt_dlp/downloader/youtube_live_chat.py +++ b/yt_dlp/downloader/youtube_live_chat.py @@ -18,7 +18,7 @@ class YoutubeLiveChatFD(FragmentFD): def real_download(self, filename, info_dict): video_id = info_dict['video_id'] - self.to_screen('[%s] Downloading live chat' % self.FD_NAME) + self.to_screen(f'[{self.FD_NAME}] Downloading live chat') if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat': self.report_warning('Live chat download runs until the livestream ends. ' 'If you wish to download the video simultaneously, run a separate yt-dlp instance') diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index 2c0d296fd..7518ba6f0 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -4,7 +4,6 @@ import re import time from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, dict_get, @@ -67,7 +66,7 @@ class ABCIE(InfoExtractor): 'ext': 'mp4', 'title': 'WWI Centenary', 'description': 'md5:c2379ec0ca84072e86b446e536954546', - } + }, }, { 'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074', 'info_dict': { @@ -75,7 +74,7 @@ class ABCIE(InfoExtractor): 'ext': 'mp4', 'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia', 'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f', - } + }, }, { 'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476', 'info_dict': { @@ -86,7 +85,7 @@ class ABCIE(InfoExtractor): 'upload_date': '20200813', 'uploader': 'Behind the News', 'uploader_id': 'behindthenews', - } + }, }, { 'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540', 'info_dict': { @@ -95,7 +94,7 @@ class ABCIE(InfoExtractor): 'ext': 'mp4', 'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander 
Lukashenko to end the mutiny.', 'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485', - } + }, }] def _real_extract(self, url): @@ -126,7 +125,7 @@ class ABCIE(InfoExtractor): if mobj is None: expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?(.+?)', webpage, 'expired', None) if expired: - raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True) + raise ExtractorError(f'{self.IE_NAME} said: {expired}', expected=True) raise ExtractorError('Unable to extract video urls') urls_info = self._parse_json( @@ -164,7 +163,7 @@ class ABCIE(InfoExtractor): 'height': height, 'tbr': bitrate, 'filesize': int_or_none(url_info.get('filesize')), - 'format_id': format_id + 'format_id': format_id, }) return { @@ -288,13 +287,12 @@ class ABCIViewIE(InfoExtractor): stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream')) house_number = video_params.get('episodeHouseNumber') or video_id - path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format( - int(time.time()), house_number) + path = f'/auth/hls/sign?ts={int(time.time())}&hn={house_number}&d=android-tablet' sig = hmac.new( b'android.content.res.Resources', - path.encode('utf-8'), hashlib.sha256).hexdigest() + path.encode(), hashlib.sha256).hexdigest() token = self._download_webpage( - 'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id) + f'http://iview.abc.net.au{path}&sig={sig}', video_id) def tokenize_url(url, token): return update_url_query(url, { @@ -303,7 +301,7 @@ class ABCIViewIE(InfoExtractor): for sd in ('1080', '720', 'sd', 'sd-low'): sd_url = try_get( - stream, lambda x: x['streams']['hls'][sd], compat_str) + stream, lambda x: x['streams']['hls'][sd], str) if not sd_url: continue formats = self._extract_m3u8_formats( @@ -358,7 +356,7 @@ class ABCIViewShowSeriesIE(InfoExtractor): 'description': 
'md5:93119346c24a7c322d446d8eece430ff', 'series': 'Upper Middle Bogan', 'season': 'Series 1', - 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$' + 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$', }, 'playlist_count': 8, }, { @@ -386,7 +384,7 @@ class ABCIViewShowSeriesIE(InfoExtractor): 'description': 'Satirist Mark Humphries brings his unique perspective on current political events for 7.30.', 'series': '7.30 Mark Humphries Satire', 'season': 'Episodes', - 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$' + 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$', }, 'playlist_count': 15, }] @@ -398,7 +396,7 @@ class ABCIViewShowSeriesIE(InfoExtractor): r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;', webpage, 'initial state') video_data = self._parse_json( - unescapeHTML(webpage_data).encode('utf-8').decode('unicode_escape'), show_id) + unescapeHTML(webpage_data).encode().decode('unicode_escape'), show_id) video_data = video_data['route']['pageData']['_embedded'] highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl']) diff --git a/yt_dlp/extractor/abcnews.py b/yt_dlp/extractor/abcnews.py index a57295b13..7215500b9 100644 --- a/yt_dlp/extractor/abcnews.py +++ b/yt_dlp/extractor/abcnews.py @@ -58,7 +58,7 @@ class AbcNewsVideoIE(AMPIE): display_id = mobj.group('display_id') video_id = mobj.group('id') info_dict = self._extract_feed_info( - 'http://abcnews.go.com/video/itemfeed?id=%s' % video_id) + f'http://abcnews.go.com/video/itemfeed?id={video_id}') info_dict.update({ 'id': video_id, 'display_id': display_id, diff --git a/yt_dlp/extractor/abcotvs.py b/yt_dlp/extractor/abcotvs.py index 6dca19de4..ea5882b26 100644 --- a/yt_dlp/extractor/abcotvs.py +++ b/yt_dlp/extractor/abcotvs.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( dict_get, int_or_none, @@ -57,11 +56,11 @@ class ABCOTVSIE(InfoExtractor): data = 
self._download_json( 'https://api.abcotvs.com/v2/content', display_id, query={ 'id': video_id, - 'key': 'otv.web.%s.story' % station, + 'key': f'otv.web.{station}.story', 'station': station, })['data'] video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data - video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id)) + video_id = str(dict_get(video, ('id', 'publishedKey'), video_id)) title = video.get('title') or video['linkText'] formats = [] diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index b8c79b912..293a6c40e 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -66,8 +66,8 @@ class AbemaLicenseHandler(urllib.request.BaseHandler): query={'t': media_token}, data=json.dumps({ 'kv': 'a', - 'lt': ticket - }).encode('utf-8'), + 'lt': ticket, + }).encode(), headers={ 'Content-Type': 'application/json', }) @@ -77,7 +77,7 @@ class AbemaLicenseHandler(urllib.request.BaseHandler): h = hmac.new( binascii.unhexlify(self.HKEY), - (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'), + (license_response['cid'] + self.ie._DEVICE_ID).encode(), digestmod=hashlib.sha256) enckey = bytes_to_intlist(h.digest()) @@ -103,11 +103,11 @@ class AbemaTVBaseIE(InfoExtractor): @classmethod def _generate_aks(cls, deviceid): - deviceid = deviceid.encode('utf-8') + deviceid = deviceid.encode() # add 1 hour and then drop minute and secs ts_1hour = int((time_seconds() // 3600 + 1) * 3600) time_struct = time.gmtime(ts_1hour) - ts_1hour_str = str(ts_1hour).encode('utf-8') + ts_1hour_str = str(ts_1hour).encode() tmp = None @@ -119,7 +119,7 @@ class AbemaTVBaseIE(InfoExtractor): def mix_tmp(count): nonlocal tmp - for i in range(count): + for _ in range(count): mix_once(tmp) def mix_twist(nonce): @@ -160,7 +160,7 @@ class AbemaTVBaseIE(InfoExtractor): data=json.dumps({ 'deviceId': self._DEVICE_ID, 'applicationKeySecret': aks, - }).encode('utf-8'), + }).encode(), headers={ 'Content-Type': 'application/json', 
}) @@ -180,7 +180,7 @@ class AbemaTVBaseIE(InfoExtractor): 'osLang': 'ja_JP', 'osTimezone': 'Asia/Tokyo', 'appId': 'tv.abema', - 'appVersion': '3.27.1' + 'appVersion': '3.27.1', }, headers={ 'Authorization': f'bearer {self._get_device_token()}', })['token'] @@ -202,8 +202,8 @@ class AbemaTVBaseIE(InfoExtractor): f'https://api.abema.io/v1/auth/{ep}', None, note='Logging in', data=json.dumps({ method: username, - 'password': password - }).encode('utf-8'), headers={ + 'password': password, + }).encode(), headers={ 'Authorization': f'bearer {self._get_device_token()}', 'Origin': 'https://abema.tv', 'Referer': 'https://abema.tv/', @@ -344,7 +344,7 @@ class AbemaTVIE(AbemaTVBaseIE): description = self._html_search_regex( (r'(.+?)

(.+?)(.+?)(.+?)' % (tag, tag), xml_str, tag) + f'<{tag}>(.+?)', xml_str, tag) def is_expired(token, date_ele): token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) @@ -1394,7 +1394,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en form_page, urlh = form_page_res post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') if not re.match(r'https?://', post_url): - post_url = compat_urlparse.urljoin(urlh.url, post_url) + post_url = urllib.parse.urljoin(urlh.url, post_url) form_data = self._hidden_inputs(form_page) form_data.update(data) return self._download_webpage_handle( @@ -1414,13 +1414,13 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)' redirect_url = self._search_regex( r'(?i)]+src="(%s)' % HistoryPlayerIE._VALID_URL, + rf']+src="({HistoryPlayerIE._VALID_URL})', webpage, 'player URL') return self.url_result(player_url, HistoryPlayerIE.ie_key()) diff --git a/yt_dlp/extractor/aeonco.py b/yt_dlp/extractor/aeonco.py index 390eae32b..22d0266ba 100644 --- a/yt_dlp/extractor/aeonco.py +++ b/yt_dlp/extractor/aeonco.py @@ -16,8 +16,8 @@ class AeonCoIE(InfoExtractor): 'uploader': 'Semiconductor', 'uploader_id': 'semiconductor', 'uploader_url': 'https://vimeo.com/semiconductor', - 'duration': 348 - } + 'duration': 348, + }, }, { 'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it', 'md5': '03582d795382e49f2fd0b427b55de409', @@ -29,8 +29,8 @@ class AeonCoIE(InfoExtractor): 'uploader': 'Aeon Video', 'uploader_id': 'aeonvideo', 'uploader_url': 'https://vimeo.com/aeonvideo', - 'duration': 1344 - } + 'duration': 1344, + }, }, { 'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out', 'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b', diff --git a/yt_dlp/extractor/afreecatv.py 
b/yt_dlp/extractor/afreecatv.py index 3e5738f6a..bcfb02cb9 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -55,7 +55,7 @@ class AfreecaTVBaseIE(InfoExtractor): if result != 1: error = _ERRORS.get(result, 'You have failed to log in.') raise ExtractorError( - 'Unable to login: %s said: %s' % (self.IE_NAME, error), + f'Unable to login: {self.IE_NAME} said: {error}', expected=True) @@ -227,7 +227,7 @@ class AfreecaTVIE(AfreecaTVBaseIE): **traverse_obj(file_element, { 'duration': ('duration', {functools.partial(int_or_none, scale=1000)}), 'timestamp': ('file_start', {unified_timestamp}), - }) + }), }) if traverse_obj(data, ('adult_status', {str})) == 'notLogin': diff --git a/yt_dlp/extractor/agora.py b/yt_dlp/extractor/agora.py index abb2d3ff2..983558425 100644 --- a/yt_dlp/extractor/agora.py +++ b/yt_dlp/extractor/agora.py @@ -168,7 +168,7 @@ class TokFMPodcastIE(InfoExtractor): for ext in ('aac', 'mp3'): url_data = self._download_json( f'https://api.podcast.radioagora.pl/api4/getSongUrl?podcast_id={media_id}&device_id={uuid.uuid4()}&ppre=false&audio={ext}', - media_id, 'Downloading podcast %s URL' % ext) + media_id, f'Downloading podcast {ext} URL') # prevents inserting the mp3 (default) multiple times if 'link_ssl' in url_data and f'.{ext}' in url_data['link_ssl']: formats.append({ @@ -206,8 +206,8 @@ class TokFMAuditionIE(InfoExtractor): } @staticmethod - def _create_url(id): - return f'https://audycje.tokfm.pl/audycja/{id}' + def _create_url(video_id): + return f'https://audycje.tokfm.pl/audycja/{video_id}' def _real_extract(self, url): audition_id = self._match_id(url) diff --git a/yt_dlp/extractor/airtv.py b/yt_dlp/extractor/airtv.py index 6cc63cd7f..cee660dfc 100644 --- a/yt_dlp/extractor/airtv.py +++ b/yt_dlp/extractor/airtv.py @@ -26,7 +26,7 @@ class AirTVIE(InfoExtractor): 'view_count': int, 'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg', 
'timestamp': 1664792603, - } + }, }, { # with youtube_id 'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q', @@ -54,7 +54,7 @@ class AirTVIE(InfoExtractor): 'channel': 'Newsflare', 'duration': 37, 'upload_date': '20180511', - } + }, }] def _get_formats_and_subtitle(self, json_data, video_id): diff --git a/yt_dlp/extractor/aitube.py b/yt_dlp/extractor/aitube.py index 89a64503f..5179b72e9 100644 --- a/yt_dlp/extractor/aitube.py +++ b/yt_dlp/extractor/aitube.py @@ -22,7 +22,7 @@ class AitubeKZVideoIE(InfoExtractor): 'timestamp': 1667370519, 'title': 'Ангел хранитель 1 серия', 'channel_follower_count': int, - } + }, }, { # embed url 'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c', diff --git a/yt_dlp/extractor/aliexpress.py b/yt_dlp/extractor/aliexpress.py index 2e83f2eb6..e8f8618fa 100644 --- a/yt_dlp/extractor/aliexpress.py +++ b/yt_dlp/extractor/aliexpress.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( float_or_none, try_get, @@ -44,7 +43,7 @@ class AliExpressLiveIE(InfoExtractor): 'title': title, 'thumbnail': data.get('coverUrl'), 'uploader': try_get( - data, lambda x: x['followBar']['name'], compat_str), + data, lambda x: x['followBar']['name'], str), 'timestamp': float_or_none(data.get('startTimeLong'), scale=1000), 'formats': formats, } diff --git a/yt_dlp/extractor/aljazeera.py b/yt_dlp/extractor/aljazeera.py index 124bab0d9..9715b497e 100644 --- a/yt_dlp/extractor/aljazeera.py +++ b/yt_dlp/extractor/aljazeera.py @@ -18,7 +18,7 @@ class AlJazeeraIE(InfoExtractor): 'timestamp': 1636219149, 'description': 'U sarajevskim naseljima Rajlovac i Reljevo stambeni objekti, ali i industrijska postrojenja i dalje su pod vodom.', 'upload_date': '20211106', - } + }, }, { 'url': 'https://balkans.aljazeera.net/videos/2021/11/6/djokovic-usao-u-finale-mastersa-u-parizu', 'info_dict': { @@ -33,7 +33,7 @@ class AlJazeeraIE(InfoExtractor): BRIGHTCOVE_URL_RE = 
r'https?://players.brightcove.net/(?P\d+)/(?P[a-zA-Z0-9]+)_(?P[^/]+)/index.html\?videoId=(?P\d+)' def _real_extract(self, url): - base, post_type, id = self._match_valid_url(url).groups() + base, post_type, display_id = self._match_valid_url(url).groups() wp = { 'balkans.aljazeera.net': 'ajb', 'chinese.aljazeera.net': 'chinese', @@ -47,11 +47,11 @@ class AlJazeeraIE(InfoExtractor): 'news': 'news', }[post_type.split('/')[0]] video = self._download_json( - f'https://{base}/graphql', id, query={ + f'https://{base}/graphql', display_id, query={ 'wp-site': wp, 'operationName': 'ArchipelagoSingleArticleQuery', 'variables': json.dumps({ - 'name': id, + 'name': display_id, 'postType': post_type, }), }, headers={ @@ -64,7 +64,7 @@ class AlJazeeraIE(InfoExtractor): embed = 'default' if video_id is None: - webpage = self._download_webpage(url, id) + webpage = self._download_webpage(url, display_id) account, player_id, embed, video_id = self._search_regex(self.BRIGHTCOVE_URL_RE, webpage, 'video id', group=(1, 2, 3, 4), default=(None, None, None, None)) @@ -73,11 +73,11 @@ class AlJazeeraIE(InfoExtractor): return { '_type': 'url_transparent', 'url': url, - 'ie_key': 'Generic' + 'ie_key': 'Generic', } return { '_type': 'url_transparent', 'url': f'https://players.brightcove.net/{account}/{player_id}_{embed}/index.html?videoId={video_id}', - 'ie_key': 'BrightcoveNew' + 'ie_key': 'BrightcoveNew', } diff --git a/yt_dlp/extractor/allocine.py b/yt_dlp/extractor/allocine.py index 2d342cf03..e0859d451 100644 --- a/yt_dlp/extractor/allocine.py +++ b/yt_dlp/extractor/allocine.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( int_or_none, qualities, @@ -95,11 +94,11 @@ class AllocineIE(InfoExtractor): duration = int_or_none(video.get('duration')) view_count = int_or_none(video.get('view_count')) timestamp = unified_timestamp(try_get( - video, lambda x: x['added_at']['date'], compat_str)) + video, lambda x: x['added_at']['date'], 
str)) else: video_id = display_id media_data = self._download_json( - 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id) + f'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media={video_id}', display_id) title = remove_end(strip_or_none(self._html_extract_title(webpage), ' - AlloCiné')) for key, value in media_data['video'].items(): if not key.endswith('Path'): diff --git a/yt_dlp/extractor/allstar.py b/yt_dlp/extractor/allstar.py index 49df4bf3a..5ea1c30e3 100644 --- a/yt_dlp/extractor/allstar.py +++ b/yt_dlp/extractor/allstar.py @@ -33,27 +33,27 @@ _QUERIES = { video: getClip(clipIdentifier: $id) { %s %s } - }''' % (_FIELDS, _EXTRA_FIELDS), + }''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031 'montage': '''query ($id: String!) { video: getMontage(clipIdentifier: $id) { %s } - }''' % _FIELDS, + }''' % _FIELDS, # noqa: UP031 'Clips': '''query ($page: Int!, $user: String!, $game: Int) { videos: clips(search: createdDate, page: $page, user: $user, mobile: false, game: $game) { data { %s %s } } - }''' % (_FIELDS, _EXTRA_FIELDS), + }''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031 'Montages': '''query ($page: Int!, $user: String!) { videos: montages(search: createdDate, page: $page, user: $user) { data { %s } } - }''' % _FIELDS, + }''' % _FIELDS, # noqa: UP031 'Mobile Clips': '''query ($page: Int!, $user: String!) 
{ videos: clips(search: createdDate, page: $page, user: $user, mobile: true) { data { %s %s } } - }''' % (_FIELDS, _EXTRA_FIELDS), + }''' % (_FIELDS, _EXTRA_FIELDS), # noqa: UP031 } @@ -121,7 +121,7 @@ class AllstarIE(AllstarBaseIE): 'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d', 'upload_date': '20230425', 'view_count': int, - } + }, }, { 'url': 'https://allstar.gg/clip?clip=8LJLY4JKB', 'info_dict': { @@ -139,7 +139,7 @@ class AllstarIE(AllstarBaseIE): 'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d', 'upload_date': '20230702', 'view_count': int, - } + }, }, { 'url': 'https://allstar.gg/montage?montage=643e64089da7e9363e1fa66c', 'info_dict': { @@ -155,7 +155,7 @@ class AllstarIE(AllstarBaseIE): 'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d', 'upload_date': '20230418', 'view_count': int, - } + }, }, { 'url': 'https://allstar.gg/montage?montage=RILJMH6QOS', 'info_dict': { @@ -171,7 +171,7 @@ class AllstarIE(AllstarBaseIE): 'uploader_url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d', 'upload_date': '20230703', 'view_count': int, - } + }, }] def _real_extract(self, url): @@ -191,28 +191,28 @@ class AllstarProfileIE(AllstarBaseIE): 'id': '62b8bdfc9021052f7905882d-clips', 'title': 'cherokee - Clips', }, - 'playlist_mincount': 15 + 'playlist_mincount': 15, }, { 'url': 'https://allstar.gg/u/cherokee?game=730&view=Clips', 'info_dict': { 'id': '62b8bdfc9021052f7905882d-clips-730', 'title': 'cherokee - Clips - 730', }, - 'playlist_mincount': 15 + 'playlist_mincount': 15, }, { 'url': 'https://allstar.gg/u/62b8bdfc9021052f7905882d?view=Montages', 'info_dict': { 'id': '62b8bdfc9021052f7905882d-montages', 'title': 'cherokee - Montages', }, - 'playlist_mincount': 4 + 'playlist_mincount': 4, }, { 'url': 'https://allstar.gg/profile?user=cherokee&view=Mobile Clips', 'info_dict': { 'id': '62b8bdfc9021052f7905882d-mobile', 'title': 'cherokee - Mobile Clips', }, - 'playlist_mincount': 1 + 'playlist_mincount': 1, }] _PAGE_SIZE = 10 
diff --git a/yt_dlp/extractor/alphaporno.py b/yt_dlp/extractor/alphaporno.py index f927965de..7b74d5524 100644 --- a/yt_dlp/extractor/alphaporno.py +++ b/yt_dlp/extractor/alphaporno.py @@ -25,7 +25,7 @@ class AlphaPornoIE(InfoExtractor): 'tbr': 1145, 'categories': list, 'age_limit': 18, - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/alsace20tv.py b/yt_dlp/extractor/alsace20tv.py index ea3332e3d..c315e4f21 100644 --- a/yt_dlp/extractor/alsace20tv.py +++ b/yt_dlp/extractor/alsace20tv.py @@ -12,7 +12,7 @@ from ..utils import ( class Alsace20TVBaseIE(InfoExtractor): def _extract_video(self, video_id, url=None): info = self._download_json( - 'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key=%s&habillage=0&mode=html' % (video_id, ), + f'https://www.alsace20.tv/visionneuse/visio_v9_js.php?key={video_id}&habillage=0&mode=html', video_id) or {} title = info.get('titre') @@ -24,9 +24,9 @@ class Alsace20TVBaseIE(InfoExtractor): else self._extract_mpd_formats(fmt_url, video_id, mpd_id=res, fatal=False)) webpage = (url and self._download_webpage(url, video_id, fatal=False)) or '' - thumbnail = url_or_none(dict_get(info, ('image', 'preview', )) or self._og_search_thumbnail(webpage)) + thumbnail = url_or_none(dict_get(info, ('image', 'preview')) or self._og_search_thumbnail(webpage)) upload_date = self._search_regex(r'/(\d{6})_', thumbnail, 'upload_date', default=None) - upload_date = unified_strdate('20%s-%s-%s' % (upload_date[:2], upload_date[2:4], upload_date[4:])) if upload_date else None + upload_date = unified_strdate(f'20{upload_date[:2]}-{upload_date[2:4]}-{upload_date[4:]}') if upload_date else None return { 'id': video_id, 'title': title, diff --git a/yt_dlp/extractor/altcensored.py b/yt_dlp/extractor/altcensored.py index 6878918a0..bfbf6b6af 100644 --- a/yt_dlp/extractor/altcensored.py +++ b/yt_dlp/extractor/altcensored.py @@ -34,7 +34,7 @@ class AltCensoredIE(InfoExtractor): 'thumbnail': 
'https://archive.org/download/youtube-k0srjLSkga8/youtube-k0srjLSkga8.thumbs/k0srjLSkga8_000925.jpg', 'view_count': int, 'categories': ['News & Politics'], - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py index cb2b9891e..ce03a4265 100644 --- a/yt_dlp/extractor/alura.py +++ b/yt_dlp/extractor/alura.py @@ -1,7 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( ExtractorError, clean_html, @@ -21,7 +21,7 @@ class AluraIE(InfoExtractor): 'info_dict': { 'id': '60095', 'ext': 'mp4', - 'title': 'Referências, ref-set e alter' + 'title': 'Referências, ref-set e alter', }, 'skip': 'Requires alura account credentials'}, { @@ -30,7 +30,7 @@ class AluraIE(InfoExtractor): 'only_matching': True}, { 'url': 'https://cursos.alura.com.br/course/fundamentos-market-digital/task/55219', - 'only_matching': True} + 'only_matching': True}, ] def _real_extract(self, url): @@ -62,7 +62,7 @@ class AluraIE(InfoExtractor): return { 'id': video_id, 'title': video_title, - "formats": formats + 'formats': formats, } def _perform_login(self, username, password): @@ -91,7 +91,7 @@ class AluraIE(InfoExtractor): 'post url', default=self._LOGIN_URL, group='url') if not post_url.startswith('http'): - post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) + post_url = urllib.parse.urljoin(self._LOGIN_URL, post_url) response = self._download_webpage( post_url, None, 'Logging in', @@ -103,7 +103,7 @@ class AluraIE(InfoExtractor): r'(?s)]+class="alert-message[^"]*">(.+?)

', response, 'error message', default=None) if error: - raise ExtractorError('Unable to login: %s' % error, expected=True) + raise ExtractorError(f'Unable to login: {error}', expected=True) raise ExtractorError('Unable to log in') @@ -119,7 +119,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE @classmethod def suitable(cls, url): - return False if AluraIE.suitable(url) else super(AluraCourseIE, cls).suitable(url) + return False if AluraIE.suitable(url) else super().suitable(url) def _real_extract(self, url): @@ -157,7 +157,7 @@ class AluraCourseIE(AluraIE): # XXX: Do not subclass from concrete IE 'url': video_url, 'id_key': self.ie_key(), 'chapter': chapter, - 'chapter_number': chapter_number + 'chapter_number': chapter_number, } entries.append(entry) return self.playlist_result(entries, course_path, course_title) diff --git a/yt_dlp/extractor/amadeustv.py b/yt_dlp/extractor/amadeustv.py index 2f5ca9137..f4ea04efd 100644 --- a/yt_dlp/extractor/amadeustv.py +++ b/yt_dlp/extractor/amadeustv.py @@ -24,7 +24,7 @@ class AmadeusTVIE(InfoExtractor): 'display_id': '65091a87ff85af59d9fc54c3', 'view_count': int, 'description': 'md5:a0357b9c215489e2067cbae0b777bb95', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/amara.py b/yt_dlp/extractor/amara.py index 509b21a53..ed0f0cd35 100644 --- a/yt_dlp/extractor/amara.py +++ b/yt_dlp/extractor/amara.py @@ -25,7 +25,7 @@ class AmaraIE(InfoExtractor): 'uploader': 'PBS NewsHour', 'uploader_id': 'PBSNewsHour', 'timestamp': 1549639570, - } + }, }, { # Vimeo 'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011', @@ -40,8 +40,8 @@ class AmaraIE(InfoExtractor): 'timestamp': 1294763658, 'upload_date': '20110111', 'uploader': 'Sam Morrill', - 'uploader_id': 'sammorrill' - } + 'uploader_id': 'sammorrill', + }, }, { # Direct Link 'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/', @@ -55,13 +55,13 @@ class AmaraIE(InfoExtractor): 'subtitles': 
dict, 'upload_date': '20091007', 'timestamp': 1254942511, - } + }, }] def _real_extract(self, url): video_id = self._match_id(url) meta = self._download_json( - 'https://amara.org/api/videos/%s/' % video_id, + f'https://amara.org/api/videos/{video_id}/', video_id, query={'format': 'json'}) title = meta['title'] video_url = meta['all_urls'][0] diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py index a03f983e0..d1b91665c 100644 --- a/yt_dlp/extractor/amazon.py +++ b/yt_dlp/extractor/amazon.py @@ -61,13 +61,13 @@ class AmazonStoreIE(InfoExtractor): }] def _real_extract(self, url): - id = self._match_id(url) + playlist_id = self._match_id(url) for retry in self.RetryManager(): - webpage = self._download_webpage(url, id) + webpage = self._download_webpage(url, playlist_id) try: data_json = self._search_json( - r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', id, + r'var\s?obj\s?=\s?jQuery\.parseJSON\(\'', webpage, 'data', playlist_id, transform_source=js_to_json) except ExtractorError as e: retry.error = e @@ -81,7 +81,7 @@ class AmazonStoreIE(InfoExtractor): 'height': int_or_none(video.get('videoHeight')), 'width': int_or_none(video.get('videoWidth')), } for video in (data_json.get('videos') or []) if video.get('isVideo') and video.get('url')] - return self.playlist_result(entries, playlist_id=id, playlist_title=data_json.get('title')) + return self.playlist_result(entries, playlist_id=playlist_id, playlist_title=data_json.get('title')) class AmazonReviewsIE(InfoExtractor): diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py index 2c71c5ef5..0590a344a 100644 --- a/yt_dlp/extractor/amazonminitv.py +++ b/yt_dlp/extractor/amazonminitv.py @@ -25,7 +25,7 @@ class AmazonMiniTVBaseIE(InfoExtractor): asin, note=note, headers={ 'Content-Type': 'application/json', 'currentpageurl': '/', - 'currentplatform': 'dWeb' + 'currentplatform': 'dWeb', }, data=json.dumps(data).encode() if data else None, query=None if data else { 
'deviceType': 'A1WMMUXPCUJL4N', diff --git a/yt_dlp/extractor/amcnetworks.py b/yt_dlp/extractor/amcnetworks.py index 10bd021c5..15a86e245 100644 --- a/yt_dlp/extractor/amcnetworks.py +++ b/yt_dlp/extractor/amcnetworks.py @@ -64,8 +64,8 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE site, display_id = self._match_valid_url(url).groups() requestor_id = self._REQUESTOR_ID_MAP[site] page_data = self._download_json( - 'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s' - % (requestor_id.lower(), display_id), display_id)['data'] + f'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/{requestor_id.lower()}/url/{display_id}', + display_id)['data'] properties = page_data.get('properties') or {} query = { 'mbr': 'true', @@ -76,15 +76,15 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE try: for v in page_data['children']: if v.get('type') == 'video-player': - releasePid = v['properties']['currentVideo']['meta']['releasePid'] - tp_path = 'M_UwQC/' + releasePid + release_pid = v['properties']['currentVideo']['meta']['releasePid'] + tp_path = 'M_UwQC/' + release_pid media_url = 'https://link.theplatform.com/s/' + tp_path video_player_count += 1 except KeyError: pass if video_player_count > 1: self.report_warning( - 'The JSON data has %d video players. Only one will be extracted' % video_player_count) + f'The JSON data has {video_player_count} video players. Only one will be extracted') # Fall back to videoPid if releasePid not found. # TODO: Fall back to videoPid if releasePid manifest uses DRM. 
@@ -131,7 +131,7 @@ class AMCNetworksIE(ThePlatformIE): # XXX: Do not subclass from concrete IE }) ns_keys = theplatform_metadata.get('$xmlns', {}).keys() if ns_keys: - ns = list(ns_keys)[0] + ns = next(iter(ns_keys)) episode = theplatform_metadata.get(ns + '$episodeTitle') or None episode_number = int_or_none( theplatform_metadata.get(ns + '$episode')) diff --git a/yt_dlp/extractor/americastestkitchen.py b/yt_dlp/extractor/americastestkitchen.py index e889458a2..a6337e482 100644 --- a/yt_dlp/extractor/americastestkitchen.py +++ b/yt_dlp/extractor/americastestkitchen.py @@ -87,13 +87,13 @@ class AmericasTestKitchenIE(InfoExtractor): resource_type = 'episodes' resource = self._download_json( - 'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id) + f'https://www.americastestkitchen.com/api/v6/{resource_type}/{video_id}', video_id) video = resource['video'] if is_episode else resource episode = resource if is_episode else resource.get('episode') or {} return { '_type': 'url_transparent', - 'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'], + 'url': 'https://player.zype.com/embed/{}.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ'.format(video['zypeId']), 'ie_key': 'Zype', 'description': clean_html(video.get('description')), 'timestamp': unified_timestamp(video.get('publishDate')), @@ -174,22 +174,22 @@ class AmericasTestKitchenSeasonIE(InfoExtractor): ] if season_number: - playlist_id = 'season_%d' % season_number - playlist_title = 'Season %d' % season_number + playlist_id = f'season_{season_number}' + playlist_title = f'Season {season_number}' facet_filters.append('search_season_list:' + playlist_title) else: playlist_id = show playlist_title = title season_search = self._download_json( - 'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug, + 
f'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_{slug}_season_desc_production', playlist_id, headers={ 'Origin': 'https://www.americastestkitchen.com', 'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805', 'X-Algolia-Application-Id': 'Y1FNZXUI30', }, query={ 'facetFilters': json.dumps(facet_filters), - 'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title,search_atk_episode_season' % slug, + 'attributesToRetrieve': f'description,search_{slug}_episode_number,search_document_date,search_url,title,search_atk_episode_season', 'attributesToHighlight': '', 'hitsPerPage': 1000, }) @@ -207,7 +207,7 @@ class AmericasTestKitchenSeasonIE(InfoExtractor): 'description': episode.get('description'), 'timestamp': unified_timestamp(episode.get('search_document_date')), 'season_number': season_number, - 'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)), + 'episode_number': int_or_none(episode.get(f'search_{slug}_episode_number')), 'ie_key': AmericasTestKitchenIE.ie_key(), } diff --git a/yt_dlp/extractor/amp.py b/yt_dlp/extractor/amp.py index 6b2bf2db2..adf473374 100644 --- a/yt_dlp/extractor/amp.py +++ b/yt_dlp/extractor/amp.py @@ -19,12 +19,12 @@ class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with 'Unable to download Akamai AMP feed', transform_source=strip_jsonp) item = feed.get('channel', {}).get('item') if not item: - raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error'])) + raise ExtractorError('{} said: {}'.format(self.IE_NAME, feed['error'])) video_id = item['guid'] def get_media_node(name, default=None): - media_name = 'media-%s' % name + media_name = f'media-{name}' media_group = item.get('media-group') or item return media_group.get(media_name) or item.get(media_name) or item.get(name, default) diff --git a/yt_dlp/extractor/anchorfm.py b/yt_dlp/extractor/anchorfm.py index 5e78f372e..652154a4a 100644 --- a/yt_dlp/extractor/anchorfm.py +++ 
b/yt_dlp/extractor/anchorfm.py @@ -29,7 +29,7 @@ class AnchorFMEpisodeIE(InfoExtractor): 'release_date': '20230121', 'release_timestamp': 1674285179, 'episode_id': 'e1tpt3d', - } + }, }, { # embed url 'url': 'https://anchor.fm/apakatatempo/embed/episodes/S2E75-Perang-Bintang-di-Balik-Kasus-Ferdy-Sambo-dan-Ismail-Bolong-e1shjqd', @@ -50,7 +50,7 @@ class AnchorFMEpisodeIE(InfoExtractor): 'season': 'Season 2', 'season_number': 2, 'episode_id': 'e1shjqd', - } + }, }] _WEBPAGE_TESTS = [{ @@ -72,7 +72,7 @@ class AnchorFMEpisodeIE(InfoExtractor): 'thumbnail': 'https://s3-us-west-2.amazonaws.com/anchor-generated-image-bank/production/podcast_uploaded_episode400/2627805/2627805-1671590688729-4db3882ac9e4b.jpg', 'uploader': 'Podcast Tempo', 'channel': 'apakatatempo', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/angel.py b/yt_dlp/extractor/angel.py index 9f5b9b523..6800fe3d7 100644 --- a/yt_dlp/extractor/angel.py +++ b/yt_dlp/extractor/angel.py @@ -15,8 +15,8 @@ class AngelIE(InfoExtractor): 'title': 'Tuttle Twins Season 1, Episode 1: When Laws Give You Lemons', 'description': 'md5:73b704897c20ab59c433a9c0a8202d5e', 'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$', - 'duration': 1359.0 - } + 'duration': 1359.0, + }, }, { 'url': 'https://www.angel.com/watch/the-chosen/episode/8dfb714d-bca5-4812-8125-24fb9514cd10/season-1/episode-1/i-have-called-you-by-name', 'md5': 'e4774bad0a5f0ad2e90d175cafdb797d', @@ -26,8 +26,8 @@ class AngelIE(InfoExtractor): 'title': 'The Chosen Season 1, Episode 1: I Have Called You By Name', 'description': 'md5:aadfb4827a94415de5ff6426e6dee3be', 'thumbnail': r're:^https?://images.angelstudios.com/image/upload/angel-app/.*$', - 'duration': 3276.0 - } + 'duration': 3276.0, + }, }] def _real_extract(self, url): @@ -44,7 +44,7 @@ class AngelIE(InfoExtractor): 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), 'formats': formats, - 'subtitles': subtitles + 
'subtitles': subtitles, } # Angel uses cloudinary in the background and supports image transformations. diff --git a/yt_dlp/extractor/antenna.py b/yt_dlp/extractor/antenna.py index 2929d6550..b1a01791f 100644 --- a/yt_dlp/extractor/antenna.py +++ b/yt_dlp/extractor/antenna.py @@ -105,7 +105,7 @@ class Ant1NewsGrArticleIE(AntennaBaseIE): info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle') embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage)) if not embed_urls: - raise ExtractorError('no videos found for %s' % video_id, expected=True) + raise ExtractorError(f'no videos found for {video_id}', expected=True) return self.playlist_from_matches( embed_urls, video_id, info.get('title'), ie=Ant1NewsGrEmbedIE.ie_key(), video_kwargs={'url_transparent': True, 'timestamp': info.get('timestamp')}) diff --git a/yt_dlp/extractor/anvato.py b/yt_dlp/extractor/anvato.py index 0df50333c..bf3d60b5e 100644 --- a/yt_dlp/extractor/anvato.py +++ b/yt_dlp/extractor/anvato.py @@ -238,7 +238,7 @@ class AnvatoIE(InfoExtractor): 'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900', 'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99', 'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe', - 'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582' + 'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582', } def _generate_nfl_token(self, anvack, mcp_id): @@ -255,7 +255,7 @@ class AnvatoIE(InfoExtractor): token } } -}''' % (anvack, mcp_id), +}''' % (anvack, mcp_id), # noqa: UP031 }).encode(), headers={ 'Authorization': auth_token, 'Content-Type': 'application/json', @@ -299,7 +299,7 @@ class AnvatoIE(InfoExtractor): return self._download_json( video_data_url, video_id, transform_source=strip_jsonp, query=query, - data=json.dumps({'api': api}, separators=(',', ':')).encode('utf-8')) + data=json.dumps({'api': api}, separators=(',', 
':')).encode()) def _get_anvato_videos(self, access_key, video_id, token): video_data = self._get_video_json(access_key, video_id, token) @@ -358,7 +358,7 @@ class AnvatoIE(InfoExtractor): for caption in video_data.get('captions', []): a_caption = { 'url': caption['url'], - 'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None + 'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None, } subtitles.setdefault(caption['language'], []).append(a_caption) subtitles = self._merge_subtitles(subtitles, hls_subs, vtt_subs) diff --git a/yt_dlp/extractor/aol.py b/yt_dlp/extractor/aol.py index 455f66795..893dce7b0 100644 --- a/yt_dlp/extractor/aol.py +++ b/yt_dlp/extractor/aol.py @@ -30,7 +30,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE 'params': { # m3u8 download 'skip_download': True, - } + }, }, { # video with vidible ID 'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/', @@ -46,7 +46,7 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE 'params': { # m3u8 download 'skip_download': True, - } + }, }, { 'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/', 'only_matching': True, @@ -83,10 +83,10 @@ class AolIE(YahooIE): # XXX: Do not subclass from concrete IE return self._extract_yahoo_video(video_id, 'us') response = self._download_json( - 'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id, + f'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/{video_id}/details', video_id)['response'] if response['statusText'] != 'Ok': - raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True) + raise ExtractorError('{} said: {}'.format(self.IE_NAME, response['statusText']), expected=True) video_data = response['data'] formats = [] diff --git a/yt_dlp/extractor/apa.py b/yt_dlp/extractor/apa.py index 1ea0b1de4..fed597042 100644 --- a/yt_dlp/extractor/apa.py +++ b/yt_dlp/extractor/apa.py @@ -34,7 +34,7 @@ 
class APAIE(InfoExtractor): video_id, base_url = mobj.group('id', 'base_url') webpage = self._download_webpage( - '%s/player/%s' % (base_url, video_id), video_id) + f'{base_url}/player/{video_id}', video_id) jwplatform_id = self._search_regex( r'media[iI]d\s*:\s*["\'](?P[a-zA-Z0-9]{8})', webpage, @@ -47,7 +47,7 @@ class APAIE(InfoExtractor): def extract(field, name=None): return self._search_regex( - r'\b%s["\']\s*:\s*(["\'])(?P(?:(?!\1).)+)\1' % field, + rf'\b{field}["\']\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, name or field, default=None, group='value') title = extract('title') or video_id diff --git a/yt_dlp/extractor/applepodcasts.py b/yt_dlp/extractor/applepodcasts.py index 49bbeab82..bd301e904 100644 --- a/yt_dlp/extractor/applepodcasts.py +++ b/yt_dlp/extractor/applepodcasts.py @@ -24,7 +24,7 @@ class ApplePodcastsIE(InfoExtractor): 'duration': 6454, 'series': 'The Tim Dillon Show', 'thumbnail': 're:.+[.](png|jpe?g|webp)', - } + }, }, { 'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777', 'only_matching': True, diff --git a/yt_dlp/extractor/appletrailers.py b/yt_dlp/extractor/appletrailers.py index 21103aee5..0a600f6df 100644 --- a/yt_dlp/extractor/appletrailers.py +++ b/yt_dlp/extractor/appletrailers.py @@ -1,8 +1,8 @@ import json import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urlparse from ..utils import ( int_or_none, parse_duration, @@ -64,7 +64,7 @@ class AppleTrailersIE(InfoExtractor): 'uploader_id': 'wb', }, }, - ] + ], }, { 'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/', 'info_dict': { @@ -99,7 +99,7 @@ class AppleTrailersIE(InfoExtractor): webpage = self._download_webpage(url, movie) film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id') film_data = self._download_json( - 'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id, + f'http://trailers.apple.com/trailers/feeds/data/{film_id}.json', film_id, 
fatal=False) if film_data: @@ -114,7 +114,7 @@ class AppleTrailersIE(InfoExtractor): if not src: continue formats.append({ - 'format_id': '%s-%s' % (version, size), + 'format_id': f'{version}-{size}', 'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src), 'width': int_or_none(size_data.get('width')), 'height': int_or_none(size_data.get('height')), @@ -134,7 +134,7 @@ class AppleTrailersIE(InfoExtractor): page_data = film_data.get('page', {}) return self.playlist_result(entries, film_id, page_data.get('movie_title')) - playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc') + playlist_url = urllib.parse.urljoin(url, 'includes/playlists/itunes.inc') def fix_html(s): s = re.sub(r'(?s).*?', '', s) @@ -143,10 +143,9 @@ class AppleTrailersIE(InfoExtractor): # like: http://trailers.apple.com/trailers/wb/gravity/ def _clean_json(m): - return 'iTunes.playURL(%s);' % m.group(1).replace('\'', ''') + return 'iTunes.playURL({});'.format(m.group(1).replace('\'', ''')) s = re.sub(self._JSON_RE, _clean_json, s) - s = '%s' % s - return s + return f'{s}' doc = self._download_xml(playlist_url, movie, transform_source=fix_html) playlist = [] @@ -170,18 +169,18 @@ class AppleTrailersIE(InfoExtractor): duration = 60 * int(m.group('minutes')) + int(m.group('seconds')) trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower() - settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id) + settings_json_url = urllib.parse.urljoin(url, f'includes/settings/{trailer_id}.json') settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json') formats = [] - for format in settings['metadata']['sizes']: + for fmt in settings['metadata']['sizes']: # The src is a file pointing to the real video file - format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src']) + format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', fmt['src']) formats.append({ 'url': format_url, - 'format': format['type'], - 'width': int_or_none(format['width']), 
- 'height': int_or_none(format['height']), + 'format': fmt['type'], + 'width': int_or_none(fmt['width']), + 'height': int_or_none(fmt['height']), }) playlist.append({ @@ -229,7 +228,7 @@ class AppleTrailersSectionIE(InfoExtractor): 'title': 'Movie Studios', }, } - _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P%s)' % '|'.join(_SECTIONS) + _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P{})'.format('|'.join(_SECTIONS)) _TESTS = [{ 'url': 'http://trailers.apple.com/#section=justadded', 'info_dict': { @@ -270,7 +269,7 @@ class AppleTrailersSectionIE(InfoExtractor): def _real_extract(self, url): section = self._match_id(url) section_data = self._download_json( - 'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'], + 'http://trailers.apple.com/trailers/home/feeds/{}.json'.format(self._SECTIONS[section]['feed_path']), section) entries = [ self.url_result('http://trailers.apple.com' + e['location']) diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py index 41f3a4ff2..f5a55efc4 100644 --- a/yt_dlp/extractor/archiveorg.py +++ b/yt_dlp/extractor/archiveorg.py @@ -1,10 +1,11 @@ +from __future__ import annotations + import json import re import urllib.parse from .common import InfoExtractor from .youtube import YoutubeBaseInfoExtractor, YoutubeIE -from ..compat import compat_urllib_parse_unquote from ..networking import HEADRequest from ..networking.exceptions import HTTPError from ..utils import ( @@ -145,7 +146,7 @@ class ArchiveOrgIE(InfoExtractor): 'title': 'Bells Of Rostov', 'ext': 'mp3', }, - 'skip': 'restricted' + 'skip': 'restricted', }, { 'url': 'https://archive.org/details/lp_the-music-of-russia_various-artists-a-askaryan-alexander-melik/disc1/02.02.+Song+And+Chorus+In+The+Polovetsian+Camp+From+%22Prince+Igor%22+(Act+2%2C+Scene+1).mp3', 'md5': '1d0aabe03edca83ca58d9ed3b493a3c3', @@ -158,7 +159,7 @@ class ArchiveOrgIE(InfoExtractor): 'description': 
'md5:012b2d668ae753be36896f343d12a236', 'upload_date': '20190928', }, - 'skip': 'restricted' + 'skip': 'restricted', }, { # Original formats are private 'url': 'https://archive.org/details/irelandthemakingofarepublic', @@ -202,8 +203,8 @@ class ArchiveOrgIE(InfoExtractor): 'thumbnail': 'https://archive.org/download/irelandthemakingofarepublic/irelandthemakingofarepublic.thumbs/irelandthemakingofarepublicreel2_001554.jpg', 'display_id': 'irelandthemakingofarepublicreel2.mov', }, - } - ] + }, + ], }] @staticmethod @@ -220,7 +221,7 @@ class ArchiveOrgIE(InfoExtractor): def _real_extract(self, url): video_id = urllib.parse.unquote_plus(self._match_id(url)) - identifier, entry_id = (video_id.split('/', 1) + [None])[:2] + identifier, _, entry_id = video_id.partition('/') # Archive.org metadata API doesn't clearly demarcate playlist entries # or subtitle tracks, so we get them from the embeddable player. @@ -246,7 +247,7 @@ class ArchiveOrgIE(InfoExtractor): if track['kind'] != 'subtitles': continue entries[p['orig']][track['label']] = { - 'url': 'https://archive.org/' + track['file'].lstrip('/') + 'url': 'https://archive.org/' + track['file'].lstrip('/'), } metadata = self._download_json('http://archive.org/metadata/' + identifier, identifier) @@ -293,7 +294,9 @@ class ArchiveOrgIE(InfoExtractor): 'height': int_or_none(f.get('width')), 'filesize': int_or_none(f.get('size'))}) - extension = (f['name'].rsplit('.', 1) + [None])[1] + _, has_ext, extension = f['name'].rpartition('.') + if not has_ext: + extension = None # We don't want to skip private formats if the user has access to them, # however without access to an account with such privileges we can't implement/test this. 
@@ -308,7 +311,7 @@ class ArchiveOrgIE(InfoExtractor): 'filesize': int_or_none(f.get('size')), 'protocol': 'https', 'source_preference': 0 if f.get('source') == 'original' else -1, - 'format_note': f.get('source') + 'format_note': f.get('source'), }) for entry in entries.values(): @@ -371,7 +374,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader_url': 'https://www.youtube.com/user/Zeurel', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'channel_url': 'https://www.youtube.com/channel/UCukCyHaD-bK3in_pKpfH9Eg', - } + }, }, { # Internal link 'url': 'https://web.archive.org/web/2oe/http://wayback-fakeurl.archive.org/yt/97t7Xj_iBv0', @@ -388,7 +391,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader_url': 'https://www.youtube.com/user/1veritasium', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'channel_url': 'https://www.youtube.com/channel/UCHnyfMqiRRG1u-2MsSQLbXA', - } + }, }, { # Video from 2012, webm format itag 45. Newest capture is deleted video, with an invalid description. # Should use the date in the link. Title ends with '- Youtube'. Capture has description in eow-description @@ -403,8 +406,8 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader_id': 'machinima', 'uploader_url': 'https://www.youtube.com/user/machinima', 'thumbnail': r're:https?://.*\.(jpg|webp)', - 'uploader': 'machinima' - } + 'uploader': 'machinima', + }, }, { # FLV video. 
Video file URL does not provide itag information 'url': 'https://web.archive.org/web/20081211103536/http://www.youtube.com/watch?v=jNQXAC9IVRw', @@ -421,7 +424,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UC4QobU6STFB0P71PMvOGN5A', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'uploader': 'jawed', - } + }, }, { 'url': 'https://web.archive.org/web/20110712231407/http://www.youtube.com/watch?v=lTx3G6h2xyA', 'info_dict': { @@ -437,7 +440,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader_url': 'https://www.youtube.com/user/itsmadeon', 'channel_url': 'https://www.youtube.com/channel/UCqMDNf3Pn5L7pcNkuSEeO3w', 'thumbnail': r're:https?://.*\.(jpg|webp)', - } + }, }, { # First capture is of dead video, second is the oldest from CDX response. 'url': 'https://web.archive.org/https://www.youtube.com/watch?v=1JYutPM8O6E', @@ -454,7 +457,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCdIaNUarhzLSXGoItz7BHVA', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'uploader': 'ETC News', - } + }, }, { # First capture of dead video, capture date in link links to dead capture. 'url': 'https://web.archive.org/web/20180803221945/https://www.youtube.com/watch?v=6FPhZJGvf4E', @@ -473,15 +476,15 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader': 'ETC News', }, 'expected_warnings': [ - r'unable to download capture webpage \(it may not be archived\)' - ] + r'unable to download capture webpage \(it may not be archived\)', + ], }, { # Very old YouTube page, has - YouTube in title. 'url': 'http://web.archive.org/web/20070302011044/http://youtube.com/watch?v=-06-KB9XTzg', 'info_dict': { 'id': '-06-KB9XTzg', 'ext': 'flv', - 'title': 'New Coin Hack!! 100% Safe!!' - } + 'title': 'New Coin Hack!! 
100% Safe!!', + }, }, { 'url': 'web.archive.org/https://www.youtube.com/watch?v=dWW7qP423y8', 'info_dict': { @@ -495,7 +498,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'description': 'md5:7b567f898d8237b256f36c1a07d6d7bc', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'uploader': 'DankPods', - } + }, }, { # player response contains '};' See: https://github.com/ytdl-org/youtube-dl/issues/27093 'url': 'https://web.archive.org/web/20200827003909if_/http://www.youtube.com/watch?v=6Dh-RL__uN4', @@ -512,7 +515,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader_id': 'PewDiePie', 'uploader_url': 'https://www.youtube.com/user/PewDiePie', 'thumbnail': r're:https?://.*\.(jpg|webp)', - } + }, }, { # ~June 2010 Capture. swfconfig 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=8XeW5ilk-9Y', @@ -527,7 +530,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'thumbnail': r're:https?://.*\.(jpg|webp)', 'uploader_url': 'https://www.youtube.com/user/HowTheWorldWorks', 'upload_date': '20090520', - } + }, }, { # Jan 2011: watch-video-date/eow-date surrounded by whitespace 'url': 'https://web.archive.org/web/20110126141719/http://www.youtube.com/watch?v=Q_yjX80U7Yc', @@ -542,7 +545,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'thumbnail': r're:https?://.*\.(jpg|webp)', 'duration': 132, 'uploader_url': 'https://www.youtube.com/user/claybutlermusic', - } + }, }, { # ~May 2009 swfArgs. ytcfg is spread out over various vars 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=c5uJgG05xUY', @@ -557,7 +560,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'description': 'md5:4ca77d79538064e41e4cc464e93f44f0', 'thumbnail': r're:https?://.*\.(jpg|webp)', 'duration': 754, - } + }, }, { # ~June 2012. Upload date is in another lang so cannot extract. 
'url': 'https://web.archive.org/web/20120607174520/http://www.youtube.com/watch?v=xWTLLl-dQaA', @@ -571,7 +574,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'uploader': 'BlackNerdComedy', 'duration': 182, 'thumbnail': r're:https?://.*\.(jpg|webp)', - } + }, }, { # ~July 2013 'url': 'https://web.archive.org/web/*/https://www.youtube.com/watch?v=9eO1aasHyTM', @@ -587,7 +590,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UC62R2cBezNBOqxSerfb1nMQ', 'upload_date': '20060428', 'uploader': 'punkybird', - } + }, }, { # April 2020: Player response in player config 'url': 'https://web.archive.org/web/20200416034815/https://www.youtube.com/watch?v=Cf7vS8jc7dY&gl=US&hl=en', @@ -604,7 +607,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'thumbnail': r're:https?://.*\.(jpg|webp)', 'description': 'md5:c625bb3c02c4f5fb4205971e468fa341', 'uploader_url': 'https://www.youtube.com/user/GameGrumps', - } + }, }, { # watch7-user-header with yt-user-info 'url': 'ytarchive:kbh4T_b4Ixw:20160307085057', @@ -619,7 +622,7 @@ class YoutubeWebArchiveIE(InfoExtractor): 'thumbnail': r're:https?://.*\.(jpg|webp)', 'upload_date': '20150503', 'channel_id': 'UCnTaGvsHmMy792DWeT6HbGA', - } + }, }, { # April 2012 'url': 'https://web.archive.org/web/0/https://www.youtube.com/watch?v=SOm7mPoPskU', @@ -634,35 +637,35 @@ class YoutubeWebArchiveIE(InfoExtractor): 'duration': 200, 'upload_date': '20120407', 'uploader_id': 'thecomputernerd01', - } + }, }, { 'url': 'https://web.archive.org/web/http://www.youtube.com/watch?v=kH-G_aIBlFw', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://web.archive.org/web/20050214000000_if/http://www.youtube.com/watch?v=0altSZ96U4M', - 'only_matching': True + 'only_matching': True, }, { # Video not archived, only capture is unavailable video page 'url': 'https://web.archive.org/web/20210530071008/https://www.youtube.com/watch?v=lHJTf93HL1s&spfreload=10', - 'only_matching': True + 'only_matching': True, }, { # 
Encoded url 'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fgl%3DUS%26v%3DAkhihxRKcrs%26hl%3Den', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://web.archive.org/web/20120712231619/http%3A//www.youtube.com/watch%3Fv%3DAkhihxRKcrs%26gl%3DUS%26hl%3Den', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://web.archive.org/web/20060527081937/http://www.youtube.com:80/watch.php?v=ELTFsLT73fA&search=soccer', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://web.archive.org/http://www.youtube.com:80/watch?v=-05VVye-ffg', - 'only_matching': True + 'only_matching': True, }, { 'url': 'ytarchive:BaW_jenozKc:20050214000000', - 'only_matching': True + 'only_matching': True, }, { 'url': 'ytarchive:BaW_jenozKc', - 'only_matching': True + 'only_matching': True, }, ] _YT_INITIAL_DATA_RE = YoutubeBaseInfoExtractor._YT_INITIAL_DATA_RE @@ -673,13 +676,13 @@ class YoutubeWebArchiveIE(InfoExtractor): _YT_DEFAULT_THUMB_SERVERS = ['i.ytimg.com'] # thumbnails most likely archived on these servers _YT_ALL_THUMB_SERVERS = orderedSet( - _YT_DEFAULT_THUMB_SERVERS + ['img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(0, 5), 9)]]) + [*_YT_DEFAULT_THUMB_SERVERS, 'img.youtube.com', *[f'{c}{n or ""}.ytimg.com' for c in ('i', 's') for n in (*range(5), 9)]]) _WAYBACK_BASE_URL = 'https://web.archive.org/web/%sif_/' _OLDEST_CAPTURE_DATE = 20050214000000 _NEWEST_CAPTURE_DATE = 20500101000000 - def _call_cdx_api(self, item_id, url, filters: list = None, collapse: list = None, query: dict = None, note=None, fatal=False): + def _call_cdx_api(self, item_id, url, filters: list | None = None, collapse: list | None = None, query: dict | None = None, note=None, fatal=False): # CDX docs: https://github.com/internetarchive/wayback/blob/master/wayback-cdx-server/README.md query = { 'url': url, @@ -688,14 +691,14 @@ class YoutubeWebArchiveIE(InfoExtractor): 'limit': 500, 'filter': 
['statuscode:200'] + (filters or []), 'collapse': collapse or [], - **(query or {}) + **(query or {}), } res = self._download_json( 'https://web.archive.org/cdx/search/cdx', item_id, note or 'Downloading CDX API JSON', query=query, fatal=fatal) if isinstance(res, list) and len(res) >= 2: # format response to make it easier to use - return list(dict(zip(res[0], v)) for v in res[1:]) + return [dict(zip(res[0], v)) for v in res[1:]] elif not isinstance(res, list) or len(res) != 0: self.report_warning('Error while parsing CDX API response' + bug_reports_message()) @@ -852,7 +855,7 @@ class YoutubeWebArchiveIE(InfoExtractor): { 'url': (self._WAYBACK_BASE_URL % (int_or_none(thumbnail_dict.get('timestamp')) or self._OLDEST_CAPTURE_DATE)) + thumbnail_dict.get('original'), 'filesize': int_or_none(thumbnail_dict.get('length')), - 'preference': int_or_none(thumbnail_dict.get('length')) + 'preference': int_or_none(thumbnail_dict.get('length')), } for thumbnail_dict in response) if not try_all: break @@ -893,7 +896,7 @@ class YoutubeWebArchiveIE(InfoExtractor): for retry in retry_manager: try: urlh = self._request_webpage( - HEADRequest('https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/%s' % video_id), + HEADRequest(f'https://web.archive.org/web/2oe_/http://wayback-fakeurl.archive.org/yt/{video_id}'), video_id, note='Fetching archived video file url', expected_status=True) except ExtractorError as e: # HTTP Error 404 is expected if the video is not saved. 
@@ -924,21 +927,21 @@ class YoutubeWebArchiveIE(InfoExtractor): info['thumbnails'] = self._extract_thumbnails(video_id) if urlh: - url = compat_urllib_parse_unquote(urlh.url) + url = urllib.parse.unquote(urlh.url) video_file_url_qs = parse_qs(url) # Attempt to recover any ext & format info from playback url & response headers - format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))} + fmt = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))} itag = try_get(video_file_url_qs, lambda x: x['itag'][0]) if itag and itag in YoutubeIE._formats: - format.update(YoutubeIE._formats[itag]) - format.update({'format_id': itag}) + fmt.update(YoutubeIE._formats[itag]) + fmt.update({'format_id': itag}) else: mime = try_get(video_file_url_qs, lambda x: x['mime'][0]) ext = (mimetype2ext(mime) or urlhandle_detect_ext(urlh) or mimetype2ext(urlh.headers.get('x-archive-guessed-content-type'))) - format.update({'ext': ext}) - info['formats'] = [format] + fmt.update({'ext': ext}) + info['formats'] = [fmt] if not info.get('duration'): info['duration'] = str_to_int(try_get(video_file_url_qs, lambda x: x['dur'][0])) diff --git a/yt_dlp/extractor/arcpublishing.py b/yt_dlp/extractor/arcpublishing.py index febd3d28a..338bada7c 100644 --- a/yt_dlp/extractor/arcpublishing.py +++ b/yt_dlp/extractor/arcpublishing.py @@ -11,7 +11,7 @@ from ..utils import ( class ArcPublishingIE(InfoExtractor): _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}' - _VALID_URL = r'arcpublishing:(?P[a-z]+):(?P%s)' % _UUID_REGEX + _VALID_URL = rf'arcpublishing:(?P[a-z]+):(?P{_UUID_REGEX})' _TESTS = [{ # https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/ 'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab', @@ -74,12 +74,12 @@ class ArcPublishingIE(InfoExtractor): def _extract_embed_urls(cls, url, webpage): entries = [] # 
https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview - for powa_el in re.findall(r'(]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage): + for powa_el in re.findall(rf'(]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="{ArcPublishingIE._UUID_REGEX}"[^>]*>)', webpage): powa = extract_attributes(powa_el) or {} org = powa.get('data-org') uuid = powa.get('data-uuid') if org and uuid: - entries.append('arcpublishing:%s:%s' % (org, uuid)) + entries.append(f'arcpublishing:{org}:{uuid}') return entries def _real_extract(self, url): @@ -122,7 +122,7 @@ class ArcPublishingIE(InfoExtractor): elif stream_type in ('ts', 'hls'): m3u8_formats = self._extract_m3u8_formats( s_url, uuid, 'mp4', live=is_live, m3u8_id='hls', fatal=False) - if all([f.get('acodec') == 'none' for f in m3u8_formats]): + if all(f.get('acodec') == 'none' for f in m3u8_formats): continue for f in m3u8_formats: height = f.get('height') @@ -136,7 +136,7 @@ class ArcPublishingIE(InfoExtractor): else: vbr = int_or_none(s.get('bitrate')) formats.append({ - 'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type, + 'format_id': f'{stream_type}-{vbr}' if vbr else stream_type, 'vbr': vbr, 'width': int_or_none(s.get('width')), 'height': int_or_none(s.get('height')), diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py index 3db59c5ca..6fd641347 100644 --- a/yt_dlp/extractor/ard.py +++ b/yt_dlp/extractor/ard.py @@ -85,7 +85,7 @@ class ARDMediathekBaseIE(InfoExtractor): formats.extend(self._extract_f4m_formats( update_url_query(stream_url, { 'hdcore': '3.1.1', - 'plugin': 'aasp-3.1.1.69.124' + 'plugin': 'aasp-3.1.1.69.124', }), video_id, f4m_id='hds', fatal=False)) elif ext == 'm3u8': formats.extend(self._extract_m3u8_formats( @@ -96,12 +96,12 @@ class ARDMediathekBaseIE(InfoExtractor): f = { 'url': server, 'play_path': stream_url, - 'format_id': 'a%s-rtmp-%s' % (num, quality), + 'format_id': f'a{num}-rtmp-{quality}', } else: f = { 'url': stream_url, - 
'format_id': 'a%s-%s-%s' % (num, ext, quality) + 'format_id': f'a{num}-{ext}-{quality}', } m = re.search( r'_(?P\d+)x(?P\d+)\.mp4$', diff --git a/yt_dlp/extractor/arkena.py b/yt_dlp/extractor/arkena.py index de36ec886..b0e853d57 100644 --- a/yt_dlp/extractor/arkena.py +++ b/yt_dlp/extractor/arkena.py @@ -64,7 +64,7 @@ class ArkenaIE(InfoExtractor): raise ExtractorError('Invalid URL', expected=True) media = self._download_json( - 'https://video.qbrick.com/api/v1/public/accounts/%s/medias/%s' % (account_id, video_id), + f'https://video.qbrick.com/api/v1/public/accounts/{account_id}/medias/{video_id}', video_id, query={ # https://video.qbrick.com/docs/api/examples/library-api.html 'fields': 'asset/resources/*/renditions/*(height,id,language,links/*(href,mimeType),type,size,videos/*(audios/*(codec,sampleRate),bitrate,codec,duration,height,width),width),created,metadata/*(title,description),tags', diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py index 9a5524aab..f196f611a 100644 --- a/yt_dlp/extractor/arnes.py +++ b/yt_dlp/extractor/arnes.py @@ -1,8 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) from ..utils import ( float_or_none, format_field, @@ -35,7 +33,7 @@ class ArnesIE(InfoExtractor): 'view_count': int, 'tags': ['linearna_algebra'], 'start_time': 10, - } + }, }, { 'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4', 'only_matching': True, @@ -93,6 +91,6 @@ class ArnesIE(InfoExtractor): 'duration': float_or_none(video.get('duration'), 1000), 'view_count': int_or_none(video.get('views')), 'tags': video.get('hashtags'), - 'start_time': int_or_none(compat_parse_qs( - compat_urllib_parse_urlparse(url).query).get('t', [None])[0]), + 'start_time': int_or_none(urllib.parse.parse_qs( + urllib.parse.urlparse(url).query).get('t', [None])[0]), } diff --git a/yt_dlp/extractor/art19.py b/yt_dlp/extractor/art19.py index 271c505da..deec7ad01 100644 --- 
a/yt_dlp/extractor/art19.py +++ b/yt_dlp/extractor/art19.py @@ -153,7 +153,7 @@ class Art19IE(InfoExtractor): 'series_id': ('series_id', {str}), 'timestamp': ('created_at', {parse_iso8601}), 'release_timestamp': ('released_at', {parse_iso8601}), - 'modified_timestamp': ('updated_at', {parse_iso8601}) + 'modified_timestamp': ('updated_at', {parse_iso8601}), })), **traverse_obj(rss_metadata, ('content', { 'title': ('episode_title', {str}), diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index 46fe006cc..142d4b066 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -20,15 +20,15 @@ class ArteTVBaseIE(InfoExtractor): class ArteTVIE(ArteTVBaseIE): - _VALID_URL = r'''(?x) + _VALID_URL = rf'''(?x) (?:https?:// (?: - (?:www\.)?arte\.tv/(?P%(langs)s)/videos| - api\.arte\.tv/api/player/v\d+/config/(?P%(langs)s) + (?:www\.)?arte\.tv/(?P{ArteTVBaseIE._ARTE_LANGUAGES})/videos| + api\.arte\.tv/api/player/v\d+/config/(?P{ArteTVBaseIE._ARTE_LANGUAGES}) ) |arte://program) - /(?P\d{6}-\d{3}-[AF]|LIVE) - ''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES} + /(?P\d{{6}}-\d{{3}}-[AF]|LIVE) + ''' _TESTS = [{ 'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/', 'only_matching': True, @@ -145,7 +145,7 @@ class ArteTVIE(ArteTVBaseIE): language_code = self._LANG_MAP.get(lang) config = self._download_json(f'{self._API_BASE}/config/{lang}/{video_id}', video_id, headers={ - 'x-validated-age': '18' + 'x-validated-age': '18', }) geoblocking = traverse_obj(config, ('data', 'attributes', 'restriction', 'geoblocking')) or {} @@ -247,7 +247,7 @@ class ArteTVEmbedIE(InfoExtractor): 'description': 'md5:be40b667f45189632b78c1425c7c2ce1', 'upload_date': '20201116', }, - 'skip': 'No video available' + 'skip': 'No video available', }, { 'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A', 'only_matching': True, @@ -262,7 +262,7 @@ class ArteTVEmbedIE(InfoExtractor): class 
ArteTVPlaylistIE(ArteTVBaseIE): - _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P%s)/videos/(?PRC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES + _VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?PRC-\d{{6}})' _TESTS = [{ 'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/', 'only_matching': True, @@ -298,7 +298,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE): class ArteTVCategoryIE(ArteTVBaseIE): - _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P%s)/videos/(?P[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES + _VALID_URL = rf'https?://(?:www\.)?arte\.tv/(?P{ArteTVBaseIE._ARTE_LANGUAGES})/videos/(?P[\w-]+(?:/[\w-]+)*)/?\s*$' _TESTS = [{ 'url': 'https://www.arte.tv/en/videos/politics-and-society/', 'info_dict': { @@ -312,7 +312,7 @@ class ArteTVCategoryIE(ArteTVBaseIE): @classmethod def suitable(cls, url): return ( - not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, )) + not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE)) and super().suitable(url)) def _real_extract(self, url): @@ -321,12 +321,12 @@ class ArteTVCategoryIE(ArteTVBaseIE): items = [] for video in re.finditer( - r']*?href\s*=\s*(?P"|\'|\b)(?Phttps?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang, + rf']*?href\s*=\s*(?P"|\'|\b)(?Phttps?://www\.arte\.tv/{lang}/videos/[\w/-]+)(?P=q)', webpage): video = video.group('url') if video == url: continue - if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )): + if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE)): items.append(video) title = strip_or_none(self._generic_title('', webpage, default='').rsplit('|', 1)[0]) or None diff --git a/yt_dlp/extractor/atresplayer.py b/yt_dlp/extractor/atresplayer.py index 3a44e5265..7c8139714 100644 --- a/yt_dlp/extractor/atresplayer.py +++ b/yt_dlp/extractor/atresplayer.py @@ -20,7 +20,7 @@ class AtresPlayerIE(InfoExtractor): 'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc', 'duration': 3413, }, - 'skip': 'This video is only 
available for registered users' + 'skip': 'This video is only available for registered users', }, { 'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/', @@ -49,7 +49,7 @@ class AtresPlayerIE(InfoExtractor): target_url = self._download_json( 'https://account.atresmedia.com/api/login', None, 'Logging in', headers={ - 'Content-Type': 'application/x-www-form-urlencoded' + 'Content-Type': 'application/x-www-form-urlencoded', }, data=urlencode_postdata({ 'username': username, 'password': password, diff --git a/yt_dlp/extractor/atscaleconf.py b/yt_dlp/extractor/atscaleconf.py index 3f7b1e9f8..b219eeec5 100644 --- a/yt_dlp/extractor/atscaleconf.py +++ b/yt_dlp/extractor/atscaleconf.py @@ -12,7 +12,7 @@ class AtScaleConfEventIE(InfoExtractor): 'info_dict': { 'id': 'data-scale-spring-2022', 'title': 'Data @Scale Spring 2022', - 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55' + 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55', }, }, { 'url': 'https://atscaleconference.com/events/video-scale-2021/', @@ -20,15 +20,15 @@ class AtScaleConfEventIE(InfoExtractor): 'info_dict': { 'id': 'video-scale-2021', 'title': 'Video @Scale 2021', - 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55' + 'description': 'md5:7d7ca1c42ac9c6d8a785092a1aea4b55', }, }] def _real_extract(self, url): - id = self._match_id(url) - webpage = self._download_webpage(url, id) + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) return self.playlist_from_matches( re.findall(r'data-url\s*=\s*"(https?://(?:www\.)?atscaleconference\.com/videos/[^"]+)"', webpage), - ie='Generic', playlist_id=id, + ie='Generic', playlist_id=playlist_id, title=self._og_search_title(webpage), description=self._og_search_description(webpage)) diff --git a/yt_dlp/extractor/atvat.py b/yt_dlp/extractor/atvat.py index 20ee34cca..37bb61695 100644 --- a/yt_dlp/extractor/atvat.py +++ 
b/yt_dlp/extractor/atvat.py @@ -19,7 +19,7 @@ class ATVAtIE(InfoExtractor): 'id': 'v-ce9cgn1e70n5-1', 'ext': 'mp4', 'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen', - } + }, }, { 'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1', 'only_matching': True, @@ -66,10 +66,10 @@ class ATVAtIE(InfoExtractor): video_id=video_id) video_title = json_data['views']['default']['page']['title'] - contentResource = json_data['views']['default']['page']['contentResource'] - content_id = contentResource[0]['id'] - content_ids = [{'id': id, 'subclip_start': content['start'], 'subclip_end': content['end']} - for id, content in enumerate(contentResource)] + content_resource = json_data['views']['default']['page']['contentResource'] + content_id = content_resource[0]['id'] + content_ids = [{'id': id_, 'subclip_start': content['start'], 'subclip_end': content['end']} + for id_, content in enumerate(content_resource)] time_of_request = dt.datetime.now() not_before = time_of_request - dt.timedelta(minutes=5) @@ -87,17 +87,17 @@ class ATVAtIE(InfoExtractor): videos = self._download_json( 'https://vas-v4.p7s1video.net/4.0/getsources', content_id, 'Downloading videos JSON', query={ - 'token': jwt_token.decode('utf-8') + 'token': jwt_token.decode('utf-8'), }) - video_id, videos_data = list(videos['data'].items())[0] + video_id, videos_data = next(iter(videos['data'].items())) error_msg = try_get(videos_data, lambda x: x['error']['title']) if error_msg == 'Geo check failed': self.raise_geo_restricted(error_msg) elif error_msg: raise ExtractorError(error_msg) entries = [ - self._extract_video_info(url, contentResource[video['id']], video) + self._extract_video_info(url, content_resource[video['id']], video) for video in videos_data] return { diff --git a/yt_dlp/extractor/audimedia.py b/yt_dlp/extractor/audimedia.py index 35114e545..c5a9c7e29 100644 --- a/yt_dlp/extractor/audimedia.py +++ 
b/yt_dlp/extractor/audimedia.py @@ -19,7 +19,7 @@ class AudiMediaIE(InfoExtractor): 'timestamp': 1448354940, 'duration': 74022, 'view_count': int, - } + }, }, { 'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991', 'only_matching': True, @@ -73,7 +73,7 @@ class AudiMediaIE(InfoExtractor): bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None) if bitrate: f.update({ - 'format_id': 'http-%s' % bitrate, + 'format_id': f'http-{bitrate}', }) formats.append(f) diff --git a/yt_dlp/extractor/audioboom.py b/yt_dlp/extractor/audioboom.py index a23fcd299..751b74add 100644 --- a/yt_dlp/extractor/audioboom.py +++ b/yt_dlp/extractor/audioboom.py @@ -15,7 +15,7 @@ class AudioBoomIE(InfoExtractor): 'duration': 4000.99, 'uploader': 'Sue Perkins: An hour or so with...', 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins', - } + }, }, { # Direct mp3-file link 'url': 'https://audioboom.com/posts/8128496.mp3', 'md5': 'e329edf304d450def95c7f86a9165ee1', @@ -27,7 +27,7 @@ class AudioBoomIE(InfoExtractor): 'duration': 1689.7, 'uploader': 'Lost Dot Podcast: The Trans Pyrenees and Transcontinental Race', 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channels/5003904', - } + }, }, { 'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0', 'only_matching': True, diff --git a/yt_dlp/extractor/audiodraft.py b/yt_dlp/extractor/audiodraft.py index 71e5afd8c..484ad4e1a 100644 --- a/yt_dlp/extractor/audiodraft.py +++ b/yt_dlp/extractor/audiodraft.py @@ -9,7 +9,7 @@ class AudiodraftBaseIE(InfoExtractor): headers={ 'Content-type': 'application/x-www-form-urlencoded; charset=UTF-8', 'X-Requested-With': 'XMLHttpRequest', - }, data=f'id={player_entry_id}'.encode('utf-8')) + }, data=f'id={player_entry_id}'.encode()) return { 'id': str(data_json['entry_id']), @@ -65,9 +65,10 @@ class AudiodraftCustomIE(AudiodraftBaseIE): }] def _real_extract(self, url): - id = 
self._match_id(url) - webpage = self._download_webpage(url, id) - player_entry_id = self._search_regex(r'playAudio\(\'(player_entry_\d+)\'\);', webpage, id, 'play entry id') + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + player_entry_id = self._search_regex( + r'playAudio\(\'(player_entry_\d+)\'\);', webpage, video_id, 'play entry id') return self._audiodraft_extract_from_id(player_entry_id) @@ -89,5 +90,5 @@ class AudiodraftGenericIE(AudiodraftBaseIE): }] def _real_extract(self, url): - id = self._match_id(url) - return self._audiodraft_extract_from_id(f'player_entry_{id}') + video_id = self._match_id(url) + return self._audiodraft_extract_from_id(f'player_entry_{video_id}') diff --git a/yt_dlp/extractor/audiomack.py b/yt_dlp/extractor/audiomack.py index 5c4160fe4..1d4460c9f 100644 --- a/yt_dlp/extractor/audiomack.py +++ b/yt_dlp/extractor/audiomack.py @@ -3,7 +3,6 @@ import time from .common import InfoExtractor from .soundcloud import SoundcloudIE -from ..compat import compat_str from ..utils import ( ExtractorError, url_basename, @@ -22,8 +21,8 @@ class AudiomackIE(InfoExtractor): 'id': '310086', 'ext': 'mp3', 'uploader': 'Roosh Williams', - 'title': 'Extraordinary' - } + 'title': 'Extraordinary', + }, }, # audiomack wrapper around soundcloud song # Needs new test URL. 
@@ -56,7 +55,7 @@ class AudiomackIE(InfoExtractor): # API is inconsistent with errors if 'url' not in api_response or not api_response['url'] or 'error' in api_response: - raise ExtractorError('Invalid url %s' % url) + raise ExtractorError(f'Invalid url {url}') # Audiomack wraps a lot of soundcloud tracks in their branded wrapper # if so, pass the work off to the soundcloud extractor @@ -64,7 +63,7 @@ class AudiomackIE(InfoExtractor): return self.url_result(api_response['url'], SoundcloudIE.ie_key()) return { - 'id': compat_str(api_response.get('id', album_url_tag)), + 'id': str(api_response.get('id', album_url_tag)), 'uploader': api_response.get('artist'), 'title': api_response.get('title'), 'url': api_response['url'], @@ -82,8 +81,8 @@ class AudiomackAlbumIE(InfoExtractor): 'info_dict': { 'id': '812251', - 'title': 'Tha Tour: Part 2 (Official Mixtape)' - } + 'title': 'Tha Tour: Part 2 (Official Mixtape)', + }, }, # Album playlist ripped from fakeshoredrive with no metadata { @@ -98,16 +97,16 @@ class AudiomackAlbumIE(InfoExtractor): 'id': '837576', 'ext': 'mp3', 'uploader': 'Lil Herb a.k.a. G Herbo', - } + }, }, { 'info_dict': { 'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)', 'id': '837580', 'ext': 'mp3', 'uploader': 'Lil Herb a.k.a. 
G Herbo', - } + }, }], - } + }, ] def _real_extract(self, url): @@ -123,12 +122,12 @@ class AudiomackAlbumIE(InfoExtractor): api_response = self._download_json( 'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d' % (album_url_tag, track_no, time.time()), album_url_tag, - note='Querying song information (%d)' % (track_no + 1)) + note=f'Querying song information ({track_no + 1})') # Total failure, only occurs when url is totally wrong # Won't happen in middle of valid playlist (next case) if 'url' not in api_response or 'error' in api_response: - raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url)) + raise ExtractorError(f'Invalid url for track {track_no} of album url {url}') # URL is good but song id doesn't exist - usually means end of playlist elif not api_response['url']: break @@ -136,10 +135,10 @@ class AudiomackAlbumIE(InfoExtractor): # Pull out the album metadata and add to result (if it exists) for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]: if apikey in api_response and resultkey not in result: - result[resultkey] = compat_str(api_response[apikey]) + result[resultkey] = str(api_response[apikey]) song_id = url_basename(api_response['url']).rpartition('.')[0] result['entries'].append({ - 'id': compat_str(api_response.get('id', song_id)), + 'id': str(api_response.get('id', song_id)), 'uploader': api_response.get('artist'), 'title': api_response.get('title', song_id), 'url': api_response['url'], diff --git a/yt_dlp/extractor/audius.py b/yt_dlp/extractor/audius.py index 6448b449b..c611c6e08 100644 --- a/yt_dlp/extractor/audius.py +++ b/yt_dlp/extractor/audius.py @@ -1,7 +1,7 @@ import random +import urllib.parse from .common import InfoExtractor -from ..compat import compat_str, compat_urllib_parse_unquote from ..utils import ExtractorError, str_or_none, try_get @@ -15,13 +15,13 @@ class AudiusBaseIE(InfoExtractor): if response_data is not None: return response_data if len(response) == 1 and 
'message' in response: - raise ExtractorError('API error: %s' % response['message'], + raise ExtractorError('API error: {}'.format(response['message']), expected=True) raise ExtractorError('Unexpected API response') def _select_api_base(self): """Selecting one of the currently available API hosts""" - response = super(AudiusBaseIE, self)._download_json( + response = super()._download_json( 'https://api.audius.co/', None, note='Requesting available API hosts', errnote='Unable to request available API hosts') @@ -41,8 +41,8 @@ class AudiusBaseIE(InfoExtractor): anything from this link, since the Audius API won't be able to resolve this url """ - url = compat_urllib_parse_unquote(url) - title = compat_urllib_parse_unquote(title) + url = urllib.parse.unquote(url) + title = urllib.parse.unquote(title) if '/' in title or '%2F' in title: fixed_title = title.replace('/', '%5C').replace('%2F', '%5C') return url.replace(title, fixed_title) @@ -54,19 +54,19 @@ class AudiusBaseIE(InfoExtractor): if self._API_BASE is None: self._select_api_base() try: - response = super(AudiusBaseIE, self)._download_json( - '%s%s%s' % (self._API_BASE, self._API_V, path), item_id, note=note, + response = super()._download_json( + f'{self._API_BASE}{self._API_V}{path}', item_id, note=note, errnote=errnote, expected_status=expected_status) except ExtractorError as exc: # some of Audius API hosts may not work as expected and return HTML - if 'Failed to parse JSON' in compat_str(exc): + if 'Failed to parse JSON' in str(exc): raise ExtractorError('An error occurred while receiving data. 
Try again', expected=True) raise exc return self._get_response_data(response) def _resolve_url(self, url, item_id): - return self._api_request('/resolve?url=%s' % url, item_id, + return self._api_request(f'/resolve?url={url}', item_id, expected_status=404) @@ -91,7 +91,7 @@ class AudiusIE(AudiusBaseIE): 'view_count': int, 'like_count': int, 'repost_count': int, - } + }, }, { # Regular track @@ -109,14 +109,14 @@ class AudiusIE(AudiusBaseIE): 'view_count': int, 'like_count': int, 'repost_count': int, - } + }, }, ] _ARTWORK_MAP = { - "150x150": 150, - "480x480": 480, - "1000x1000": 1000 + '150x150': 150, + '480x480': 480, + '1000x1000': 1000, } def _real_extract(self, url): @@ -130,7 +130,7 @@ class AudiusIE(AudiusBaseIE): else: # API link title = None # uploader = None - track_data = self._api_request('/tracks/%s' % track_id, track_id) + track_data = self._api_request(f'/tracks/{track_id}', track_id) if not isinstance(track_data, dict): raise ExtractorError('Unexpected API response') @@ -144,7 +144,7 @@ class AudiusIE(AudiusBaseIE): if isinstance(artworks_data, dict): for quality_key, thumbnail_url in artworks_data.items(): thumbnail = { - "url": thumbnail_url + 'url': thumbnail_url, } quality_code = self._ARTWORK_MAP.get(quality_key) if quality_code is not None: @@ -154,12 +154,12 @@ class AudiusIE(AudiusBaseIE): return { 'id': track_id, 'title': track_data.get('title', title), - 'url': '%s/v1/tracks/%s/stream' % (self._API_BASE, track_id), + 'url': f'{self._API_BASE}/v1/tracks/{track_id}/stream', 'ext': 'mp3', 'description': track_data.get('description'), 'duration': track_data.get('duration'), 'track': track_data.get('title'), - 'artist': try_get(track_data, lambda x: x['user']['name'], compat_str), + 'artist': try_get(track_data, lambda x: x['user']['name'], str), 'genre': track_data.get('genre'), 'thumbnails': thumbnails, 'view_count': track_data.get('play_count'), @@ -175,11 +175,11 @@ class AudiusTrackIE(AudiusIE): # XXX: Do not subclass from concrete IE 
_TESTS = [ { 'url': 'audius:9RWlo', - 'only_matching': True + 'only_matching': True, }, { 'url': 'audius:http://discoveryprovider.audius.prod-us-west-2.staked.cloud/v1/tracks/9RWlo', - 'only_matching': True + 'only_matching': True, }, ] @@ -207,7 +207,7 @@ class AudiusPlaylistIE(AudiusBaseIE): if not track_id: raise ExtractorError('Unable to get track ID from playlist') entries.append(self.url_result( - 'audius:%s' % track_id, + f'audius:{track_id}', ie=AudiusTrackIE.ie_key(), video_id=track_id)) return entries @@ -231,7 +231,7 @@ class AudiusPlaylistIE(AudiusBaseIE): raise ExtractorError('Unable to get playlist ID') playlist_tracks = self._api_request( - '/playlists/%s/tracks' % playlist_id, + f'/playlists/{playlist_id}/tracks', title, note='Downloading playlist tracks metadata', errnote='Unable to download playlist tracks metadata') if not isinstance(playlist_tracks, list): @@ -267,5 +267,5 @@ class AudiusProfileIE(AudiusPlaylistIE): # XXX: Do not subclass from concrete I profile_audius_id = _profile_data[0]['id'] profile_bio = _profile_data[0].get('bio') - api_call = self._api_request('/full/users/handle/%s/tracks' % profile_id, profile_id) + api_call = self._api_request(f'/full/users/handle/{profile_id}/tracks', profile_id) return self.playlist_result(self._build_playlist(api_call), profile_audius_id, profile_id, profile_bio) diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py index a8dfb3efc..4066a5a83 100644 --- a/yt_dlp/extractor/awaan.py +++ b/yt_dlp/extractor/awaan.py @@ -1,10 +1,7 @@ import base64 +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urllib_parse_urlencode, -) from ..utils import ( format_field, int_or_none, @@ -22,14 +19,14 @@ class AWAANIE(InfoExtractor): show_id, video_id, season_id = self._match_valid_url(url).groups() if video_id and int(video_id) > 0: return self.url_result( - 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo') + f'http://awaan.ae/media/{video_id}', 
'AWAANVideo') elif season_id and int(season_id) > 0: return self.url_result(smuggle_url( - 'http://awaan.ae/program/season/%s' % season_id, + f'http://awaan.ae/program/season/{season_id}', {'show_id': show_id}), 'AWAANSeason') else: return self.url_result( - 'http://awaan.ae/program/%s' % show_id, 'AWAANSeason') + f'http://awaan.ae/program/{show_id}', 'AWAANSeason') class AWAANBaseIE(InfoExtractor): @@ -75,11 +72,11 @@ class AWAANVideoIE(AWAANBaseIE): video_id = self._match_id(url) video_data = self._download_json( - 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id, + f'http://admin.mangomolo.com/analytics/index.php/plus/video?id={video_id}', video_id, headers={'Origin': 'http://awaan.ae'}) info = self._parse_video_data(video_data, video_id, False) - embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({ + embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + urllib.parse.urlencode({ 'id': video_data['id'], 'user_id': video_data['user_id'], 'signature': video_data['signature'], @@ -117,11 +114,11 @@ class AWAANLiveIE(AWAANBaseIE): channel_id = self._match_id(url) channel_data = self._download_json( - 'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id, + f'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id={channel_id}', channel_id, headers={'Origin': 'http://awaan.ae'}) info = self._parse_video_data(channel_data, channel_id, True) - embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' + compat_urllib_parse_urlencode({ + embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' 
+ urllib.parse.urlencode({ 'id': base64.b64encode(channel_data['user_id'].encode()).decode(), 'channelid': base64.b64encode(channel_data['id'].encode()).decode(), 'signature': channel_data['signature'], @@ -159,7 +156,7 @@ class AWAANSeasonIE(InfoExtractor): show_id = smuggled_data.get('show_id') if show_id is None: season = self._download_json( - 'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id, + f'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id={season_id}', season_id, headers={'Origin': 'http://awaan.ae'}) show_id = season['id'] data['show_id'] = show_id @@ -167,7 +164,7 @@ class AWAANSeasonIE(InfoExtractor): 'http://admin.mangomolo.com/analytics/index.php/plus/show', show_id, data=urlencode_postdata(data), headers={ 'Origin': 'http://awaan.ae', - 'Content-Type': 'application/x-www-form-urlencoded' + 'Content-Type': 'application/x-www-form-urlencoded', }) if not season_id: season_id = show['default_season'] @@ -177,8 +174,8 @@ class AWAANSeasonIE(InfoExtractor): entries = [] for video in show['videos']: - video_id = compat_str(video['id']) + video_id = str(video['id']) entries.append(self.url_result( - 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id)) + f'http://awaan.ae/media/{video_id}', 'AWAANVideo', video_id)) return self.playlist_result(entries, season_id, title) diff --git a/yt_dlp/extractor/aws.py b/yt_dlp/extractor/aws.py index 4ebef9295..177c41027 100644 --- a/yt_dlp/extractor/aws.py +++ b/yt_dlp/extractor/aws.py @@ -1,9 +1,9 @@ import datetime as dt import hashlib import hmac +import urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlencode class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor @@ -18,20 +18,20 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with 'Accept': 'application/json', 'Host': self._AWS_PROXY_HOST, 'X-Amz-Date': amz_date, - 'X-Api-Key': 
self._AWS_API_KEY + 'X-Api-Key': self._AWS_API_KEY, } session_token = aws_dict.get('session_token') if session_token: headers['X-Amz-Security-Token'] = session_token def aws_hash(s): - return hashlib.sha256(s.encode('utf-8')).hexdigest() + return hashlib.sha256(s.encode()).hexdigest() # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html - canonical_querystring = compat_urllib_parse_urlencode(query) + canonical_querystring = urllib.parse.urlencode(query) canonical_headers = '' for header_name, header_value in sorted(headers.items()): - canonical_headers += '%s:%s\n' % (header_name.lower(), header_value) + canonical_headers += f'{header_name.lower()}:{header_value}\n' signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())]) canonical_request = '\n'.join([ 'GET', @@ -39,7 +39,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with canonical_querystring, canonical_headers, signed_headers, - aws_hash('') + aws_hash(''), ]) # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html @@ -49,7 +49,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html def aws_hmac(key, msg): - return hmac.new(key, msg.encode('utf-8'), hashlib.sha256) + return hmac.new(key, msg.encode(), hashlib.sha256) def aws_hmac_digest(key, msg): return aws_hmac(key, msg).digest() @@ -57,7 +57,7 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with def aws_hmac_hexdigest(key, msg): return aws_hmac(key, msg).hexdigest() - k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8') + k_signing = ('AWS4' + aws_dict['secret_key']).encode() for value in credential_scope_list: k_signing = aws_hmac_digest(k_signing, value) @@ -65,11 +65,11 @@ class AWSIE(InfoExtractor): # XXX: Conventionally, base classes should end with # Task 4: 
http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html headers['Authorization'] = ', '.join([ - '%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope), - 'SignedHeaders=%s' % signed_headers, - 'Signature=%s' % signature, + '{} Credential={}/{}'.format(self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope), + f'SignedHeaders={signed_headers}', + f'Signature={signature}', ]) return self._download_json( - 'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''), + 'https://{}{}{}'.format(self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''), video_id, headers=headers) diff --git a/yt_dlp/extractor/azmedien.py b/yt_dlp/extractor/azmedien.py index d1686eed6..0e3a03f03 100644 --- a/yt_dlp/extractor/azmedien.py +++ b/yt_dlp/extractor/azmedien.py @@ -38,14 +38,14 @@ class AZMedienIE(InfoExtractor): 'timestamp': 1538328802, 'view_count': int, 'thumbnail': 'http://cfvod.kaltura.com/p/1719221/sp/171922100/thumbnail/entry_id/1_anruz3wy/version/100031', - 'duration': 1930 + 'duration': 1930, }, 'params': { 'skip_download': True, }, }, { 'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1', - 'only_matching': True + 'only_matching': True, }] _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be' _PARTNER_ID = '1719221' @@ -62,5 +62,5 @@ class AZMedienIE(InfoExtractor): })['data']['context']['mainAsset']['video']['kaltura']['kalturaId'] return self.url_result( - 'kaltura:%s:%s' % (self._PARTNER_ID, entry_id), + f'kaltura:{self._PARTNER_ID}:{entry_id}', ie=KalturaIE.ie_key(), video_id=entry_id) diff --git a/yt_dlp/extractor/baidu.py b/yt_dlp/extractor/baidu.py index 8786d67e0..a1ad4240f 100644 --- a/yt_dlp/extractor/baidu.py +++ b/yt_dlp/extractor/baidu.py @@ -24,8 +24,9 @@ class 
BaiduVideoIE(InfoExtractor): }] def _call_api(self, path, category, playlist_id, note): - return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % ( - path, category, playlist_id), playlist_id, note) + return self._download_json( + f'http://app.video.baidu.com/{path}/?worktype=adnative{category}&id={playlist_id}', + playlist_id, note) def _real_extract(self, url): category, playlist_id = self._match_valid_url(url).groups() @@ -44,7 +45,7 @@ class BaiduVideoIE(InfoExtractor): 'xqsingle', category, playlist_id, 'Download episodes JSON metadata') entries = [self.url_result( - episode['url'], video_title=episode['title'] + episode['url'], video_title=episode['title'], ) for episode in episodes_detail['videos']] return self.playlist_result( diff --git a/yt_dlp/extractor/banbye.py b/yt_dlp/extractor/banbye.py index c4e07a79a..d10bdf8da 100644 --- a/yt_dlp/extractor/banbye.py +++ b/yt_dlp/extractor/banbye.py @@ -1,10 +1,7 @@ import math +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, -) from ..utils import ( InAdvancePagedList, format_field, @@ -20,8 +17,8 @@ class BanByeBaseIE(InfoExtractor): @staticmethod def _extract_playlist_id(url, param='playlist'): - return compat_parse_qs( - compat_urllib_parse_urlparse(url).query).get(param, [None])[0] + return urllib.parse.parse_qs( + urllib.parse.urlparse(url).query).get(param, [None])[0] def _extract_playlist(self, playlist_id): data = self._download_json(f'{self._API_BASE}/playlists/{playlist_id}', playlist_id) diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index e89b3a69b..6128de791 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -3,7 +3,6 @@ import re import time from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( KNOWN_EXTENSIONS, ExtractorError, @@ -42,7 +41,7 @@ class BandcampIE(InfoExtractor): 'uploader_id': 
'youtube-dl', 'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg', }, - '_skip': 'There is a limit of 200 free downloads / month for the test song' + '_skip': 'There is a limit of 200 free downloads / month for the test song', }, { # free download 'url': 'http://benprunty.bandcamp.com/track/lanius-battle', @@ -119,7 +118,7 @@ class BandcampIE(InfoExtractor): def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True): return self._parse_json(self._html_search_regex( - r'data-%s=(["\'])({.+?})\1' % attr, webpage, + rf'data-{attr}=(["\'])({{.+?}})\1', webpage, attr + ' data', group=2), video_id, fatal=fatal) def _real_extract(self, url): @@ -167,7 +166,7 @@ class BandcampIE(InfoExtractor): download_link = tralbum.get('freeDownloadPage') if download_link: - track_id = compat_str(tralbum['id']) + track_id = str(tralbum['id']) download_webpage = self._download_webpage( download_link, track_id, 'Downloading free downloads page') @@ -192,7 +191,7 @@ class BandcampIE(InfoExtractor): if isinstance(download_formats_list, list): for f in blob['download_formats']: name, ext = f.get('name'), f.get('file_extension') - if all(isinstance(x, compat_str) for x in (name, ext)): + if all(isinstance(x, str) for x in (name, ext)): download_formats[name] = ext.strip('.') for format_id, f in downloads.items(): @@ -207,7 +206,7 @@ class BandcampIE(InfoExtractor): }) format_id = f.get('encoding_name') or format_id stat = self._download_json( - stat_url, track_id, 'Downloading %s JSON' % format_id, + stat_url, track_id, f'Downloading {format_id} JSON', transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1], fatal=False) if not stat: @@ -225,7 +224,7 @@ class BandcampIE(InfoExtractor): 'acodec': format_id.split('-')[0], }) - title = '%s - %s' % (artist, track) if artist else track + title = f'{artist} - {track}' if artist else track if not duration: duration = float_or_none(self._html_search_meta( @@ -267,7 +266,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do 
not subclass from concrete IE 'timestamp': 1311756226, 'upload_date': '20110727', 'uploader': 'Blazo', - } + }, }, { 'md5': '1a2c32e2691474643e912cc6cd4bffaa', @@ -278,7 +277,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE 'timestamp': 1311757238, 'upload_date': '20110727', 'uploader': 'Blazo', - } + }, }, ], 'info_dict': { @@ -287,9 +286,9 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE 'uploader_id': 'blazo', }, 'params': { - 'playlistend': 2 + 'playlistend': 2, }, - 'skip': 'Bandcamp imposes download limits.' + 'skip': 'Bandcamp imposes download limits.', }, { 'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave', 'info_dict': { @@ -324,7 +323,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE def suitable(cls, url): return (False if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url) - else super(BandcampAlbumIE, cls).suitable(url)) + else super().suitable(url)) def _real_extract(self, url): uploader_id, album_id = self._match_valid_url(url).groups() @@ -376,7 +375,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE }, }, { 'url': 'https://bandcamp.com/?blah/blah@&show=228', - 'only_matching': True + 'only_matching': True, }] def _real_extract(self, url): @@ -407,7 +406,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE title = show.get('audio_title') or 'Bandcamp Weekly' subtitle = show.get('subtitle') if subtitle: - title += ' - %s' % subtitle + title += f' - {subtitle}' return { 'id': show_id, @@ -419,7 +418,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE 'series': 'Bandcamp Weekly', 'episode': show.get('subtitle'), 'episode_id': show_id, - 'formats': formats + 'formats': formats, } @@ -440,7 +439,7 @@ class BandcampUserIE(InfoExtractor): 'url': 'http://dotscale.bandcamp.com', 'info_dict': { 'id': 'dotscale', - 'title': 'Discography of dotscale' + 
'title': 'Discography of dotscale', }, 'playlist_count': 1, }, { diff --git a/yt_dlp/extractor/bannedvideo.py b/yt_dlp/extractor/bannedvideo.py index 82dc9ab02..46f2978f7 100644 --- a/yt_dlp/extractor/bannedvideo.py +++ b/yt_dlp/extractor/bannedvideo.py @@ -23,7 +23,7 @@ class BannedVideoIE(InfoExtractor): 'description': 'md5:560d96f02abbebe6c6b78b47465f6b28', 'upload_date': '20200324', 'timestamp': 1585087895, - } + }, }] _GRAPHQL_GETMETADATA_QUERY = ''' @@ -84,15 +84,15 @@ query GetCommentReplies($id: String!) { 'GetCommentReplies': _GRAPHQL_GETCOMMENTSREPLIES_QUERY, } - def _call_api(self, video_id, id, operation, note): + def _call_api(self, video_id, id_var, operation, note): return self._download_json( 'https://api.infowarsmedia.com/graphql', video_id, note=note, headers={ - 'Content-Type': 'application/json; charset=utf-8' + 'Content-Type': 'application/json; charset=utf-8', }, data=json.dumps({ - 'variables': {'id': id}, + 'variables': {'id': id_var}, 'operationName': operation, - 'query': self._GRAPHQL_QUERIES[operation] + 'query': self._GRAPHQL_QUERIES[operation], }).encode('utf8')).get('data') def _get_comments(self, video_id, comments, comment_data): @@ -151,5 +151,5 @@ query GetCommentReplies($id: String!) 
{ 'tags': [tag.get('name') for tag in video_info.get('tags')], 'availability': self._availability(is_unlisted=video_info.get('unlisted')), 'comments': comments, - '__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments')) + '__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments')), } diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index f6b58b361..3af923f95 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -2,10 +2,10 @@ import functools import itertools import json import re +import urllib.parse import xml.etree.ElementTree from .common import InfoExtractor -from ..compat import compat_str, compat_urlparse from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -35,7 +35,7 @@ class BBCCoUkIE(InfoExtractor): IE_NAME = 'bbc.co.uk' IE_DESC = 'BBC iPlayer' _ID_REGEX = r'(?:[pbml][\da-z]{7}|w[\da-z]{7,14})' - _VALID_URL = r'''(?x) + _VALID_URL = rf'''(?x) https?:// (?:www\.)?bbc\.co\.uk/ (?: @@ -45,8 +45,8 @@ class BBCCoUkIE(InfoExtractor): radio/player/| events/[^/]+/play/[^/]+/ ) - (?P%s)(?!/(?:episodes|broadcasts|clips)) - ''' % _ID_REGEX + (?P{_ID_REGEX})(?!/(?:episodes|broadcasts|clips)) + ''' _EMBED_REGEX = [r'setPlaylist\("(?Phttps?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)'] _LOGIN_URL = 'https://account.bbc.com/signin' @@ -75,7 +75,7 @@ class BBCCoUkIE(InfoExtractor): 'params': { # rtmp download 'skip_download': True, - } + }, }, { 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/', @@ -148,7 +148,7 @@ class BBCCoUkIE(InfoExtractor): 'params': { # rtmp download 'skip_download': True, - } + }, }, { 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz', 'note': 'Video', @@ -162,7 +162,7 @@ class BBCCoUkIE(InfoExtractor): 'params': { # rtmp download 'skip_download': True, - } + }, }, { 'url': 
'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls', 'info_dict': { @@ -268,19 +268,19 @@ class BBCCoUkIE(InfoExtractor): error = clean_html(get_element_by_class('form-message', response)) if error: raise ExtractorError( - 'Unable to login: %s' % error, expected=True) + f'Unable to login: {error}', expected=True) raise ExtractorError('Unable to log in') class MediaSelectionError(Exception): - def __init__(self, id): - self.id = id + def __init__(self, error_id): + self.id = error_id def _extract_asx_playlist(self, connection, programme_id): asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist') return [ref.get('href') for ref in asx.findall('./Entry/ref')] def _extract_items(self, playlist): - return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS) + return playlist.findall(f'./{{{self._EMP_PLAYLIST_NS}}}item') def _extract_medias(self, media_selection): error = media_selection.get('result') @@ -312,7 +312,7 @@ class BBCCoUkIE(InfoExtractor): def _raise_extractor_error(self, media_selection_error): raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, media_selection_error.id), + f'{self.IE_NAME} returned error: {media_selection_error.id}', expected=True) def _download_media_selector(self, programme_id): @@ -372,7 +372,7 @@ class BBCCoUkIE(InfoExtractor): for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)): formats.append({ 'url': ref, - 'format_id': 'ref%s_%s' % (i, format_id), + 'format_id': f'ref{i}_{format_id}', }) elif transfer_format == 'dash': formats.extend(self._extract_mpd_formats( @@ -394,7 +394,7 @@ class BBCCoUkIE(InfoExtractor): href, programme_id, f4m_id=format_id, fatal=False)) else: if not supplier and bitrate: - format_id += '-%d' % bitrate + format_id += f'-{bitrate}' fmt = { 'format_id': format_id, 'filesize': file_size, @@ -423,9 +423,9 @@ class BBCCoUkIE(InfoExtractor): identifier = connection.get('identifier') server = 
connection.get('server') fmt.update({ - 'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string), + 'url': f'{protocol}://{server}/{application}?{auth_string}', 'play_path': identifier, - 'app': '%s?%s' % (application, auth_string), + 'app': f'{application}?{auth_string}', 'page_url': 'http://www.bbc.co.uk', 'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf', 'rtmp_live': False, @@ -441,7 +441,7 @@ class BBCCoUkIE(InfoExtractor): def _download_playlist(self, playlist_id): try: playlist = self._download_json( - 'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id, + f'http://www.bbc.co.uk/programmes/{playlist_id}/playlist.json', playlist_id, 'Downloading playlist JSON') formats = [] subtitles = {} @@ -480,32 +480,32 @@ class BBCCoUkIE(InfoExtractor): def _process_legacy_playlist(self, playlist_id): return self._process_legacy_playlist_url( - 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, playlist_id) + f'http://www.bbc.co.uk/iplayer/playlist/{playlist_id}', playlist_id) def _download_legacy_playlist_url(self, url, playlist_id=None): return self._download_xml( url, playlist_id, 'Downloading legacy playlist XML') def _extract_from_legacy_playlist(self, playlist, playlist_id): - no_items = playlist.find('./{%s}noItems' % self._EMP_PLAYLIST_NS) + no_items = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}noItems') if no_items is not None: reason = no_items.get('reason') if reason == 'preAvailability': - msg = 'Episode %s is not yet available' % playlist_id + msg = f'Episode {playlist_id} is not yet available' elif reason == 'postAvailability': - msg = 'Episode %s is no longer available' % playlist_id + msg = f'Episode {playlist_id} is no longer available' elif reason == 'noMedia': - msg = 'Episode %s is not currently available' % playlist_id + msg = f'Episode {playlist_id} is not currently available' else: - msg = 'Episode %s is not available: %s' % (playlist_id, reason) + msg = 
f'Episode {playlist_id} is not available: {reason}' raise ExtractorError(msg, expected=True) for item in self._extract_items(playlist): kind = item.get('kind') if kind not in ('programme', 'radioProgramme'): continue - title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text - description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS) + title = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}title').text + description_el = playlist.find(f'./{{{self._EMP_PLAYLIST_NS}}}summary') description = description_el.text if description_el is not None else None def get_programme_id(item): @@ -515,7 +515,7 @@ class BBCCoUkIE(InfoExtractor): if value and re.match(r'^[pb][\da-z]{7}$', value): return value get_from_attributes(item) - mediator = item.find('./{%s}mediator' % self._EMP_PLAYLIST_NS) + mediator = item.find(f'./{{{self._EMP_PLAYLIST_NS}}}mediator') if mediator is not None: return get_from_attributes(mediator) @@ -555,7 +555,7 @@ class BBCCoUkIE(InfoExtractor): if not programme_id: programme_id = self._search_regex( - r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX, webpage, 'vpid', fatal=False, default=None) + rf'"vpid"\s*:\s*"({self._ID_REGEX})"', webpage, 'vpid', fatal=False, default=None) if programme_id: formats, subtitles = self._download_media_selector(programme_id) @@ -641,7 +641,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE }, 'params': { 'skip_download': True, - } + }, }, { # article with single video embedded with data-playable containing XML playlist # with direct video links as progressiveDownloadUrl (for now these are extracted) @@ -884,7 +884,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE 'uploader_id': 'bbc_world_service', 'series': 'CrowdScience', 'chapters': [], - } + }, }, { # onion routes 'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576', 'only_matching': True, @@ -897,7 +897,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from 
concrete IE def suitable(cls, url): EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE) return (False if any(ie.suitable(url) for ie in EXCLUDE_IE) - else super(BBCIE, cls).suitable(url)) + else super().suitable(url)) def _extract_from_media_meta(self, media_meta, video_id): # Direct links to media in media metadata (e.g. @@ -1009,7 +1009,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE if playlist: entry = None for key in ('streaming', 'progressiveDownload'): - playlist_url = playlist.get('%sUrl' % key) + playlist_url = playlist.get(f'{key}Url') if not playlist_url: continue try: @@ -1035,7 +1035,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227 group_id = self._search_regex( - r']+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX, + rf']+\bclass=["\']video["\'][^>]+\bdata-pid=["\']({self._ID_REGEX})', webpage, 'group id', default=None) if group_id: return self.url_result( @@ -1043,9 +1043,9 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE # single video story (e.g. 
http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret) programme_id = self._search_regex( - [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX, - r']+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX, - r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX], + [rf'data-(?:video-player|media)-vpid="({self._ID_REGEX})"', + rf']+name="externalIdentifier"[^>]+value="({self._ID_REGEX})"', + rf'videoId\s*:\s*["\']({self._ID_REGEX})["\']'], webpage, 'vpid', default=None) if programme_id: @@ -1142,7 +1142,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE video_id, url_transparent=True) entry.update({ 'timestamp': traverse_obj(morph_payload, ( - 'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601}) + 'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601}), ), **traverse_obj(video_data, { 'thumbnail': (('iChefImage', 'image'), {url_or_none}, any), @@ -1189,7 +1189,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE 'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}), 'start_time': ('offset', 'start', {float_or_none}), 'end_time': ('offset', 'end', {float_or_none}), - }) + }), ), } @@ -1287,7 +1287,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE 'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any), 'duration': ('versions', 0, 'duration', {int}), 'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}), - }) + }), } def is_type(*types): @@ -1331,7 +1331,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE if blocks: summary = [] for block in blocks: - text = try_get(block, lambda x: x['model']['text'], compat_str) + text = try_get(block, lambda x: x['model']['text'], str) if text: summary.append(text) if summary: @@ -1411,9 +1411,9 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE entries, playlist_id, playlist_title, 
playlist_description) def extract_all(pattern): - return list(filter(None, map( - lambda s: self._parse_json(s, playlist_id, fatal=False), - re.findall(pattern, webpage)))) + return list(filter(None, ( + self._parse_json(s, playlist_id, fatal=False) + for s in re.findall(pattern, webpage)))) # US accessed article with single embedded video (e.g. # https://www.bbc.com/news/uk-68546268) @@ -1435,14 +1435,14 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE # Multiple video article (e.g. # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460) - EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX + EMBED_URL = rf'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+{self._ID_REGEX}(?:\b[^"]+)?' entries = [] for match in extract_all(r'new\s+SMP\(({.+?})\)'): embed_url = match.get('playerSettings', {}).get('externalEmbedUrl') if embed_url and re.match(EMBED_URL, embed_url): entries.append(embed_url) entries.extend(re.findall( - r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage)) + rf'setPlaylist\("({EMBED_URL})"\)', webpage)) if entries: return self.playlist_result( [self.url_result(entry_, 'BBCCoUk') for entry_ in entries], @@ -1492,11 +1492,11 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE video_id = media_meta.get('externalId') if not video_id: - video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num) + video_id = playlist_id if len(medias) == 1 else f'{playlist_id}-{num}' title = media_meta.get('caption') if not title: - title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num) + title = playlist_title if len(medias) == 1 else f'{playlist_title} - Video {num}' duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration')) @@ -1557,8 +1557,8 @@ class BBCCoUkArticleIE(InfoExtractor): class BBCCoUkPlaylistBaseIE(InfoExtractor): def _entries(self, webpage, url, playlist_id): - single_page = 
'page' in compat_urlparse.parse_qs( - compat_urlparse.urlparse(url).query) + single_page = 'page' in urllib.parse.parse_qs( + urllib.parse.urlparse(url).query) for page_num in itertools.count(2): for video_id in re.findall( self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage): @@ -1572,8 +1572,8 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor): if not next_page: break webpage = self._download_webpage( - compat_urlparse.urljoin(url, next_page), playlist_id, - 'Downloading page %d' % page_num, page_num) + urllib.parse.urljoin(url, next_page), playlist_id, + f'Downloading page {page_num}', page_num) def _real_extract(self, url): playlist_id = self._match_id(url) @@ -1588,7 +1588,7 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor): class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor): - _VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P%s)' % BBCCoUkIE._ID_REGEX + _VALID_URL_TMPL = rf'https?://(?:www\.)?bbc\.co\.uk/iplayer/%s/(?P{BBCCoUkIE._ID_REGEX})' @staticmethod def _get_default(episode, key, default_key='default'): @@ -1712,11 +1712,11 @@ class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE): variables['sliceId'] = series_id return self._download_json( 'https://graph.ibl.api.bbc.co.uk/', pid, headers={ - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', }, data=json.dumps({ 'id': '5692d93d5aac8d796a0305e895e61551', 'variables': variables, - }).encode('utf-8'))['data']['programme'] + }).encode())['data']['programme'] @staticmethod def _get_playlist_data(data): @@ -1776,7 +1776,7 @@ class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE): def _call_api(self, pid, per_page, page=1, series_id=None): return self._download_json( - 'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid, + f'http://ibl.api.bbc.co.uk/ibl/v1/groups/{pid}/episodes', pid, query={ 'page': page, 'per_page': per_page, @@ -1792,7 +1792,7 @@ class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE): class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE): 
IE_NAME = 'bbc.co.uk:playlist' - _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX + _VALID_URL = rf'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P{BBCCoUkIE._ID_REGEX})/(?:episodes|broadcasts|clips)' _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s' _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)' _TESTS = [{ diff --git a/yt_dlp/extractor/beatport.py b/yt_dlp/extractor/beatport.py index 0aecbd089..acc8d1259 100644 --- a/yt_dlp/extractor/beatport.py +++ b/yt_dlp/extractor/beatport.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import int_or_none @@ -33,7 +32,7 @@ class BeatportIE(InfoExtractor): 'display_id': 'birds-original-mix', 'ext': 'mp4', 'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)", - } + }, }] def _real_extract(self, url): @@ -51,7 +50,7 @@ class BeatportIE(InfoExtractor): track = next(t for t in playables['tracks'] if t['id'] == int(track_id)) - title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name'] + title = ', '.join(a['name'] for a in track['artists']) + ' - ' + track['name'] if track['mix']: title += ' (' + track['mix'] + ')' @@ -89,7 +88,7 @@ class BeatportIE(InfoExtractor): images.append(image) return { - 'id': compat_str(track.get('id')) or track_id, + 'id': str(track.get('id')) or track_id, 'display_id': track.get('slug') or display_id, 'title': title, 'formats': formats, diff --git a/yt_dlp/extractor/beeg.py b/yt_dlp/extractor/beeg.py index da98ac314..960cdfabd 100644 --- a/yt_dlp/extractor/beeg.py +++ b/yt_dlp/extractor/beeg.py @@ -23,7 +23,7 @@ class BeegIE(InfoExtractor): 'upload_date': '20220131', 'timestamp': 1643656455, 'display_id': '2540839', - } + }, }, { 'url': 'https://beeg.com/-0599050563103750?t=4-861', 'md5': 'bd8b5ea75134f7f07fad63008db2060e', @@ -38,7 +38,7 @@ class BeegIE(InfoExtractor): 'timestamp': 1643623200, 'display_id': '2569965', 'upload_date': 
'20220131', - } + }, }, { # api/v6 v2 'url': 'https://beeg.com/1941093077?t=911-1391', @@ -55,8 +55,8 @@ class BeegIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video = self._download_json( - 'https://store.externulls.com/facts/file/%s' % video_id, - video_id, 'Downloading JSON for %s' % video_id) + f'https://store.externulls.com/facts/file/{video_id}', + video_id, f'Downloading JSON for {video_id}') fc_facts = video.get('fc_facts') first_fact = {} diff --git a/yt_dlp/extractor/behindkink.py b/yt_dlp/extractor/behindkink.py index 9d2324f4f..45f45d03b 100644 --- a/yt_dlp/extractor/behindkink.py +++ b/yt_dlp/extractor/behindkink.py @@ -16,7 +16,7 @@ class BehindKinkIE(InfoExtractor): 'upload_date': '20141205', 'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg', 'age_limit': 18, - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/bellmedia.py b/yt_dlp/extractor/bellmedia.py index 677680b42..ac45dd477 100644 --- a/yt_dlp/extractor/bellmedia.py +++ b/yt_dlp/extractor/bellmedia.py @@ -86,6 +86,6 @@ class BellMediaIE(InfoExtractor): return { '_type': 'url_transparent', 'id': video_id, - 'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id), + 'url': f'9c9media:{self._DOMAINS.get(domain, domain)}_web:{video_id}', 'ie_key': 'NineCNineMedia', } diff --git a/yt_dlp/extractor/berufetv.py b/yt_dlp/extractor/berufetv.py index 8160cbd9a..5bba33a44 100644 --- a/yt_dlp/extractor/berufetv.py +++ b/yt_dlp/extractor/berufetv.py @@ -16,7 +16,7 @@ class BerufeTVIE(InfoExtractor): 'tags': ['Studienfilm'], 'duration': 602.440, 'thumbnail': r're:^https://asset-out-cdn\.video-cdn\.net/private/videos/DvKC3DUpMKvUZ_6fEnfg3u/thumbnails/793063\?quality=thumbnail&__token__=[^\s]+$', - } + }, }] def _real_extract(self, url): @@ -54,7 +54,7 @@ class BerufeTVIE(InfoExtractor): subtitles.setdefault(track['language'], []).append({ 'url': track['source'], 'name': track.get('label'), - 'ext': 'vtt' + 'ext': 
'vtt', }) return { diff --git a/yt_dlp/extractor/bet.py b/yt_dlp/extractor/bet.py index cbf3dd082..3a8e74309 100644 --- a/yt_dlp/extractor/bet.py +++ b/yt_dlp/extractor/bet.py @@ -19,7 +19,7 @@ class BetIE(MTVServicesInfoExtractor): 'thumbnail': r're:(?i)^https?://.*\.jpg$', 'subtitles': { 'en': 'mincount:2', - } + }, }, 'params': { # rtmp download @@ -39,16 +39,16 @@ class BetIE(MTVServicesInfoExtractor): 'thumbnail': r're:(?i)^https?://.*\.jpg$', 'subtitles': { 'en': 'mincount:2', - } + }, }, 'params': { # rtmp download 'skip_download': True, }, - } + }, ] - _FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player" + _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/bet-mrss-player' def _get_feed_query(self, uri): return { diff --git a/yt_dlp/extractor/bfmtv.py b/yt_dlp/extractor/bfmtv.py index c4621ca82..87f011783 100644 --- a/yt_dlp/extractor/bfmtv.py +++ b/yt_dlp/extractor/bfmtv.py @@ -98,8 +98,8 @@ class BFMTVArticleIE(BFMTVBaseIE): 'timestamp': 1673341692, 'duration': 109.269, 'tags': ['rmc', 'show', 'apolline de malherbe', 'info', 'talk', 'matinale', 'radio'], - 'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg' - } + 'thumbnail': 'https://cf-images.eu-west-1.prod.boltdns.net/v1/static/876630703001/5bef74b8-9d5e-4480-a21f-60c2e2480c46/96c88b74-f9db-45e1-8040-e199c5da216c/1920x1080/match/image.jpg', + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/bigflix.py b/yt_dlp/extractor/bigflix.py index 02d1ba0e3..9c55bb968 100644 --- a/yt_dlp/extractor/bigflix.py +++ b/yt_dlp/extractor/bigflix.py @@ -1,10 +1,8 @@ +import base64 import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_b64decode, - compat_urllib_parse_unquote, -) class BigflixIE(InfoExtractor): @@ -21,7 +19,7 @@ class BigflixIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, }, { # 
multiple formats 'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967', @@ -38,7 +36,7 @@ class BigflixIE(InfoExtractor): webpage, 'title') def decode_url(quoted_b64_url): - return compat_b64decode(compat_urllib_parse_unquote( + return base64.b64decode(urllib.parse.unquote( quoted_b64_url)).decode('utf-8') formats = [] @@ -47,7 +45,7 @@ class BigflixIE(InfoExtractor): video_url = decode_url(encoded_url) f = { 'url': video_url, - 'format_id': '%sp' % height, + 'format_id': f'{height}p', 'height': int(height), } if video_url.startswith('rtmp'): @@ -69,5 +67,5 @@ class BigflixIE(InfoExtractor): 'id': video_id, 'title': title, 'description': description, - 'formats': formats + 'formats': formats, } diff --git a/yt_dlp/extractor/bigo.py b/yt_dlp/extractor/bigo.py index acf78e49a..b1c230f35 100644 --- a/yt_dlp/extractor/bigo.py +++ b/yt_dlp/extractor/bigo.py @@ -36,7 +36,7 @@ class BigoIE(InfoExtractor): raise ExtractorError('Received invalid JSON data') if info_raw.get('code'): raise ExtractorError( - 'Bigo says: %s (code %s)' % (info_raw.get('msg'), info_raw.get('code')), expected=True) + 'Bigo says: {} (code {})'.format(info_raw.get('msg'), info_raw.get('code')), expected=True) info = info_raw.get('data') or {} if not info.get('alive'): diff --git a/yt_dlp/extractor/bild.py b/yt_dlp/extractor/bild.py index eb289329d..2ba63700c 100644 --- a/yt_dlp/extractor/bild.py +++ b/yt_dlp/extractor/bild.py @@ -20,7 +20,7 @@ class BildIE(InfoExtractor): 'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 196, - } + }, }, { 'note': 'static MP4 and HLS', 'url': 'https://www.bild.de/video/clip/news-ausland/deftiger-abgang-vom-10m-turm-bademeister-sorgt-fuer-skandal-85158620.bild.html', @@ -32,7 +32,7 @@ class BildIE(InfoExtractor): 'description': 'md5:709b543c24dc31bbbffee73bccda34ad', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 69, - } + }, }] def _real_extract(self, url): diff --git 
a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index b38c90b1d..411b48c28 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -112,7 +112,7 @@ class BilibiliBaseIE(InfoExtractor): 'danmaku': [{ 'ext': 'xml', 'url': f'https://comment.bilibili.com/{cid}.xml', - }] + }], } subtitle_info = traverse_obj(self._download_json( @@ -126,7 +126,7 @@ class BilibiliBaseIE(InfoExtractor): for s in subs_list: subtitles.setdefault(s['lan'], []).append({ 'ext': 'srt', - 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)) + 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)), }) return subtitles @@ -215,7 +215,7 @@ class BilibiliBaseIE(InfoExtractor): yield { **metainfo, 'id': f'{video_id}_{cid}', - 'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}', + 'title': f'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}', 'formats': self.extract_formats(play_info), 'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}', 'duration': float_or_none(play_info.get('timelength'), scale=1000), @@ -269,7 +269,7 @@ class BiliBiliIE(BilibiliBaseIE): 'url': 'https://www.bilibili.com/video/BV1bK411W797', 'info_dict': { 'id': 'BV1bK411W797', - 'title': '物语中的人物是如何吐槽自己的OP的' + 'title': '物语中的人物是如何吐槽自己的OP的', }, 'playlist_count': 18, 'playlist': [{ @@ -288,8 +288,8 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'description': 'md5:e3c401cf7bc363118d1783dd74068a68', 'duration': 90.314, - } - }] + }, + }], }, { 'note': 'Specific page of Anthology', 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1', @@ -308,7 +308,7 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'description': 'md5:e3c401cf7bc363118d1783dd74068a68', 'duration': 90.314, - } + }, }, { 'note': 'video has subtitles', 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh', @@ -327,7 +327,7 @@ class BiliBiliIE(BilibiliBaseIE): 'view_count': int, 'like_count': int, 
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', - 'subtitles': 'count:2' + 'subtitles': 'count:2', }, 'params': {'listsubtitles': True}, }, { @@ -586,10 +586,9 @@ class BiliBiliIE(BilibiliBaseIE): is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate')) if is_interactive: return self.playlist_result( - self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{ - 'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})), - '__post_extractor': self.extract_comments(aid), - }) + self._get_interactive_entries(video_id, cid, metainfo), **metainfo, + duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})), + __post_extractor=self.extract_comments(aid)) else: return { **metainfo, @@ -640,7 +639,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE): 'duration': 1425.256, 'timestamp': 1554566400, 'upload_date': '20190406', - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, 'skip': 'Geo-restricted', }, { @@ -661,7 +660,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE): 'duration': 1922.129, 'timestamp': 1602853860, 'upload_date': '20201016', - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }] @@ -764,7 +763,7 @@ class BiliBiliBangumiMediaIE(BilibiliBaseIE): 'duration': 1525.777, 'timestamp': 1425074413, 'upload_date': '20150227', - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' + 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }], }] @@ -794,7 +793,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE): 'title': '鬼灭之刃', 'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b', }, - 'playlist_mincount': 26 + 'playlist_mincount': 26, }, { 'url': 'https://www.bilibili.com/bangumi/play/ss2251', 'info_dict': { @@ -819,7 +818,7 @@ class BiliBiliBangumiSeasonIE(BilibiliBaseIE): 'duration': 1436.992, 'timestamp': 1343185080, 'upload_date': '20120725', - 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$' + 
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', }, }], }] @@ -906,7 +905,7 @@ class BilibiliCheeseIE(BilibiliCheeseBaseIE): 'upload_date': '20230924', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'view_count': int, - } + }, }] def _real_extract(self, url): @@ -939,7 +938,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE): 'upload_date': '20230924', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'view_count': int, - } + }, }], 'params': {'playlist_items': '1'}, }, { @@ -1012,7 +1011,7 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE): for position in ( 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, - 57, 62, 11, 36, 20, 34, 44, 52 + 57, 62, 11, 36, 20, 34, 44, 52, ): char_at_position = try_call(lambda: session_key[position]) if char_at_position: @@ -1163,7 +1162,7 @@ class BilibiliCollectionListIE(BilibiliSpaceListBaseIE): 'uploader_id': ('meta', 'mid', {str_or_none}), 'timestamp': ('meta', 'ptime', {int_or_none}), 'thumbnail': ('meta', 'cover', {url_or_none}), - }) + }), } def get_entries(page_data): @@ -1195,7 +1194,7 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE): mid, sid = self._match_valid_url(url).group('mid', 'sid') playlist_id = f'{mid}_{sid}' playlist_meta = traverse_obj(self._download_json( - f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False + f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False, ), { 'title': ('data', 'meta', 'name', {str}), 'description': ('data', 'meta', 'description', {str}), @@ -1217,7 +1216,7 @@ class BilibiliSeriesListIE(BilibiliSpaceListBaseIE): 'page_count': math.ceil(entry_count / page_size), 'page_size': page_size, 'uploader': self._get_uploader(mid, playlist_id), - **playlist_meta + **playlist_meta, } def get_entries(page_data): @@ -1241,7 +1240,7 @@ class 
BilibiliFavoritesListIE(BilibiliSpaceListBaseIE): 'upload_date': '20201109', 'modified_timestamp': int, 'modified_date': str, - 'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg", + 'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg', 'view_count': int, 'like_count': int, }, @@ -1345,7 +1344,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE): 'uploader_id': '84912', 'timestamp': 1604905176, 'upload_date': '20201109', - 'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg", + 'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg', }, 'playlist_mincount': 22, }, { @@ -1371,7 +1370,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE): for page_num in itertools.count(1): page_data = self._download_json( 'https://api.bilibili.com/x/v2/medialist/resource/list', - list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}' + list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}', )['data'] yield from self._get_entries(page_data, 'media_list', ending_key='bv_id') query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id')) @@ -1407,7 +1406,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE): 'tid': ('tid', {int_or_none}), 'sort_field': ('sortFiled', {int_or_none}), 'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}), - }) + }), } metadata = { 'id': f'{query["type"]}_{query["biz_id"]}', @@ -1430,26 +1429,26 @@ class BilibiliCategoryIE(InfoExtractor): 'url': 'https://www.bilibili.com/v/kichiku/mad', 'info_dict': { 'id': 'kichiku: mad', - 'title': 'kichiku: mad' + 'title': 'kichiku: mad', }, 'playlist_mincount': 45, 'params': { - 'playlistend': 45 - } + 'playlistend': 45, + }, }] def _fetch_page(self, api_url, num_pages, query, page_num): parsed_json = self._download_json( api_url, query, query={'Search_key': query, 'pn': 
page_num}, - note='Extracting results from page %s of %s' % (page_num, num_pages)) + note=f'Extracting results from page {page_num} of {num_pages}') video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list) if not video_list: - raise ExtractorError('Failed to retrieve video list for page %d' % page_num) + raise ExtractorError(f'Failed to retrieve video list for page {page_num}') for video in video_list: yield self.url_result( - 'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid']) + 'https://www.bilibili.com/video/{}'.format(video['bvid']), 'BiliBili', video['bvid']) def _entries(self, category, subcategory, query): # map of categories : subcategories : RIDs @@ -1459,7 +1458,7 @@ class BilibiliCategoryIE(InfoExtractor): 'manual_vocaloid': 126, 'guide': 22, 'theatre': 216, - 'course': 127 + 'course': 127, }, } @@ -1485,7 +1484,7 @@ class BilibiliCategoryIE(InfoExtractor): def _real_extract(self, url): category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4] - query = '%s: %s' % (category, subcategory) + query = f'{category}: {subcategory}' return self.playlist_result(self._entries(category, subcategory, query), query, query) @@ -1588,7 +1587,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE): formats = [{ 'url': play_data['cdns'][0], 'filesize': int_or_none(play_data.get('size')), - 'vcodec': 'none' + 'vcodec': 'none', }] for a_format in formats: @@ -1606,7 +1605,7 @@ class BilibiliAudioIE(BilibiliAudioBaseIE): subtitles = { 'origin': [{ 'url': lyric, - }] + }], } return { @@ -1674,7 +1673,7 @@ class BiliBiliPlayerIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) return self.url_result( - 'http://www.bilibili.tv/video/av%s/' % video_id, + f'http://www.bilibili.tv/video/av{video_id}/', ie=BiliBiliIE.ie_key(), video_id=video_id) @@ -1702,11 +1701,10 @@ class BiliIntlBaseIE(InfoExtractor): return json.get('data') def json2srt(self, json): - data = '\n\n'.join( + return 
'\n\n'.join( f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}' for i, line in enumerate(traverse_obj(json, ( 'body', lambda _, l: l['content'] and l['from'] and l['to'])))) - return data def _get_subtitles(self, *, ep_id=None, aid=None): sub_json = self._call_api( @@ -1808,14 +1806,14 @@ class BiliIntlBaseIE(InfoExtractor): note='Downloading login key', errnote='Unable to download login key')['data'] public_key = Cryptodome.RSA.importKey(key_data['key']) - password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8')) + password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode()) login_post = self._download_json( 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({ 'username': username, 'password': base64.b64encode(password_hash).decode('ascii'), 'keep_me': 'true', 's_locale': 'en_US', - 'isTrusted': 'true' + 'isTrusted': 'true', }), note='Logging in', errnote='Unable to log in') if login_post.get('code'): if login_post.get('message'): @@ -1842,17 +1840,17 @@ class BiliIntlIE(BiliIntlBaseIE): 'chapters': [{ 'start_time': 0, 'end_time': 76.242, - 'title': '' + 'title': '', }, { 'start_time': 76.242, 'end_time': 161.161, - 'title': 'Intro' + 'title': 'Intro', }, { 'start_time': 1325.742, 'end_time': 1403.903, - 'title': 'Outro' + 'title': 'Outro', }], - } + }, }, { # Non-Bstation page 'url': 'https://www.bilibili.tv/en/play/1033760/11005006', @@ -1869,17 +1867,17 @@ class BiliIntlIE(BiliIntlBaseIE): 'chapters': [{ 'start_time': 0, 'end_time': 88.0, - 'title': '' + 'title': '', }, { 'start_time': 88.0, 'end_time': 156.0, - 'title': 'Intro' + 'title': 'Intro', }, { 'start_time': 1173.0, 'end_time': 1259.535, - 'title': 'Outro' + 'title': 'Outro', }], - } + }, }, { # Subtitle with empty content 'url': 'https://www.bilibili.tv/en/play/1005144/10131790', @@ -1890,7 
+1888,7 @@ class BiliIntlIE(BiliIntlBaseIE): 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$', 'episode_number': 140, }, - 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.' + 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.', }, { # episode comment extraction 'url': 'https://www.bilibili.tv/en/play/34580/340317', @@ -1908,20 +1906,20 @@ class BiliIntlIE(BiliIntlBaseIE): 'chapters': [{ 'start_time': 0, 'end_time': 61.0, - 'title': '' + 'title': '', }, { 'start_time': 61.0, 'end_time': 134.0, - 'title': 'Intro' + 'title': 'Intro', }, { 'start_time': 1290.0, 'end_time': 1379.0, - 'title': 'Outro' + 'title': 'Outro', }], }, 'params': { - 'getcomments': True - } + 'getcomments': True, + }, }, { # user generated content comment extraction 'url': 'https://www.bilibili.tv/en/video/2045730385', @@ -1936,8 +1934,8 @@ class BiliIntlIE(BiliIntlBaseIE): 'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg', }, 'params': { - 'getcomments': True - } + 'getcomments': True, + }, }, { # episode id without intro and outro 'url': 'https://www.bilibili.tv/en/play/1048837/11246489', @@ -1992,7 +1990,7 @@ class BiliIntlIE(BiliIntlBaseIE): # Non-Bstation layout, read through episode list season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id) video_data = traverse_obj(season_json, ( - 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id + 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id, ), expected_type=dict, get_all=False) # XXX: webpage metadata may not accurate, it just used to not crash when video_data not found @@ -2024,7 +2022,7 @@ class BiliIntlIE(BiliIntlBaseIE): 'id': replies.get('rpid'), 'like_count': int_or_none(replies.get('like_count')), 'parent': replies.get('parent'), - 'timestamp': 
unified_timestamp(replies.get('ctime_text')) + 'timestamp': unified_timestamp(replies.get('ctime_text')), } if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')): @@ -2077,11 +2075,11 @@ class BiliIntlIE(BiliIntlBaseIE): chapters = [{ 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000), 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000), - 'title': 'Intro' + 'title': 'Intro', }, { 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000), 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000), - 'title': 'Outro' + 'title': 'Outro', }] return { @@ -2137,7 +2135,7 @@ class BiliIntlSeriesIE(BiliIntlBaseIE): episode_id = str(episode['episode_id']) yield self.url_result(smuggle_url( BiliIntlIE._make_url(episode_id, series_id), - self._parse_video_metadata(episode) + self._parse_video_metadata(episode), ), BiliIntlIE, episode_id) def _real_extract(self, url): @@ -2156,19 +2154,19 @@ class BiliLiveIE(InfoExtractor): 'url': 'https://live.bilibili.com/196', 'info_dict': { 'id': '33989', - 'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)", + 'description': '周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)', 'ext': 'flv', - 'title': "太空狼人杀联动,不被爆杀就算赢", - 'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg", + 'title': '太空狼人杀联动,不被爆杀就算赢', + 'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg', 'timestamp': 1650802769, }, - 'skip': 'not live' + 'skip': 'not live', }, { 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click', - 'only_matching': True + 'only_matching': True, }, { 'url': 'https://live.bilibili.com/blanc/196', 
- 'only_matching': True + 'only_matching': True, }] _FORMATS = { @@ -2209,7 +2207,7 @@ class BiliLiveIE(InfoExtractor): raise ExtractorError('Streamer is not live', expected=True) formats = [] - for qn in self._FORMATS.keys(): + for qn in self._FORMATS: stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, { 'room_id': room_id, 'qn': qn, diff --git a/yt_dlp/extractor/bitchute.py b/yt_dlp/extractor/bitchute.py index 194bf1f46..c74f34c2a 100644 --- a/yt_dlp/extractor/bitchute.py +++ b/yt_dlp/extractor/bitchute.py @@ -39,7 +39,7 @@ class BitChuteIE(InfoExtractor): 'upload_date': '20170103', 'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/', 'channel': 'BitChute', - 'channel_url': 'https://www.bitchute.com/channel/bitchute/' + 'channel_url': 'https://www.bitchute.com/channel/bitchute/', }, }, { # test case: video with different channel and uploader @@ -55,7 +55,7 @@ class BitChuteIE(InfoExtractor): 'upload_date': '20231106', 'uploader_url': 'https://www.bitchute.com/profile/9K0kUWA9zmd9/', 'channel': 'Full Measure with Sharyl Attkisson', - 'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/' + 'channel_url': 'https://www.bitchute.com/channel/sharylattkisson/', }, }, { # video not downloadable in browser, but we can recover it @@ -72,7 +72,7 @@ class BitChuteIE(InfoExtractor): 'upload_date': '20181113', 'uploader_url': 'https://www.bitchute.com/profile/I5NgtHZn9vPj/', 'channel': 'BitChute', - 'channel_url': 'https://www.bitchute.com/channel/bitchute/' + 'channel_url': 'https://www.bitchute.com/channel/bitchute/', }, 'params': {'check_formats': None}, }, { @@ -115,7 +115,7 @@ class BitChuteIE(InfoExtractor): continue return { 'url': url, - 'filesize': int_or_none(response.headers.get('Content-Length')) + 'filesize': int_or_none(response.headers.get('Content-Length')), } def _raise_if_restricted(self, webpage): @@ -196,7 +196,7 @@ class BitChuteChannelIE(InfoExtractor): 'duration': 16, 'view_count': int, }, - } + }, 
], 'params': { 'skip_download': True, @@ -209,7 +209,7 @@ class BitChuteChannelIE(InfoExtractor): 'id': 'wV9Imujxasw9', 'title': 'Bruce MacDonald and "The Light of Darkness"', 'description': 'md5:747724ef404eebdfc04277714f81863e', - } + }, }] _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7' @@ -224,7 +224,7 @@ class BitChuteChannelIE(InfoExtractor): 'container': 'playlist-video', 'title': 'title', 'description': 'description', - } + }, } diff --git a/yt_dlp/extractor/blackboardcollaborate.py b/yt_dlp/extractor/blackboardcollaborate.py index 8f41c897a..535890979 100644 --- a/yt_dlp/extractor/blackboardcollaborate.py +++ b/yt_dlp/extractor/blackboardcollaborate.py @@ -47,7 +47,7 @@ class BlackboardCollaborateIE(InfoExtractor): region = mobj.group('region') video_id = mobj.group('id') info = self._download_json( - 'https://{}.bbcollab.com/collab/api/csa/recordings/{}/data'.format(region, video_id), video_id) + f'https://{region}.bbcollab.com/collab/api/csa/recordings/{video_id}/data', video_id) duration = info.get('duration') title = info['name'] upload_date = info.get('created') diff --git a/yt_dlp/extractor/bleacherreport.py b/yt_dlp/extractor/bleacherreport.py index aa3d63ee7..71b237d4b 100644 --- a/yt_dlp/extractor/bleacherreport.py +++ b/yt_dlp/extractor/bleacherreport.py @@ -44,7 +44,7 @@ class BleacherReportIE(InfoExtractor): def _real_extract(self, url): article_id = self._match_id(url) - article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article'] + article_data = self._download_json(f'http://api.bleacherreport.com/api/v1/articles/{article_id}', article_id)['article'] thumbnails = [] primary_photo = article_data.get('primaryPhoto') @@ -71,11 +71,11 @@ class BleacherReportIE(InfoExtractor): if video: video_type = video['type'] if video_type in ('cms.bleacherreport.com', 'vid.bleacherreport.com'): - info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id'] + 
info['url'] = 'http://bleacherreport.com/video_embed?id={}'.format(video['id']) elif video_type == 'youtube.com': info['url'] = video['id'] elif video_type == 'vine.co': - info['url'] = 'https://vine.co/v/%s' % video['id'] + info['url'] = 'https://vine.co/v/{}'.format(video['id']) else: info['url'] = video_type + video['id'] return info @@ -99,12 +99,12 @@ class BleacherReportCMSIE(AMPIE): }, 'expected_warnings': [ - 'Unable to download f4m manifest' - ] + 'Unable to download f4m manifest', + ], }] def _real_extract(self, url): video_id = self._match_id(url) - info = self._extract_feed_info('http://vid.bleacherreport.com/videos/%s.akamai' % video_id) + info = self._extract_feed_info(f'http://vid.bleacherreport.com/videos/{video_id}.akamai') info['id'] = video_id return info diff --git a/yt_dlp/extractor/blerp.py b/yt_dlp/extractor/blerp.py index 4631ad2e9..f4f22488e 100644 --- a/yt_dlp/extractor/blerp.py +++ b/yt_dlp/extractor/blerp.py @@ -16,7 +16,7 @@ class BlerpIE(InfoExtractor): 'uploader_id': '5fb81e51aa66ae000c395478', 'ext': 'mp3', 'tags': ['samsung', 'galaxy', 's8', 'over the horizon', '2016', 'ringtone'], - } + }, }, { 'url': 'https://blerp.com/soundbites/5bc94ef4796001000498429f', 'info_dict': { @@ -25,11 +25,11 @@ class BlerpIE(InfoExtractor): 'uploader': '179617322678353920', 'uploader_id': '5ba99cf71386730004552c42', 'ext': 'mp3', - 'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee'] - } + 'tags': ['YEE', 'YEET', 'wo ha haah catchy tune yee', 'yee'], + }, }] - _GRAPHQL_OPERATIONNAME = "webBitePageGetBite" + _GRAPHQL_OPERATIONNAME = 'webBitePageGetBite' _GRAPHQL_QUERY = ( '''query webBitePageGetBite($_id: MongoID!) 
{ web { @@ -141,27 +141,26 @@ class BlerpIE(InfoExtractor): 'operationName': self._GRAPHQL_OPERATIONNAME, 'query': self._GRAPHQL_QUERY, 'variables': { - '_id': audio_id - } + '_id': audio_id, + }, } headers = { - 'Content-Type': 'application/json' + 'Content-Type': 'application/json', } - json_result = self._download_json('https://api.blerp.com/graphql', - audio_id, data=json.dumps(data).encode('utf-8'), headers=headers) + json_result = self._download_json( + 'https://api.blerp.com/graphql', audio_id, + data=json.dumps(data).encode(), headers=headers) bite_json = json_result['data']['web']['biteById'] - info_dict = { + return { 'id': bite_json['_id'], 'url': bite_json['audio']['mp3']['url'], 'title': bite_json['title'], 'uploader': traverse_obj(bite_json, ('ownerObject', 'username'), expected_type=strip_or_none), 'uploader_id': traverse_obj(bite_json, ('ownerObject', '_id'), expected_type=strip_or_none), 'ext': 'mp3', - 'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None) + 'tags': list(filter(None, map(strip_or_none, (traverse_obj(bite_json, 'userKeywords', expected_type=list) or []))) or None), } - - return info_dict diff --git a/yt_dlp/extractor/blogger.py b/yt_dlp/extractor/blogger.py index ef0151de6..1614b6f94 100644 --- a/yt_dlp/extractor/blogger.py +++ b/yt_dlp/extractor/blogger.py @@ -21,14 +21,14 @@ class BloggerIE(InfoExtractor): 'ext': 'mp4', 'thumbnail': r're:^https?://.*', 'duration': 76.068, - } + }, }] def _real_extract(self, url): token_id = self._match_id(url) webpage = self._download_webpage(url, token_id) data_json = self._search_regex(r'var\s+VIDEO_CONFIG\s*=\s*(\{.*)', webpage, 'JSON data') - data = self._parse_json(data_json.encode('utf-8').decode('unicode_escape'), token_id) + data = self._parse_json(data_json.encode().decode('unicode_escape'), token_id) streams = data['streams'] formats = [{ 'ext': mimetype2ext(traverse_obj(parse_qs(stream['play_url']), ('mime', 0))), 
diff --git a/yt_dlp/extractor/bloomberg.py b/yt_dlp/extractor/bloomberg.py index 792155e51..ec6b7a86e 100644 --- a/yt_dlp/extractor/bloomberg.py +++ b/yt_dlp/extractor/bloomberg.py @@ -55,7 +55,7 @@ class BloombergIE(InfoExtractor): title = re.sub(': Video$', '', self._og_search_title(webpage)) embed_info = self._download_json( - 'http://www.bloomberg.com/multimedia/api/embed?id=%s' % video_id, video_id) + f'http://www.bloomberg.com/multimedia/api/embed?id={video_id}', video_id) formats = [] for stream in embed_info['streams']: stream_url = stream.get('url') diff --git a/yt_dlp/extractor/bokecc.py b/yt_dlp/extractor/bokecc.py index ca326f25f..5fe937a6a 100644 --- a/yt_dlp/extractor/bokecc.py +++ b/yt_dlp/extractor/bokecc.py @@ -1,5 +1,6 @@ +import urllib.parse + from .common import InfoExtractor -from ..compat import compat_parse_qs from ..utils import ExtractorError @@ -9,20 +10,18 @@ class BokeCCBaseIE(InfoExtractor): r'<(?:script|embed)[^>]+src=(?P["\'])(?:https?:)?//p\.bokecc\.com/(?:player|flash/player\.swf)\?(?P.+?)(?P=q)', webpage, 'player params', group='query') - player_params = compat_parse_qs(player_params_str) + player_params = urllib.parse.parse_qs(player_params_str) info_xml = self._download_xml( - 'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % ( + 'http://p.bokecc.com/servlet/playinfo?uid={}&vid={}&m=1'.format( player_params['siteid'][0], player_params['vid'][0]), video_id) - formats = [{ + return [{ 'format_id': format_id, 'url': quality.find('./copy').attrib['playurl'], 'quality': int(quality.attrib['value']), } for quality in info_xml.findall('./video/quality')] - return formats - class BokeCCIE(BokeCCBaseIE): _IE_DESC = 'CC视频' @@ -38,11 +37,11 @@ class BokeCCIE(BokeCCBaseIE): }] def _real_extract(self, url): - qs = compat_parse_qs(self._match_valid_url(url).group('query')) + qs = urllib.parse.parse_qs(self._match_valid_url(url).group('query')) if not qs.get('vid') or not qs.get('uid'): raise ExtractorError('Invalid URL', 
expected=True) - video_id = '%s_%s' % (qs['uid'][0], qs['vid'][0]) + video_id = '{}_{}'.format(qs['uid'][0], qs['vid'][0]) webpage = self._download_webpage(url, video_id) diff --git a/yt_dlp/extractor/bongacams.py b/yt_dlp/extractor/bongacams.py index bf955668d..ab85477de 100644 --- a/yt_dlp/extractor/bongacams.py +++ b/yt_dlp/extractor/bongacams.py @@ -1,5 +1,4 @@ from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( int_or_none, try_get, @@ -38,7 +37,7 @@ class BongaCamsIE(InfoExtractor): channel_id = mobj.group('id') amf = self._download_json( - 'https://%s/tools/amf.php' % host, channel_id, + f'https://{host}/tools/amf.php', channel_id, data=urlencode_postdata(( ('method', 'getRoomData'), ('args[]', channel_id), @@ -48,14 +47,14 @@ class BongaCamsIE(InfoExtractor): server_url = amf['localData']['videoServerUrl'] uploader_id = try_get( - amf, lambda x: x['performerData']['username'], compat_str) or channel_id + amf, lambda x: x['performerData']['username'], str) or channel_id uploader = try_get( - amf, lambda x: x['performerData']['displayName'], compat_str) + amf, lambda x: x['performerData']['displayName'], str) like_count = int_or_none(try_get( amf, lambda x: x['performerData']['loversCount'])) formats = self._extract_m3u8_formats( - '%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id), + f'{server_url}/hls/stream_{uploader_id}/playlist.m3u8', channel_id, 'mp4', m3u8_id='hls', live=True) return { diff --git a/yt_dlp/extractor/bostonglobe.py b/yt_dlp/extractor/bostonglobe.py index 267586687..f5b819678 100644 --- a/yt_dlp/extractor/bostonglobe.py +++ b/yt_dlp/extractor/bostonglobe.py @@ -57,8 +57,7 @@ class BostonGlobeIE(InfoExtractor): if video_id and account_id and player_id and embed: entries.append( - 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' - % (account_id, player_id, embed, video_id)) + f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}') if 
len(entries) == 0: return self.url_result(url, 'Generic') diff --git a/yt_dlp/extractor/box.py b/yt_dlp/extractor/box.py index 008c011cc..3547ad997 100644 --- a/yt_dlp/extractor/box.py +++ b/yt_dlp/extractor/box.py @@ -72,20 +72,20 @@ class BoxIE(InfoExtractor): 'BoxApi': 'shared_link=' + shared_link, 'X-Rep-Hints': '[dash]', # TODO: extract `hls` formats }, query={ - 'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size' + 'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size', }) title = f['name'] query = { 'access_token': access_token, - 'shared_link': shared_link + 'shared_link': shared_link, } formats = [] for url_tmpl in traverse_obj(f, ( 'representations', 'entries', lambda _, v: v['representation'] == 'dash', - 'content', 'url_template', {url_or_none} + 'content', 'url_template', {url_or_none}, )): manifest_url = update_url_query(url_tmpl.replace('{+asset_path}', 'manifest.mpd'), query) fmts = self._extract_mpd_formats(manifest_url, file_id) diff --git a/yt_dlp/extractor/boxcast.py b/yt_dlp/extractor/boxcast.py index da06cc3f8..efa66994a 100644 --- a/yt_dlp/extractor/boxcast.py +++ b/yt_dlp/extractor/boxcast.py @@ -21,7 +21,7 @@ class BoxCastVideoIE(InfoExtractor): 'release_date': '20221210', 'uploader_id': 're8w0v8hohhvpqtbskpe', 'uploader': 'Children\'s Health Defense', - } + }, }, { 'url': 'https://boxcast.tv/video-portal/vctwevwntun3o0ikq7af/rvyblnn0fxbfjx5nwxhl/otbpltj2kzkveo2qz3ad', 'info_dict': { @@ -30,8 +30,8 @@ class BoxCastVideoIE(InfoExtractor): 'uploader_id': 'vctwevwntun3o0ikq7af', 'uploader': 'Legacy Christian Church', 'title': 'The Quest | 1: Beginner\'s Bay | Jamie Schools', - 'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg' - } + 'thumbnail': r're:https?://uploads.boxcast.com/(?:[\w-]+/){3}.+\.jpg', + }, }, { 'url': 
'https://boxcast.tv/channel/z03fqwaeaby5lnaawox2?b=ssihlw5gvfij2by8tkev', 'info_dict': { @@ -44,7 +44,7 @@ class BoxCastVideoIE(InfoExtractor): 'uploader': 'Lighthouse Ministries International - Beltsville, Maryland', 'description': 'md5:ac23e3d01b0b0be592e8f7fe0ec3a340', 'title': 'New Year\'s Eve CROSSOVER Service at LHMI | December 31, 2022', - } + }, }] _WEBPAGE_TESTS = [{ 'url': 'https://childrenshealthdefense.eu/live-stream/', @@ -57,7 +57,7 @@ class BoxCastVideoIE(InfoExtractor): 'release_date': '20221210', 'uploader_id': 're8w0v8hohhvpqtbskpe', 'uploader': 'Children\'s Health Defense', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/br.py b/yt_dlp/extractor/br.py index 6e1c63e2b..0568e06f6 100644 --- a/yt_dlp/extractor/br.py +++ b/yt_dlp/extractor/br.py @@ -61,7 +61,7 @@ class BRIE(InfoExtractor): 'title': 'Umweltbewusster Häuslebauer', 'description': 'md5:d52dae9792d00226348c1dbb13c9bae2', 'duration': 116, - } + }, }, { 'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html', @@ -74,7 +74,7 @@ class BRIE(InfoExtractor): 'duration': 893, 'uploader': 'Eva Maria Steimle', 'upload_date': '20170208', - } + }, }, ] @@ -142,7 +142,7 @@ class BRIE(InfoExtractor): http_format_info = format_info.copy() http_format_info.update({ 'url': format_url, - 'format_id': 'http-%s' % asset_type, + 'format_id': f'http-{asset_type}', }) formats.append(http_format_info) server_prefix = xpath_text(asset, 'serverPrefix') @@ -151,7 +151,7 @@ class BRIE(InfoExtractor): rtmp_format_info.update({ 'url': server_prefix, 'play_path': xpath_text(asset, 'fileName'), - 'format_id': 'rtmp-%s' % asset_type, + 'format_id': f'rtmp-{asset_type}', }) formats.append(rtmp_format_info) return formats diff --git a/yt_dlp/extractor/brainpop.py b/yt_dlp/extractor/brainpop.py index 04b1dd80c..df10299a0 100644 --- a/yt_dlp/extractor/brainpop.py +++ b/yt_dlp/extractor/brainpop.py @@ -52,8 +52,8 @@ class 
BrainPOPBaseIE(InfoExtractor): '%s': {}, 'ad_%s': { 'format_note': 'Audio description', - 'source_preference': -2 - } + 'source_preference': -2, + }, } for additional_key_format, additional_key_fields in additional_key_formats.items(): for key_quality, key_index in enumerate(('high', 'low')): @@ -62,7 +62,7 @@ class BrainPOPBaseIE(InfoExtractor): formats.extend(self._assemble_formats(data[full_key_index], full_key_index, display_id, token, { 'quality': -1 - key_quality, **additional_key_fields, - **extra_fields + **extra_fields, })) return formats @@ -72,7 +72,7 @@ class BrainPOPBaseIE(InfoExtractor): data=json.dumps({'username': username, 'password': password}).encode(), headers={ 'Content-Type': 'application/json', - 'Referer': self._ORIGIN + 'Referer': self._ORIGIN, }, note='Logging in', errnote='Unable to log in', expected_status=400) status_code = int_or_none(login_res['status_code']) if status_code != 1505: @@ -131,12 +131,12 @@ class BrainPOPIE(BrainPOPBaseIE): formats, subtitles = [], {} formats.extend(self._extract_adaptive_formats(movie_feature_data, movie_feature_data.get('token', ''), display_id, '%s_v2', { 'language': movie_feature.get('language') or 'en', - 'language_preference': 10 + 'language_preference': 10, })) for lang, localized_feature in traverse_obj(movie_feature, 'localization', default={}, expected_type=dict).items(): formats.extend(self._extract_adaptive_formats(localized_feature, localized_feature.get('token', ''), display_id, '%s_v2', { 'language': lang, - 'language_preference': -10 + 'language_preference': -10, })) # TODO: Do localization fields also have subtitles? 
@@ -145,7 +145,7 @@ class BrainPOPIE(BrainPOPBaseIE): r'^subtitles_(?P\w+)$', name, 'subtitle metadata', default=None) if lang and url: subtitles.setdefault(lang, []).append({ - 'url': urljoin(self._CDN_URL, url) + 'url': urljoin(self._CDN_URL, url), }) return { diff --git a/yt_dlp/extractor/bravotv.py b/yt_dlp/extractor/bravotv.py index 419fe8c9c..ec72f0d88 100644 --- a/yt_dlp/extractor/bravotv.py +++ b/yt_dlp/extractor/bravotv.py @@ -185,5 +185,5 @@ class BravoTVIE(AdobePassIE): 'episode_number': ('episodeNumber', {int_or_none}), 'episode': 'episodeTitle', 'series': 'show', - })) + })), } diff --git a/yt_dlp/extractor/breitbart.py b/yt_dlp/extractor/breitbart.py index b5abb7f19..fedf4772a 100644 --- a/yt_dlp/extractor/breitbart.py +++ b/yt_dlp/extractor/breitbart.py @@ -13,7 +13,7 @@ class BreitBartIE(InfoExtractor): 'description': 'md5:bac35eb0256d1cb17f517f54c79404d5', 'thumbnail': 'https://cdn.jwplayer.com/thumbs/5cOz1yup-1920.jpg', 'age_limit': 0, - } + }, }, { 'url': 'https://www.breitbart.com/videos/v/eaiZjVOn/', 'only_matching': True, @@ -30,5 +30,5 @@ class BreitBartIE(InfoExtractor): 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), 'age_limit': self._rta_search(webpage), - 'formats': formats + 'formats': formats, } diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index 4190e1a09..dc0c83572 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -1,15 +1,12 @@ import base64 import re import struct +import urllib.parse import xml.etree.ElementTree from .adobepass import AdobePassIE from .common import InfoExtractor -from ..compat import ( - compat_etree_fromstring, - compat_parse_qs, - compat_urlparse, -) +from ..compat import compat_etree_fromstring from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, @@ -142,7 +139,7 @@ class BrightcoveLegacyIE(InfoExtractor): # from 
http://www.un.org/chinese/News/story.asp?NewsID=27724 'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350', 'only_matching': True, # Tested in GenericIE - } + }, ] _WEBPAGE_TESTS = [{ @@ -315,7 +312,7 @@ class BrightcoveLegacyIE(InfoExtractor): object_str = fix_xml_ampersands(object_str) try: - object_doc = compat_etree_fromstring(object_str.encode('utf-8')) + object_doc = compat_etree_fromstring(object_str.encode()) except xml.etree.ElementTree.ParseError: return @@ -323,7 +320,7 @@ class BrightcoveLegacyIE(InfoExtractor): if fv_el is not None: flashvars = dict( (k, v[0]) - for k, v in compat_parse_qs(fv_el.attrib['value']).items()) + for k, v in urllib.parse.parse_qs(fv_el.attrib['value']).items()) else: flashvars = {} @@ -340,32 +337,32 @@ class BrightcoveLegacyIE(InfoExtractor): params = {} - playerID = find_param('playerID') or find_param('playerId') - if playerID is None: + player_id = find_param('playerID') or find_param('playerId') + if player_id is None: raise ExtractorError('Cannot find player ID') - params['playerID'] = playerID + params['playerID'] = player_id - playerKey = find_param('playerKey') + player_key = find_param('playerKey') # Not all pages define this value - if playerKey is not None: - params['playerKey'] = playerKey + if player_key is not None: + params['playerKey'] = player_key # These fields hold the id of the video - videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList') - if videoPlayer is not None: - if isinstance(videoPlayer, list): - videoPlayer = videoPlayer[0] - videoPlayer = videoPlayer.strip() + video_player = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList') + if video_player is not None: + if isinstance(video_player, list): + video_player = video_player[0] + video_player = video_player.strip() # UUID is also 
possible for videoPlayer (e.g. # http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd # or http://www8.hp.com/cn/zh/home.html) if not (re.match( r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$', - videoPlayer) or videoPlayer.startswith('ref:')): + video_player) or video_player.startswith('ref:')): return None - params['@videoPlayer'] = videoPlayer - linkBase = find_param('linkBaseURL') - if linkBase is not None: - params['linkBaseURL'] = linkBase + params['@videoPlayer'] = video_player + link_base = find_param('linkBaseURL') + if link_base is not None: + params['linkBaseURL'] = link_base return cls._make_brightcove_url(params) @classmethod @@ -448,13 +445,13 @@ class BrightcoveLegacyIE(InfoExtractor): url = re.sub(r'(?<=[?&])bckey', 'playerKey', url) mobj = self._match_valid_url(url) query_str = mobj.group('query') - query = compat_urlparse.parse_qs(query_str) + query = urllib.parse.parse_qs(query_str) - videoPlayer = query.get('@videoPlayer') - if videoPlayer: + video_player = query.get('@videoPlayer') + if video_player: # We set the original url as the default 'Referer' header referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url) - video_id = videoPlayer[0] + video_id = video_player[0] if 'playerID' not in query: mobj = re.search(r'/bcpid(\d+)', url) if mobj is not None: @@ -483,7 +480,7 @@ class BrightcoveLegacyIE(InfoExtractor): enc_pub_id = player_key.split(',')[1].replace('~', '=') publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0] if publisher_id: - brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id) + brightcove_new_url = f'http://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}' if referer: brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer}) return self.url_result(brightcove_new_url, 
BrightcoveNewIE.ie_key(), video_id) @@ -543,9 +540,9 @@ class BrightcoveNewBaseIE(AdobePassIE): def build_format_id(kind): format_id = kind if tbr: - format_id += '-%dk' % int(tbr) + format_id += f'-{int(tbr)}k' if height: - format_id += '-%dp' % height + format_id += f'-{height}p' return format_id if src or streaming_src: @@ -654,7 +651,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): 'params': { # m3u8 download 'skip_download': True, - } + }, }, { # playlist stream 'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001', @@ -666,7 +663,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): 'params': { # m3u8 download 'skip_download': True, - } + }, }, { 'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001', 'only_matching': True, @@ -833,8 +830,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): player_id = player_id or attrs.get('data-player') or 'default' embed = embed or attrs.get('data-embed') or 'default' - bc_url = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % ( - account_id, player_id, embed, video_id) + bc_url = f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}' # Some brightcove videos may be embedded with video tag only and # without script tag or any mentioning of brightcove at all. 
Such @@ -865,13 +861,13 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): account_id, player_id, embed, content_type, video_id = self._match_valid_url(url).groups() - policy_key_id = '%s_%s' % (account_id, player_id) + policy_key_id = f'{account_id}_{player_id}' policy_key = self.cache.load('brightcove', policy_key_id) policy_key_extracted = False store_pk = lambda x: self.cache.store('brightcove', policy_key_id, x) def extract_policy_key(): - base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed) + base_url = f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/' config = self._download_json( base_url + 'config.json', video_id, fatal=False) or {} policy_key = try_get( @@ -910,7 +906,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): if not policy_key: policy_key = extract_policy_key() policy_key_extracted = True - headers['Accept'] = 'application/json;pk=%s' % policy_key + headers['Accept'] = f'application/json;pk={policy_key}' try: json_data = self._download_json(api_url, video_id, headers=headers) break @@ -936,7 +932,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): custom_fields['bcadobepassresourceid']) json_data = self._download_json( api_url, video_id, headers={ - 'Accept': 'application/json;pk=%s' % policy_key + 'Accept': f'application/json;pk={policy_key}', }, query={ 'tveToken': tve_token, }) diff --git a/yt_dlp/extractor/bundesliga.py b/yt_dlp/extractor/bundesliga.py index e76dd58dd..29f8f9415 100644 --- a/yt_dlp/extractor/bundesliga.py +++ b/yt_dlp/extractor/bundesliga.py @@ -16,17 +16,17 @@ class BundesligaIE(InfoExtractor): 'upload_date': '20220928', 'duration': 146, 'timestamp': 1664366511, - 'description': 'md5:803d4411bd134140c774021dd4b7598b' - } + 'description': 'md5:803d4411bd134140c774021dd4b7598b', + }, }, { 'url': 'https://www.bundesliga.com/en/bundesliga/videos/latest-features/T8IKc8TX?vid=ROHjs06G', - 'only_matching': True + 'only_matching': True, }, { 'url': 
'https://www.bundesliga.com/en/bundesliga/videos/goals?vid=mOG56vWA', - 'only_matching': True - } + 'only_matching': True, + }, ] def _real_extract(self, url): diff --git a/yt_dlp/extractor/businessinsider.py b/yt_dlp/extractor/businessinsider.py index 4b3f5e68b..7cb9af692 100644 --- a/yt_dlp/extractor/businessinsider.py +++ b/yt_dlp/extractor/businessinsider.py @@ -10,7 +10,7 @@ class BusinessInsiderIE(InfoExtractor): 'info_dict': { 'id': 'cjGDb0X9', 'ext': 'mp4', - 'title': "Bananas give you more radiation exposure than living next to a nuclear power plant", + 'title': 'Bananas give you more radiation exposure than living next to a nuclear power plant', 'description': 'md5:0175a3baf200dd8fa658f94cade841b3', 'upload_date': '20160611', 'timestamp': 1465675620, @@ -41,5 +41,5 @@ class BusinessInsiderIE(InfoExtractor): r'(?:jwplatform\.com/players/|jwplayer_)([a-zA-Z0-9]{8})'), webpage, 'jwplatform id') return self.url_result( - 'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(), + f'jwplatform:{jwplatform_id}', ie=JWPlatformIE.ie_key(), video_id=video_id) diff --git a/yt_dlp/extractor/buzzfeed.py b/yt_dlp/extractor/buzzfeed.py index b30a3b7ae..9847095bc 100644 --- a/yt_dlp/extractor/buzzfeed.py +++ b/yt_dlp/extractor/buzzfeed.py @@ -23,8 +23,8 @@ class BuzzFeedIE(InfoExtractor): 'upload_date': '20141024', 'uploader_id': 'Buddhanz1', 'uploader': 'Angry Ram', - } - }] + }, + }], }, { 'url': 'http://www.buzzfeed.com/sheridanwatson/look-at-this-cute-dog-omg?utm_term=4ldqpia', 'params': { @@ -45,7 +45,7 @@ class BuzzFeedIE(InfoExtractor): 'uploader_id': 'CindysMunchkin', 'uploader': 're:^Munchkin the', }, - }] + }], }, { 'url': 'http://www.buzzfeed.com/craigsilverman/the-most-adorable-crash-landing-ever#.eq7pX0BAmK', 'info_dict': { diff --git a/yt_dlp/extractor/byutv.py b/yt_dlp/extractor/byutv.py index ad35427ed..e9796f7da 100644 --- a/yt_dlp/extractor/byutv.py +++ b/yt_dlp/extractor/byutv.py @@ -36,7 +36,7 @@ class BYUtvIE(InfoExtractor): 'duration': 11645, }, 
'params': { - 'skip_download': True + 'skip_download': True, }, }, { 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', diff --git a/yt_dlp/extractor/c56.py b/yt_dlp/extractor/c56.py index e4b1c9a84..6264803dd 100644 --- a/yt_dlp/extractor/c56.py +++ b/yt_dlp/extractor/c56.py @@ -38,7 +38,7 @@ class C56IE(InfoExtractor): return self.url_result(sohu_video_info['url'], 'Sohu') page = self._download_json( - 'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info') + f'http://vxml.56.com/json/{text_id}/', text_id, 'Downloading video info') info = page['info'] @@ -46,7 +46,7 @@ class C56IE(InfoExtractor): { 'format_id': f['type'], 'filesize': int(f['filesize']), - 'url': f['url'] + 'url': f['url'], } for f in info['rfiles'] ] diff --git a/yt_dlp/extractor/callin.py b/yt_dlp/extractor/callin.py index c77179c7b..b7061a7d1 100644 --- a/yt_dlp/extractor/callin.py +++ b/yt_dlp/extractor/callin.py @@ -29,8 +29,8 @@ class CallinIE(InfoExtractor): 'series_id': '436d1f82ddeb30cd2306ea9156044d8d2cfdc3f1f1552d245117a42173e78553', 'episode': 'The Title IX Regime and the Long March Through and Beyond the Institutions', 'episode_number': 1, - 'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd' - } + 'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd', + }, }, { 'url': 'https://www.callin.com/episode/fcc-commissioner-brendan-carr-on-elons-PrumRdSQJW', 'md5': '14ede27ee2c957b7e4db93140fc0745c', @@ -54,7 +54,7 @@ class CallinIE(InfoExtractor): 'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/1ade9142625344045dc17cf523469ced1d93610762f4c886d06aa190a2f979e8.png', 'episode_id': 'c3dab47f237bf953d180d3f243477a84302798be0e0b29bc9ade6d60a69f04f5', 'timestamp': 1662100688.005, - } + }, }, { 'url': 'https://www.callin.com/episode/episode-81-elites-melt-down-over-student-debt-lzxMidUnjA', 'md5': '16f704ddbf82a27e3930533b12062f07', @@ -78,7 +78,7 @@ class CallinIE(InfoExtractor): 
'thumbnail': 'https://d1z76fhpoqkd01.cloudfront.net/shows/legacy/461ea0d86172cb6aff7d6c80fd49259cf5e64bdf737a4650f8bc24cf392ca218.png', 'episode_id': '8d06f869798f93a7814e380bceabea72d501417e620180416ff6bd510596e83c', 'timestamp': 1661476708.282, - } + }, }] def try_get_user_name(self, d): @@ -94,7 +94,7 @@ class CallinIE(InfoExtractor): next_data = self._search_nextjs_data(webpage, display_id) episode = next_data['props']['pageProps']['episode'] - id = episode['id'] + video_id = episode['id'] title = episode.get('title') or self._generic_title('', webpage) url = episode['m3u8'] formats = self._extract_m3u8_formats(url, display_id, ext='ts') @@ -125,11 +125,11 @@ class CallinIE(InfoExtractor): episode_list = traverse_obj(show_json, ('pageProps', 'show', 'episodes')) or [] episode_number = next( - (len(episode_list) - i for (i, e) in enumerate(episode_list) if e.get('id') == id), + (len(episode_list) - i for i, e in enumerate(episode_list) if e.get('id') == video_id), None) return { - 'id': id, + 'id': video_id, '_old_archive_ids': [make_archive_id(self, display_id.rsplit('-', 1)[-1])], 'display_id': display_id, 'title': title, @@ -151,5 +151,5 @@ class CallinIE(InfoExtractor): 'series_id': show_id, 'episode': title, 'episode_number': episode_number, - 'episode_id': id + 'episode_id': video_id, } diff --git a/yt_dlp/extractor/caltrans.py b/yt_dlp/extractor/caltrans.py index f4a4a834b..5513bb2df 100644 --- a/yt_dlp/extractor/caltrans.py +++ b/yt_dlp/extractor/caltrans.py @@ -11,7 +11,7 @@ class CaltransIE(InfoExtractor): 'title': 'US-50 : Sacramento : Hwy 50 at 24th', 'live_status': 'is_live', 'thumbnail': 'https://cwwp2.dot.ca.gov/data/d3/cctv/image/hwy50at24th/hwy50at24th.jpg', - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/cam4.py b/yt_dlp/extractor/cam4.py index 2650cc1ef..0d0dccb79 100644 --- a/yt_dlp/extractor/cam4.py +++ b/yt_dlp/extractor/cam4.py @@ -12,12 +12,12 @@ class CAM4IE(InfoExtractor): 'age_limit': 18, 'live_status': 
'is_live', 'thumbnail': 'https://snapshots.xcdnpro.com/thumbnails/foxynesss', - } + }, } def _real_extract(self, url): channel_id = self._match_id(url) - m3u8_playlist = self._download_json('https://www.cam4.com/rest/v1.0/profile/{}/streamInfo'.format(channel_id), channel_id).get('cdnURL') + m3u8_playlist = self._download_json(f'https://www.cam4.com/rest/v1.0/profile/{channel_id}/streamInfo', channel_id).get('cdnURL') formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True) diff --git a/yt_dlp/extractor/camdemy.py b/yt_dlp/extractor/camdemy.py index c7079e422..34dc095af 100644 --- a/yt_dlp/extractor/camdemy.py +++ b/yt_dlp/extractor/camdemy.py @@ -1,10 +1,7 @@ import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_urlencode, - compat_urlparse, -) from ..utils import ( clean_html, parse_duration, @@ -28,7 +25,7 @@ class CamdemyIE(InfoExtractor): 'duration': 1591, 'upload_date': '20130114', 'view_count': int, - } + }, }, { # With non-empty description # webpage returns "No permission or not login" @@ -42,7 +39,7 @@ class CamdemyIE(InfoExtractor): 'description': 'md5:2a9f989c2b153a2342acee579c6e7db6', 'creator': 'evercam', 'duration': 318, - } + }, }, { # External source (YouTube) 'url': 'http://www.camdemy.com/media/14842', @@ -76,12 +73,12 @@ class CamdemyIE(InfoExtractor): title = oembed_obj['title'] thumb_url = oembed_obj['thumbnail_url'] - video_folder = compat_urlparse.urljoin(thumb_url, 'video/') + video_folder = urllib.parse.urljoin(thumb_url, 'video/') file_list_doc = self._download_xml( - compat_urlparse.urljoin(video_folder, 'fileList.xml'), + urllib.parse.urljoin(video_folder, 'fileList.xml'), video_id, 'Downloading filelist XML') file_name = file_list_doc.find('./video/item/fileName').text - video_url = compat_urlparse.urljoin(video_folder, file_name) + video_url = urllib.parse.urljoin(video_folder, file_name) # Some URLs return "No permission or not login" in 
a webpage despite being # freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885) @@ -117,35 +114,35 @@ class CamdemyFolderIE(InfoExtractor): 'id': '450', 'title': '信號與系統 2012 & 2011 (Signals and Systems)', }, - 'playlist_mincount': 145 + 'playlist_mincount': 145, }, { # links without trailing slash # and multi-page 'url': 'http://www.camdemy.com/folder/853', 'info_dict': { 'id': '853', - 'title': '科學計算 - 使用 Matlab' + 'title': '科學計算 - 使用 Matlab', }, - 'playlist_mincount': 20 + 'playlist_mincount': 20, }, { # with displayMode parameter. For testing the codes to add parameters 'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg', 'info_dict': { 'id': '853', - 'title': '科學計算 - 使用 Matlab' + 'title': '科學計算 - 使用 Matlab', }, - 'playlist_mincount': 20 + 'playlist_mincount': 20, }] def _real_extract(self, url): folder_id = self._match_id(url) # Add displayMode=list so that all links are displayed in a single page - parsed_url = list(compat_urlparse.urlparse(url)) - query = dict(compat_urlparse.parse_qsl(parsed_url[4])) + parsed_url = list(urllib.parse.urlparse(url)) + query = dict(urllib.parse.parse_qsl(parsed_url[4])) query.update({'displayMode': 'list'}) - parsed_url[4] = compat_urllib_parse_urlencode(query) - final_url = compat_urlparse.urlunparse(parsed_url) + parsed_url[4] = urllib.parse.urlencode(query) + final_url = urllib.parse.urlunparse(parsed_url) page = self._download_webpage(final_url, folder_id) matches = re.findall(r"href='(/media/\d+/?)'", page) diff --git a/yt_dlp/extractor/camfm.py b/yt_dlp/extractor/camfm.py index 11dafa4a2..6036f136f 100644 --- a/yt_dlp/extractor/camfm.py +++ b/yt_dlp/extractor/camfm.py @@ -37,7 +37,7 @@ class CamFMShowIE(InfoExtractor): 'thumbnail': urljoin('https://camfm.co.uk', self._search_regex( r']+class="thumb-expand"[^>]+src="([^"]+)"', page, 'thumbnail', fatal=False)), 'title': self._html_search_regex('

([^<]+)

', page, 'title', fatal=False), - 'description': clean_html(get_element_by_class('small-12 medium-8 cell', page)) + 'description': clean_html(get_element_by_class('small-12 medium-8 cell', page)), } @@ -56,7 +56,7 @@ class CamFMEpisodeIE(InfoExtractor): 'series': 'AITAA: Am I the Agony Aunt?', 'thumbnail': 'md5:5980a831360d0744c3764551be3d09c1', 'categories': ['Entertainment'], - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/cammodels.py b/yt_dlp/extractor/cammodels.py index 135b31529..7388cfb6c 100644 --- a/yt_dlp/extractor/cammodels.py +++ b/yt_dlp/extractor/cammodels.py @@ -7,14 +7,14 @@ class CamModelsIE(InfoExtractor): _TESTS = [{ 'url': 'https://www.cammodels.com/cam/AutumnKnight/', 'only_matching': True, - 'age_limit': 18 + 'age_limit': 18, }] def _real_extract(self, url): user_id = self._match_id(url) manifest = self._download_json( - 'https://manifest-server.naiadsystems.com/live/s:%s.json' % user_id, user_id) + f'https://manifest-server.naiadsystems.com/live/s:{user_id}.json', user_id) formats = [] thumbnails = [] @@ -36,7 +36,7 @@ class CamModelsIE(InfoExtractor): format_id_list = [format_id] height = int_or_none(media.get('videoHeight')) if height is not None: - format_id_list.append('%dp' % height) + format_id_list.append(f'{height}p') f = { 'url': media_url, 'format_id': '-'.join(format_id_list), @@ -73,5 +73,5 @@ class CamModelsIE(InfoExtractor): 'thumbnails': thumbnails, 'is_live': True, 'formats': formats, - 'age_limit': 18 + 'age_limit': 18, } diff --git a/yt_dlp/extractor/camtasia.py b/yt_dlp/extractor/camtasia.py index 70ab6c62a..326643175 100644 --- a/yt_dlp/extractor/camtasia.py +++ b/yt_dlp/extractor/camtasia.py @@ -17,7 +17,7 @@ class CamtasiaEmbedIE(InfoExtractor): 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1', 'ext': 'flv', 'duration': 2235.90, - } + }, }, { 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63', 'info_dict': { @@ -25,12 +25,12 @@ class CamtasiaEmbedIE(InfoExtractor): 'title': 
'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip', 'ext': 'flv', 'duration': 2235.93, - } + }, }], 'info_dict': { 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final', }, - 'skip': 'webpage dead' + 'skip': 'webpage dead', }, ] diff --git a/yt_dlp/extractor/canalalpha.py b/yt_dlp/extractor/canalalpha.py index 745e6954c..3a0df9545 100644 --- a/yt_dlp/extractor/canalalpha.py +++ b/yt_dlp/extractor/canalalpha.py @@ -21,7 +21,7 @@ class CanalAlphaIE(InfoExtractor): 'upload_date': '20211028', 'duration': 1125, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.canalalpha.ch/play/le-journal/topic/24512/la-poste-fait-de-neuchatel-un-pole-cryptographique', 'info_dict': { @@ -33,7 +33,7 @@ class CanalAlphaIE(InfoExtractor): 'upload_date': '20211028', 'duration': 138, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.canalalpha.ch/play/eureka/episode/24484/ces-innovations-qui-veulent-rendre-lagriculture-plus-durable', 'info_dict': { @@ -45,7 +45,7 @@ class CanalAlphaIE(InfoExtractor): 'upload_date': '20211026', 'duration': 360, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.canalalpha.ch/play/avec-le-temps/episode/23516/redonner-de-leclat-grace-au-polissage', 'info_dict': { @@ -57,7 +57,7 @@ class CanalAlphaIE(InfoExtractor): 'upload_date': '20210726', 'duration': 360, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { 'url': 'https://www.canalalpha.ch/play/le-journal/topic/33500/encore-des-mesures-deconomie-dans-le-jura', 'info_dict': { diff --git a/yt_dlp/extractor/canalc2.py b/yt_dlp/extractor/canalc2.py index 597cb2a6b..c725545fa 100644 --- a/yt_dlp/extractor/canalc2.py +++ b/yt_dlp/extractor/canalc2.py @@ -26,7 +26,7 @@ class Canalc2IE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'http://www.canalc2.tv/video/%s' % video_id, video_id) + 
f'http://www.canalc2.tv/video/{video_id}', video_id) title = self._html_search_regex( r'(?s)class="[^"]*col_description[^"]*">.*?

(.+?)

', diff --git a/yt_dlp/extractor/canalplus.py b/yt_dlp/extractor/canalplus.py index 3ff5c3fbf..728b7a047 100644 --- a/yt_dlp/extractor/canalplus.py +++ b/yt_dlp/extractor/canalplus.py @@ -53,7 +53,7 @@ class CanalplusIE(InfoExtractor): video_data = self._download_json(info_url, video_id, 'Downloading video JSON') if isinstance(video_data, list): - video_data = [video for video in video_data if video.get('ID') == video_id][0] + video_data = next(video for video in video_data if video.get('ID') == video_id) media = video_data['MEDIA'] infos = video_data['INFOS'] @@ -97,8 +97,7 @@ class CanalplusIE(InfoExtractor): return { 'id': video_id, 'display_id': display_id, - 'title': '%s - %s' % (titrage['TITRE'], - titrage['SOUS_TITRE']), + 'title': '{} - {}'.format(titrage['TITRE'], titrage['SOUS_TITRE']), 'upload_date': unified_strdate(infos.get('PUBLICATION', {}).get('DATE')), 'thumbnails': thumbnails, 'description': infos.get('DESCRIPTION'), diff --git a/yt_dlp/extractor/caracoltv.py b/yt_dlp/extractor/caracoltv.py index 79f7752fe..493ffdae5 100644 --- a/yt_dlp/extractor/caracoltv.py +++ b/yt_dlp/extractor/caracoltv.py @@ -78,13 +78,13 @@ class CaracolTvPlayIE(InfoExtractor): 'device_data': { 'device_id': str(uuid.uuid4()), 'device_token': '', - 'device_type': 'web' + 'device_type': 'web', }, 'login_data': { 'enabled': True, 'email': email, 'password': password, - } + }, }).encode())['user_token'] def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None): diff --git a/yt_dlp/extractor/cartoonnetwork.py b/yt_dlp/extractor/cartoonnetwork.py index 4dd7ac46d..1749a008a 100644 --- a/yt_dlp/extractor/cartoonnetwork.py +++ b/yt_dlp/extractor/cartoonnetwork.py @@ -27,7 +27,7 @@ class CartoonNetworkIE(TurnerBaseIE): if content_re: metadata_re = r'|video_metadata\.content_' + content_re return self._search_regex( - r'(?:_cnglobal\.currentVideo\.%s%s)\s*=\s*"(%s)";' % (global_re, metadata_re, value_re), + 
rf'(?:_cnglobal\.currentVideo\.{global_re}{metadata_re})\s*=\s*"({value_re})";', webpage, name, fatal=fatal) media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True) diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index a4180262b..740e12926 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -6,9 +6,6 @@ import urllib.parse import xml.etree.ElementTree from .common import InfoExtractor -from ..compat import ( - compat_str, -) from ..utils import ( ExtractorError, int_or_none, @@ -99,7 +96,7 @@ class CBCIE(InfoExtractor): # multiple CBC.APP.Caffeine.initInstance(...) 'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238', 'info_dict': { - 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', # FIXME + 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', # FIXME: actual title includes " | CBC News" 'id': 'dog-indoor-exercise-winter-1.3928238', 'description': 'md5:c18552e41726ee95bd75210d1ca9194c', }, @@ -108,7 +105,7 @@ class CBCIE(InfoExtractor): @classmethod def suitable(cls, url): - return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url) + return False if CBCPlayerIE.suitable(url) else super().suitable(url) def _extract_player_init(self, player_init, display_id): player_info = self._parse_json(player_init, display_id, js_to_json) @@ -116,15 +113,15 @@ class CBCIE(InfoExtractor): if not media_id: clip_id = player_info['clipId'] feed = self._download_json( - 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id, + f'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={{:mpsReleases}}{{{clip_id}}}', clip_id, fatal=False) if feed: - media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str) + media_id = try_get(feed, lambda x: x['entries'][0]['guid'], str) if not media_id: media_id = self._download_json( 
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id, clip_id)['entries'][0]['id'].split('/')[-1] - return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) + return self.url_result(f'cbcplayer:{media_id}', 'CBCPlayer', media_id) def _real_extract(self, url): display_id = self._match_id(url) @@ -142,7 +139,7 @@ class CBCIE(InfoExtractor): r'guid["\']\s*:\s*["\'](\d+)'): media_ids.extend(re.findall(media_id_re, webpage)) entries.extend([ - self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) + self.url_result(f'cbcplayer:{media_id}', 'CBCPlayer', media_id) for media_id in orderedSet(media_ids)]) return self.playlist_result( entries, display_id, strip_or_none(title), @@ -322,11 +319,11 @@ class CBCPlayerIE(InfoExtractor): '_type': 'url_transparent', 'ie_key': 'ThePlatform', 'url': smuggle_url( - 'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true&formats=MPEG4,FLV,MP3' % video_id, { - 'force_smil_url': True + f'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/{video_id}?mbr=true&formats=MPEG4,FLV,MP3', { + 'force_smil_url': True, }), 'id': video_id, - '_format_sort_fields': ('res', 'proto') # Prioritize direct http formats over HLS + '_format_sort_fields': ('res', 'proto'), # Prioritize direct http formats over HLS } @@ -338,13 +335,13 @@ class CBCPlayerPlaylistIE(InfoExtractor): 'playlist_mincount': 25, 'info_dict': { 'id': 'news/tv shows/the national/latest broadcast', - } + }, }, { 'url': 'https://www.cbc.ca/player/news/Canada/North', 'playlist_mincount': 25, 'info_dict': { 'id': 'news/canada/north', - } + }, }] def _real_extract(self, url): @@ -355,7 +352,7 @@ class CBCPlayerPlaylistIE(InfoExtractor): def entries(): for video_id in traverse_obj(json_content, ( - 'video', 'clipsByCategory', lambda k, _: k.lower() == playlist_id, 'items', ..., 'id' + 'video', 'clipsByCategory', lambda k, _: k.lower() == playlist_id, 'items', ..., 'id', )): yield 
self.url_result(f'https://www.cbc.ca/player/play/{video_id}', CBCPlayerIE) @@ -453,7 +450,7 @@ class CBCGemIE(InfoExtractor): # JWT is decoded here and 'exp' field is extracted # It is a Unix timestamp for when the token expires b64_data = self._claims_token.split('.')[1] - data = base64.urlsafe_b64decode(b64_data + "==") + data = base64.urlsafe_b64decode(b64_data + '==') return json.loads(data)['exp'] def claims_token_expired(self): @@ -535,17 +532,17 @@ class CBCGemIE(InfoExtractor): self._remove_duplicate_formats(formats) formats.extend(self._find_secret_formats(formats, video_id)) - for format in formats: - if format.get('vcodec') == 'none': - if format.get('ext') is None: - format['ext'] = 'm4a' - if format.get('acodec') is None: - format['acodec'] = 'mp4a.40.2' + for fmt in formats: + if fmt.get('vcodec') == 'none': + if fmt.get('ext') is None: + fmt['ext'] = 'm4a' + if fmt.get('acodec') is None: + fmt['acodec'] = 'mp4a.40.2' # Put described audio at the beginning of the list, so that it # isn't chosen by default, as most people won't want it. 
- if 'descriptive' in format['format_id'].lower(): - format['preference'] = -2 + if 'descriptive' in fmt['format_id'].lower(): + fmt['preference'] = -2 return { 'id': video_id, @@ -670,7 +667,7 @@ class CBCGemLiveIE(InfoExtractor): 'title': r're:^Ottawa [0-9\-: ]+', 'description': 'The live TV channel and local programming from Ottawa', 'live_status': 'is_live', - 'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*' + 'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*', }, 'params': {'skip_download': True}, 'skip': 'Live might have ended', @@ -690,7 +687,7 @@ class CBCGemLiveIE(InfoExtractor): }, 'params': {'skip_download': True}, 'skip': 'Live might have ended', - } + }, ] def _real_extract(self, url): @@ -729,5 +726,5 @@ class CBCGemLiveIE(InfoExtractor): 'description': 'description', 'thumbnail': ('images', 'card', 'url'), 'timestamp': ('airDate', {parse_iso8601}), - }) + }), } diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py index aca9782c7..e82558897 100644 --- a/yt_dlp/extractor/cbs.py +++ b/yt_dlp/extractor/cbs.py @@ -31,7 +31,7 @@ class CBSBaseIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE return subtitles def _extract_common_video_info(self, content_id, asset_types, mpx_acc, extra_info): - tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id) + tp_path = f'dJ5BDC/media/guid/{mpx_acc}/{content_id}' tp_release_url = f'https://link.theplatform.com/s/{tp_path}' info = self._extract_theplatform_metadata(tp_path, content_id) @@ -41,7 +41,7 @@ class CBSBaseIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE try: tp_formats, tp_subtitles = self._extract_theplatform_smil( update_url_query(tp_release_url, query), content_id, - 'Downloading %s SMIL data' % asset_type) + f'Downloading {asset_type} SMIL data') except ExtractorError as e: last_e = e if asset_type != 'fallback': @@ -50,7 +50,7 @@ class CBSBaseIE(ThePlatformFeedIE): # XXX: Do not subclass from concrete IE try: tp_formats, tp_subtitles = 
self._extract_theplatform_smil( update_url_query(tp_release_url, query), content_id, - 'Downloading %s SMIL data, trying again with another format' % asset_type) + f'Downloading {asset_type} SMIL data, trying again with another format') except ExtractorError as e: last_e = e continue diff --git a/yt_dlp/extractor/ccc.py b/yt_dlp/extractor/ccc.py index ca6b82c98..1d781cc47 100644 --- a/yt_dlp/extractor/ccc.py +++ b/yt_dlp/extractor/ccc.py @@ -25,7 +25,7 @@ class CCCIE(InfoExtractor): 'timestamp': 1388188800, 'duration': 3710, 'tags': list, - } + }, }, { 'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download', 'only_matching': True, @@ -35,7 +35,7 @@ class CCCIE(InfoExtractor): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) event_id = self._search_regex(r"data-id='(\d+)'", webpage, 'event id') - event_data = self._download_json('https://media.ccc.de/public/events/%s' % event_id, event_id) + event_data = self._download_json(f'https://media.ccc.de/public/events/{event_id}', event_id) formats = [] for recording in event_data.get('recordings', []): @@ -96,7 +96,7 @@ class CCCPlaylistIE(InfoExtractor): 'title': 'Datenspuren 2023', 'id': 'DS2023', }, - 'playlist_count': 37 + 'playlist_count': 37, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/ccma.py b/yt_dlp/extractor/ccma.py index ab840f301..ffe4b49c1 100644 --- a/yt_dlp/extractor/ccma.py +++ b/yt_dlp/extractor/ccma.py @@ -24,7 +24,7 @@ class CCMAIE(InfoExtractor): 'timestamp': 1478608140, 'upload_date': '20161108', 'age_limit': 0, - } + }, }, { 'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/', 'md5': 'fa3e38f269329a278271276330261425', @@ -37,7 +37,7 @@ class CCMAIE(InfoExtractor): 'timestamp': 1494622500, 'vcodec': 'none', 'categories': ['Esports'], - } + }, }, { 'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/', 'md5': 
'b43c3d3486f430f3032b5b160d80cbc3', @@ -51,7 +51,7 @@ class CCMAIE(InfoExtractor): 'subtitles': 'mincount:4', 'age_limit': 16, 'series': 'Crims', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/cctv.py b/yt_dlp/extractor/cctv.py index 8552ee511..18c080df1 100644 --- a/yt_dlp/extractor/cctv.py +++ b/yt_dlp/extractor/cctv.py @@ -1,7 +1,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( float_or_none, try_get, @@ -167,17 +166,17 @@ class CCTVIE(InfoExtractor): if isinstance(video, dict): for quality, chapters_key in enumerate(('lowChapters', 'chapters')): video_url = try_get( - video, lambda x: x[chapters_key][0]['url'], compat_str) + video, lambda x: x[chapters_key][0]['url'], str) if video_url: formats.append({ 'url': video_url, 'format_id': 'http', 'quality': quality, # Sample clip - 'preference': -10 + 'preference': -10, }) - hls_url = try_get(data, lambda x: x['hls_url'], compat_str) + hls_url = try_get(data, lambda x: x['hls_url'], str) if hls_url: hls_url = re.sub(r'maxbr=\d+&?', '', hls_url) formats.extend(self._extract_m3u8_formats( diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py index 0a5a524c1..62ee8b17f 100644 --- a/yt_dlp/extractor/cda.py +++ b/yt_dlp/extractor/cda.py @@ -6,9 +6,10 @@ import hmac import json import random import re +import urllib.parse from .common import InfoExtractor -from ..compat import compat_ord, compat_urllib_parse_unquote +from ..compat import compat_ord from ..utils import ( ExtractorError, float_or_none, @@ -51,7 +52,7 @@ class CDAIE(InfoExtractor): 'age_limit': 0, 'upload_date': '20160221', 'timestamp': 1456078244, - } + }, }, { 'url': 'http://www.cda.pl/video/57413289', 'md5': 'a88828770a8310fc00be6c95faf7f4d5', @@ -67,7 +68,7 @@ class CDAIE(InfoExtractor): 'age_limit': 0, 'upload_date': '20160220', 'timestamp': 1455968218, - } + }, }, { # Age-restricted with vfilm redirection 'url': 'https://www.cda.pl/video/8753244c4', @@ -85,7 +86,7 
@@ class CDAIE(InfoExtractor): 'average_rating': float, 'timestamp': 1633888264, 'upload_date': '20211010', - } + }, }, { # Age-restricted without vfilm redirection 'url': 'https://www.cda.pl/video/17028157b8', @@ -103,7 +104,7 @@ class CDAIE(InfoExtractor): 'average_rating': float, 'timestamp': 1699705901, 'upload_date': '20231111', - } + }, }, { 'url': 'http://ebd.cda.pl/0x0/5749950c', 'only_matching': True, @@ -263,7 +264,7 @@ class CDAIE(InfoExtractor): def decrypt_file(a): for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'): a = a.replace(p, '') - a = compat_urllib_parse_unquote(a) + a = urllib.parse.unquote(a) b = [] for c in a: f = compat_ord(c) @@ -280,16 +281,16 @@ class CDAIE(InfoExtractor): def extract_format(page, version): json_str = self._html_search_regex( r'player_data=(\\?["\'])(?P.+?)\1', page, - '%s player_json' % version, fatal=False, group='player_data') + f'{version} player_json', fatal=False, group='player_data') if not json_str: return player_data = self._parse_json( - json_str, '%s player_data' % version, fatal=False) + json_str, f'{version} player_data', fatal=False) if not player_data: return video = player_data.get('video') if not video or 'file' not in video: - self.report_warning('Unable to extract %s version information' % version) + self.report_warning(f'Unable to extract {version} version information') return if video['file'].startswith('uggc'): video['file'] = codecs.decode(video['file'], 'rot_13') @@ -310,11 +311,11 @@ class CDAIE(InfoExtractor): continue data = {'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2, 'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}]} - data = json.dumps(data).encode('utf-8') + data = json.dumps(data).encode() video_url = self._download_json( f'https://www.cda.pl/video/{video_id}', video_id, headers={ 'Content-Type': 'application/json', - 'X-Requested-With': 'XMLHttpRequest' + 'X-Requested-With': 'XMLHttpRequest', }, data=data, note=f'Fetching {quality} url', 
errnote=f'Failed to fetch {quality} url', fatal=False) if try_get(video_url, lambda x: x['result']['status']) == 'ok': @@ -322,7 +323,7 @@ class CDAIE(InfoExtractor): info_dict['formats'].append({ 'url': video_url, 'format_id': quality, - 'height': int_or_none(quality[:-1]) + 'height': int_or_none(quality[:-1]), }) if not info_dict['duration']: @@ -340,11 +341,11 @@ class CDAIE(InfoExtractor): webpage = handler( urljoin(self._BASE_URL, href), video_id, - 'Downloading %s version information' % resolution, fatal=False) + f'Downloading {resolution} version information', fatal=False) if not webpage: # Manually report warning because empty page is returned when # invalid version is requested. - self.report_warning('Unable to download %s version information' % resolution) + self.report_warning(f'Unable to download {resolution} version information') continue extract_format(webpage, resolution) diff --git a/yt_dlp/extractor/cellebrite.py b/yt_dlp/extractor/cellebrite.py index 9896a31af..e90365a8b 100644 --- a/yt_dlp/extractor/cellebrite.py +++ b/yt_dlp/extractor/cellebrite.py @@ -14,7 +14,7 @@ class CellebriteIE(InfoExtractor): 'title': 'Ask the Expert: Chat Capture - Collect Data from Android Devices in Cellebrite UFED', 'duration': 455, 'tags': [], - } + }, }, { 'url': 'https://cellebrite.com/en/how-to-lawfully-collect-the-maximum-amount-of-data-from-android-devices/', 'info_dict': { @@ -25,7 +25,7 @@ class CellebriteIE(InfoExtractor): 'description': 'md5:e9a3d124c7287b0b07bad2547061cacf', 'thumbnail': 'https://cellebrite.com/wp-content/uploads/2022/07/How-to-Lawfully-Collect-the-Maximum-Amount-of-Data-From-Android-Devices.png', 'title': 'Android Extractions Explained', - } + }, }] def _get_formats_and_subtitles(self, json_data, display_id): diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py index 5d6335729..c323985ca 100644 --- a/yt_dlp/extractor/ceskatelevize.py +++ b/yt_dlp/extractor/ceskatelevize.py @@ -1,7 +1,7 @@ import re +import 
urllib.parse from .common import InfoExtractor -from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse from ..networking import Request from ..utils import ( ExtractorError, @@ -97,11 +97,11 @@ class CeskaTelevizeIE(InfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) webpage, urlh = self._download_webpage_handle(url, playlist_id) - parsed_url = compat_urllib_parse_urlparse(urlh.url) + parsed_url = urllib.parse.urlparse(urlh.url) site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize') playlist_title = self._og_search_title(webpage, default=None) if site_name and playlist_title: - playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, maxsplit=1)[0] + playlist_title = re.split(rf'\s*[—|]\s*{site_name}', playlist_title, maxsplit=1)[0] playlist_description = self._og_search_description(webpage, default=None) if playlist_description: playlist_description = playlist_description.replace('\xa0', ' ') @@ -122,15 +122,15 @@ class CeskaTelevizeIE(InfoExtractor): iframe_hash = self._download_webpage( 'https://www.ceskatelevize.cz/v-api/iframe-hash/', playlist_id, note='Getting IFRAME hash') - query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, } + query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec} webpage = self._download_webpage( 'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', playlist_id, note='Downloading player', query=query) NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' - if '%s

' % NOT_AVAILABLE_STRING in webpage: + if f'{NOT_AVAILABLE_STRING}

' in webpage: self.raise_geo_restricted(NOT_AVAILABLE_STRING) - if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )): + if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen')): raise ExtractorError('no video with IDEC available', video_id=idec, expected=True) type_ = None @@ -183,7 +183,7 @@ class CeskaTelevizeIE(InfoExtractor): if playlist_url == 'error_region': raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) - req = Request(compat_urllib_parse_unquote(playlist_url)) + req = Request(urllib.parse.unquote(playlist_url)) req.headers['Referer'] = url playlist = self._download_json(req, playlist_id, fatal=False) @@ -203,11 +203,11 @@ class CeskaTelevizeIE(InfoExtractor): if 'playerType=flash' in stream_url: stream_formats = self._extract_m3u8_formats( stream_url, playlist_id, 'mp4', 'm3u8_native', - m3u8_id='hls-%s' % format_id, fatal=False) + m3u8_id=f'hls-{format_id}', fatal=False) else: stream_formats = self._extract_mpd_formats( stream_url, playlist_id, - mpd_id='dash-%s' % format_id, fatal=False) + mpd_id=f'dash-{format_id}', fatal=False) if 'drmOnly=true' in stream_url: for f in stream_formats: f['has_drm'] = True @@ -236,7 +236,7 @@ class CeskaTelevizeIE(InfoExtractor): if playlist_len == 1: final_title = playlist_title or title else: - final_title = '%s (%s)' % (playlist_title, title) + final_title = f'{playlist_title} ({title})' entries.append({ 'id': item_id, @@ -261,7 +261,7 @@ class CeskaTelevizeIE(InfoExtractor): 'cs': [{ 'ext': 'srt', 'data': srt_subs, - }] + }], } @staticmethod @@ -282,7 +282,7 @@ class CeskaTelevizeIE(InfoExtractor): if m: yield m.group(1) start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:]) - yield '{0} --> {1}'.format(start, stop) + yield f'{start} --> {stop}' else: yield line diff --git a/yt_dlp/extractor/cgtn.py b/yt_dlp/extractor/cgtn.py index 5d9d9bcde..b9757e063 100644 --- a/yt_dlp/extractor/cgtn.py 
+++ b/yt_dlp/extractor/cgtn.py @@ -20,8 +20,8 @@ class CGTNIE(InfoExtractor): 'categories': ['Video'], }, 'params': { - 'skip_download': True - } + 'skip_download': True, + }, }, { 'url': 'https://news.cgtn.com/news/2021-06-06/China-Indonesia-vow-to-further-deepen-maritime-cooperation-10REvJCewCY/index.html', 'info_dict': { @@ -36,9 +36,9 @@ class CGTNIE(InfoExtractor): 'upload_date': '20210606', }, 'params': { - 'skip_download': False - } - } + 'skip_download': False, + }, + }, ] def _real_extract(self, url): diff --git a/yt_dlp/extractor/chaturbate.py b/yt_dlp/extractor/chaturbate.py index 99dfcfdeb..b49f741ef 100644 --- a/yt_dlp/extractor/chaturbate.py +++ b/yt_dlp/extractor/chaturbate.py @@ -37,7 +37,7 @@ class ChaturbateIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage( - 'https://chaturbate.com/%s/' % video_id, video_id, + f'https://chaturbate.com/{video_id}/', video_id, headers=self.geo_verification_headers()) found_m3u8_urls = [] @@ -85,7 +85,7 @@ class ChaturbateIE(InfoExtractor): formats = [] for m3u8_url in m3u8_urls: for known_id in ('fast', 'slow'): - if '_%s' % known_id in m3u8_url: + if f'_{known_id}' in m3u8_url: m3u8_id = known_id break else: @@ -99,7 +99,7 @@ class ChaturbateIE(InfoExtractor): return { 'id': video_id, 'title': video_id, - 'thumbnail': 'https://roomimg.stream.highwebmedia.com/ri/%s.jpg' % video_id, + 'thumbnail': f'https://roomimg.stream.highwebmedia.com/ri/{video_id}.jpg', 'age_limit': self._rta_search(webpage), 'is_live': True, 'formats': formats, diff --git a/yt_dlp/extractor/cinemax.py b/yt_dlp/extractor/cinemax.py index 706ec8553..66831ef62 100644 --- a/yt_dlp/extractor/cinemax.py +++ b/yt_dlp/extractor/cinemax.py @@ -20,6 +20,6 @@ class CinemaxIE(HBOBaseIE): def _real_extract(self, url): path, video_id = self._match_valid_url(url).groups() - info = self._extract_info('https://www.cinemax.com/%s.xml' % path, video_id) + info = self._extract_info(f'https://www.cinemax.com/{path}.xml', video_id) 
info['id'] = video_id return info diff --git a/yt_dlp/extractor/cinetecamilano.py b/yt_dlp/extractor/cinetecamilano.py index 745b71f24..834890d56 100644 --- a/yt_dlp/extractor/cinetecamilano.py +++ b/yt_dlp/extractor/cinetecamilano.py @@ -27,8 +27,8 @@ class CinetecaMilanoIE(InfoExtractor): 'modified_date': '20200520', 'duration': 3139, 'release_timestamp': 1643446208, - 'modified_timestamp': int - } + 'modified_timestamp': int, + }, }] def _real_extract(self, url): @@ -38,7 +38,7 @@ class CinetecaMilanoIE(InfoExtractor): f'https://www.cinetecamilano.it/api/catalogo/{video_id}/?', video_id, headers={ 'Referer': url, - 'Authorization': try_get(self._get_cookies('https://www.cinetecamilano.it'), lambda x: f'Bearer {x["cnt-token"].value}') or '' + 'Authorization': try_get(self._get_cookies('https://www.cinetecamilano.it'), lambda x: f'Bearer {x["cnt-token"].value}') or '', }) except ExtractorError as e: if ((isinstance(e.cause, HTTPError) and e.cause.status == 500) @@ -58,5 +58,5 @@ class CinetecaMilanoIE(InfoExtractor): 'modified_timestamp': parse_iso8601(archive.get('created_at'), delimiter=' '), 'thumbnail': urljoin(url, try_get(archive, lambda x: x['thumb']['src'].replace('/public/', '/storage/'))), 'formats': self._extract_m3u8_formats( - urljoin(url, traverse_obj(archive, ('drm', 'hls'))), video_id, 'mp4') + urljoin(url, traverse_obj(archive, ('drm', 'hls'))), video_id, 'mp4'), } diff --git a/yt_dlp/extractor/cineverse.py b/yt_dlp/extractor/cineverse.py index 4405297c6..c8c6c48c2 100644 --- a/yt_dlp/extractor/cineverse.py +++ b/yt_dlp/extractor/cineverse.py @@ -13,7 +13,7 @@ from ..utils import ( class CineverseBaseIE(InfoExtractor): - _VALID_URL_BASE = r'https?://www\.(?P%s)' % '|'.join(map(re.escape, ( + _VALID_URL_BASE = r'https?://www\.(?P{})'.format('|'.join(map(re.escape, ( 'cineverse.com', 'asiancrush.com', 'dovechannel.com', @@ -21,7 +21,7 @@ class CineverseBaseIE(InfoExtractor): 'midnightpulp.com', 'fandor.com', 'retrocrush.tv', - ))) + )))) class 
CineverseIE(CineverseBaseIE): @@ -38,7 +38,7 @@ class CineverseIE(CineverseBaseIE): 'duration': 5811.597, 'description': 'md5:892fd62a05611d394141e8394ace0bc6', 'age_limit': 13, - } + }, }, { 'url': 'https://www.retrocrush.tv/watch/1000000023016/Archenemy! Crystal Bowie', 'skip': 'geo-blocked', @@ -55,7 +55,7 @@ class CineverseIE(CineverseBaseIE): 'duration': 1485.067, 'description': 'Cobra meets a beautiful bounty hunter by the name of Jane Royal.', 'series': 'Space Adventure COBRA (Original Japanese)', - } + }, }] def _real_extract(self, url): @@ -104,7 +104,7 @@ class CineverseDetailsIE(CineverseBaseIE): 'info_dict': { 'title': 'Space Adventure COBRA (Original Japanese)', 'id': '1000000023012', - } + }, }, { 'url': 'https://www.asiancrush.com/details/NNVG4938/Hansel-and-Gretel', 'info_dict': { diff --git a/yt_dlp/extractor/ciscolive.py b/yt_dlp/extractor/ciscolive.py index 066857817..1584ca665 100644 --- a/yt_dlp/extractor/ciscolive.py +++ b/yt_dlp/extractor/ciscolive.py @@ -105,7 +105,7 @@ class CiscoLiveSearchIE(CiscoLiveBaseIE): @classmethod def suitable(cls, url): - return False if CiscoLiveSessionIE.suitable(url) else super(CiscoLiveSearchIE, cls).suitable(url) + return False if CiscoLiveSessionIE.suitable(url) else super().suitable(url) @staticmethod def _check_bc_id_exists(rf_item): @@ -117,7 +117,7 @@ class CiscoLiveSearchIE(CiscoLiveBaseIE): for page_num in itertools.count(1): results = self._call_api( 'search', None, query, url, - 'Downloading search JSON page %d' % page_num) + f'Downloading search JSON page {page_num}') sl = try_get(results, lambda x: x['sectionList'][0], dict) if sl: results = sl diff --git a/yt_dlp/extractor/ciscowebex.py b/yt_dlp/extractor/ciscowebex.py index 85585dffb..d39347c82 100644 --- a/yt_dlp/extractor/ciscowebex.py +++ b/yt_dlp/extractor/ciscowebex.py @@ -46,7 +46,7 @@ class CiscoWebexIE(InfoExtractor): headers['accessPwd'] = password stream, urlh = self._download_json_handle( - 
'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id), + f'https://{subdomain}.webex.com/webappng/api/v1/recordings/{video_id}/stream', video_id, headers=headers, query={'siteurl': siteurl}, expected_status=(403, 429)) if urlh.status == 403: @@ -101,6 +101,6 @@ class CiscoWebexIE(InfoExtractor): 'uploader_id': stream.get('ownerUserName') or stream.get('ownerId'), 'timestamp': unified_timestamp(stream.get('createTime')), 'duration': int_or_none(stream.get('duration'), 1000), - 'webpage_url': 'https://%s.webex.com/recordingservice/sites/%s/recording/playback/%s' % (subdomain, siteurl, video_id), + 'webpage_url': f'https://{subdomain}.webex.com/recordingservice/sites/{siteurl}/recording/playback/{video_id}', 'formats': formats, } diff --git a/yt_dlp/extractor/cjsw.py b/yt_dlp/extractor/cjsw.py index c37a3b848..b80236a7e 100644 --- a/yt_dlp/extractor/cjsw.py +++ b/yt_dlp/extractor/cjsw.py @@ -27,7 +27,7 @@ class CJSWIE(InfoExtractor): def _real_extract(self, url): mobj = self._match_valid_url(url) program, episode_id = mobj.group('program', 'id') - audio_id = '%s/%s' % (program, episode_id) + audio_id = f'{program}/{episode_id}' webpage = self._download_webpage(url, episode_id) diff --git a/yt_dlp/extractor/clippit.py b/yt_dlp/extractor/clippit.py index 67b56e00d..393f21730 100644 --- a/yt_dlp/extractor/clippit.py +++ b/yt_dlp/extractor/clippit.py @@ -23,7 +23,7 @@ class ClippitIE(InfoExtractor): 'upload_date': '20160826', 'description': 'BattleBots | ABC', 'thumbnail': r're:^https?://.*\.jpg$', - } + }, } def _real_extract(self, url): @@ -36,7 +36,7 @@ class ClippitIE(InfoExtractor): quality = qualities(FORMATS) formats = [] for format_id in FORMATS: - url = self._html_search_regex(r'data-%s-file="(.+?)"' % format_id, + url = self._html_search_regex(rf'data-{format_id}-file="(.+?)"', webpage, 'url', fatal=False) if not url: continue diff --git a/yt_dlp/extractor/cliprs.py b/yt_dlp/extractor/cliprs.py index c2add02da..42f78cac6 100644 --- 
a/yt_dlp/extractor/cliprs.py +++ b/yt_dlp/extractor/cliprs.py @@ -15,7 +15,7 @@ class ClipRsIE(OnetBaseIE): 'duration': 229, 'timestamp': 1459850243, 'upload_date': '20160405', - } + }, } def _real_extract(self, url): diff --git a/yt_dlp/extractor/closertotruth.py b/yt_dlp/extractor/closertotruth.py index 1f9a5f611..77469eda9 100644 --- a/yt_dlp/extractor/closertotruth.py +++ b/yt_dlp/extractor/closertotruth.py @@ -15,7 +15,7 @@ class CloserToTruthIE(InfoExtractor): 'title': 'Solutions to the Mind-Body Problem?', 'upload_date': '20140221', 'timestamp': 1392956007, - 'uploader_id': 'CTTXML' + 'uploader_id': 'CTTXML', }, 'params': { 'skip_download': True, @@ -29,7 +29,7 @@ class CloserToTruthIE(InfoExtractor): 'title': 'How do Brains Work?', 'upload_date': '20140221', 'timestamp': 1392956024, - 'uploader_id': 'CTTXML' + 'uploader_id': 'CTTXML', }, 'params': { 'skip_download': True, @@ -69,7 +69,7 @@ class CloserToTruthIE(InfoExtractor): entry_ids.add(entry_id) entries.append({ '_type': 'url_transparent', - 'url': 'kaltura:%s:%s' % (partner_id, entry_id), + 'url': f'kaltura:{partner_id}:{entry_id}', 'ie_key': 'Kaltura', 'title': mobj.group('title'), }) @@ -83,7 +83,7 @@ class CloserToTruthIE(InfoExtractor): return { '_type': 'url_transparent', 'display_id': display_id, - 'url': 'kaltura:%s:%s' % (partner_id, entry_id), + 'url': f'kaltura:{partner_id}:{entry_id}', 'ie_key': 'Kaltura', - 'title': title + 'title': title, } diff --git a/yt_dlp/extractor/cloudflarestream.py b/yt_dlp/extractor/cloudflarestream.py index a812c24af..f902daacf 100644 --- a/yt_dlp/extractor/cloudflarestream.py +++ b/yt_dlp/extractor/cloudflarestream.py @@ -53,7 +53,7 @@ class CloudflareStreamIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net' - base_url = 'https://%s/%s/' % (domain, video_id) + base_url = f'https://{domain}/{video_id}/' if '.' 
in video_id: video_id = self._parse_json(base64.urlsafe_b64decode( video_id.split('.')[1] + '==='), video_id)['sub'] diff --git a/yt_dlp/extractor/cloudycdn.py b/yt_dlp/extractor/cloudycdn.py index e6e470e07..58bde4666 100644 --- a/yt_dlp/extractor/cloudycdn.py +++ b/yt_dlp/extractor/cloudycdn.py @@ -22,7 +22,7 @@ class CloudyCDNIE(InfoExtractor): 'upload_date': '20231121', 'title': 'D23-6000-105_cetstud', 'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/660858/placeholder1700589200.jpg', - } + }, }, { 'url': 'https://embed.cloudycdn.services/izm/media/26e_lv-8-5-1', 'md5': '798828a479151e2444d8dcfbec76e482', @@ -34,7 +34,7 @@ class CloudyCDNIE(InfoExtractor): 'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/488306/placeholder1679423604.jpg', 'duration': 1205, 'upload_date': '20221130', - } + }, }] _WEBPAGE_TESTS = [{ 'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/', @@ -47,7 +47,7 @@ class CloudyCDNIE(InfoExtractor): 'thumbnail': 'https://store.cloudycdn.services/tmsp00120/assets/media/518407/placeholder1678748124.jpg', 'timestamp': 1677181513, 'title': 'LIB-2', - } + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/clubic.py b/yt_dlp/extractor/clubic.py index 716f25969..c908e61a1 100644 --- a/yt_dlp/extractor/clubic.py +++ b/yt_dlp/extractor/clubic.py @@ -18,7 +18,7 @@ class ClubicIE(InfoExtractor): 'title': 'Clubic Week 2.0 : le FBI se lance dans la photo d\u0092identité', 'description': 're:Gueule de bois chez Nokia. 
Le constructeur a indiqué cette.*', 'thumbnail': r're:^http://img\.clubic\.com/.*\.jpg$', - } + }, }, { 'url': 'http://www.clubic.com/video/video-clubic-week-2-0-apple-iphone-6s-et-plus-mais-surtout-le-pencil-469792.html', 'only_matching': True, @@ -27,7 +27,7 @@ class ClubicIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id + player_url = f'http://player.m6web.fr/v1/player/clubic/{video_id}.html' player_page = self._download_webpage(player_url, video_id) config = self._parse_json(self._search_regex( diff --git a/yt_dlp/extractor/clyp.py b/yt_dlp/extractor/clyp.py index 273d0025f..2702427c8 100644 --- a/yt_dlp/extractor/clyp.py +++ b/yt_dlp/extractor/clyp.py @@ -58,13 +58,13 @@ class ClypIE(InfoExtractor): query['token'] = token metadata = self._download_json( - 'https://api.clyp.it/%s' % audio_id, audio_id, query=query) + f'https://api.clyp.it/{audio_id}', audio_id, query=query) formats = [] for secure in ('', 'Secure'): for ext in ('Ogg', 'Mp3'): - format_id = '%s%s' % (secure, ext) - format_url = metadata.get('%sUrl' % format_id) + format_id = f'{secure}{ext}' + format_url = metadata.get(f'{format_id}Url') if format_url: formats.append({ 'url': format_url, diff --git a/yt_dlp/extractor/cmt.py b/yt_dlp/extractor/cmt.py index 6359102aa..8e53b7fbf 100644 --- a/yt_dlp/extractor/cmt.py +++ b/yt_dlp/extractor/cmt.py @@ -1,6 +1,6 @@ from .mtv import MTVIE -# TODO Remove - Reason: Outdated Site +# TODO: Remove - Reason: Outdated Site class CMTIE(MTVIE): # XXX: Do not subclass from concrete IE @@ -52,4 +52,4 @@ class CMTIE(MTVIE): # XXX: Do not subclass from concrete IE video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) mgid = self._extract_mgid(webpage, url) - return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) + return self.url_result(f'http://media.mtvnservices.com/embed/{mgid}') diff --git a/yt_dlp/extractor/cnn.py 
b/yt_dlp/extractor/cnn.py index 61b62fae9..fe7615a89 100644 --- a/yt_dlp/extractor/cnn.py +++ b/yt_dlp/extractor/cnn.py @@ -26,7 +26,7 @@ class CNNIE(TurnerBaseIE): 'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology', 'ext': 'mp4', 'title': "Student's epic speech stuns new freshmen", - 'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"", + 'description': 'A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from "2001: A Space Odyssey."', 'upload_date': '20130821', }, 'expected_warnings': ['Failed to download m3u8 information'], @@ -161,7 +161,7 @@ class CNNIndonesiaIE(InfoExtractor): 'release_timestamp': 1662859088, 'release_date': '20220911', 'uploader': 'Asfahan Yahsyi', - } + }, }, { 'url': 'https://www.cnnindonesia.com/internasional/20220911104341-139-846189/video-momen-charles-disambut-meriah-usai-dilantik-jadi-raja-inggris', 'info_dict': { @@ -178,7 +178,7 @@ class CNNIndonesiaIE(InfoExtractor): 'release_date': '20220911', 'uploader': 'REUTERS', 'release_timestamp': 1662869995, - } + }, }] def _real_extract(self, url): @@ -194,5 +194,5 @@ class CNNIndonesiaIE(InfoExtractor): '_type': 'url_transparent', 'url': embed_url, 'upload_date': upload_date, - 'tags': try_call(lambda: self._html_search_meta('keywords', webpage).split(', ')) + 'tags': try_call(lambda: self._html_search_meta('keywords', webpage).split(', ')), }) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 1d2c443c0..2799747ec 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -60,7 +60,6 @@ from ..utils import ( determine_ext, dict_get, encode_data_uri, - error_to_compat_str, extract_attributes, filter_dict, fix_xml_ampersands, @@ -767,8 +766,8 @@ class InfoExtractor: self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) if self._x_forwarded_for_ip: self.report_warning( - 'Video is geo 
restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.' - % (self._x_forwarded_for_ip, country_code.upper())) + 'Video is geo restricted. Retrying extraction with fake IP ' + f'{self._x_forwarded_for_ip} ({country_code.upper()}) as X-Forwarded-For.') return True return False @@ -841,7 +840,7 @@ class InfoExtractor: if not self._downloader._first_webpage_request: sleep_interval = self.get_param('sleep_interval_requests') or 0 if sleep_interval > 0: - self.to_screen('Sleeping %s seconds ...' % sleep_interval) + self.to_screen(f'Sleeping {sleep_interval} seconds ...') time.sleep(sleep_interval) else: self._downloader._first_webpage_request = False @@ -898,7 +897,7 @@ class InfoExtractor: if errnote is None: errnote = 'Unable to download webpage' - errmsg = f'{errnote}: {error_to_compat_str(err)}' + errmsg = f'{errnote}: {err}' if fatal: raise ExtractorError(errmsg, cause=err) else: @@ -987,7 +986,7 @@ class InfoExtractor: r'