From 7af1ddaaf2a6a0a750373a9ab53c7770af4f9fe4 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 19 Oct 2024 21:40:20 +0000 Subject: [PATCH 01/98] [ie/youtube] Fix `comment_count` extraction (#11274) Authored by: bashonly --- yt_dlp/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 6acc42fc0a..f41f57ed16 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4701,11 +4701,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): headers=self.generate_api_headers(ytcfg=master_ytcfg), note='Downloading initial data API JSON') + COMMENTS_SECTION_IDS = ('comment-item-section', 'engagement-panel-comments-section') info['comment_count'] = traverse_obj(initial_data, ( 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer', 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', ), ( - 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section', + 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] in COMMENTS_SECTION_IDS, 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', ), expected_type=self._get_count, get_all=False) From 3148c1822f66533998278f0a1cf842b9bea1526a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 19 Oct 2024 21:41:14 +0000 Subject: [PATCH 02/98] [ie/substack] Resolve podcast file extensions (#11275) Closes #4601 Authored by: bashonly --- yt_dlp/extractor/substack.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/substack.py b/yt_dlp/extractor/substack.py index 30cb322dc2..b70d40f2ca 100644 --- a/yt_dlp/extractor/substack.py +++ b/yt_dlp/extractor/substack.py @@ -2,7 +2,13 @@ import re import urllib.parse from .common import InfoExtractor -from ..utils import js_to_json, str_or_none, traverse_obj +from ..networking import HEADRequest +from ..utils import ( + determine_ext, + js_to_json, + str_or_none, +) +from ..utils.traversal import traverse_obj class SubstackIE(InfoExtractor): @@ -43,6 +49,19 @@ class SubstackIE(InfoExtractor): 'uploader': "Andrew Zimmern's Spilled Milk ", 'uploader_id': '577659', }, + }, { + # Podcast that needs its file extension resolved to mp3 + 'url': 'https://persuasion1.substack.com/p/summers', + 'md5': '1456a755d46084744facdfac9edf900f', + 'info_dict': { + 'id': '141970405', + 'ext': 'mp3', + 'title': 'Larry Summers on What Went Wrong on Campus', + 'description': 'Yascha Mounk and Larry Summers also discuss the promise and perils of artificial intelligence.', + 'thumbnail': r're:https://substackcdn\.com/image/.+\.jpeg', + 'uploader': 'Persuasion', + 'uploader_id': '61579', + }, }] @classmethod @@ -89,7 +108,15 @@ class SubstackIE(InfoExtractor): post_type = webpage_info['post']['type'] formats, subtitles = [], {} if post_type == 'podcast': - formats, subtitles = [{'url': webpage_info['post']['podcast_url']}], {} + fmt = {'url': webpage_info['post']['podcast_url']} + if not determine_ext(fmt['url'], default_ext=None): + # The redirected format URL expires but the original URL doesn't, + # so we only want to extract the extension from this request + fmt['ext'] = determine_ext(self._request_webpage( + HEADRequest(fmt['url']), display_id, + 'Resolving podcast file extension', + 'Podcast URL is invalid').url) + formats.append(fmt) elif post_type == 'video': formats, subtitles = self._extract_video_formats(webpage_info['post']['videoUpload']['id'], canonical_url) else: From 679c68240a26481ea7c07cc0c014745631ea8481 Mon Sep 17 00:00:00 2001 From: rubyevadestaxes <147743127+rubyevadestaxes@users.noreply.github.com> Date: Sat, 19 Oct 2024 23:51:47 +0200 Subject: [PATCH 03/98] [ie/twitter:spaces] Allow extraction when not logged in (#11289) Closes #11288 Authored by: rubyevadestaxes --- yt_dlp/extractor/twitter.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index aca94df2dd..5adaf16393 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -934,14 +934,13 @@ class TwitterIE(TwitterBaseIE): 'uploader_id': 'MoniqueCamarra', 'live_status': 'was_live', 'release_timestamp': 1658417414, - 'description': 'md5:acce559345fd49f129c20dbcda3f1201', + 'description': r're:Twitter Space participated by Sergej Sumlenny.+', 'timestamp': 1658407771, 'release_date': '20220721', 'upload_date': '20220721', }, 'add_ie': ['TwitterSpaces'], 'params': {'skip_download': 'm3u8'}, - 'skip': 'Requires authentication', }, { # URL specifies video number but --yes-playlist 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1', @@ -1856,8 +1855,6 @@ class TwitterSpacesIE(TwitterBaseIE): def _real_extract(self, url): space_id = self._match_id(url) - if not self.is_logged_in: - self.raise_login_required('Twitter Spaces require authentication') space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace'] if not space_data: raise ExtractorError('Twitter Space not found', expected=True) From 8de431ec97a4b62b73df8f686b6e21e462775336 Mon Sep 17 00:00:00 2001 From: sepro Date: Sun, 20 Oct 2024 15:18:15 +0200 Subject: [PATCH 04/98] [ie/Funk] Extend `_VALID_URL` (#11269) Authored by: seproDev --- yt_dlp/extractor/funk.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/funk.py b/yt_dlp/extractor/funk.py index 8bdea3fce7..ef8ea72a8c 100644 --- a/yt_dlp/extractor/funk.py +++ b/yt_dlp/extractor/funk.py @@ -3,7 +3,7 @@ from .nexx import NexxIE class FunkIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P[0-9a-z-]+)-(?P\d+)' + _VALID_URL = r'https?://(?:(?:www|origin|play)\.)?funk\.net/(?:channel|playlist)/[^/?#]+/(?P[0-9a-z-]+)-(?P\d+)' _TESTS = [{ 'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821', 'md5': '8610449476156f338761a75391b0017d', @@ -27,6 +27,9 @@ class FunkIE(InfoExtractor): }, { 'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699', 'only_matching': True, + }, { + 'url': 'https://play.funk.net/playlist/neuesteVideos/george-floyd-wenn-die-polizei-toetet-der-fall-2004391', + 'only_matching': True, }] def _real_extract(self, url): From 0f593dca9fa995d88eb763170a932da61c8f24dc Mon Sep 17 00:00:00 2001 From: Imran Hussain Date: Sun, 20 Oct 2024 18:10:26 +0100 Subject: [PATCH 05/98] Add option `--plugin-dirs` (#11277) Closes #3260 Authored by: imranh2, coletdjnz Co-authored-by: coletdjnz --- README.md | 7 +++++++ test/test_plugins.py | 19 +++++++++++++++++++ .../yt_dlp_plugins/extractor/package.py | 5 +++++ yt_dlp/__init__.py | 5 +++++ yt_dlp/options.py | 8 ++++++++ yt_dlp/plugins.py | 7 +++++++ yt_dlp/utils/_utils.py | 4 ++++ 7 files changed, 55 insertions(+) create mode 100644 test/testdata/plugin_packages/testpackage/yt_dlp_plugins/extractor/package.py diff --git a/README.md b/README.md index 1cafe51d51..fc38a529a7 100644 --- a/README.md +++ b/README.md @@ -348,6 +348,13 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git containing directory ("-" for stdin). Can be used multiple times and inside other configuration files + --plugin-dirs PATH Path to an additional directory to search + for plugins. This option can be used + multiple times to add multiple directories. + Note that this currently only works for + extractor plugins; postprocessor plugins can + only be loaded from the default plugin + directories --flat-playlist Do not extract the videos of a playlist, only list them --no-flat-playlist Fully extract the videos of a playlist diff --git a/test/test_plugins.py b/test/test_plugins.py index c82158e9fc..77545d136c 100644 --- a/test/test_plugins.py +++ b/test/test_plugins.py @@ -10,6 +10,7 @@ TEST_DATA_DIR = Path(os.path.dirname(os.path.abspath(__file__)), 'testdata') sys.path.append(str(TEST_DATA_DIR)) importlib.invalidate_caches() +from yt_dlp.utils import Config from yt_dlp.plugins import PACKAGE_NAME, directories, load_plugins @@ -68,6 +69,24 @@ class TestPlugins(unittest.TestCase): os.remove(zip_path) importlib.invalidate_caches() # reset the import caches + def test_plugin_dirs(self): + # Internal plugin dirs hack for CLI --plugin-dirs + # To be replaced with proper system later + custom_plugin_dir = TEST_DATA_DIR / 'plugin_packages' + Config._plugin_dirs = [str(custom_plugin_dir)] + importlib.invalidate_caches() # reset the import caches + + try: + package = importlib.import_module(f'{PACKAGE_NAME}.extractor') + self.assertIn(custom_plugin_dir / 'testpackage' / PACKAGE_NAME / 'extractor', map(Path, package.__path__)) + + plugins_ie = load_plugins('extractor', 'IE') + self.assertIn('PackagePluginIE', plugins_ie.keys()) + + finally: + Config._plugin_dirs = [] + importlib.invalidate_caches() # reset the import caches + if __name__ == '__main__': unittest.main() diff --git a/test/testdata/plugin_packages/testpackage/yt_dlp_plugins/extractor/package.py b/test/testdata/plugin_packages/testpackage/yt_dlp_plugins/extractor/package.py new file mode 100644 index 0000000000..b860300d8d --- /dev/null +++ b/test/testdata/plugin_packages/testpackage/yt_dlp_plugins/extractor/package.py @@ -0,0 +1,5 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class PackagePluginIE(InfoExtractor): + pass diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index f598b6c2fe..d976f5bbcb 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -34,6 +34,7 @@ from .postprocessor import ( ) from .update import Updater from .utils import ( + Config, NO_DEFAULT, POSTPROCESS_WHEN, DateRange, @@ -967,6 +968,10 @@ def _real_main(argv=None): parser, opts, all_urls, ydl_opts = parse_options(argv) + # HACK: Set the plugin dirs early on + # TODO(coletdjnz): remove when plugin globals system is implemented + Config._plugin_dirs = opts.plugin_dirs + # Dump user agent if opts.dump_user_agent: ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent']) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 9980b7fc3f..c3a647da77 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -408,6 +408,14 @@ def create_parser(): help=( 'Location of the main configuration file; either the path to the config or its containing directory ' '("-" for stdin). Can be used multiple times and inside other configuration files')) + general.add_option( + '--plugin-dirs', + dest='plugin_dirs', metavar='PATH', action='append', + help=( + 'Path to an additional directory to search for plugins. ' + 'This option can be used multiple times to add multiple directories. ' + 'Note that this currently only works for extractor plugins; ' + 'postprocessor plugins can only be loaded from the default plugin directories')) general.add_option( '--flat-playlist', action='store_const', dest='extract_flat', const='in_playlist', default=False, diff --git a/yt_dlp/plugins.py b/yt_dlp/plugins.py index d777d14e71..204558d603 100644 --- a/yt_dlp/plugins.py +++ b/yt_dlp/plugins.py @@ -15,6 +15,7 @@ from zipfile import ZipFile from .compat import functools # isort: split from .utils import ( + Config, get_executable_path, get_system_config_dirs, get_user_config_dirs, @@ -84,6 +85,12 @@ class PluginFinder(importlib.abc.MetaPathFinder): with contextlib.suppress(ValueError): # Added when running __main__.py directly candidate_locations.remove(Path(__file__).parent) + # TODO(coletdjnz): remove when plugin globals system is implemented + if Config._plugin_dirs: + candidate_locations.extend(_get_package_paths( + *Config._plugin_dirs, + containing_folder='')) + parts = Path(*fullname.split('.')) for path in orderedSet(candidate_locations, lazy=True): candidate = path / parts diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 27ebfefbcb..ea748898f2 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -4897,6 +4897,10 @@ class Config: filename = None __initialized = False + # Internal only, do not use! Hack to enable --plugin-dirs + # TODO(coletdjnz): remove when plugin globals system is implemented + _plugin_dirs = None + def __init__(self, parser, label=None): self.parser, self.label = parser, label self._loaded_paths, self.configs = set(), [] From 5af774d7a36c00bea618c7047c9326532cd3f616 Mon Sep 17 00:00:00 2001 From: Deer-Spangle <60626596+Deer-Spangle@users.noreply.github.com> Date: Sun, 20 Oct 2024 21:58:53 +0100 Subject: [PATCH 06/98] [ie/imgur] Support new URL format (#11075) Authored by: Deer-Spangle --- yt_dlp/extractor/imgur.py | 68 ++++++++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 12 deletions(-) diff --git a/yt_dlp/extractor/imgur.py b/yt_dlp/extractor/imgur.py index f0c3419d49..2a5a1c9e84 100644 --- a/yt_dlp/extractor/imgur.py +++ b/yt_dlp/extractor/imgur.py @@ -37,7 +37,7 @@ class ImgurBaseIE(InfoExtractor): class ImgurIE(ImgurBaseIE): - _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?:[^/?#]+-)?(?P[a-zA-Z0-9]+)' _TESTS = [{ 'url': 'https://imgur.com/A61SaA1', @@ -54,6 +54,22 @@ class ImgurIE(ImgurBaseIE): 'like_count': int, 'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg', }, + }, { + # Test with URL slug + 'url': 'https://imgur.com/mrw-gifv-is-up-running-without-any-bugs-A61SaA1', + 'info_dict': { + 'id': 'A61SaA1', + 'ext': 'mp4', + 'title': 'MRW gifv is up and running without any bugs', + 'timestamp': 1416446068, + 'upload_date': '20141120', + 'dislike_count': int, + 'comment_count': int, + 'release_timestamp': 1416446068, + 'release_date': '20141120', + 'like_count': int, + 'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg', + }, }, { 'url': 'https://i.imgur.com/A61SaA1.gifv', 'only_matching': True, @@ -92,6 +108,7 @@ class ImgurIE(ImgurBaseIE): 'comment_count': int, 'release_timestamp': 1710491255, 'release_date': '20240315', + 'thumbnail': 'https://i.imgur.com/zV03bd5h.jpg', }, }] @@ -252,17 +269,9 @@ class ImgurGalleryBaseIE(ImgurBaseIE): class ImgurGalleryIE(ImgurGalleryBaseIE): IE_NAME = 'imgur:gallery' - _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?:[^/?#]+-)?(?P[a-zA-Z0-9]+)' _TESTS = [{ - 'url': 'http://imgur.com/gallery/Q95ko', - 'info_dict': { - 'id': 'Q95ko', - 'title': 'Adding faces make every GIF better', - }, - 'playlist_count': 25, - 'skip': 'Zoinks! You\'ve taken a wrong turn.', - }, { # TODO: static images - replace with animated/video gallery 'url': 'http://imgur.com/topic/Aww/ll5Vk', 'only_matching': True, @@ -280,7 +289,27 @@ class ImgurGalleryIE(ImgurGalleryBaseIE): 'release_timestamp': 1358554297, 'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg', 'release_date': '20130119', - 'uploader_url': 'https://i.imgur.com/u3R4I2S_d.png?maxwidth=290&fidelity=grand', + 'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand', + 'comment_count': int, + 'dislike_count': int, + 'like_count': int, + }, + }, { + # Test with slug + 'url': 'https://imgur.com/gallery/classic-steve-carell-gif-cracks-me-up-everytime-repost-downvotes-YcAQlkx', + 'add_ies': ['Imgur'], + 'info_dict': { + 'id': 'YcAQlkx', + 'ext': 'mp4', + 'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....', + 'timestamp': 1358554297, + 'upload_date': '20130119', + 'uploader_id': '1648642', + 'uploader': 'wittyusernamehere', + 'release_timestamp': 1358554297, + 'release_date': '20130119', + 'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg', + 'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand', 'comment_count': int, 'dislike_count': int, 'like_count': int, @@ -317,6 +346,13 @@ class ImgurGalleryIE(ImgurGalleryBaseIE): 'title': 'Penguins !', }, 'playlist_count': 3, + }, { + 'url': 'https://imgur.com/t/unmuted/penguins-penguins-6lAn9VQ', + 'info_dict': { + 'id': '6lAn9VQ', + 'title': 'Penguins !', + }, + 'playlist_count': 3, }, { 'url': 'https://imgur.com/t/unmuted/kx2uD3C', 'add_ies': ['Imgur'], @@ -357,7 +393,7 @@ class ImgurGalleryIE(ImgurGalleryBaseIE): class ImgurAlbumIE(ImgurGalleryBaseIE): IE_NAME = 'imgur:album' - _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?:[^/?#]+-)?(?P[a-zA-Z0-9]+)' _GALLERY = False _TESTS = [{ # TODO: only static images - replace with animated/video gallery @@ -372,6 +408,14 @@ class ImgurAlbumIE(ImgurGalleryBaseIE): 'title': 'enen-no-shouboutai', }, 'playlist_count': 2, + }, { + # Test with URL slug + 'url': 'https://imgur.com/a/enen-no-shouboutai-iX265HX', + 'info_dict': { + 'id': 'iX265HX', + 'title': 'enen-no-shouboutai', + }, + 'playlist_count': 2, }, { 'url': 'https://imgur.com/a/8pih2Ed', 'info_dict': { From c4d95f67ddc522297bb1fea875255cf94b34d595 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Kotiuk?= Date: Sun, 20 Oct 2024 23:16:22 +0200 Subject: [PATCH 07/98] [ie/cda] Support folders (#10786) Closes #5429 Authored by: pktiuk --- yt_dlp/extractor/_extractors.py | 5 +++- yt_dlp/extractor/cda.py | 48 +++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 4b1f4c316d..8d59360949 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -363,7 +363,10 @@ from .ccc import ( ) from .ccma import CCMAIE from .cctv import CCTVIE -from .cda import CDAIE +from .cda import ( + CDAIE, + CDAFolderIE, +) from .cellebrite import CellebriteIE from .ceskatelevize import CeskaTelevizeIE from .cgtn import CGTNIE diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py index 62ee8b17f1..b2738e492f 100644 --- a/yt_dlp/extractor/cda.py +++ b/yt_dlp/extractor/cda.py @@ -12,6 +12,7 @@ from .common import InfoExtractor from ..compat import compat_ord from ..utils import ( ExtractorError, + OnDemandPagedList, float_or_none, int_or_none, merge_dicts, @@ -351,3 +352,50 @@ class CDAIE(InfoExtractor): extract_format(webpage, resolution) return merge_dicts(info_dict, info) + + +class CDAFolderIE(InfoExtractor): + _MAX_PAGE_SIZE = 36 + _VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P\w+)/folder/(?P\d+)' + _TESTS = [ + { + 'url': 'https://www.cda.pl/domino264/folder/31188385', + 'info_dict': { + 'id': '31188385', + 'title': 'SERIA DRUGA', + }, + 'playlist_mincount': 13, + }, + { + 'url': 'https://www.cda.pl/smiechawaTV/folder/2664592/vfilm', + 'info_dict': { + 'id': '2664592', + 'title': 'VideoDowcipy - wszystkie odcinki', + }, + 'playlist_mincount': 71, + }, + { + 'url': 'https://www.cda.pl/DeliciousBeauty/folder/19129979/vfilm', + 'info_dict': { + 'id': '19129979', + 'title': 'TESTY KOSMETYKÓW', + }, + 'playlist_mincount': 139, + }] + + def _real_extract(self, url): + folder_id, channel = self._match_valid_url(url).group('id', 'channel') + + webpage = self._download_webpage(url, folder_id) + + def extract_page_entries(page): + webpage = self._download_webpage( + f'https://www.cda.pl/{channel}/folder/{folder_id}/vfilm/{page + 1}', folder_id, + f'Downloading page {page + 1}', expected_status=404) + items = re.findall(r']+href="/video/([0-9a-z]+)"', webpage) + for video_id in items: + yield self.url_result(f'https://www.cda.pl/video/{video_id}', CDAIE, video_id) + + return self.playlist_result( + OnDemandPagedList(extract_page_entries, self._MAX_PAGE_SIZE), + folder_id, self._og_search_title(webpage)) From 87408ccfd772ddf31a8323d8151c24f9577cbc9f Mon Sep 17 00:00:00 2001 From: sepro Date: Sun, 20 Oct 2024 23:18:11 +0200 Subject: [PATCH 08/98] [ie/imgur] Fix thumbnail extraction (#11298) Authored by: seproDev --- yt_dlp/extractor/imgur.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/imgur.py b/yt_dlp/extractor/imgur.py index 2a5a1c9e84..e2644e6a40 100644 --- a/yt_dlp/extractor/imgur.py +++ b/yt_dlp/extractor/imgur.py @@ -225,7 +225,10 @@ class ImgurIE(ImgurBaseIE): }), get_all=False), 'id': video_id, 'formats': formats, - 'thumbnail': url_or_none(search('thumbnailUrl')), + 'thumbnails': [{ + 'url': thumbnail_url, + 'http_headers': {'Accept': '*/*'}, + }] if (thumbnail_url := search(['thumbnailUrl', 'twitter:image', 'og:image'])) else None, 'http_headers': {'Accept': '*/*'}, } From ec2f4bf0823a13043f98f5bd0bf6677837bf09dc Mon Sep 17 00:00:00 2001 From: bashonly Date: Sun, 20 Oct 2024 17:25:29 -0500 Subject: [PATCH 09/98] [ie/youtube] Remove broken age-restriction workaround (#11297) Closes #11296 Authored by: bashonly --- yt_dlp/extractor/youtube.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index f41f57ed16..60492fff9f 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -114,6 +114,7 @@ INNERTUBE_CLIENTS = { }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 67, }, + # This client now requires sign-in for every video 'web_creator': { 'INNERTUBE_CONTEXT': { 'client': { @@ -153,6 +154,7 @@ INNERTUBE_CLIENTS = { 'REQUIRE_JS_PLAYER': False, 'REQUIRE_PO_TOKEN': True, }, + # This client now requires sign-in for every video 'android_creator': { 'INNERTUBE_CONTEXT': { 'client': { @@ -201,6 +203,7 @@ INNERTUBE_CLIENTS = { 'PLAYER_PARAMS': '2AMB', }, # This client only has legacy formats and storyboards + # BROKEN: Unable to download API page: HTTP Error 403: Forbidden "The caller does not have permission" 'android_producer': { 'INNERTUBE_CONTEXT': { 'client': { @@ -247,6 +250,7 @@ INNERTUBE_CLIENTS = { 'INNERTUBE_CONTEXT_CLIENT_NAME': 26, 'REQUIRE_JS_PLAYER': False, }, + # This client now requires sign-in for every video 'ios_creator': { 'INNERTUBE_CONTEXT': { 'client': { @@ -282,8 +286,9 @@ INNERTUBE_CLIENTS = { }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 7, }, - # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option) - # See: https://github.com/zerodytrash/YouTube-Internal-Clients + # This client now requires sign-in for every video + # It was previously an age-gate workaround for videos that were `playable_in_embed` + # It may still be useful if signed into an EU account that is not age-verified 'tv_embedded': { 'INNERTUBE_CONTEXT': { 'client': { @@ -1525,6 +1530,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'heatmap': 'count:100', 'timestamp': 1401991663, }, + 'skip': 'Age-restricted; requires authentication', }, { 'note': 'Age-gate video with embed allowed in public site', @@ -1555,6 +1561,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'comment_count': int, 'channel_is_verified': True, }, + 'skip': 'Age-restricted; requires authentication', }, { 'note': 'Age-gate video embedable only with clientScreen=EMBED', @@ -1585,6 +1592,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@ProjektMelody', 'timestamp': 1577508724, }, + 'skip': 'Age-restricted; requires authentication', }, { 'note': 'Non-Agegated non-embeddable video', @@ -2356,6 +2364,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'channel_is_verified': True, 'timestamp': 1405513526, }, + 'skip': 'Age-restricted; requires authentication', }, { # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685 @@ -2726,6 +2735,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'timestamp': 1577508724, }, 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'}, + 'skip': 'Age-restricted; requires authentication', }, { 'url': 'https://www.youtube.com/live/qVv6vCqciTM', @@ -3953,26 +3963,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): else: prs.append(pr) - # tv_embedded can work around age-gate and age-verification IF the video is embeddable - if self._is_agegated(pr) and variant != 'tv_embedded': - append_client(f'tv_embedded.{base_client}') - - # Unauthenticated users will only get tv_embedded client formats if age-gated - if self._is_agegated(pr) and not self.is_authenticated: - self.to_screen( - f'{video_id}: This video is age-restricted; some formats may be missing ' - f'without authentication. {self._login_hint()}', only_once=True) - # EU countries require age-verification for accounts to access age-restricted videos # If account is not age-verified, _is_agegated() will be truthy for non-embedded clients - # If embedding is disabled for the video, _is_unplayable() will be truthy for tv_embedded - embedding_is_disabled = variant == 'tv_embedded' and self._is_unplayable(pr) - if self.is_authenticated and (self._is_agegated(pr) or embedding_is_disabled): + if self.is_authenticated and self._is_agegated(pr): self.to_screen( f'{video_id}: This video is age-restricted and YouTube is requiring ' 'account age-verification; some formats may be missing', only_once=True) # web_creator and mediaconnect can work around the age-verification requirement - # _producer, _testsuite, & _vr variants can also work around age-verification + # _testsuite & _vr variants can also work around age-verification + # tv_embedded may(?) still work around age-verification if the video is embeddable append_client('web_creator', 'mediaconnect') prs.extend(deprioritized_prs) From fed53d70bdb7d3e37ef63dd7fcf0ef74356167fd Mon Sep 17 00:00:00 2001 From: bashonly Date: Sun, 20 Oct 2024 14:49:00 -0500 Subject: [PATCH 10/98] [ie/youtube] Remove broken `android_producer` client (#11297) Authored by: bashonly --- yt_dlp/extractor/youtube.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 60492fff9f..728cb06966 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -202,22 +202,6 @@ INNERTUBE_CLIENTS = { 'REQUIRE_JS_PLAYER': False, 'PLAYER_PARAMS': '2AMB', }, - # This client only has legacy formats and storyboards - # BROKEN: Unable to download API page: HTTP Error 403: Forbidden "The caller does not have permission" - 'android_producer': { - 'INNERTUBE_CONTEXT': { - 'client': { - 'clientName': 'ANDROID_PRODUCER', - 'clientVersion': '0.111.1', - 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.apps.youtube.producer/0.111.1 (Linux; U; Android 11) gzip', - 'osName': 'Android', - 'osVersion': '11', - }, - }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 91, - 'REQUIRE_JS_PLAYER': False, - }, # iOS clients have HLS live streams. Setting device model to get 60fps formats. # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558 'ios': { From 40054cb4a7ebbea30d335d444e6f58b298a3baa0 Mon Sep 17 00:00:00 2001 From: David Skrundz Date: Mon, 21 Oct 2024 12:56:43 -0600 Subject: [PATCH 11/98] [ie/gem.cbc.ca] Fix formats extraction (#11196) Also extracts `timestamp` and `release_timestamp` as seconds instead of milliseconds Authored by: DavidSkrundz --- yt_dlp/extractor/cbc.py | 82 +++++++++++++---------------------------- 1 file changed, 26 insertions(+), 56 deletions(-) diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index 40224f63f5..b44c23fa10 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -4,7 +4,6 @@ import json import re import time import urllib.parse -import xml.etree.ElementTree from .common import InfoExtractor from ..networking import HEADRequest @@ -12,7 +11,6 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, - join_nonempty, js_to_json, mimetype2ext, orderedSet, @@ -524,14 +522,13 @@ class CBCGemIE(InfoExtractor): _TESTS = [{ # This is a normal, public, TV show video 'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01', - 'md5': '93dbb31c74a8e45b378cf13bd3f6f11e', 'info_dict': { 'id': 'schitts-creek/s06e01', 'ext': 'mp4', 'title': 'Smoke Signals', 'description': 'md5:929868d20021c924020641769eb3e7f1', - 'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_06e01_thumbnail_v01.jpg?im=Resize=(Size)', - 'duration': 1314, + 'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_06e01_thumbnail_v01\.jpg', + 'duration': 1324, 'categories': ['comedy'], 'series': 'Schitt\'s Creek', 'season': 'Season 6', @@ -539,19 +536,21 @@ class CBCGemIE(InfoExtractor): 'episode': 'Smoke Signals', 'episode_number': 1, 'episode_id': 'schitts-creek/s06e01', + 'upload_date': '20210618', + 'timestamp': 1623988800, + 'release_date': '20200107', + 'release_timestamp': 1578427200, }, 'params': {'format': 'bv'}, - 'skip': 'Geo-restricted to Canada', }, { # This video requires an account in the browser, but works fine in yt-dlp 'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01', - 'md5': '297a9600f554f2258aed01514226a697', 'info_dict': { 'id': 'schitts-creek/s01e01', 'ext': 'mp4', 'title': 'The Cup Runneth Over', 'description': 'md5:9bca14ea49ab808097530eb05a29e797', - 'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_01e01_thumbnail_v01.jpg?im=Resize=(Size)', + 'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_01e01_thumbnail_v01\.jpg', 'series': 'Schitt\'s Creek', 'season_number': 1, 'season': 'Season 1', @@ -560,9 +559,12 @@ class CBCGemIE(InfoExtractor): 'episode_id': 'schitts-creek/s01e01', 'duration': 1309, 'categories': ['comedy'], + 'upload_date': '20210617', + 'timestamp': 1623902400, + 'release_date': '20151124', + 'release_timestamp': 1448323200, }, 'params': {'format': 'bv'}, - 'skip': 'Geo-restricted to Canada', }, { 'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01', 'only_matching': True, @@ -631,38 +633,6 @@ class CBCGemIE(InfoExtractor): return self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token') - def _find_secret_formats(self, formats, video_id): - """ Find a valid video url and convert it to the secret variant """ - base_format = next((f for f in formats if f.get('vcodec') != 'none'), None) - if not base_format: - return - - base_url = re.sub(r'(Manifest\(.*?),filter=[\w-]+(.*?\))', r'\1\2', base_format['url']) - url = re.sub(r'(Manifest\(.*?),format=[\w-]+(.*?\))', r'\1\2', base_url) - - secret_xml = self._download_xml(url, video_id, note='Downloading secret XML', fatal=False) - if not isinstance(secret_xml, xml.etree.ElementTree.Element): - return - - for child in secret_xml: - if child.attrib.get('Type') != 'video': - continue - for video_quality in child: - bitrate = int_or_none(video_quality.attrib.get('Bitrate')) - if not bitrate or 'Index' not in video_quality.attrib: - continue - height = int_or_none(video_quality.attrib.get('MaxHeight')) - - yield { - **base_format, - 'format_id': join_nonempty('sec', height), - # Note: \g<1> is necessary instead of \1 since bitrate is a number - 'url': re.sub(r'(QualityLevels\()\d+(\))', fr'\g<1>{bitrate}\2', base_url), - 'width': int_or_none(video_quality.attrib.get('MaxWidth')), - 'tbr': bitrate / 1000.0, - 'height': height, - } - def _real_extract(self, url): video_id = self._match_id(url) video_info = self._download_json( @@ -676,7 +646,6 @@ class CBCGemIE(InfoExtractor): else: headers = {} m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers) - m3u8_url = m3u8_info.get('url') if m3u8_info.get('errorCode') == 1: self.raise_geo_restricted(countries=['CA']) @@ -685,9 +654,9 @@ class CBCGemIE(InfoExtractor): elif m3u8_info.get('errorCode') != 0: raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}') - formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls') + formats = self._extract_m3u8_formats( + m3u8_info['url'], video_id, 'mp4', m3u8_id='hls', query={'manifestType': ''}) self._remove_duplicate_formats(formats) - formats.extend(self._find_secret_formats(formats, video_id)) for fmt in formats: if fmt.get('vcodec') == 'none': @@ -703,20 +672,21 @@ class CBCGemIE(InfoExtractor): return { 'id': video_id, - 'title': video_info['title'], - 'description': video_info.get('description'), - 'thumbnail': video_info.get('image'), - 'series': video_info.get('series'), - 'season_number': video_info.get('season'), - 'season': f'Season {video_info.get("season")}', - 'episode_number': video_info.get('episode'), - 'episode': video_info.get('title'), 'episode_id': video_id, - 'duration': video_info.get('duration'), - 'categories': [video_info.get('category')], 'formats': formats, - 'release_timestamp': video_info.get('airDate'), - 'timestamp': video_info.get('availableDate'), + **traverse_obj(video_info, { + 'title': ('title', {str}), + 'episode': ('title', {str}), + 'description': ('description', {str}), + 'thumbnail': ('image', {url_or_none}), + 'series': ('series', {str}), + 'season_number': ('season', {int_or_none}), + 'episode_number': ('episode', {int_or_none}), + 'duration': ('duration', {int_or_none}), + 'categories': ('category', {str}, all), + 'release_timestamp': ('airDate', {int_or_none(scale=1000)}), + 'timestamp': ('availableDate', {int_or_none(scale=1000)}), + }), } From 0b7ec08816fb196cd41d392f8331b4eb8366c4f8 Mon Sep 17 00:00:00 2001 From: DarkZeros Date: Mon, 21 Oct 2024 22:18:12 +0100 Subject: [PATCH 12/98] [ie/telecinco] Fix extractors (#11142) Closes #10986, Closes #11106 Authored by: DarkZeros, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/mitele.py | 26 ++++++- yt_dlp/extractor/telecinco.py | 140 +++++++++++++++++++--------------- 2 files changed, 103 insertions(+), 63 deletions(-) diff --git a/yt_dlp/extractor/mitele.py b/yt_dlp/extractor/mitele.py index ea29986729..3573a2a3fd 100644 --- a/yt_dlp/extractor/mitele.py +++ b/yt_dlp/extractor/mitele.py @@ -1,14 +1,13 @@ -from .telecinco import TelecincoIE +from .telecinco import TelecincoBaseIE from ..utils import ( int_or_none, parse_iso8601, ) -class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE +class MiTeleIE(TelecincoBaseIE): IE_DESC = 'mitele.es' _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P[^/]+)/player' - _TESTS = [{ 'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player', 'info_dict': { @@ -27,6 +26,7 @@ class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE 'timestamp': 1471209401, 'upload_date': '20160814', }, + 'skip': 'HTTP Error 404 Not Found', }, { # no explicit title 'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player', @@ -49,6 +49,26 @@ class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE 'params': { 'skip_download': True, }, + 'skip': 'HTTP Error 404 Not Found', + }, { + 'url': 'https://www.mitele.es/programas-tv/horizonte/temporada-5/programa-171-40_013480051/player/', + 'info_dict': { + 'id': '7adbe22e-cd41-4787-afa4-36f3da7c2c6f', + 'ext': 'mp4', + 'title': 'Horizonte Temporada 5 Programa 171', + 'description': 'md5:97f1fb712c5ac27e5693a8b3c5c0c6e3', + 'episode': 'Las Zonas de Bajas Emisiones, a debate', + 'episode_number': 171, + 'season': 'Season 5', + 'season_number': 5, + 'series': 'Horizonte', + 'duration': 7012, + 'upload_date': '20240927', + 'timestamp': 1727416450, + 'thumbnail': 'https://album.mediaset.es/eimg/2024/09/27/horizonte-171_9f02.jpg', + 'age_limit': 12, + }, + 'params': {'geo_bypass_country': 'ES'}, }, { 'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player', 'only_matching': True, diff --git a/yt_dlp/extractor/telecinco.py b/yt_dlp/extractor/telecinco.py index 7a9dcd71c5..9ef621446d 100644 --- a/yt_dlp/extractor/telecinco.py +++ b/yt_dlp/extractor/telecinco.py @@ -2,15 +2,69 @@ import json import re from .common import InfoExtractor +from ..networking.exceptions import HTTPError from ..utils import ( + ExtractorError, clean_html, int_or_none, + join_nonempty, str_or_none, - try_get, + traverse_obj, + update_url, + url_or_none, ) -class TelecincoIE(InfoExtractor): +class TelecincoBaseIE(InfoExtractor): + def _parse_content(self, content, url): + video_id = content['dataMediaId'] + config = self._download_json( + content['dataConfig'], video_id, 'Downloading config JSON') + services = config['services'] + caronte = self._download_json(services['caronte'], video_id) + if traverse_obj(caronte, ('dls', 0, 'drm', {bool})): + self.report_drm(video_id) + + stream = caronte['dls'][0]['stream'] + headers = { + 'Referer': url, + 'Origin': re.match(r'https?://[^/]+', url).group(0), + } + geo_headers = {**headers, **self.geo_verification_headers()} + + try: + cdn = self._download_json( + caronte['cerbero'], video_id, data=json.dumps({ + 'bbx': caronte['bbx'], + 'gbx': self._download_json(services['gbx'], video_id)['gbx'], + }).encode(), headers={ + 'Content-Type': 'application/json', + **geo_headers, + })['tokens']['1']['cdn'] + except ExtractorError as error: + if isinstance(error.cause, HTTPError) and error.cause.status == 403: + error_code = traverse_obj( + self._webpage_read_content(error.cause.response, caronte['cerbero'], video_id, fatal=False), + ({json.loads}, 'code', {int})) + if error_code == 4038: + self.raise_geo_restricted(countries=['ES']) + raise + + formats = self._extract_m3u8_formats( + update_url(stream, query=cdn), video_id, 'mp4', m3u8_id='hls', headers=geo_headers) + + return { + 'id': video_id, + 'title': traverse_obj(config, ('info', 'title', {str})), + 'formats': formats, + 'thumbnail': (traverse_obj(content, ('dataPoster', {url_or_none})) + or traverse_obj(config, 'poster', 'imageUrl', expected_type=url_or_none)), + 'duration': traverse_obj(content, ('dataDuration', {int_or_none})), + 'http_headers': headers, + } + + +class TelecincoIE(TelecincoBaseIE): IE_DESC = 'telecinco.es, cuatro.com and mediaset.es' _VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P.+?)\.html' @@ -30,6 +84,7 @@ class TelecincoIE(InfoExtractor): 'duration': 662, }, }], + 'skip': 'HTTP Error 410 Gone', }, { 'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html', 'md5': 'c86fe0d99e3bdb46b7950d38bf6ef12a', @@ -40,23 +95,24 @@ class TelecincoIE(InfoExtractor): 'description': 'md5:a62ecb5f1934fc787107d7b9a2262805', 'duration': 79, }, + 'skip': 'Redirects to main page', }, { 'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html', - 'md5': 'eddb50291df704ce23c74821b995bcac', + 'md5': '5ce057f43f30b634fbaf0f18c71a140a', 'info_dict': { 'id': 'aywerkD2Sv1vGNqq9b85Q2', 'ext': 'mp4', 'title': '#DOYLACARA. Con la trata no hay trato', - 'description': 'md5:2771356ff7bfad9179c5f5cd954f1477', 'duration': 50, + 'thumbnail': 'https://album.mediaset.es/eimg/2017/11/02/1tlQLO5Q3mtKT24f3EaC24.jpg', }, }, { # video in opening's content 'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html', 'info_dict': { - 'id': '2907195140', + 'id': '1691427', 'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"', - 'description': 'md5:73f340a7320143d37ab895375b2bf13a', + 'description': r're:Fiorella, la sobrina de Edmundo Arrocet, concedió .{727}', }, 'playlist': [{ 'md5': 'adb28c37238b675dad0f042292f209a7', @@ -65,6 +121,7 @@ class TelecincoIE(InfoExtractor): 'ext': 'mp4', 'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"', 'duration': 1015, + 'thumbnail': 'https://album.mediaset.es/eimg/2020/02/29/5opaC37lUhKlZ7FoDhiVC.jpg', }, }], 'params': { @@ -81,66 +138,29 @@ class TelecincoIE(InfoExtractor): 'only_matching': True, }] - def _parse_content(self, content, url): - video_id = content['dataMediaId'] - config = self._download_json( - content['dataConfig'], video_id, 'Downloading config JSON') - title = config['info']['title'] - services = config['services'] - caronte = self._download_json(services['caronte'], video_id) - stream = caronte['dls'][0]['stream'] - headers = self.geo_verification_headers() - headers.update({ - 'Content-Type': 'application/json;charset=UTF-8', - 'Origin': re.match(r'https?://[^/]+', url).group(0), - }) - cdn = self._download_json( - caronte['cerbero'], video_id, data=json.dumps({ - 'bbx': caronte['bbx'], - 'gbx': self._download_json(services['gbx'], video_id)['gbx'], - }).encode(), headers=headers)['tokens']['1']['cdn'] - formats = self._extract_m3u8_formats( - stream + '?' + cdn, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') - - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'), - 'duration': int_or_none(content.get('dataDuration')), - } - def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - article = self._parse_json(self._search_regex( - r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=\s*({.+})', - webpage, 'article'), display_id)['article'] - title = article.get('title') - description = clean_html(article.get('leadParagraph')) or '' + article = self._search_json( + r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=', + webpage, 'article', display_id)['article'] + description = traverse_obj(article, ('leadParagraph', {clean_html}, filter)) + if article.get('editorialType') != 'VID': entries = [] - body = [article.get('opening')] - body.extend(try_get(article, lambda x: x['body'], list) or []) - for p in body: - if not isinstance(p, dict): - continue - content = p.get('content') - if not content: - continue + + for p in traverse_obj(article, ((('opening', all), 'body'), lambda _, v: v['content'])): + content = p['content'] type_ = p.get('type') - if type_ == 'paragraph': - content_str = str_or_none(content) - if content_str: - description += content_str - continue - if type_ == 'video' and isinstance(content, dict): + if type_ == 'paragraph' and isinstance(content, str): + description = join_nonempty(description, content, delim='') + elif type_ == 'video' and isinstance(content, dict): entries.append(self._parse_content(content, url)) + return self.playlist_result( - entries, str_or_none(article.get('id')), title, description) - content = article['opening']['content'] - info = self._parse_content(content, url) - info.update({ - 'description': description, - }) + entries, str_or_none(article.get('id')), + traverse_obj(article, ('title', {str})), clean_html(description)) + + info = self._parse_content(article['opening']['content'], url) + info['description'] = description return info From 46fe60ff19395698a87113b2944453779e04ab9d Mon Sep 17 00:00:00 2001 From: 63427083dev <77916527+63427083@users.noreply.github.com> Date: Tue, 22 Oct 2024 01:42:45 +0200 Subject: [PATCH 13/98] [ie/afreecatv] Adapt extractors to new sooplive.co.kr domain (#11266) Closes #11253 Authored by: 63427083, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/afreecatv.py | 185 +++++++++++++--------------------- 1 file changed, 68 insertions(+), 117 deletions(-) diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index 815d20537f..83e510d1a2 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -33,21 +33,21 @@ class AfreecaTVBaseIE(InfoExtractor): } response = self._download_json( - 'https://login.afreecatv.com/app/LoginAction.php', None, + 'https://login.sooplive.co.kr/app/LoginAction.php', None, 'Logging in', data=urlencode_postdata(login_form)) _ERRORS = { -4: 'Your account has been suspended due to a violation of our terms and policies.', - -5: 'https://member.afreecatv.com/app/user_delete_progress.php', - -6: 'https://login.afreecatv.com/membership/changeMember.php', - -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.", - -9: 'https://member.afreecatv.com/app/pop_login_block.php', - -11: 'https://login.afreecatv.com/afreeca/second_login.php', - -12: 'https://member.afreecatv.com/app/user_security.php', + -5: 'https://member.sooplive.co.kr/app/user_delete_progress.php', + -6: 'https://login.sooplive.co.kr/membership/changeMember.php', + -8: "Hello! Soop here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.", + -9: 'https://member.sooplive.co.kr/app/pop_login_block.php', + -11: 'https://login.sooplive.co.kr/afreeca/second_login.php', + -12: 'https://member.sooplive.co.kr/app/user_security.php', 0: 'The username does not exist or you have entered the wrong password.', -1: 'The username does not exist or you have entered the wrong password.', -3: 'You have entered your username/password incorrectly.', - -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.', + -7: 'You cannot use your Global Soop account to access Korean Soop.', -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.', -32008: 'You have failed to log in. Please contact our Help Center.', } @@ -61,76 +61,40 @@ class AfreecaTVBaseIE(InfoExtractor): def _call_api(self, endpoint, display_id, data=None, headers=None, query=None): return self._download_json(Request( - f'https://api.m.afreecatv.com/{endpoint}', + f'https://api.m.sooplive.co.kr/{endpoint}', data=data, headers=headers, query=query, extensions={'legacy_ssl': True}), display_id, 'Downloading API JSON', 'Unable to download API JSON') class AfreecaTVIE(AfreecaTVBaseIE): - IE_NAME = 'afreecatv' - IE_DESC = 'afreecatv.com' - _VALID_URL = r'''(?x) - https?:// - (?: - (?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)? - (?: - /app/(?:index|read_ucc_bbs)\.cgi| - /player/[Pp]layer\.(?:swf|html) - )\?.*?\bnTitleNo=| - vod\.afreecatv\.com/(PLAYER/STATION|player)/ - ) - (?P\d+)/?(?:$|[?#&]) - ''' + IE_NAME = 'soop' + IE_DESC = 'sooplive.co.kr' + _VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/(?:PLAYER/STATION|player)/(?P\d+)/?(?:$|[?#&])' _TESTS = [{ - 'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=', - 'md5': 'f72c89fe7ecc14c1b5ce506c4996046e', + 'url': 'https://vod.sooplive.co.kr/player/96753363', 'info_dict': { - 'id': '36164052', + 'id': '20230108_9FF5BEE1_244432674_1', 'ext': 'mp4', - 'title': '데일리 에이프릴 요정들의 시상식!', - 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', - 'uploader': 'dailyapril', - 'uploader_id': 'dailyapril', - 'upload_date': '20160503', + 'uploader_id': 'rlantnghks', + 'uploader': '페이즈으', + 'duration': 10840, + 'thumbnail': r're:https?://videoimg\.sooplive\.co/.kr/.+', + 'upload_date': '20230108', + 'timestamp': 1673218805, + 'title': '젠지 페이즈', }, - 'skip': 'Video is gone', - }, { - 'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867', - 'info_dict': { - 'id': '36153164', - 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", - 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', - 'uploader': 'dailyapril', - 'uploader_id': 'dailyapril', + 'params': { + 'skip_download': True, }, - 'playlist_count': 2, - 'playlist': [{ - 'md5': 'd8b7c174568da61d774ef0203159bf97', - 'info_dict': { - 'id': '36153164_1', - 'ext': 'mp4', - 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", - 'upload_date': '20160502', - }, - }, { - 'md5': '58f2ce7f6044e34439ab2d50612ab02b', - 'info_dict': { - 'id': '36153164_2', - 'ext': 'mp4', - 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", - 'upload_date': '20160502', - }, - }], - 'skip': 'Video is gone', }, { # non standard key - 'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605', + 'url': 'http://vod.sooplive.co.kr/PLAYER/STATION/20515605', 'info_dict': { 'id': '20170411_BE689A0E_190960999_1_2_h', 'ext': 'mp4', 'title': '혼자사는여자집', - 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', + 'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+', 'uploader': '♥이슬이', 'uploader_id': 'dasl8121', 'upload_date': '20170411', @@ -142,12 +106,12 @@ class AfreecaTVIE(AfreecaTVBaseIE): }, }, { # adult content - 'url': 'https://vod.afreecatv.com/player/97267690', + 'url': 'https://vod.sooplive.co.kr/player/97267690', 'info_dict': { 'id': '20180327_27901457_202289533_1', 'ext': 'mp4', 'title': '[생]빨개요♥ (part 1)', - 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', + 'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+', 'uploader': '[SA]서아', 'uploader_id': 'bjdyrksu', 'upload_date': '20180327', @@ -157,36 +121,17 @@ class AfreecaTVIE(AfreecaTVBaseIE): 'skip_download': True, }, 'skip': 'The VOD does not exist', - }, { - 'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652', - 'only_matching': True, - }, { - 'url': 'https://vod.afreecatv.com/player/96753363', - 'info_dict': { - 'id': '20230108_9FF5BEE1_244432674_1', - 'ext': 'mp4', - 'uploader_id': 'rlantnghks', - 'uploader': '페이즈으', - 'duration': 10840, - 'thumbnail': r're:https?://videoimg\.afreecatv\.com/.+', - 'upload_date': '20230108', - 'timestamp': 1673218805, - 'title': '젠지 페이즈', - }, - 'params': { - 'skip_download': True, - }, }, { # adult content - 'url': 'https://vod.afreecatv.com/player/70395877', + 'url': 'https://vod.sooplive.co.kr/player/70395877', 'only_matching': True, }, { # subscribers only - 'url': 'https://vod.afreecatv.com/player/104647403', + 'url': 'https://vod.sooplive.co.kr/player/104647403', 'only_matching': True, }, { # private - 'url': 'https://vod.afreecatv.com/player/81669846', + 'url': 'https://vod.sooplive.co.kr/player/81669846', 'only_matching': True, }] @@ -262,11 +207,11 @@ class AfreecaTVIE(AfreecaTVBaseIE): class AfreecaTVCatchStoryIE(AfreecaTVBaseIE): - IE_NAME = 'afreecatv:catchstory' - IE_DESC = 'afreecatv.com catch story' - _VALID_URL = r'https?://vod\.afreecatv\.com/player/(?P\d+)/catchstory' + IE_NAME = 'soop:catchstory' + IE_DESC = 'sooplive.co.kr catch story' + _VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/player/(?P\d+)/catchstory' _TESTS = [{ - 'url': 'https://vod.afreecatv.com/player/103247/catchstory', + 'url': 'https://vod.sooplive.co.kr/player/103247/catchstory', 'info_dict': { 'id': '103247', }, @@ -299,11 +244,11 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE): class AfreecaTVLiveIE(AfreecaTVBaseIE): - IE_NAME = 'afreecatv:live' - IE_DESC = 'afreecatv.com livestreams' - _VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P[^/]+)(?:/(?P\d+))?' + IE_NAME = 'soop:live' + IE_DESC = 'sooplive.co.kr livestreams' + _VALID_URL = r'https?://play\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P[^/?#]+)(?:/(?P\d+))?' _TESTS = [{ - 'url': 'https://play.afreecatv.com/pyh3646/237852185', + 'url': 'https://play.sooplive.co.kr/pyh3646/237852185', 'info_dict': { 'id': '237852185', 'ext': 'mp4', @@ -315,30 +260,30 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE): }, 'skip': 'Livestream has ended', }, { - 'url': 'https://play.afreecatv.com/pyh3646/237852185', + 'url': 'https://play.sooplive.co.kr/pyh3646/237852185', 'only_matching': True, }, { - 'url': 'https://play.afreecatv.com/pyh3646', + 'url': 'https://play.sooplive.co.kr/pyh3646', 'only_matching': True, }] - _LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php' + _LIVE_API_URL = 'https://live.sooplive.co.kr/afreeca/player_live_api.php' _WORKING_CDNS = [ - 'gcp_cdn', # live-global-cdn-v02.afreecatv.com - 'gs_cdn_pc_app', # pc-app.stream.afreecatv.com - 'gs_cdn_mobile_web', # mobile-web.stream.afreecatv.com - 'gs_cdn_pc_web', # pc-web.stream.afreecatv.com + 'gcp_cdn', # live-global-cdn-v02.sooplive.co.kr + 'gs_cdn_pc_app', # pc-app.stream.sooplive.co.kr + 'gs_cdn_mobile_web', # mobile-web.stream.sooplive.co.kr + 'gs_cdn_pc_web', # pc-web.stream.sooplive.co.kr ] _BAD_CDNS = [ 'gs_cdn', # chromecast.afreeca.gscdn.com (cannot resolve) - 'gs_cdn_chromecast', # chromecast.stream.afreecatv.com (HTTP Error 400) - 'azure_cdn', # live-global-cdn-v01.afreecatv.com (cannot resolve) - 'aws_cf', # live-global-cdn-v03.afreecatv.com (cannot resolve) - 'kt_cdn', # kt.stream.afreecatv.com (HTTP Error 400) + 'gs_cdn_chromecast', # chromecast.stream.sooplive.co.kr (HTTP Error 400) + 'azure_cdn', # live-global-cdn-v01.sooplive.co.kr (cannot resolve) + 'aws_cf', # live-global-cdn-v03.sooplive.co.kr (cannot resolve) + 'kt_cdn', # kt.stream.sooplive.co.kr (HTTP Error 400) ] def _extract_formats(self, channel_info, broadcast_no, aid): - stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com' + stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.sooplive.co.kr' # If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs default_cdn_ids = orderedSet([ @@ -358,7 +303,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE): try: return self._extract_m3u8_formats( m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid}, - headers={'Referer': 'https://play.afreecatv.com/'}) + headers={'Referer': 'https://play.sooplive.co.kr/'}) except ExtractorError as e: if attempt == len(cdn_ids): raise @@ -374,7 +319,13 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE): broadcaster_id = channel_info.get('BJID') or broadcaster_id broadcast_no = channel_info.get('BNO') or broadcast_no if not broadcast_no: - raise UserNotLive(video_id=broadcaster_id) + result = channel_info.get('RESULT') + if result == 0: + raise UserNotLive(video_id=broadcaster_id) + elif result == -6: + self.raise_login_required( + 'This channel is streaming for subscribers only', method='password') + raise ExtractorError('Unable to extract broadcast number') password = self.get_param('videopassword') if channel_info.get('BPWD') == 'Y' and password is None: @@ -403,7 +354,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE): formats = self._extract_formats(channel_info, broadcast_no, aid) station_info = traverse_obj(self._download_json( - 'https://st.afreecatv.com/api/get_station_status.php', broadcast_no, + 'https://st.sooplive.co.kr/api/get_station_status.php', broadcast_no, 'Downloading channel metadata', 'Unable to download channel metadata', query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {} @@ -419,11 +370,11 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE): } -class AfreecaTVUserIE(InfoExtractor): - IE_NAME = 'afreecatv:user' - _VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P[^/]+)/vods/?(?P[^/]+)?' +class AfreecaTVUserIE(AfreecaTVBaseIE): + IE_NAME = 'soop:user' + _VALID_URL = r'https?://ch\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P[^/?#]+)/vods/?(?P[^/?#]+)?' _TESTS = [{ - 'url': 'https://bj.afreecatv.com/ryuryu24/vods/review', + 'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/review', 'info_dict': { '_type': 'playlist', 'id': 'ryuryu24', @@ -431,7 +382,7 @@ class AfreecaTVUserIE(InfoExtractor): }, 'playlist_count': 218, }, { - 'url': 'https://bj.afreecatv.com/parang1995/vods/highlight', + 'url': 'https://ch.sooplive.co.kr/parang1995/vods/highlight', 'info_dict': { '_type': 'playlist', 'id': 'parang1995', @@ -439,7 +390,7 @@ class AfreecaTVUserIE(InfoExtractor): }, 'playlist_count': 997, }, { - 'url': 'https://bj.afreecatv.com/ryuryu24/vods', + 'url': 'https://ch.sooplive.co.kr/ryuryu24/vods', 'info_dict': { '_type': 'playlist', 'id': 'ryuryu24', @@ -447,7 +398,7 @@ class AfreecaTVUserIE(InfoExtractor): }, 'playlist_count': 221, }, { - 'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip', + 'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/balloonclip', 'info_dict': { '_type': 'playlist', 'id': 'ryuryu24', @@ -459,12 +410,12 @@ class AfreecaTVUserIE(InfoExtractor): def _fetch_page(self, user_id, user_type, page): page += 1 - info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id, + info = self._download_json(f'https://chapi.sooplive.co.kr/api/{user_id}/vods/{user_type}', user_id, query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'}, note=f'Downloading {user_type} video page {page}') for item in info['data']: yield self.url_result( - f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no']) + f'https://vod.sooplive.co.kr/player/{item["title_no"]}/', AfreecaTVIE, item['title_no']) def _real_extract(self, url): user_id, user_type = self._match_valid_url(url).group('id', 'slug_type') From b8635c1d4779da195e71aa281f73aaad702c935e Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Tue, 22 Oct 2024 16:46:53 +1300 Subject: [PATCH 14/98] [ie/youtube] Support logging in with OAuth (#11001) See: https://github.com/yt-dlp/yt-dlp/wiki/Extractors#logging-in-with-oauth Authored by: coletdjnz --- yt_dlp/extractor/youtube.py | 307 ++++++++++++++++++++++++++++++------ 1 file changed, 262 insertions(+), 45 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 728cb06966..8cbc00f37c 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -22,7 +22,7 @@ import urllib.parse from .common import InfoExtractor, SearchInfoExtractor from .openload import PhantomJSwrapper from ..jsinterp import JSInterpreter -from ..networking.exceptions import HTTPError, network_exceptions +from ..networking.exceptions import HTTPError, TransportError, network_exceptions from ..utils import ( NO_DEFAULT, ExtractorError, @@ -55,6 +55,7 @@ from ..utils import ( str_or_none, str_to_int, strftime_or_none, + time_seconds, traverse_obj, try_call, try_get, @@ -515,6 +516,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}' + _NETRC_MACHINE = 'youtube' + def ucid_or_none(self, ucid): return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None) @@ -573,9 +576,213 @@ class YoutubeBaseInfoExtractor(InfoExtractor): self._initialize_consent() self._check_login_required() + def _perform_login(self, username, password): + auth_type, _, user = (username or '').partition('+') + + if auth_type != 'oauth': + raise ExtractorError(self._youtube_login_hint, expected=True) + + self._initialize_oauth(user, password) + + ''' + OAuth 2.0 Device Authorization Grant flow, used by the YouTube TV client (youtube.com/tv). + + For more information regarding OAuth 2.0 and the Device Authorization Grant flow in general, see: + - https://developers.google.com/identity/protocols/oauth2/limited-input-device + - https://accounts.google.com/.well-known/openid-configuration + - https://www.rfc-editor.org/rfc/rfc8628 + - https://www.rfc-editor.org/rfc/rfc6749 + + Note: The official client appears to use a proxied version of the oauth2 endpoints on youtube.com/o/oauth2, + which applies some modifications to the response (such as returning errors as 200 OK). + Since the client works with the standard API, we will use that as it is well-documented. + ''' + + _OAUTH_PROFILE = None + _OAUTH_ACCESS_TOKEN_CACHE = {} + _OAUTH_DISPLAY_ID = 'oauth' + + # YouTube TV (TVHTML5) client. You can find these at youtube.com/tv + _OAUTH_CLIENT_ID = '861556708454-d6dlm3lh05idd8npek18k6be8ba3oc68.apps.googleusercontent.com' + _OAUTH_CLIENT_SECRET = 'SboVhoG9s0rNafixCSGGKXAT' + _OAUTH_SCOPE = 'http://gdata.youtube.com https://www.googleapis.com/auth/youtube-paid-content' + + # From https://accounts.google.com/.well-known/openid-configuration + # Technically, these should be fetched dynamically and not hard-coded. + # However, as these endpoints rarely change, we can risk saving an extra request for every invocation. + _OAUTH_DEVICE_AUTHORIZATION_ENDPOINT = 'https://oauth2.googleapis.com/device/code' + _OAUTH_TOKEN_ENDPOINT = 'https://oauth2.googleapis.com/token' + + @property + def _oauth_cache_key(self): + return f'oauth_refresh_token_{self._OAUTH_PROFILE}' + + def _read_oauth_error_response(self, response): + return traverse_obj( + self._webpage_read_content(response, self._OAUTH_TOKEN_ENDPOINT, self._OAUTH_DISPLAY_ID, fatal=False), + ({json.loads}, 'error', {str})) + + def _set_oauth_info(self, token_response): + YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.setdefault(self._OAUTH_PROFILE, {}).update({ + 'access_token': token_response['access_token'], + 'token_type': token_response['token_type'], + 'expiry': time_seconds( + seconds=traverse_obj(token_response, ('expires_in', {float_or_none}), default=300) - 10), + }) + refresh_token = traverse_obj(token_response, ('refresh_token', {str})) + if refresh_token: + self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token) + YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token + + def _initialize_oauth(self, user, refresh_token): + self._OAUTH_PROFILE = user or 'default' + + if self._OAUTH_PROFILE in YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE: + self.write_debug(f'{self._OAUTH_DISPLAY_ID}: Using cached access token for profile "{self._OAUTH_PROFILE}"') + return + + YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE] = {} + + if refresh_token: + refresh_token = refresh_token.strip('\'') or None + + # Allow refresh token passed to initialize cache + if refresh_token: + self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token) + + refresh_token = refresh_token or self.cache.load(self._NETRC_MACHINE, self._oauth_cache_key) + if refresh_token: + YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token + try: + token_response = self._refresh_token(refresh_token) + except ExtractorError as e: + error_msg = str(e.orig_msg).replace('Failed to refresh access token: ', '') + self.report_warning(f'{self._OAUTH_DISPLAY_ID}: Failed to refresh access token: {error_msg}') + token_response = self._oauth_authorize + else: + token_response = self._oauth_authorize + + self._set_oauth_info(token_response) + self.write_debug(f'{self._OAUTH_DISPLAY_ID}: Logged in using profile "{self._OAUTH_PROFILE}"') + + def _refresh_token(self, refresh_token): + try: + token_response = self._download_json( + self._OAUTH_TOKEN_ENDPOINT, + video_id=self._OAUTH_DISPLAY_ID, + note='Refreshing access token', + data=json.dumps({ + 'client_id': self._OAUTH_CLIENT_ID, + 'client_secret': self._OAUTH_CLIENT_SECRET, + 'refresh_token': refresh_token, + 'grant_type': 'refresh_token', + }).encode(), + headers={'Content-Type': 'application/json'}) + except ExtractorError as e: + if isinstance(e.cause, HTTPError): + error = self._read_oauth_error_response(e.cause.response) + if error == 'invalid_grant': + # RFC6749 § 5.2 + raise ExtractorError( + 'Failed to refresh access token: Refresh token is invalid, revoked, or expired (invalid_grant)', + expected=True, video_id=self._OAUTH_DISPLAY_ID) + raise ExtractorError( + f'Failed to refresh access token: Authorization server returned error {error}', + video_id=self._OAUTH_DISPLAY_ID) + raise + return token_response + + @property + def _oauth_authorize(self): + code_response = self._download_json( + self._OAUTH_DEVICE_AUTHORIZATION_ENDPOINT, + video_id=self._OAUTH_DISPLAY_ID, + note='Initializing authorization flow', + data=json.dumps({ + 'client_id': self._OAUTH_CLIENT_ID, + 'scope': self._OAUTH_SCOPE, + }).encode(), + headers={'Content-Type': 'application/json'}) + + verification_url = traverse_obj(code_response, ('verification_url', {str})) + user_code = traverse_obj(code_response, ('user_code', {str})) + if not verification_url or not user_code: + raise ExtractorError( + 'Authorization server did not provide verification_url or user_code', video_id=self._OAUTH_DISPLAY_ID) + + # note: The whitespace is intentional + self.to_screen( + f'{self._OAUTH_DISPLAY_ID}: To give yt-dlp access to your account, ' + f'go to {verification_url} and enter code {user_code}') + + # RFC8628 § 3.5: default poll interval is 5 seconds if not provided + poll_interval = traverse_obj(code_response, ('interval', {int}), default=5) + + for retry in self.RetryManager(): + while True: + try: + token_response = self._download_json( + self._OAUTH_TOKEN_ENDPOINT, + video_id=self._OAUTH_DISPLAY_ID, + note=False, + errnote='Failed to request access token', + data=json.dumps({ + 'client_id': self._OAUTH_CLIENT_ID, + 'client_secret': self._OAUTH_CLIENT_SECRET, + 'device_code': code_response['device_code'], + 'grant_type': 'urn:ietf:params:oauth:grant-type:device_code', + }).encode(), + headers={'Content-Type': 'application/json'}) + except ExtractorError as e: + if isinstance(e.cause, TransportError): + retry.error = e + break + elif isinstance(e.cause, HTTPError): + error = self._read_oauth_error_response(e.cause.response) + if not error: + retry.error = e + break + + if error == 'authorization_pending': + time.sleep(poll_interval) + continue + elif error == 'expired_token': + raise ExtractorError( + 'Authorization timed out', expected=True, video_id=self._OAUTH_DISPLAY_ID) + elif error == 'access_denied': + raise ExtractorError( + 'You denied access to an account', expected=True, video_id=self._OAUTH_DISPLAY_ID) + elif error == 'slow_down': + # RFC8628 § 3.5: add 5 seconds to the poll interval + poll_interval += 5 + time.sleep(poll_interval) + continue + else: + raise ExtractorError( + f'Authorization server returned an error when fetching access token: {error}', + video_id=self._OAUTH_DISPLAY_ID) + raise + + return token_response + + def _update_oauth(self): + token = YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.get(self._OAUTH_PROFILE) + if token is None or token['expiry'] > time.time(): + return + + self._set_oauth_info(self._refresh_token(token['refresh_token'])) + + @property + def _youtube_login_hint(self): + return ('Use --username=oauth[+PROFILE] --password="" to log in using oauth, ' + f'or else u{self._login_hint(method="cookies")[1:]}. ' + 'See https://github.com/yt-dlp/yt-dlp/wiki/Extractors#logging-in-with-oauth for more on how to use oauth. ' + 'See https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies for help with cookies') + def _check_login_required(self): - if self._LOGIN_REQUIRED and not self._cookies_passed: - self.raise_login_required('Login details are needed to download this content', method='cookies') + if self._LOGIN_REQUIRED and not self.is_authenticated: + self.raise_login_required( + f'Login details are needed to download this content. {self._youtube_login_hint}', method=None) _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=' _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=' @@ -674,17 +881,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if session_index is not None: return session_index - # Deprecated? - def _extract_identity_token(self, ytcfg=None, webpage=None): - if ytcfg: - token = try_get(ytcfg, lambda x: x['ID_TOKEN'], str) - if token: - return token - if webpage: - return self._search_regex( - r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage, - 'identity token', default=None, fatal=False) - def _data_sync_id_to_delegated_session_id(self, data_sync_id): if not data_sync_id: return @@ -731,7 +927,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): @functools.cached_property def is_authenticated(self): - return bool(self._generate_sapisidhash_header()) + return self._OAUTH_PROFILE or bool(self._generate_sapisidhash_header()) def extract_ytcfg(self, video_id, webpage): if not webpage: @@ -741,21 +937,21 @@ class YoutubeBaseInfoExtractor(InfoExtractor): r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}'), video_id, fatal=False) or {} - def generate_api_headers( - self, *, ytcfg=None, account_syncid=None, session_index=None, - visitor_data=None, identity_token=None, api_hostname=None, default_client='web'): + def _generate_oauth_headers(self): + self._update_oauth() + oauth_token = YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.get(self._OAUTH_PROFILE) + if not oauth_token: + return {} - origin = 'https://' + (self._select_api_hostname(api_hostname, default_client)) - headers = { - 'X-YouTube-Client-Name': str( - self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)), - 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client), - 'Origin': origin, - 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg), - 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg), - 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg), - 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client), + return { + 'Authorization': f'{oauth_token["token_type"]} {oauth_token["access_token"]}', } + + def _generate_cookie_auth_headers(self, *, ytcfg=None, account_syncid=None, session_index=None, origin=None, **kwargs): + headers = {} + account_syncid = account_syncid or self._extract_account_syncid(ytcfg) + if account_syncid: + headers['X-Goog-PageId'] = account_syncid if session_index is None: session_index = self._extract_session_index(ytcfg) if account_syncid or session_index is not None: @@ -765,8 +961,29 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if auth is not None: headers['Authorization'] = auth headers['X-Origin'] = origin + + return headers + + def generate_api_headers( + self, *, ytcfg=None, account_syncid=None, session_index=None, + visitor_data=None, api_hostname=None, default_client='web', **kwargs): + + origin = 'https://' + (self._select_api_hostname(api_hostname, default_client)) + headers = { + 'X-YouTube-Client-Name': str( + self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)), + 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client), + 'Origin': origin, + 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg), + 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client), + **self._generate_oauth_headers(), + **self._generate_cookie_auth_headers(ytcfg=ytcfg, account_syncid=account_syncid, session_index=session_index, origin=origin), + } return filter_dict(headers) + def _generate_webpage_headers(self): + return self._generate_oauth_headers() + def _download_ytcfg(self, client, video_id): url = { 'web': 'https://www.youtube.com', @@ -776,7 +993,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if not url: return {} webpage = self._download_webpage( - url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config') + url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config', + headers=self._generate_webpage_headers()) return self.extract_ytcfg(video_id, webpage) or {} @staticmethod @@ -3041,7 +3259,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): code = self._download_webpage( player_url, video_id, fatal=fatal, note='Downloading player ' + player_id, - errnote=f'Download of {player_url} failed') + errnote=f'Download of {player_url} failed', + headers=self._generate_webpage_headers()) if code: self._code_cache[player_id] = code return self._code_cache.get(player_id) @@ -3324,7 +3543,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self._download_webpage( url, video_id, f'Marking {label}watched', - 'Unable to mark watched', fatal=False) + 'Unable to mark watched', fatal=False, + headers=self._generate_webpage_headers()) @classmethod def _extract_from_webpage(cls, url, webpage): @@ -4305,7 +4525,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if pp: query['pp'] = pp webpage = self._download_webpage( - webpage_url, video_id, fatal=False, query=query) + webpage_url, video_id, fatal=False, query=query, headers=self._generate_webpage_headers()) master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg() @@ -5593,7 +5813,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): webpage, data = None, None for retry in self.RetryManager(fatal=fatal): try: - webpage = self._download_webpage(url, item_id, note='Downloading webpage') + webpage = self._download_webpage(url, item_id, note='Downloading webpage', headers=self._generate_webpage_headers()) data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {} except ExtractorError as e: if isinstance(e.cause, network_exceptions): @@ -6967,7 +7187,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): raise ExtractorError('Unable to recognize tab page') -class YoutubePlaylistIE(InfoExtractor): +class YoutubePlaylistIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube playlists' _VALID_URL = r'''(?x)(?: (?:https?://)? @@ -7081,7 +7301,7 @@ class YoutubePlaylistIE(InfoExtractor): return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id) -class YoutubeYtBeIE(InfoExtractor): +class YoutubeYtBeIE(YoutubeBaseInfoExtractor): IE_DESC = 'youtu.be' _VALID_URL = rf'https?://youtu\.be/(?P[0-9A-Za-z_-]{{11}})/*?.*?\blist=(?P{YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})' _TESTS = [{ @@ -7132,7 +7352,7 @@ class YoutubeYtBeIE(InfoExtractor): }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id) -class YoutubeLivestreamEmbedIE(InfoExtractor): +class YoutubeLivestreamEmbedIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube livestream embeds' _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/embed/live_stream/?\?(?:[^#]+&)?channel=(?P[^&#]+)' _TESTS = [{ @@ -7147,7 +7367,7 @@ class YoutubeLivestreamEmbedIE(InfoExtractor): ie=YoutubeTabIE.ie_key(), video_id=channel_id) -class YoutubeYtUserIE(InfoExtractor): +class YoutubeYtUserIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube user videos; "ytuser:" prefix' IE_NAME = 'youtube:user' _VALID_URL = r'ytuser:(?P.+)' @@ -7434,7 +7654,7 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor): return self.playlist_result(self._search_results(query, params, default_client='web_music'), title, title) -class YoutubeFeedsInfoExtractor(InfoExtractor): +class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): """ Base class for feed extractors Subclasses must re-define the _FEED_NAME property. @@ -7442,9 +7662,6 @@ class YoutubeFeedsInfoExtractor(InfoExtractor): _LOGIN_REQUIRED = True _FEED_NAME = 'feeds' - def _real_initialize(self): - YoutubeBaseInfoExtractor._check_login_required(self) - @classproperty def IE_NAME(cls): return f'youtube:{cls._FEED_NAME}' @@ -7454,7 +7671,7 @@ class YoutubeFeedsInfoExtractor(InfoExtractor): f'https://www.youtube.com/feed/{self._FEED_NAME}', ie=YoutubeTabIE.ie_key()) -class YoutubeWatchLaterIE(InfoExtractor): +class YoutubeWatchLaterIE(YoutubeBaseInfoExtractor): IE_NAME = 'youtube:watchlater' IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)' _VALID_URL = r':ytwatchlater' @@ -7508,7 +7725,7 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): }] -class YoutubeShortsAudioPivotIE(InfoExtractor): +class YoutubeShortsAudioPivotIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)' IE_NAME = 'youtube:shorts:pivot:audio' _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P[\w-]{11})/shorts' @@ -7532,7 +7749,7 @@ class YoutubeShortsAudioPivotIE(InfoExtractor): ie=YoutubeTabIE) -class YoutubeTruncatedURLIE(InfoExtractor): +class YoutubeTruncatedURLIE(YoutubeBaseInfoExtractor): IE_NAME = 'youtube:truncated_url' IE_DESC = False # Do not list _VALID_URL = r'''(?x) @@ -7691,7 +7908,7 @@ class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor): return self.url_result(redirect_url) -class YoutubeTruncatedIDIE(InfoExtractor): +class YoutubeTruncatedIDIE(YoutubeBaseInfoExtractor): IE_NAME = 'youtube:truncated_id' IE_DESC = False # Do not list _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P[0-9A-Za-z_-]{1,10})$' From e68b4c19af122876561a41f2dd8093fae7b417c7 Mon Sep 17 00:00:00 2001 From: Allen <64094914+allendema@users.noreply.github.com> Date: Tue, 22 Oct 2024 05:54:41 +0200 Subject: [PATCH 15/98] [ie/tubitv] Strip extra whitespace from titles (#10795) Closes #10794 Authored by: allendema --- yt_dlp/extractor/tubitv.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/tubitv.py b/yt_dlp/extractor/tubitv.py index 85eb3a211c..694a92fcd4 100644 --- a/yt_dlp/extractor/tubitv.py +++ b/yt_dlp/extractor/tubitv.py @@ -6,6 +6,7 @@ from ..utils import ( ExtractorError, int_or_none, js_to_json, + strip_or_none, traverse_obj, url_or_none, urlencode_postdata, @@ -132,12 +133,12 @@ class TubiTvIE(InfoExtractor): return { 'id': video_id, - 'title': title, + 'title': strip_or_none(title), 'formats': formats, 'subtitles': subtitles, 'season_number': int_or_none(season_number), 'episode_number': int_or_none(episode_number), - 'episode': episode_title, + 'episode': strip_or_none(episode_title), **traverse_obj(video_data, { 'description': ('description', {str}), 'duration': ('duration', {int_or_none}), From a886cf3e900f4a2ec00af705f883539269545609 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 22 Oct 2024 04:20:20 +0000 Subject: [PATCH 16/98] [build] Migrate `py2exe` builds to `win_exe` (#11256) This commit removes py2exe support Closes #10087 Authored by: bashonly --- .github/workflows/build.yml | 12 ++------ README.md | 15 +--------- bundle/py2exe.py | 59 ------------------------------------- pyproject.toml | 3 -- yt_dlp/update.py | 11 ++----- 5 files changed, 7 insertions(+), 93 deletions(-) delete mode 100755 bundle/py2exe.py diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 495d3c6306..64227d9740 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -419,22 +419,16 @@ jobs: run: | python -m bundle.pyinstaller python -m bundle.pyinstaller --onedir - Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_real.exe Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip - - name: Install Requirements (py2exe) + - name: Add migration executable for py2exe run: | - python devscripts/install_deps.py --include py2exe - - name: Build (py2exe) - run: | - python -m bundle.py2exe - Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe - Move-Item ./dist/yt-dlp_real.exe ./dist/yt-dlp.exe + Copy-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe - name: Verify --update-to if: vars.UPDATE_TO_VERIFICATION run: | - foreach ($name in @("yt-dlp","yt-dlp_min")) { + foreach ($name in @("yt-dlp")) { Copy-Item "./dist/${name}.exe" "./dist/${name}_downgraded.exe" $version = & "./dist/${name}.exe" --version & "./dist/${name}_downgraded.exe" -v --update-to yt-dlp/yt-dlp@2023.03.04 diff --git a/README.md b/README.md index fc38a529a7..6bd632dd7d 100644 --- a/README.md +++ b/README.md @@ -106,7 +106,6 @@ File|Description File|Description :---|:--- [yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win7 SP1+) standalone x86 (32-bit) binary -[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`
([Not recommended](#standalone-py2exe-builds-windows)) [yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary [yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary [yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary @@ -260,18 +259,6 @@ After installing these, simply run `make`. You can also run `make yt-dlp` instead to compile only the binary without updating any of the additional files. (The build tools marked with **\*** are not needed for this) -### Standalone Py2Exe Builds (Windows) - -While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi`/`requests` and need VC++14** on the target computer to run. - -If you wish to build it anyway, install Python (if it is not already installed) and you can run the following commands: - -``` -py devscripts/install_deps.py --include py2exe -py devscripts/make_lazy_extractors.py -py -m bundle.py2exe -``` - ### Related scripts * **`devscripts/install_deps.py`** - Install dependencies for yt-dlp. @@ -1933,7 +1920,7 @@ Plugins can be installed using various methods and locations. * Plugin packages can be installed and managed using `pip`. See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example. * Note: plugin files between plugin packages installed with pip must have unique filenames. * Any path in `PYTHONPATH` is searched in for the `yt_dlp_plugins` namespace folder. - * Note: This does not apply for Pyinstaller/py2exe builds. + * Note: This does not apply for Pyinstaller builds. `.zip`, `.egg` and `.whl` archives containing a `yt_dlp_plugins` namespace folder in their root are also supported as plugin packages. diff --git a/bundle/py2exe.py b/bundle/py2exe.py deleted file mode 100755 index 5b7f4883bc..0000000000 --- a/bundle/py2exe.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Allow execution from anywhere -import os -import sys - -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -import warnings - -from py2exe import freeze - -from devscripts.utils import read_version - -VERSION = read_version() - - -def main(): - warnings.warn( - 'py2exe builds do not support pycryptodomex and needs VC++14 to run. ' - 'It is recommended to run "pyinst.py" to build using pyinstaller instead') - - freeze( - console=[{ - 'script': './yt_dlp/__main__.py', - 'dest_base': 'yt-dlp', - 'icon_resources': [(1, 'devscripts/logo.ico')], - }], - version_info={ - 'version': VERSION, - 'description': 'A feature-rich command-line audio/video downloader', - 'comments': 'Official repository: ', - 'product_name': 'yt-dlp', - 'product_version': VERSION, - }, - options={ - 'bundle_files': 0, - 'compressed': 1, - 'optimize': 2, - 'dist_dir': './dist', - 'excludes': [ - # py2exe cannot import Crypto - 'Crypto', - 'Cryptodome', - # requests >=2.32.0 breaks py2exe builds due to certifi dependency - 'requests', - 'urllib3', - ], - 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], - # Modules that are only imported dynamically must be added here - 'includes': ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated', - 'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated'], - }, - zipfile=None, - ) - - -if __name__ == '__main__': - main() diff --git a/pyproject.toml b/pyproject.toml index 200a9c99ae..be81c265cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,9 +85,6 @@ test = [ pyinstaller = [ "pyinstaller>=6.10.0", # Windows temp cleanup fixed in 6.10.0 ] -py2exe = [ - "py2exe>=0.12", -] [project.urls] Documentation = "https://github.com/yt-dlp/yt-dlp#readme" diff --git a/yt_dlp/update.py b/yt_dlp/update.py index 4cf3bdc320..0172acfd63 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -103,7 +103,6 @@ def current_git_head(): _FILE_SUFFIXES = { 'zip': '', - 'py2exe': '_min.exe', 'win_exe': '.exe', 'win_x86_exe': '_x86.exe', 'darwin_exe': '_macos', @@ -117,6 +116,7 @@ _NON_UPDATEABLE_REASONS = { **{variant: None for variant in _FILE_SUFFIXES}, # Updatable **{variant: f'Auto-update is not supported for unpackaged {name} executable; Re-download the latest release' for variant, name in {'win32_dir': 'Windows', 'darwin_dir': 'MacOS', 'linux_dir': 'Linux'}.items()}, + 'py2exe': 'py2exe is no longer supported by yt-dlp; This executable cannot be updated', 'source': 'You cannot update when running from source code; Use git to pull the latest changes', 'unknown': 'You installed yt-dlp from a manual build or with a package manager; Use that to update', 'other': 'You are using an unofficial build of yt-dlp; Build the executable again', @@ -152,15 +152,10 @@ def _get_system_deprecation(): variant = detect_variant() # Temporary until Windows builds use 3.9, which will drop support for Win7 and 2008ServerR2 - if variant in ('win_exe', 'win_x86_exe', 'py2exe'): + if variant in ('win_exe', 'win_x86_exe'): platform_name = platform.platform() if any(platform_name.startswith(f'Windows-{name}') for name in ('7', '2008ServerR2')): return EXE_MSG_TMPL.format('Windows 7/Server 2008 R2', 'issues/10086', STOP_MSG) - elif variant == 'py2exe': - return EXE_MSG_TMPL.format( - 'py2exe builds (yt-dlp_min.exe)', 'issues/10087', - 'In a future update you will be migrated to the PyInstaller-bundled executable. ' - 'This will be done automatically; no action is required on your part') return None # Temporary until aarch64/armv7l build flow is bumped to Ubuntu 20.04 and Python 3.9 @@ -525,7 +520,7 @@ class Updater: return os.rename(old_filename, self.filename) variant = detect_variant() - if variant.startswith('win') or variant == 'py2exe': + if variant.startswith('win'): atexit.register(Popen, f'ping 127.0.0.1 -n 5 -w 1000 & del /F "{old_filename}"', shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) elif old_filename: From 67adeb7bab00662ba55d473e405b301abb42fe61 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Tue, 22 Oct 2024 06:50:35 +0200 Subject: [PATCH 17/98] [cleanup] Misc (#11216) - Add Python 3.13 to CI, finalize 3.13 support - Remove Python 3.8 from CI in preparation for removing 3.8 support - Document that PyPy3.8 and PyPy3.9 are no longer supported - Usual documentation fixes and code cleanup Closes #8248, Closes #11146, Closes #11149, Closes #11211 Authored by: Grub4K, grqz, DTrombett, KarboniteKream, bashonly, mikkovedru, seproDev Co-authored-by: N/Ame <173015200+grqz@users.noreply.github.com> Co-authored-by: DTrombett Co-authored-by: =?UTF-8?q?Klemen=20Ko=C5=A1ir?= Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> Co-authored-by: Mikko Vedru Co-authored-by: sepro --- .github/workflows/core.yml | 12 ++-- .github/workflows/download.yml | 6 +- .github/workflows/quick-test.yml | 6 +- CONTRIBUTING.md | 7 +- README.md | 106 +++++++++++++++-------------- devscripts/changelog_override.json | 10 +++ pyproject.toml | 5 +- setup.cfg | 2 +- yt_dlp/YoutubeDL.py | 20 ++---- yt_dlp/extractor/common.py | 4 +- yt_dlp/extractor/nexx.py | 2 +- yt_dlp/extractor/tubetugraz.py | 2 +- yt_dlp/extractor/ustream.py | 2 +- yt_dlp/extractor/veoh.py | 5 +- yt_dlp/extractor/youtube.py | 4 +- yt_dlp/options.py | 35 +++++----- yt_dlp/utils/traversal.py | 4 +- 17 files changed, 118 insertions(+), 114 deletions(-) diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index a5cb6c9707..9a4342a585 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -36,16 +36,20 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - # CPython 3.8 is in quick-test - python-version: ['3.9', '3.10', '3.11', '3.12', pypy-3.8, pypy-3.10] + # CPython 3.9 is in quick-test + python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest - python-version: '3.8' + python-version: '3.9' + - os: windows-latest + python-version: '3.10' - os: windows-latest python-version: '3.12' - os: windows-latest - python-version: pypy-3.9 + python-version: '3.13' + - os: windows-latest + python-version: pypy-3.10 steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/.github/workflows/download.yml b/.github/workflows/download.yml index 7256804d93..6849fba9b6 100644 --- a/.github/workflows/download.yml +++ b/.github/workflows/download.yml @@ -28,13 +28,13 @@ jobs: fail-fast: true matrix: os: [ubuntu-latest] - python-version: ['3.10', '3.11', '3.12', pypy-3.8, pypy-3.10] + python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest - python-version: '3.8' + python-version: '3.9' - os: windows-latest - python-version: pypy-3.9 + python-version: pypy-3.10 steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml index cce7cbac1e..1a32bbfe31 100644 --- a/.github/workflows/quick-test.yml +++ b/.github/workflows/quick-test.yml @@ -10,10 +10,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Set up Python 3.8 + - name: Set up Python 3.9 uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.9' - name: Install test requirements run: python3 ./devscripts/install_deps.py -o --include test - name: Run tests @@ -29,7 +29,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.9' - name: Install dev dependencies run: python3 ./devscripts/install_deps.py -o --include static-analysis - name: Make lazy extractors diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index dbae6476f6..f1646e5952 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -233,7 +233,7 @@ After you have ensured this site is distributing its content legally, you can fo # * MD5 checksum; start the string with 'md5:', e.g. # 'description': 'md5:098f6bcd4621d373cade4e832627b4f6', # * A regular expression; start the string with 're:', e.g. - # 'thumbnail': r're:^https?://.*\.jpg$', + # 'thumbnail': r're:https?://.*\.jpg$', # * A count of elements in a list; start the string with 'count:', e.g. # 'tags': 'count:10', # * Any Python type, e.g. @@ -268,7 +268,7 @@ After you have ensured this site is distributing its content legally, you can fo You can use `hatch fmt` to automatically fix problems. Rules that the linter/formatter enforces should not be disabled with `# noqa` unless a maintainer requests it. The only exception allowed is for old/printf-style string formatting in GraphQL query templates (use `# noqa: UP031`). -1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython and PyPy for Python 3.8 and above. Backward compatibility is not required for even older versions of Python. +1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.8 and PyPy >=3.10. Backward compatibility is not required for even older versions of Python. 1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: ```shell @@ -302,10 +302,9 @@ Extractors are very fragile by nature since they depend on the layout of the sou For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L119-L440) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp: - `id` (media identifier) - - `title` (media title) - `url` (media download URL) or `formats` -The aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken. While all extractors must return a `title`, they must also allow it's extraction to be non-fatal. +The aforementioned metadata fields are the critical data without which extraction does not make any sense. If any of them fail to be extracted, then the extractor is considered broken. All other metadata extraction should be completely non-fatal. For pornographic sites, appropriate `age_limit` must also be returned. diff --git a/README.md b/README.md index 6bd632dd7d..46fff07df2 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![YT-DLP](https://raw.githubusercontent.com/yt-dlp/yt-dlp/master/.github/banner.svg)](#readme) [![Release version](https://img.shields.io/github/v/release/yt-dlp/yt-dlp?color=brightgreen&label=Download&style=for-the-badge)](#installation "Installation") -[![PyPi](https://img.shields.io/badge/-PyPi-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp "PyPi") +[![PyPI](https://img.shields.io/badge/-PyPI-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp "PyPI") [![Donate](https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)](Collaborators.md#collaborators "Donate") [![Matrix](https://img.shields.io/matrix/yt-dlp:matrix.org?color=brightgreen&labelColor=555555&label=&logo=element&style=for-the-badge)](https://matrix.to/#/#yt-dlp:matrix.org "Matrix") [![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)](https://discord.gg/H5MNcFW63r "Discord") @@ -81,7 +81,7 @@ yt-dlp is a feature-rich command-line audio/video downloader with support for [t [![Windows](https://img.shields.io/badge/-Windows_x64-blue.svg?style=for-the-badge&logo=windows)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe) [![Unix](https://img.shields.io/badge/-Linux/BSD-red.svg?style=for-the-badge&logo=linux)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp) [![MacOS](https://img.shields.io/badge/-MacOS-lightblue.svg?style=for-the-badge&logo=apple)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos) -[![PyPi](https://img.shields.io/badge/-PyPi-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp) +[![PyPI](https://img.shields.io/badge/-PyPI-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp) [![Source Tarball](https://img.shields.io/badge/-Source_tar-green.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) [![Other variants](https://img.shields.io/badge/-Other-grey.svg?style=for-the-badge)](#release-files) [![All versions](https://img.shields.io/badge/-All_Versions-lightgrey.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases) @@ -172,11 +172,11 @@ python3 -m pip install -U --pre "yt-dlp[default]" ``` ## DEPENDENCIES -Python versions 3.8+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly. +Python versions 3.8+ (CPython) and 3.10+ (PyPy) are supported. Other versions and implementations may or may not work correctly. While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly recommended @@ -438,10 +438,10 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git E.g. "--date today-2weeks" downloads only videos uploaded on the same day two weeks ago --datebefore DATE Download only videos uploaded on or before - this date. The date formats accepted is the + this date. The date formats accepted are the same as --date --dateafter DATE Download only videos uploaded on or after - this date. The date formats accepted is the + this date. The date formats accepted are the same as --date --match-filters FILTER Generic video filter. Any "OUTPUT TEMPLATE" field can be compared with a number or a @@ -726,16 +726,16 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git used. This option can be used multiple times --print-to-file [WHEN:]TEMPLATE FILE Append given template to the file. The - values of WHEN and TEMPLATE are same as that - of --print. FILE uses the same syntax as the - output template. This option can be used - multiple times + values of WHEN and TEMPLATE are the same as + that of --print. FILE uses the same syntax + as the output template. This option can be + used multiple times -j, --dump-json Quiet, but print JSON information for each video. Simulate unless --no-simulate is used. See "OUTPUT TEMPLATE" for a description of available keys -J, --dump-single-json Quiet, but print JSON information for each - url or infojson passed. Simulate unless + URL or infojson passed. Simulate unless --no-simulate is used. If the URL refers to a playlist, the whole playlist information is dumped in a single line @@ -810,9 +810,9 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git --no-audio-multistreams Only one audio stream is downloaded for each output file (default) --prefer-free-formats Prefer video formats with free containers - over non-free ones of same quality. Use with - "-S ext" to strictly prefer free containers - irrespective of quality + over non-free ones of the same quality. Use + with "-S ext" to strictly prefer free + containers irrespective of quality --no-prefer-free-formats Don't give any special preference to free containers (default) --check-formats Make sure formats are selected only from @@ -837,15 +837,17 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git (default) (Alias: --no-write-automatic-subs) --list-subs List available subtitles of each video. Simulate unless --no-simulate is used - --sub-format FORMAT Subtitle format; accepts formats preference, - e.g. "srt" or "ass/srt/best" + --sub-format FORMAT Subtitle format; accepts formats preference + separated by "/", e.g. "srt" or "ass/srt/best" --sub-langs LANGS Languages of the subtitles to download (can be regex) or "all" separated by commas, e.g. - --sub-langs "en.*,ja". You can prefix the - language code with a "-" to exclude it from - the requested languages, e.g. --sub-langs - all,-live_chat. Use --list-subs for a list - of available language tags + --sub-langs "en.*,ja" (where "en.*" is a + regex pattern that matches "en" followed by + 0 or more of any character). You can prefix + the language code with a "-" to exclude it + from the requested languages, e.g. --sub- + langs all,-live_chat. Use --list-subs for a + list of available language tags ## Authentication Options: -u, --username USERNAME Login with this account ID @@ -893,9 +895,9 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git necessary (currently supported: avi, flv, gif, mkv, mov, mp4, webm, aac, aiff, alac, flac, m4a, mka, mp3, ogg, opus, vorbis, - wav). If target container does not support - the video/audio codec, remuxing will fail. - You can specify multiple rules; e.g. + wav). If the target container does not + support the video/audio codec, remuxing will + fail. You can specify multiple rules; e.g. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 and anything else to mkv --recode-video FORMAT Re-encode the video into another format if @@ -963,29 +965,29 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git are the same as that of --use-postprocessor (default: pre_process) --xattrs Write metadata to the video file's xattrs - (using dublin core and xdg standards) + (using Dublin Core and XDG standards) --concat-playlist POLICY Concatenate videos in a playlist. One of "never", "always", or "multi_video" (default; only when the videos form a single - show). All the video files must have same - codecs and number of streams to be - concatable. The "pl_video:" prefix can be + show). All the video files must have the + same codecs and number of streams to be + concatenable. The "pl_video:" prefix can be used with "--paths" and "--output" to set the output filename for the concatenated files. See "OUTPUT TEMPLATE" for details --fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the - default; fix file if we can, warn - otherwise), force (try fixing even if file - already exists) + default; fix the file if we can, warn + otherwise), force (try fixing even if the + file already exists) --ffmpeg-location PATH Location of the ffmpeg binary; either the path to the binary or its containing directory --exec [WHEN:]CMD Execute a command, optionally prefixed with when to execute it, separated by a ":". Supported values of "WHEN" are the same as that of --use-postprocessor (default: - after_move). Same syntax as the output + after_move). The same syntax as the output template can be used to pass any field as arguments to the command. If no fields are passed, %(filepath,_filename|)q is appended @@ -1023,7 +1025,7 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git --no-force-keyframes-at-cuts Do not force keyframes around the chapters when cutting/splitting (default) --use-postprocessor NAME[:ARGS] - The (case sensitive) name of plugin + The (case-sensitive) name of plugin postprocessors to be enabled, and (optionally) arguments to be passed to it, separated by a colon ":". ARGS are a @@ -1036,8 +1038,8 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git --print/--output), "before_dl" (before each video download), "post_process" (after each video download; default), "after_move" - (after moving video file to its final - locations), "after_video" (after downloading + (after moving the video file to its final + location), "after_video" (after downloading and processing all formats of a video), or "playlist" (at end of playlist). This option can be used multiple times to add different @@ -1055,7 +1057,7 @@ Make chapter entries for, or remove various segments (sponsor, music_offtopic, poi_highlight, chapter, all and default (=all). You can prefix the category with a "-" to exclude it. See [1] - for description of the categories. E.g. + for descriptions of the categories. E.g. --sponsorblock-mark all,-preview [1] https://wiki.sponsor.ajay.app/w/Segment_Categories --sponsorblock-remove CATS SponsorBlock categories to be removed from @@ -1087,7 +1089,7 @@ Make chapter entries for, or remove various segments (sponsor, (Alias: --no-allow-dynamic-mpd) --hls-split-discontinuity Split HLS playlists to different formats at discontinuities such as ad breaks - --no-hls-split-discontinuity Do not split HLS playlists to different + --no-hls-split-discontinuity Do not split HLS playlists into different formats at discontinuities such as ad breaks (default) --extractor-args IE_KEY:ARGS Pass ARGS arguments to the IE_KEY extractor. @@ -1097,7 +1099,7 @@ Make chapter entries for, or remove various segments (sponsor, # CONFIGURATION -You can configure yt-dlp by placing any supported command line option to a configuration file. The configuration is loaded from the following locations: +You can configure yt-dlp by placing any supported command line option in a configuration file. The configuration is loaded from the following locations: 1. **Main Configuration**: * The file given to `--config-location` @@ -1142,7 +1144,7 @@ E.g. with the following configuration file, yt-dlp will always extract the audio -o ~/YouTube/%(title)s.%(ext)s ``` -**Note**: Options in configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary, as if it were a UNIX shell. +**Note**: Options in a configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary, as if it were a UNIX shell. You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded. @@ -1176,13 +1178,13 @@ As an alternative to using the `.netrc` file, which has the disadvantage of keep E.g. To use an encrypted `.netrc` file stored as `.authinfo.gpg` ``` -yt-dlp --netrc-cmd 'gpg --decrypt ~/.authinfo.gpg' https://www.youtube.com/watch?v=BaW_jenozKc +yt-dlp --netrc-cmd 'gpg --decrypt ~/.authinfo.gpg' 'https://www.youtube.com/watch?v=BaW_jenozKc' ``` ### Notes about environment variables * Environment variables are normally specified as `${VARIABLE}`/`$VARIABLE` on UNIX and `%VARIABLE%` on Windows; but is always shown as `${VARIABLE}` in this documentation -* yt-dlp also allow using UNIX-style variables on Windows for path-like options; e.g. `--output`, `--config-location` +* yt-dlp also allows using UNIX-style variables on Windows for path-like options; e.g. `--output`, `--config-location` * If unset, `${XDG_CONFIG_HOME}` defaults to `~/.config` and `${XDG_CACHE_HOME}` to `~/.cache` * On Windows, `~` points to `${HOME}` if present; or, `${USERPROFILE}` or `${HOMEDRIVE}${HOMEPATH}` otherwise * On Windows, `${USERPROFILE}` generally points to `C:\Users\` and `${APPDATA}` to `${USERPROFILE}\AppData\Roaming` @@ -1263,7 +1265,7 @@ The available fields are: - `like_count` (numeric): Number of positive ratings of the video - `dislike_count` (numeric): Number of negative ratings of the video - `repost_count` (numeric): Number of reposts of the video - - `average_rating` (numeric): Average rating give by users, the scale used depends on the webpage + - `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage - `comment_count` (numeric): Number of comments on the video (For some extractors, comments are only downloaded at the end, and so this field cannot be used) - `age_limit` (numeric): Age restriction for the video (years) - `live_status` (string): One of "not_live", "is_live", "is_upcoming", "was_live", "post_live" (was live, but VOD is not yet processed) @@ -1293,7 +1295,7 @@ The available fields are: - `webpage_url` (string): A URL to the video webpage which, if given to yt-dlp, should yield the same result again - `webpage_url_basename` (string): The basename of the webpage URL - `webpage_url_domain` (string): The domain of the webpage URL - - `original_url` (string): The URL given by the user (or same as `webpage_url` for playlist entries) + - `original_url` (string): The URL given by the user (or the same as `webpage_url` for playlist entries) - `categories` (list): List of categories the video belongs to - `tags` (list): List of tags assigned to the video - `cast` (list): List of cast members @@ -1370,7 +1372,7 @@ Each aforementioned sequence when referenced in an output template will be repla **Tip**: Look at the `-j` output to identify which fields are available for the particular URL -For numeric sequences you can use [numeric related formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting); e.g. `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`. +For numeric sequences, you can use [numeric related formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting); e.g. `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`. Output templates can also contain arbitrary hierarchical path, e.g. `-o "%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s"` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you. @@ -1412,7 +1414,7 @@ $ yt-dlp -P "C:/MyVideos" -o "%(series)s/%(season_number)s - %(season)s/%(episod # Download video as "C:\MyVideos\uploader\title.ext", subtitles as "C:\MyVideos\subs\uploader\title.ext" # and put all temporary files in "C:\MyVideos\tmp" -$ yt-dlp -P "C:/MyVideos" -P "temp:tmp" -P "subtitle:subs" -o "%(uploader)s/%(title)s.%(ext)s" BaW_jenoz --write-subs +$ yt-dlp -P "C:/MyVideos" -P "temp:tmp" -P "subtitle:subs" -o "%(uploader)s/%(title)s.%(ext)s" BaW_jenozKc --write-subs # Download video as "C:\MyVideos\uploader\title.ext" and subtitles as "C:\MyVideos\uploader\subs\title.ext" $ yt-dlp -P "C:/MyVideos" -o "%(uploader)s/%(title)s.%(ext)s" -o "subtitle:%(uploader)s/subs/%(title)s.%(ext)s" BaW_jenozKc --write-subs @@ -1630,11 +1632,11 @@ $ yt-dlp -S "res:480" # or the worst video (that also has audio) if there is no video under 50 MB $ yt-dlp -f "b[filesize<50M] / w" -# Download largest video (that also has audio) but no bigger than 50 MB, +# Download the largest video (that also has audio) but no bigger than 50 MB, # or the smallest video (that also has audio) if there is no video under 50 MB $ yt-dlp -f "b" -S "filesize:50M" -# Download best video (that also has audio) that is closest in size to 50 MB +# Download the best video (that also has audio) that is closest in size to 50 MB $ yt-dlp -f "b" -S "filesize~50M" @@ -1690,7 +1692,7 @@ The metadata obtained by the extractors can be modified by using `--parse-metada The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [Python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups, a single field name, or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields. -Note that these options preserve their relative order, allowing replacements to be made in parsed fields and viceversa. Also, any field thus created can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--embed-metadata`. +Note that these options preserve their relative order, allowing replacements to be made in parsed fields and vice versa. Also, any field thus created can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--embed-metadata`. This option also has a few special uses: @@ -1765,7 +1767,7 @@ The following extractors use this feature: #### youtube * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively -* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mediaconnect`, `mweb`, `android_producer`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, and `tv_embedded`, `web_creator` and `mediaconnect` are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. Most `android` clients will be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web` +* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mediaconnect`, `mweb`, `android_producer`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, and `tv_embedded`, `web_creator` and `mediaconnect` are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` URLs. Most `android` clients will be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web` * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) @@ -2198,7 +2200,7 @@ Features marked with a **\*** have been back-ported to youtube-dl Some of yt-dlp's default options are different from that of youtube-dl and youtube-dlc: -* yt-dlp supports only [Python 3.8+](## "Windows 7"), and *may* remove support for more versions as they [become EOL](https://devguide.python.org/versions/#python-release-cycle); while [youtube-dl still supports Python 2.6+ and 3.2+](https://github.com/ytdl-org/youtube-dl/issues/30568#issue-1118238743) +* yt-dlp supports only [Python 3.8+](## "Windows 7"), and will remove support for more versions as they [become EOL](https://devguide.python.org/versions/#python-release-cycle); while [youtube-dl still supports Python 2.6+ and 3.2+](https://github.com/ytdl-org/youtube-dl/issues/30568#issue-1118238743) * The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`), no longer work. See [removed options](#Removed) for details * `avconv` is not supported as an alternative to `ffmpeg` * yt-dlp stores config files in slightly different locations to youtube-dl. See [CONFIGURATION](#configuration) for a list of correct locations @@ -2274,8 +2276,8 @@ While these options are redundant, they are still expected to be used due to the --min-views COUNT --match-filters "view_count >=? COUNT" --max-views COUNT --match-filters "view_count <=? COUNT" --break-on-reject Use --break-match-filters - --user-agent UA --add-header "User-Agent:UA" - --referer URL --add-header "Referer:URL" + --user-agent UA --add-headers "User-Agent:UA" + --referer URL --add-headers "Referer:URL" --playlist-start NUMBER -I NUMBER: --playlist-end NUMBER -I :NUMBER --playlist-reverse -I ::-1 diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index e7f553a5f2..3d8fe53a52 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -196,5 +196,15 @@ "when": "b31b81d85f00601710d4fac590c3e4efb4133283", "short": "[ci] Rerun failed tests (#11143)", "authors": ["Grub4K"] + }, + { + "action": "add", + "when": "a886cf3e900f4a2ec00af705f883539269545609", + "short": "[priority] **py2exe is no longer supported**\nThis release's `yt-dlp_min.exe` will be the last, and it's actually a PyInstaller-bundled executable so that yt-dlp users updating their py2exe build with `-U` will be automatically migrated. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10087)" + }, + { + "action": "add", + "when": "a886cf3e900f4a2ec00af705f883539269545609", + "short": "[priority] **Following this release, yt-dlp's Python dependencies *must* be installed using the `default` group**\nIf you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's optional dependencies (which were previously included by default). [Read more](https://github.com/yt-dlp/yt-dlp/pull/11255)" } ] diff --git a/pyproject.toml b/pyproject.toml index be81c265cb..5439db1df2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Programming Language :: Python :: Implementation", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", @@ -169,13 +170,11 @@ run-cov = "echo Code coverage not implemented && exit 1" [[tool.hatch.envs.hatch-test.matrix]] python = [ - "3.8", "3.9", "3.10", "3.11", "3.12", - "pypy3.8", - "pypy3.9", + "3.13", "pypy3.10", ] diff --git a/setup.cfg b/setup.cfg index 340cc3b4d9..e7f3e2b955 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,7 +16,7 @@ remove-unused-variables = true [tox:tox] skipsdist = true -envlist = py{38,39,310,311,312},pypy{38,39,310} +envlist = py{39,310,311,312,313},pypy310 skip_missing_interpreters = true [testenv] # tox diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index eea1065036..48185b7693 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -154,7 +154,6 @@ from .utils import ( try_get, url_basename, variadic, - version_tuple, windows_enable_vt_mode, write_json_file, write_string, @@ -251,7 +250,7 @@ class YoutubeDL: format_sort_force: Force the given format_sort. see "Sorting Formats" for more details. prefer_free_formats: Whether to prefer video formats with free containers - over non-free ones of same quality. + over non-free ones of the same quality. allow_multiple_video_streams: Allow multiple video streams to be merged into a single file allow_multiple_audio_streams: Allow multiple audio streams to be merged @@ -285,7 +284,7 @@ class YoutubeDL: rejecttitle: Reject downloads for matching titles. logger: Log messages to a logging.Logger instance. logtostderr: Print everything to stderr instead of stdout. - consoletitle: Display progress in console window's titlebar. + consoletitle: Display progress in the console window's titlebar. writedescription: Write the video description to a .description file writeinfojson: Write the video description to a .info.json file clean_infojson: Remove internal metadata from the infojson @@ -513,7 +512,7 @@ class YoutubeDL: The following options are used by the extractors: extractor_retries: Number of times to retry for known errors (default: 3) dynamic_mpd: Whether to process dynamic DASH manifests (default: True) - hls_split_discontinuity: Split HLS playlists to different formats at + hls_split_discontinuity: Split HLS playlists into different formats at discontinuities such as ad breaks (default: False) extractor_args: A dictionary of arguments to be passed to the extractors. See "EXTRACTOR ARGUMENTS" for details. @@ -553,7 +552,7 @@ class YoutubeDL: include_ads: - Doesn't work Download ads as well call_home: - Not implemented - Boolean, true iff we are allowed to contact the + Boolean, true if we are allowed to contact the yt-dlp servers for debugging. post_hooks: - Register a custom postprocessor A list of functions that get called as the final step @@ -4089,17 +4088,6 @@ class YoutubeDL: if plugin_dirs: write_debug(f'Plugin directories: {plugin_dirs}') - # Not implemented - if False and self.params.get('call_home'): - ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode() - write_debug(f'Public IP address: {ipaddr}') - latest_version = self.urlopen( - 'https://yt-dl.org/latest/version').read().decode() - if version_tuple(latest_version) > version_tuple(__version__): - self.report_warning( - f'You are using an outdated version (newest version: {latest_version})! ' - 'See https://yt-dl.org/update if you need help updating.') - @functools.cached_property def proxies(self): """Global proxy configuration""" diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 812fbfa9f9..795105b7d8 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -333,7 +333,7 @@ class InfoExtractor: like_count: Number of positive ratings of the video dislike_count: Number of negative ratings of the video repost_count: Number of reposts of the video - average_rating: Average rating give by users, the scale used depends on the webpage + average_rating: Average rating given by users, the scale used depends on the webpage comment_count: Number of comments on the video comments: A list of comments, each with one or more of the following properties (all but one of text or html optional): @@ -520,7 +520,7 @@ class InfoExtractor: or _extract_from_webpage as necessary. While these are normally classmethods, _extract_from_webpage is allowed to be an instance method. - _extract_from_webpage may raise self.StopExtraction() to stop further + _extract_from_webpage may raise self.StopExtraction to stop further processing of the webpage and obtain exclusive rights to it. This is useful when the extractor cannot reliably be matched using just the URL, e.g. invidious/peertube instances diff --git a/yt_dlp/extractor/nexx.py b/yt_dlp/extractor/nexx.py index cd32892fa0..ee1bc281c6 100644 --- a/yt_dlp/extractor/nexx.py +++ b/yt_dlp/extractor/nexx.py @@ -371,7 +371,7 @@ class NexxIE(InfoExtractor): # not all videos work via arc, e.g. nexx:741:1269984 if not video: # Reverse engineered from JS code (see getDeviceID function) - device_id = f'{random.randint(1, 4)}:{int(time.time())}:{random.randint(1e4, 99999)}{random.randint(1, 9)}' + device_id = f'{random.randint(1, 4)}:{int(time.time())}:{random.randint(10000, 99999)}{random.randint(1, 9)}' result = self._call_api(domain_id, 'session/init', video_id, data={ 'nxp_devh': device_id, diff --git a/yt_dlp/extractor/tubetugraz.py b/yt_dlp/extractor/tubetugraz.py index d5dbf007b1..805e2686f7 100644 --- a/yt_dlp/extractor/tubetugraz.py +++ b/yt_dlp/extractor/tubetugraz.py @@ -236,7 +236,7 @@ class TubeTuGrazSeriesIE(TubeTuGrazBaseIE): }, }, ], - 'min_playlist_count': 4, + 'playlist_mincount': 4, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/ustream.py b/yt_dlp/extractor/ustream.py index 33cf8f454d..0fdf8f7484 100644 --- a/yt_dlp/extractor/ustream.py +++ b/yt_dlp/extractor/ustream.py @@ -73,7 +73,7 @@ class UstreamIE(InfoExtractor): def num_to_hex(n): return hex(n)[2:] - rnd = random.randrange + rnd = lambda x: random.randrange(int(x)) if not extra_note: extra_note = '' diff --git a/yt_dlp/extractor/veoh.py b/yt_dlp/extractor/veoh.py index dc1bf96ec6..aac768f3c6 100644 --- a/yt_dlp/extractor/veoh.py +++ b/yt_dlp/extractor/veoh.py @@ -8,7 +8,8 @@ from ..utils import ( int_or_none, parse_duration, qualities, - try_get, + remove_start, + strip_or_none, ) @@ -108,7 +109,7 @@ class VeohIE(InfoExtractor): categories = metadata.get('categoryPath') if not categories: - category = try_get(video, lambda x: x['category'].strip().removeprefix('category_')) + category = remove_start(strip_or_none(video.get('category')), 'category_') categories = [category] if category else None tags = video.get('tags') diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 8cbc00f37c..5148e82619 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -7792,9 +7792,9 @@ class YoutubeTruncatedURLIE(YoutubeBaseInfoExtractor): raise ExtractorError( 'Did you forget to quote the URL? Remember that & is a meta ' 'character in most shells, so you want to put the URL in quotes, ' - 'like youtube-dl ' + 'like yt-dlp ' '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" ' - ' or simply youtube-dl BaW_jenozKc .', + ' or simply yt-dlp BaW_jenozKc .', expected=True) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index c3a647da77..c4d2a72743 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -631,13 +631,13 @@ def create_parser(): metavar='DATE', dest='datebefore', default=None, help=( 'Download only videos uploaded on or before this date. ' - 'The date formats accepted is the same as --date')) + 'The date formats accepted are the same as --date')) selection.add_option( '--dateafter', metavar='DATE', dest='dateafter', default=None, help=( 'Download only videos uploaded on or after this date. ' - 'The date formats accepted is the same as --date')) + 'The date formats accepted are the same as --date')) selection.add_option( '--min-views', metavar='COUNT', dest='min_views', default=None, type=int, @@ -833,7 +833,7 @@ def create_parser(): '--prefer-free-formats', action='store_true', dest='prefer_free_formats', default=False, help=( - 'Prefer video formats with free containers over non-free ones of same quality. ' + 'Prefer video formats with free containers over non-free ones of the same quality. ' 'Use with "-S ext" to strictly prefer free containers irrespective of quality')) video_format.add_option( '--no-prefer-free-formats', @@ -907,13 +907,14 @@ def create_parser(): subtitles.add_option( '--sub-format', action='store', dest='subtitlesformat', metavar='FORMAT', default='best', - help='Subtitle format; accepts formats preference, e.g. "srt" or "ass/srt/best"') + help='Subtitle format; accepts formats preference separated by "/", e.g. "srt" or "ass/srt/best"') subtitles.add_option( '--sub-langs', '--srt-langs', action='callback', dest='subtitleslangs', metavar='LANGS', type='str', default=[], callback=_list_from_options_callback, help=( - 'Languages of the subtitles to download (can be regex) or "all" separated by commas, e.g. --sub-langs "en.*,ja". ' + 'Languages of the subtitles to download (can be regex) or "all" separated by commas, e.g. --sub-langs "en.*,ja" ' + '(where "en.*" is a regex pattern that matches "en" followed by 0 or more of any character). ' 'You can prefix the language code with a "-" to exclude it from the requested languages, e.g. --sub-langs all,-live_chat. ' 'Use --list-subs for a list of available language tags')) @@ -1182,7 +1183,7 @@ def create_parser(): '--print-to-file', metavar='[WHEN:]TEMPLATE FILE', dest='print_to_file', nargs=2, **when_prefix('video'), help=( - 'Append given template to the file. The values of WHEN and TEMPLATE are same as that of --print. ' + 'Append given template to the file. The values of WHEN and TEMPLATE are the same as that of --print. ' 'FILE uses the same syntax as the output template. This option can be used multiple times')) verbosity.add_option( '-g', '--get-url', @@ -1226,7 +1227,7 @@ def create_parser(): '-J', '--dump-single-json', action='store_true', dest='dump_single_json', default=False, help=( - 'Quiet, but print JSON information for each url or infojson passed. Simulate unless --no-simulate is used. ' + 'Quiet, but print JSON information for each URL or infojson passed. Simulate unless --no-simulate is used. ' 'If the URL refers to a playlist, the whole playlist information is dumped in a single line')) verbosity.add_option( '--print-json', @@ -1570,7 +1571,7 @@ def create_parser(): help=( 'Remux the video into another container if necessary ' f'(currently supported: {", ".join(FFmpegVideoRemuxerPP.SUPPORTED_EXTS)}). ' - 'If target container does not support the video/audio codec, remuxing will fail. You can specify multiple rules; ' + 'If the target container does not support the video/audio codec, remuxing will fail. You can specify multiple rules; ' 'e.g. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 and anything else to mkv')) postproc.add_option( '--recode-video', @@ -1676,7 +1677,7 @@ def create_parser(): postproc.add_option( '--xattrs', '--xattr', action='store_true', dest='xattrs', default=False, - help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)') + help='Write metadata to the video file\'s xattrs (using Dublin Core and XDG standards)') postproc.add_option( '--concat-playlist', metavar='POLICY', dest='concat_playlist', default='multi_video', @@ -1684,7 +1685,7 @@ def create_parser(): help=( 'Concatenate videos in a playlist. One of "never", "always", or ' '"multi_video" (default; only when the videos form a single show). ' - 'All the video files must have same codecs and number of streams to be concatable. ' + 'All the video files must have the same codecs and number of streams to be concatenable. ' 'The "pl_video:" prefix can be used with "--paths" and "--output" to ' 'set the output filename for the concatenated files. See "OUTPUT TEMPLATE" for details')) postproc.add_option( @@ -1694,8 +1695,8 @@ def create_parser(): help=( 'Automatically correct known faults of the file. ' 'One of never (do nothing), warn (only emit a warning), ' - 'detect_or_warn (the default; fix file if we can, warn otherwise), ' - 'force (try fixing even if file already exists)')) + 'detect_or_warn (the default; fix the file if we can, warn otherwise), ' + 'force (try fixing even if the file already exists)')) postproc.add_option( '--prefer-avconv', '--no-prefer-ffmpeg', action='store_false', dest='prefer_ffmpeg', @@ -1714,7 +1715,7 @@ def create_parser(): help=( 'Execute a command, optionally prefixed with when to execute it, separated by a ":". ' 'Supported values of "WHEN" are the same as that of --use-postprocessor (default: after_move). ' - 'Same syntax as the output template can be used to pass any field as arguments to the command. ' + 'The same syntax as the output template can be used to pass any field as arguments to the command. ' 'If no fields are passed, %(filepath,_filename|)q is appended to the end of the command. ' 'This option can be used multiple times')) postproc.add_option( @@ -1785,14 +1786,14 @@ def create_parser(): 'delim': None, 'process': lambda val: dict(_postprocessor_opts_parser(*val.split(':', 1))), }, help=( - 'The (case sensitive) name of plugin postprocessors to be enabled, ' + 'The (case-sensitive) name of plugin postprocessors to be enabled, ' 'and (optionally) arguments to be passed to it, separated by a colon ":". ' 'ARGS are a semicolon ";" delimited list of NAME=VALUE. ' 'The "when" argument determines when the postprocessor is invoked. ' 'It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), ' '"video" (after --format; before --print/--output), "before_dl" (before each video download), ' '"post_process" (after each video download; default), ' - '"after_move" (after moving video file to its final locations), ' + '"after_move" (after moving the video file to its final location), ' '"after_video" (after downloading and processing all formats of a video), ' 'or "playlist" (at end of playlist). ' 'This option can be used multiple times to add different postprocessors')) @@ -1809,7 +1810,7 @@ def create_parser(): }, help=( 'SponsorBlock categories to create chapters for, separated by commas. ' f'Available categories are {", ".join(SponsorBlockPP.CATEGORIES.keys())}, all and default (=all). ' - 'You can prefix the category with a "-" to exclude it. See [1] for description of the categories. ' + 'You can prefix the category with a "-" to exclude it. See [1] for descriptions of the categories. ' 'E.g. --sponsorblock-mark all,-preview [1] https://wiki.sponsor.ajay.app/w/Segment_Categories')) sponsorblock.add_option( '--sponsorblock-remove', metavar='CATS', @@ -1895,7 +1896,7 @@ def create_parser(): extractor.add_option( '--no-hls-split-discontinuity', dest='hls_split_discontinuity', action='store_false', - help='Do not split HLS playlists to different formats at discontinuities such as ad breaks (default)') + help='Do not split HLS playlists into different formats at discontinuities such as ad breaks (default)') _extractor_arg_parser = lambda key, vals='': (key.strip().lower().replace('-', '_'), [ val.replace(r'\,', ',').strip() for val in re.split(r'(? Date: Tue, 22 Oct 2024 05:03:55 +0000 Subject: [PATCH 18/98] Release 2024.10.22 Created by: bashonly :ci skip all --- CONTRIBUTORS | 10 ++++++++++ Changelog.md | 50 +++++++++++++++++++++++++++++++++++++++++++++++ supportedsites.md | 49 +++++++++++++++++++++++----------------------- yt_dlp/version.py | 6 +++--- 4 files changed, 88 insertions(+), 27 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index bcdf6a0c24..949bc89c47 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -678,3 +678,13 @@ coreywright eric321 poyhen tetra-fox +444995 +63427083 +allendema +DarkZeros +DTrombett +imranh2 +KarboniteKream +mikkovedru +pktiuk +rubyevadestaxes diff --git a/Changelog.md b/Changelog.md index 10fd437fa1..0efccadd10 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,56 @@ # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2024.10.22 + +#### Important changes +- **Following this release, yt-dlp's Python dependencies *must* be installed using the `default` group** +If you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's optional dependencies (which were previously included by default). [Read more](https://github.com/yt-dlp/yt-dlp/pull/11255) +- **py2exe is no longer supported** +This release's `yt-dlp_min.exe` will be the last, and it's actually a PyInstaller-bundled executable so that yt-dlp users updating their py2exe build with `-U` will be automatically migrated. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10087) + +#### Core changes +- [Add extractor helpers](https://github.com/yt-dlp/yt-dlp/commit/d710a6ca7c622705c0c8c8a3615916f531137d5d) ([#10653](https://github.com/yt-dlp/yt-dlp/issues/10653)) by [Grub4K](https://github.com/Grub4K) +- [Add option `--plugin-dirs`](https://github.com/yt-dlp/yt-dlp/commit/0f593dca9fa995d88eb763170a932da61c8f24dc) ([#11277](https://github.com/yt-dlp/yt-dlp/issues/11277)) by [coletdjnz](https://github.com/coletdjnz), [imranh2](https://github.com/imranh2) +- **cookies**: [Fix compatibility for Python <=3.9 in traceback](https://github.com/yt-dlp/yt-dlp/commit/c5f0f58efd8c3930de8202c15a5c53b1b635bd51) by [Grub4K](https://github.com/Grub4K) +- **utils** + - `Popen`: [Reset PyInstaller environment](https://github.com/yt-dlp/yt-dlp/commit/fbc66e3ab35743cc847a21223c67d88bb463cd9c) ([#11258](https://github.com/yt-dlp/yt-dlp/issues/11258)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) + - `sanitize_path`: [Reimplement function](https://github.com/yt-dlp/yt-dlp/commit/85b87c991af25dcb35630fa94580fd418e78ee33) ([#11198](https://github.com/yt-dlp/yt-dlp/issues/11198)) by [Grub4K](https://github.com/Grub4K) + +#### Extractor changes +- **adobepass**: [Use newer user-agent for provider redirect request](https://github.com/yt-dlp/yt-dlp/commit/dcfeea4dd5e5686821350baa6c7767a011944867) ([#11250](https://github.com/yt-dlp/yt-dlp/issues/11250)) by [bashonly](https://github.com/bashonly) +- **afreecatv**: [Adapt extractors to new sooplive.co.kr domain](https://github.com/yt-dlp/yt-dlp/commit/46fe60ff19395698a87113b2944453779e04ab9d) ([#11266](https://github.com/yt-dlp/yt-dlp/issues/11266)) by [63427083](https://github.com/63427083), [bashonly](https://github.com/bashonly) +- **cda**: [Support folders](https://github.com/yt-dlp/yt-dlp/commit/c4d95f67ddc522297bb1fea875255cf94b34d595) ([#10786](https://github.com/yt-dlp/yt-dlp/issues/10786)) by [pktiuk](https://github.com/pktiuk) +- **cwtv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/9d43dcb2c5c38f443f84dfc126cd32720e1a1ad6) ([#11230](https://github.com/yt-dlp/yt-dlp/issues/11230)) by [bashonly](https://github.com/bashonly) +- **drtv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f4338714241b11d9d43768ae71a25f5e952f677d) ([#11141](https://github.com/yt-dlp/yt-dlp/issues/11141)) by [444995](https://github.com/444995) +- **funk**: [Extend `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/8de431ec97a4b62b73df8f686b6e21e462775336) ([#11269](https://github.com/yt-dlp/yt-dlp/issues/11269)) by [seproDev](https://github.com/seproDev) +- **gem.cbc.ca**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/40054cb4a7ebbea30d335d444e6f58b298a3baa0) ([#11196](https://github.com/yt-dlp/yt-dlp/issues/11196)) by [DavidSkrundz](https://github.com/DavidSkrundz) +- **generic**: [Impersonate browser by default](https://github.com/yt-dlp/yt-dlp/commit/edfd095b1917701c5046bd51f9542897c17d41a7) ([#11206](https://github.com/yt-dlp/yt-dlp/issues/11206)) by [Grub4K](https://github.com/Grub4K) +- **imgur** + - [Fix thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/87408ccfd772ddf31a8323d8151c24f9577cbc9f) ([#11298](https://github.com/yt-dlp/yt-dlp/issues/11298)) by [seproDev](https://github.com/seproDev) + - [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/5af774d7a36c00bea618c7047c9326532cd3f616) ([#11075](https://github.com/yt-dlp/yt-dlp/issues/11075)) by [Deer-Spangle](https://github.com/Deer-Spangle) +- **patreon**: campaign: [Stricter URL matching](https://github.com/yt-dlp/yt-dlp/commit/babb70960595e2146f06f81affc29c7e713e34e2) ([#11235](https://github.com/yt-dlp/yt-dlp/issues/11235)) by [bashonly](https://github.com/bashonly) +- **reddit**: [Detect and raise when login is required](https://github.com/yt-dlp/yt-dlp/commit/cba7868502f04175fecf9ab3e363296aee7ebec2) ([#11202](https://github.com/yt-dlp/yt-dlp/issues/11202)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **substack**: [Resolve podcast file extensions](https://github.com/yt-dlp/yt-dlp/commit/3148c1822f66533998278f0a1cf842b9bea1526a) ([#11275](https://github.com/yt-dlp/yt-dlp/issues/11275)) by [bashonly](https://github.com/bashonly) +- **telecinco**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/0b7ec08816fb196cd41d392f8331b4eb8366c4f8) ([#11142](https://github.com/yt-dlp/yt-dlp/issues/11142)) by [bashonly](https://github.com/bashonly), [DarkZeros](https://github.com/DarkZeros) +- **tubitv**: [Strip extra whitespace from titles](https://github.com/yt-dlp/yt-dlp/commit/e68b4c19af122876561a41f2dd8093fae7b417c7) ([#10795](https://github.com/yt-dlp/yt-dlp/issues/10795)) by [allendema](https://github.com/allendema) +- **tver**: [Support series URLs](https://github.com/yt-dlp/yt-dlp/commit/ceaea731b6e314dbbdfb2e358d7677785ed0b4fc) ([#9507](https://github.com/yt-dlp/yt-dlp/issues/9507)) by [pzhlkj6612](https://github.com/pzhlkj6612), [vvto33](https://github.com/vvto33) +- **twitter**: spaces: [Allow extraction when not logged in](https://github.com/yt-dlp/yt-dlp/commit/679c68240a26481ea7c07cc0c014745631ea8481) ([#11289](https://github.com/yt-dlp/yt-dlp/issues/11289)) by [rubyevadestaxes](https://github.com/rubyevadestaxes) +- **weverse**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5310fa87f6cb7f66bf42e2520878952fbf6b1652) ([#11215](https://github.com/yt-dlp/yt-dlp/issues/11215)) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Fix `comment_count` extraction](https://github.com/yt-dlp/yt-dlp/commit/7af1ddaaf2a6a0a750373a9ab53c7770af4f9fe4) ([#11274](https://github.com/yt-dlp/yt-dlp/issues/11274)) by [bashonly](https://github.com/bashonly) + - [Remove broken `android_producer` client](https://github.com/yt-dlp/yt-dlp/commit/fed53d70bdb7d3e37ef63dd7fcf0ef74356167fd) ([#11297](https://github.com/yt-dlp/yt-dlp/issues/11297)) by [bashonly](https://github.com/bashonly) + - [Remove broken age-restriction workaround](https://github.com/yt-dlp/yt-dlp/commit/ec2f4bf0823a13043f98f5bd0bf6677837bf09dc) ([#11297](https://github.com/yt-dlp/yt-dlp/issues/11297)) by [bashonly](https://github.com/bashonly) + - [Support logging in with OAuth](https://github.com/yt-dlp/yt-dlp/commit/b8635c1d4779da195e71aa281f73aaad702c935e) ([#11001](https://github.com/yt-dlp/yt-dlp/issues/11001)) by [coletdjnz](https://github.com/coletdjnz) + +#### Misc. changes +- **build** + - [Migrate `py2exe` builds to `win_exe`](https://github.com/yt-dlp/yt-dlp/commit/a886cf3e900f4a2ec00af705f883539269545609) ([#11256](https://github.com/yt-dlp/yt-dlp/issues/11256)) by [bashonly](https://github.com/bashonly) + - [Use `macos-13` image for macOS builds](https://github.com/yt-dlp/yt-dlp/commit/64d84d75ca8c19ec06558cc7c511f5f4f7a822bc) ([#11236](https://github.com/yt-dlp/yt-dlp/issues/11236)) by [bashonly](https://github.com/bashonly) + - `make_lazy_extractors`: [Force running without plugins](https://github.com/yt-dlp/yt-dlp/commit/1a830394a21a81a3e9918f9e175abc9fbb21f089) ([#11205](https://github.com/yt-dlp/yt-dlp/issues/11205)) by [Grub4K](https://github.com/Grub4K) +- **cleanup**: Miscellaneous: [67adeb7](https://github.com/yt-dlp/yt-dlp/commit/67adeb7bab00662ba55d473e405b301abb42fe61) by [bashonly](https://github.com/bashonly), [DTrombett](https://github.com/DTrombett), [grqz](https://github.com/grqz), [Grub4K](https://github.com/Grub4K), [KarboniteKream](https://github.com/KarboniteKream), [mikkovedru](https://github.com/mikkovedru), [seproDev](https://github.com/seproDev) +- **test**: [Allow running tests explicitly](https://github.com/yt-dlp/yt-dlp/commit/16eb28026a2ddf5608d0a628ef15949b8d3805a9) ([#11203](https://github.com/yt-dlp/yt-dlp/issues/11203)) by [Grub4K](https://github.com/Grub4K) + ### 2024.10.07 #### Core changes diff --git a/supportedsites.md b/supportedsites.md index e23d395fde..7b22e8c6fa 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -45,10 +45,6 @@ - **aenetworks:collection** - **aenetworks:show** - **AeonCo** - - **afreecatv**: [*afreecatv*](## "netrc machine") afreecatv.com - - **afreecatv:catchstory**: [*afreecatv*](## "netrc machine") afreecatv.com catch story - - **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com livestreams - - **afreecatv:user** - **AirTV** - **AitubeKZVideo** - **AliExpressLive** @@ -254,6 +250,7 @@ - **CCMA** - **CCTV**: 央视网 - **CDA**: [*cdapl*](## "netrc machine") + - **CDAFolder** - **Cellebrite** - **CeskaTelevize** - **CGTN** @@ -1046,8 +1043,8 @@ - **Parler**: Posts on parler.com - **parliamentlive.tv**: UK parliament videos - **Parlview**: (**Currently broken**) - - **Patreon** - - **PatreonCampaign** + - **patreon** + - **patreon:campaign** - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) - **PBSKids** - **PearVideo** @@ -1339,6 +1336,10 @@ - **SohuV** - **SonyLIV**: [*sonyliv*](## "netrc machine") - **SonyLIVSeries** + - **soop**: [*afreecatv*](## "netrc machine") sooplive.co.kr + - **soop:catchstory**: [*afreecatv*](## "netrc machine") sooplive.co.kr catch story + - **soop:live**: [*afreecatv*](## "netrc machine") sooplive.co.kr livestreams + - **soop:user**: [*afreecatv*](## "netrc machine") - **soundcloud**: [*soundcloud*](## "netrc machine") - **soundcloud:playlist**: [*soundcloud*](## "netrc machine") - **soundcloud:related**: [*soundcloud*](## "netrc machine") @@ -1778,24 +1779,24 @@ - **YouPornStar**: YouPorn Pornstar, with description, sorting and pagination - **YouPornTag**: YouPorn tag (porntags), with sorting, filtering and pagination - **YouPornVideos**: YouPorn video (browse) playlists, with sorting, filtering and pagination - - **youtube**: YouTube - - **youtube:clip** - - **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies) - - **youtube:history**: Youtube watch history; ":ythis" keyword (requires cookies) - - **youtube:​music:search_url**: YouTube music search URLs with selectable sections, e.g. #songs - - **youtube:notif**: YouTube notifications; ":ytnotif" keyword (requires cookies) - - **youtube:playlist**: YouTube playlists - - **youtube:recommended**: YouTube recommended videos; ":ytrec" keyword - - **youtube:search**: YouTube search; "ytsearch:" prefix - - **youtube:​search:date**: YouTube search, newest videos first; "ytsearchdate:" prefix - - **youtube:search_url**: YouTube search URLs with sorting and filter support - - **youtube:​shorts:pivot:audio**: YouTube Shorts audio pivot (Shorts using audio of a given video) - - **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies) - - **youtube:tab**: YouTube Tabs - - **youtube:user**: YouTube user videos; "ytuser:" prefix - - **youtube:watchlater**: Youtube watch later list; ":ytwatchlater" keyword (requires cookies) - - **YoutubeLivestreamEmbed**: YouTube livestream embeds - - **YoutubeYtBe**: youtu.be + - **youtube**: [*youtube*](## "netrc machine") YouTube + - **youtube:clip**: [*youtube*](## "netrc machine") + - **youtube:favorites**: [*youtube*](## "netrc machine") YouTube liked videos; ":ytfav" keyword (requires cookies) + - **youtube:history**: [*youtube*](## "netrc machine") Youtube watch history; ":ythis" keyword (requires cookies) + - **youtube:​music:search_url**: [*youtube*](## "netrc machine") YouTube music search URLs with selectable sections, e.g. #songs + - **youtube:notif**: [*youtube*](## "netrc machine") YouTube notifications; ":ytnotif" keyword (requires cookies) + - **youtube:playlist**: [*youtube*](## "netrc machine") YouTube playlists + - **youtube:recommended**: [*youtube*](## "netrc machine") YouTube recommended videos; ":ytrec" keyword + - **youtube:search**: [*youtube*](## "netrc machine") YouTube search; "ytsearch:" prefix + - **youtube:​search:date**: [*youtube*](## "netrc machine") YouTube search, newest videos first; "ytsearchdate:" prefix + - **youtube:search_url**: [*youtube*](## "netrc machine") YouTube search URLs with sorting and filter support + - **youtube:​shorts:pivot:audio**: [*youtube*](## "netrc machine") YouTube Shorts audio pivot (Shorts using audio of a given video) + - **youtube:subscriptions**: [*youtube*](## "netrc machine") YouTube subscriptions feed; ":ytsubs" keyword (requires cookies) + - **youtube:tab**: [*youtube*](## "netrc machine") YouTube Tabs + - **youtube:user**: [*youtube*](## "netrc machine") YouTube user videos; "ytuser:" prefix + - **youtube:watchlater**: [*youtube*](## "netrc machine") Youtube watch later list; ":ytwatchlater" keyword (requires cookies) + - **YoutubeLivestreamEmbed**: [*youtube*](## "netrc machine") YouTube livestream embeds + - **YoutubeYtBe**: [*youtube*](## "netrc machine") youtu.be - **Zaiko** - **ZaikoETicket** - **Zapiks** diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 2ad18dd196..17d7881845 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2024.10.07' +__version__ = '2024.10.22' -RELEASE_GIT_HEAD = '1a176d874e6772cd898ce507379ea388e96ee3f7' +RELEASE_GIT_HEAD = '67adeb7bab00662ba55d473e405b301abb42fe61' VARIANT = None @@ -12,4 +12,4 @@ CHANNEL = 'stable' ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2024.10.07' +_pkg_version = '2024.10.22' From 87884f15580910e4e0fe0e1db73508debc657471 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 22 Oct 2024 05:40:25 +0000 Subject: [PATCH 19/98] [build] Move optional dependencies to the `default` group (#11255) Closes #11221 Authored by: bashonly --- pyproject.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5439db1df2..ff5e38ff50 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,10 @@ classifiers = [ "Operating System :: OS Independent", ] dynamic = ["version"] -dependencies = [ +dependencies = [] + +[project.optional-dependencies] +default = [ "brotli; implementation_name=='cpython'", "brotlicffi; implementation_name!='cpython'", "certifi", @@ -52,9 +55,6 @@ dependencies = [ "urllib3>=1.26.17,<3", "websockets>=13.0", ] - -[project.optional-dependencies] -default = [] curl-cffi = [ "curl-cffi==0.5.10; os_name=='nt' and implementation_name=='cpython'", "curl-cffi>=0.5.10,!=0.6.*,<0.7.2; os_name!='nt' and implementation_name=='cpython'", From ea9e35d85fba5eab341cdcaf1eaed69b57f7e465 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 22 Oct 2024 06:03:30 +0000 Subject: [PATCH 20/98] [cleanup] Misc (#11311) Authored by: bashonly --- .github/workflows/build.yml | 11 +++++------ devscripts/changelog_override.json | 5 +++++ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 64227d9740..00326416d8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -72,7 +72,7 @@ on: default: true type: boolean windows: - description: yt-dlp.exe, yt-dlp_min.exe, yt-dlp_win.zip + description: yt-dlp.exe, yt-dlp_win.zip default: true type: boolean windows32: @@ -421,10 +421,6 @@ jobs: python -m bundle.pyinstaller --onedir Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip - - name: Add migration executable for py2exe - run: | - Copy-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe - - name: Verify --update-to if: vars.UPDATE_TO_VERIFICATION run: | @@ -444,7 +440,6 @@ jobs: name: build-bin-${{ github.job }} path: | dist/yt-dlp.exe - dist/yt-dlp_min.exe dist/yt-dlp_win.zip compression-level: 0 @@ -531,13 +526,17 @@ jobs: lock 2022.08.18.36 .+ Python 3\.6 lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) + lock 2024.10.22 py2exe .+ lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6 lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) + lockV2 yt-dlp/yt-dlp 2024.10.22 py2exe .+ lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server) + lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 py2exe .+ lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server) + lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.045052 py2exe .+ EOF - name: Sign checksum files diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index 3d8fe53a52..3262a0e678 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -206,5 +206,10 @@ "action": "add", "when": "a886cf3e900f4a2ec00af705f883539269545609", "short": "[priority] **Following this release, yt-dlp's Python dependencies *must* be installed using the `default` group**\nIf you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's optional dependencies (which were previously included by default). [Read more](https://github.com/yt-dlp/yt-dlp/pull/11255)" + }, + { + "action": "add", + "when": "87884f15580910e4e0fe0e1db73508debc657471", + "short": "[priority] **Beginning with this release, yt-dlp's Python dependencies *must* be installed using the `default` group**\nIf you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's optional dependencies (which were previously included by default). [Read more](https://github.com/yt-dlp/yt-dlp/pull/11255)" } ] From dd2e24446954246a2ec4d4a7e95531f52a14b351 Mon Sep 17 00:00:00 2001 From: bashonly Date: Tue, 22 Oct 2024 13:09:43 -0500 Subject: [PATCH 21/98] [build] Use Ubuntu 20.04 and Python 3.9 for Linux ARM builds (#8638) Authored by: bashonly --- .github/workflows/build.yml | 26 ++++++++++++++++---------- yt_dlp/update.py | 10 ++-------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 00326416d8..fdca5d702e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -199,22 +199,24 @@ jobs: GITHUB_WORKFLOW: build githubToken: ${{ github.token }} # To cache image arch: ${{ matrix.architecture }} - distro: ubuntu18.04 # Standalone executable should be built on minimum supported OS + distro: ubuntu20.04 # Standalone executable should be built on minimum supported OS dockerRunArgs: --volume "${PWD}/repo:/repo" install: | # Installing Python 3.10 from the Deadsnakes repo raises errors apt update - apt -y install zlib1g-dev libffi-dev python3.8 python3.8-dev python3.8-distutils python3-pip - python3.8 -m pip install -U pip setuptools wheel - # Cannot access any files from the repo directory at this stage - python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi secretstorage cffi + apt -y install zlib1g-dev libffi-dev python3.9 python3.9-dev python3.9-distutils python3-pip \ + python3-secretstorage # Cannot build cryptography wheel in virtual armv7 environment + python3.9 -m pip install -U pip wheel 'setuptools>=71.0.2' + # XXX: Keep this in sync with pyproject.toml (it can't be accessed at this stage) and exclude secretstorage + python3.9 -m pip install -U Pyinstaller mutagen pycryptodomex brotli certifi cffi \ + 'requests>=2.32.2,<3' 'urllib3>=1.26.17,<3' 'websockets>=13.0' run: | cd repo - python3.8 devscripts/install_deps.py -o --include build - python3.8 devscripts/install_deps.py --include pyinstaller --include secretstorage # Cached version may be out of date - python3.8 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}" - python3.8 devscripts/make_lazy_extractors.py - python3.8 -m bundle.pyinstaller + python3.9 devscripts/install_deps.py -o --include build + python3.9 devscripts/install_deps.py --include pyinstaller # Cached versions may be out of date + python3.9 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}" + python3.9 devscripts/make_lazy_extractors.py + python3.9 -m bundle.pyinstaller if ${{ vars.UPDATE_TO_VERIFICATION && 'true' || 'false' }}; then arch="${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}" @@ -527,16 +529,20 @@ jobs: lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) lock 2024.10.22 py2exe .+ + lock 2024.10.22 linux_(?:armv7l|aarch64)_exe .+ glibc 2\.(?:[12]?\d|30)\b lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6 lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp 2024.10.22 py2exe .+ + lockV2 yt-dlp/yt-dlp 2024.10.22 linux_(?:armv7l|aarch64)_exe .+ glibc 2\.(?:[12]?\d|30)\b lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 py2exe .+ + lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 linux_(?:armv7l|aarch64)_exe .+ glibc 2\.(?:[12]?\d|30)\b lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.045052 py2exe .+ + lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 linux_(?:armv7l|aarch64)_exe .+ glibc 2\.(?:[12]?\d|30)\b EOF - name: Sign checksum files diff --git a/yt_dlp/update.py b/yt_dlp/update.py index 0172acfd63..3a8d78de41 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -158,13 +158,6 @@ def _get_system_deprecation(): return EXE_MSG_TMPL.format('Windows 7/Server 2008 R2', 'issues/10086', STOP_MSG) return None - # Temporary until aarch64/armv7l build flow is bumped to Ubuntu 20.04 and Python 3.9 - elif variant in ('linux_aarch64_exe', 'linux_armv7l_exe'): - libc_ver = version_tuple(os.confstr('CS_GNU_LIBC_VERSION').partition(' ')[2]) - if libc_ver < (2, 31): - return EXE_MSG_TMPL.format('system glibc version < 2.31', 'pull/8638', STOP_MSG) - return None - return f'Support for Python version {major}.{minor} has been deprecated. {PYTHON_MSG}' @@ -357,7 +350,8 @@ class Updater: continue self._report_error( - f'yt-dlp cannot be updated to {resolved_tag} since you are on an older Python version', True) + f'yt-dlp cannot be updated to {resolved_tag} since you are on an older Python version ' + 'or your operating system is not compatible with the requested build', True) return None return resolved_tag From d784464399b600ba9516bbcec6286f11d68974dd Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 23 Oct 2024 06:33:50 +0000 Subject: [PATCH 22/98] Remove Python 3.8 support (#11321) Closes #10086 Authored by: bashonly --- .github/workflows/build.yml | 22 +++++--- CONTRIBUTING.md | 20 ++++--- README.md | 10 ++-- devscripts/make_issue_template.py | 15 +++--- pyproject.toml | 3 +- setup.cfg | 2 +- test/test_update.py | 89 ++++++++++++++++++++++++------- yt_dlp/__init__.py | 4 +- yt_dlp/compat/compat_utils.py | 2 +- yt_dlp/compat/functools.py | 5 -- yt_dlp/extractor/pornbox.py | 3 +- yt_dlp/plugins.py | 2 +- yt_dlp/update.py | 16 +----- yt_dlp/utils/_utils.py | 2 +- 14 files changed, 120 insertions(+), 75 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fdca5d702e..d062d7720d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -405,8 +405,8 @@ jobs: steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 - with: # 3.8 is used for Win7 support - python-version: "3.8" + with: + python-version: "3.10" - name: Install Requirements run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds python devscripts/install_deps.py -o --include build @@ -454,7 +454,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: "3.8" + python-version: "3.10" architecture: "x86" - name: Install Requirements run: | @@ -529,20 +529,28 @@ jobs: lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) lock 2024.10.22 py2exe .+ - lock 2024.10.22 linux_(?:armv7l|aarch64)_exe .+ glibc 2\.(?:[12]?\d|30)\b + lock 2024.10.22 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b + lock 2024.10.22 (?!\w+_exe).+ Python 3\.8 + lock 2024.10.22 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6 lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp 2024.10.22 py2exe .+ - lockV2 yt-dlp/yt-dlp 2024.10.22 linux_(?:armv7l|aarch64)_exe .+ glibc 2\.(?:[12]?\d|30)\b + lockV2 yt-dlp/yt-dlp 2024.10.22 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b + lockV2 yt-dlp/yt-dlp 2024.10.22 (?!\w+_exe).+ Python 3\.8 + lockV2 yt-dlp/yt-dlp 2024.10.22 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 py2exe .+ - lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 linux_(?:armv7l|aarch64)_exe .+ glibc 2\.(?:[12]?\d|30)\b + lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b + lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 (?!\w+_exe).+ Python 3\.8 + lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.045052 py2exe .+ - lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 linux_(?:armv7l|aarch64)_exe .+ glibc 2\.(?:[12]?\d|30)\b + lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b + lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 (?!\w+_exe).+ Python 3\.8 + lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) EOF - name: Sign checksum files diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f1646e5952..fd7b0f1210 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -37,14 +37,18 @@ Bugs and suggestions should be reported at: [yt-dlp/yt-dlp/issues](https://githu **Please include the full output of yt-dlp when run with `-vU`**, i.e. **add** `-vU` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this: ``` $ yt-dlp -vU -[debug] Command-line config: ['-v', 'demo.com'] -[debug] Encodings: locale UTF-8, fs utf-8, out utf-8, pref UTF-8 -[debug] yt-dlp version 2021.09.25 (zip) -[debug] Python version 3.8.10 (CPython 64bit) - Linux-5.4.0-74-generic-x86_64-with-glibc2.29 -[debug] exe versions: ffmpeg 4.2.4, ffprobe 4.2.4 +[debug] Command-line config: ['-vU', 'https://www.example.com/'] +[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 +[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe) +[debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023) +[debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2 +[debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1 [debug] Proxy map: {} -Current Build Hash 25cc412d1d3c0725a1f2f5b7e4682f6fb40e6d15f7024e96f7afd572e9919535 -yt-dlp is up to date (2021.09.25) +[debug] Request Handlers: urllib, requests, websockets, curl_cffi +[debug] Loaded 1838 extractors +[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest +Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds +yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) ... ``` **Do not post screenshots of verbose logs; only plain text is acceptable.** @@ -268,7 +272,7 @@ After you have ensured this site is distributing its content legally, you can fo You can use `hatch fmt` to automatically fix problems. Rules that the linter/formatter enforces should not be disabled with `# noqa` unless a maintainer requests it. The only exception allowed is for old/printf-style string formatting in GraphQL query templates (use `# noqa: UP031`). -1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.8 and PyPy >=3.10. Backward compatibility is not required for even older versions of Python. +1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.9 and PyPy >=3.10. Backward compatibility is not required for even older versions of Python. 1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: ```shell diff --git a/README.md b/README.md index 46fff07df2..05b8e2b868 100644 --- a/README.md +++ b/README.md @@ -98,14 +98,14 @@ You can install yt-dlp using [the binaries](#release-files), [pip](https://pypi. File|Description :---|:--- [yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform-independent [zipimport](https://docs.python.org/3/library/zipimport.html) binary. Needs Python (recommended for **Linux/BSD**) -[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win7 SP1+) standalone x64 binary (recommended for **Windows**) +[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win8+) standalone x64 binary (recommended for **Windows**) [yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|Universal MacOS (10.15+) standalone executable (recommended for **MacOS**) #### Alternatives File|Description :---|:--- -[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win7 SP1+) standalone x86 (32-bit) binary +[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win8+) standalone x86 (32-bit) binary [yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary [yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary [yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary @@ -172,7 +172,7 @@ python3 -m pip install -U --pre "yt-dlp[default]" ``` ## DEPENDENCIES -Python versions 3.8+ (CPython) and 3.10+ (PyPy) are supported. Other versions and implementations may or may not work correctly. +Python versions 3.9+ (CPython) and 3.10+ (PyPy) are supported. Other versions and implementations may or may not work correctly. +### 2024.11.04 + +#### Important changes +- **Beginning with this release, yt-dlp's Python dependencies *must* be installed using the `default` group** +If you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's optional dependencies (which were previously included by default). [Read more](https://github.com/yt-dlp/yt-dlp/pull/11255) +- **The minimum *required* Python version has been raised to 3.9** +Python 3.8 reached its end-of-life on 2024.10.07, and yt-dlp has now removed support for it. As an unfortunate side effect, the official `yt-dlp.exe` and `yt-dlp_x86.exe` binaries are no longer supported on Windows 7. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086) + +#### Core changes +- [Allow thumbnails with `.jpe` extension](https://github.com/yt-dlp/yt-dlp/commit/5bc5fb2835ea59bdf326bd12176d74d2c7348a95) ([#11408](https://github.com/yt-dlp/yt-dlp/issues/11408)) by [bashonly](https://github.com/bashonly) +- [Expand paths in `--plugin-dirs`](https://github.com/yt-dlp/yt-dlp/commit/914af9a0cf51c9a3f74aa88d952bee8334c67511) ([#11334](https://github.com/yt-dlp/yt-dlp/issues/11334)) by [bashonly](https://github.com/bashonly) +- [Fix `--netrc` empty string parsing for Python <=3.10](https://github.com/yt-dlp/yt-dlp/commit/88402b714ec124633933737bc156b172a3dec3d6) ([#11414](https://github.com/yt-dlp/yt-dlp/issues/11414)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) +- [Populate format sorting fields before dependent fields](https://github.com/yt-dlp/yt-dlp/commit/5c880ef42e9c2b2fc412f6d69dad37d34fb75a62) ([#11353](https://github.com/yt-dlp/yt-dlp/issues/11353)) by [Grub4K](https://github.com/Grub4K) +- [Prioritize AV1](https://github.com/yt-dlp/yt-dlp/commit/3945677a75e94a1fecc085432d791e1c21220cd3) ([#11153](https://github.com/yt-dlp/yt-dlp/issues/11153)) by [seproDev](https://github.com/seproDev) +- [Remove Python 3.8 support](https://github.com/yt-dlp/yt-dlp/commit/d784464399b600ba9516bbcec6286f11d68974dd) ([#11321](https://github.com/yt-dlp/yt-dlp/issues/11321)) by [bashonly](https://github.com/bashonly) +- **aes**: [Fix GCM pad length calculation](https://github.com/yt-dlp/yt-dlp/commit/beae2db127d3b5017cbcf685da9de7a9ef496541) ([#11438](https://github.com/yt-dlp/yt-dlp/issues/11438)) by [seproDev](https://github.com/seproDev) +- **cookies**: [Support chrome table version 24](https://github.com/yt-dlp/yt-dlp/commit/4613096f2e6eab9dcbac0e98b6cec760bbc99375) ([#11425](https://github.com/yt-dlp/yt-dlp/issues/11425)) by [kesor](https://github.com/kesor), [seproDev](https://github.com/seproDev) +- **utils** + - [Allow partial application for more functions](https://github.com/yt-dlp/yt-dlp/commit/b6dc2c49e8793c6dfa21275e61caf49ec1148b81) ([#11391](https://github.com/yt-dlp/yt-dlp/issues/11391)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) (With fixes in [422195e](https://github.com/yt-dlp/yt-dlp/commit/422195ec70a00b0d2002b238cacbae7790c57fdf) by [Grub4K](https://github.com/Grub4K)) + - [Fix `find_element` by class](https://github.com/yt-dlp/yt-dlp/commit/f93c16395cea1fe9ffc3c594d3e019c3b214544c) ([#11402](https://github.com/yt-dlp/yt-dlp/issues/11402)) by [bashonly](https://github.com/bashonly) + - [Fix and improve `find_element` and `find_elements`](https://github.com/yt-dlp/yt-dlp/commit/b103aca24d35b72b405c340357dc01a0ed534281) ([#11443](https://github.com/yt-dlp/yt-dlp/issues/11443)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) + +#### Extractor changes +- [Resolve `language` to ISO639-2 for ISM formats](https://github.com/yt-dlp/yt-dlp/commit/21cdcf03a237a0c4979c941d5a5385cae44c7906) ([#11359](https://github.com/yt-dlp/yt-dlp/issues/11359)) by [bashonly](https://github.com/bashonly) +- **ardmediathek**: [Extract chapters](https://github.com/yt-dlp/yt-dlp/commit/59f8dd8239c31f00b708da53b39b1e2e9409b6e6) ([#11442](https://github.com/yt-dlp/yt-dlp/issues/11442)) by [iw0nderhow](https://github.com/iw0nderhow) +- **bfmtv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/754940e9a558565d6bd3c0c529802569b1d0ae4e) ([#11444](https://github.com/yt-dlp/yt-dlp/issues/11444)) by [seproDev](https://github.com/seproDev) +- **bluesky**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/5c7a5aaab27e9c3cb367b663a6136ca58866e547) ([#11055](https://github.com/yt-dlp/yt-dlp/issues/11055)) by [MellowKyler](https://github.com/MellowKyler), [seproDev](https://github.com/seproDev) +- **ccma**: [Support new 3cat.cat domain](https://github.com/yt-dlp/yt-dlp/commit/330335386d4f7603d92d6796798375336005275e) ([#11222](https://github.com/yt-dlp/yt-dlp/issues/11222)) by [JoseAngelB](https://github.com/JoseAngelB) +- **chzzk**: video: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/9c6534da81e485b2325b3489ee4128943e6d3e4b) ([#11228](https://github.com/yt-dlp/yt-dlp/issues/11228)) by [hui1601](https://github.com/hui1601) +- **cnn**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/9acf79c91a8c6c55ca972747c6858e784e2da351) ([#10185](https://github.com/yt-dlp/yt-dlp/issues/10185)) by [kylegustavo](https://github.com/kylegustavo), [seproDev](https://github.com/seproDev) +- **dailymotion** + - [Improve embed extraction](https://github.com/yt-dlp/yt-dlp/commit/a403dcf9be20b49cbb3017328f4aaa352fb6d685) ([#10843](https://github.com/yt-dlp/yt-dlp/issues/10843)) by [bashonly](https://github.com/bashonly), [pzhlkj6612](https://github.com/pzhlkj6612) + - [Support shortened URLs](https://github.com/yt-dlp/yt-dlp/commit/d1358231371f20fa23020fa9176be3b56119873e) ([#11374](https://github.com/yt-dlp/yt-dlp/issues/11374)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev) +- **facebook**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/ec9b25043f399de6a591d8370d32bf0e66c117f2) ([#11343](https://github.com/yt-dlp/yt-dlp/issues/11343)) by [kclauhk](https://github.com/kclauhk) +- **generic**: [Do not impersonate by default](https://github.com/yt-dlp/yt-dlp/commit/c29f5a7fae93a08f3cfbb6127b2faa75145b06a0) ([#11336](https://github.com/yt-dlp/yt-dlp/issues/11336)) by [bashonly](https://github.com/bashonly) +- **nfl**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/838f4385de8300a4dd4e7ffbbf0e5b7b85fb52c2) ([#11409](https://github.com/yt-dlp/yt-dlp/issues/11409)) by [bashonly](https://github.com/bashonly) +- **niconicouser**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6abef74232c0fc695cd803c18ae446cacb129389) ([#11324](https://github.com/yt-dlp/yt-dlp/issues/11324)) by [Wesley107772](https://github.com/Wesley107772) +- **soundcloud**: [Extract artists](https://github.com/yt-dlp/yt-dlp/commit/f101e5d34c97c608156ad5396714c2a2edca966a) ([#11377](https://github.com/yt-dlp/yt-dlp/issues/11377)) by [seproDev](https://github.com/seproDev) +- **tumblr**: [Support more URLs](https://github.com/yt-dlp/yt-dlp/commit/b03267bf0675eeb8df5baf1daac7cf67840c91a5) ([#6057](https://github.com/yt-dlp/yt-dlp/issues/6057)) by [selfisekai](https://github.com/selfisekai), [seproDev](https://github.com/seproDev) +- **twitter**: [Remove cookies migration workaround](https://github.com/yt-dlp/yt-dlp/commit/76802f461332d444e596437c42374fa237fa5174) ([#11392](https://github.com/yt-dlp/yt-dlp/issues/11392)) by [bashonly](https://github.com/bashonly) +- **vimeo**: [Fix API retries](https://github.com/yt-dlp/yt-dlp/commit/57212a5f97ce367590aaa5c3e9a135eead8f81f7) ([#11351](https://github.com/yt-dlp/yt-dlp/issues/11351)) by [bashonly](https://github.com/bashonly) +- **yle_areena**: [Support live events](https://github.com/yt-dlp/yt-dlp/commit/a6783a3b9905e547f6c1d4df9d7c7999feda8afa) ([#11358](https://github.com/yt-dlp/yt-dlp/issues/11358)) by [bashonly](https://github.com/bashonly), [CounterPillow](https://github.com/CounterPillow) +- **youtube**: [Adjust OAuth refresh token handling](https://github.com/yt-dlp/yt-dlp/commit/d569a8845254d90ce13ad74ae76695e8d6441068) ([#11414](https://github.com/yt-dlp/yt-dlp/issues/11414)) by [bashonly](https://github.com/bashonly) + +#### Misc. changes +- **build** + - [Disable attestations for trusted publishing](https://github.com/yt-dlp/yt-dlp/commit/428ffb75aa3534b275cf54de42693a4d261519da) ([#11418](https://github.com/yt-dlp/yt-dlp/issues/11418)) by [bashonly](https://github.com/bashonly) + - [Move optional dependencies to the `default` group](https://github.com/yt-dlp/yt-dlp/commit/87884f15580910e4e0fe0e1db73508debc657471) ([#11255](https://github.com/yt-dlp/yt-dlp/issues/11255)) by [bashonly](https://github.com/bashonly) + - [Use Ubuntu 20.04 and Python 3.9 for Linux ARM builds](https://github.com/yt-dlp/yt-dlp/commit/dd2e24446954246a2ec4d4a7e95531f52a14b351) ([#8638](https://github.com/yt-dlp/yt-dlp/issues/8638)) by [bashonly](https://github.com/bashonly) +- **cleanup** + - Miscellaneous + - [ea9e35d](https://github.com/yt-dlp/yt-dlp/commit/ea9e35d85fba5eab341cdcaf1eaed69b57f7e465) by [bashonly](https://github.com/bashonly) + - [c998238](https://github.com/yt-dlp/yt-dlp/commit/c998238c2e76c62d1d29962c6e8ebe916cc7913b) by [bashonly](https://github.com/bashonly), [KBelmin](https://github.com/KBelmin) + - [197d0b0](https://github.com/yt-dlp/yt-dlp/commit/197d0b03b6a3c8fe4fa5ace630eeffec629bf72c) by [avagordon01](https://github.com/avagordon01), [bashonly](https://github.com/bashonly), [grqz](https://github.com/grqz), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev) +- **devscripts**: `make_changelog`: [Parse full commit message for fixes](https://github.com/yt-dlp/yt-dlp/commit/0a3991edae0e10f2ea41ece9fdea5e48f789f1de) ([#11366](https://github.com/yt-dlp/yt-dlp/issues/11366)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) + ### 2024.10.22 #### Important changes diff --git a/supportedsites.md b/supportedsites.md index 7b22e8c6fa..fc79e4ae61 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -190,6 +190,7 @@ - **blerp** - **blogger.com** - **Bloomberg** + - **Bluesky** - **BokeCC** - **BongaCams** - **Boosty** @@ -247,7 +248,7 @@ - **cbsnews:livevideo**: CBS News Live Videos - **cbssports**: (**Currently broken**) - **cbssports:embed**: (**Currently broken**) - - **CCMA** + - **CCMA**: 3Cat, TV3 and Catalunya Ràdio - **CCTV**: 央视网 - **CDA**: [*cdapl*](## "netrc machine") - **CDAFolder** @@ -280,8 +281,6 @@ - **cmt.com**: (**Currently broken**) - **CNBCVideo** - **CNN** - - **CNNArticle** - - **CNNBlogs** - **CNNIndonesia** - **ComedyCentral** - **ComedyCentralTV** @@ -685,9 +684,9 @@ - **LastFMPlaylist** - **LastFMUser** - **LaXarxaMes**: [*laxarxames*](## "netrc machine") - - **lbry** - - **lbry:channel** - - **lbry:playlist** + - **lbry**: odysee.com + - **lbry:channel**: odysee.com channels + - **lbry:playlist**: odysee.com playlists - **LCI** - **Lcp** - **LcpPlay** @@ -1446,7 +1445,7 @@ - **TeleQuebecSquat** - **TeleQuebecVideo** - **TeleTask**: (**Currently broken**) - - **Telewebion** + - **Telewebion**: (**Currently broken**) - **Tempo** - **TennisTV**: [*tennistv*](## "netrc machine") - **TenPlay**: [*10play*](## "netrc machine") diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 17d7881845..75dc563679 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2024.10.22' +__version__ = '2024.11.04' -RELEASE_GIT_HEAD = '67adeb7bab00662ba55d473e405b301abb42fe61' +RELEASE_GIT_HEAD = '197d0b03b6a3c8fe4fa5ace630eeffec629bf72c' VARIANT = None @@ -12,4 +12,4 @@ CHANNEL = 'stable' ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2024.10.22' +_pkg_version = '2024.11.04' From 85fdc66b6e01d19a94b4f39b58e3c0cf23600902 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Wed, 6 Nov 2024 21:26:05 +0000 Subject: [PATCH 58/98] [ie/adobepass] Fix provider requests (#11472) Fix bug in dcfeea4dd5e5686821350baa6c7767a011944867 Closes #11469 Authored by: bashonly --- yt_dlp/extractor/adobepass.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index 7cc15ec7b6..f1b8779271 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -1362,7 +1362,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en def _download_webpage_handle(self, *args, **kwargs): headers = self.geo_verification_headers() - headers.update(kwargs.get('headers', {})) + headers.update(kwargs.get('headers') or {}) kwargs['headers'] = headers return super()._download_webpage_handle( *args, **kwargs) From be3579aaf0c3b71a0a3195e1955415d5e4d6b3d8 Mon Sep 17 00:00:00 2001 From: Steve Ovens Date: Wed, 6 Nov 2024 15:58:44 -0600 Subject: [PATCH 59/98] [ie/GameDevTV] Add extractor (#11368) Authored by: stratus-ss, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/gamedevtv.py | 141 ++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+) create mode 100644 yt_dlp/extractor/gamedevtv.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index ff5ddb2493..fc3ffeef00 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -708,6 +708,7 @@ from .gab import ( GabTVIE, ) from .gaia import GaiaIE +from .gamedevtv import GameDevTVDashboardIE from .gamejolt import ( GameJoltCommunityIE, GameJoltGameIE, diff --git a/yt_dlp/extractor/gamedevtv.py b/yt_dlp/extractor/gamedevtv.py new file mode 100644 index 0000000000..06e8b7356d --- /dev/null +++ b/yt_dlp/extractor/gamedevtv.py @@ -0,0 +1,141 @@ +import json + +from .common import InfoExtractor +from ..networking.exceptions import HTTPError +from ..utils import ( + ExtractorError, + clean_html, + int_or_none, + join_nonempty, + parse_iso8601, + str_or_none, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class GameDevTVDashboardIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gamedev\.tv/dashboard/courses/(?P\d+)(?:/(?P\d+))?' + _NETRC_MACHINE = 'gamedevtv' + _TESTS = [{ + 'url': 'https://www.gamedev.tv/dashboard/courses/25', + 'info_dict': { + 'id': '25', + 'title': 'Complete Blender Creator 3: Learn 3D Modelling for Beginners', + 'tags': ['blender', 'course', 'all', 'box modelling', 'sculpting'], + 'categories': ['Blender', '3D Art'], + 'thumbnail': 'https://gamedev-files.b-cdn.net/courses/qisc9pmu1jdc.jpg', + 'upload_date': '20220516', + 'timestamp': 1652694420, + 'modified_date': '20241027', + 'modified_timestamp': 1730049658, + }, + 'playlist_count': 100, + }, { + 'url': 'https://www.gamedev.tv/dashboard/courses/63/2279', + 'info_dict': { + 'id': 'df04f4d8-68a4-4756-a71b-9ca9446c3a01', + 'ext': 'mp4', + 'modified_timestamp': 1701695752, + 'upload_date': '20230504', + 'episode': 'MagicaVoxel Community Course Introduction', + 'series_id': '63', + 'title': 'MagicaVoxel Community Course Introduction', + 'timestamp': 1683195397, + 'modified_date': '20231204', + 'categories': ['3D Art', 'MagicaVoxel'], + 'season': 'MagicaVoxel Community Course', + 'tags': ['MagicaVoxel', 'all', 'course'], + 'series': 'MagicaVoxel 3D Art Mini Course', + 'duration': 1405, + 'episode_number': 1, + 'season_number': 1, + 'season_id': '219', + 'description': 'md5:a378738c5bbec1c785d76c067652d650', + 'display_id': '63-219-2279', + 'alt_title': '1_CC_MVX MagicaVoxel Community Course Introduction.mp4', + 'thumbnail': 'https://vz-23691c65-6fa.b-cdn.net/df04f4d8-68a4-4756-a71b-9ca9446c3a01/thumbnail.jpg', + }, + }] + _API_HEADERS = {} + + def _perform_login(self, username, password): + try: + response = self._download_json( + 'https://api.gamedev.tv/api/students/login', None, 'Logging in', + headers={'Content-Type': 'application/json'}, + data=json.dumps({ + 'email': username, + 'password': password, + 'cart_items': [], + }).encode()) + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 401: + raise ExtractorError('Invalid username/password', expected=True) + raise + + self._API_HEADERS['Authorization'] = f'{response["token_type"]} {response["access_token"]}' + + def _real_initialize(self): + if not self._API_HEADERS.get('Authorization'): + self.raise_login_required( + 'This content is only available with purchase', method='password') + + def _entries(self, data, course_id, course_info, selected_lecture): + for section in traverse_obj(data, ('sections', ..., {dict})): + section_info = traverse_obj(section, { + 'season_id': ('id', {str_or_none}), + 'season': ('title', {str}), + 'season_number': ('order', {int_or_none}), + }) + for lecture in traverse_obj(section, ('lectures', lambda _, v: url_or_none(v['video']['playListUrl']))): + if selected_lecture and str(lecture.get('id')) != selected_lecture: + continue + display_id = join_nonempty(course_id, section_info.get('season_id'), lecture.get('id')) + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + lecture['video']['playListUrl'], display_id, 'mp4', m3u8_id='hls') + yield { + **course_info, + **section_info, + 'id': display_id, # fallback + 'display_id': display_id, + 'formats': formats, + 'subtitles': subtitles, + 'series': course_info.get('title'), + 'series_id': course_id, + **traverse_obj(lecture, { + 'id': ('video', 'guid', {str}), + 'title': ('title', {str}), + 'alt_title': ('video', 'title', {str}), + 'description': ('description', {clean_html}), + 'episode': ('title', {str}), + 'episode_number': ('order', {int_or_none}), + 'duration': ('video', 'duration_in_sec', {int_or_none}), + 'timestamp': ('video', 'created_at', {parse_iso8601}), + 'modified_timestamp': ('video', 'updated_at', {parse_iso8601}), + 'thumbnail': ('video', 'thumbnailUrl', {url_or_none}), + }), + } + + def _real_extract(self, url): + course_id, lecture_id = self._match_valid_url(url).group('course_id', 'lecture_id') + data = self._download_json( + f'https://api.gamedev.tv/api/courses/my/{course_id}', course_id, + headers=self._API_HEADERS)['data'] + + course_info = traverse_obj(data, { + 'title': ('title', {str}), + 'tags': ('tags', ..., 'name', {str}), + 'categories': ('categories', ..., 'title', {str}), + 'timestamp': ('created_at', {parse_iso8601}), + 'modified_timestamp': ('updated_at', {parse_iso8601}), + 'thumbnail': ('image', {url_or_none}), + }) + + entries = self._entries(data, course_id, course_info, lecture_id) + if lecture_id: + lecture = next(entries, None) + if not lecture: + raise ExtractorError('Lecture not found') + return lecture + return self.playlist_result(entries, course_id, **course_info) From f13df591d4d7ca8e2f31b35c9c91e69ba9e9b013 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 9 Nov 2024 23:26:02 +0000 Subject: [PATCH 60/98] [build] Enable attestations for trusted publishing (#11420) Reverts 428ffb75aa3534b275cf54de42693a4d261519da Authored by: bashonly --- .github/workflows/build.yml | 3 ++- .github/workflows/release-master.yml | 17 +++++++++++++++++ .github/workflows/release-nightly.yml | 17 +++++++++++++++++ .github/workflows/release.yml | 19 ++++++++++++++----- 4 files changed, 50 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d062d7720d..c18843cfcb 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -504,7 +504,8 @@ jobs: - windows32 runs-on: ubuntu-latest steps: - - uses: actions/download-artifact@v4 + - name: Download artifacts + uses: actions/download-artifact@v4 with: path: artifact pattern: build-bin-* diff --git a/.github/workflows/release-master.yml b/.github/workflows/release-master.yml index c49319b171..78445e417e 100644 --- a/.github/workflows/release-master.yml +++ b/.github/workflows/release-master.yml @@ -28,3 +28,20 @@ jobs: actions: write # For cleaning up cache id-token: write # mandatory for trusted publishing secrets: inherit + + publish_pypi: + needs: [release] + if: vars.MASTER_PYPI_PROJECT != '' + runs-on: ubuntu-latest + permissions: + id-token: write # mandatory for trusted publishing + steps: + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + path: dist + name: build-pypi + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + verbose: true diff --git a/.github/workflows/release-nightly.yml b/.github/workflows/release-nightly.yml index b536c50669..8f72844058 100644 --- a/.github/workflows/release-nightly.yml +++ b/.github/workflows/release-nightly.yml @@ -41,3 +41,20 @@ jobs: actions: write # For cleaning up cache id-token: write # mandatory for trusted publishing secrets: inherit + + publish_pypi: + needs: [release] + if: vars.NIGHTLY_PYPI_PROJECT != '' + runs-on: ubuntu-latest + permissions: + id-token: write # mandatory for trusted publishing + steps: + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + path: dist + name: build-pypi + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + verbose: true diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2bc09c64d0..26b93e429c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -2,10 +2,6 @@ name: Release on: workflow_call: inputs: - prerelease: - required: false - default: true - type: boolean source: required: false default: '' @@ -18,6 +14,10 @@ on: required: false default: '' type: string + prerelease: + required: false + default: true + type: boolean workflow_dispatch: inputs: source: @@ -278,11 +278,20 @@ jobs: make clean-cache python -m build --no-isolation . + - name: Upload artifacts + if: github.event_name != 'workflow_dispatch' + uses: actions/upload-artifact@v4 + with: + name: build-pypi + path: | + dist/* + compression-level: 0 + - name: Publish to PyPI + if: github.event_name == 'workflow_dispatch' uses: pypa/gh-action-pypi-publish@release/v1 with: verbose: true - attestations: false # Currently doesn't work w/ reusable workflows (breaks nightly) publish: needs: [prepare, build] From 240a7d43c8a67ffb86d44dc276805aa43c358dcc Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 9 Nov 2024 23:46:47 +0000 Subject: [PATCH 61/98] [build] Pin `websockets` version to >=13.0,<14 (#11488) websockets 14.0 causes CI test failures (a lot more of them) Authored by: bashonly --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 55bd55bb9e..75ad3e15d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,7 +52,7 @@ default = [ "pycryptodomex", "requests>=2.32.2,<3", "urllib3>=1.26.17,<3", - "websockets>=13.0", + "websockets>=13.0,<14", ] curl-cffi = [ "curl-cffi==0.5.10; os_name=='nt' and implementation_name=='cpython'", From b83ca24eb72e1e558b0185bd73975586c0bc0546 Mon Sep 17 00:00:00 2001 From: sepro Date: Sun, 10 Nov 2024 00:53:49 +0100 Subject: [PATCH 62/98] [core] Catch broken Cryptodome installations (#11486) Authored by: seproDev --- yt_dlp/dependencies/Cryptodome.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/dependencies/Cryptodome.py b/yt_dlp/dependencies/Cryptodome.py index 2cfa4c9522..0e4404d49e 100644 --- a/yt_dlp/dependencies/Cryptodome.py +++ b/yt_dlp/dependencies/Cryptodome.py @@ -24,7 +24,7 @@ try: from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401 from Crypto.Hash import CMAC, SHA1 # noqa: F401 from Crypto.PublicKey import RSA # noqa: F401 -except ImportError: +except (ImportError, OSError): __version__ = f'broken {__version__}'.strip() From c39016f66df76d14284c705736ca73db8055d8de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julio=20Napur=C3=AD?= Date: Mon, 11 Nov 2024 12:42:05 -0500 Subject: [PATCH 63/98] [ie/spreaker] Support episode pages and access keys (#11489) Authored by: julionc --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/spreaker.py | 63 ++++++++++++++++++--------------- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index fc3ffeef00..4543c85878 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1939,7 +1939,6 @@ from .spotify import ( ) from .spreaker import ( SpreakerIE, - SpreakerPageIE, SpreakerShowIE, SpreakerShowPageIE, ) diff --git a/yt_dlp/extractor/spreaker.py b/yt_dlp/extractor/spreaker.py index d1df45969b..ff6e1f423c 100644 --- a/yt_dlp/extractor/spreaker.py +++ b/yt_dlp/extractor/spreaker.py @@ -4,11 +4,13 @@ from .common import InfoExtractor from ..utils import ( float_or_none, int_or_none, + parse_qs, str_or_none, try_get, unified_timestamp, url_or_none, ) +from ..utils.traversal import traverse_obj def _extract_episode(data, episode_id=None): @@ -58,15 +60,10 @@ def _extract_episode(data, episode_id=None): class SpreakerIE(InfoExtractor): - _VALID_URL = r'''(?x) - https?:// - api\.spreaker\.com/ - (?: - (?:download/)?episode| - v2/episodes - )/ - (?P\d+) - ''' + _VALID_URL = [ + r'https?://api\.spreaker\.com/(?:(?:download/)?episode|v2/episodes)/(?P\d+)', + r'https?://(?:www\.)?spreaker\.com/episode/[^#?/]*?(?P\d+)/?(?:[?#]|$)', + ] _TESTS = [{ 'url': 'https://api.spreaker.com/episode/12534508', 'info_dict': { @@ -83,7 +80,9 @@ class SpreakerIE(InfoExtractor): 'view_count': int, 'like_count': int, 'comment_count': int, - 'series': 'Success With Music (SWM)', + 'series': 'Success With Music | SWM', + 'thumbnail': 'https://d3wo5wojvuv7l.cloudfront.net/t_square_limited_160/images.spreaker.com/original/777ce4f96b71b0e1b7c09a5e625210e3.jpg', + 'creators': ['SWM'], }, }, { 'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3', @@ -91,34 +90,40 @@ class SpreakerIE(InfoExtractor): }, { 'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments', 'only_matching': True, + }, { + 'note': 'episode', + 'url': 'https://www.spreaker.com/episode/grunge-music-origins-the-raw-sound-that-defined-a-generation--60269615', + 'info_dict': { + 'id': '60269615', + 'display_id': 'grunge-music-origins-the-raw-sound-that-', + 'ext': 'mp3', + 'title': 'Grunge Music Origins - The Raw Sound that Defined a Generation', + 'description': str, + 'timestamp': 1717468905, + 'upload_date': '20240604', + 'uploader': 'Katie Brown 2', + 'uploader_id': '17733249', + 'duration': 818.83, + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'series': '90s Grunge', + 'thumbnail': 'https://d3wo5wojvuv7l.cloudfront.net/t_square_limited_160/images.spreaker.com/original/bb0d4178f7cf57cc8786dedbd9c5d969.jpg', + 'creators': ['Katie Brown 2'], + }, + }, { + 'url': 'https://www.spreaker.com/episode/60269615', + 'only_matching': True, }] def _real_extract(self, url): episode_id = self._match_id(url) data = self._download_json( f'https://api.spreaker.com/v2/episodes/{episode_id}', - episode_id)['response']['episode'] + episode_id, query=traverse_obj(parse_qs(url), {'key': ('key', 0)}))['response']['episode'] return _extract_episode(data, episode_id) -class SpreakerPageIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - episode_id = self._search_regex( - (r'data-episode_id=["\'](?P\d+)', - r'episode_id\s*:\s*(?P\d+)'), webpage, 'episode id') - return self.url_result( - f'https://api.spreaker.com/episode/{episode_id}', - ie=SpreakerIE.ie_key(), video_id=episode_id) - - class SpreakerShowIE(InfoExtractor): _VALID_URL = r'https?://api\.spreaker\.com/show/(?P\d+)' _TESTS = [{ From e398217aae19bb25f91797bfbe8a3243698d7f45 Mon Sep 17 00:00:00 2001 From: sepro Date: Mon, 11 Nov 2024 18:44:53 +0100 Subject: [PATCH 64/98] [ie/rutube] Rework extractors (#11480) Closes #9694, Closes #10104, Closes #11117, Closes #11415, Closes #11476 Authored by: seproDev --- yt_dlp/extractor/rutube.py | 202 +++++++++++++++++++++++-------------- 1 file changed, 129 insertions(+), 73 deletions(-) diff --git a/yt_dlp/extractor/rutube.py b/yt_dlp/extractor/rutube.py index 2c416811af..abf9aec727 100644 --- a/yt_dlp/extractor/rutube.py +++ b/yt_dlp/extractor/rutube.py @@ -2,15 +2,18 @@ import itertools from .common import InfoExtractor from ..utils import ( + UnsupportedError, bool_or_none, determine_ext, int_or_none, + js_to_json, parse_qs, - traverse_obj, + str_or_none, try_get, unified_timestamp, url_or_none, ) +from ..utils.traversal import traverse_obj class RutubeBaseIE(InfoExtractor): @@ -19,7 +22,7 @@ class RutubeBaseIE(InfoExtractor): query = {} query['format'] = 'json' return self._download_json( - f'http://rutube.ru/api/video/{video_id}/', + f'https://rutube.ru/api/video/{video_id}/', video_id, 'Downloading video JSON', 'Unable to download video JSON', query=query) @@ -61,18 +64,21 @@ class RutubeBaseIE(InfoExtractor): query = {} query['format'] = 'json' return self._download_json( - f'http://rutube.ru/api/play/options/{video_id}/', + f'https://rutube.ru/api/play/options/{video_id}/', video_id, 'Downloading options JSON', 'Unable to download options JSON', headers=self.geo_verification_headers(), query=query) - def _extract_formats(self, options, video_id): + def _extract_formats_and_subtitles(self, options, video_id): formats = [] + subtitles = {} for format_id, format_url in options['video_balancer'].items(): ext = determine_ext(format_url) if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) + fmts, subs = self._extract_m3u8_formats_and_subtitles( + format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) elif ext == 'f4m': formats.extend(self._extract_f4m_formats( format_url, video_id, f4m_id=format_id, fatal=False)) @@ -82,11 +88,19 @@ class RutubeBaseIE(InfoExtractor): 'format_id': format_id, }) for hls_url in traverse_obj(options, ('live_streams', 'hls', ..., 'url', {url_or_none})): - formats.extend(self._extract_m3u8_formats(hls_url, video_id, ext='mp4', fatal=False)) - return formats + fmts, subs = self._extract_m3u8_formats_and_subtitles( + hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls') + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + for caption in traverse_obj(options, ('captions', lambda _, v: url_or_none(v['file']))): + subtitles.setdefault(caption.get('code') or 'ru', []).append({ + 'url': caption['file'], + 'name': caption.get('langTitle'), + }) + return formats, subtitles - def _download_and_extract_formats(self, video_id, query=None): - return self._extract_formats( + def _download_and_extract_formats_and_subtitles(self, video_id, query=None): + return self._extract_formats_and_subtitles( self._download_api_options(video_id, query=query), video_id) @@ -97,8 +111,8 @@ class RutubeIE(RutubeBaseIE): _EMBED_REGEX = [r']+?src=(["\'])(?P(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1'] _TESTS = [{ - 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', - 'md5': 'e33ac625efca66aba86cbec9851f2692', + 'url': 'https://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', + 'md5': '3d73fdfe5bb81b9aef139e22ef3de26a', 'info_dict': { 'id': '3eac3b4561676c17df9132a9a1e62e3e', 'ext': 'mp4', @@ -111,26 +125,25 @@ class RutubeIE(RutubeBaseIE): 'upload_date': '20131016', 'age_limit': 0, 'view_count': int, - 'thumbnail': 'http://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg', + 'thumbnail': 'https://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg', 'categories': ['Новости и СМИ'], 'chapters': [], }, - 'expected_warnings': ['Unable to download f4m'], }, { - 'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661', + 'url': 'https://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661', 'only_matching': True, }, { - 'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661', + 'url': 'https://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661', 'only_matching': True, }, { - 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252', + 'url': 'https://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252', 'only_matching': True, }, { 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source', 'only_matching': True, }, { 'url': 'https://rutube.ru/video/private/884fb55f07a97ab673c7d654553e0f48/?p=x2QojCumHTS3rsKHWXN8Lg', - 'md5': 'd106225f15d625538fe22971158e896f', + 'md5': '4fce7b4fcc7b1bcaa3f45eb1e1ad0dd7', 'info_dict': { 'id': '884fb55f07a97ab673c7d654553e0f48', 'ext': 'mp4', @@ -143,11 +156,10 @@ class RutubeIE(RutubeBaseIE): 'upload_date': '20221210', 'age_limit': 0, 'view_count': int, - 'thumbnail': 'http://pic.rutubelist.ru/video/f2/d4/f2d42b54be0a6e69c1c22539e3152156.jpg', + 'thumbnail': 'https://pic.rutubelist.ru/video/f2/d4/f2d42b54be0a6e69c1c22539e3152156.jpg', 'categories': ['Видеоигры'], 'chapters': [], }, - 'expected_warnings': ['Unable to download f4m'], }, { 'url': 'https://rutube.ru/video/c65b465ad0c98c89f3b25cb03dcc87c6/', 'info_dict': { @@ -156,17 +168,16 @@ class RutubeIE(RutubeBaseIE): 'chapters': 'count:4', 'categories': ['Бизнес и предпринимательство'], 'description': 'md5:252feac1305257d8c1bab215cedde75d', - 'thumbnail': 'http://pic.rutubelist.ru/video/71/8f/718f27425ea9706073eb80883dd3787b.png', + 'thumbnail': 'https://pic.rutubelist.ru/video/71/8f/718f27425ea9706073eb80883dd3787b.png', 'duration': 782, 'age_limit': 0, 'uploader_id': '23491359', 'timestamp': 1677153329, 'view_count': int, 'upload_date': '20230223', - 'title': 'Бизнес с нуля: найм сотрудников. Интервью с директором строительной компании', + 'title': 'Бизнес с нуля: найм сотрудников. Интервью с директором строительной компании #1', 'uploader': 'Стас Быков', }, - 'expected_warnings': ['Unable to download f4m'], }, { 'url': 'https://rutube.ru/live/video/c58f502c7bb34a8fcdd976b221fca292/', 'info_dict': { @@ -174,7 +185,7 @@ class RutubeIE(RutubeBaseIE): 'ext': 'mp4', 'categories': ['Телепередачи'], 'description': '', - 'thumbnail': 'http://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg', + 'thumbnail': 'https://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg', 'live_status': 'is_live', 'age_limit': 0, 'uploader_id': '23460655', @@ -184,6 +195,24 @@ class RutubeIE(RutubeBaseIE): 'title': r're:Первый канал. Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'uploader': 'Первый канал', }, + }, { + 'url': 'https://rutube.ru/play/embed/03a9cb54bac3376af4c5cb0f18444e01/', + 'info_dict': { + 'id': '03a9cb54bac3376af4c5cb0f18444e01', + 'ext': 'mp4', + 'age_limit': 0, + 'description': '', + 'title': 'Церемония начала торгов акциями ПАО «ЕвроТранс»', + 'chapters': [], + 'upload_date': '20240829', + 'duration': 293, + 'uploader': 'MOEX - Московская биржа', + 'timestamp': 1724946628, + 'thumbnail': 'https://pic.rutubelist.ru/video/2e/24/2e241fddb459baf0fa54acfca44874f4.jpg', + 'view_count': int, + 'uploader_id': '38420507', + 'categories': ['Интервью'], + }, }, { 'url': 'https://rutube.ru/video/5ab908fccfac5bb43ef2b1e4182256b0/', 'only_matching': True, @@ -192,40 +221,46 @@ class RutubeIE(RutubeBaseIE): 'only_matching': True, }] - @classmethod - def suitable(cls, url): - return False if RutubePlaylistIE.suitable(url) else super().suitable(url) - def _real_extract(self, url): video_id = self._match_id(url) query = parse_qs(url) info = self._download_and_extract_info(video_id, query) - info['formats'] = self._download_and_extract_formats(video_id, query) - return info + formats, subtitles = self._download_and_extract_formats_and_subtitles(video_id, query) + return { + **info, + 'formats': formats, + 'subtitles': subtitles, + } class RutubeEmbedIE(RutubeBaseIE): IE_NAME = 'rutube:embed' IE_DESC = 'Rutube embedded videos' - _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P[0-9]+)' + _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P[0-9]+)(?:[?#/]|$)' _TESTS = [{ - 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', + 'url': 'https://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', 'info_dict': { 'id': 'a10e53b86e8f349080f718582ce4c661', 'ext': 'mp4', 'timestamp': 1387830582, 'upload_date': '20131223', 'uploader_id': '297833', - 'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix

восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89', 'uploader': 'subziro89 ILya', 'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89', + 'age_limit': 0, + 'duration': 1395, + 'chapters': [], + 'description': 'md5:a5acea57bbc3ccdc3cacd1f11a014b5b', + 'view_count': int, + 'thumbnail': 'https://pic.rutubelist.ru/video/d3/03/d3031f4670a6e6170d88fb3607948418.jpg', + 'categories': ['Сериалы'], }, 'params': { 'skip_download': True, }, }, { - 'url': 'http://rutube.ru/play/embed/8083783', + 'url': 'https://rutube.ru/play/embed/8083783', 'only_matching': True, }, { # private video @@ -240,11 +275,12 @@ class RutubeEmbedIE(RutubeBaseIE): query = parse_qs(url) options = self._download_api_options(embed_id, query) video_id = options['effective_video'] - formats = self._extract_formats(options, video_id) + formats, subtitles = self._extract_formats_and_subtitles(options, video_id) info = self._download_and_extract_info(video_id, query) info.update({ 'extractor_key': 'Rutube', 'formats': formats, + 'subtitles': subtitles, }) return info @@ -295,14 +331,14 @@ class RutubeTagsIE(RutubePlaylistBaseIE): IE_DESC = 'Rutube tags' _VALID_URL = r'https?://rutube\.ru/tags/video/(?P\d+)' _TESTS = [{ - 'url': 'http://rutube.ru/tags/video/1800/', + 'url': 'https://rutube.ru/tags/video/1800/', 'info_dict': { 'id': '1800', }, 'playlist_mincount': 68, }] - _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json' + _PAGE_TEMPLATE = 'https://rutube.ru/api/tags/video/%s/?page=%s&format=json' class RutubeMovieIE(RutubePlaylistBaseIE): @@ -310,8 +346,8 @@ class RutubeMovieIE(RutubePlaylistBaseIE): IE_DESC = 'Rutube movies' _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P\d+)' - _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json' - _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json' + _MOVIE_TEMPLATE = 'https://rutube.ru/api/metainfo/tv/%s/?format=json' + _PAGE_TEMPLATE = 'https://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json' def _real_extract(self, url): movie_id = self._match_id(url) @@ -327,62 +363,82 @@ class RutubePersonIE(RutubePlaylistBaseIE): IE_DESC = 'Rutube person videos' _VALID_URL = r'https?://rutube\.ru/video/person/(?P\d+)' _TESTS = [{ - 'url': 'http://rutube.ru/video/person/313878/', + 'url': 'https://rutube.ru/video/person/313878/', 'info_dict': { 'id': '313878', }, - 'playlist_mincount': 37, + 'playlist_mincount': 36, }] - _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' + _PAGE_TEMPLATE = 'https://rutube.ru/api/video/person/%s/?page=%s&format=json' class RutubePlaylistIE(RutubePlaylistBaseIE): IE_NAME = 'rutube:playlist' IE_DESC = 'Rutube playlists' - _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P\d+)' + _VALID_URL = r'https?://rutube\.ru/plst/(?P\d+)' _TESTS = [{ - 'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag', + 'url': 'https://rutube.ru/plst/308547/', 'info_dict': { - 'id': '3097', + 'id': '308547', }, - 'playlist_count': 27, - }, { - 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source', - 'only_matching': True, + 'playlist_mincount': 22, }] - - _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json' - - @classmethod - def suitable(cls, url): - from ..utils import int_or_none, parse_qs - - if not super().suitable(url): - return False - params = parse_qs(url) - return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0]) - - def _next_page_url(self, page_num, playlist_id, item_kind): - return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num) - - def _real_extract(self, url): - qs = parse_qs(url) - playlist_kind = qs['pl_type'][0] - playlist_id = qs['pl_id'][0] - return self._extract_playlist(playlist_id, item_kind=playlist_kind) + _PAGE_TEMPLATE = 'https://rutube.ru/api/playlist/custom/%s/videos?page=%s&format=json' class RutubeChannelIE(RutubePlaylistBaseIE): IE_NAME = 'rutube:channel' IE_DESC = 'Rutube channel' - _VALID_URL = r'https?://rutube\.ru/channel/(?P\d+)/videos' + _VALID_URL = r'https?://rutube\.ru/(?:channel/(?P\d+)|u/(?P\w+))(?:/(?P
videos|shorts|playlists))?' _TESTS = [{ 'url': 'https://rutube.ru/channel/639184/videos/', 'info_dict': { - 'id': '639184', + 'id': '639184_videos', }, - 'playlist_mincount': 133, + 'playlist_mincount': 129, + }, { + 'url': 'https://rutube.ru/channel/25902603/shorts/', + 'info_dict': { + 'id': '25902603_shorts', + }, + 'playlist_mincount': 277, + }, { + 'url': 'https://rutube.ru/channel/25902603/', + 'info_dict': { + 'id': '25902603', + }, + 'playlist_mincount': 406, + }, { + 'url': 'https://rutube.ru/u/rutube/videos/', + 'info_dict': { + 'id': '23704195_videos', + }, + 'playlist_mincount': 113, }] - _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' + _PAGE_TEMPLATE = 'https://rutube.ru/api/video/person/%s/?page=%s&format=json&origin__type=%s' + + def _next_page_url(self, page_num, playlist_id, section): + origin_type = { + 'videos': 'rtb,rst,ifrm,rspa', + 'shorts': 'rshorts', + None: '', + }.get(section) + return self._PAGE_TEMPLATE % (playlist_id, page_num, origin_type) + + def _real_extract(self, url): + playlist_id, slug, section = self._match_valid_url(url).group('id', 'slug', 'section') + if section == 'playlists': + raise UnsupportedError(url) + if slug: + webpage = self._download_webpage(url, slug) + redux_state = self._search_json( + r'window\.reduxState\s*=', webpage, 'redux state', slug, transform_source=js_to_json) + playlist_id = traverse_obj(redux_state, ( + 'api', 'queries', lambda k, _: k.startswith('channelIdBySlug'), + 'data', 'channel_id', {int}, {str_or_none}, any)) + playlist = self._extract_playlist(playlist_id, section=section) + if section: + playlist['id'] = f'{playlist_id}_{section}' + return playlist From c6737310619022248f5d0fd13872073cac168453 Mon Sep 17 00:00:00 2001 From: Subrat Lima <74418100+subrat-lima@users.noreply.github.com> Date: Tue, 12 Nov 2024 00:38:18 +0530 Subject: [PATCH 65/98] [ie/spreaker] Support podcast and feed pages (#10968) Closes #10925 Authored by: subrat-lima --- yt_dlp/extractor/_extractors.py | 1 - yt_dlp/extractor/spreaker.py | 50 +++++++++++++++++---------------- 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 4543c85878..0b935fe3a5 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1940,7 +1940,6 @@ from .spotify import ( from .spreaker import ( SpreakerIE, SpreakerShowIE, - SpreakerShowPageIE, ) from .springboardplatform import SpringboardPlatformIE from .sprout import SproutIE diff --git a/yt_dlp/extractor/spreaker.py b/yt_dlp/extractor/spreaker.py index ff6e1f423c..c64c2fcd2e 100644 --- a/yt_dlp/extractor/spreaker.py +++ b/yt_dlp/extractor/spreaker.py @@ -2,6 +2,7 @@ import itertools from .common import InfoExtractor from ..utils import ( + filter_dict, float_or_none, int_or_none, parse_qs, @@ -119,29 +120,46 @@ class SpreakerIE(InfoExtractor): def _real_extract(self, url): episode_id = self._match_id(url) data = self._download_json( - f'https://api.spreaker.com/v2/episodes/{episode_id}', - episode_id, query=traverse_obj(parse_qs(url), {'key': ('key', 0)}))['response']['episode'] + f'https://api.spreaker.com/v2/episodes/{episode_id}', episode_id, + query=traverse_obj(parse_qs(url), {'key': ('key', 0)}))['response']['episode'] return _extract_episode(data, episode_id) class SpreakerShowIE(InfoExtractor): - _VALID_URL = r'https?://api\.spreaker\.com/show/(?P\d+)' + _VALID_URL = [ + r'https?://api\.spreaker\.com/show/(?P\d+)', + r'https?://(?:www\.)?spreaker\.com/podcast/[\w-]+--(?P[\d]+)', + r'https?://(?:www\.)?spreaker\.com/show/(?P\d+)/episodes/feed', + ] _TESTS = [{ 'url': 'https://api.spreaker.com/show/4652058', 'info_dict': { 'id': '4652058', }, 'playlist_mincount': 118, + }, { + 'url': 'https://www.spreaker.com/podcast/health-wealth--5918323', + 'info_dict': { + 'id': '5918323', + }, + 'playlist_mincount': 60, + }, { + 'url': 'https://www.spreaker.com/show/5887186/episodes/feed', + 'info_dict': { + 'id': '5887186', + }, + 'playlist_mincount': 290, }] - def _entries(self, show_id): + def _entries(self, show_id, key=None): for page_num in itertools.count(1): episodes = self._download_json( f'https://api.spreaker.com/show/{show_id}/episodes', - show_id, note=f'Downloading JSON page {page_num}', query={ + show_id, note=f'Downloading JSON page {page_num}', query=filter_dict({ 'page': page_num, 'max_per_page': 100, - }) + 'key': key, + })) pager = try_get(episodes, lambda x: x['response']['pager'], dict) if not pager: break @@ -157,21 +175,5 @@ class SpreakerShowIE(InfoExtractor): def _real_extract(self, url): show_id = self._match_id(url) - return self.playlist_result(self._entries(show_id), playlist_id=show_id) - - -class SpreakerShowPageIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P[^/?#&]+)' - _TESTS = [{ - 'url': 'https://www.spreaker.com/show/success-with-music', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - show_id = self._search_regex( - r'show_id\s*:\s*(?P\d+)', webpage, 'show id') - return self.url_result( - f'https://api.spreaker.com/show/{show_id}', - ie=SpreakerShowIE.ie_key(), video_id=show_id) + key = traverse_obj(parse_qs(url), ('key', 0)) + return self.playlist_result(self._entries(show_id, key), playlist_id=show_id) From 0ec9bfed4d4a52bfb4f8733da1acf0aeeae21e6b Mon Sep 17 00:00:00 2001 From: Sakura286 Date: Tue, 12 Nov 2024 04:40:29 +0800 Subject: [PATCH 66/98] [ie/MixchMovie] Add extractor (#10897) Closes #10765 Authored by: Sakura286 --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/mixch.py | 57 +++++++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 0b935fe3a5..51caefd4d7 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1156,6 +1156,7 @@ from .mitele import MiTeleIE from .mixch import ( MixchArchiveIE, MixchIE, + MixchMovieIE, ) from .mixcloud import ( MixcloudIE, diff --git a/yt_dlp/extractor/mixch.py b/yt_dlp/extractor/mixch.py index 7832784b2e..4bccc81bdc 100644 --- a/yt_dlp/extractor/mixch.py +++ b/yt_dlp/extractor/mixch.py @@ -12,7 +12,7 @@ from ..utils.traversal import traverse_obj class MixchIE(InfoExtractor): IE_NAME = 'mixch' - _VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P\d+)' + _VALID_URL = r'https?://mixch\.tv/u/(?P\d+)' _TESTS = [{ 'url': 'https://mixch.tv/u/16943797/live', @@ -74,7 +74,7 @@ class MixchIE(InfoExtractor): class MixchArchiveIE(InfoExtractor): IE_NAME = 'mixch:archive' - _VALID_URL = r'https?://(?:www\.)?mixch\.tv/archive/(?P\d+)' + _VALID_URL = r'https?://mixch\.tv/archive/(?P\d+)' _TESTS = [{ 'url': 'https://mixch.tv/archive/421', @@ -116,3 +116,56 @@ class MixchArchiveIE(InfoExtractor): 'formats': self._extract_m3u8_formats(info_json['archiveURL'], video_id), 'thumbnail': traverse_obj(info_json, ('thumbnailURL', {url_or_none})), } + + +class MixchMovieIE(InfoExtractor): + IE_NAME = 'mixch:movie' + _VALID_URL = r'https?://mixch\.tv/m/(?P\w+)' + + _TESTS = [{ + 'url': 'https://mixch.tv/m/Ve8KNkJ5', + 'info_dict': { + 'id': 'Ve8KNkJ5', + 'title': '夏☀️\nムービーへのポイントは本イベントに加算されないので配信にてお願い致します🙇🏻\u200d♀️\n#TGCCAMPUS #ミス東大 #ミス東大2024 ', + 'ext': 'mp4', + 'uploader': 'ミス東大No.5 松藤百香🍑💫', + 'uploader_id': '12299174', + 'channel_follower_count': int, + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'timestamp': 1724070828, + 'uploader_url': 'https://mixch.tv/u/12299174', + 'live_status': 'not_live', + 'upload_date': '20240819', + }, + }, { + 'url': 'https://mixch.tv/m/61DzpIKE', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + data = self._download_json( + f'https://mixch.tv/api-web/movies/{video_id}', video_id) + return { + 'id': video_id, + 'formats': [{ + 'format_id': 'mp4', + 'url': data['movie']['file'], + 'ext': 'mp4', + }], + **traverse_obj(data, { + 'title': ('movie', 'title', {str}), + 'thumbnail': ('movie', 'thumbnailURL', {url_or_none}), + 'uploader': ('ownerInfo', 'name', {str}), + 'uploader_id': ('ownerInfo', 'id', {int}, {str_or_none}), + 'channel_follower_count': ('ownerInfo', 'fan', {int_or_none}), + 'view_count': ('ownerInfo', 'view', {int_or_none}), + 'like_count': ('movie', 'favCount', {int_or_none}), + 'comment_count': ('movie', 'commentCount', {int_or_none}), + 'timestamp': ('movie', 'published', {int_or_none}), + 'uploader_url': ('ownerInfo', 'id', {lambda x: x and f'https://mixch.tv/u/{x}'}, filter), + }), + 'live_status': 'not_live', + } From f9c8deb4e5887ff5150e911ac0452e645f988044 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Mon, 11 Nov 2024 21:19:03 +0000 Subject: [PATCH 67/98] [build] Bump PyInstaller version pin to `>=6.11.1` (#11507) Authored by: bashonly --- .github/workflows/build.yml | 4 ++-- pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c18843cfcb..a211ae1652 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -411,7 +411,7 @@ jobs: run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds python devscripts/install_deps.py -o --include build python devscripts/install_deps.py --include curl-cffi - python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.10.0-py3-none-any.whl" + python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.11.1-py3-none-any.whl" - name: Prepare run: | @@ -460,7 +460,7 @@ jobs: run: | python devscripts/install_deps.py -o --include build python devscripts/install_deps.py - python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.10.0-py3-none-any.whl" + python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.11.1-py3-none-any.whl" - name: Prepare run: | diff --git a/pyproject.toml b/pyproject.toml index 75ad3e15d2..ef921fed58 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,7 +83,7 @@ test = [ "pytest-rerunfailures~=14.0", ] pyinstaller = [ - "pyinstaller>=6.10.0", # Windows temp cleanup fixed in 6.10.0 + "pyinstaller>=6.11.1", # Windows temp cleanup fixed in 6.11.1 ] [project.urls] From 2db8c2e7d57a1784b06057c48e3e91023720d195 Mon Sep 17 00:00:00 2001 From: Hugo Date: Mon, 11 Nov 2024 23:00:05 +0100 Subject: [PATCH 68/98] [ie/CloudflareStream] Avoid extraction via videodelivery.net (#11478) Closes #11477 Authored by: hugovdev --- yt_dlp/extractor/cloudflarestream.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/cloudflarestream.py b/yt_dlp/extractor/cloudflarestream.py index 8a409461a8..9e9e89a801 100644 --- a/yt_dlp/extractor/cloudflarestream.py +++ b/yt_dlp/extractor/cloudflarestream.py @@ -8,7 +8,7 @@ class CloudflareStreamIE(InfoExtractor): _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)' _EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video=' _ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+' - _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P{_ID_RE})' + _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}(?P{_DOMAIN_RE})/|{_EMBED_RE})(?P{_ID_RE})' _EMBED_REGEX = [ rf']+\bsrc=(["\'])(?P(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1', rf']+\bsrc=["\'](?Phttps?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})', @@ -19,7 +19,7 @@ class CloudflareStreamIE(InfoExtractor): 'id': '31c9291ab41fac05471db4e73aa11717', 'ext': 'mp4', 'title': '31c9291ab41fac05471db4e73aa11717', - 'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg', + 'thumbnail': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg', }, 'params': { 'skip_download': 'm3u8', @@ -30,7 +30,7 @@ class CloudflareStreamIE(InfoExtractor): 'id': '0e8e040aec776862e1d632a699edf59e', 'ext': 'mp4', 'title': '0e8e040aec776862e1d632a699edf59e', - 'thumbnail': 'https://videodelivery.net/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg', + 'thumbnail': 'https://cloudflarestream.com/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg', }, }, { 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1', @@ -54,7 +54,7 @@ class CloudflareStreamIE(InfoExtractor): 'id': 'eaef9dea5159cf968be84241b5cedfe7', 'ext': 'mp4', 'title': 'eaef9dea5159cf968be84241b5cedfe7', - 'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg', + 'thumbnail': 'https://cloudflarestream.com/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg', }, 'params': { 'skip_download': 'm3u8', @@ -62,8 +62,9 @@ class CloudflareStreamIE(InfoExtractor): }] def _real_extract(self, url): - video_id = self._match_id(url) - domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net' + video_id, domain = self._match_valid_url(url).group('id', 'domain') + if domain != 'bytehighway.net': + domain = 'cloudflarestream.com' base_url = f'https://{domain}/{video_id}/' if '.' in video_id: video_id = self._parse_json(base64.urlsafe_b64decode( From 6b43a8d84b881d769b480ba6e20ec691e9d1b92d Mon Sep 17 00:00:00 2001 From: Sam Date: Mon, 11 Nov 2024 23:03:31 +0100 Subject: [PATCH 69/98] [ie/goplay] Fix extractor (#11466) Closes #10857 Authored by: SamDecrock, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/goplay.py | 96 +++++++++++++++++++++++--------------- 1 file changed, 59 insertions(+), 37 deletions(-) diff --git a/yt_dlp/extractor/goplay.py b/yt_dlp/extractor/goplay.py index dfe5afe635..32300f75c2 100644 --- a/yt_dlp/extractor/goplay.py +++ b/yt_dlp/extractor/goplay.py @@ -5,56 +5,63 @@ import hashlib import hmac import json import os +import re +import urllib.parse from .common import InfoExtractor from ..utils import ( ExtractorError, + int_or_none, + js_to_json, + remove_end, traverse_obj, - unescapeHTML, ) class GoPlayIE(InfoExtractor): - _VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/]+/[^/]+/|)(?P[^/#]+)' + _VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/?#]+/[^/?#]+/|)(?P[^/#]+)' _NETRC_MACHINE = 'goplay' _TESTS = [{ - 'url': 'https://www.goplay.be/video/de-container-cup/de-container-cup-s3/de-container-cup-s3-aflevering-2#autoplay', + 'url': 'https://www.goplay.be/video/de-slimste-mens-ter-wereld/de-slimste-mens-ter-wereld-s22/de-slimste-mens-ter-wereld-s22-aflevering-1', 'info_dict': { - 'id': '9c4214b8-e55d-4e4b-a446-f015f6c6f811', + 'id': '2baa4560-87a0-421b-bffc-359914e3c387', 'ext': 'mp4', - 'title': 'S3 - Aflevering 2', - 'series': 'De Container Cup', - 'season': 'Season 3', - 'season_number': 3, - 'episode': 'Episode 2', - 'episode_number': 2, + 'title': 'S22 - Aflevering 1', + 'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}', + 'series': 'De Slimste Mens ter Wereld', + 'episode': 'Episode 1', + 'season_number': 22, + 'episode_number': 1, + 'season': 'Season 22', }, + 'params': {'skip_download': True}, 'skip': 'This video is only available for registered users', }, { - 'url': 'https://www.goplay.be/video/a-family-for-thr-holidays-s1-aflevering-1#autoplay', + 'url': 'https://www.goplay.be/video/1917', 'info_dict': { - 'id': '74e3ed07-748c-49e4-85a0-393a93337dbf', + 'id': '40cac41d-8d29-4ef5-aa11-75047b9f0907', 'ext': 'mp4', - 'title': 'A Family for the Holidays', + 'title': '1917', + 'description': r're:Op het hoogtepunt van de Eerste Wereldoorlog krijgen twee jonge .{94}', }, + 'params': {'skip_download': True}, 'skip': 'This video is only available for registered users', }, { 'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay', 'info_dict': { - 'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656', + 'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee', 'ext': 'mp4', 'title': 'S11 - Aflevering 1', + 'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten .{102}', 'episode': 'Episode 1', 'series': 'De Mol', 'season_number': 11, 'episode_number': 1, 'season': 'Season 11', }, - 'params': { - 'skip_download': True, - }, + 'params': {'skip_download': True}, 'skip': 'This video is only available for registered users', }] @@ -69,27 +76,42 @@ class GoPlayIE(InfoExtractor): if not self._id_token: raise self.raise_login_required(method='password') - def _real_extract(self, url): - url, display_id = self._match_valid_url(url).group(0, 'display_id') - webpage = self._download_webpage(url, display_id) - video_data_json = self._html_search_regex(r']*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*', webpage), + (..., {js_to_json}, {json.loads}, ..., {self._find_json}, ...)) + meta = traverse_obj(nextjs_data, ( + ..., lambda _, v: v['meta']['path'] == urllib.parse.urlparse(url).path, 'meta', any)) + + video_id = meta['uuid'] + info_dict = traverse_obj(meta, { + 'title': ('title', {str}), + 'description': ('description', {str.strip}), + }) + + if traverse_obj(meta, ('program', 'subtype')) != 'movie': + for season_data in traverse_obj(nextjs_data, (..., 'children', ..., 'playlists', ...)): + episode_data = traverse_obj( + season_data, ('videos', lambda _, v: v['videoId'] == video_id, any)) + if not episode_data: + continue + + episode_title = traverse_obj( + episode_data, 'contextualTitle', 'episodeTitle', expected_type=str) + info_dict.update({ + 'title': episode_title or info_dict.get('title'), + 'series': remove_end(info_dict.get('title'), f' - {episode_title}'), + 'season_number': traverse_obj(season_data, ('season', {int_or_none})), + 'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})), + }) + break api = self._download_json( f'https://api.goplay.be/web/v1/videos/long-form/{video_id}', From a9f85670d03ab993dc589f21a9ffffcad61392d5 Mon Sep 17 00:00:00 2001 From: manav_chaudhary <100396248+manavchaudhary1@users.noreply.github.com> Date: Tue, 12 Nov 2024 04:11:56 +0530 Subject: [PATCH 70/98] [ie/Chaturbate] Support alternate domains (#10595) Closes #10594 Authored by: manavchaudhary1 --- yt_dlp/extractor/chaturbate.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/chaturbate.py b/yt_dlp/extractor/chaturbate.py index b49f741efa..864d61f9c2 100644 --- a/yt_dlp/extractor/chaturbate.py +++ b/yt_dlp/extractor/chaturbate.py @@ -9,7 +9,7 @@ from ..utils import ( class ChaturbateIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P[^/?&#]+)' + _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.(?Pcom|eu|global)/(?:fullvideo/?\?.*?\bb=)?(?P[^/?&#]+)' _TESTS = [{ 'url': 'https://www.chaturbate.com/siswet19/', 'info_dict': { @@ -29,15 +29,24 @@ class ChaturbateIE(InfoExtractor): }, { 'url': 'https://en.chaturbate.com/siswet19/', 'only_matching': True, + }, { + 'url': 'https://chaturbate.eu/siswet19/', + 'only_matching': True, + }, { + 'url': 'https://chaturbate.eu/fullvideo/?b=caylin', + 'only_matching': True, + }, { + 'url': 'https://chaturbate.global/siswet19/', + 'only_matching': True, }] _ROOM_OFFLINE = 'Room is currently offline' def _real_extract(self, url): - video_id = self._match_id(url) + video_id, tld = self._match_valid_url(url).group('id', 'tld') webpage = self._download_webpage( - f'https://chaturbate.com/{video_id}/', video_id, + f'https://chaturbate.{tld}/{video_id}/', video_id, headers=self.geo_verification_headers()) found_m3u8_urls = [] From bacc31b05a04181b63100c481565256b14813a5e Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Tue, 12 Nov 2024 23:23:10 +0000 Subject: [PATCH 71/98] [ie/facebook] Fix formats extraction (#11513) Closes #11497 Authored by: bashonly --- yt_dlp/extractor/facebook.py | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 2bcb5a8411..91e2f3489c 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -563,13 +563,13 @@ class FacebookIE(InfoExtractor): return extract_video_data(try_get( js_data, lambda x: x['jsmods']['instances'], list) or []) - def extract_dash_manifest(video, formats): + def extract_dash_manifest(vid_data, formats, mpd_url=None): dash_manifest = traverse_obj( - video, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', expected_type=str) + vid_data, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', 'manifest_xml', expected_type=str) if dash_manifest: formats.extend(self._parse_mpd_formats( compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)), - mpd_url=url_or_none(video.get('dash_manifest_url')))) + mpd_url=url_or_none(video.get('dash_manifest_url')) or mpd_url)) def process_formats(info): # Downloads with browser's User-Agent are rate limited. Working around @@ -619,9 +619,12 @@ class FacebookIE(InfoExtractor): video = video['creation_story'] video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner')) video.update(reel_info) - fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video + formats = [] q = qualities(['sd', 'hd']) + + # Legacy formats extraction + fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'), ('playable_url_dash', ''), ('browser_native_hd_url', 'hd'), ('browser_native_sd_url', 'sd')): @@ -629,7 +632,7 @@ class FacebookIE(InfoExtractor): if not playable_url: continue if determine_ext(playable_url) == 'mpd': - formats.extend(self._extract_mpd_formats(playable_url, video_id)) + formats.extend(self._extract_mpd_formats(playable_url, video_id, fatal=False)) else: formats.append({ 'format_id': format_id, @@ -638,6 +641,28 @@ class FacebookIE(InfoExtractor): 'url': playable_url, }) extract_dash_manifest(fmt_data, formats) + + # New videoDeliveryResponse formats extraction + fmt_data = traverse_obj(video, ('videoDeliveryResponseFragment', 'videoDeliveryResponseResult')) + mpd_urls = traverse_obj(fmt_data, ('dash_manifest_urls', ..., 'manifest_url', {url_or_none})) + dash_manifests = traverse_obj(fmt_data, ('dash_manifests', lambda _, v: v['manifest_xml'])) + for idx, dash_manifest in enumerate(dash_manifests): + extract_dash_manifest(dash_manifest, formats, mpd_url=traverse_obj(mpd_urls, idx)) + if not dash_manifests: + # Only extract from MPD URLs if the manifests are not already provided + for mpd_url in mpd_urls: + formats.extend(self._extract_mpd_formats(mpd_url, video_id, fatal=False)) + for prog_fmt in traverse_obj(fmt_data, ('progressive_urls', lambda _, v: v['progressive_url'])): + format_id = traverse_obj(prog_fmt, ('metadata', 'quality', {str.lower})) + formats.append({ + 'format_id': format_id, + # sd, hd formats w/o resolution info should be deprioritized below DASH + 'quality': q(format_id) - 3, + 'url': prog_fmt['progressive_url'], + }) + for m3u8_url in traverse_obj(fmt_data, ('hls_playlist_urls', ..., 'hls_playlist_url', {url_or_none})): + formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False, m3u8_id='hls')) + if not formats: # Do not append false positive entry w/o any formats return From f2a4983df7a64c4e93b56f79dbd16a781bd90206 Mon Sep 17 00:00:00 2001 From: Jackson Humphrey Date: Tue, 12 Nov 2024 17:26:18 -0600 Subject: [PATCH 72/98] [ie/archive.org] Fix comments extraction (#11527) Closes #11526 Authored by: jshumphrey --- yt_dlp/extractor/archiveorg.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py index f5a55efc4f..2849d9fd5b 100644 --- a/yt_dlp/extractor/archiveorg.py +++ b/yt_dlp/extractor/archiveorg.py @@ -205,6 +205,26 @@ class ArchiveOrgIE(InfoExtractor): }, }, ], + }, { + # The reviewbody is None for one of the reviews; just need to extract data without crashing + 'url': 'https://archive.org/details/gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn', + 'info_dict': { + 'id': 'gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn', + 'ext': 'mp3', + 'title': 'Stuck Inside of Mobile with the Memphis Blues Again', + 'creators': ['Grateful Dead'], + 'duration': 338.31, + 'track': 'Stuck Inside of Mobile with the Memphis Blues Again', + 'description': 'md5:764348a470b986f1217ffd38d6ac7b72', + 'display_id': 'gd95-04-02d1t04.shn', + 'location': 'Pyramid Arena', + 'uploader': 'jon@archive.org', + 'album': '1995-04-02 - Pyramid Arena', + 'upload_date': '20040519', + 'track_number': 4, + 'release_date': '19950402', + 'timestamp': 1084927901, + }, }] @staticmethod @@ -335,7 +355,7 @@ class ArchiveOrgIE(InfoExtractor): info['comments'].append({ 'id': review.get('review_id'), 'author': review.get('reviewer'), - 'text': str_or_none(review.get('reviewtitle'), '') + '\n\n' + review.get('reviewbody'), + 'text': join_nonempty('reviewtitle', 'reviewbody', from_dict=review, delim='\n\n'), 'timestamp': unified_timestamp(review.get('createdate')), 'parent': 'root'}) From 39d79c9b9cf23411d935910685c40aa1a2fdb409 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Fri, 15 Nov 2024 22:06:15 +0100 Subject: [PATCH 73/98] [utils] Fix `join_nonempty`, add `**kwargs` to `unpack` (#11559) Authored by: Grub4K --- test/test_traversal.py | 2 +- test/test_utils.py | 5 ----- yt_dlp/utils/_utils.py | 3 +-- yt_dlp/utils/traversal.py | 4 ++-- 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/test/test_traversal.py b/test/test_traversal.py index d48606e99c..52ea19fab3 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -525,7 +525,7 @@ class TestTraversalHelpers: def test_unpack(self): assert unpack(lambda *x: ''.join(map(str, x)))([1, 2, 3]) == '123' assert unpack(join_nonempty)([1, 2, 3]) == '1-2-3' - assert unpack(join_nonempty(delim=' '))([1, 2, 3]) == '1 2 3' + assert unpack(join_nonempty, delim=' ')([1, 2, 3]) == '1 2 3' with pytest.raises(TypeError): unpack(join_nonempty)() with pytest.raises(TypeError): diff --git a/test/test_utils.py b/test/test_utils.py index b5f35736b6..835774a912 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -72,7 +72,6 @@ from yt_dlp.utils import ( intlist_to_bytes, iri_to_uri, is_html, - join_nonempty, js_to_json, limit_length, locked_file, @@ -2158,10 +2157,6 @@ Line 1 assert int_or_none(v=10) == 10, 'keyword passed positional should call function' assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function' - assert callable(join_nonempty(delim=', ')), 'varargs positional should apply partially' - assert callable(join_nonempty()), 'varargs positional should apply partially' - assert join_nonempty(None, delim=', ') == '', 'passed varargs should call the function' - if __name__ == '__main__': unittest.main() diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index b28bb555e1..89c53c39e7 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -216,7 +216,7 @@ def partial_application(func): sig = inspect.signature(func) required_args = [ param.name for param in sig.parameters.values() - if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.VAR_POSITIONAL) + if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD) if param.default is inspect.Parameter.empty ] @@ -4837,7 +4837,6 @@ def number_of_digits(number): return len('%d' % number) -@partial_application def join_nonempty(*values, delim='-', from_dict=None): if from_dict is not None: values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values) diff --git a/yt_dlp/utils/traversal.py b/yt_dlp/utils/traversal.py index 361f239ba6..6bb52050f2 100644 --- a/yt_dlp/utils/traversal.py +++ b/yt_dlp/utils/traversal.py @@ -452,9 +452,9 @@ def trim_str(*, start=None, end=None): return trim -def unpack(func): +def unpack(func, **kwargs): @functools.wraps(func) - def inner(items, **kwargs): + def inner(items): return func(*items, **kwargs) return inner From c014fbcddcb4c8f79d914ac5bb526758b540ea33 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Fri, 15 Nov 2024 23:25:52 +0100 Subject: [PATCH 74/98] [utils] `subs_list_to_dict`: Add `lang` default parameter (#11508) Authored by: Grub4K --- test/test_traversal.py | 50 ++++++++++++++++++++++++++++++++++++++- yt_dlp/utils/traversal.py | 22 ++++++++++------- 2 files changed, 63 insertions(+), 9 deletions(-) diff --git a/test/test_traversal.py b/test/test_traversal.py index 52ea19fab3..bc433029d8 100644 --- a/test/test_traversal.py +++ b/test/test_traversal.py @@ -481,7 +481,7 @@ class TestTraversalHelpers: 'id': 'name', 'data': 'content', 'url': 'url', - }, all, {subs_list_to_dict}]) == { + }, all, {subs_list_to_dict(lang=None)}]) == { 'de': [{'url': 'https://example.com/subs/de.ass'}], 'en': [{'data': 'content'}], }, 'subs with mandatory items missing should be filtered' @@ -507,6 +507,54 @@ class TestTraversalHelpers: {'url': 'https://example.com/subs/en1', 'ext': 'ext'}, {'url': 'https://example.com/subs/en2', 'ext': 'ext'}, ]}, '`quality` key should sort subtitle list accordingly' + assert traverse_obj([ + {'name': 'de', 'url': 'https://example.com/subs/de.ass'}, + {'name': 'de'}, + {'name': 'en', 'content': 'content'}, + {'url': 'https://example.com/subs/en'}, + ], [..., { + 'id': 'name', + 'url': 'url', + 'data': 'content', + }, all, {subs_list_to_dict(lang='en')}]) == { + 'de': [{'url': 'https://example.com/subs/de.ass'}], + 'en': [ + {'data': 'content'}, + {'url': 'https://example.com/subs/en'}, + ], + }, 'optionally provided lang should be used if no id available' + assert traverse_obj([ + {'name': 1, 'url': 'https://example.com/subs/de1'}, + {'name': {}, 'url': 'https://example.com/subs/de2'}, + {'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'}, + {'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'}, + ], [..., { + 'id': 'name', + 'url': 'url', + 'ext': 'ext', + }, all, {subs_list_to_dict(lang=None)}]) == { + 'de': [ + {'url': 'https://example.com/subs/de3'}, + {'url': 'https://example.com/subs/de4'}, + ], + }, 'non str types should be ignored for id and ext' + assert traverse_obj([ + {'name': 1, 'url': 'https://example.com/subs/de1'}, + {'name': {}, 'url': 'https://example.com/subs/de2'}, + {'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'}, + {'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'}, + ], [..., { + 'id': 'name', + 'url': 'url', + 'ext': 'ext', + }, all, {subs_list_to_dict(lang='de')}]) == { + 'de': [ + {'url': 'https://example.com/subs/de1'}, + {'url': 'https://example.com/subs/de2'}, + {'url': 'https://example.com/subs/de3'}, + {'url': 'https://example.com/subs/de4'}, + ], + }, 'non str types should be replaced by default id' def test_trim_str(self): with pytest.raises(TypeError): diff --git a/yt_dlp/utils/traversal.py b/yt_dlp/utils/traversal.py index 6bb52050f2..76b51f53d1 100644 --- a/yt_dlp/utils/traversal.py +++ b/yt_dlp/utils/traversal.py @@ -332,14 +332,14 @@ class _RequiredError(ExtractorError): @typing.overload -def subs_list_to_dict(*, ext: str | None = None) -> collections.abc.Callable[[list[dict]], dict[str, list[dict]]]: ... +def subs_list_to_dict(*, lang: str | None = 'und', ext: str | None = None) -> collections.abc.Callable[[list[dict]], dict[str, list[dict]]]: ... @typing.overload -def subs_list_to_dict(subs: list[dict] | None, /, *, ext: str | None = None) -> dict[str, list[dict]]: ... +def subs_list_to_dict(subs: list[dict] | None, /, *, lang: str | None = 'und', ext: str | None = None) -> dict[str, list[dict]]: ... -def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None): +def subs_list_to_dict(subs: list[dict] | None = None, /, *, lang='und', ext=None): """ Convert subtitles from a traversal into a subtitle dict. The path should have an `all` immediately before this function. @@ -352,7 +352,7 @@ def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None): `quality` The sort order for each subtitle """ if subs is None: - return functools.partial(subs_list_to_dict, ext=ext) + return functools.partial(subs_list_to_dict, lang=lang, ext=ext) result = collections.defaultdict(list) @@ -360,10 +360,16 @@ def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None): if not url_or_none(sub.get('url')) and not sub.get('data'): continue sub_id = sub.pop('id', None) - if sub_id is None: - continue - if ext is not None and not sub.get('ext'): - sub['ext'] = ext + if not isinstance(sub_id, str): + if not lang: + continue + sub_id = lang + sub_ext = sub.get('ext') + if not isinstance(sub_ext, str): + if not ext: + sub.pop('ext', None) + else: + sub['ext'] = ext result[sub_id].append(sub) result = dict(result) From eb64ae7d5def6df2aba74fb703e7f168fb299865 Mon Sep 17 00:00:00 2001 From: bashonly Date: Thu, 14 Nov 2024 16:08:50 -0600 Subject: [PATCH 75/98] [ie] Allow `ext` override for thumbnails (#11545) Authored by: bashonly --- yt_dlp/YoutubeDL.py | 4 +++- yt_dlp/extractor/common.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 3186a999de..3130deda31 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -4381,7 +4381,9 @@ class YoutubeDL: return None for idx, t in list(enumerate(thumbnails))[::-1]: - thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg') + thumb_ext = t.get('ext') or determine_ext(t['url'], 'jpg') + if multiple: + thumb_ext = f'{t["id"]}.{thumb_ext}' thumb_display_id = f'{label} thumbnail {t["id"]}' thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext')) thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext')) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 01915acf23..23f6fc6c46 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -279,6 +279,7 @@ class InfoExtractor: thumbnails: A list of dictionaries, with the following entries: * "id" (optional, string) - Thumbnail format ID * "url" + * "ext" (optional, string) - actual image extension if not given in URL * "preference" (optional, int) - quality of the image * "width" (optional, int) * "height" (optional, int) From c699bafc5038b59c9afe8c2e69175fb66424c832 Mon Sep 17 00:00:00 2001 From: bashonly Date: Thu, 14 Nov 2024 16:09:11 -0600 Subject: [PATCH 76/98] [ie/soop] Fix thumbnail extraction (#11545) Closes #11537 Authored by: bashonly --- yt_dlp/extractor/afreecatv.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index 6682a89817..572d1a3893 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -66,6 +66,14 @@ class AfreecaTVBaseIE(InfoExtractor): extensions={'legacy_ssl': True}), display_id, 'Downloading API JSON', 'Unable to download API JSON') + @staticmethod + def _fixup_thumb(thumb_url): + if not url_or_none(thumb_url): + return None + # Core would determine_ext as 'php' from the url, so we need to provide the real ext + # See: https://github.com/yt-dlp/yt-dlp/issues/11537 + return [{'url': thumb_url, 'ext': 'jpg'}] + class AfreecaTVIE(AfreecaTVBaseIE): IE_NAME = 'soop' @@ -155,7 +163,7 @@ class AfreecaTVIE(AfreecaTVBaseIE): 'uploader': ('writer_nick', {str}), 'uploader_id': ('bj_id', {str}), 'duration': ('total_file_duration', {int_or_none(scale=1000)}), - 'thumbnail': ('thumb', {url_or_none}), + 'thumbnails': ('thumb', {self._fixup_thumb}), }) entries = [] @@ -226,8 +234,7 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE): return self.playlist_result(self._entries(data), video_id) - @staticmethod - def _entries(data): + def _entries(self, data): # 'files' is always a list with 1 element yield from traverse_obj(data, ( 'data', lambda _, v: v['story_type'] == 'catch', @@ -238,7 +245,7 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE): 'title': ('title', {str}), 'uploader': ('writer_nick', {str}), 'uploader_id': ('writer_id', {str}), - 'thumbnail': ('thumb', {url_or_none}), + 'thumbnails': ('thumb', {self._fixup_thumb}), 'timestamp': ('write_timestamp', {int_or_none}), })) From 70c55cb08f780eab687e881ef42bb5c6007d290b Mon Sep 17 00:00:00 2001 From: Alessandro Campolo Date: Sat, 16 Nov 2024 13:56:15 +0100 Subject: [PATCH 77/98] [ie/RadioRadicale] Add extractor (#5607) Authored by: a13ssandr0, pzhlkj6612 Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com> --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/radioradicale.py | 105 ++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 yt_dlp/extractor/radioradicale.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 51caefd4d7..d6ab610025 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1649,6 +1649,7 @@ from .radiokapital import ( RadioKapitalIE, RadioKapitalShowIE, ) +from .radioradicale import RadioRadicaleIE from .radiozet import RadioZetPodcastIE from .radlive import ( RadLiveChannelIE, diff --git a/yt_dlp/extractor/radioradicale.py b/yt_dlp/extractor/radioradicale.py new file mode 100644 index 0000000000..472e25c45f --- /dev/null +++ b/yt_dlp/extractor/radioradicale.py @@ -0,0 +1,105 @@ +from .common import InfoExtractor +from ..utils import url_or_none +from ..utils.traversal import traverse_obj + + +class RadioRadicaleIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?radioradicale\.it/scheda/(?P[0-9]+)' + _TESTS = [{ + 'url': 'https://www.radioradicale.it/scheda/471591', + 'md5': 'eb0fbe43a601f1a361cbd00f3c45af4a', + 'info_dict': { + 'id': '471591', + 'ext': 'mp4', + 'title': 'md5:e8fbb8de57011a3255db0beca69af73d', + 'description': 'md5:5e15a789a2fe4d67da8d1366996e89ef', + 'location': 'Napoli', + 'duration': 2852.0, + 'timestamp': 1459987200, + 'upload_date': '20160407', + 'thumbnail': 'https://www.radioradicale.it/photo400/0/0/9/0/1/00901768.jpg', + }, + }, { + 'url': 'https://www.radioradicale.it/scheda/742783/parlamento-riunito-in-seduta-comune-11a-della-xix-legislatura', + 'info_dict': { + 'id': '742783', + 'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)', + 'description': '-) Votazione per l\'elezione di un giudice della Corte Costituzionale (nono scrutinio)', + 'location': 'CAMERA', + 'duration': 5868.0, + 'timestamp': 1730246400, + 'upload_date': '20241030', + }, + 'playlist': [{ + 'md5': 'aa48de55dcc45478e4cd200f299aab7d', + 'info_dict': { + 'id': '742783-0', + 'ext': 'mp4', + 'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)', + }, + }, { + 'md5': 'be915c189c70ad2920e5810f32260ff5', + 'info_dict': { + 'id': '742783-1', + 'ext': 'mp4', + 'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)', + }, + }, { + 'md5': 'f0ee4047342baf8ed3128a8417ac5e0a', + 'info_dict': { + 'id': '742783-2', + 'ext': 'mp4', + 'title': 'Parlamento riunito in seduta comune (11ª della XIX legislatura)', + }, + }], + }] + + def _entries(self, videos_info, page_id): + for idx, video in enumerate(traverse_obj( + videos_info, ('playlist', lambda _, v: v['sources']))): + video_id = f'{page_id}-{idx}' + formats = [] + subtitles = {} + + for m3u8_url in traverse_obj(video, ('sources', ..., 'src', {url_or_none})): + fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + for sub in traverse_obj(video, ('subtitles', ..., lambda _, v: url_or_none(v['src']))): + self._merge_subtitles({sub.get('srclang') or 'und': [{ + 'url': sub['src'], + 'name': sub.get('label'), + }]}, target=subtitles) + + yield { + 'id': video_id, + 'title': video.get('title'), + 'formats': formats, + 'subtitles': subtitles, + } + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + + videos_info = self._search_json( + r'jQuery\.extend\(Drupal\.settings\s*,', + webpage, 'videos_info', page_id)['RRscheda'] + + entries = list(self._entries(videos_info, page_id)) + + common_info = { + 'id': page_id, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'location': videos_info.get('luogo'), + **self._search_json_ld(webpage, page_id), + } + + if len(entries) == 1: + return { + **entries[0], + **common_info, + } + + return self.playlist_result(entries, multi_video=True, **common_info) From 6365e92589e4bc17b8fffb0125a716d144ad2137 Mon Sep 17 00:00:00 2001 From: sepro Date: Sat, 16 Nov 2024 17:56:43 +0100 Subject: [PATCH 78/98] [ie/bandlab] Add extractors (#11535) Closes #7750 Authored by: seproDev --- yt_dlp/extractor/_extractors.py | 4 + yt_dlp/extractor/bandlab.py | 438 ++++++++++++++++++++++++++++++++ 2 files changed, 442 insertions(+) create mode 100644 yt_dlp/extractor/bandlab.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index d6ab610025..25a233a2d6 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -208,6 +208,10 @@ from .bandcamp import ( BandcampUserIE, BandcampWeeklyIE, ) +from .bandlab import ( + BandlabIE, + BandlabPlaylistIE, +) from .bannedvideo import BannedVideoIE from .bbc import ( BBCIE, diff --git a/yt_dlp/extractor/bandlab.py b/yt_dlp/extractor/bandlab.py new file mode 100644 index 0000000000..e48d5d3f76 --- /dev/null +++ b/yt_dlp/extractor/bandlab.py @@ -0,0 +1,438 @@ + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + float_or_none, + format_field, + int_or_none, + parse_iso8601, + parse_qs, + truncate_string, + url_or_none, +) +from ..utils.traversal import traverse_obj, value + + +class BandlabBaseIE(InfoExtractor): + def _call_api(self, endpoint, asset_id, **kwargs): + headers = kwargs.pop('headers', None) or {} + return self._download_json( + f'https://www.bandlab.com/api/v1.3/{endpoint}/{asset_id}', + asset_id, headers={ + 'accept': 'application/json', + 'referer': 'https://www.bandlab.com/', + 'x-client-id': 'BandLab-Web', + 'x-client-version': '10.1.124', + **headers, + }, **kwargs) + + def _parse_revision(self, revision_data, url=None): + return { + 'vcodec': 'none', + 'media_type': 'revision', + 'extractor_key': BandlabIE.ie_key(), + 'extractor': BandlabIE.IE_NAME, + **traverse_obj(revision_data, { + 'webpage_url': ( + 'id', ({value(url)}, {format_field(template='https://www.bandlab.com/revision/%s')}), filter, any), + 'id': (('revisionId', 'id'), {str}, any), + 'title': ('song', 'name', {str}), + 'track': ('song', 'name', {str}), + 'url': ('mixdown', 'file', {url_or_none}), + 'thumbnail': ('song', 'picture', 'url', {url_or_none}), + 'description': ('description', {str}), + 'uploader': ('creator', 'name', {str}), + 'uploader_id': ('creator', 'username', {str}), + 'timestamp': ('createdOn', {parse_iso8601}), + 'duration': ('mixdown', 'duration', {float_or_none}), + 'view_count': ('counters', 'plays', {int_or_none}), + 'like_count': ('counters', 'likes', {int_or_none}), + 'comment_count': ('counters', 'comments', {int_or_none}), + 'genres': ('genres', ..., 'name', {str}), + }), + } + + def _parse_track(self, track_data, url=None): + return { + 'vcodec': 'none', + 'media_type': 'track', + 'extractor_key': BandlabIE.ie_key(), + 'extractor': BandlabIE.IE_NAME, + **traverse_obj(track_data, { + 'webpage_url': ( + 'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any), + 'id': (('revisionId', 'id'), {str}, any), + 'url': ('track', 'sample', 'audioUrl', {url_or_none}), + 'title': ('track', 'name', {str}), + 'track': ('track', 'name', {str}), + 'description': ('caption', {str}), + 'thumbnail': ('track', 'picture', ('original', 'url'), {url_or_none}, any), + 'view_count': ('counters', 'plays', {int_or_none}), + 'like_count': ('counters', 'likes', {int_or_none}), + 'comment_count': ('counters', 'comments', {int_or_none}), + 'duration': ('track', 'sample', 'duration', {float_or_none}), + 'uploader': ('creator', 'name', {str}), + 'uploader_id': ('creator', 'username', {str}), + 'timestamp': ('createdOn', {parse_iso8601}), + }), + } + + def _parse_video(self, video_data, url=None): + return { + 'media_type': 'video', + 'extractor_key': BandlabIE.ie_key(), + 'extractor': BandlabIE.IE_NAME, + **traverse_obj(video_data, { + 'id': ('id', {str}), + 'webpage_url': ( + 'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any), + 'url': ('video', 'url', {url_or_none}), + 'title': ('caption', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}), + 'description': ('caption', {str}), + 'thumbnail': ('video', 'picture', 'url', {url_or_none}), + 'view_count': ('video', 'counters', 'plays', {int_or_none}), + 'like_count': ('video', 'counters', 'likes', {int_or_none}), + 'comment_count': ('counters', 'comments', {int_or_none}), + 'duration': ('video', 'duration', {float_or_none}), + 'uploader': ('creator', 'name', {str}), + 'uploader_id': ('creator', 'username', {str}), + }), + } + + +class BandlabIE(BandlabBaseIE): + _VALID_URL = [ + r'https?://(?:www\.)?bandlab.com/(?Ptrack|post|revision)/(?P[\da-f_-]+)', + r'https?://(?:www\.)?bandlab.com/(?Pembed)/\?(?:[^#]*&)?id=(?P[\da-f-]+)', + ] + _EMBED_REGEX = [rf']+src=[\'"](?P{_VALID_URL[1]})[\'"]'] + _TESTS = [{ + 'url': 'https://www.bandlab.com/track/04b37e88dba24967b9dac8eb8567ff39_07d7f906fc96ee11b75e000d3a428fff', + 'md5': '46f7b43367dd268bbcf0bbe466753b2c', + 'info_dict': { + 'id': '02d7f906-fc96-ee11-b75e-000d3a428fff', + 'ext': 'm4a', + 'uploader_id': 'ender_milze', + 'track': 'sweet black', + 'description': 'composed by juanjn3737', + 'timestamp': 1702171963, + 'view_count': int, + 'like_count': int, + 'duration': 54.629999999999995, + 'title': 'sweet black', + 'upload_date': '20231210', + 'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/', + 'genres': ['Lofi'], + 'uploader': 'ender milze', + 'comment_count': int, + 'media_type': 'revision', + }, + }, { + # Same track as above but post URL + 'url': 'https://www.bandlab.com/post/07d7f906-fc96-ee11-b75e-000d3a428fff', + 'md5': '46f7b43367dd268bbcf0bbe466753b2c', + 'info_dict': { + 'id': '02d7f906-fc96-ee11-b75e-000d3a428fff', + 'ext': 'm4a', + 'uploader_id': 'ender_milze', + 'track': 'sweet black', + 'description': 'composed by juanjn3737', + 'timestamp': 1702171973, + 'view_count': int, + 'like_count': int, + 'duration': 54.629999999999995, + 'title': 'sweet black', + 'upload_date': '20231210', + 'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/', + 'genres': ['Lofi'], + 'uploader': 'ender milze', + 'comment_count': int, + 'media_type': 'revision', + }, + }, { + # SharedKey Example + 'url': 'https://www.bandlab.com/track/048916c2-c6da-ee11-85f9-6045bd2e11f9?sharedKey=0NNWX8qYAEmI38lWAzCNDA', + 'md5': '15174b57c44440e2a2008be9cae00250', + 'info_dict': { + 'id': '038916c2-c6da-ee11-85f9-6045bd2e11f9', + 'ext': 'm4a', + 'comment_count': int, + 'genres': ['Other'], + 'uploader_id': 'user8353034818103753', + 'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/51b18363-da23-4b9b-a29c-2933a3e561ca/', + 'timestamp': 1709625771, + 'track': 'PodcastMaerchen4b', + 'duration': 468.14, + 'view_count': int, + 'description': 'Podcast: Neues aus der Märchenwelt', + 'like_count': int, + 'upload_date': '20240305', + 'uploader': 'Erna Wageneder', + 'title': 'PodcastMaerchen4b', + 'media_type': 'revision', + }, + }, { + # Different Revision selected + 'url': 'https://www.bandlab.com/track/130343fc-148b-ea11-96d2-0003ffd1fc09?revId=110343fc-148b-ea11-96d2-0003ffd1fc09', + 'md5': '74e055ef9325d63f37088772fbfe4454', + 'info_dict': { + 'id': '110343fc-148b-ea11-96d2-0003ffd1fc09', + 'ext': 'm4a', + 'timestamp': 1588273294, + 'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/b612e533-e4f7-4542-9f50-3fcfd8dd822c/', + 'description': 'Final Revision.', + 'title': 'Replay ( Instrumental)', + 'uploader': 'David R Sparks', + 'uploader_id': 'davesnothome69', + 'view_count': int, + 'comment_count': int, + 'track': 'Replay ( Instrumental)', + 'genres': ['Rock'], + 'upload_date': '20200430', + 'like_count': int, + 'duration': 279.43, + 'media_type': 'revision', + }, + }, { + # Video + 'url': 'https://www.bandlab.com/post/5cdf9036-3857-ef11-991a-6045bd36e0d9', + 'md5': '8caa2ef28e86c1dacf167293cfdbeba9', + 'info_dict': { + 'id': '5cdf9036-3857-ef11-991a-6045bd36e0d9', + 'ext': 'mp4', + 'duration': 44.705, + 'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/videos/67c6cef1-cef6-40d3-831e-a55bc1dcb972/', + 'comment_count': int, + 'title': 'backing vocals', + 'uploader_id': 'marliashya', + 'uploader': 'auraa', + 'like_count': int, + 'description': 'backing vocals', + 'media_type': 'video', + }, + }, { + # Embed Example + 'url': 'https://www.bandlab.com/embed/?blur=false&id=014de0a4-7d82-ea11-a94c-0003ffd19c0f', + 'md5': 'a4ad05cb68c54faaed9b0a8453a8cf4a', + 'info_dict': { + 'id': '014de0a4-7d82-ea11-a94c-0003ffd19c0f', + 'ext': 'm4a', + 'comment_count': int, + 'genres': ['Electronic'], + 'uploader': 'Charlie Henson', + 'timestamp': 1587328674, + 'upload_date': '20200419', + 'view_count': int, + 'track': 'Positronic Meltdown', + 'duration': 318.55, + 'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/87165bc3-5439-496e-b1f7-a9f13b541ff2/', + 'description': 'Checkout my tracks at AOMX http://aomxsounds.com/', + 'uploader_id': 'microfreaks', + 'title': 'Positronic Meltdown', + 'like_count': int, + 'media_type': 'revision', + }, + }, { + # Track without revisions available + 'url': 'https://www.bandlab.com/track/55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5', + 'md5': 'f05d68a3769952c2d9257c473e14c15f', + 'info_dict': { + 'id': '55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5', + 'ext': 'm4a', + 'track': 'insame', + 'like_count': int, + 'duration': 84.03, + 'title': 'insame', + 'view_count': int, + 'comment_count': int, + 'uploader': 'Sorakime', + 'uploader_id': 'sorakime', + 'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/572a351a-0f3a-4c6a-ac39-1a5defdeeb1c/', + 'timestamp': 1691162128, + 'upload_date': '20230804', + 'media_type': 'track', + }, + }, { + 'url': 'https://www.bandlab.com/revision/014de0a4-7d82-ea11-a94c-0003ffd19c0f', + 'only_matching': True, + }] + _WEBPAGE_TESTS = [{ + 'url': 'https://phantomluigi.github.io/', + 'info_dict': { + 'id': 'e14223c3-7871-ef11-bdfd-000d3a980db3', + 'ext': 'm4a', + 'view_count': int, + 'upload_date': '20240913', + 'uploader_id': 'phantommusicofficial', + 'timestamp': 1726194897, + 'uploader': 'Phantom', + 'comment_count': int, + 'genres': ['Progresive Rock'], + 'description': 'md5:a38cd668f7a2843295ef284114f18429', + 'duration': 225.23, + 'like_count': int, + 'title': 'Vermilion Pt. 2 (Cover)', + 'track': 'Vermilion Pt. 2 (Cover)', + 'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/62b10750-7aef-4f42-ad08-1af52f577e97/', + 'media_type': 'revision', + }, + }] + + def _real_extract(self, url): + display_id, url_type = self._match_valid_url(url).group('id', 'url_type') + + qs = parse_qs(url) + revision_id = traverse_obj(qs, (('revId', 'id'), 0, any)) + if url_type == 'revision': + revision_id = display_id + + revision_data = None + if not revision_id: + post_data = self._call_api( + 'posts', display_id, note='Downloading post data', + query=traverse_obj(qs, {'sharedKey': ('sharedKey', 0)})) + + revision_id = traverse_obj(post_data, (('revisionId', ('revision', 'id')), {str}, any)) + revision_data = traverse_obj(post_data, ('revision', {dict})) + + if not revision_data and not revision_id: + post_type = post_data.get('type') + if post_type == 'Video': + return self._parse_video(post_data, url=url) + if post_type == 'Track': + return self._parse_track(post_data, url=url) + raise ExtractorError(f'Could not extract data for post type {post_type!r}') + + if not revision_data: + revision_data = self._call_api( + 'revisions', revision_id, note='Downloading revision data', query={'edit': 'false'}) + + return self._parse_revision(revision_data, url=url) + + +class BandlabPlaylistIE(BandlabBaseIE): + _VALID_URL = [ + r'https?://(?:www\.)?bandlab.com/(?:[\w]+/)?(?Palbums|collections)/(?P[\da-f-]+)', + r'https?://(?:www\.)?bandlab.com/(?Pembed)/collection/\?(?:[^#]*&)?id=(?P[\da-f-]+)', + ] + _EMBED_REGEX = [rf']+src=[\'"](?P{_VALID_URL[1]})[\'"]'] + _TESTS = [{ + 'url': 'https://www.bandlab.com/davesnothome69/albums/89b79ea6-de42-ed11-b495-00224845aac7', + 'info_dict': { + 'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/69507ff3-579a-45be-afca-9e87eddec944/', + 'release_date': '20221003', + 'title': 'Remnants', + 'album': 'Remnants', + 'like_count': int, + 'album_type': 'LP', + 'description': 'A collection of some feel good, rock hits.', + 'comment_count': int, + 'view_count': int, + 'id': '89b79ea6-de42-ed11-b495-00224845aac7', + 'uploader': 'David R Sparks', + 'uploader_id': 'davesnothome69', + }, + 'playlist_count': 10, + }, { + 'url': 'https://www.bandlab.com/slytheband/collections/955102d4-1040-ef11-86c3-000d3a42581b', + 'info_dict': { + 'id': '955102d4-1040-ef11-86c3-000d3a42581b', + 'timestamp': 1720762659, + 'view_count': int, + 'title': 'My Shit 🖤', + 'uploader_id': 'slytheband', + 'uploader': '𝓢𝓛𝓨', + 'upload_date': '20240712', + 'like_count': int, + 'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/collections/2c64ca12-b180-4b76-8587-7a8da76bddc8/', + }, + 'playlist_count': 15, + }, { + # Embeds can contain both albums and collections with the same URL pattern. This is an album + 'url': 'https://www.bandlab.com/embed/collection/?id=12cc6f7f-951b-ee11-907c-00224844f303', + 'info_dict': { + 'id': '12cc6f7f-951b-ee11-907c-00224844f303', + 'release_date': '20230706', + 'description': 'This is a collection of songs I created when I had an Amiga computer.', + 'view_count': int, + 'title': 'Mark Salud The Amiga Collection', + 'uploader_id': 'mssirmooth1962', + 'comment_count': int, + 'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/d618bd7b-0537-40d5-bdd8-61b066e77d59/', + 'like_count': int, + 'uploader': 'Mark Salud', + 'album': 'Mark Salud The Amiga Collection', + 'album_type': 'LP', + }, + 'playlist_count': 24, + }, { + # Tracks without revision id + 'url': 'https://www.bandlab.com/embed/collection/?id=e98aafb5-d932-ee11-b8f0-00224844c719', + 'info_dict': { + 'like_count': int, + 'uploader_id': 'sorakime', + 'comment_count': int, + 'uploader': 'Sorakime', + 'view_count': int, + 'description': 'md5:4ec31c568a5f5a5a2b17572ea64c3825', + 'release_date': '20230812', + 'title': 'Art', + 'album': 'Art', + 'album_type': 'Album', + 'id': 'e98aafb5-d932-ee11-b8f0-00224844c719', + 'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/20c890de-e94a-4422-828a-2da6377a13c8/', + }, + 'playlist_count': 13, + }, { + 'url': 'https://www.bandlab.com/albums/89b79ea6-de42-ed11-b495-00224845aac7', + 'only_matching': True, + }] + + def _entries(self, album_data): + for post in traverse_obj(album_data, ('posts', lambda _, v: v['type'])): + post_type = post['type'] + if post_type == 'Revision': + yield self._parse_revision(post.get('revision')) + elif post_type == 'Track': + yield self._parse_track(post) + elif post_type == 'Video': + yield self._parse_video(post) + else: + self.report_warning(f'Skipping unknown post type: "{post_type}"') + + def _real_extract(self, url): + playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type') + + endpoints = { + 'albums': ['albums'], + 'collections': ['collections'], + 'embed': ['collections', 'albums'], + }.get(playlist_type) + for endpoint in endpoints: + playlist_data = self._call_api( + endpoint, playlist_id, note=f'Downloading {endpoint[:-1]} data', + fatal=False, expected_status=404) + if not playlist_data.get('errorCode'): + playlist_type = endpoint + break + if error_code := playlist_data.get('errorCode'): + raise ExtractorError(f'Could not find playlist data. Error code: "{error_code}"') + + return self.playlist_result( + self._entries(playlist_data), playlist_id, + **traverse_obj(playlist_data, { + 'title': ('name', {str}), + 'description': ('description', {str}), + 'uploader': ('creator', 'name', {str}), + 'uploader_id': ('creator', 'username', {str}), + 'timestamp': ('createdOn', {parse_iso8601}), + 'release_date': ('releaseDate', {lambda x: x.replace('-', '')}, filter), + 'thumbnail': ('picture', ('original', 'url'), {url_or_none}, any), + 'like_count': ('counters', 'likes', {int_or_none}), + 'comment_count': ('counters', 'comments', {int_or_none}), + 'view_count': ('counters', 'plays', {int_or_none}), + }), + **(traverse_obj(playlist_data, { + 'album': ('name', {str}), + 'album_type': ('type', {str}), + }) if playlist_type == 'albums' else {})) From 8388ec256f7753b02488788e3cfa771f6e1db247 Mon Sep 17 00:00:00 2001 From: Jackson Humphrey Date: Sat, 16 Nov 2024 13:48:47 -0600 Subject: [PATCH 79/98] [ie/spankbang] Support browser impersonation (#11542) Closes #6545 Authored by: jshumphrey --- yt_dlp/extractor/spankbang.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/spankbang.py b/yt_dlp/extractor/spankbang.py index 6805a72deb..05f0bb1468 100644 --- a/yt_dlp/extractor/spankbang.py +++ b/yt_dlp/extractor/spankbang.py @@ -71,9 +71,11 @@ class SpankBangIE(InfoExtractor): def _real_extract(self, url): mobj = self._match_valid_url(url) video_id = mobj.group('id') or mobj.group('id_2') + country = self.get_param('geo_bypass_country') or 'US' + self._set_cookie('.spankbang.com', 'country', country.upper()) webpage = self._download_webpage( url.replace(f'/{video_id}/embed', f'/{video_id}/video'), - video_id, headers={'Cookie': 'country=US'}) + video_id, impersonate=True) if re.search(r'<[^>]+\b(?:id|class)=["\']video_removed', webpage): raise ExtractorError( From d215fba7edb69d4fa665f43663756fd260b1489f Mon Sep 17 00:00:00 2001 From: Jackson Humphrey Date: Sat, 16 Nov 2024 13:50:17 -0600 Subject: [PATCH 80/98] [ie/RedGifsUser] Fix extraction (#11531) Closes #7382, Closes #9131 Authored by: jshumphrey --- yt_dlp/extractor/redgifs.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py index 50138ab12c..b11ea273de 100644 --- a/yt_dlp/extractor/redgifs.py +++ b/yt_dlp/extractor/redgifs.py @@ -213,7 +213,7 @@ class RedGifsSearchIE(RedGifsBaseInfoExtractor): class RedGifsUserIE(RedGifsBaseInfoExtractor): IE_DESC = 'Redgifs user' _VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P[^/?#]+)(?:\?(?P[^#]+))?' - _PAGE_SIZE = 30 + _PAGE_SIZE = 80 _TESTS = [ { 'url': 'https://www.redgifs.com/users/lamsinka89', @@ -222,7 +222,7 @@ class RedGifsUserIE(RedGifsBaseInfoExtractor): 'title': 'lamsinka89', 'description': 'RedGifs user lamsinka89, ordered by recent', }, - 'playlist_mincount': 100, + 'playlist_mincount': 391, }, { 'url': 'https://www.redgifs.com/users/lamsinka89?page=3', @@ -231,7 +231,7 @@ class RedGifsUserIE(RedGifsBaseInfoExtractor): 'title': 'lamsinka89', 'description': 'RedGifs user lamsinka89, ordered by recent', }, - 'playlist_count': 30, + 'playlist_count': 80, }, { 'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g', @@ -240,7 +240,17 @@ class RedGifsUserIE(RedGifsBaseInfoExtractor): 'title': 'lamsinka89', 'description': 'RedGifs user lamsinka89, ordered by best', }, - 'playlist_mincount': 100, + 'playlist_mincount': 391, + }, + { + 'url': 'https://www.redgifs.com/users/ignored52', + 'note': 'https://github.com/yt-dlp/yt-dlp/issues/7382', + 'info_dict': { + 'id': 'ignored52', + 'title': 'ignored52', + 'description': 'RedGifs user ignored52, ordered by recent', + }, + 'playlist_mincount': 121, }, ] From 720b3dc453c342bc2e8df7dbc0acaab4479de46c Mon Sep 17 00:00:00 2001 From: powergold1 <18133986+powergold1@users.noreply.github.com> Date: Sat, 16 Nov 2024 20:55:40 +0100 Subject: [PATCH 81/98] [ie/chaturbate] Extract from API and support impersonation (#11555) Closes #6546, Closes #10359 Authored by: powergold1 --- yt_dlp/extractor/chaturbate.py | 51 ++++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/chaturbate.py b/yt_dlp/extractor/chaturbate.py index 864d61f9c2..aa70f26a1b 100644 --- a/yt_dlp/extractor/chaturbate.py +++ b/yt_dlp/extractor/chaturbate.py @@ -5,6 +5,7 @@ from ..utils import ( ExtractorError, lowercase_escape, url_or_none, + urlencode_postdata, ) @@ -40,14 +41,48 @@ class ChaturbateIE(InfoExtractor): 'only_matching': True, }] - _ROOM_OFFLINE = 'Room is currently offline' + _ERROR_MAP = { + 'offline': 'Room is currently offline', + 'private': 'Room is currently in a private show', + 'away': 'Performer is currently away', + 'password protected': 'Room is password protected', + 'hidden': 'Hidden session in progress', + } - def _real_extract(self, url): - video_id, tld = self._match_valid_url(url).group('id', 'tld') + def _extract_from_api(self, video_id, tld): + response = self._download_json( + f'https://chaturbate.{tld}/get_edge_hls_url_ajax/', video_id, + data=urlencode_postdata({'room_slug': video_id}), + headers={ + **self.geo_verification_headers(), + 'X-Requested-With': 'XMLHttpRequest', + 'Accept': 'application/json', + }, fatal=False, impersonate=True) or {} + status = response.get('room_status') + if status != 'public': + if error := self._ERROR_MAP.get(status): + raise ExtractorError(error, expected=True) + self.report_warning('Falling back to webpage extraction') + return None + + m3u8_url = response.get('url') + if not m3u8_url: + self.raise_geo_restricted() + + return { + 'id': video_id, + 'title': video_id, + 'thumbnail': f'https://roomimg.stream.highwebmedia.com/ri/{video_id}.jpg', + 'is_live': True, + 'age_limit': 18, + 'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True), + } + + def _extract_from_webpage(self, video_id, tld): webpage = self._download_webpage( f'https://chaturbate.{tld}/{video_id}/', video_id, - headers=self.geo_verification_headers()) + headers=self.geo_verification_headers(), impersonate=True) found_m3u8_urls = [] @@ -85,8 +120,8 @@ class ChaturbateIE(InfoExtractor): webpage, 'error', group='error', default=None) if not error: if any(p in webpage for p in ( - self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')): - error = self._ROOM_OFFLINE + self._ERROR_MAP['offline'], 'offline_tipping', 'tip_offline')): + error = self._ERROR_MAP['offline'] if error: raise ExtractorError(error, expected=True) raise ExtractorError('Unable to find stream URL') @@ -113,3 +148,7 @@ class ChaturbateIE(InfoExtractor): 'is_live': True, 'formats': formats, } + + def _real_extract(self, url): + video_id, tld = self._match_valid_url(url).group('id', 'tld') + return self._extract_from_api(video_id, tld) or self._extract_from_webpage(video_id, tld) From 1d253b0a27110d174c40faf8fb1c999d099e0cde Mon Sep 17 00:00:00 2001 From: Jackson Humphrey Date: Sat, 16 Nov 2024 14:02:14 -0600 Subject: [PATCH 82/98] [ie/patreon] Fix comments extraction (#11530) Closes #11483 Authored by: jshumphrey, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/patreon.py | 51 +++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 4d668cd37d..6bdeaf1571 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -16,10 +16,10 @@ from ..utils import ( parse_iso8601, smuggle_url, str_or_none, - traverse_obj, url_or_none, urljoin, ) +from ..utils.traversal import traverse_obj, value class PatreonBaseIE(InfoExtractor): @@ -252,6 +252,27 @@ class PatreonIE(PatreonBaseIE): 'thumbnail': r're:^https?://.+', }, 'skip': 'Patron-only content', + }, { + # Contains a comment reply in the 'included' section + 'url': 'https://www.patreon.com/posts/114721679', + 'info_dict': { + 'id': '114721679', + 'ext': 'mp4', + 'upload_date': '20241025', + 'uploader': 'Japanalysis', + 'like_count': int, + 'thumbnail': r're:^https?://.+', + 'comment_count': int, + 'title': 'Karasawa Part 2', + 'description': 'Part 2 of this video https://www.youtube.com/watch?v=Azms2-VTASk', + 'uploader_url': 'https://www.patreon.com/japanalysis', + 'uploader_id': '80504268', + 'channel_url': 'https://www.patreon.com/japanalysis', + 'channel_follower_count': int, + 'timestamp': 1729897015, + 'channel_id': '9346307', + }, + 'params': {'getcomments': True}, }] _RETURN_TYPE = 'video' @@ -404,26 +425,24 @@ class PatreonIE(PatreonBaseIE): f'posts/{post_id}/comments', post_id, query=params, note=f'Downloading comments page {page}') cursor = None - for comment in traverse_obj(response, (('data', ('included', lambda _, v: v['type'] == 'comment')), ...)): + for comment in traverse_obj(response, (('data', 'included'), lambda _, v: v['type'] == 'comment' and v['id'])): count += 1 - comment_id = comment.get('id') - attributes = comment.get('attributes') or {} - if comment_id is None: - continue author_id = traverse_obj(comment, ('relationships', 'commenter', 'data', 'id')) - author_info = traverse_obj( - response, ('included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes'), - get_all=False, expected_type=dict, default={}) yield { - 'id': comment_id, - 'text': attributes.get('body'), - 'timestamp': parse_iso8601(attributes.get('created')), - 'parent': traverse_obj(comment, ('relationships', 'parent', 'data', 'id'), default='root'), - 'author_is_uploader': attributes.get('is_by_creator'), + **traverse_obj(comment, { + 'id': ('id', {str_or_none}), + 'text': ('attributes', 'body', {str}), + 'timestamp': ('attributes', 'created', {parse_iso8601}), + 'parent': ('relationships', 'parent', 'data', ('id', {value('root')}), {str}, any), + 'author_is_uploader': ('attributes', 'is_by_creator', {bool}), + }), + **traverse_obj(response, ( + 'included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes', { + 'author': ('full_name', {str}), + 'author_thumbnail': ('image_url', {url_or_none}), + }), get_all=False), 'author_id': author_id, - 'author': author_info.get('full_name'), - 'author_thumbnail': author_info.get('image_url'), } if count < traverse_obj(response, ('meta', 'count')): From f95a92b3d0169a784ee15a138fbe09d82b2754a1 Mon Sep 17 00:00:00 2001 From: sepro Date: Sun, 17 Nov 2024 00:24:11 +0100 Subject: [PATCH 83/98] [cleanup] Deprecate more compat functions (#11439) Authored by: seproDev --- devscripts/generate_aes_testdata.py | 3 +- pyproject.toml | 10 ++++ test/helper.py | 3 +- test/test_YoutubeDL.py | 9 ++- test/test_aes.py | 47 ++++++++------- test/test_compat.py | 40 +------------ test/test_downloader_http.py | 7 +-- test/test_utils.py | 16 ++--- yt_dlp/YoutubeDL.py | 35 ++++++----- yt_dlp/__init__.py | 8 +-- yt_dlp/aes.py | 21 ++++--- yt_dlp/compat/__init__.py | 22 +------ yt_dlp/compat/_deprecated.py | 18 +++--- yt_dlp/compat/_legacy.py | 11 +++- yt_dlp/compat/functools.py | 7 --- yt_dlp/compat/urllib/request.py | 6 +- yt_dlp/cookies.py | 3 +- yt_dlp/downloader/common.py | 18 +++--- yt_dlp/downloader/external.py | 9 ++- yt_dlp/downloader/fragment.py | 9 ++- yt_dlp/downloader/http.py | 8 +-- yt_dlp/downloader/rtmp.py | 7 +-- yt_dlp/downloader/rtsp.py | 4 +- yt_dlp/extractor/abematv.py | 9 +-- yt_dlp/extractor/adn.py | 8 +-- yt_dlp/extractor/anvato.py | 6 +- yt_dlp/extractor/common.py | 3 +- yt_dlp/extractor/shemaroome.py | 12 ++-- yt_dlp/postprocessor/common.py | 3 +- yt_dlp/postprocessor/embedthumbnail.py | 13 ++-- yt_dlp/postprocessor/ffmpeg.py | 23 ++++---- .../postprocessor/movefilesafterdownload.py | 16 +++-- yt_dlp/postprocessor/sponskrub.py | 7 +-- yt_dlp/postprocessor/xattrpp.py | 3 +- yt_dlp/update.py | 59 ++----------------- yt_dlp/utils/_deprecated.py | 36 +++++------ yt_dlp/utils/_legacy.py | 27 +++++++++ yt_dlp/utils/_utils.py | 35 +++-------- 38 files changed, 218 insertions(+), 363 deletions(-) delete mode 100644 yt_dlp/compat/functools.py diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py index 7f3c88bcfb..73cf803b8f 100644 --- a/devscripts/generate_aes_testdata.py +++ b/devscripts/generate_aes_testdata.py @@ -11,13 +11,12 @@ import codecs import subprocess from yt_dlp.aes import aes_encrypt, key_expansion -from yt_dlp.utils import intlist_to_bytes secret_msg = b'Secret message goes here' def hex_str(int_list): - return codecs.encode(intlist_to_bytes(int_list), 'hex') + return codecs.encode(bytes(int_list), 'hex') def openssl_encode(algo, key, iv): diff --git a/pyproject.toml b/pyproject.toml index ef921fed58..92d399e319 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -313,6 +313,16 @@ banned-from = [ "yt_dlp.compat.compat_urllib_parse_urlparse".msg = "Use `urllib.parse.urlparse` instead." "yt_dlp.compat.compat_shlex_quote".msg = "Use `yt_dlp.utils.shell_quote` instead." "yt_dlp.utils.error_to_compat_str".msg = "Use `str` instead." +"yt_dlp.utils.bytes_to_intlist".msg = "Use `list` instead." +"yt_dlp.utils.intlist_to_bytes".msg = "Use `bytes` instead." +"yt_dlp.utils.decodeArgument".msg = "Do not use" +"yt_dlp.utils.decodeFilename".msg = "Do not use" +"yt_dlp.utils.encodeFilename".msg = "Do not use" +"yt_dlp.compat.compat_os_name".msg = "Use `os.name` instead." +"yt_dlp.compat.compat_realpath".msg = "Use `os.path.realpath` instead." +"yt_dlp.compat.functools".msg = "Use `functools` instead." +"yt_dlp.utils.decodeOption".msg = "Do not use" +"yt_dlp.utils.compiled_regex_type".msg = "Use `re.Pattern` instead." [tool.autopep8] max_line_length = 120 diff --git a/test/helper.py b/test/helper.py index 3b550d1927..c776e70b73 100644 --- a/test/helper.py +++ b/test/helper.py @@ -9,7 +9,6 @@ import types import yt_dlp.extractor from yt_dlp import YoutubeDL -from yt_dlp.compat import compat_os_name from yt_dlp.utils import preferredencoding, try_call, write_string, find_available_port if 'pytest' in sys.modules: @@ -49,7 +48,7 @@ def report_warning(message, *args, **kwargs): Print the message to stderr, it will be prefixed with 'WARNING:' If stderr is a tty file the 'WARNING:' will be colored """ - if sys.stderr.isatty() and compat_os_name != 'nt': + if sys.stderr.isatty() and os.name != 'nt': _msg_header = '\033[0;33mWARNING:\033[0m' else: _msg_header = 'WARNING:' diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index a99e624080..966d27a498 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -15,7 +15,6 @@ import json from test.helper import FakeYDL, assertRegexpMatches, try_rm from yt_dlp import YoutubeDL -from yt_dlp.compat import compat_os_name from yt_dlp.extractor import YoutubeIE from yt_dlp.extractor.common import InfoExtractor from yt_dlp.postprocessor.common import PostProcessor @@ -839,8 +838,8 @@ class TestYoutubeDL(unittest.TestCase): test('%(filesize)#D', '1Ki') test('%(height)5.2D', ' 1.08k') test('%(title4)#S', 'foo_bar_test') - test('%(title4).10S', ('foo "bar" ', 'foo "bar"' + ('#' if compat_os_name == 'nt' else ' '))) - if compat_os_name == 'nt': + test('%(title4).10S', ('foo "bar" ', 'foo "bar"' + ('#' if os.name == 'nt' else ' '))) + if os.name == 'nt': test('%(title4)q', ('"foo ""bar"" test"', None)) test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', None)) test('%(formats.0.id)#q', ('"id 1"', None)) @@ -903,9 +902,9 @@ class TestYoutubeDL(unittest.TestCase): # Environment variable expansion for prepare_filename os.environ['__yt_dlp_var'] = 'expanded' - envvar = '%__yt_dlp_var%' if compat_os_name == 'nt' else '$__yt_dlp_var' + envvar = '%__yt_dlp_var%' if os.name == 'nt' else '$__yt_dlp_var' test(envvar, (envvar, 'expanded')) - if compat_os_name == 'nt': + if os.name == 'nt': test('%s%', ('%s%', '%s%')) os.environ['s'] = 'expanded' test('%s%', ('%s%', 'expanded')) # %s% should be expanded before escaping %s diff --git a/test/test_aes.py b/test/test_aes.py index 6fe6059a17..9cd9189bcc 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -27,7 +27,6 @@ from yt_dlp.aes import ( pad_block, ) from yt_dlp.dependencies import Cryptodome -from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes # the encrypted data can be generate with 'devscripts/generate_aes_testdata.py' @@ -40,33 +39,33 @@ class TestAES(unittest.TestCase): def test_encrypt(self): msg = b'message' key = list(range(16)) - encrypted = aes_encrypt(bytes_to_intlist(msg), key) - decrypted = intlist_to_bytes(aes_decrypt(encrypted, key)) + encrypted = aes_encrypt(list(msg), key) + decrypted = bytes(aes_decrypt(encrypted, key)) self.assertEqual(decrypted, msg) def test_cbc_decrypt(self): data = b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\x27\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd' - decrypted = intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(data), self.key, self.iv)) + decrypted = bytes(aes_cbc_decrypt(list(data), self.key, self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) if Cryptodome.AES: - decrypted = aes_cbc_decrypt_bytes(data, intlist_to_bytes(self.key), intlist_to_bytes(self.iv)) + decrypted = aes_cbc_decrypt_bytes(data, bytes(self.key), bytes(self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) def test_cbc_encrypt(self): - data = bytes_to_intlist(self.secret_msg) - encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv)) + data = list(self.secret_msg) + encrypted = bytes(aes_cbc_encrypt(data, self.key, self.iv)) self.assertEqual( encrypted, b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd') def test_ctr_decrypt(self): - data = bytes_to_intlist(b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08') - decrypted = intlist_to_bytes(aes_ctr_decrypt(data, self.key, self.iv)) + data = list(b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08') + decrypted = bytes(aes_ctr_decrypt(data, self.key, self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) def test_ctr_encrypt(self): - data = bytes_to_intlist(self.secret_msg) - encrypted = intlist_to_bytes(aes_ctr_encrypt(data, self.key, self.iv)) + data = list(self.secret_msg) + encrypted = bytes(aes_ctr_encrypt(data, self.key, self.iv)) self.assertEqual( encrypted, b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08') @@ -75,19 +74,19 @@ class TestAES(unittest.TestCase): data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f.\x08\xb4T\xe4/\x17\xbd' authentication_tag = b'\xe8&I\x80rI\x07\x9d}YWuU@:e' - decrypted = intlist_to_bytes(aes_gcm_decrypt_and_verify( - bytes_to_intlist(data), self.key, bytes_to_intlist(authentication_tag), self.iv[:12])) + decrypted = bytes(aes_gcm_decrypt_and_verify( + list(data), self.key, list(authentication_tag), self.iv[:12])) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) if Cryptodome.AES: decrypted = aes_gcm_decrypt_and_verify_bytes( - data, intlist_to_bytes(self.key), authentication_tag, intlist_to_bytes(self.iv[:12])) + data, bytes(self.key), authentication_tag, bytes(self.iv[:12])) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) def test_gcm_aligned_decrypt(self): data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f' authentication_tag = b'\x08\xb1\x9d!&\x98\xd0\xeaRq\x90\xe6;\xb5]\xd8' - decrypted = intlist_to_bytes(aes_gcm_decrypt_and_verify( + decrypted = bytes(aes_gcm_decrypt_and_verify( list(data), self.key, list(authentication_tag), self.iv[:12])) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg[:16]) if Cryptodome.AES: @@ -96,38 +95,38 @@ class TestAES(unittest.TestCase): self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg[:16]) def test_decrypt_text(self): - password = intlist_to_bytes(self.key).decode() + password = bytes(self.key).decode() encrypted = base64.b64encode( - intlist_to_bytes(self.iv[:8]) + bytes(self.iv[:8]) + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae', ).decode() decrypted = (aes_decrypt_text(encrypted, password, 16)) self.assertEqual(decrypted, self.secret_msg) - password = intlist_to_bytes(self.key).decode() + password = bytes(self.key).decode() encrypted = base64.b64encode( - intlist_to_bytes(self.iv[:8]) + bytes(self.iv[:8]) + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83', ).decode() decrypted = (aes_decrypt_text(encrypted, password, 32)) self.assertEqual(decrypted, self.secret_msg) def test_ecb_encrypt(self): - data = bytes_to_intlist(self.secret_msg) - encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key)) + data = list(self.secret_msg) + encrypted = bytes(aes_ecb_encrypt(data, self.key)) self.assertEqual( encrypted, b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') def test_ecb_decrypt(self): - data = bytes_to_intlist(b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') - decrypted = intlist_to_bytes(aes_ecb_decrypt(data, self.key, self.iv)) + data = list(b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') + decrypted = bytes(aes_ecb_decrypt(data, self.key, self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) def test_key_expansion(self): key = '4f6bdaa39e2f8cb07f5e722d9edef314' - self.assertEqual(key_expansion(bytes_to_intlist(bytearray.fromhex(key))), [ + self.assertEqual(key_expansion(list(bytearray.fromhex(key))), [ 0x4F, 0x6B, 0xDA, 0xA3, 0x9E, 0x2F, 0x8C, 0xB0, 0x7F, 0x5E, 0x72, 0x2D, 0x9E, 0xDE, 0xF3, 0x14, 0x53, 0x66, 0x20, 0xA8, 0xCD, 0x49, 0xAC, 0x18, 0xB2, 0x17, 0xDE, 0x35, 0x2C, 0xC9, 0x2D, 0x21, 0x8C, 0xBE, 0xDD, 0xD9, 0x41, 0xF7, 0x71, 0xC1, 0xF3, 0xE0, 0xAF, 0xF4, 0xDF, 0x29, 0x82, 0xD5, diff --git a/test/test_compat.py b/test/test_compat.py index e7d97e3e93..b1cc2a8187 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -12,12 +12,7 @@ import struct from yt_dlp import compat from yt_dlp.compat import urllib # isort: split -from yt_dlp.compat import ( - compat_etree_fromstring, - compat_expanduser, - compat_urllib_parse_unquote, # noqa: TID251 - compat_urllib_parse_urlencode, # noqa: TID251 -) +from yt_dlp.compat import compat_etree_fromstring, compat_expanduser from yt_dlp.compat.urllib.request import getproxies @@ -43,39 +38,6 @@ class TestCompat(unittest.TestCase): finally: os.environ['HOME'] = old_home or '' - def test_compat_urllib_parse_unquote(self): - self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def') - self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def') - self.assertEqual(compat_urllib_parse_unquote(''), '') - self.assertEqual(compat_urllib_parse_unquote('%'), '%') - self.assertEqual(compat_urllib_parse_unquote('%%'), '%%') - self.assertEqual(compat_urllib_parse_unquote('%%%'), '%%%') - self.assertEqual(compat_urllib_parse_unquote('%2F'), '/') - self.assertEqual(compat_urllib_parse_unquote('%2f'), '/') - self.assertEqual(compat_urllib_parse_unquote('%E6%B4%A5%E6%B3%A2'), '津波') - self.assertEqual( - compat_urllib_parse_unquote(''' -%%a'''), - ''' -%%a''') - self.assertEqual( - compat_urllib_parse_unquote('''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%'''), - '''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%Things%''') - - def test_compat_urllib_parse_unquote_plus(self): - self.assertEqual(urllib.parse.unquote_plus('abc%20def'), 'abc def') - self.assertEqual(urllib.parse.unquote_plus('%7e/abc+def'), '~/abc def') - - def test_compat_urllib_parse_urlencode(self): - self.assertEqual(compat_urllib_parse_urlencode({'abc': 'def'}), 'abc=def') - self.assertEqual(compat_urllib_parse_urlencode({'abc': b'def'}), 'abc=def') - self.assertEqual(compat_urllib_parse_urlencode({b'abc': 'def'}), 'abc=def') - self.assertEqual(compat_urllib_parse_urlencode({b'abc': b'def'}), 'abc=def') - self.assertEqual(compat_urllib_parse_urlencode([('abc', 'def')]), 'abc=def') - self.assertEqual(compat_urllib_parse_urlencode([('abc', b'def')]), 'abc=def') - self.assertEqual(compat_urllib_parse_urlencode([(b'abc', 'def')]), 'abc=def') - self.assertEqual(compat_urllib_parse_urlencode([(b'abc', b'def')]), 'abc=def') - def test_compat_etree_fromstring(self): xml = ''' diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py index faba0bc9c8..cf2e3fac16 100644 --- a/test/test_downloader_http.py +++ b/test/test_downloader_http.py @@ -15,7 +15,6 @@ import threading from test.helper import http_server_port, try_rm from yt_dlp import YoutubeDL from yt_dlp.downloader.http import HttpFD -from yt_dlp.utils import encodeFilename from yt_dlp.utils._utils import _YDLLogger as FakeLogger TEST_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -82,12 +81,12 @@ class TestHttpFD(unittest.TestCase): ydl = YoutubeDL(params) downloader = HttpFD(ydl, params) filename = 'testfile.mp4' - try_rm(encodeFilename(filename)) + try_rm(filename) self.assertTrue(downloader.real_download(filename, { 'url': f'http://127.0.0.1:{self.port}/{ep}', }), ep) - self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE, ep) - try_rm(encodeFilename(filename)) + self.assertEqual(os.path.getsize(filename), TEST_SIZE, ep) + try_rm(filename) def download_all(self, params): for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'): diff --git a/test/test_utils.py b/test/test_utils.py index 835774a912..b3de14198e 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -21,7 +21,6 @@ import xml.etree.ElementTree from yt_dlp.compat import ( compat_etree_fromstring, compat_HTMLParseError, - compat_os_name, ) from yt_dlp.utils import ( Config, @@ -49,7 +48,6 @@ from yt_dlp.utils import ( dfxp2srt, encode_base_n, encode_compat_str, - encodeFilename, expand_path, extract_attributes, extract_basic_auth, @@ -69,7 +67,6 @@ from yt_dlp.utils import ( get_elements_html_by_class, get_elements_text_and_html_by_attribute, int_or_none, - intlist_to_bytes, iri_to_uri, is_html, js_to_json, @@ -566,10 +563,10 @@ class TestUtil(unittest.TestCase): self.assertEqual(res_data, {'a': 'b', 'c': 'd'}) def test_shell_quote(self): - args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')] + args = ['ffmpeg', '-i', 'ñ€ß\'.mp4'] self.assertEqual( shell_quote(args), - """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''') + """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if os.name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''') def test_float_or_none(self): self.assertEqual(float_or_none('42.42'), 42.42) @@ -1309,15 +1306,10 @@ class TestUtil(unittest.TestCase): self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') self.assertEqual(clean_html('a
\xa0b'), 'a\nb') - def test_intlist_to_bytes(self): - self.assertEqual( - intlist_to_bytes([0, 1, 127, 128, 255]), - b'\x00\x01\x7f\x80\xff') - def test_args_to_str(self): self.assertEqual( args_to_str(['foo', 'ba/r', '-baz', '2 be', '']), - 'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""', + 'foo ba/r -baz \'2 be\' \'\'' if os.name != 'nt' else 'foo ba/r -baz "2 be" ""', ) def test_parse_filesize(self): @@ -2117,7 +2109,7 @@ Line 1 assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=') assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz') - @unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows') + @unittest.skipUnless(os.name == 'nt', 'Only relevant on Windows') def test_windows_escaping(self): tests = [ 'test"&', diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 3130deda31..749de5d4e3 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -26,7 +26,7 @@ import unicodedata from .cache import Cache from .compat import urllib # isort: split -from .compat import compat_os_name, urllib_req_to_req +from .compat import urllib_req_to_req from .cookies import CookieLoadError, LenientSimpleCookie, load_cookies from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name from .downloader.rtmp import rtmpdump_version @@ -109,7 +109,6 @@ from .utils import ( determine_ext, determine_protocol, encode_compat_str, - encodeFilename, escapeHTML, expand_path, extract_basic_auth, @@ -167,7 +166,7 @@ from .utils.networking import ( ) from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__ -if compat_os_name == 'nt': +if os.name == 'nt': import ctypes @@ -643,7 +642,7 @@ class YoutubeDL: out=stdout, error=sys.stderr, screen=sys.stderr if self.params.get('quiet') else stdout, - console=None if compat_os_name == 'nt' else next( + console=None if os.name == 'nt' else next( filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None), ) @@ -952,7 +951,7 @@ class YoutubeDL: self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once) def _send_console_code(self, code): - if compat_os_name == 'nt' or not self._out_files.console: + if os.name == 'nt' or not self._out_files.console: return self._write_string(code, self._out_files.console) @@ -960,7 +959,7 @@ class YoutubeDL: if not self.params.get('consoletitle', False): return message = remove_terminal_sequences(message) - if compat_os_name == 'nt': + if os.name == 'nt': if ctypes.windll.kernel32.GetConsoleWindow(): # c_wchar_p() might not be necessary if `message` is # already of type unicode() @@ -3255,9 +3254,9 @@ class YoutubeDL: if full_filename is None: return - if not self._ensure_dir_exists(encodeFilename(full_filename)): + if not self._ensure_dir_exists(full_filename): return - if not self._ensure_dir_exists(encodeFilename(temp_filename)): + if not self._ensure_dir_exists(temp_filename): return if self._write_description('video', info_dict, @@ -3289,16 +3288,16 @@ class YoutubeDL: if self.params.get('writeannotations', False): annofn = self.prepare_filename(info_dict, 'annotation') if annofn: - if not self._ensure_dir_exists(encodeFilename(annofn)): + if not self._ensure_dir_exists(annofn): return - if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)): + if not self.params.get('overwrites', True) and os.path.exists(annofn): self.to_screen('[info] Video annotations are already present') elif not info_dict.get('annotations'): self.report_warning('There are no annotations to write.') else: try: self.to_screen('[info] Writing video annotations to: ' + annofn) - with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: + with open(annofn, 'w', encoding='utf-8') as annofile: annofile.write(info_dict['annotations']) except (KeyError, TypeError): self.report_warning('There are no annotations to write.') @@ -3314,14 +3313,14 @@ class YoutubeDL: f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown') return True linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext')) - if not self._ensure_dir_exists(encodeFilename(linkfn)): + if not self._ensure_dir_exists(linkfn): return False - if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)): + if self.params.get('overwrites', True) and os.path.exists(linkfn): self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present') return True try: self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}') - with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', + with open(to_high_limit_path(linkfn), 'w', encoding='utf-8', newline='\r\n' if link_type == 'url' else '\n') as linkfile: template_vars = {'url': url} if link_type == 'desktop': @@ -3352,7 +3351,7 @@ class YoutubeDL: if self.params.get('skip_download'): info_dict['filepath'] = temp_filename - info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) + info_dict['__finaldir'] = os.path.dirname(os.path.abspath(full_filename)) info_dict['__files_to_move'] = files_to_move replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)) info_dict['__write_download_archive'] = self.params.get('force_write_download_archive') @@ -3482,7 +3481,7 @@ class YoutubeDL: self.report_file_already_downloaded(dl_filename) dl_filename = dl_filename or temp_filename - info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) + info_dict['__finaldir'] = os.path.dirname(os.path.abspath(full_filename)) except network_exceptions as err: self.report_error(f'unable to download video data: {err}') @@ -4297,7 +4296,7 @@ class YoutubeDL: else: try: self.to_screen(f'[info] Writing {label} description to: {descfn}') - with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: + with open(descfn, 'w', encoding='utf-8') as descfile: descfile.write(ie_result['description']) except OSError: self.report_error(f'Cannot write {label} description file {descfn}') @@ -4399,7 +4398,7 @@ class YoutubeDL: try: uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {}))) self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}') - with open(encodeFilename(thumb_filename), 'wb') as thumbf: + with open(thumb_filename, 'wb') as thumbf: shutil.copyfileobj(uf, thumbf) ret.append((thumb_filename, thumb_filename_final)) t['filepath'] = thumb_filename diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index a7665159bd..a1880bf7dc 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -14,7 +14,6 @@ import os import re import traceback -from .compat import compat_os_name from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError from .downloader.external import get_external_downloader from .extractor import list_extractor_classes @@ -44,7 +43,6 @@ from .utils import ( GeoUtils, PlaylistEntries, SameFileError, - decodeOption, download_range_func, expand_path, float_or_none, @@ -883,8 +881,8 @@ def parse_options(argv=None): 'listsubtitles': opts.listsubtitles, 'subtitlesformat': opts.subtitlesformat, 'subtitleslangs': opts.subtitleslangs, - 'matchtitle': decodeOption(opts.matchtitle), - 'rejecttitle': decodeOption(opts.rejecttitle), + 'matchtitle': opts.matchtitle, + 'rejecttitle': opts.rejecttitle, 'max_downloads': opts.max_downloads, 'prefer_free_formats': opts.prefer_free_formats, 'trim_file_name': opts.trim_file_name, @@ -1053,7 +1051,7 @@ def _real_main(argv=None): ydl.warn_if_short_id(args) # Show a useful error message and wait for keypress if not launched from shell on Windows - if not args and compat_os_name == 'nt' and getattr(sys, 'frozen', False): + if not args and os.name == 'nt' and getattr(sys, 'frozen', False): import ctypes.wintypes import msvcrt diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index be67b40fe2..0930d36df9 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -3,7 +3,6 @@ from math import ceil from .compat import compat_ord from .dependencies import Cryptodome -from .utils import bytes_to_intlist, intlist_to_bytes if Cryptodome.AES: def aes_cbc_decrypt_bytes(data, key, iv): @@ -17,15 +16,15 @@ if Cryptodome.AES: else: def aes_cbc_decrypt_bytes(data, key, iv): """ Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """ - return intlist_to_bytes(aes_cbc_decrypt(*map(bytes_to_intlist, (data, key, iv)))) + return bytes(aes_cbc_decrypt(*map(list, (data, key, iv)))) def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): """ Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """ - return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce)))) + return bytes(aes_gcm_decrypt_and_verify(*map(list, (data, key, tag, nonce)))) def aes_cbc_encrypt_bytes(data, key, iv, **kwargs): - return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs)) + return bytes(aes_cbc_encrypt(*map(list, (data, key, iv)), **kwargs)) BLOCK_SIZE_BYTES = 16 @@ -221,7 +220,7 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce): j0 = [*nonce, 0, 0, 0, 1] else: fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8 - ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big')) + ghash_in = nonce + [0] * fill + list((8 * len(nonce)).to_bytes(8, 'big')) j0 = ghash(hash_subkey, ghash_in) # TODO: add nonce support to aes_ctr_decrypt @@ -234,9 +233,9 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce): s_tag = ghash( hash_subkey, data - + [0] * pad_len # pad - + bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data - + ((len(data) * 8).to_bytes(8, 'big'))), # length of data + + [0] * pad_len # pad + + list((0 * 8).to_bytes(8, 'big') # length of associated data + + ((len(data) * 8).to_bytes(8, 'big'))), # length of data ) if tag != aes_ctr_encrypt(s_tag, key, j0): @@ -300,8 +299,8 @@ def aes_decrypt_text(data, password, key_size_bytes): """ NONCE_LENGTH_BYTES = 8 - data = bytes_to_intlist(base64.b64decode(data)) - password = bytes_to_intlist(password.encode()) + data = list(base64.b64decode(data)) + password = list(password.encode()) key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password)) key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES) @@ -310,7 +309,7 @@ def aes_decrypt_text(data, password, key_size_bytes): cipher = data[NONCE_LENGTH_BYTES:] decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)) - return intlist_to_bytes(decrypted_data) + return bytes(decrypted_data) RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) diff --git a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py index d820adaf1e..d779620688 100644 --- a/yt_dlp/compat/__init__.py +++ b/yt_dlp/compat/__init__.py @@ -1,5 +1,4 @@ import os -import sys import xml.etree.ElementTree as etree from .compat_utils import passthrough_module @@ -24,33 +23,14 @@ def compat_etree_fromstring(text): return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) -compat_os_name = os._name if os.name == 'java' else os.name - - -def compat_shlex_quote(s): - from ..utils import shell_quote - return shell_quote(s) - - def compat_ord(c): return c if isinstance(c, int) else ord(c) -if compat_os_name == 'nt' and sys.version_info < (3, 8): - # os.path.realpath on Windows does not follow symbolic links - # prior to Python 3.8 (see https://bugs.python.org/issue9949) - def compat_realpath(path): - while os.path.islink(path): - path = os.path.abspath(os.readlink(path)) - return os.path.realpath(path) -else: - compat_realpath = os.path.realpath - - # Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl # See https://github.com/yt-dlp/yt-dlp/issues/792 # https://docs.python.org/3/library/os.path.html#os.path.expanduser -if compat_os_name in ('nt', 'ce'): +if os.name in ('nt', 'ce'): def compat_expanduser(path): HOME = os.environ.get('HOME') if not HOME: diff --git a/yt_dlp/compat/_deprecated.py b/yt_dlp/compat/_deprecated.py index 607bae9999..445acc1a06 100644 --- a/yt_dlp/compat/_deprecated.py +++ b/yt_dlp/compat/_deprecated.py @@ -8,16 +8,14 @@ passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn( DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6)) del passthrough_module -import base64 -import urllib.error -import urllib.parse +import functools # noqa: F401 +import os -compat_str = str -compat_b64decode = base64.b64decode +compat_os_name = os.name +compat_realpath = os.path.realpath -compat_urlparse = urllib.parse -compat_parse_qs = urllib.parse.parse_qs -compat_urllib_parse_unquote = urllib.parse.unquote -compat_urllib_parse_urlencode = urllib.parse.urlencode -compat_urllib_parse_urlparse = urllib.parse.urlparse + +def compat_shlex_quote(s): + from ..utils import shell_quote + return shell_quote(s) diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py index dfc792eae4..dae2c14592 100644 --- a/yt_dlp/compat/_legacy.py +++ b/yt_dlp/compat/_legacy.py @@ -30,7 +30,7 @@ from asyncio import run as compat_asyncio_run # noqa: F401 from re import Pattern as compat_Pattern # noqa: F401 from re import match as compat_Match # noqa: F401 -from . import compat_expanduser, compat_HTMLParseError, compat_realpath +from . import compat_expanduser, compat_HTMLParseError from .compat_utils import passthrough_module from ..dependencies import brotli as compat_brotli # noqa: F401 from ..dependencies import websockets as compat_websockets # noqa: F401 @@ -78,7 +78,7 @@ compat_kwargs = lambda kwargs: kwargs compat_map = map compat_numeric_types = (int, float, complex) compat_os_path_expanduser = compat_expanduser -compat_os_path_realpath = compat_realpath +compat_os_path_realpath = os.path.realpath compat_print = print compat_shlex_split = shlex.split compat_socket_create_connection = socket.create_connection @@ -104,5 +104,12 @@ compat_xml_parse_error = compat_xml_etree_ElementTree_ParseError = etree.ParseEr compat_xpath = lambda xpath: xpath compat_zip = zip workaround_optparse_bug9161 = lambda: None +compat_str = str +compat_b64decode = base64.b64decode +compat_urlparse = urllib.parse +compat_parse_qs = urllib.parse.parse_qs +compat_urllib_parse_unquote = urllib.parse.unquote +compat_urllib_parse_urlencode = urllib.parse.urlencode +compat_urllib_parse_urlparse = urllib.parse.urlparse legacy = [] diff --git a/yt_dlp/compat/functools.py b/yt_dlp/compat/functools.py deleted file mode 100644 index c2e9e90279..0000000000 --- a/yt_dlp/compat/functools.py +++ /dev/null @@ -1,7 +0,0 @@ -# flake8: noqa: F405 -from functools import * # noqa: F403 - -from .compat_utils import passthrough_module - -passthrough_module(__name__, 'functools') -del passthrough_module diff --git a/yt_dlp/compat/urllib/request.py b/yt_dlp/compat/urllib/request.py index ad9fa83c87..dfc7f4a2dc 100644 --- a/yt_dlp/compat/urllib/request.py +++ b/yt_dlp/compat/urllib/request.py @@ -7,9 +7,9 @@ passthrough_module(__name__, 'urllib.request') del passthrough_module -from .. import compat_os_name +import os -if compat_os_name == 'nt': +if os.name == 'nt': # On older Python versions, proxies are extracted from Windows registry erroneously. [1] # If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2] # It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade @@ -37,4 +37,4 @@ if compat_os_name == 'nt': def getproxies(): return getproxies_environment() or getproxies_registry_patched() -del compat_os_name +del os diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index e673498244..d5b0d3991b 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -25,7 +25,6 @@ from .aes import ( aes_gcm_decrypt_and_verify_bytes, unpad_pkcs7, ) -from .compat import compat_os_name from .dependencies import ( _SECRETSTORAGE_UNAVAILABLE_REASON, secretstorage, @@ -343,7 +342,7 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger): logger.debug(f'cookie version breakdown: {counts}') return jar except PermissionError as error: - if compat_os_name == 'nt' and error.errno == 13: + if os.name == 'nt' and error.errno == 13: message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info' logger.error(message) raise DownloadError(message) # force exit diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 2e3ea2fc4e..e8dcb37cc3 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -20,9 +20,7 @@ from ..utils import ( Namespace, RetryManager, classproperty, - decodeArgument, deprecation_warning, - encodeFilename, format_bytes, join_nonempty, parse_bytes, @@ -219,7 +217,7 @@ class FileDownloader: def temp_name(self, filename): """Returns a temporary filename for the given filename.""" if self.params.get('nopart', False) or filename == '-' or \ - (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))): + (os.path.exists(filename) and not os.path.isfile(filename)): return filename return filename + '.part' @@ -273,7 +271,7 @@ class FileDownloader: """Try to set the last-modified time of the given file.""" if last_modified_hdr is None: return - if not os.path.isfile(encodeFilename(filename)): + if not os.path.isfile(filename): return timestr = last_modified_hdr if timestr is None: @@ -432,13 +430,13 @@ class FileDownloader: """ nooverwrites_and_exists = ( not self.params.get('overwrites', True) - and os.path.exists(encodeFilename(filename)) + and os.path.exists(filename) ) if not hasattr(filename, 'write'): continuedl_and_exists = ( self.params.get('continuedl', True) - and os.path.isfile(encodeFilename(filename)) + and os.path.isfile(filename) and not self.params.get('nopart', False) ) @@ -448,7 +446,7 @@ class FileDownloader: self._hook_progress({ 'filename': filename, 'status': 'finished', - 'total_bytes': os.path.getsize(encodeFilename(filename)), + 'total_bytes': os.path.getsize(filename), }, info_dict) self._finish_multiline_status() return True, False @@ -489,9 +487,7 @@ class FileDownloader: if not self.params.get('verbose', False): return - str_args = [decodeArgument(a) for a in args] - if exe is None: - exe = os.path.basename(str_args[0]) + exe = os.path.basename(args[0]) - self.write_debug(f'{exe} command line: {shell_quote(str_args)}') + self.write_debug(f'{exe} command line: {shell_quote(args)}') diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 6c1ec403c8..7f6b5b45cc 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -23,7 +23,6 @@ from ..utils import ( cli_valueless_option, determine_ext, encodeArgument, - encodeFilename, find_available_port, remove_end, traverse_obj, @@ -67,7 +66,7 @@ class ExternalFD(FragmentFD): 'elapsed': time.time() - started, } if filename != '-': - fsize = os.path.getsize(encodeFilename(tmpfilename)) + fsize = os.path.getsize(tmpfilename) self.try_rename(tmpfilename, filename) status.update({ 'downloaded_bytes': fsize, @@ -184,9 +183,9 @@ class ExternalFD(FragmentFD): dest.write(decrypt_fragment(fragment, src.read())) src.close() if not self.params.get('keep_fragments', False): - self.try_remove(encodeFilename(fragment_filename)) + self.try_remove(fragment_filename) dest.close() - self.try_remove(encodeFilename(f'{tmpfilename}.frag.urls')) + self.try_remove(f'{tmpfilename}.frag.urls') return 0 def _call_process(self, cmd, info_dict): @@ -620,7 +619,7 @@ class FFmpegFD(ExternalFD): args += self._configuration_args(('_o1', '_o', '')) args = [encodeArgument(opt) for opt in args] - args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) + args.append(ffpp._ffmpeg_filename_argument(tmpfilename)) self._debug_cmd(args) piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 0d00196e2e..98784e7039 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -9,10 +9,9 @@ import time from .common import FileDownloader from .http import HttpFD from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 -from ..compat import compat_os_name from ..networking import Request from ..networking.exceptions import HTTPError, IncompleteRead -from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj +from ..utils import DownloadError, RetryManager, traverse_obj from ..utils.networking import HTTPHeaderDict from ..utils.progress import ProgressCalculator @@ -152,7 +151,7 @@ class FragmentFD(FileDownloader): if self.__do_ytdl_file(ctx): self._write_ytdl_file(ctx) if not self.params.get('keep_fragments', False): - self.try_remove(encodeFilename(ctx['fragment_filename_sanitized'])) + self.try_remove(ctx['fragment_filename_sanitized']) del ctx['fragment_filename_sanitized'] def _prepare_frag_download(self, ctx): @@ -188,7 +187,7 @@ class FragmentFD(FileDownloader): }) if self.__do_ytdl_file(ctx): - ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))) + ytdl_file_exists = os.path.isfile(self.ytdl_filename(ctx['filename'])) continuedl = self.params.get('continuedl', True) if continuedl and ytdl_file_exists: self._read_ytdl_file(ctx) @@ -390,7 +389,7 @@ class FragmentFD(FileDownloader): def __exit__(self, exc_type, exc_val, exc_tb): pass - if compat_os_name == 'nt': + if os.name == 'nt': def future_result(future): while True: try: diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index c0165790d1..9c6dd8b799 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -15,7 +15,6 @@ from ..utils import ( ThrottledDownload, XAttrMetadataError, XAttrUnavailableError, - encodeFilename, int_or_none, parse_http_range, try_call, @@ -58,9 +57,8 @@ class HttpFD(FileDownloader): if self.params.get('continuedl', True): # Establish possible resume length - if os.path.isfile(encodeFilename(ctx.tmpfilename)): - ctx.resume_len = os.path.getsize( - encodeFilename(ctx.tmpfilename)) + if os.path.isfile(ctx.tmpfilename): + ctx.resume_len = os.path.getsize(ctx.tmpfilename) ctx.is_resume = ctx.resume_len > 0 @@ -241,7 +239,7 @@ class HttpFD(FileDownloader): ctx.resume_len = byte_counter else: try: - ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename)) + ctx.resume_len = os.path.getsize(ctx.tmpfilename) except FileNotFoundError: ctx.resume_len = 0 raise RetryDownload(e) diff --git a/yt_dlp/downloader/rtmp.py b/yt_dlp/downloader/rtmp.py index d7ffb3b34d..1b831e5f30 100644 --- a/yt_dlp/downloader/rtmp.py +++ b/yt_dlp/downloader/rtmp.py @@ -8,7 +8,6 @@ from ..utils import ( Popen, check_executable, encodeArgument, - encodeFilename, get_exe_version, ) @@ -179,7 +178,7 @@ class RtmpFD(FileDownloader): return False while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live: - prevsize = os.path.getsize(encodeFilename(tmpfilename)) + prevsize = os.path.getsize(tmpfilename) self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes') time.sleep(5.0) # This seems to be needed args = [*basic_args, '--resume'] @@ -187,7 +186,7 @@ class RtmpFD(FileDownloader): args += ['--skip', '1'] args = [encodeArgument(a) for a in args] retval = run_rtmpdump(args) - cursize = os.path.getsize(encodeFilename(tmpfilename)) + cursize = os.path.getsize(tmpfilename) if prevsize == cursize and retval == RD_FAILED: break # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those @@ -196,7 +195,7 @@ class RtmpFD(FileDownloader): retval = RD_SUCCESS break if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE): - fsize = os.path.getsize(encodeFilename(tmpfilename)) + fsize = os.path.getsize(tmpfilename) self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes') self.try_rename(tmpfilename, filename) self._hook_progress({ diff --git a/yt_dlp/downloader/rtsp.py b/yt_dlp/downloader/rtsp.py index e89269fed9..b4b0be7e6e 100644 --- a/yt_dlp/downloader/rtsp.py +++ b/yt_dlp/downloader/rtsp.py @@ -2,7 +2,7 @@ import os import subprocess from .common import FileDownloader -from ..utils import check_executable, encodeFilename +from ..utils import check_executable class RtspFD(FileDownloader): @@ -26,7 +26,7 @@ class RtspFD(FileDownloader): retval = subprocess.call(args) if retval == 0: - fsize = os.path.getsize(encodeFilename(tmpfilename)) + fsize = os.path.getsize(tmpfilename) self.to_screen(f'\r[{args[0]}] {fsize} bytes') self.try_rename(tmpfilename, filename) self._hook_progress({ diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index 66ab083fe0..b1343eed39 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -6,7 +6,6 @@ import hmac import io import json import re -import struct import time import urllib.parse import uuid @@ -18,10 +17,8 @@ from ..networking.exceptions import TransportError from ..utils import ( ExtractorError, OnDemandPagedList, - bytes_to_intlist, decode_base_n, int_or_none, - intlist_to_bytes, time_seconds, traverse_obj, update_url_query, @@ -72,15 +69,15 @@ class AbemaLicenseRH(RequestHandler): }) res = decode_base_n(license_response['k'], table=self._STRTABLE) - encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff)) + encvideokey = list(res.to_bytes(16, 'big')) h = hmac.new( binascii.unhexlify(self._HKEY), (license_response['cid'] + self.ie._DEVICE_ID).encode(), digestmod=hashlib.sha256) - enckey = bytes_to_intlist(h.digest()) + enckey = list(h.digest()) - return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey)) + return bytes(aes_ecb_decrypt(encvideokey, enckey)) class AbemaTVBaseIE(InfoExtractor): diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index c8a2613754..919e1d6af5 100644 --- a/yt_dlp/extractor/adn.py +++ b/yt_dlp/extractor/adn.py @@ -11,11 +11,9 @@ from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, ass_subtitles_timecode, - bytes_to_intlist, bytes_to_long, float_or_none, int_or_none, - intlist_to_bytes, join_nonempty, long_to_bytes, parse_iso8601, @@ -198,16 +196,16 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link') self._K = ''.join(random.choices('0123456789abcdef', k=16)) - message = bytes_to_intlist(json.dumps({ + message = list(json.dumps({ 'k': self._K, 't': token, - })) + }).encode()) # Sometimes authentication fails for no good reason, retry with # a different random padding links_data = None for _ in range(3): - padded_message = intlist_to_bytes(pkcs1pad(message, 128)) + padded_message = bytes(pkcs1pad(message, 128)) n, e = self._RSA_KEY encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n)) authorization = base64.b64encode(encrypted_message).decode() diff --git a/yt_dlp/extractor/anvato.py b/yt_dlp/extractor/anvato.py index ba1d7df372..bd3b19b133 100644 --- a/yt_dlp/extractor/anvato.py +++ b/yt_dlp/extractor/anvato.py @@ -8,10 +8,8 @@ import time from .common import InfoExtractor from ..aes import aes_encrypt from ..utils import ( - bytes_to_intlist, determine_ext, int_or_none, - intlist_to_bytes, join_nonempty, smuggle_url, strip_jsonp, @@ -234,8 +232,8 @@ class AnvatoIE(InfoExtractor): server_time = self._server_time(access_key, video_id) input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}' - auth_secret = intlist_to_bytes(aes_encrypt( - bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY))) + auth_secret = bytes(aes_encrypt( + list(input_data[:64].encode()), list(self._AUTH_KEY))) query = { 'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'), 'rtyp': 'fp', diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 23f6fc6c46..2aa40a77a7 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -25,7 +25,6 @@ import xml.etree.ElementTree from ..compat import ( compat_etree_fromstring, compat_expanduser, - compat_os_name, urllib_req_to_req, ) from ..cookies import LenientSimpleCookie @@ -1029,7 +1028,7 @@ class InfoExtractor: filename = sanitize_filename(f'{basen}.dump', restricted=True) # Working around MAX_PATH limitation on Windows (see # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx) - if compat_os_name == 'nt': + if os.name == 'nt': absfilepath = os.path.abspath(filename) if len(absfilepath) > 259: filename = fR'\\?\{absfilepath}' diff --git a/yt_dlp/extractor/shemaroome.py b/yt_dlp/extractor/shemaroome.py index 284b2f89c1..3ab322f67d 100644 --- a/yt_dlp/extractor/shemaroome.py +++ b/yt_dlp/extractor/shemaroome.py @@ -1,11 +1,9 @@ import base64 from .common import InfoExtractor -from ..aes import aes_cbc_decrypt, unpad_pkcs7 +from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 from ..utils import ( ExtractorError, - bytes_to_intlist, - intlist_to_bytes, unified_strdate, ) @@ -68,10 +66,10 @@ class ShemarooMeIE(InfoExtractor): data_json = self._download_json('https://www.shemaroome.com/users/user_all_lists', video_id, data=data.encode()) if not data_json.get('status'): raise ExtractorError('Premium videos cannot be downloaded yet.', expected=True) - url_data = bytes_to_intlist(base64.b64decode(data_json['new_play_url'])) - key = bytes_to_intlist(base64.b64decode(data_json['key'])) - iv = [0] * 16 - m3u8_url = unpad_pkcs7(intlist_to_bytes(aes_cbc_decrypt(url_data, key, iv))).decode('ascii') + url_data = base64.b64decode(data_json['new_play_url']) + key = base64.b64decode(data_json['key']) + iv = bytes(16) + m3u8_url = unpad_pkcs7(aes_cbc_decrypt_bytes(url_data, key, iv)).decode('ascii') headers = {'stream_key': data_json['stream_key']} formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False, headers=headers) for fmt in formats: diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index eeeece82c2..be2bb33f64 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -9,7 +9,6 @@ from ..utils import ( RetryManager, _configuration_args, deprecation_warning, - encodeFilename, ) @@ -151,7 +150,7 @@ class PostProcessor(metaclass=PostProcessorMetaClass): def try_utime(self, path, atime, mtime, errnote='Cannot update utime of file'): try: - os.utime(encodeFilename(path), (atime, mtime)) + os.utime(path, (atime, mtime)) except Exception: self.report_warning(errnote) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index 16c8bcdda7..d8ba220cab 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -12,7 +12,6 @@ from ..utils import ( PostProcessingError, check_executable, encodeArgument, - encodeFilename, prepend_extension, shell_quote, ) @@ -68,7 +67,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): self.to_screen('There are no thumbnails on disk') return [], info thumbnail_filename = info['thumbnails'][idx]['filepath'] - if not os.path.exists(encodeFilename(thumbnail_filename)): + if not os.path.exists(thumbnail_filename): self.report_warning('Skipping embedding the thumbnail because the file is missing.') return [], info @@ -85,7 +84,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): thumbnail_filename = convertor.convert_thumbnail(thumbnail_filename, 'png') thumbnail_ext = 'png' - mtime = os.stat(encodeFilename(filename)).st_mtime + mtime = os.stat(filename).st_mtime success = True if info['ext'] == 'mp3': @@ -154,12 +153,12 @@ class EmbedThumbnailPP(FFmpegPostProcessor): else: if not prefer_atomicparsley: self.to_screen('mutagen was not found. Falling back to AtomicParsley') - cmd = [encodeFilename(atomicparsley, True), - encodeFilename(filename, True), + cmd = [atomicparsley, + filename, encodeArgument('--artwork'), - encodeFilename(thumbnail_filename, True), + thumbnail_filename, encodeArgument('-o'), - encodeFilename(temp_filename, True)] + temp_filename] cmd += [encodeArgument(o) for o in self._configuration_args('AtomicParsley')] self._report_run('atomicparsley', filename) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 164c46d143..d994754fd3 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -21,7 +21,6 @@ from ..utils import ( determine_ext, dfxp2srt, encodeArgument, - encodeFilename, filter_dict, float_or_none, is_outdated_version, @@ -243,13 +242,13 @@ class FFmpegPostProcessor(PostProcessor): try: if self.probe_available: cmd = [ - encodeFilename(self.probe_executable, True), + self.probe_executable, encodeArgument('-show_streams')] else: cmd = [ - encodeFilename(self.executable, True), + self.executable, encodeArgument('-i')] - cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True)) + cmd.append(self._ffmpeg_filename_argument(path)) self.write_debug(f'{self.basename} command line: {shell_quote(cmd)}') stdout, stderr, returncode = Popen.run( cmd, text=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -282,7 +281,7 @@ class FFmpegPostProcessor(PostProcessor): self.check_version() cmd = [ - encodeFilename(self.probe_executable, True), + self.probe_executable, encodeArgument('-hide_banner'), encodeArgument('-show_format'), encodeArgument('-show_streams'), @@ -335,9 +334,9 @@ class FFmpegPostProcessor(PostProcessor): self.check_version() oldest_mtime = min( - os.stat(encodeFilename(path)).st_mtime for path, _ in input_path_opts if path) + os.stat(path).st_mtime for path, _ in input_path_opts if path) - cmd = [encodeFilename(self.executable, True), encodeArgument('-y')] + cmd = [self.executable, encodeArgument('-y')] # avconv does not have repeat option if self.basename == 'ffmpeg': cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')] @@ -353,7 +352,7 @@ class FFmpegPostProcessor(PostProcessor): args.append('-i') return ( [encodeArgument(arg) for arg in args] - + [encodeFilename(self._ffmpeg_filename_argument(file), True)]) + + [self._ffmpeg_filename_argument(file)]) for arg_type, path_opts in (('i', input_path_opts), ('o', output_path_opts)): cmd += itertools.chain.from_iterable( @@ -522,8 +521,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): return [], information orig_path = prepend_extension(path, 'orig') temp_path = prepend_extension(path, 'temp') - if (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)) - and os.path.exists(encodeFilename(orig_path))): + if (self._nopostoverwrites and os.path.exists(new_path) + and os.path.exists(orig_path)): self.to_screen(f'Post-process file {new_path} exists, skipping') return [], information @@ -838,7 +837,7 @@ class FFmpegMergerPP(FFmpegPostProcessor): args.extend(['-map', f'{i}:v:0']) self.to_screen(f'Merging formats into "{filename}"') self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args) - os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + os.rename(temp_filename, filename) return info['__files_to_merge'], info def can_merge(self): @@ -1039,7 +1038,7 @@ class FFmpegSplitChaptersPP(FFmpegPostProcessor): def _ffmpeg_args_for_chapter(self, number, chapter, info): destination = self._prepare_filename(number, chapter, info) - if not self._downloader._ensure_dir_exists(encodeFilename(destination)): + if not self._downloader._ensure_dir_exists(destination): return chapter['filepath'] = destination diff --git a/yt_dlp/postprocessor/movefilesafterdownload.py b/yt_dlp/postprocessor/movefilesafterdownload.py index 35e87051b4..964ca1f921 100644 --- a/yt_dlp/postprocessor/movefilesafterdownload.py +++ b/yt_dlp/postprocessor/movefilesafterdownload.py @@ -4,8 +4,6 @@ from .common import PostProcessor from ..compat import shutil from ..utils import ( PostProcessingError, - decodeFilename, - encodeFilename, make_dir, ) @@ -21,25 +19,25 @@ class MoveFilesAfterDownloadPP(PostProcessor): return 'MoveFiles' def run(self, info): - dl_path, dl_name = os.path.split(encodeFilename(info['filepath'])) + dl_path, dl_name = os.path.split(info['filepath']) finaldir = info.get('__finaldir', dl_path) finalpath = os.path.join(finaldir, dl_name) if self._downloaded: - info['__files_to_move'][info['filepath']] = decodeFilename(finalpath) + info['__files_to_move'][info['filepath']] = finalpath - make_newfilename = lambda old: decodeFilename(os.path.join(finaldir, os.path.basename(encodeFilename(old)))) + make_newfilename = lambda old: os.path.join(finaldir, os.path.basename(old)) for oldfile, newfile in info['__files_to_move'].items(): if not newfile: newfile = make_newfilename(oldfile) - if os.path.abspath(encodeFilename(oldfile)) == os.path.abspath(encodeFilename(newfile)): + if os.path.abspath(oldfile) == os.path.abspath(newfile): continue - if not os.path.exists(encodeFilename(oldfile)): + if not os.path.exists(oldfile): self.report_warning(f'File "{oldfile}" cannot be found') continue - if os.path.exists(encodeFilename(newfile)): + if os.path.exists(newfile): if self.get_param('overwrites', True): self.report_warning(f'Replacing existing file "{newfile}"') - os.remove(encodeFilename(newfile)) + os.remove(newfile) else: self.report_warning( f'Cannot move file "{oldfile}" out of temporary directory since "{newfile}" already exists. ') diff --git a/yt_dlp/postprocessor/sponskrub.py b/yt_dlp/postprocessor/sponskrub.py index 525b6392a4..ac6db1bc7b 100644 --- a/yt_dlp/postprocessor/sponskrub.py +++ b/yt_dlp/postprocessor/sponskrub.py @@ -9,7 +9,6 @@ from ..utils import ( check_executable, cli_option, encodeArgument, - encodeFilename, prepend_extension, shell_quote, str_or_none, @@ -52,7 +51,7 @@ class SponSkrubPP(PostProcessor): return [], information filename = information['filepath'] - if not os.path.exists(encodeFilename(filename)): # no download + if not os.path.exists(filename): # no download return [], information if information['extractor_key'].lower() != 'youtube': @@ -71,8 +70,8 @@ class SponSkrubPP(PostProcessor): self.report_warning('If sponskrub is run multiple times, unintended parts of the video could be cut out.') temp_filename = prepend_extension(filename, self._temp_ext) - if os.path.exists(encodeFilename(temp_filename)): - os.remove(encodeFilename(temp_filename)) + if os.path.exists(temp_filename): + os.remove(temp_filename) cmd = [self.path] if not self.cutout: diff --git a/yt_dlp/postprocessor/xattrpp.py b/yt_dlp/postprocessor/xattrpp.py index 166aabaf92..e486b797b7 100644 --- a/yt_dlp/postprocessor/xattrpp.py +++ b/yt_dlp/postprocessor/xattrpp.py @@ -1,7 +1,6 @@ import os from .common import PostProcessor -from ..compat import compat_os_name from ..utils import ( PostProcessingError, XAttrMetadataError, @@ -57,7 +56,7 @@ class XAttrMetadataPP(PostProcessor): elif e.reason == 'VALUE_TOO_LONG': self.report_warning(f'Unable to write extended attribute "{xattrname}" due to too long values.') else: - tip = ('You need to use NTFS' if compat_os_name == 'nt' + tip = ('You need to use NTFS' if os.name == 'nt' else 'You may have to enable them in your "/etc/fstab"') raise PostProcessingError(f'This filesystem doesn\'t support extended attributes. {tip}') diff --git a/yt_dlp/update.py b/yt_dlp/update.py index 90df2509f0..ca2ec5f376 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -13,7 +13,6 @@ import sys from dataclasses import dataclass from zipimport import zipimporter -from .compat import compat_realpath from .networking import Request from .networking.exceptions import HTTPError, network_exceptions from .utils import ( @@ -201,8 +200,6 @@ class UpdateInfo: binary_name: str | None = _get_binary_name() # noqa: RUF009: Always returns the same value checksum: str | None = None - _has_update = True - class Updater: # XXX: use class variables to simplify testing @@ -523,7 +520,7 @@ class Updater: @functools.cached_property def filename(self): """Filename of the executable""" - return compat_realpath(_get_variant_and_executable_path()[1]) + return os.path.realpath(_get_variant_and_executable_path()[1]) @functools.cached_property def cmd(self): @@ -562,62 +559,14 @@ class Updater: f'Unable to {action}{delim} visit ' f'https://github.com/{self.requested_repo}/releases/{path}', True) - # XXX: Everything below this line in this class is deprecated / for compat only - @property - def _target_tag(self): - """Deprecated; requested tag with 'tags/' prepended when necessary for API calls""" - return f'tags/{self.requested_tag}' if self.requested_tag != 'latest' else self.requested_tag - - def _check_update(self): - """Deprecated; report whether there is an update available""" - return bool(self.query_update(_output=True)) - - def __getattr__(self, attribute: str): - """Compat getter function for deprecated attributes""" - deprecated_props_map = { - 'check_update': '_check_update', - 'target_tag': '_target_tag', - 'target_channel': 'requested_channel', - } - update_info_props_map = { - 'has_update': '_has_update', - 'new_version': 'version', - 'latest_version': 'requested_version', - 'release_name': 'binary_name', - 'release_hash': 'checksum', - } - - if attribute not in deprecated_props_map and attribute not in update_info_props_map: - raise AttributeError(f'{type(self).__name__!r} object has no attribute {attribute!r}') - - msg = f'{type(self).__name__}.{attribute} is deprecated and will be removed in a future version' - if attribute in deprecated_props_map: - source_name = deprecated_props_map[attribute] - if not source_name.startswith('_'): - msg += f'. Please use {source_name!r} instead' - source = self - mapping = deprecated_props_map - - else: # attribute in update_info_props_map - msg += '. Please call query_update() instead' - source = self.query_update() - if source is None: - source = UpdateInfo('', None, None, None) - source._has_update = False - mapping = update_info_props_map - - deprecation_warning(msg) - for target_name, source_name in mapping.items(): - value = getattr(source, source_name) - setattr(self, target_name, value) - - return getattr(self, attribute) - def run_update(ydl): """Update the program file with the latest version from the repository @returns Whether there was a successful update (No update = False) """ + deprecation_warning( + '"yt_dlp.update.run_update(ydl)" is deprecated and may be removed in a future version. ' + 'Use "yt_dlp.update.Updater(ydl).update()" instead') return Updater(ydl).update() diff --git a/yt_dlp/utils/_deprecated.py b/yt_dlp/utils/_deprecated.py index a8ae8ecb5d..e4762699b7 100644 --- a/yt_dlp/utils/_deprecated.py +++ b/yt_dlp/utils/_deprecated.py @@ -9,31 +9,23 @@ passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn( del passthrough_module -from ._utils import preferredencoding +import re +import struct -def encodeFilename(s, for_subprocess=False): - assert isinstance(s, str) - return s +def bytes_to_intlist(bs): + if not bs: + return [] + if isinstance(bs[0], int): # Python 3 + return list(bs) + else: + return [ord(c) for c in bs] -def decodeFilename(b, for_subprocess=False): - return b +def intlist_to_bytes(xs): + if not xs: + return b'' + return struct.pack('%dB' % len(xs), *xs) -def decodeArgument(b): - return b - - -def decodeOption(optval): - if optval is None: - return optval - if isinstance(optval, bytes): - optval = optval.decode(preferredencoding()) - - assert isinstance(optval, str) - return optval - - -def error_to_compat_str(err): - return str(err) +compiled_regex_type = type(re.compile('')) diff --git a/yt_dlp/utils/_legacy.py b/yt_dlp/utils/_legacy.py index 356e580226..d65b135d9d 100644 --- a/yt_dlp/utils/_legacy.py +++ b/yt_dlp/utils/_legacy.py @@ -313,3 +313,30 @@ def make_HTTPS_handler(params, **kwargs): def process_communicate_or_kill(p, *args, **kwargs): return Popen.communicate_or_kill(p, *args, **kwargs) + + +def encodeFilename(s, for_subprocess=False): + assert isinstance(s, str) + return s + + +def decodeFilename(b, for_subprocess=False): + return b + + +def decodeArgument(b): + return b + + +def decodeOption(optval): + if optval is None: + return optval + if isinstance(optval, bytes): + optval = optval.decode(preferredencoding()) + + assert isinstance(optval, str) + return optval + + +def error_to_compat_str(err): + return str(err) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 89c53c39e7..8517b762ef 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -49,15 +49,11 @@ from ..compat import ( compat_etree_fromstring, compat_expanduser, compat_HTMLParseError, - compat_os_name, ) from ..dependencies import xattr __name__ = __name__.rsplit('.', 1)[0] # noqa: A001: Pretend to be the parent module -# This is not clearly defined otherwise -compiled_regex_type = type(re.compile('')) - class NO_DEFAULT: pass @@ -874,7 +870,7 @@ class Popen(subprocess.Popen): kwargs.setdefault('encoding', 'utf-8') kwargs.setdefault('errors', 'replace') - if shell and compat_os_name == 'nt' and kwargs.get('executable') is None: + if shell and os.name == 'nt' and kwargs.get('executable') is None: if not isinstance(args, str): args = shell_quote(args, shell=True) shell = False @@ -1457,7 +1453,7 @@ def system_identifier(): @functools.cache def get_windows_version(): """ Get Windows version. returns () if it's not running on Windows """ - if compat_os_name == 'nt': + if os.name == 'nt': return version_tuple(platform.win32_ver()[1]) else: return () @@ -1470,7 +1466,7 @@ def write_string(s, out=None, encoding=None): if not out: return - if compat_os_name == 'nt' and supports_terminal_sequences(out): + if os.name == 'nt' and supports_terminal_sequences(out): s = re.sub(r'([\r\n]+)', r' \1', s) enc, buffer = None, out @@ -1503,21 +1499,6 @@ def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs): deprecation_warning._cache = set() -def bytes_to_intlist(bs): - if not bs: - return [] - if isinstance(bs[0], int): # Python 3 - return list(bs) - else: - return [ord(c) for c in bs] - - -def intlist_to_bytes(xs): - if not xs: - return b'' - return struct.pack('%dB' % len(xs), *xs) - - class LockingUnsupportedError(OSError): msg = 'File locking is not supported' @@ -1701,7 +1682,7 @@ _CMD_QUOTE_TRANS = str.maketrans({ def shell_quote(args, *, shell=False): args = list(variadic(args)) - if compat_os_name != 'nt': + if os.name != 'nt': return shlex.join(args) trans = _CMD_QUOTE_TRANS if shell else _WINDOWS_QUOTE_TRANS @@ -4516,7 +4497,7 @@ def urshift(val, n): def write_xattr(path, key, value): # Windows: Write xattrs to NTFS Alternate Data Streams: # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29 - if compat_os_name == 'nt': + if os.name == 'nt': assert ':' not in key assert os.path.exists(path) @@ -4778,12 +4759,12 @@ def jwt_decode_hs256(jwt): return json.loads(base64.urlsafe_b64decode(f'{payload_b64}===')) -WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None +WINDOWS_VT_MODE = False if os.name == 'nt' else None @functools.cache def supports_terminal_sequences(stream): - if compat_os_name == 'nt': + if os.name == 'nt': if not WINDOWS_VT_MODE: return False elif not os.getenv('TERM'): @@ -4877,7 +4858,7 @@ def parse_http_range(range): def read_stdin(what): if what: - eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D' + eof = 'Ctrl+Z' if os.name == 'nt' else 'Ctrl+D' write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n') return sys.stdin From 637d62a3a9fc723d68632c1af25c30acdadeeb85 Mon Sep 17 00:00:00 2001 From: sepro Date: Sun, 17 Nov 2024 00:31:04 +0100 Subject: [PATCH 84/98] [ie/youtube] Player client maintenance (#11528) Authored by: bashonly, seproDev Co-authored-by: bashonly --- README.md | 2 +- yt_dlp/extractor/youtube.py | 73 ++++++++++++++++++------------------- 2 files changed, 36 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 2df72b7498..09096218e8 100644 --- a/README.md +++ b/README.md @@ -1768,7 +1768,7 @@ The following extractors use this feature: #### youtube * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively -* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mweb`, `mediaconnect`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, and `web_creator,mediaconnect` is added as needed for age-gated videos when account age verification is required. Similarly, the `_music` variants are added for `music.youtube.com` URLs. Some clients, such as `web` and `android`, require a `po_token` for their formats to be downloadable. Some clients, such as the `_creator` variants, will only work with authentication. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web` +* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mweb`, `mediaconnect`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, and `web_creator` is added as needed for age-gated videos when account age verification is required. Similarly, the `_music` variants are added for `music.youtube.com` URLs. Some clients, such as `web` and `android`, require a `po_token` for their formats to be downloadable. Some clients, such as the `_creator` variants, will only work with authentication. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web` * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index caa99182ae..2c57ee6005 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -124,14 +124,15 @@ INNERTUBE_CLIENTS = { }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 62, + 'REQUIRE_AUTH': True, }, 'android': { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'ANDROID', - 'clientVersion': '19.29.37', + 'clientVersion': '19.44.38', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.youtube/19.29.37 (Linux; U; Android 11) gzip', + 'userAgent': 'com.google.android.youtube/19.44.38 (Linux; U; Android 11) gzip', 'osName': 'Android', 'osVersion': '11', }, @@ -140,13 +141,14 @@ INNERTUBE_CLIENTS = { 'REQUIRE_JS_PLAYER': False, 'REQUIRE_PO_TOKEN': True, }, + # This client now requires sign-in for every video 'android_music': { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'ANDROID_MUSIC', - 'clientVersion': '7.11.50', + 'clientVersion': '7.27.52', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.apps.youtube.music/7.11.50 (Linux; U; Android 11) gzip', + 'userAgent': 'com.google.android.apps.youtube.music/7.27.52 (Linux; U; Android 11) gzip', 'osName': 'Android', 'osVersion': '11', }, @@ -154,15 +156,16 @@ INNERTUBE_CLIENTS = { 'INNERTUBE_CONTEXT_CLIENT_NAME': 21, 'REQUIRE_JS_PLAYER': False, 'REQUIRE_PO_TOKEN': True, + 'REQUIRE_AUTH': True, }, # This client now requires sign-in for every video 'android_creator': { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'ANDROID_CREATOR', - 'clientVersion': '24.30.100', + 'clientVersion': '24.45.100', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.apps.youtube.creator/24.30.100 (Linux; U; Android 11) gzip', + 'userAgent': 'com.google.android.apps.youtube.creator/24.45.100 (Linux; U; Android 11) gzip', 'osName': 'Android', 'osVersion': '11', }, @@ -170,17 +173,18 @@ INNERTUBE_CLIENTS = { 'INNERTUBE_CONTEXT_CLIENT_NAME': 14, 'REQUIRE_JS_PLAYER': False, 'REQUIRE_PO_TOKEN': True, + 'REQUIRE_AUTH': True, }, # YouTube Kids videos aren't returned on this client for some reason 'android_vr': { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'ANDROID_VR', - 'clientVersion': '1.57.29', + 'clientVersion': '1.60.19', 'deviceMake': 'Oculus', 'deviceModel': 'Quest 3', 'androidSdkVersion': 32, - 'userAgent': 'com.google.android.apps.youtube.vr.oculus/1.57.29 (Linux; U; Android 12L; eureka-user Build/SQ3A.220605.009.A1) gzip', + 'userAgent': 'com.google.android.apps.youtube.vr.oculus/1.60.19 (Linux; U; Android 12L; eureka-user Build/SQ3A.220605.009.A1) gzip', 'osName': 'Android', 'osVersion': '12L', }, @@ -188,68 +192,56 @@ INNERTUBE_CLIENTS = { 'INNERTUBE_CONTEXT_CLIENT_NAME': 28, 'REQUIRE_JS_PLAYER': False, }, - 'android_testsuite': { - 'INNERTUBE_CONTEXT': { - 'client': { - 'clientName': 'ANDROID_TESTSUITE', - 'clientVersion': '1.9', - 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.youtube/1.9 (Linux; U; Android 11) gzip', - 'osName': 'Android', - 'osVersion': '11', - }, - }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 30, - 'REQUIRE_JS_PLAYER': False, - 'PLAYER_PARAMS': '2AMB', - }, # iOS clients have HLS live streams. Setting device model to get 60fps formats. # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558 'ios': { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'IOS', - 'clientVersion': '19.29.1', + 'clientVersion': '19.45.4', 'deviceMake': 'Apple', 'deviceModel': 'iPhone16,2', - 'userAgent': 'com.google.ios.youtube/19.29.1 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)', + 'userAgent': 'com.google.ios.youtube/19.45.4 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)', 'osName': 'iPhone', - 'osVersion': '17.5.1.21F90', + 'osVersion': '18.1.0.22B83', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 5, 'REQUIRE_JS_PLAYER': False, }, + # This client now requires sign-in for every video 'ios_music': { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'IOS_MUSIC', - 'clientVersion': '7.08.2', + 'clientVersion': '7.27.0', 'deviceMake': 'Apple', 'deviceModel': 'iPhone16,2', - 'userAgent': 'com.google.ios.youtubemusic/7.08.2 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)', + 'userAgent': 'com.google.ios.youtubemusic/7.27.0 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)', 'osName': 'iPhone', - 'osVersion': '17.5.1.21F90', + 'osVersion': '18.1.0.22B83', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 26, 'REQUIRE_JS_PLAYER': False, + 'REQUIRE_AUTH': True, }, # This client now requires sign-in for every video 'ios_creator': { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'IOS_CREATOR', - 'clientVersion': '24.30.100', + 'clientVersion': '24.45.100', 'deviceMake': 'Apple', 'deviceModel': 'iPhone16,2', - 'userAgent': 'com.google.ios.ytcreator/24.30.100 (iPhone16,2; U; CPU iOS 17_5_1 like Mac OS X;)', + 'userAgent': 'com.google.ios.ytcreator/24.45.100 (iPhone16,2; U; CPU iOS 18_1_0 like Mac OS X;)', 'osName': 'iPhone', - 'osVersion': '17.5.1.21F90', + 'osVersion': '18.1.0.22B83', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 15, 'REQUIRE_JS_PLAYER': False, + 'REQUIRE_AUTH': True, }, # mweb has 'ultralow' formats # See: https://github.com/yt-dlp/yt-dlp/pull/557 @@ -282,8 +274,10 @@ INNERTUBE_CLIENTS = { }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 85, + 'REQUIRE_AUTH': True, }, - # This client has pre-merged video+audio 720p/1080p streams + # This client now requires sign-in for every video + # It may be able to receive pre-merged video+audio 720p/1080p streams 'mediaconnect': { 'INNERTUBE_CONTEXT': { 'client': { @@ -293,6 +287,7 @@ INNERTUBE_CLIENTS = { }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 95, 'REQUIRE_JS_PLAYER': False, + 'REQUIRE_AUTH': True, }, } @@ -321,6 +316,7 @@ def build_innertube_clients(): ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com') ytcfg.setdefault('REQUIRE_JS_PLAYER', True) ytcfg.setdefault('REQUIRE_PO_TOKEN', False) + ytcfg.setdefault('REQUIRE_AUTH', False) ytcfg.setdefault('PLAYER_PARAMS', None) ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en') @@ -4059,9 +4055,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if smuggled_data.get('is_music_url') or self.is_music_url(url): for requested_client in requested_clients: _, base_client, variant = _split_innertube_client(requested_client) - music_client = f'{base_client}_music' + music_client = f'{base_client}_music' if base_client != 'mweb' else 'web_music' if variant != 'music' and music_client in INNERTUBE_CLIENTS: - requested_clients.append(music_client) + if not INNERTUBE_CLIENTS[music_client]['REQUIRE_AUTH'] or self.is_authenticated: + requested_clients.append(music_client) return orderedSet(requested_clients) @@ -4174,10 +4171,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self.to_screen( f'{video_id}: This video is age-restricted and YouTube is requiring ' 'account age-verification; some formats may be missing', only_once=True) - # web_creator and mediaconnect can work around the age-verification requirement - # _testsuite & _vr variants can also work around age-verification + # web_creator can work around the age-verification requirement + # android_vr and mediaconnect may also be able to work around age-verification # tv_embedded may(?) still work around age-verification if the video is embeddable - append_client('web_creator', 'mediaconnect') + append_client('web_creator') prs.extend(deprioritized_prs) From 52c0ffe40ad6e8404d93296f575007b05b04c686 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 16 Nov 2024 23:40:21 +0000 Subject: [PATCH 85/98] [ie/youtube] Remove broken OAuth support (#11558) Closes #11462 Authored by: bashonly --- yt_dlp/extractor/youtube.py | 240 +++--------------------------------- 1 file changed, 18 insertions(+), 222 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 2c57ee6005..a3b237bc8d 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -22,7 +22,7 @@ import urllib.parse from .common import InfoExtractor, SearchInfoExtractor from .openload import PhantomJSwrapper from ..jsinterp import JSInterpreter -from ..networking.exceptions import HTTPError, TransportError, network_exceptions +from ..networking.exceptions import HTTPError, network_exceptions from ..utils import ( NO_DEFAULT, ExtractorError, @@ -50,12 +50,12 @@ from ..utils import ( parse_iso8601, parse_qs, qualities, + remove_end, remove_start, smuggle_url, str_or_none, str_to_int, strftime_or_none, - time_seconds, traverse_obj, try_call, try_get, @@ -573,208 +573,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor): self._check_login_required() def _perform_login(self, username, password): - auth_type, _, user = (username or '').partition('+') - - if auth_type != 'oauth': - raise ExtractorError(self._youtube_login_hint, expected=True) - - self._initialize_oauth(user, password) - - ''' - OAuth 2.0 Device Authorization Grant flow, used by the YouTube TV client (youtube.com/tv). - - For more information regarding OAuth 2.0 and the Device Authorization Grant flow in general, see: - - https://developers.google.com/identity/protocols/oauth2/limited-input-device - - https://accounts.google.com/.well-known/openid-configuration - - https://www.rfc-editor.org/rfc/rfc8628 - - https://www.rfc-editor.org/rfc/rfc6749 - - Note: The official client appears to use a proxied version of the oauth2 endpoints on youtube.com/o/oauth2, - which applies some modifications to the response (such as returning errors as 200 OK). - Since the client works with the standard API, we will use that as it is well-documented. - ''' - - _OAUTH_PROFILE = None - _OAUTH_ACCESS_TOKEN_CACHE = {} - _OAUTH_DISPLAY_ID = 'oauth' - - # YouTube TV (TVHTML5) client. You can find these at youtube.com/tv - _OAUTH_CLIENT_ID = '861556708454-d6dlm3lh05idd8npek18k6be8ba3oc68.apps.googleusercontent.com' - _OAUTH_CLIENT_SECRET = 'SboVhoG9s0rNafixCSGGKXAT' - _OAUTH_SCOPE = 'http://gdata.youtube.com https://www.googleapis.com/auth/youtube-paid-content' - - # From https://accounts.google.com/.well-known/openid-configuration - # Technically, these should be fetched dynamically and not hard-coded. - # However, as these endpoints rarely change, we can risk saving an extra request for every invocation. - _OAUTH_DEVICE_AUTHORIZATION_ENDPOINT = 'https://oauth2.googleapis.com/device/code' - _OAUTH_TOKEN_ENDPOINT = 'https://oauth2.googleapis.com/token' - - @property - def _oauth_cache_key(self): - return f'oauth_refresh_token_{self._OAUTH_PROFILE}' - - def _read_oauth_error_response(self, response): - return traverse_obj( - self._webpage_read_content(response, self._OAUTH_TOKEN_ENDPOINT, self._OAUTH_DISPLAY_ID, fatal=False), - ({json.loads}, 'error', {str})) - - def _set_oauth_info(self, token_response): - YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.setdefault(self._OAUTH_PROFILE, {}).update({ - 'access_token': token_response['access_token'], - 'token_type': token_response['token_type'], - 'expiry': time_seconds( - seconds=traverse_obj(token_response, ('expires_in', {float_or_none}), default=300) - 10), - }) - refresh_token = traverse_obj(token_response, ('refresh_token', {str})) - if refresh_token: - self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token) - YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token - - def _initialize_oauth(self, user, refresh_token): - self._OAUTH_PROFILE = user or 'default' - - if self._OAUTH_PROFILE in YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE: - self.write_debug(f'{self._OAUTH_DISPLAY_ID}: Using cached access token for profile "{self._OAUTH_PROFILE}"') - return - - YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE] = {} - - if refresh_token: - msg = f'{self._OAUTH_DISPLAY_ID}: Using password input as refresh token' - if self.get_param('cachedir') is not False: - msg += ' and caching token to disk; you should supply an empty password next time' - self.to_screen(msg) - self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token) - else: - refresh_token = self.cache.load(self._NETRC_MACHINE, self._oauth_cache_key) - - if refresh_token: - YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token - try: - token_response = self._refresh_token(refresh_token) - except ExtractorError as e: - error_msg = str(e.orig_msg).replace('Failed to refresh access token: ', '') - self.report_warning(f'{self._OAUTH_DISPLAY_ID}: Failed to refresh access token: {error_msg}') - token_response = self._oauth_authorize - else: - token_response = self._oauth_authorize - - self._set_oauth_info(token_response) - self.write_debug(f'{self._OAUTH_DISPLAY_ID}: Logged in using profile "{self._OAUTH_PROFILE}"') - - def _refresh_token(self, refresh_token): - try: - token_response = self._download_json( - self._OAUTH_TOKEN_ENDPOINT, - video_id=self._OAUTH_DISPLAY_ID, - note='Refreshing access token', - data=json.dumps({ - 'client_id': self._OAUTH_CLIENT_ID, - 'client_secret': self._OAUTH_CLIENT_SECRET, - 'refresh_token': refresh_token, - 'grant_type': 'refresh_token', - }).encode(), - headers={'Content-Type': 'application/json'}) - except ExtractorError as e: - if isinstance(e.cause, HTTPError): - error = self._read_oauth_error_response(e.cause.response) - if error == 'invalid_grant': - # RFC6749 § 5.2 - raise ExtractorError( - 'Failed to refresh access token: Refresh token is invalid, revoked, or expired (invalid_grant)', - expected=True, video_id=self._OAUTH_DISPLAY_ID) - raise ExtractorError( - f'Failed to refresh access token: Authorization server returned error {error}', - video_id=self._OAUTH_DISPLAY_ID) - raise - return token_response - - @property - def _oauth_authorize(self): - code_response = self._download_json( - self._OAUTH_DEVICE_AUTHORIZATION_ENDPOINT, - video_id=self._OAUTH_DISPLAY_ID, - note='Initializing authorization flow', - data=json.dumps({ - 'client_id': self._OAUTH_CLIENT_ID, - 'scope': self._OAUTH_SCOPE, - }).encode(), - headers={'Content-Type': 'application/json'}) - - verification_url = traverse_obj(code_response, ('verification_url', {str})) - user_code = traverse_obj(code_response, ('user_code', {str})) - if not verification_url or not user_code: + if username.startswith('oauth'): raise ExtractorError( - 'Authorization server did not provide verification_url or user_code', video_id=self._OAUTH_DISPLAY_ID) + f'Login with OAuth is no longer supported. {self._youtube_login_hint}', expected=True) - # note: The whitespace is intentional - self.to_screen( - f'{self._OAUTH_DISPLAY_ID}: To give yt-dlp access to your account, ' - f'go to {verification_url} and enter code {user_code}') - - # RFC8628 § 3.5: default poll interval is 5 seconds if not provided - poll_interval = traverse_obj(code_response, ('interval', {int}), default=5) - - for retry in self.RetryManager(): - while True: - try: - token_response = self._download_json( - self._OAUTH_TOKEN_ENDPOINT, - video_id=self._OAUTH_DISPLAY_ID, - note=False, - errnote='Failed to request access token', - data=json.dumps({ - 'client_id': self._OAUTH_CLIENT_ID, - 'client_secret': self._OAUTH_CLIENT_SECRET, - 'device_code': code_response['device_code'], - 'grant_type': 'urn:ietf:params:oauth:grant-type:device_code', - }).encode(), - headers={'Content-Type': 'application/json'}) - except ExtractorError as e: - if isinstance(e.cause, TransportError): - retry.error = e - break - elif isinstance(e.cause, HTTPError): - error = self._read_oauth_error_response(e.cause.response) - if not error: - retry.error = e - break - - if error == 'authorization_pending': - time.sleep(poll_interval) - continue - elif error == 'expired_token': - raise ExtractorError( - 'Authorization timed out', expected=True, video_id=self._OAUTH_DISPLAY_ID) - elif error == 'access_denied': - raise ExtractorError( - 'You denied access to an account', expected=True, video_id=self._OAUTH_DISPLAY_ID) - elif error == 'slow_down': - # RFC8628 § 3.5: add 5 seconds to the poll interval - poll_interval += 5 - time.sleep(poll_interval) - continue - else: - raise ExtractorError( - f'Authorization server returned an error when fetching access token: {error}', - video_id=self._OAUTH_DISPLAY_ID) - raise - - return token_response - - def _update_oauth(self): - token = YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.get(self._OAUTH_PROFILE) - if token is None or token['expiry'] > time.time(): - return - - self._set_oauth_info(self._refresh_token(token['refresh_token'])) + self.report_warning( + f'Login with password is not supported for YouTube. {self._youtube_login_hint}') @property def _youtube_login_hint(self): - return ('Use --username=oauth[+PROFILE] --password="" to log in using oauth, ' - f'or else u{self._login_hint(method="cookies")[1:]}. ' - 'See https://github.com/yt-dlp/yt-dlp/wiki/Extractors#logging-in-with-oauth for more on how to use oauth. ' - 'See https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies for help with cookies') + return (f'{self._login_hint(method="cookies")}. Also see ' + 'https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies ' + 'for tips on effectively exporting YouTube cookies') def _check_login_required(self): if self._LOGIN_REQUIRED and not self.is_authenticated: @@ -924,7 +734,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): @functools.cached_property def is_authenticated(self): - return self._OAUTH_PROFILE or bool(self._generate_sapisidhash_header()) + return bool(self._generate_sapisidhash_header()) def extract_ytcfg(self, video_id, webpage): if not webpage: @@ -934,16 +744,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor): r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', default='{}'), video_id, fatal=False) or {} - def _generate_oauth_headers(self): - self._update_oauth() - oauth_token = YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.get(self._OAUTH_PROFILE) - if not oauth_token: - return {} - - return { - 'Authorization': f'{oauth_token["token_type"]} {oauth_token["access_token"]}', - } - def _generate_cookie_auth_headers(self, *, ytcfg=None, account_syncid=None, session_index=None, origin=None, **kwargs): headers = {} account_syncid = account_syncid or self._extract_account_syncid(ytcfg) @@ -973,14 +773,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'Origin': origin, 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg), 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client), - **self._generate_oauth_headers(), **self._generate_cookie_auth_headers(ytcfg=ytcfg, account_syncid=account_syncid, session_index=session_index, origin=origin), } return filter_dict(headers) - def _generate_webpage_headers(self): - return self._generate_oauth_headers() - def _download_ytcfg(self, client, video_id): url = { 'web': 'https://www.youtube.com', @@ -990,8 +786,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if not url: return {} webpage = self._download_webpage( - url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config', - headers=self._generate_webpage_headers()) + url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config') return self.extract_ytcfg(video_id, webpage) or {} @staticmethod @@ -3256,8 +3051,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): code = self._download_webpage( player_url, video_id, fatal=fatal, note='Downloading player ' + player_id, - errnote=f'Download of {player_url} failed', - headers=self._generate_webpage_headers()) + errnote=f'Download of {player_url} failed') if code: self._code_cache[player_id] = code return self._code_cache.get(player_id) @@ -3540,8 +3334,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self._download_webpage( url, video_id, f'Marking {label}watched', - 'Unable to mark watched', fatal=False, - headers=self._generate_webpage_headers()) + 'Unable to mark watched', fatal=False) @classmethod def _extract_from_webpage(cls, url, webpage): @@ -4523,7 +4316,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if pp: query['pp'] = pp webpage = self._download_webpage( - webpage_url, video_id, fatal=False, query=query, headers=self._generate_webpage_headers()) + webpage_url, video_id, fatal=False, query=query) master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg() @@ -4666,6 +4459,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self.raise_geo_restricted(subreason, countries, metadata_available=True) reason += f'. {subreason}' if reason: + if 'sign in' in reason.lower(): + reason = remove_end(reason, 'This helps protect our community. Learn more') + reason = f'{remove_end(reason.strip(), ".")}. {self._youtube_login_hint}' self.raise_no_formats(reason, expected=True) keywords = get_first(video_details, 'keywords', expected_type=list) or [] @@ -5811,7 +5607,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): webpage, data = None, None for retry in self.RetryManager(fatal=fatal): try: - webpage = self._download_webpage(url, item_id, note='Downloading webpage', headers=self._generate_webpage_headers()) + webpage = self._download_webpage(url, item_id, note='Downloading webpage') data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {} except ExtractorError as e: if isinstance(e.cause, network_exceptions): From 7cecd299e4a5ef1f0f044b2fedc26f17e41f15e3 Mon Sep 17 00:00:00 2001 From: sepro Date: Sun, 17 Nov 2024 13:32:12 +0100 Subject: [PATCH 86/98] [ie/chaturbate] Don't break embed detection (#11565) Bugfix for 720b3dc453c342bc2e8df7dbc0acaab4479de46c Authored by: seproDev --- yt_dlp/extractor/chaturbate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/chaturbate.py b/yt_dlp/extractor/chaturbate.py index aa70f26a1b..a40b7d39c7 100644 --- a/yt_dlp/extractor/chaturbate.py +++ b/yt_dlp/extractor/chaturbate.py @@ -79,7 +79,7 @@ class ChaturbateIE(InfoExtractor): 'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True), } - def _extract_from_webpage(self, video_id, tld): + def _extract_from_html(self, video_id, tld): webpage = self._download_webpage( f'https://chaturbate.{tld}/{video_id}/', video_id, headers=self.geo_verification_headers(), impersonate=True) @@ -151,4 +151,4 @@ class ChaturbateIE(InfoExtractor): def _real_extract(self, url): video_id, tld = self._match_valid_url(url).group('id', 'tld') - return self._extract_from_api(video_id, tld) or self._extract_from_webpage(video_id, tld) + return self._extract_from_api(video_id, tld) or self._extract_from_html(video_id, tld) From eb15fd5a32d8b35ef515f7a3d1158c03025648ff Mon Sep 17 00:00:00 2001 From: krichbanana <77071421+krichbanana@users.noreply.github.com> Date: Sun, 17 Nov 2024 09:12:26 -0500 Subject: [PATCH 87/98] [ie/kenh14] Add extractor (#3996) Closes #3937 Authored by: krichbanana, pzhlkj6612 Co-authored-by: Mozi <29089388+pzhlkj6612@users.noreply.github.com> --- yt_dlp/extractor/_extractors.py | 4 + yt_dlp/extractor/kenh14.py | 160 ++++++++++++++++++++++++++++++++ 2 files changed, 164 insertions(+) create mode 100644 yt_dlp/extractor/kenh14.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 25a233a2d6..af903f307b 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -946,6 +946,10 @@ from .kaltura import KalturaIE from .kankanews import KankaNewsIE from .karaoketv import KaraoketvIE from .kelbyone import KelbyOneIE +from .kenh14 import ( + Kenh14PlaylistIE, + Kenh14VideoIE, +) from .khanacademy import ( KhanAcademyIE, KhanAcademyUnitIE, diff --git a/yt_dlp/extractor/kenh14.py b/yt_dlp/extractor/kenh14.py new file mode 100644 index 0000000000..3c46020e8b --- /dev/null +++ b/yt_dlp/extractor/kenh14.py @@ -0,0 +1,160 @@ +from .common import InfoExtractor +from ..utils import ( + clean_html, + extract_attributes, + get_element_by_class, + get_element_html_by_attribute, + get_elements_html_by_class, + int_or_none, + parse_duration, + parse_iso8601, + remove_start, + strip_or_none, + unescapeHTML, + update_url, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class Kenh14VideoIE(InfoExtractor): + _VALID_URL = r'https?://video\.kenh14\.vn/(?:video/)?[\w-]+-(?P[0-9]+)\.chn' + _TESTS = [{ + 'url': 'https://video.kenh14.vn/video/mo-hop-iphone-14-pro-max-nguon-unbox-therapy-316173.chn', + 'md5': '1ed67f9c3a1e74acf15db69590cf6210', + 'info_dict': { + 'id': '316173', + 'ext': 'mp4', + 'title': 'Video mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)', + 'description': 'Video mở hộp iPhone 14 Pro MaxVideo mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)', + 'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$', + 'tags': [], + 'uploader': 'Unbox Therapy', + 'upload_date': '20220517', + 'view_count': int, + 'duration': 722.86, + 'timestamp': 1652764468, + }, + }, { + 'url': 'https://video.kenh14.vn/video-316174.chn', + 'md5': '2b41877d2afaf4a3f487ceda8e5c7cbd', + 'info_dict': { + 'id': '316174', + 'ext': 'mp4', + 'title': 'Khoảnh khắc VĐV nằm gục khóc sau chiến thắng: 7 năm trời Việt Nam mới có HCV kiếm chém nữ, chỉ có 8 tháng để khổ luyện trước khi lên sàn đấu', + 'description': 'md5:de86aa22e143e2b277bce8ec9c6f17dc', + 'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$', + 'tags': [], + 'upload_date': '20220517', + 'view_count': int, + 'duration': 70.04, + 'timestamp': 1652766021, + }, + }, { + 'url': 'https://video.kenh14.vn/0-344740.chn', + 'md5': 'b843495d5e728142c8870c09b46df2a9', + 'info_dict': { + 'id': '344740', + 'ext': 'mov', + 'title': 'Kỳ Duyên đầy căng thẳng trong buổi ra quân đi Miss Universe, nghi thức tuyên thuệ lần đầu xuất hiện gây nhiều tranh cãi', + 'description': 'md5:2a2dbb4a7397169fb21ee68f09160497', + 'thumbnail': r're:^https?://kenh14cdn\.com/.*\.jpg$', + 'tags': ['kỳ duyên', 'Kỳ Duyên tuyên thuệ', 'miss universe'], + 'uploader': 'Quang Vũ', + 'upload_date': '20241024', + 'view_count': int, + 'duration': 198.88, + 'timestamp': 1729741590, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + attrs = extract_attributes(get_element_html_by_attribute('type', 'VideoStream', webpage) or '') + direct_url = attrs['data-vid'] + + metadata = self._download_json( + 'https://api.kinghub.vn/video/api/v1/detailVideoByGet?FileName={}'.format( + remove_start(direct_url, 'kenh14cdn.com/')), video_id, fatal=False) + + formats = [{'url': f'https://{direct_url}', 'format_id': 'http', 'quality': 1}] + subtitles = {} + video_data = self._download_json( + f'https://{direct_url}.json', video_id, note='Downloading video data', fatal=False) + if hls_url := traverse_obj(video_data, ('hls', {url_or_none})): + fmts, subs = self._extract_m3u8_formats_and_subtitles( + hls_url, video_id, m3u8_id='hls', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + if dash_url := traverse_obj(video_data, ('mpd', {url_or_none})): + fmts, subs = self._extract_mpd_formats_and_subtitles( + dash_url, video_id, mpd_id='dash', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + return { + **traverse_obj(metadata, { + 'duration': ('duration', {parse_duration}), + 'uploader': ('author', {strip_or_none}), + 'timestamp': ('uploadtime', {parse_iso8601(delimiter=' ')}), + 'view_count': ('views', {int_or_none}), + }), + 'id': video_id, + 'title': ( + traverse_obj(metadata, ('title', {strip_or_none})) + or clean_html(self._og_search_title(webpage)) + or clean_html(get_element_by_class('vdbw-title', webpage))), + 'formats': formats, + 'subtitles': subtitles, + 'description': ( + clean_html(self._og_search_description(webpage)) + or clean_html(get_element_by_class('vdbw-sapo', webpage))), + 'thumbnail': (self._og_search_thumbnail(webpage) or attrs.get('data-thumb')), + 'tags': traverse_obj(self._html_search_meta('keywords', webpage), ( + {lambda x: x.split(';')}, ..., filter)), + } + + +class Kenh14PlaylistIE(InfoExtractor): + _VALID_URL = r'https?://video\.kenh14\.vn/playlist/[\w-]+-(?P[0-9]+)\.chn' + _TESTS = [{ + 'url': 'https://video.kenh14.vn/playlist/tran-tinh-naked-love-mua-2-71.chn', + 'info_dict': { + 'id': '71', + 'title': 'Trần Tình (Naked love) mùa 2', + 'description': 'md5:e9522339304956dea931722dd72eddb2', + 'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$', + }, + 'playlist_count': 9, + }, { + 'url': 'https://video.kenh14.vn/playlist/0-72.chn', + 'info_dict': { + 'id': '72', + 'title': 'Lau Lại Đầu Từ', + 'description': 'Cùng xem xưa và nay có gì khác biệt nhé!', + 'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$', + }, + 'playlist_count': 6, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + + category_detail = get_element_by_class('category-detail', webpage) or '' + embed_info = traverse_obj( + self._yield_json_ld(webpage, playlist_id), + (lambda _, v: v['name'] and v['alternateName'], any)) or {} + + return self.playlist_from_matches( + get_elements_html_by_class('video-item', webpage), playlist_id, + (clean_html(get_element_by_class('name', category_detail)) or unescapeHTML(embed_info.get('name'))), + getter=lambda x: 'https://video.kenh14.vn/video/video-{}.chn'.format(extract_attributes(x)['data-id']), + ie=Kenh14VideoIE, playlist_description=( + clean_html(get_element_by_class('description', category_detail)) + or unescapeHTML(embed_info.get('alternateName'))), + thumbnail=traverse_obj( + self._og_search_thumbnail(webpage), + ({url_or_none}, {update_url(query=None)}))) From 10fc719bc7f1eef469389c5219102266ef411f29 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Mon, 18 Nov 2024 01:22:40 +0900 Subject: [PATCH 88/98] [cleanup] Remove dead extractors (#11566) - Removes MildomClipIE, MildomIE, MildomUserVodIE, MildomVodIE - Removes PokemonIE, PokemonWatchIE - Removes VeohIE, VeohUserIE Closes #3373, Closes #7059 Authored by: doe1080 --- yt_dlp/extractor/_extractors.py | 14 -- yt_dlp/extractor/mildom.py | 291 -------------------------------- yt_dlp/extractor/pokemon.py | 136 --------------- yt_dlp/extractor/veoh.py | 189 --------------------- 4 files changed, 630 deletions(-) delete mode 100644 yt_dlp/extractor/mildom.py delete mode 100644 yt_dlp/extractor/pokemon.py delete mode 100644 yt_dlp/extractor/veoh.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index af903f307b..0d849c1697 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1139,12 +1139,6 @@ from .microsoftembed import ( MicrosoftMediusIE, ) from .microsoftstream import MicrosoftStreamIE -from .mildom import ( - MildomClipIE, - MildomIE, - MildomUserVodIE, - MildomVodIE, -) from .minds import ( MindsChannelIE, MindsGroupIE, @@ -1563,10 +1557,6 @@ from .podbayfm import ( ) from .podchaser import PodchaserIE from .podomatic import PodomaticIE -from .pokemon import ( - PokemonIE, - PokemonWatchIE, -) from .pokergo import ( PokerGoCollectionIE, PokerGoIE, @@ -2288,10 +2278,6 @@ from .utreon import UtreonIE from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE from .veo import VeoIE -from .veoh import ( - VeohIE, - VeohUserIE, -) from .vesti import VestiIE from .vevo import ( VevoIE, diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py deleted file mode 100644 index 88a2b9e891..0000000000 --- a/yt_dlp/extractor/mildom.py +++ /dev/null @@ -1,291 +0,0 @@ -import functools -import json -import uuid - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - OnDemandPagedList, - determine_ext, - dict_get, - float_or_none, - traverse_obj, -) - - -class MildomBaseIE(InfoExtractor): - _GUEST_ID = None - - def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None): - if not self._GUEST_ID: - self._GUEST_ID = f'pc-gp-{uuid.uuid4()}' - - content = self._download_json( - url, video_id, note=note, data=json.dumps(body).encode() if body else None, - headers={'Content-Type': 'application/json'} if body else {}, - query={ - '__guest_id': self._GUEST_ID, - '__platform': 'web', - **(query or {}), - }) - - if content['code'] != 0: - raise ExtractorError( - f'Mildom says: {content["message"]} (code {content["code"]})', - expected=True) - return content['body'] - - -class MildomIE(MildomBaseIE): - IE_NAME = 'mildom' - IE_DESC = 'Record ongoing live by specific user in Mildom' - _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/(?P\d+)' - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(f'https://www.mildom.com/{video_id}', video_id) - - enterstudio = self._call_api( - 'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', video_id, - note='Downloading live metadata', query={'user_id': video_id}) - result_video_id = enterstudio.get('log_id', video_id) - - servers = self._call_api( - 'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', result_video_id, - note='Downloading live server list', query={ - 'user_id': video_id, - 'live_server_type': 'hls', - }) - - playback_token = self._call_api( - 'https://cloudac.mildom.com/nonolive/gappserv/live/token', result_video_id, - note='Obtaining live playback token', body={'host_id': video_id, 'type': 'hls'}) - playback_token = traverse_obj(playback_token, ('data', ..., 'token'), get_all=False) - if not playback_token: - raise ExtractorError('Failed to obtain live playback token') - - formats = self._extract_m3u8_formats( - f'{servers["stream_server"]}/{video_id}_master.m3u8?{playback_token}', - result_video_id, 'mp4', headers={ - 'Referer': 'https://www.mildom.com/', - 'Origin': 'https://www.mildom.com', - }) - - for fmt in formats: - fmt.setdefault('http_headers', {})['Referer'] = 'https://www.mildom.com/' - - return { - 'id': result_video_id, - 'title': self._html_search_meta('twitter:description', webpage, default=None) or traverse_obj(enterstudio, 'anchor_intro'), - 'description': traverse_obj(enterstudio, 'intro', 'live_intro', expected_type=str), - 'timestamp': float_or_none(enterstudio.get('live_start_ms'), scale=1000), - 'uploader': self._html_search_meta('twitter:title', webpage, default=None) or traverse_obj(enterstudio, 'loginname'), - 'uploader_id': video_id, - 'formats': formats, - 'is_live': True, - } - - -class MildomVodIE(MildomBaseIE): - IE_NAME = 'mildom:vod' - IE_DESC = 'VOD in Mildom' - _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P\d+)/(?P(?P=user_id)-[a-zA-Z0-9]+-?[0-9]*)' - _TESTS = [{ - 'url': 'https://www.mildom.com/playback/10882672/10882672-1597662269', - 'info_dict': { - 'id': '10882672-1597662269', - 'ext': 'mp4', - 'title': '始めてのミルダム配信じゃぃ!', - 'thumbnail': r're:^https?://.*\.(png|jpg)$', - 'upload_date': '20200817', - 'duration': 4138.37, - 'description': 'ゲームをしたくて!', - 'timestamp': 1597662269.0, - 'uploader_id': '10882672', - 'uploader': 'kson組長(けいそん)', - }, - }, { - 'url': 'https://www.mildom.com/playback/10882672/10882672-1597758589870-477', - 'info_dict': { - 'id': '10882672-1597758589870-477', - 'ext': 'mp4', - 'title': '【kson】感染メイズ!麻酔銃で無双する', - 'thumbnail': r're:^https?://.*\.(png|jpg)$', - 'timestamp': 1597759093.0, - 'uploader': 'kson組長(けいそん)', - 'duration': 4302.58, - 'uploader_id': '10882672', - 'description': 'このステージ絶対乗り越えたい', - 'upload_date': '20200818', - }, - }, { - 'url': 'https://www.mildom.com/playback/10882672/10882672-buha9td2lrn97fk2jme0', - 'info_dict': { - 'id': '10882672-buha9td2lrn97fk2jme0', - 'ext': 'mp4', - 'title': '【kson組長】CART RACER!!!', - 'thumbnail': r're:^https?://.*\.(png|jpg)$', - 'uploader_id': '10882672', - 'uploader': 'kson組長(けいそん)', - 'upload_date': '20201104', - 'timestamp': 1604494797.0, - 'duration': 4657.25, - 'description': 'WTF', - }, - }] - - def _real_extract(self, url): - user_id, video_id = self._match_valid_url(url).group('user_id', 'id') - webpage = self._download_webpage(f'https://www.mildom.com/playback/{user_id}/{video_id}', video_id) - - autoplay = self._call_api( - 'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id, - note='Downloading playback metadata', query={ - 'v_id': video_id, - })['playback'] - - formats = [{ - 'url': autoplay['audio_url'], - 'format_id': 'audio', - 'protocol': 'm3u8_native', - 'vcodec': 'none', - 'acodec': 'aac', - 'ext': 'm4a', - }] - for fmt in autoplay['video_link']: - formats.append({ - 'format_id': 'video-{}'.format(fmt['name']), - 'url': fmt['url'], - 'protocol': 'm3u8_native', - 'width': fmt['level'] * autoplay['video_width'] // autoplay['video_height'], - 'height': fmt['level'], - 'vcodec': 'h264', - 'acodec': 'aac', - 'ext': 'mp4', - }) - - return { - 'id': video_id, - 'title': self._html_search_meta(('og:description', 'description'), webpage, default=None) or autoplay.get('title'), - 'description': traverse_obj(autoplay, 'video_intro'), - 'timestamp': float_or_none(autoplay.get('publish_time'), scale=1000), - 'duration': float_or_none(autoplay.get('video_length'), scale=1000), - 'thumbnail': dict_get(autoplay, ('upload_pic', 'video_pic')), - 'uploader': traverse_obj(autoplay, ('author_info', 'login_name')), - 'uploader_id': user_id, - 'formats': formats, - } - - -class MildomClipIE(MildomBaseIE): - IE_NAME = 'mildom:clip' - IE_DESC = 'Clip in Mildom' - _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/clip/(?P(?P\d+)-[a-zA-Z0-9]+)' - _TESTS = [{ - 'url': 'https://www.mildom.com/clip/10042245-63921673e7b147ebb0806d42b5ba5ce9', - 'info_dict': { - 'id': '10042245-63921673e7b147ebb0806d42b5ba5ce9', - 'title': '全然違ったよ', - 'timestamp': 1619181890, - 'duration': 59, - 'thumbnail': r're:https?://.+', - 'uploader': 'ざきんぽ', - 'uploader_id': '10042245', - }, - }, { - 'url': 'https://www.mildom.com/clip/10111524-ebf4036e5aa8411c99fb3a1ae0902864', - 'info_dict': { - 'id': '10111524-ebf4036e5aa8411c99fb3a1ae0902864', - 'title': 'かっこいい', - 'timestamp': 1621094003, - 'duration': 59, - 'thumbnail': r're:https?://.+', - 'uploader': '(ルーキー', - 'uploader_id': '10111524', - }, - }, { - 'url': 'https://www.mildom.com/clip/10660174-2c539e6e277c4aaeb4b1fbe8d22cb902', - 'info_dict': { - 'id': '10660174-2c539e6e277c4aaeb4b1fbe8d22cb902', - 'title': 'あ', - 'timestamp': 1614769431, - 'duration': 31, - 'thumbnail': r're:https?://.+', - 'uploader': 'ドルゴルスレンギーン=ダグワドルジ', - 'uploader_id': '10660174', - }, - }] - - def _real_extract(self, url): - user_id, video_id = self._match_valid_url(url).group('user_id', 'id') - webpage = self._download_webpage(f'https://www.mildom.com/clip/{video_id}', video_id) - - clip_detail = self._call_api( - 'https://cloudac-cf-jp.mildom.com/nonolive/videocontent/clip/detail', video_id, - note='Downloading playback metadata', query={ - 'clip_id': video_id, - }) - - return { - 'id': video_id, - 'title': self._html_search_meta( - ('og:description', 'description'), webpage, default=None) or clip_detail.get('title'), - 'timestamp': float_or_none(clip_detail.get('create_time')), - 'duration': float_or_none(clip_detail.get('length')), - 'thumbnail': clip_detail.get('cover'), - 'uploader': traverse_obj(clip_detail, ('user_info', 'loginname')), - 'uploader_id': user_id, - - 'url': clip_detail['url'], - 'ext': determine_ext(clip_detail.get('url'), 'mp4'), - } - - -class MildomUserVodIE(MildomBaseIE): - IE_NAME = 'mildom:user:vod' - IE_DESC = 'Download all VODs from specific user in Mildom' - _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/profile/(?P\d+)' - _TESTS = [{ - 'url': 'https://www.mildom.com/profile/10093333', - 'info_dict': { - 'id': '10093333', - 'title': 'Uploads from ねこばたけ', - }, - 'playlist_mincount': 732, - }, { - 'url': 'https://www.mildom.com/profile/10882672', - 'info_dict': { - 'id': '10882672', - 'title': 'Uploads from kson組長(けいそん)', - }, - 'playlist_mincount': 201, - }] - - def _fetch_page(self, user_id, page): - page += 1 - reply = self._call_api( - 'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList', - user_id, note=f'Downloading page {page}', query={ - 'user_id': user_id, - 'page': page, - 'limit': '30', - }) - if not reply: - return - for x in reply: - v_id = x.get('v_id') - if not v_id: - continue - yield self.url_result(f'https://www.mildom.com/playback/{user_id}/{v_id}') - - def _real_extract(self, url): - user_id = self._match_id(url) - self.to_screen(f'This will download all VODs belonging to user. To download ongoing live video, use "https://www.mildom.com/{user_id}" instead') - - profile = self._call_api( - 'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id, - query={'user_id': user_id}, note='Downloading user profile')['user_info'] - - return self.playlist_result( - OnDemandPagedList(functools.partial(self._fetch_page, user_id), 30), - user_id, f'Uploads from {profile["loginname"]}') diff --git a/yt_dlp/extractor/pokemon.py b/yt_dlp/extractor/pokemon.py deleted file mode 100644 index 1769684f72..0000000000 --- a/yt_dlp/extractor/pokemon.py +++ /dev/null @@ -1,136 +0,0 @@ -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - extract_attributes, - int_or_none, - js_to_json, - merge_dicts, -) - - -class PokemonIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P[a-z0-9]{32})|/(?:[^/]+/)+(?P[^/?#&]+))' - _TESTS = [{ - 'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/', - 'md5': '2fe8eaec69768b25ef898cda9c43062e', - 'info_dict': { - 'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4', - 'ext': 'mp4', - 'title': 'The Ol’ Raise and Switch!', - 'description': 'md5:7db77f7107f98ba88401d3adc80ff7af', - }, - 'add_id': ['LimelightMedia'], - }, { - # no data-video-title - 'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008', - 'info_dict': { - 'id': 'dfbaf830d7e54e179837c50c0c6cc0e1', - 'ext': 'mp4', - 'title': "Pokémon : L'ascension de Darkrai", - 'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5', - }, - 'add_id': ['LimelightMedia'], - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2', - 'only_matching': True, - }, { - 'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/', - 'only_matching': True, - }, { - 'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id, display_id = self._match_valid_url(url).groups() - webpage = self._download_webpage(url, video_id or display_id) - video_data = extract_attributes(self._search_regex( - r'(<[^>]+data-video-id="{}"[^>]*>)'.format(video_id if video_id else '[a-z0-9]{32}'), - webpage, 'video data element')) - video_id = video_data['data-video-id'] - title = video_data.get('data-video-title') or self._html_search_meta( - 'pkm-title', webpage, ' title', default=None) or self._search_regex( - r']+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title') - return { - '_type': 'url_transparent', - 'id': video_id, - 'url': f'limelight:media:{video_id}', - 'title': title, - 'description': video_data.get('data-video-summary'), - 'thumbnail': video_data.get('data-video-poster'), - 'series': 'Pokémon', - 'season_number': int_or_none(video_data.get('data-video-season')), - 'episode': title, - 'episode_number': int_or_none(video_data.get('data-video-episode')), - 'ie_key': 'LimelightMedia', - } - - -class PokemonWatchIE(InfoExtractor): - _VALID_URL = r'https?://watch\.pokemon\.com/[a-z]{2}-[a-z]{2}/(?:#/)?player(?:\.html)?\?id=(?P[a-z0-9]{32})' - _API_URL = 'https://www.pokemon.com/api/pokemontv/v2/channels/{0:}' - _TESTS = [{ - 'url': 'https://watch.pokemon.com/en-us/player.html?id=8309a40969894a8e8d5bc1311e9c5667', - 'md5': '62833938a31e61ab49ada92f524c42ff', - 'info_dict': { - 'id': '8309a40969894a8e8d5bc1311e9c5667', - 'ext': 'mp4', - 'title': 'Lillier and the Staff!', - 'description': 'md5:338841b8c21b283d24bdc9b568849f04', - }, - }, { - 'url': 'https://watch.pokemon.com/en-us/#/player?id=3fe7752ba09141f0b0f7756d1981c6b2', - 'only_matching': True, - }, { - 'url': 'https://watch.pokemon.com/de-de/player.html?id=b3c402e111a4459eb47e12160ab0ba07', - 'only_matching': True, - }] - - def _extract_media(self, channel_array, video_id): - for channel in channel_array: - for media in channel.get('media'): - if media.get('id') == video_id: - return media - return None - - def _real_extract(self, url): - video_id = self._match_id(url) - - info = { - '_type': 'url', - 'id': video_id, - 'url': f'limelight:media:{video_id}', - 'ie_key': 'LimelightMedia', - } - - # API call can be avoided entirely if we are listing formats - if self.get_param('listformats', False): - return info - - webpage = self._download_webpage(url, video_id) - build_vars = self._parse_json(self._search_regex( - r'(?s)buildVars\s*=\s*({.*?})', webpage, 'build vars'), - video_id, transform_source=js_to_json) - region = build_vars.get('region') - channel_array = self._download_json(self._API_URL.format(region), video_id) - video_data = self._extract_media(channel_array, video_id) - - if video_data is None: - raise ExtractorError( - f'Video {video_id} does not exist', expected=True) - - info['_type'] = 'url_transparent' - images = video_data.get('images') - - return merge_dicts(info, { - 'title': video_data.get('title'), - 'description': video_data.get('description'), - 'thumbnail': images.get('medium') or images.get('small'), - 'series': 'Pokémon', - 'season_number': int_or_none(video_data.get('season')), - 'episode': video_data.get('title'), - 'episode_number': int_or_none(video_data.get('episode')), - }) diff --git a/yt_dlp/extractor/veoh.py b/yt_dlp/extractor/veoh.py deleted file mode 100644 index aac768f3c6..0000000000 --- a/yt_dlp/extractor/veoh.py +++ /dev/null @@ -1,189 +0,0 @@ -import functools -import json - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - OnDemandPagedList, - int_or_none, - parse_duration, - qualities, - remove_start, - strip_or_none, -) - - -class VeohIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|videos|embed|iphone/#_Watch)/(?P(?:v|e|yapi-)[\da-zA-Z]+)' - - _TESTS = [{ - 'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3', - 'md5': '620e68e6a3cff80086df3348426c9ca3', - 'info_dict': { - 'id': 'v56314296nk7Zdmz3', - 'ext': 'mp4', - 'title': 'Straight Backs Are Stronger', - 'description': 'md5:203f976279939a6dc664d4001e13f5f4', - 'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th56314296\\.jpg(\\?.*)?', - 'uploader': 'LUMOback', - 'duration': 46, - 'view_count': int, - 'average_rating': int, - 'comment_count': int, - 'age_limit': 0, - 'categories': ['technology_and_gaming'], - 'tags': ['posture', 'posture', 'sensor', 'back', 'pain', 'wearable', 'tech', 'lumo'], - }, - }, { - 'url': 'http://www.veoh.com/embed/v56314296nk7Zdmz3', - 'only_matching': True, - }, { - 'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage', - 'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa', - 'info_dict': { - 'id': '27701988', - 'ext': 'mp4', - 'title': 'Chile workers cover up to avoid skin damage', - 'description': 'md5:2bd151625a60a32822873efc246ba20d', - 'uploader': 'afp-news', - 'duration': 123, - }, - 'skip': 'This video has been deleted.', - }, { - 'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX', - 'md5': '4fde7b9e33577bab2f2f8f260e30e979', - 'note': 'Embedded ooyala video', - 'info_dict': { - 'id': '69525809', - 'ext': 'mp4', - 'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery', - 'description': 'md5:f5a11c51f8fb51d2315bca0937526891', - 'uploader': 'newsy-videos', - }, - 'skip': 'This video has been deleted.', - }, { - 'url': 'http://www.veoh.com/watch/e152215AJxZktGS', - 'only_matching': True, - }, { - 'url': 'https://www.veoh.com/videos/v16374379WA437rMH', - 'md5': 'cceb73f3909063d64f4b93d4defca1b3', - 'info_dict': { - 'id': 'v16374379WA437rMH', - 'ext': 'mp4', - 'title': 'Phantasmagoria 2, pt. 1-3', - 'description': 'Phantasmagoria: a Puzzle of Flesh', - 'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th16374379\\.jpg(\\?.*)?', - 'uploader': 'davidspackage', - 'duration': 968, - 'view_count': int, - 'average_rating': int, - 'comment_count': int, - 'age_limit': 18, - 'categories': ['technology_and_gaming', 'gaming'], - 'tags': ['puzzle', 'of', 'flesh'], - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - metadata = self._download_json( - 'https://www.veoh.com/watch/getVideo/' + video_id, - video_id) - video = metadata['video'] - title = video['title'] - - thumbnail_url = None - q = qualities(['Regular', 'HQ']) - formats = [] - for f_id, f_url in video.get('src', {}).items(): - if not f_url: - continue - if f_id == 'poster': - thumbnail_url = f_url - else: - formats.append({ - 'format_id': f_id, - 'quality': q(f_id), - 'url': f_url, - }) - - categories = metadata.get('categoryPath') - if not categories: - category = remove_start(strip_or_none(video.get('category')), 'category_') - categories = [category] if category else None - tags = video.get('tags') - - return { - 'id': video_id, - 'title': title, - 'description': video.get('description'), - 'thumbnail': thumbnail_url, - 'uploader': video.get('author', {}).get('nickname'), - 'duration': int_or_none(video.get('lengthBySec')) or parse_duration(video.get('length')), - 'view_count': int_or_none(video.get('views')), - 'formats': formats, - 'average_rating': int_or_none(video.get('rating')), - 'comment_count': int_or_none(video.get('numOfComments')), - 'age_limit': 18 if video.get('contentRatingId') == 2 else 0, - 'categories': categories, - 'tags': tags.split(', ') if tags else None, - } - - -class VeohUserIE(VeohIE): # XXX: Do not subclass from concrete IE - _VALID_URL = r'https?://(?:www\.)?veoh\.com/users/(?P[\w-]+)' - IE_NAME = 'veoh:user' - - _TESTS = [ - { - 'url': 'https://www.veoh.com/users/valentinazoe', - 'info_dict': { - 'id': 'valentinazoe', - 'title': 'valentinazoe (Uploads)', - }, - 'playlist_mincount': 75, - }, - { - 'url': 'https://www.veoh.com/users/PiensaLibre', - 'info_dict': { - 'id': 'PiensaLibre', - 'title': 'PiensaLibre (Uploads)', - }, - 'playlist_mincount': 2, - }] - - _PAGE_SIZE = 16 - - def _fetch_page(self, uploader, page): - response = self._download_json( - 'https://www.veoh.com/users/published/videos', uploader, - note=f'Downloading videos page {page + 1}', - headers={ - 'x-csrf-token': self._TOKEN, - 'content-type': 'application/json;charset=UTF-8', - }, - data=json.dumps({ - 'username': uploader, - 'maxResults': self._PAGE_SIZE, - 'page': page + 1, - 'requestName': 'userPage', - }).encode()) - if not response.get('success'): - raise ExtractorError(response['message']) - - for video in response['videos']: - yield self.url_result(f'https://www.veoh.com/watch/{video["permalinkId"]}', VeohIE, - video['permalinkId'], video.get('title')) - - def _real_initialize(self): - webpage = self._download_webpage( - 'https://www.veoh.com', None, note='Downloading authorization token') - self._TOKEN = self._search_regex( - r'csrfToken:\s*(["\'])(?P[0-9a-zA-Z]{40})\1', webpage, - 'request token', group='token') - - def _real_extract(self, url): - uploader = self._match_id(url) - return self.playlist_result(OnDemandPagedList( - functools.partial(self._fetch_page, uploader), - self._PAGE_SIZE), uploader, f'{uploader} (Uploads)') From d867f99622ef7fba690b08da56c39d739b822bb7 Mon Sep 17 00:00:00 2001 From: ChocoLZS <61224208+ChocoLZS@users.noreply.github.com> Date: Mon, 18 Nov 2024 02:41:57 +0800 Subject: [PATCH 89/98] [ie/PiaLive] Add extractor (#10811) Authored by: ChocoLZS --- yt_dlp/extractor/_extractors.py | 6 +- yt_dlp/extractor/pialive.py | 122 +++++++++++++++++++++++++++++ yt_dlp/extractor/piaulizaportal.py | 70 ----------------- yt_dlp/extractor/uliza.py | 113 ++++++++++++++++++++++++++ 4 files changed, 240 insertions(+), 71 deletions(-) create mode 100644 yt_dlp/extractor/pialive.py delete mode 100644 yt_dlp/extractor/piaulizaportal.py create mode 100644 yt_dlp/extractor/uliza.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 0d849c1697..967010826e 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1520,8 +1520,8 @@ from .pgatour import PGATourIE from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE +from .pialive import PiaLiveIE from .piapro import PiaproIE -from .piaulizaportal import PIAULIZAPortalIE from .picarto import ( PicartoIE, PicartoVodIE, @@ -2250,6 +2250,10 @@ from .ufctv import ( ) from .ukcolumn import UkColumnIE from .uktvplay import UKTVPlayIE +from .uliza import ( + UlizaPlayerIE, + UlizaPortalIE, +) from .umg import UMGDeIE from .unistra import UnistraIE from .unity import UnityIE diff --git a/yt_dlp/extractor/pialive.py b/yt_dlp/extractor/pialive.py new file mode 100644 index 0000000000..7469135c1b --- /dev/null +++ b/yt_dlp/extractor/pialive.py @@ -0,0 +1,122 @@ +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + clean_html, + extract_attributes, + get_element_by_class, + get_element_html_by_class, + multipart_encode, + str_or_none, + unified_timestamp, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class PiaLiveIE(InfoExtractor): + _VALID_URL = r'https?://player\.pia-live\.jp/stream/(?P[\w-]+)' + _PLAYER_ROOT_URL = 'https://player.pia-live.jp/' + _PIA_LIVE_API_URL = 'https://api.pia-live.jp' + _API_KEY = 'kfds)FKFps-dms9e' + _TESTS = [{ + 'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krUDqGOwN4d61dCWQYOd6CTxl4hjya9dsfEZGsM4uGOUdax60lEI4twsXGXf7crmz8Gk__GhupTrWxA7RFRVt76', + 'info_dict': { + 'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84', + 'display_id': '2431867_001', + 'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)', + 'live_status': 'was_live', + 'comment_count': int, + }, + 'params': { + 'getcomments': True, + 'skip_download': True, + 'ignore_no_formats_error': True, + }, + 'skip': 'The video is no longer available', + }, { + 'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krJdu0GVBVbVy01IwpJ6J3qBEm3d9TCTt1d0eWpsZGj7DrOjVOmS7GAWGwyscMgiThopJvzgWC4H5b-7XQjAfRZ', + 'info_dict': { + 'id': '9ce8b8ba-f6d1-4d1f-83a0-18c3148ded93', + 'display_id': '2431867_002', + 'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)', + 'live_status': 'was_live', + 'comment_count': int, + }, + 'params': { + 'getcomments': True, + 'skip_download': True, + 'ignore_no_formats_error': True, + }, + 'skip': 'The video is no longer available', + }] + + def _extract_var(self, variable, html): + return self._search_regex( + rf'(?:var|const|let)\s+{variable}\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', + html, f'variable {variable}', group='value') + + def _real_extract(self, url): + video_key = self._match_id(url) + webpage = self._download_webpage(url, video_key) + + program_code = self._extract_var('programCode', webpage) + article_code = self._extract_var('articleCode', webpage) + title = self._html_extract_title(webpage) + + if get_element_html_by_class('play-end', webpage): + raise ExtractorError('The video is no longer available', expected=True, video_id=program_code) + + if start_info := clean_html(get_element_by_class('play-waiting__date', webpage)): + date, time = self._search_regex( + r'(?P\d{4}/\d{1,2}/\d{1,2})\([月火水木金土日]\)(?P