From f4d3e9e6dc25077b79849a31a2f67f93fdc01e62 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 12 Dec 2024 13:39:38 +0000 Subject: [PATCH 1/8] [ie/soundcloud] Fix extraction (#11777) Authored by: bashonly --- yt_dlp/extractor/soundcloud.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index e0dda8ff89..66bc5f9c58 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -259,6 +259,8 @@ class SoundcloudBaseIE(InfoExtractor): preset_base = preset.partition('_')[0] protocol = traverse_obj(t, ('format', 'protocol', {str})) or 'http' + if protocol.startswith(('ctr-', 'cbc-')): + continue if protocol == 'progressive': protocol = 'http' if protocol != 'hls' and '/hls' in format_url: From bc262bcad4d3683ceadf61a7eb87e233e72adef3 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Thu, 12 Dec 2024 13:44:19 +0000 Subject: [PATCH 2/8] [ie/patreon:campaign] Support /c/ URLs (#11756) Closes #11755 Authored by: bashonly --- yt_dlp/extractor/patreon.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 6bdeaf1571..a0e831a5ce 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -457,7 +457,7 @@ class PatreonCampaignIE(PatreonBaseIE): _VALID_URL = r'''(?x) https?://(?:www\.)?patreon\.com/(?: (?:m|api/campaigns)/(?P\d+)| - (?P(?!creation[?/]|posts/|rss[?/])[\w-]+) + (?:c/)?(?P(?!creation[?/]|posts/|rss[?/])[\w-]+) )(?:/posts)?/?(?:$|[?#])''' _TESTS = [{ 'url': 'https://www.patreon.com/dissonancepod/', @@ -509,6 +509,26 @@ class PatreonCampaignIE(PatreonBaseIE): 'thumbnail': r're:^https?://.*$', }, 'playlist_mincount': 201, + }, { + 'url': 'https://www.patreon.com/c/OgSog', + 'info_dict': { + 'id': '8504388', + 'title': 'OGSoG', + 'description': r're:(?s)Hello and welcome to our Patreon page. We are Mari, Lasercorn, .+', + 'channel': 'OGSoG', + 'channel_id': '8504388', + 'channel_url': 'https://www.patreon.com/OgSog', + 'uploader_url': 'https://www.patreon.com/OgSog', + 'uploader_id': '72323575', + 'uploader': 'David Moss', + 'thumbnail': r're:https?://.+/.+', + 'channel_follower_count': int, + 'age_limit': 0, + }, + 'playlist_mincount': 331, + }, { + 'url': 'https://www.patreon.com/c/OgSog/posts', + 'only_matching': True, }, { 'url': 'https://www.patreon.com/dissonancepod/posts', 'only_matching': True, From d5e2a379f2adcb28bc48c7d9e90716d7278f89d2 Mon Sep 17 00:00:00 2001 From: Pew <65320806+MutantPiggieGolem1@users.noreply.github.com> Date: Thu, 12 Dec 2024 05:46:52 -0800 Subject: [PATCH 3/8] [ie/youtube] Fix `release_date` extraction (#11759) Authored by: MutantPiggieGolem1 --- yt_dlp/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index e69373ba2f..5ba8a62aac 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4689,7 +4689,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?=(?P[^\n]+))(?P=artist)\n+ (?=(?P[^\n]+))(?P=album)\n (?:.+?℗\s*(?P\d{4})(?!\d))? - (?:.+?Released on\s*:\s*(?P\d{4}-\d{2}-\d{2}))? + (?:.+?Released\ on\s*:\s*(?P\d{4}-\d{2}-\d{2}))? (.+?\nArtist\s*:\s* (?=(?P[^\n]+))(?P=clean_artist)\n )?.+\nAuto-generated\ by\ YouTube\.\s*$ From f6c73aad5f1a67544bea137ebd9d1e22e0e56567 Mon Sep 17 00:00:00 2001 From: Crypto90 Date: Thu, 12 Dec 2024 14:54:11 +0100 Subject: [PATCH 4/8] [ie/youtube:search_url] Fix playlist searches (#11782) Closes #11666 Authored by: Crypto90 --- yt_dlp/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 5ba8a62aac..7eb522a47a 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -5282,6 +5282,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): 'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}), 'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)], 'richGridRenderer': lambda x: self._extract_entries(x, continuation_list), + 'lockupViewModel': lambda x: [self._extract_lockup_view_model(x)], } for key, renderer in isr_content.items(): if key not in known_renderers: From 5460cd91891bf613a2065e2fc278d9903c37a127 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 13 Dec 2024 09:43:08 +0000 Subject: [PATCH 5/8] [ie/youtube] Fix signature function extraction for `2f1832d2` (#11801) Closes #11798 Authored by: bashonly --- test/test_youtube_signature.py | 9 +++++++++ yt_dlp/extractor/youtube.py | 6 +++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 2a99436a6d..13436f0884 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -73,6 +73,11 @@ _SIG_TESTS = [ '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', 'MyOSJXtKI3m-uME_jv7-pT12gOFC02RFkGoqWpzE0Cs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', ), + ( + 'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js', + '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', + '0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xxAj7v1pDL0QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJ2OySqa0q', + ), ] _NSIG_TESTS = [ @@ -192,6 +197,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js', 'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ', ), + ( + 'https://www.youtube.com/s/player/2f1832d2/player_ias.vflset/en_US/base.js', + 'YWt1qdbe8SAfkoPHW5d', 'RrRjWQOJmBiP', + ), ] diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 7eb522a47a..e4904965d9 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3127,9 +3127,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # ;N&&(N=sig(decodeURIComponent(N)),J.set(R,encodeURIComponent(N)));return J}; # {var H=u,k=f.sp,v=sig(decodeURIComponent(f.s));H.set(k,encodeURIComponent(v))} funcname = self._search_regex( - (r'\b(?P[a-zA-Z0-9$]+)&&\((?P=var)=(?P[a-zA-Z0-9$]{2,})\(decodeURIComponent\((?P=var)\)\)', - r'(?P[a-zA-Z0-9$]+)\s*=\s*function\(\s*(?P[a-zA-Z0-9$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)', - r'(?:\b|[^a-zA-Z0-9$])(?P[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\))?', + (r'\b(?P[a-zA-Z0-9_$]+)&&\((?P=var)=(?P[a-zA-Z0-9_$]{2,})\(decodeURIComponent\((?P=var)\)\)', + r'(?P[a-zA-Z0-9_$]+)\s*=\s*function\(\s*(?P[a-zA-Z0-9_$]+)\s*\)\s*{\s*(?P=arg)\s*=\s*(?P=arg)\.split\(\s*""\s*\)\s*;\s*[^}]+;\s*return\s+(?P=arg)\.join\(\s*""\s*\)', + r'(?:\b|[^a-zA-Z0-9_$])(?P[a-zA-Z0-9_$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)(?:;[a-zA-Z0-9_$]{2}\.[a-zA-Z0-9_$]{2}\(a,\d+\))?', # Old patterns r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', From dc3c4fddcc653989dae71fc563d82a308fc898cc Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 13 Dec 2024 10:21:48 +0000 Subject: [PATCH 6/8] [ie/youtube] Prioritize original language over auto-dubbed audio (#11803) Closes #11753 Authored by: bashonly --- yt_dlp/extractor/youtube.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index e4904965d9..fd9c7107c7 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4067,10 +4067,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if height: res_qualities[height] = quality + display_name = audio_track.get('displayName') or '' + is_original = 'original' in display_name.lower() + is_descriptive = 'descriptive' in display_name.lower() is_default = audio_track.get('audioIsDefault') - is_descriptive = 'descriptive' in (audio_track.get('displayName') or '').lower() language_code = audio_track.get('id', '').split('.')[0] - if language_code and is_default: + if language_code and (is_original or (is_default and not original_language)): original_language = language_code # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment @@ -4151,7 +4153,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'filesize': int_or_none(fmt.get('contentLength')), 'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}', 'format_note': join_nonempty( - join_nonempty(audio_track.get('displayName'), is_default and ' (default)', delim=''), + join_nonempty(display_name, is_default and ' (default)', delim=''), name, fmt.get('isDrc') and 'DRC', try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()), try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()), @@ -4170,7 +4172,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'url': fmt_url, 'width': int_or_none(fmt.get('width')), 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None, - 'language_preference': PREFERRED_LANG_VALUE if is_default else -10 if is_descriptive else -1, + 'language_preference': PREFERRED_LANG_VALUE if is_original else 5 if is_default else -10 if is_descriptive else -1, # Strictly de-prioritize broken, damaged and 3gp formats 'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None, } From 54216696261bc07cacd9a837c501d9e0b7fed09e Mon Sep 17 00:00:00 2001 From: sepro Date: Fri, 13 Dec 2024 11:25:29 +0100 Subject: [PATCH 7/8] [cleanup] Make more playlist entries lazy (#11763) Authored by: seproDev --- yt_dlp/extractor/brightcove.py | 5 +++-- yt_dlp/extractor/dvtv.py | 2 +- yt_dlp/extractor/nytimes.py | 2 +- yt_dlp/extractor/vidyard.py | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index 2526f25dac..3ada1fd5de 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -31,6 +31,7 @@ from ..utils import ( update_url_query, url_or_none, ) +from ..utils.traversal import traverse_obj class BrightcoveLegacyIE(InfoExtractor): @@ -935,8 +936,8 @@ class BrightcoveNewIE(BrightcoveNewBaseIE): if content_type == 'playlist': return self.playlist_result( - [self._parse_brightcove_metadata(vid, vid.get('id'), headers) - for vid in json_data.get('videos', []) if vid.get('id')], + (self._parse_brightcove_metadata(vid, vid['id'], headers) + for vid in traverse_obj(json_data, ('videos', lambda _, v: v['id']))), json_data.get('id'), json_data.get('name'), json_data.get('description')) diff --git a/yt_dlp/extractor/dvtv.py b/yt_dlp/extractor/dvtv.py index 3e442b339b..52d67d2bd0 100644 --- a/yt_dlp/extractor/dvtv.py +++ b/yt_dlp/extractor/dvtv.py @@ -162,7 +162,7 @@ class DVTVIE(InfoExtractor): items = re.findall(r'(?s)playlist\.push\(({.+?})\);', webpage) if items: return self.playlist_result( - [self._parse_video_metadata(i, video_id, timestamp) for i in items], + (self._parse_video_metadata(i, video_id, timestamp) for i in items), video_id, self._html_search_meta('twitter:title', webpage)) item = self._search_regex( diff --git a/yt_dlp/extractor/nytimes.py b/yt_dlp/extractor/nytimes.py index 9ef57410ac..a97add71a4 100644 --- a/yt_dlp/extractor/nytimes.py +++ b/yt_dlp/extractor/nytimes.py @@ -343,7 +343,7 @@ class NYTimesCookingIE(NYTimesBaseIE): if media_ids: media_ids.append(lead_video_id) return self.playlist_result( - [self._extract_video(media_id) for media_id in media_ids], page_id, title, description) + map(self._extract_video, media_ids), page_id, title, description) return { **self._extract_video(lead_video_id), diff --git a/yt_dlp/extractor/vidyard.py b/yt_dlp/extractor/vidyard.py index 2f6d1f4c51..89a89b13f1 100644 --- a/yt_dlp/extractor/vidyard.py +++ b/yt_dlp/extractor/vidyard.py @@ -421,5 +421,5 @@ class VidyardIE(VidyardBaseIE): return self._process_video_json(video_json['chapters'][0], video_id) return self.playlist_result( - [self._process_video_json(chapter, video_id) for chapter in video_json['chapters']], + (self._process_video_json(chapter, video_id) for chapter in video_json['chapters']), str(video_json['playerUuid']), video_json.get('name')) From 2037a6414f81db8080ca724dca506fde91974c5d Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 13 Dec 2024 10:35:40 +0000 Subject: [PATCH 8/8] Release 2024.12.13 Created by: bashonly :ci skip all --- CONTRIBUTORS | 2 ++ Changelog.md | 14 ++++++++++++++ yt_dlp/version.py | 6 +++--- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 240197e8af..4b69642603 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -711,3 +711,5 @@ gitninja1234 jkruse xiaomac wesson09 +Crypto90 +MutantPiggieGolem1 diff --git a/Changelog.md b/Changelog.md index 9dc9053093..75e8240335 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,20 @@ # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2024.12.13 + +#### Extractor changes +- **patreon**: campaign: [Support /c/ URLs](https://github.com/yt-dlp/yt-dlp/commit/bc262bcad4d3683ceadf61a7eb87e233e72adef3) ([#11756](https://github.com/yt-dlp/yt-dlp/issues/11756)) by [bashonly](https://github.com/bashonly) +- **soundcloud**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/f4d3e9e6dc25077b79849a31a2f67f93fdc01e62) ([#11777](https://github.com/yt-dlp/yt-dlp/issues/11777)) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Fix `release_date` extraction](https://github.com/yt-dlp/yt-dlp/commit/d5e2a379f2adcb28bc48c7d9e90716d7278f89d2) ([#11759](https://github.com/yt-dlp/yt-dlp/issues/11759)) by [MutantPiggieGolem1](https://github.com/MutantPiggieGolem1) + - [Fix signature function extraction for `2f1832d2`](https://github.com/yt-dlp/yt-dlp/commit/5460cd91891bf613a2065e2fc278d9903c37a127) ([#11801](https://github.com/yt-dlp/yt-dlp/issues/11801)) by [bashonly](https://github.com/bashonly) + - [Prioritize original language over auto-dubbed audio](https://github.com/yt-dlp/yt-dlp/commit/dc3c4fddcc653989dae71fc563d82a308fc898cc) ([#11803](https://github.com/yt-dlp/yt-dlp/issues/11803)) by [bashonly](https://github.com/bashonly) + - search_url: [Fix playlist searches](https://github.com/yt-dlp/yt-dlp/commit/f6c73aad5f1a67544bea137ebd9d1e22e0e56567) ([#11782](https://github.com/yt-dlp/yt-dlp/issues/11782)) by [Crypto90](https://github.com/Crypto90) + +#### Misc. changes +- **cleanup**: [Make more playlist entries lazy](https://github.com/yt-dlp/yt-dlp/commit/54216696261bc07cacd9a837c501d9e0b7fed09e) ([#11763](https://github.com/yt-dlp/yt-dlp/issues/11763)) by [seproDev](https://github.com/seproDev) + ### 2024.12.06 #### Core changes diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 3dec228d36..f696e1e9d0 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2024.12.06' +__version__ = '2024.12.13' -RELEASE_GIT_HEAD = '4bd2655398aed450456197a6767639114a24eac2' +RELEASE_GIT_HEAD = '54216696261bc07cacd9a837c501d9e0b7fed09e' VARIANT = None @@ -12,4 +12,4 @@ CHANNEL = 'stable' ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2024.12.06' +_pkg_version = '2024.12.13'