diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 12ec5b0d8c..63bc8713a8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -468,7 +468,7 @@ jobs: - name: Install Requirements run: | python devscripts/install_deps.py -o --include build - python devscripts/install_deps.py + python devscripts/install_deps.py --include curl-cffi python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.7.0-py3-none-any.whl" - name: Prepare diff --git a/Changelog.md b/Changelog.md index b5a829d94a..6d3033efc7 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,23 @@ # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2024.07.09 + +#### Core changes +- [Do not alter default format selection when simulated](https://github.com/yt-dlp/yt-dlp/commit/0b570f2a90ce2363ba06089217514d644e7be2e0) ([#9862](https://github.com/yt-dlp/yt-dlp/issues/9862)) by [seproDev](https://github.com/seproDev) + +#### Extractor changes +- **youtube**: [Remove broken `n` function extraction fallback](https://github.com/yt-dlp/yt-dlp/commit/7ead7332af69422cee931aec3faa277288e9e212) ([#10396](https://github.com/yt-dlp/yt-dlp/issues/10396)) by [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev) + +### 2024.07.08 + +#### Core changes +- **jsinterp**: [Implement `Function.prototype` resolving for `call` and `apply`](https://github.com/yt-dlp/yt-dlp/commit/6c056ea7aeb03660281653a9668547f2548f194f) ([#10392](https://github.com/yt-dlp/yt-dlp/issues/10392)) by [Grub4K](https://github.com/Grub4K) + +#### Extractor changes +- **soundcloud**: [Fix rate-limit handling](https://github.com/yt-dlp/yt-dlp/commit/4b50b292cc98534fb8c7cdf0ae5cb85862f7ebfc) ([#10389](https://github.com/yt-dlp/yt-dlp/issues/10389)) by [bashonly](https://github.com/bashonly) +- **youtube**: [Fix JS `n` function name extraction](https://github.com/yt-dlp/yt-dlp/commit/297b0a379282a15c80d82d51f3757c961db2dae1) ([#10390](https://github.com/yt-dlp/yt-dlp/issues/10390)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev) + ### 2024.07.07 #### Important changes diff --git a/README.md b/README.md index 836e084e61..f13562ae9b 100644 --- a/README.md +++ b/README.md @@ -202,7 +202,7 @@ The following provide support for impersonating browser requests. This may be re * [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE) * Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"` - * Currently only included in `yt-dlp.exe` and `yt-dlp_macos` builds + * Currently included in `yt-dlp.exe`, `yt-dlp_x86.exe`, `yt-dlp_linux` and `yt-dlp_macos` builds ### Metadata @@ -2219,6 +2219,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * yt-dlp versions between 2021.11.10 and 2023.06.21 estimated `filesize_approx` values for fragmented/manifest formats. This was added for convenience in [f2fe69](https://github.com/yt-dlp/yt-dlp/commit/f2fe69c7b0d208bdb1f6292b4ae92bc1e1a7444a), but was reverted in [0dff8e](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) due to the potentially extreme inaccuracy of the estimated values. Use `--compat-options manifest-filesize-approx` to keep extracting the estimated values * yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) to be used for standard http requests. * The sub-modules `swfinterp`, `casefold` are removed. +* Passing `--simulate` (or calling `extract_info` with `download=False`) no longer alters the default format selection. See [#9843](https://github.com/yt-dlp/yt-dlp/issues/9843) for details. For ease of use, a few more compat options are available: diff --git a/bundle/docker/static/entrypoint.sh b/bundle/docker/static/entrypoint.sh index 93d84fa9b7..2202759742 100755 --- a/bundle/docker/static/entrypoint.sh +++ b/bundle/docker/static/entrypoint.sh @@ -2,7 +2,7 @@ set -e source ~/.local/share/pipx/venvs/pyinstaller/bin/activate -python -m devscripts.install_deps --include secretstorage +python -m devscripts.install_deps --include secretstorage --include curl-cffi python -m devscripts.make_lazy_extractors python devscripts/update-version.py -c "${channel}" -r "${origin}" "${version}" python -m bundle.pyinstaller diff --git a/pyproject.toml b/pyproject.toml index 39986a355c..4561abaf4d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,9 @@ dependencies = [ [project.optional-dependencies] default = [] -curl-cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"] +curl-cffi = [ + "curl-cffi>=0.5.10,!=0.6.*,<0.8; implementation_name=='cpython'", +] secretstorage = [ "cffi", "secretstorage", diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 841ce1af3e..1847c4ffd8 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -4,6 +4,7 @@ import os import sys import unittest +from unittest.mock import patch sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -520,7 +521,33 @@ class TestFormatSelection(unittest.TestCase): ydl.process_ie_result(info_dict) self.assertEqual(ydl.downloaded_info_dicts, []) - def test_default_format_spec(self): + @patch('yt_dlp.postprocessor.ffmpeg.FFmpegMergerPP.available', False) + def test_default_format_spec_without_ffmpeg(self): + ydl = YDL({}) + self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') + + ydl = YDL({'simulate': True}) + self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') + + ydl = YDL({}) + self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') + + ydl = YDL({'simulate': True}) + self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') + + ydl = YDL({'outtmpl': '-'}) + self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') + + ydl = YDL({}) + self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') + self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') + + @patch('yt_dlp.postprocessor.ffmpeg.FFmpegMergerPP.available', True) + @patch('yt_dlp.postprocessor.ffmpeg.FFmpegMergerPP.can_merge', lambda _: True) + def test_default_format_spec_with_ffmpeg(self): + ydl = YDL({}) + self.assertEqual(ydl._default_format_spec({}), 'bestvideo*+bestaudio/best') + ydl = YDL({'simulate': True}) self.assertEqual(ydl._default_format_spec({}), 'bestvideo*+bestaudio/best') @@ -528,13 +555,13 @@ class TestFormatSelection(unittest.TestCase): self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') ydl = YDL({'simulate': True}) - self.assertEqual(ydl._default_format_spec({'is_live': True}), 'bestvideo*+bestaudio/best') + self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') ydl = YDL({'outtmpl': '-'}) self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') ydl = YDL({}) - self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo*+bestaudio/best') + self.assertEqual(ydl._default_format_spec({}), 'bestvideo*+bestaudio/best') self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 7c556e4611..df92c8315b 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -376,6 +376,33 @@ class TestJSInterpreter(unittest.TestCase): jsi = JSInterpreter('''function f(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''') self.assertEqual(jsi.call_function('f', '''h 7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, .3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|'))) + def test_join(self): + test_input = list('test') + tests = [ + 'function f(a, b){return a.join(b)}', + 'function f(a, b){return Array.prototype.join.call(a, b)}', + 'function f(a, b){return Array.prototype.join.apply(a, [b])}', + ] + for test in tests: + jsi = JSInterpreter(test) + self._test(jsi, 'test', args=[test_input, '']) + self._test(jsi, 't-e-s-t', args=[test_input, '-']) + self._test(jsi, '', args=[[], '-']) + + def test_split(self): + test_result = list('test') + tests = [ + 'function f(a, b){return a.split(b)}', + 'function f(a, b){return String.prototype.split.call(a, b)}', + 'function f(a, b){return String.prototype.split.apply(a, [b])}', + ] + for test in tests: + jsi = JSInterpreter(test) + self._test(jsi, test_result, args=['test', '']) + self._test(jsi, test_result, args=['t-e-s-t', '-']) + self._test(jsi, [''], args=['', '-']) + self._test(jsi, [], args=['', '']) + if __name__ == '__main__': unittest.main() diff --git a/test/test_networking.py b/test/test_networking.py index af3ece3b44..983c89e2e3 100644 --- a/test/test_networking.py +++ b/test/test_networking.py @@ -914,7 +914,6 @@ class TestRequestsRequestHandler(TestRequestHandlerBase): class TestCurlCFFIRequestHandler(TestRequestHandlerBase): @pytest.mark.parametrize('params,extensions', [ - ({}, {'impersonate': ImpersonateTarget('chrome')}), ({'impersonate': ImpersonateTarget('chrome', '110')}, {}), ({'impersonate': ImpersonateTarget('chrome', '99')}, {'impersonate': ImpersonateTarget('chrome', '110')}), ]) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index b0f3269e1c..a14bef511e 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -167,6 +167,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js', '1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg', ), + ( + 'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js', + 'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A', + ), ] diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index b18684aea2..faee548bab 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2195,9 +2195,8 @@ class YoutubeDL: or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio })) - def _default_format_spec(self, info_dict, download=True): - download = download and not self.params.get('simulate') - prefer_best = download and ( + def _default_format_spec(self, info_dict): + prefer_best = ( self.params['outtmpl']['default'] == '-' or info_dict.get('is_live') and not self.params.get('live_from_start')) @@ -2205,7 +2204,7 @@ class YoutubeDL: merger = FFmpegMergerPP(self) return merger.available and merger.can_merge() - if not prefer_best and download and not can_merge(): + if not prefer_best and not can_merge(): prefer_best = True formats = self._get_formats(info_dict) evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec)) @@ -2964,7 +2963,7 @@ class YoutubeDL: continue if format_selector is None: - req_format = self._default_format_spec(info_dict, download=download) + req_format = self._default_format_spec(info_dict) self.write_debug(f'Default format spec: {req_format}') format_selector = self.build_format_selector(req_format) diff --git a/yt_dlp/extractor/box.py b/yt_dlp/extractor/box.py index 3547ad9973..f06339f701 100644 --- a/yt_dlp/extractor/box.py +++ b/yt_dlp/extractor/box.py @@ -12,7 +12,7 @@ from ..utils.traversal import traverse_obj class BoxIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P[^/?#]+)(?:/file/(?P\d+))?' + _VALID_URL = r'https?://(?:[^.]+\.)?(?Papp|ent)\.box\.com/s/(?P[^/?#]+)(?:/file/(?P\d+))?' _TESTS = [{ 'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538', 'md5': '1f81b2fd3960f38a40a3b8823e5fcd43', @@ -38,10 +38,22 @@ class BoxIE(InfoExtractor): 'uploader_id': '239068974', }, 'params': {'skip_download': 'dash fragment too small'}, + }, { + 'url': 'https://thejacksonlaboratory.ent.box.com/s/2x09dm6vcg6y28o0oox1so4l0t8wzt6l/file/1536173056065', + 'info_dict': { + 'id': '1536173056065', + 'ext': 'mp4', + 'uploader_id': '18523128264', + 'uploader': 'Lexi Hennigan', + 'title': 'iPSC Symposium recording part 1.mp4', + 'timestamp': 1716228343, + 'upload_date': '20240520', + }, + 'params': {'skip_download': 'dash fragment too small'}, }] def _real_extract(self, url): - shared_name, file_id = self._match_valid_url(url).groups() + shared_name, file_id, service = self._match_valid_url(url).group('shared_name', 'id', 'service') webpage = self._download_webpage(url, file_id or shared_name) if not file_id: @@ -57,14 +69,14 @@ class BoxIE(InfoExtractor): request_token = self._search_json( r'Box\.config\s*=', webpage, 'Box config', file_id)['requestToken'] access_token = self._download_json( - 'https://app.box.com/app-api/enduserapp/elements/tokens', file_id, + f'https://{service}.box.com/app-api/enduserapp/elements/tokens', file_id, 'Downloading token JSON metadata', data=json.dumps({'fileIDs': [file_id]}).encode(), headers={ 'Content-Type': 'application/json', 'X-Request-Token': request_token, 'X-Box-EndUser-API': 'sharedName=' + shared_name, })[file_id]['read'] - shared_link = 'https://app.box.com/s/' + shared_name + shared_link = f'https://{service}.box.com/s/{shared_name}' f = self._download_json( 'https://api.box.com/2.0/files/' + file_id, file_id, 'Downloading file JSON metadata', headers={ diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 0c6f0b070a..afb512d90c 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -314,23 +314,11 @@ class SoundcloudBaseIE(InfoExtractor): self.write_debug(f'"{identifier}" is not a requested format, skipping') continue - stream = None - for retry in self.RetryManager(fatal=False): - try: - stream = self._call_api( - format_url, track_id, f'Downloading {identifier} format info JSON', - query=query, headers=self._HEADERS) - except ExtractorError as e: - if isinstance(e.cause, HTTPError) and e.cause.status == 429: - self.report_warning( - 'You have reached the API rate limit, which is ~600 requests per ' - '10 minutes. Use the --extractor-retries and --retry-sleep options ' - 'to configure an appropriate retry count and wait time', only_once=True) - retry.error = e.cause - else: - self.report_warning(e.msg) + # XXX: if not extract_flat, 429 error must be caught where _extract_info_dict is called + stream_url = traverse_obj(self._call_api( + format_url, track_id, f'Downloading {identifier} format info JSON', + query=query, headers=self._HEADERS), ('url', {url_or_none})) - stream_url = traverse_obj(stream, ('url', {url_or_none})) if invalid_url(stream_url): continue format_urls.add(stream_url) @@ -647,7 +635,17 @@ class SoundcloudIE(SoundcloudBaseIE): info = self._call_api( info_json_url, full_title, 'Downloading info JSON', query=query, headers=self._HEADERS) - return self._extract_info_dict(info, full_title, token) + for retry in self.RetryManager(): + try: + return self._extract_info_dict(info, full_title, token) + except ExtractorError as e: + if not isinstance(e.cause, HTTPError) or not e.cause.status == 429: + raise + self.report_warning( + 'You have reached the API rate limit, which is ~600 requests per ' + '10 minutes. Use the --extractor-retries and --retry-sleep options ' + 'to configure an appropriate retry count and wait time', only_once=True) + retry.error = e.cause class SoundcloudPlaylistBaseIE(SoundcloudBaseIE): diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index c3505b14fe..aa1dcecf62 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -1458,9 +1458,11 @@ class TikTokLiveIE(TikTokBaseIE): if webpage: data = self._get_sigi_state(webpage, uploader or room_id) - room_id = (traverse_obj(data, ('UserModule', 'users', ..., 'roomId', {str_or_none}), get_all=False) - or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=None) - or room_id) + room_id = ( + traverse_obj(data, (( + ('LiveRoom', 'liveRoomUserInfo', 'user'), + ('UserModule', 'users', ...)), 'roomId', {str}, any)) + or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=room_id)) uploader = uploader or traverse_obj( data, ('LiveRoom', 'liveRoomUserInfo', 'user', 'uniqueId'), ('UserModule', 'users', ..., 'uniqueId'), get_all=False, expected_type=str) diff --git a/yt_dlp/extractor/tv5mondeplus.py b/yt_dlp/extractor/tv5mondeplus.py index 52ff230f2a..15e9975c61 100644 --- a/yt_dlp/extractor/tv5mondeplus.py +++ b/yt_dlp/extractor/tv5mondeplus.py @@ -96,7 +96,7 @@ class TV5MondePlusIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) + webpage = self._download_webpage(url, display_id, impersonate=True) if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage: self.raise_geo_restricted(countries=['FR']) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index fecf854504..386b29b6eb 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3159,7 +3159,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_n_function_name(self, jscode): funcname, idx = self._search_regex( - r'\.get\("n"\)\)&&\(b=(?P[a-zA-Z0-9$]+)(?:\[(?P\d+)\])?\([a-zA-Z0-9]\)', + r'''(?x)(?:\.get\("n"\)\)&&\(b=|b=String\.fromCharCode\(110\),c=a\.get\(b\)\)&&\(c=) + (?P[a-zA-Z0-9$]+)(?:\[(?P\d+)\])?\([a-zA-Z0-9]\)''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx')) if not idx: return funcname @@ -3170,7 +3171,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_n_function_code(self, video_id, player_url): player_id = self._extract_player_info(player_url) - func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1') + func_code = self.cache.load('youtube-nsig', player_id, min_ver='2024.07.09') jscode = func_code or self._load_player(video_id, player_url) jsi = JSInterpreter(jscode) @@ -3179,17 +3180,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): func_name = self._extract_n_function_name(jscode) - # For redundancy - func_code = self._search_regex( - rf'''(?xs){func_name}\s*=\s*function\s*\((?P[\w$]+)\)\s* - # NB: The end of the regex is intentionally kept strict - {{(?P.+?}}\s*return\ [\w$]+.join\(""\))}};''', - jscode, 'nsig function', group=('var', 'code'), default=None) - if func_code: - func_code = ([func_code[0]], func_code[1]) - else: - self.write_debug('Extracting nsig function with jsinterp') - func_code = jsi.extract_function_code(func_name) + func_code = jsi.extract_function_code(func_name) self.cache.store('youtube-nsig', player_id, func_code) return jsi, player_id, func_code diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index a0f32892fd..851d4dc7bf 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -636,6 +636,8 @@ class JSInterpreter: raise self.Exception(f'{member} {msg}', expr) def eval_method(): + nonlocal member + if (variable, member) == ('console', 'debug'): if Debugger.ENABLED: Debugger.write(self.interpret_expression(f'[{arg_str}]', local_vars, allow_recursion)) @@ -644,6 +646,7 @@ class JSInterpreter: types = { 'String': str, 'Math': float, + 'Array': list, } obj = local_vars.get(variable, types.get(variable, NO_DEFAULT)) if obj is NO_DEFAULT: @@ -667,6 +670,21 @@ class JSInterpreter: self.interpret_expression(v, local_vars, allow_recursion) for v in self._separate(arg_str)] + # Fixup prototype call + if isinstance(obj, type) and member.startswith('prototype.'): + new_member, _, func_prototype = member.partition('.')[2].partition('.') + assertion(argvals, 'takes one or more arguments') + assertion(isinstance(argvals[0], obj), f'needs binding to type {obj}') + if func_prototype == 'call': + obj, *argvals = argvals + elif func_prototype == 'apply': + assertion(len(argvals) == 2, 'takes two arguments') + obj, argvals = argvals + assertion(isinstance(argvals, list), 'second argument needs to be a list') + else: + raise self.Exception(f'Unsupported Function method {func_prototype}', expr) + member = new_member + if obj is str: if member == 'fromCharCode': assertion(argvals, 'takes one or more arguments') diff --git a/yt_dlp/networking/_curlcffi.py b/yt_dlp/networking/_curlcffi.py index b1f0fb82e8..45b25cefb5 100644 --- a/yt_dlp/networking/_curlcffi.py +++ b/yt_dlp/networking/_curlcffi.py @@ -2,6 +2,7 @@ from __future__ import annotations import io import math +import re import urllib.parse from ._helper import InstanceStoreMixin, select_proxy @@ -27,11 +28,12 @@ from ..utils import int_or_none if curl_cffi is None: raise ImportError('curl_cffi is not installed') -curl_cffi_version = tuple(int_or_none(x, default=0) for x in curl_cffi.__version__.split('.')) -if curl_cffi_version != (0, 5, 10): +curl_cffi_version = tuple(map(int, re.split(r'[^\d]+', curl_cffi.__version__)[:3])) + +if curl_cffi_version != (0, 5, 10) and not ((0, 7, 0) <= curl_cffi_version < (0, 8, 0)): curl_cffi._yt_dlp__version = f'{curl_cffi.__version__} (unsupported)' - raise ImportError('Only curl_cffi 0.5.10 is supported') + raise ImportError('Only curl_cffi versions 0.5.10, 0.7.X are supported') import curl_cffi.requests from curl_cffi.const import CurlECode, CurlOpt @@ -110,6 +112,13 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin): _SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY) _SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h') _SUPPORTED_IMPERSONATE_TARGET_MAP = { + **({ + ImpersonateTarget('chrome', '124', 'macos', '14'): curl_cffi.requests.BrowserType.chrome124, + ImpersonateTarget('chrome', '123', 'macos', '14'): curl_cffi.requests.BrowserType.chrome123, + ImpersonateTarget('chrome', '120', 'macos', '14'): curl_cffi.requests.BrowserType.chrome120, + ImpersonateTarget('chrome', '119', 'macos', '14'): curl_cffi.requests.BrowserType.chrome119, + ImpersonateTarget('chrome', '116', 'windows', '10'): curl_cffi.requests.BrowserType.chrome116, + } if curl_cffi_version >= (0, 7, 0) else {}), ImpersonateTarget('chrome', '110', 'windows', '10'): curl_cffi.requests.BrowserType.chrome110, ImpersonateTarget('chrome', '107', 'windows', '10'): curl_cffi.requests.BrowserType.chrome107, ImpersonateTarget('chrome', '104', 'windows', '10'): curl_cffi.requests.BrowserType.chrome104, @@ -118,9 +127,15 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin): ImpersonateTarget('chrome', '99', 'windows', '10'): curl_cffi.requests.BrowserType.chrome99, ImpersonateTarget('edge', '101', 'windows', '10'): curl_cffi.requests.BrowserType.edge101, ImpersonateTarget('edge', '99', 'windows', '10'): curl_cffi.requests.BrowserType.edge99, + **({ + ImpersonateTarget('safari', '17.0', 'macos', '14'): curl_cffi.requests.BrowserType.safari17_0, + } if curl_cffi_version >= (0, 7, 0) else {}), ImpersonateTarget('safari', '15.5', 'macos', '12'): curl_cffi.requests.BrowserType.safari15_5, ImpersonateTarget('safari', '15.3', 'macos', '11'): curl_cffi.requests.BrowserType.safari15_3, ImpersonateTarget('chrome', '99', 'android', '12'): curl_cffi.requests.BrowserType.chrome99_android, + **({ + ImpersonateTarget('safari', '17.2', 'ios', '17.2'): curl_cffi.requests.BrowserType.safari17_2_ios, + } if curl_cffi_version >= (0, 7, 0) else {}), } def _create_instance(self, cookiejar=None): @@ -187,7 +202,7 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin): timeout = self._calculate_timeout(request) # set CURLOPT_LOW_SPEED_LIMIT and CURLOPT_LOW_SPEED_TIME to act as a read timeout. [1] - # curl_cffi does not currently do this. [2] + # This is required only for 0.5.10 [2] # Note: CURLOPT_LOW_SPEED_TIME is in seconds, so we need to round up to the nearest second. [3] # [1] https://unix.stackexchange.com/a/305311 # [2] https://github.com/yifeikong/curl_cffi/issues/156 @@ -203,7 +218,7 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin): data=request.data, verify=self.verify, max_redirects=5, - timeout=timeout, + timeout=(timeout, timeout), impersonate=self._SUPPORTED_IMPERSONATE_TARGET_MAP.get( self._get_request_target(request)), interface=self.source_address, @@ -222,7 +237,7 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin): elif ( e.code == CurlECode.PROXY - or (e.code == CurlECode.RECV_ERROR and 'Received HTTP code 407 from proxy after CONNECT' in str(e)) + or (e.code == CurlECode.RECV_ERROR and 'CONNECT' in str(e)) ): raise ProxyError(cause=e) from e else: diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 323b54c371..31de564c52 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2024.07.07' +__version__ = '2024.07.09' -RELEASE_GIT_HEAD = 'b337d2989ce0614651d363383f6f743d977248ef' +RELEASE_GIT_HEAD = '7ead7332af69422cee931aec3faa277288e9e212' VARIANT = None @@ -12,4 +12,4 @@ CHANNEL = 'stable' ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2024.07.07' +_pkg_version = '2024.07.09'