diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 12ec5b0d8c..63bc8713a8 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -468,7 +468,7 @@ jobs:
- name: Install Requirements
run: |
python devscripts/install_deps.py -o --include build
- python devscripts/install_deps.py
+ python devscripts/install_deps.py --include curl-cffi
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.7.0-py3-none-any.whl"
- name: Prepare
diff --git a/Changelog.md b/Changelog.md
index b5a829d94a..6d3033efc7 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -4,6 +4,23 @@
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
-->
+### 2024.07.09
+
+#### Core changes
+- [Do not alter default format selection when simulated](https://github.com/yt-dlp/yt-dlp/commit/0b570f2a90ce2363ba06089217514d644e7be2e0) ([#9862](https://github.com/yt-dlp/yt-dlp/issues/9862)) by [seproDev](https://github.com/seproDev)
+
+#### Extractor changes
+- **youtube**: [Remove broken `n` function extraction fallback](https://github.com/yt-dlp/yt-dlp/commit/7ead7332af69422cee931aec3faa277288e9e212) ([#10396](https://github.com/yt-dlp/yt-dlp/issues/10396)) by [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
+
+### 2024.07.08
+
+#### Core changes
+- **jsinterp**: [Implement `Function.prototype` resolving for `call` and `apply`](https://github.com/yt-dlp/yt-dlp/commit/6c056ea7aeb03660281653a9668547f2548f194f) ([#10392](https://github.com/yt-dlp/yt-dlp/issues/10392)) by [Grub4K](https://github.com/Grub4K)
+
+#### Extractor changes
+- **soundcloud**: [Fix rate-limit handling](https://github.com/yt-dlp/yt-dlp/commit/4b50b292cc98534fb8c7cdf0ae5cb85862f7ebfc) ([#10389](https://github.com/yt-dlp/yt-dlp/issues/10389)) by [bashonly](https://github.com/bashonly)
+- **youtube**: [Fix JS `n` function name extraction](https://github.com/yt-dlp/yt-dlp/commit/297b0a379282a15c80d82d51f3757c961db2dae1) ([#10390](https://github.com/yt-dlp/yt-dlp/issues/10390)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
+
### 2024.07.07
#### Important changes
diff --git a/README.md b/README.md
index 836e084e61..f13562ae9b 100644
--- a/README.md
+++ b/README.md
@@ -202,7 +202,7 @@ The following provide support for impersonating browser requests. This may be re
* [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE)
* Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"`
- * Currently only included in `yt-dlp.exe` and `yt-dlp_macos` builds
+ * Currently included in `yt-dlp.exe`, `yt-dlp_x86.exe`, `yt-dlp_linux` and `yt-dlp_macos` builds
### Metadata
@@ -2219,6 +2219,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu
* yt-dlp versions between 2021.11.10 and 2023.06.21 estimated `filesize_approx` values for fragmented/manifest formats. This was added for convenience in [f2fe69](https://github.com/yt-dlp/yt-dlp/commit/f2fe69c7b0d208bdb1f6292b4ae92bc1e1a7444a), but was reverted in [0dff8e](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) due to the potentially extreme inaccuracy of the estimated values. Use `--compat-options manifest-filesize-approx` to keep extracting the estimated values
* yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) to be used for standard http requests.
* The sub-modules `swfinterp`, `casefold` are removed.
+* Passing `--simulate` (or calling `extract_info` with `download=False`) no longer alters the default format selection. See [#9843](https://github.com/yt-dlp/yt-dlp/issues/9843) for details.
For ease of use, a few more compat options are available:
diff --git a/bundle/docker/static/entrypoint.sh b/bundle/docker/static/entrypoint.sh
index 93d84fa9b7..2202759742 100755
--- a/bundle/docker/static/entrypoint.sh
+++ b/bundle/docker/static/entrypoint.sh
@@ -2,7 +2,7 @@
set -e
source ~/.local/share/pipx/venvs/pyinstaller/bin/activate
-python -m devscripts.install_deps --include secretstorage
+python -m devscripts.install_deps --include secretstorage --include curl-cffi
python -m devscripts.make_lazy_extractors
python devscripts/update-version.py -c "${channel}" -r "${origin}" "${version}"
python -m bundle.pyinstaller
diff --git a/pyproject.toml b/pyproject.toml
index 39986a355c..4561abaf4d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -53,7 +53,9 @@ dependencies = [
[project.optional-dependencies]
default = []
-curl-cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
+curl-cffi = [
+ "curl-cffi>=0.5.10,!=0.6.*,<0.8; implementation_name=='cpython'",
+]
secretstorage = [
"cffi",
"secretstorage",
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 841ce1af3e..1847c4ffd8 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -4,6 +4,7 @@
import os
import sys
import unittest
+from unittest.mock import patch
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -520,7 +521,33 @@ class TestFormatSelection(unittest.TestCase):
ydl.process_ie_result(info_dict)
self.assertEqual(ydl.downloaded_info_dicts, [])
- def test_default_format_spec(self):
+ @patch('yt_dlp.postprocessor.ffmpeg.FFmpegMergerPP.available', False)
+ def test_default_format_spec_without_ffmpeg(self):
+ ydl = YDL({})
+ self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
+
+ ydl = YDL({'simulate': True})
+ self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
+
+ ydl = YDL({})
+ self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
+
+ ydl = YDL({'simulate': True})
+ self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
+
+ ydl = YDL({'outtmpl': '-'})
+ self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
+
+ ydl = YDL({})
+ self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
+ self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
+
+ @patch('yt_dlp.postprocessor.ffmpeg.FFmpegMergerPP.available', True)
+ @patch('yt_dlp.postprocessor.ffmpeg.FFmpegMergerPP.can_merge', lambda _: True)
+ def test_default_format_spec_with_ffmpeg(self):
+ ydl = YDL({})
+ self.assertEqual(ydl._default_format_spec({}), 'bestvideo*+bestaudio/best')
+
ydl = YDL({'simulate': True})
self.assertEqual(ydl._default_format_spec({}), 'bestvideo*+bestaudio/best')
@@ -528,13 +555,13 @@ class TestFormatSelection(unittest.TestCase):
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
ydl = YDL({'simulate': True})
- self.assertEqual(ydl._default_format_spec({'is_live': True}), 'bestvideo*+bestaudio/best')
+ self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
ydl = YDL({'outtmpl': '-'})
self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio')
ydl = YDL({})
- self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo*+bestaudio/best')
+ self.assertEqual(ydl._default_format_spec({}), 'bestvideo*+bestaudio/best')
self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio')
diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index 7c556e4611..df92c8315b 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -376,6 +376,33 @@ class TestJSInterpreter(unittest.TestCase):
jsi = JSInterpreter('''function f(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''')
self.assertEqual(jsi.call_function('f', '''h 7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, 
.3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME
_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|')))
+ def test_join(self):
+ test_input = list('test')
+ tests = [
+ 'function f(a, b){return a.join(b)}',
+ 'function f(a, b){return Array.prototype.join.call(a, b)}',
+ 'function f(a, b){return Array.prototype.join.apply(a, [b])}',
+ ]
+ for test in tests:
+ jsi = JSInterpreter(test)
+ self._test(jsi, 'test', args=[test_input, ''])
+ self._test(jsi, 't-e-s-t', args=[test_input, '-'])
+ self._test(jsi, '', args=[[], '-'])
+
+ def test_split(self):
+ test_result = list('test')
+ tests = [
+ 'function f(a, b){return a.split(b)}',
+ 'function f(a, b){return String.prototype.split.call(a, b)}',
+ 'function f(a, b){return String.prototype.split.apply(a, [b])}',
+ ]
+ for test in tests:
+ jsi = JSInterpreter(test)
+ self._test(jsi, test_result, args=['test', ''])
+ self._test(jsi, test_result, args=['t-e-s-t', '-'])
+ self._test(jsi, [''], args=['', '-'])
+ self._test(jsi, [], args=['', ''])
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_networking.py b/test/test_networking.py
index af3ece3b44..983c89e2e3 100644
--- a/test/test_networking.py
+++ b/test/test_networking.py
@@ -914,7 +914,6 @@ class TestRequestsRequestHandler(TestRequestHandlerBase):
class TestCurlCFFIRequestHandler(TestRequestHandlerBase):
@pytest.mark.parametrize('params,extensions', [
- ({}, {'impersonate': ImpersonateTarget('chrome')}),
({'impersonate': ImpersonateTarget('chrome', '110')}, {}),
({'impersonate': ImpersonateTarget('chrome', '99')}, {'impersonate': ImpersonateTarget('chrome', '110')}),
])
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index b0f3269e1c..a14bef511e 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -167,6 +167,10 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js',
'1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg',
),
+ (
+ 'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js',
+ 'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A',
+ ),
]
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index b18684aea2..faee548bab 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -2195,9 +2195,8 @@ class YoutubeDL:
or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
}))
- def _default_format_spec(self, info_dict, download=True):
- download = download and not self.params.get('simulate')
- prefer_best = download and (
+ def _default_format_spec(self, info_dict):
+ prefer_best = (
self.params['outtmpl']['default'] == '-'
or info_dict.get('is_live') and not self.params.get('live_from_start'))
@@ -2205,7 +2204,7 @@ class YoutubeDL:
merger = FFmpegMergerPP(self)
return merger.available and merger.can_merge()
- if not prefer_best and download and not can_merge():
+ if not prefer_best and not can_merge():
prefer_best = True
formats = self._get_formats(info_dict)
evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
@@ -2964,7 +2963,7 @@ class YoutubeDL:
continue
if format_selector is None:
- req_format = self._default_format_spec(info_dict, download=download)
+ req_format = self._default_format_spec(info_dict)
self.write_debug(f'Default format spec: {req_format}')
format_selector = self.build_format_selector(req_format)
diff --git a/yt_dlp/extractor/box.py b/yt_dlp/extractor/box.py
index 3547ad9973..f06339f701 100644
--- a/yt_dlp/extractor/box.py
+++ b/yt_dlp/extractor/box.py
@@ -12,7 +12,7 @@ from ..utils.traversal import traverse_obj
class BoxIE(InfoExtractor):
- _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/?#]+)(?:/file/(?P<id>\d+))?'
+ _VALID_URL = r'https?://(?:[^.]+\.)?(?P<service>app|ent)\.box\.com/s/(?P<shared_name>[^/?#]+)(?:/file/(?P<id>\d+))?'
_TESTS = [{
'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
@@ -38,10 +38,22 @@ class BoxIE(InfoExtractor):
'uploader_id': '239068974',
},
'params': {'skip_download': 'dash fragment too small'},
+ }, {
+ 'url': 'https://thejacksonlaboratory.ent.box.com/s/2x09dm6vcg6y28o0oox1so4l0t8wzt6l/file/1536173056065',
+ 'info_dict': {
+ 'id': '1536173056065',
+ 'ext': 'mp4',
+ 'uploader_id': '18523128264',
+ 'uploader': 'Lexi Hennigan',
+ 'title': 'iPSC Symposium recording part 1.mp4',
+ 'timestamp': 1716228343,
+ 'upload_date': '20240520',
+ },
+ 'params': {'skip_download': 'dash fragment too small'},
}]
def _real_extract(self, url):
- shared_name, file_id = self._match_valid_url(url).groups()
+ shared_name, file_id, service = self._match_valid_url(url).group('shared_name', 'id', 'service')
webpage = self._download_webpage(url, file_id or shared_name)
if not file_id:
@@ -57,14 +69,14 @@ class BoxIE(InfoExtractor):
request_token = self._search_json(
r'Box\.config\s*=', webpage, 'Box config', file_id)['requestToken']
access_token = self._download_json(
- 'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
+ f'https://{service}.box.com/app-api/enduserapp/elements/tokens', file_id,
'Downloading token JSON metadata',
data=json.dumps({'fileIDs': [file_id]}).encode(), headers={
'Content-Type': 'application/json',
'X-Request-Token': request_token,
'X-Box-EndUser-API': 'sharedName=' + shared_name,
})[file_id]['read']
- shared_link = 'https://app.box.com/s/' + shared_name
+ shared_link = f'https://{service}.box.com/s/{shared_name}'
f = self._download_json(
'https://api.box.com/2.0/files/' + file_id, file_id,
'Downloading file JSON metadata', headers={
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index 0c6f0b070a..afb512d90c 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -314,23 +314,11 @@ class SoundcloudBaseIE(InfoExtractor):
self.write_debug(f'"{identifier}" is not a requested format, skipping')
continue
- stream = None
- for retry in self.RetryManager(fatal=False):
- try:
- stream = self._call_api(
- format_url, track_id, f'Downloading {identifier} format info JSON',
- query=query, headers=self._HEADERS)
- except ExtractorError as e:
- if isinstance(e.cause, HTTPError) and e.cause.status == 429:
- self.report_warning(
- 'You have reached the API rate limit, which is ~600 requests per '
- '10 minutes. Use the --extractor-retries and --retry-sleep options '
- 'to configure an appropriate retry count and wait time', only_once=True)
- retry.error = e.cause
- else:
- self.report_warning(e.msg)
+ # XXX: if not extract_flat, 429 error must be caught where _extract_info_dict is called
+ stream_url = traverse_obj(self._call_api(
+ format_url, track_id, f'Downloading {identifier} format info JSON',
+ query=query, headers=self._HEADERS), ('url', {url_or_none}))
- stream_url = traverse_obj(stream, ('url', {url_or_none}))
if invalid_url(stream_url):
continue
format_urls.add(stream_url)
@@ -647,7 +635,17 @@ class SoundcloudIE(SoundcloudBaseIE):
info = self._call_api(
info_json_url, full_title, 'Downloading info JSON', query=query, headers=self._HEADERS)
- return self._extract_info_dict(info, full_title, token)
+ for retry in self.RetryManager():
+ try:
+ return self._extract_info_dict(info, full_title, token)
+ except ExtractorError as e:
+ if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
+ raise
+ self.report_warning(
+ 'You have reached the API rate limit, which is ~600 requests per '
+ '10 minutes. Use the --extractor-retries and --retry-sleep options '
+ 'to configure an appropriate retry count and wait time', only_once=True)
+ retry.error = e.cause
class SoundcloudPlaylistBaseIE(SoundcloudBaseIE):
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index c3505b14fe..aa1dcecf62 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -1458,9 +1458,11 @@ class TikTokLiveIE(TikTokBaseIE):
if webpage:
data = self._get_sigi_state(webpage, uploader or room_id)
- room_id = (traverse_obj(data, ('UserModule', 'users', ..., 'roomId', {str_or_none}), get_all=False)
- or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=None)
- or room_id)
+ room_id = (
+ traverse_obj(data, ((
+ ('LiveRoom', 'liveRoomUserInfo', 'user'),
+ ('UserModule', 'users', ...)), 'roomId', {str}, any))
+ or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=room_id))
uploader = uploader or traverse_obj(
data, ('LiveRoom', 'liveRoomUserInfo', 'user', 'uniqueId'),
('UserModule', 'users', ..., 'uniqueId'), get_all=False, expected_type=str)
diff --git a/yt_dlp/extractor/tv5mondeplus.py b/yt_dlp/extractor/tv5mondeplus.py
index 52ff230f2a..15e9975c61 100644
--- a/yt_dlp/extractor/tv5mondeplus.py
+++ b/yt_dlp/extractor/tv5mondeplus.py
@@ -96,7 +96,7 @@ class TV5MondePlusIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
+ webpage = self._download_webpage(url, display_id, impersonate=True)
if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
self.raise_geo_restricted(countries=['FR'])
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index fecf854504..386b29b6eb 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -3159,7 +3159,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_name(self, jscode):
funcname, idx = self._search_regex(
- r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
+ r'''(?x)(?:\.get\("n"\)\)&&\(b=|b=String\.fromCharCode\(110\),c=a\.get\(b\)\)&&\(c=)
+ (?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)''',
jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
if not idx:
return funcname
@@ -3170,7 +3171,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_code(self, video_id, player_url):
player_id = self._extract_player_info(player_url)
- func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
+ func_code = self.cache.load('youtube-nsig', player_id, min_ver='2024.07.09')
jscode = func_code or self._load_player(video_id, player_url)
jsi = JSInterpreter(jscode)
@@ -3179,17 +3180,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
func_name = self._extract_n_function_name(jscode)
- # For redundancy
- func_code = self._search_regex(
- rf'''(?xs){func_name}\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
- # NB: The end of the regex is intentionally kept strict
- {{(?P<code>.+?}}\s*return\ [\w$]+.join\(""\))}};''',
- jscode, 'nsig function', group=('var', 'code'), default=None)
- if func_code:
- func_code = ([func_code[0]], func_code[1])
- else:
- self.write_debug('Extracting nsig function with jsinterp')
- func_code = jsi.extract_function_code(func_name)
+ func_code = jsi.extract_function_code(func_name)
self.cache.store('youtube-nsig', player_id, func_code)
return jsi, player_id, func_code
diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py
index a0f32892fd..851d4dc7bf 100644
--- a/yt_dlp/jsinterp.py
+++ b/yt_dlp/jsinterp.py
@@ -636,6 +636,8 @@ class JSInterpreter:
raise self.Exception(f'{member} {msg}', expr)
def eval_method():
+ nonlocal member
+
if (variable, member) == ('console', 'debug'):
if Debugger.ENABLED:
Debugger.write(self.interpret_expression(f'[{arg_str}]', local_vars, allow_recursion))
@@ -644,6 +646,7 @@ class JSInterpreter:
types = {
'String': str,
'Math': float,
+ 'Array': list,
}
obj = local_vars.get(variable, types.get(variable, NO_DEFAULT))
if obj is NO_DEFAULT:
@@ -667,6 +670,21 @@ class JSInterpreter:
self.interpret_expression(v, local_vars, allow_recursion)
for v in self._separate(arg_str)]
+ # Fixup prototype call
+ if isinstance(obj, type) and member.startswith('prototype.'):
+ new_member, _, func_prototype = member.partition('.')[2].partition('.')
+ assertion(argvals, 'takes one or more arguments')
+ assertion(isinstance(argvals[0], obj), f'needs binding to type {obj}')
+ if func_prototype == 'call':
+ obj, *argvals = argvals
+ elif func_prototype == 'apply':
+ assertion(len(argvals) == 2, 'takes two arguments')
+ obj, argvals = argvals
+ assertion(isinstance(argvals, list), 'second argument needs to be a list')
+ else:
+ raise self.Exception(f'Unsupported Function method {func_prototype}', expr)
+ member = new_member
+
if obj is str:
if member == 'fromCharCode':
assertion(argvals, 'takes one or more arguments')
diff --git a/yt_dlp/networking/_curlcffi.py b/yt_dlp/networking/_curlcffi.py
index b1f0fb82e8..45b25cefb5 100644
--- a/yt_dlp/networking/_curlcffi.py
+++ b/yt_dlp/networking/_curlcffi.py
@@ -2,6 +2,7 @@ from __future__ import annotations
import io
import math
+import re
import urllib.parse
from ._helper import InstanceStoreMixin, select_proxy
@@ -27,11 +28,12 @@ from ..utils import int_or_none
if curl_cffi is None:
raise ImportError('curl_cffi is not installed')
-curl_cffi_version = tuple(int_or_none(x, default=0) for x in curl_cffi.__version__.split('.'))
-if curl_cffi_version != (0, 5, 10):
+curl_cffi_version = tuple(map(int, re.split(r'[^\d]+', curl_cffi.__version__)[:3]))
+
+if curl_cffi_version != (0, 5, 10) and not ((0, 7, 0) <= curl_cffi_version < (0, 8, 0)):
curl_cffi._yt_dlp__version = f'{curl_cffi.__version__} (unsupported)'
- raise ImportError('Only curl_cffi 0.5.10 is supported')
+ raise ImportError('Only curl_cffi versions 0.5.10, 0.7.X are supported')
import curl_cffi.requests
from curl_cffi.const import CurlECode, CurlOpt
@@ -110,6 +112,13 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
_SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)
_SUPPORTED_PROXY_SCHEMES = ('http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
_SUPPORTED_IMPERSONATE_TARGET_MAP = {
+ **({
+ ImpersonateTarget('chrome', '124', 'macos', '14'): curl_cffi.requests.BrowserType.chrome124,
+ ImpersonateTarget('chrome', '123', 'macos', '14'): curl_cffi.requests.BrowserType.chrome123,
+ ImpersonateTarget('chrome', '120', 'macos', '14'): curl_cffi.requests.BrowserType.chrome120,
+ ImpersonateTarget('chrome', '119', 'macos', '14'): curl_cffi.requests.BrowserType.chrome119,
+ ImpersonateTarget('chrome', '116', 'windows', '10'): curl_cffi.requests.BrowserType.chrome116,
+ } if curl_cffi_version >= (0, 7, 0) else {}),
ImpersonateTarget('chrome', '110', 'windows', '10'): curl_cffi.requests.BrowserType.chrome110,
ImpersonateTarget('chrome', '107', 'windows', '10'): curl_cffi.requests.BrowserType.chrome107,
ImpersonateTarget('chrome', '104', 'windows', '10'): curl_cffi.requests.BrowserType.chrome104,
@@ -118,9 +127,15 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
ImpersonateTarget('chrome', '99', 'windows', '10'): curl_cffi.requests.BrowserType.chrome99,
ImpersonateTarget('edge', '101', 'windows', '10'): curl_cffi.requests.BrowserType.edge101,
ImpersonateTarget('edge', '99', 'windows', '10'): curl_cffi.requests.BrowserType.edge99,
+ **({
+ ImpersonateTarget('safari', '17.0', 'macos', '14'): curl_cffi.requests.BrowserType.safari17_0,
+ } if curl_cffi_version >= (0, 7, 0) else {}),
ImpersonateTarget('safari', '15.5', 'macos', '12'): curl_cffi.requests.BrowserType.safari15_5,
ImpersonateTarget('safari', '15.3', 'macos', '11'): curl_cffi.requests.BrowserType.safari15_3,
ImpersonateTarget('chrome', '99', 'android', '12'): curl_cffi.requests.BrowserType.chrome99_android,
+ **({
+ ImpersonateTarget('safari', '17.2', 'ios', '17.2'): curl_cffi.requests.BrowserType.safari17_2_ios,
+ } if curl_cffi_version >= (0, 7, 0) else {}),
}
def _create_instance(self, cookiejar=None):
@@ -187,7 +202,7 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
timeout = self._calculate_timeout(request)
# set CURLOPT_LOW_SPEED_LIMIT and CURLOPT_LOW_SPEED_TIME to act as a read timeout. [1]
- # curl_cffi does not currently do this. [2]
+ # This is required only for 0.5.10 [2]
# Note: CURLOPT_LOW_SPEED_TIME is in seconds, so we need to round up to the nearest second. [3]
# [1] https://unix.stackexchange.com/a/305311
# [2] https://github.com/yifeikong/curl_cffi/issues/156
@@ -203,7 +218,7 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
data=request.data,
verify=self.verify,
max_redirects=5,
- timeout=timeout,
+ timeout=(timeout, timeout),
impersonate=self._SUPPORTED_IMPERSONATE_TARGET_MAP.get(
self._get_request_target(request)),
interface=self.source_address,
@@ -222,7 +237,7 @@ class CurlCFFIRH(ImpersonateRequestHandler, InstanceStoreMixin):
elif (
e.code == CurlECode.PROXY
- or (e.code == CurlECode.RECV_ERROR and 'Received HTTP code 407 from proxy after CONNECT' in str(e))
+ or (e.code == CurlECode.RECV_ERROR and 'CONNECT' in str(e))
):
raise ProxyError(cause=e) from e
else:
diff --git a/yt_dlp/version.py b/yt_dlp/version.py
index 323b54c371..31de564c52 100644
--- a/yt_dlp/version.py
+++ b/yt_dlp/version.py
@@ -1,8 +1,8 @@
# Autogenerated by devscripts/update-version.py
-__version__ = '2024.07.07'
+__version__ = '2024.07.09'
-RELEASE_GIT_HEAD = 'b337d2989ce0614651d363383f6f743d977248ef'
+RELEASE_GIT_HEAD = '7ead7332af69422cee931aec3faa277288e9e212'
VARIANT = None
@@ -12,4 +12,4 @@ CHANNEL = 'stable'
ORIGIN = 'yt-dlp/yt-dlp'
-_pkg_version = '2024.07.07'
+_pkg_version = '2024.07.09'