From ec2ee10f341c72bdfd10fa2d44e11c1dcda66d76 Mon Sep 17 00:00:00 2001 From: Maxim Biro Date: Thu, 30 May 2024 11:48:20 -0400 Subject: [PATCH] [core] Fix the byte string-format going over the specified byte limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The byte string-format should be applied after the sanitization is done, as sanitize might replace a single byte character with a multi-byte one, e.g. '/' with '⧸', making the resulting string go over the desired byte limit. Fixes #10060 --- test/test_YoutubeDL.py | 5 +++-- yt_dlp/YoutubeDL.py | 11 ++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 841ce1af3..2535c6d98 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -656,13 +656,13 @@ class TestYoutubeDL(unittest.TestCase): } def test_prepare_outtmpl_and_filename(self): - def test(tmpl, expected, *, info=None, **params): + def test(tmpl, expected, *, info=None, sanitize=False, **params): params['outtmpl'] = tmpl ydl = FakeYDL(params) ydl._num_downloads = 1 self.assertEqual(ydl.validate_outtmpl(tmpl), None) - out = ydl.evaluate_outtmpl(tmpl, info or self.outtmpl_info) + out = ydl.evaluate_outtmpl(tmpl, info or self.outtmpl_info, sanitize=sanitize) fname = ydl.prepare_filename(info or self.outtmpl_info) if not isinstance(expected, (list, tuple)): @@ -861,6 +861,7 @@ class TestYoutubeDL(unittest.TestCase): test('Hello %(title2)s', 'Hello %PATH%') test('%(title3)s', ('foo/bar\\test', 'foo⧸bar⧹test')) test('folder/%(title3)s', ('folder/foo/bar\\test', f'folder{os.path.sep}foo⧸bar⧹test')) + test('%(title3).7B', 'foo⧸b', sanitize=True) def test_format_note(self): ydl = YoutubeDL() diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index e56c3ed3c..deeaa3129 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1371,9 +1371,6 @@ class YoutubeDL: elif fmt[-1] == 'q': # quoted value = map(str, variadic(value) if '#' in flags else [value]) value, fmt = shell_quote(value, shell=True), str_fmt - elif fmt[-1] == 'B': # bytes - value = f'%{str_fmt}'.encode() % str(value).encode() - value, fmt = value.decode('utf-8', 'ignore'), 's' elif fmt[-1] == 'U': # unicode normalized value, fmt = unicodedata.normalize( # "+" = compatibility equivalence, "#" = NFD @@ -1390,7 +1387,7 @@ class YoutubeDL: value = str(value)[0] else: fmt = str_fmt - elif fmt[-1] not in 'rsa': # numeric + elif fmt[-1] not in 'rsaB': # numeric value = float_or_none(value) if value is None: value, fmt = default, 's' @@ -1402,9 +1399,13 @@ class YoutubeDL: value, fmt = repr(value), str_fmt elif fmt[-1] == 'a': value, fmt = ascii(value), str_fmt - if fmt[-1] in 'csra': + if fmt[-1] in 'csraB': value = sanitizer(last_field, value) + if fmt[-1] == 'B': # bytes + value = f'%{str_fmt}'.encode() % str(value).encode() + value, fmt = value.decode('utf-8', 'ignore'), 's' + key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format')) TMPL_DICT[key] = value return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))