[core] Fix the byte string-format going over the specified byte limit

The byte string-format (`B`) should be applied after sanitization is done,
because sanitization might replace a single-byte character with a multi-byte
one, e.g. '/' with '⧸', making the resulting string exceed the requested byte
limit.

Fixes #10060
This commit is contained in:
Maxim Biro 2024-05-30 11:48:20 -04:00
parent cc767e9490
commit ec2ee10f34
No known key found for this signature in database
GPG key ID: AB3AD9896472BFA4
2 changed files with 9 additions and 7 deletions

View file

@ -656,13 +656,13 @@ class TestYoutubeDL(unittest.TestCase):
} }
def test_prepare_outtmpl_and_filename(self): def test_prepare_outtmpl_and_filename(self):
def test(tmpl, expected, *, info=None, **params): def test(tmpl, expected, *, info=None, sanitize=False, **params):
params['outtmpl'] = tmpl params['outtmpl'] = tmpl
ydl = FakeYDL(params) ydl = FakeYDL(params)
ydl._num_downloads = 1 ydl._num_downloads = 1
self.assertEqual(ydl.validate_outtmpl(tmpl), None) self.assertEqual(ydl.validate_outtmpl(tmpl), None)
out = ydl.evaluate_outtmpl(tmpl, info or self.outtmpl_info) out = ydl.evaluate_outtmpl(tmpl, info or self.outtmpl_info, sanitize=sanitize)
fname = ydl.prepare_filename(info or self.outtmpl_info) fname = ydl.prepare_filename(info or self.outtmpl_info)
if not isinstance(expected, (list, tuple)): if not isinstance(expected, (list, tuple)):
@ -861,6 +861,7 @@ class TestYoutubeDL(unittest.TestCase):
test('Hello %(title2)s', 'Hello %PATH%') test('Hello %(title2)s', 'Hello %PATH%')
test('%(title3)s', ('foo/bar\\test', 'foobartest')) test('%(title3)s', ('foo/bar\\test', 'foobartest'))
test('folder/%(title3)s', ('folder/foo/bar\\test', f'folder{os.path.sep}foobartest')) test('folder/%(title3)s', ('folder/foo/bar\\test', f'folder{os.path.sep}foobartest'))
test('%(title3).7B', 'foob', sanitize=True)
def test_format_note(self): def test_format_note(self):
ydl = YoutubeDL() ydl = YoutubeDL()

View file

@ -1371,9 +1371,6 @@ class YoutubeDL:
elif fmt[-1] == 'q': # quoted elif fmt[-1] == 'q': # quoted
value = map(str, variadic(value) if '#' in flags else [value]) value = map(str, variadic(value) if '#' in flags else [value])
value, fmt = shell_quote(value, shell=True), str_fmt value, fmt = shell_quote(value, shell=True), str_fmt
elif fmt[-1] == 'B': # bytes
value = f'%{str_fmt}'.encode() % str(value).encode()
value, fmt = value.decode('utf-8', 'ignore'), 's'
elif fmt[-1] == 'U': # unicode normalized elif fmt[-1] == 'U': # unicode normalized
value, fmt = unicodedata.normalize( value, fmt = unicodedata.normalize(
# "+" = compatibility equivalence, "#" = NFD # "+" = compatibility equivalence, "#" = NFD
@ -1390,7 +1387,7 @@ class YoutubeDL:
value = str(value)[0] value = str(value)[0]
else: else:
fmt = str_fmt fmt = str_fmt
elif fmt[-1] not in 'rsa': # numeric elif fmt[-1] not in 'rsaB': # numeric
value = float_or_none(value) value = float_or_none(value)
if value is None: if value is None:
value, fmt = default, 's' value, fmt = default, 's'
@ -1402,9 +1399,13 @@ class YoutubeDL:
value, fmt = repr(value), str_fmt value, fmt = repr(value), str_fmt
elif fmt[-1] == 'a': elif fmt[-1] == 'a':
value, fmt = ascii(value), str_fmt value, fmt = ascii(value), str_fmt
if fmt[-1] in 'csra': if fmt[-1] in 'csraB':
value = sanitizer(last_field, value) value = sanitizer(last_field, value)
if fmt[-1] == 'B': # bytes
value = f'%{str_fmt}'.encode() % str(value).encode()
value, fmt = value.decode('utf-8', 'ignore'), 's'
key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format')) key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format'))
TMPL_DICT[key] = value TMPL_DICT[key] = value
return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix')) return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))