mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-15 03:41:33 +01:00
[core] Fix the byte string-format going over the specified byte limit
The byte string-format should be applied after sanitization is done, because sanitization might replace a single-byte character with a multi-byte one (e.g. '/' with '⧸'), making the resulting string exceed the desired byte limit. Fixes #10060
This commit is contained in:
parent
cc767e9490
commit
ec2ee10f34
2 changed files with 9 additions and 7 deletions
|
@ -656,13 +656,13 @@ class TestYoutubeDL(unittest.TestCase):
|
||||||
}
|
}
|
||||||
|
|
||||||
def test_prepare_outtmpl_and_filename(self):
|
def test_prepare_outtmpl_and_filename(self):
|
||||||
def test(tmpl, expected, *, info=None, **params):
|
def test(tmpl, expected, *, info=None, sanitize=False, **params):
|
||||||
params['outtmpl'] = tmpl
|
params['outtmpl'] = tmpl
|
||||||
ydl = FakeYDL(params)
|
ydl = FakeYDL(params)
|
||||||
ydl._num_downloads = 1
|
ydl._num_downloads = 1
|
||||||
self.assertEqual(ydl.validate_outtmpl(tmpl), None)
|
self.assertEqual(ydl.validate_outtmpl(tmpl), None)
|
||||||
|
|
||||||
out = ydl.evaluate_outtmpl(tmpl, info or self.outtmpl_info)
|
out = ydl.evaluate_outtmpl(tmpl, info or self.outtmpl_info, sanitize=sanitize)
|
||||||
fname = ydl.prepare_filename(info or self.outtmpl_info)
|
fname = ydl.prepare_filename(info or self.outtmpl_info)
|
||||||
|
|
||||||
if not isinstance(expected, (list, tuple)):
|
if not isinstance(expected, (list, tuple)):
|
||||||
|
@ -861,6 +861,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||||
test('Hello %(title2)s', 'Hello %PATH%')
|
test('Hello %(title2)s', 'Hello %PATH%')
|
||||||
test('%(title3)s', ('foo/bar\\test', 'foo⧸bar⧹test'))
|
test('%(title3)s', ('foo/bar\\test', 'foo⧸bar⧹test'))
|
||||||
test('folder/%(title3)s', ('folder/foo/bar\\test', f'folder{os.path.sep}foo⧸bar⧹test'))
|
test('folder/%(title3)s', ('folder/foo/bar\\test', f'folder{os.path.sep}foo⧸bar⧹test'))
|
||||||
|
test('%(title3).7B', 'foo⧸b', sanitize=True)
|
||||||
|
|
||||||
def test_format_note(self):
|
def test_format_note(self):
|
||||||
ydl = YoutubeDL()
|
ydl = YoutubeDL()
|
||||||
|
|
|
@ -1371,9 +1371,6 @@ class YoutubeDL:
|
||||||
elif fmt[-1] == 'q': # quoted
|
elif fmt[-1] == 'q': # quoted
|
||||||
value = map(str, variadic(value) if '#' in flags else [value])
|
value = map(str, variadic(value) if '#' in flags else [value])
|
||||||
value, fmt = shell_quote(value, shell=True), str_fmt
|
value, fmt = shell_quote(value, shell=True), str_fmt
|
||||||
elif fmt[-1] == 'B': # bytes
|
|
||||||
value = f'%{str_fmt}'.encode() % str(value).encode()
|
|
||||||
value, fmt = value.decode('utf-8', 'ignore'), 's'
|
|
||||||
elif fmt[-1] == 'U': # unicode normalized
|
elif fmt[-1] == 'U': # unicode normalized
|
||||||
value, fmt = unicodedata.normalize(
|
value, fmt = unicodedata.normalize(
|
||||||
# "+" = compatibility equivalence, "#" = NFD
|
# "+" = compatibility equivalence, "#" = NFD
|
||||||
|
@ -1390,7 +1387,7 @@ class YoutubeDL:
|
||||||
value = str(value)[0]
|
value = str(value)[0]
|
||||||
else:
|
else:
|
||||||
fmt = str_fmt
|
fmt = str_fmt
|
||||||
elif fmt[-1] not in 'rsa': # numeric
|
elif fmt[-1] not in 'rsaB': # numeric
|
||||||
value = float_or_none(value)
|
value = float_or_none(value)
|
||||||
if value is None:
|
if value is None:
|
||||||
value, fmt = default, 's'
|
value, fmt = default, 's'
|
||||||
|
@ -1402,9 +1399,13 @@ class YoutubeDL:
|
||||||
value, fmt = repr(value), str_fmt
|
value, fmt = repr(value), str_fmt
|
||||||
elif fmt[-1] == 'a':
|
elif fmt[-1] == 'a':
|
||||||
value, fmt = ascii(value), str_fmt
|
value, fmt = ascii(value), str_fmt
|
||||||
if fmt[-1] in 'csra':
|
if fmt[-1] in 'csraB':
|
||||||
value = sanitizer(last_field, value)
|
value = sanitizer(last_field, value)
|
||||||
|
|
||||||
|
if fmt[-1] == 'B': # bytes
|
||||||
|
value = f'%{str_fmt}'.encode() % str(value).encode()
|
||||||
|
value, fmt = value.decode('utf-8', 'ignore'), 's'
|
||||||
|
|
||||||
key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format'))
|
key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format'))
|
||||||
TMPL_DICT[key] = value
|
TMPL_DICT[key] = value
|
||||||
return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
|
return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
|
||||||
|
|
Loading…
Reference in a new issue