mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-15 03:41:33 +01:00
Ignore BOM in batch files (Fixes #2450)
This commit is contained in:
parent
f6acbdecf4
commit
62e609ab77
3 changed files with 34 additions and 7 deletions
|
@ -9,6 +9,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
|
||||||
# Various small unit tests
|
# Various small unit tests
|
||||||
|
import io
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
#from youtube_dl.utils import htmlentity_transform
|
#from youtube_dl.utils import htmlentity_transform
|
||||||
|
@ -21,6 +22,7 @@ from youtube_dl.utils import (
|
||||||
orderedSet,
|
orderedSet,
|
||||||
PagedList,
|
PagedList,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
read_batch_urls,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
shell_quote,
|
shell_quote,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
|
@ -250,5 +252,14 @@ class TestUtil(unittest.TestCase):
|
||||||
def test_struct_unpack(self):
|
def test_struct_unpack(self):
|
||||||
self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,))
|
self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,))
|
||||||
|
|
||||||
|
def test_read_batch_urls(self):
    # Batch text deliberately mixes a leading BOM-like prefix, padding
    # spaces, CRLF line endings and '#' / ';' comment lines; only the four
    # bare entries are expected to come back.
    batch_text = u'''\xef\xbb\xbf foo
bar\r
baz
# More after this line\r
; or after this
bam'''
    expected_urls = [u'foo', u'bar', u'baz', u'bam']
    batch_file = io.StringIO(batch_text)
    self.assertEqual(read_batch_urls(batch_file), expected_urls)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -71,6 +71,7 @@ from .utils import (
|
||||||
get_cachedir,
|
get_cachedir,
|
||||||
MaxDownloadsReached,
|
MaxDownloadsReached,
|
||||||
preferredencoding,
|
preferredencoding,
|
||||||
|
read_batch_urls,
|
||||||
SameFileError,
|
SameFileError,
|
||||||
setproctitle,
|
setproctitle,
|
||||||
std_headers,
|
std_headers,
|
||||||
|
@ -552,21 +553,19 @@ def _real_main(argv=None):
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
# Batch file verification
|
# Batch file verification
|
||||||
batchurls = []
|
batch_urls = []
|
||||||
if opts.batchfile is not None:
|
if opts.batchfile is not None:
|
||||||
try:
|
try:
|
||||||
if opts.batchfile == '-':
|
if opts.batchfile == '-':
|
||||||
batchfd = sys.stdin
|
batchfd = sys.stdin
|
||||||
else:
|
else:
|
||||||
batchfd = open(opts.batchfile, 'r')
|
batchfd = io.open(opts.batchfile, 'r', encoding='utf-8', errors='ignore')
|
||||||
batchurls = batchfd.readlines()
|
batch_urls = read_batch_urls(batchfd)
|
||||||
batchurls = [x.strip() for x in batchurls]
|
|
||||||
batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
|
|
||||||
if opts.verbose:
|
if opts.verbose:
|
||||||
write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
|
write_string(u'[debug] Batch file urls: ' + repr(batch_urls) + u'\n')
|
||||||
except IOError:
|
except IOError:
|
||||||
sys.exit(u'ERROR: batch file could not be read')
|
sys.exit(u'ERROR: batch file could not be read')
|
||||||
all_urls = batchurls + args
|
all_urls = batch_urls + args
|
||||||
all_urls = [url.strip() for url in all_urls]
|
all_urls = [url.strip() for url in all_urls]
|
||||||
_enc = preferredencoding()
|
_enc = preferredencoding()
|
||||||
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
|
all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import contextlib
|
||||||
import ctypes
|
import ctypes
|
||||||
import datetime
|
import datetime
|
||||||
import email.utils
|
import email.utils
|
||||||
|
@ -1245,3 +1246,19 @@ except TypeError:
|
||||||
else:
|
else:
|
||||||
struct_pack = struct.pack
|
struct_pack = struct.pack
|
||||||
struct_unpack = struct.unpack
|
struct_unpack = struct.unpack
|
||||||
|
|
||||||
|
|
||||||
|
def read_batch_urls(batch_fd):
    """Return the list of URLs contained in a batch file.

    ``batch_fd`` is an iterable, closable file-like object yielding one
    entry per line, as either text or bytes. It is always closed before
    this function returns.

    For every line:
      * bytes are decoded as UTF-8 (undecodable bytes are replaced);
      * a leading UTF-8 byte order mark is removed;
      * surrounding whitespace (including a trailing ``\\r``) is stripped;
      * empty lines and lines starting with ``#``, ``;`` or ``]`` are
        dropped.

    Returns a list of text strings in file order.
    """
    # A UTF-8 BOM can reach us in two spellings: U+FEFF when the stream was
    # properly decoded as UTF-8, or the three characters \xef\xbb\xbf when
    # the raw BOM bytes were decoded with a single-byte codec. str.strip()
    # removes neither, so both have to be cut off explicitly.
    bom_marks = (u'\ufeff', u'\xef\xbb\xbf')

    def fixup(url):
        # Normalise one raw line; returns False for lines to be skipped.
        if isinstance(url, bytes):
            url = url.decode('utf-8', 'replace')
        for bom in bom_marks:
            if url.startswith(bom):
                url = url[len(bom):]
                break
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        # Empty strings and the False marker are both filtered out here.
        return [url for url in map(fixup, fd) if url]
|
||||||
|
|
Loading…
Reference in a new issue