mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-13 20:01:57 +01:00
Use 64 KiB buffered writes for performance and less fragmentation
I use yt-dlp on Windows, writing to a Linux system via SMB over a 10GbE connection and downloading over 400 Mbps cable internet. I have observed that downloads often start very fast (40+ MiB/sec) but then throttle down to 8-20 MiB/sec. I also observed a large amount of disk thrashing, which is surprising for such a large array and for such a small amount of data that is supposedly being written sequentially. The problem is two-fold. Each downloaded fragment is stored in a very short-lived *-FragX file, which is appended to the output stream as soon as the fragment completes and is then deleted. Both operations use small write buffers. When the OS write buffers start to flush, the two sets of writes, combined with the sheer number of small writes, start competing to complete the queued writes in different areas of the volume. Python defaults to sending writes at the underlying device's "block size", falling back to io.DEFAULT_BUFFER_SIZE. In practical terms, this means a write buffer of 4096 or 8192 bytes. This commit increases most write buffers to 65536 bytes (64 KiB) using the buffering=X option of open(), significantly speeding up writes of larger chunks of data and reducing potential fragmentation in low-disk-space conditions. With these changes, I consistently see fast downloads, and the array thrashing is noticeably reduced.
This commit is contained in:
parent
d298693b1b
commit
a9ac178eb1
3 changed files with 6 additions and 6 deletions
|
@ -3298,7 +3298,7 @@ class YoutubeDL:
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
self.to_screen('[info] Writing video annotations to: ' + annofn)
|
self.to_screen('[info] Writing video annotations to: ' + annofn)
|
||||||
with open(annofn, 'w', encoding='utf-8') as annofile:
|
with open(annofn, 'w', encoding='utf-8', buffering=65536) as annofile:
|
||||||
annofile.write(info_dict['annotations'])
|
annofile.write(info_dict['annotations'])
|
||||||
except (KeyError, TypeError):
|
except (KeyError, TypeError):
|
||||||
self.report_warning('There are no annotations to write.')
|
self.report_warning('There are no annotations to write.')
|
||||||
|
@ -4336,7 +4336,7 @@ class YoutubeDL:
|
||||||
try:
|
try:
|
||||||
# Use newline='' to prevent conversion of newline characters
|
# Use newline='' to prevent conversion of newline characters
|
||||||
# See https://github.com/ytdl-org/youtube-dl/issues/10268
|
# See https://github.com/ytdl-org/youtube-dl/issues/10268
|
||||||
with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
|
with open(sub_filename, 'w', encoding='utf-8', newline='', buffering=65536) as subfile:
|
||||||
subfile.write(sub_info['data'])
|
subfile.write(sub_info['data'])
|
||||||
sub_info['filepath'] = sub_filename
|
sub_info['filepath'] = sub_filename
|
||||||
ret.append((sub_filename, sub_filename_final))
|
ret.append((sub_filename, sub_filename_final))
|
||||||
|
@ -4399,7 +4399,7 @@ class YoutubeDL:
|
||||||
try:
|
try:
|
||||||
uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
|
uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
|
||||||
self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
|
self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
|
||||||
with open(thumb_filename, 'wb') as thumbf:
|
with open(thumb_filename, 'wb', buffering=65536) as thumbf:
|
||||||
shutil.copyfileobj(uf, thumbf)
|
shutil.copyfileobj(uf, thumbf)
|
||||||
ret.append((thumb_filename, thumb_filename_final))
|
ret.append((thumb_filename, thumb_filename_final))
|
||||||
t['filepath'] = thumb_filename
|
t['filepath'] = thumb_filename
|
||||||
|
|
|
@ -1064,7 +1064,7 @@ class InfoExtractor:
|
||||||
data = self._create_request(url_or_request, data).data
|
data = self._create_request(url_or_request, data).data
|
||||||
filename = self._request_dump_filename(urlh.url, video_id, data)
|
filename = self._request_dump_filename(urlh.url, video_id, data)
|
||||||
self.to_screen(f'Saving request to {filename}')
|
self.to_screen(f'Saving request to {filename}')
|
||||||
with open(filename, 'wb') as outf:
|
with open(filename, 'wb', buffering=65536) as outf:
|
||||||
outf.write(webpage_bytes)
|
outf.write(webpage_bytes)
|
||||||
|
|
||||||
content = self.__decode_webpage(webpage_bytes, encoding, urlh.headers)
|
content = self.__decode_webpage(webpage_bytes, encoding, urlh.headers)
|
||||||
|
|
|
@ -187,7 +187,7 @@ def write_json_file(obj, fn):
|
||||||
|
|
||||||
tf = tempfile.NamedTemporaryFile(
|
tf = tempfile.NamedTemporaryFile(
|
||||||
prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
|
prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
|
||||||
suffix='.tmp', delete=False, mode='w', encoding='utf-8')
|
suffix='.tmp', delete=False, mode='w', encoding='utf-8', buffering=65536)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with tf:
|
with tf:
|
||||||
|
@ -603,7 +603,7 @@ def sanitize_open(filename, open_mode):
|
||||||
raise LockingUnsupportedError
|
raise LockingUnsupportedError
|
||||||
stream = locked_file(filename, open_mode, block=False).__enter__()
|
stream = locked_file(filename, open_mode, block=False).__enter__()
|
||||||
except OSError:
|
except OSError:
|
||||||
stream = open(filename, open_mode)
|
stream = open(filename, open_mode, buffering=65536)
|
||||||
return stream, filename
|
return stream, filename
|
||||||
except OSError as err:
|
except OSError as err:
|
||||||
if attempt or err.errno in (errno.EACCES,):
|
if attempt or err.errno in (errno.EACCES,):
|
||||||
|
|
Loading…
Reference in a new issue