[cleanup] Misc fixes

Closes #3565, https://github.com/yt-dlp/yt-dlp/issues/3514#issuecomment-1105944364
This commit is contained in:
pukkandan 2022-04-29 07:18:36 +05:30
parent 0a41f331cc
commit 1d485a1a79
No known key found for this signature in database
GPG key ID: 7EEE9E1E817D0A39
19 changed files with 75 additions and 42 deletions

View file

@ -7,7 +7,7 @@ class LazyLoadMetaClass(type):
def __getattr__(cls, name):
if '_real_class' not in cls.__dict__:
write_string(
f'WARNING: Falling back to normal extractor since lazy extractor '
'WARNING: Falling back to normal extractor since lazy extractor '
f'{cls.__name__} does not have attribute {name}{bug_reports_message()}')
return getattr(cls._get_real_class(), name)

View file

@ -62,6 +62,7 @@ from .utils import (
DEFAULT_OUTTMPL,
LINK_TEMPLATES,
NO_DEFAULT,
NUMBER_RE,
OUTTMPL_TYPES,
POSTPROCESS_WHEN,
STR_FORMAT_RE_TMPL,
@ -1049,7 +1050,7 @@ class YoutubeDL:
formatSeconds(info_dict['duration'], '-' if sanitize else ':')
if info_dict.get('duration', None) is not None
else None)
info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
info_dict['video_autonumber'] = self._num_videos
if info_dict.get('resolution') is None:
info_dict['resolution'] = self.format_resolution(info_dict, default=None)
@ -1071,18 +1072,18 @@ class YoutubeDL:
# Field is of the form key1.key2...
# where keys (except first) can be string, int or slice
FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
INTERNAL_FORMAT_RE = re.compile(r'''(?x)
INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
(?P<negate>-)?
(?P<fields>{field})
(?P<maths>(?:{math_op}{math_field})*)
(?P<fields>{FIELD_RE})
(?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
(?:>(?P<strf_format>.+?))?
(?P<remaining>
(?P<alternate>(?<!\\),[^|&)]+)?
(?:&(?P<replacement>.*?))?
(?:\|(?P<default>.*?))?
)$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
)$''')
def _traverse_infodict(k):
k = k.split('.')
@ -2336,7 +2337,7 @@ class YoutubeDL:
video_id=info_dict['id'], ie=info_dict['extractor'])
elif not info_dict.get('title'):
self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
if info_dict.get('duration') is not None:
info_dict['duration_string'] = formatSeconds(info_dict['duration'])
@ -3669,10 +3670,11 @@ class YoutubeDL:
) or 'none'
write_debug('exe versions: %s' % exe_str)
from .compat.compat_utils import get_package_info
from .dependencies import available_dependencies
write_debug('Optional libraries: %s' % (', '.join(sorted({
module.__name__.split('.')[0] for module in available_dependencies.values()
join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
})) or 'none'))
self._setup_opener()

View file

@ -46,10 +46,6 @@ def compat_ord(c):
return c if isinstance(c, int) else ord(c)
def compat_setenv(key, value, env=os.environ):
env[key] = value
if compat_os_name == 'nt' and sys.version_info < (3, 8):
# os.path.realpath on Windows does not follow symbolic links
# prior to Python 3.8 (see https://bugs.python.org/issue9949)

View file

@ -44,4 +44,9 @@ compat_urllib_parse_urlparse = urllib.parse.urlparse
compat_urllib_request = urllib.request
compat_urlparse = compat_urllib_parse = urllib.parse
def compat_setenv(key, value, env=os.environ):
    """Store *value* under *key* in the mapping *env*.

    *env* defaults to ``os.environ``; any other mapping supporting item
    assignment may be passed instead. Returns ``None``.
    """
    env[key] = value
__all__ = [x for x in globals() if x.startswith('compat_')]

View file

@ -1,5 +1,4 @@
# flake8: noqa: F405
from asyncio import * # noqa: F403
from .compat_utils import passthrough_module

View file

@ -1,9 +1,28 @@
import collections
import contextlib
import importlib
import sys
import types
_NO_ATTRIBUTE = object()
# Lightweight (name, version) record returned by get_package_info()
_Package = collections.namedtuple('Package', ('name', 'version'))


def get_package_info(module):
    """Return a ``_Package(name, version)`` describing *module*.

    The name is the module's ``_yt_dlp__identifier`` attribute when present,
    otherwise the top-level package name taken from ``module.__name__``.

    The version is looked up on the top-level package, trying
    ``__version__``, ``version_string`` and ``version`` in that order and
    using the first one that is not ``None``. If the top-level package cannot
    be imported, or none of the attributes is set, the version is the empty
    string, so callers never see the literal text ``'None'`` as a version.
    """
    parent = module.__name__.split('.')[0]

    # The parent may be unimportable (e.g. a synthetic module); in that case
    # every getattr() below falls back to None
    parent_module = None
    with contextlib.suppress(ImportError):
        parent_module = importlib.import_module(parent)

    candidates = (
        getattr(parent_module, attr, None)
        for attr in ('__version__', 'version_string', 'version'))
    version = next((value for value in candidates if value is not None), None)

    name = getattr(module, '_yt_dlp__identifier', parent)
    return _Package(name, '' if version is None else str(version))
def _is_package(module):
try:
module.__getattribute__('__path__')
@ -12,9 +31,6 @@ def _is_package(module):
return True
_NO_ATTRIBUTE = object()
def passthrough_module(parent, child, *, callback=lambda _: None):
parent_module = importlib.import_module(parent)
child_module = importlib.import_module(child, parent)

View file

@ -1,5 +1,4 @@
# flake8: noqa: F405
from re import * # F403
from .compat_utils import passthrough_module

View file

@ -1,4 +1,6 @@
# flake8: noqa: F401
"""Imports all optional dependencies for the project.
An attribute "_yt_dlp__identifier" may be inserted into the module if it uses an ambiguous namespace"""
try:
import brotlicffi as brotli
@ -28,6 +30,15 @@ except ImportError:
from Crypto.Cipher import AES as Cryptodome_AES
except ImportError:
Cryptodome_AES = None
else:
try:
# In pycrypto, mode defaults to ECB. See:
# https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode
Cryptodome_AES.new(b'abcdefghijklmnop')
except TypeError:
pass
else:
Cryptodome_AES._yt_dlp__identifier = 'pycrypto'
try:

View file

@ -12,6 +12,7 @@ from ..minicurses import (
QuietMultilinePrinter,
)
from ..utils import (
NUMBER_RE,
LockingUnsupportedError,
Namespace,
decodeArgument,
@ -91,7 +92,8 @@ class FileDownloader:
'trouble',
'write_debug',
):
setattr(self, func, getattr(ydl, func))
if not hasattr(self, func):
setattr(self, func, getattr(ydl, func))
def to_screen(self, *args, **kargs):
self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)
@ -170,7 +172,7 @@ class FileDownloader:
@staticmethod
def parse_bytes(bytestr):
"""Parse a string indicating a byte quantity into an integer."""
matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
matchobj = re.match(rf'(?i)^({NUMBER_RE})([kMGTPEZY]?)$', bytestr)
if matchobj is None:
return None
number = float(matchobj.group(1))

View file

@ -368,7 +368,7 @@ class FFmpegFD(ExternalFD):
# These exists only for compatibility. Extractors should use
# info_dict['downloader_options']['ffmpeg_args'] instead
args += info_dict.get('_ffmpeg_args')
args += info_dict.get('_ffmpeg_args') or []
seekable = info_dict.get('_seekable')
if seekable is not None:
# setting -seekable prevents ffmpeg from guessing if the server

View file

@ -1,3 +1,4 @@
import concurrent.futures
import contextlib
import http.client
import json
@ -5,12 +6,6 @@ import math
import os
import time
try:
import concurrent.futures
can_threaded_download = True
except ImportError:
can_threaded_download = False
from .common import FileDownloader
from .http import HttpFD
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
@ -28,6 +23,8 @@ class HttpQuietDownloader(HttpFD):
def to_screen(self, *args, **kargs):
pass
console_title = to_screen
def report_retry(self, err, count, retries):
super().to_screen(
f'[download] Got server HTTP error: {err}. Retrying (attempt {count} of {self.format_retries(retries)}) ...')
@ -501,8 +498,7 @@ class FragmentFD(FileDownloader):
max_workers = math.ceil(
self.params.get('concurrent_fragment_downloads', 1) / ctx.get('max_progress', 1))
if can_threaded_download and max_workers > 1:
if max_workers > 1:
def _download_fragment(fragment):
ctx_copy = ctx.copy()
download_fragment(fragment, ctx_copy)

View file

@ -173,7 +173,7 @@ body > figure > img {
mime_type = b'image/png'
if frag_content.startswith((b'GIF87a', b'GIF89a')):
mime_type = b'image/gif'
if frag_content.startswith(b'RIFF') and frag_content[8:12] == 'WEBP':
if frag_content.startswith(b'RIFF') and frag_content[8:12] == b'WEBP':
mime_type = b'image/webp'
frag_header = io.BytesIO()

View file

@ -1922,8 +1922,7 @@ class InfoExtractor:
def _sort_formats(self, formats, field_preference=[]):
if not formats:
return
format_sort = self.FormatSort(self, field_preference)
formats.sort(key=lambda f: format_sort.calculate_preference(f))
formats.sort(key=self.FormatSort(self, field_preference).calculate_preference)
def _check_formats(self, formats, video_id):
if formats:

View file

@ -17,7 +17,7 @@ class FujiTVFODPlus7IE(InfoExtractor):
'url': 'https://fod.fujitv.co.jp/title/5d40/5d40110076',
'info_dict': {
'id': '5d40110076',
'ext': 'mp4',
'ext': 'ts',
'title': '#1318 『まる子、まぼろしの洋館を見る』の巻',
'series': 'ちびまる子ちゃん',
'series_id': '5d40',
@ -28,7 +28,7 @@ class FujiTVFODPlus7IE(InfoExtractor):
'url': 'https://fod.fujitv.co.jp/title/5d40/5d40810083',
'info_dict': {
'id': '5d40810083',
'ext': 'mp4',
'ext': 'ts',
'title': '#1324 『まる子とオニの子』の巻『結成2月をムダにしない会』の巻',
'description': 'md5:3972d900b896adc8ab1849e310507efa',
'series': 'ちびまる子ちゃん',
@ -51,7 +51,7 @@ class FujiTVFODPlus7IE(InfoExtractor):
for src in src_json['video_selector']:
if not src.get('url'):
continue
fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'mp4')
fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'ts')
for f in fmt:
f.update(dict(zip(('height', 'width'),
self._BITRATE_MAP.get(f.get('tbr'), ()))))

View file

@ -242,6 +242,9 @@ class FunimationIE(FunimationBaseIE):
'language_preference': language_preference(lang.lower()),
})
formats.extend(current_formats)
if not formats and (requested_languages or requested_versions):
self.raise_no_formats(
'There are no video formats matching the requested languages/versions', expected=True, video_id=display_id)
self._remove_duplicate_formats(formats)
self._sort_formats(formats, ('lang', 'source'))

View file

@ -3107,7 +3107,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
except ExtractorError as e:
self.report_warning(
f'nsig extraction failed: You may experience throttling for some formats\n'
'nsig extraction failed: You may experience throttling for some formats\n'
f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
throttled = True

View file

@ -79,9 +79,9 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
original_thumbnail = thumbnail_filename = info['thumbnails'][idx]['filepath']
thumbnail_ext = os.path.splitext(thumbnail_filename)[1][1:]
# Convert unsupported thumbnail formats (see #25687, #25717)
# PNG is preferred since JPEG is lossy
thumbnail_ext = os.path.splitext(thumbnail_filename)[1][1:]
if info['ext'] not in ('mkv', 'mka') and thumbnail_ext not in ('jpg', 'jpeg', 'png'):
thumbnail_filename = convertor.convert_thumbnail(thumbnail_filename, 'png')
thumbnail_ext = 'png'
@ -100,7 +100,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
elif info['ext'] in ['mkv', 'mka']:
options = list(self.stream_copy_opts())
mimetype = 'image/%s' % ('jpeg' if thumbnail_ext in ('jpg', 'jpeg') else thumbnail_ext)
mimetype = f'image/{thumbnail_ext.replace("jpg", "jpeg")}'
old_stream, new_stream = self.get_stream_number(
filename, ('tags', 'mimetype'), mimetype)
if old_stream is not None:

View file

@ -1,3 +1,5 @@
import os
from .common import PostProcessor
from ..compat import compat_os_name
from ..utils import (
@ -28,6 +30,7 @@ class XAttrMetadataPP(PostProcessor):
self.to_screen('Writing metadata to file\'s xattrs')
filename = info['filepath']
mtime = os.stat(filename).st_mtime
try:
xattr_mapping = {
@ -53,8 +56,6 @@ class XAttrMetadataPP(PostProcessor):
write_xattr(filename, xattrname, byte_value)
num_written += 1
return [], info
except XAttrUnavailableError as e:
raise PostProcessingError(str(e))
@ -73,4 +74,6 @@ class XAttrMetadataPP(PostProcessor):
else:
msg += '(You may have to enable them in your /etc/fstab)'
raise PostProcessingError(str(e))
return [], info
self.try_utime(filename, mtime, mtime)
return [], info

View file

@ -245,6 +245,8 @@ DATE_FORMATS_MONTH_FIRST.extend([
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
NUMBER_RE = r'\d+(?:\.\d+)?'
def preferredencoding():
"""Get preferred encoding.
@ -3427,7 +3429,7 @@ def parse_dfxp_time_expr(time_expr):
if not time_expr:
return
mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
if mobj:
return float(mobj.group('time_offset'))