mirror of
https://github.com/yt-dlp/yt-dlp
synced 2024-12-26 21:59:08 +01:00
Merge branch 'yt-dlp:master' into master
This commit is contained in:
commit
ba50c89113
27 changed files with 611 additions and 188 deletions
4
.github/workflows/build.yml
vendored
4
.github/workflows/build.yml
vendored
|
@ -240,7 +240,7 @@ jobs:
|
||||||
permissions:
|
permissions:
|
||||||
contents: read
|
contents: read
|
||||||
actions: write # For cleaning up cache
|
actions: write # For cleaning up cache
|
||||||
runs-on: macos-12
|
runs-on: macos-13
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
@ -346,7 +346,7 @@ jobs:
|
||||||
macos_legacy:
|
macos_legacy:
|
||||||
needs: process
|
needs: process
|
||||||
if: inputs.macos_legacy
|
if: inputs.macos_legacy
|
||||||
runs-on: macos-12
|
runs-on: macos-13
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
|
|
@ -673,3 +673,8 @@ rakslice
|
||||||
sahilsinghss73
|
sahilsinghss73
|
||||||
tony-hn
|
tony-hn
|
||||||
xingchensong
|
xingchensong
|
||||||
|
BallzCrasher
|
||||||
|
coreywright
|
||||||
|
eric321
|
||||||
|
poyhen
|
||||||
|
tetra-fox
|
||||||
|
|
22
Changelog.md
22
Changelog.md
|
@ -4,6 +4,28 @@
|
||||||
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
|
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
|
||||||
-->
|
-->
|
||||||
|
|
||||||
|
### 2024.10.07
|
||||||
|
|
||||||
|
#### Core changes
|
||||||
|
- **cookies**: [Fix cookie load error handling](https://github.com/yt-dlp/yt-dlp/commit/e59c82a74cda5139eb3928c75b0bd45484dbe7f0) ([#11140](https://github.com/yt-dlp/yt-dlp/issues/11140)) by [Grub4K](https://github.com/Grub4K)
|
||||||
|
|
||||||
|
#### Extractor changes
|
||||||
|
- **applepodcasts**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6328e2e67a4e126e08af382e6a387073082d5c5f) ([#10903](https://github.com/yt-dlp/yt-dlp/issues/10903)) by [coreywright](https://github.com/coreywright)
|
||||||
|
- **cwtv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4b7bec66d8100978b82bb24110ed44e2a7749931) ([#11135](https://github.com/yt-dlp/yt-dlp/issues/11135)) by [kclauhk](https://github.com/kclauhk)
|
||||||
|
- **instagram**
|
||||||
|
- [Do not hardcode user-agent](https://github.com/yt-dlp/yt-dlp/commit/079a7bc334281d3c13d347770ae5f9f2b7da471a) ([#11155](https://github.com/yt-dlp/yt-dlp/issues/11155)) by [poyhen](https://github.com/poyhen)
|
||||||
|
- [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/cf85cba5d9496bd2689e1070005b4d1b4cd3dc6d) ([#11156](https://github.com/yt-dlp/yt-dlp/issues/11156)) by [tetra-fox](https://github.com/tetra-fox)
|
||||||
|
- **noodlemagazine**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/ccb23e1bac9768d1c70535beb744e668ed4a2720) ([#11144](https://github.com/yt-dlp/yt-dlp/issues/11144)) by [BallzCrasher](https://github.com/BallzCrasher)
|
||||||
|
- **patreon**: [Extract all m3u8 formats for locked posts](https://github.com/yt-dlp/yt-dlp/commit/f91645aceaf13926cf35be2c1dfef61b3aab97fb) ([#11138](https://github.com/yt-dlp/yt-dlp/issues/11138)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **youtube**: [Change default player clients to `ios,mweb`](https://github.com/yt-dlp/yt-dlp/commit/de2062753a188060d76f587e45becce61fe399f9) ([#11190](https://github.com/yt-dlp/yt-dlp/issues/11190)) by [seproDev](https://github.com/seproDev)
|
||||||
|
|
||||||
|
#### Postprocessor changes
|
||||||
|
- **xattrmetadata**: [Try to write each attribute](https://github.com/yt-dlp/yt-dlp/commit/3a193346eeb27ac2959ff30c370adb899ec94732) ([#11115](https://github.com/yt-dlp/yt-dlp/issues/11115)) by [eric321](https://github.com/eric321)
|
||||||
|
|
||||||
|
#### Misc. changes
|
||||||
|
- **ci**: [Rerun failed tests](https://github.com/yt-dlp/yt-dlp/commit/b31b81d85f00601710d4fac590c3e4efb4133283) ([#11143](https://github.com/yt-dlp/yt-dlp/issues/11143)) by [Grub4K](https://github.com/Grub4K)
|
||||||
|
- **cleanup**: Miscellaneous: [1a176d8](https://github.com/yt-dlp/yt-dlp/commit/1a176d874e6772cd898ce507379ea388e96ee3f7) by [bashonly](https://github.com/bashonly)
|
||||||
|
|
||||||
### 2024.09.27
|
### 2024.09.27
|
||||||
|
|
||||||
#### Important changes
|
#### Important changes
|
||||||
|
|
|
@ -278,7 +278,7 @@ py -m bundle.py2exe
|
||||||
* **`devscripts/update-version.py`** - Update the version number based on the current date.
|
* **`devscripts/update-version.py`** - Update the version number based on the current date.
|
||||||
* **`devscripts/set-variant.py`** - Set the build variant of the executable.
|
* **`devscripts/set-variant.py`** - Set the build variant of the executable.
|
||||||
* **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file.
|
* **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file.
|
||||||
* **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading.
|
* **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS` to something nonempty to forcefully disable lazy extractor loading.
|
||||||
|
|
||||||
Note: See their `--help` for more info.
|
Note: See their `--help` for more info.
|
||||||
|
|
||||||
|
@ -1771,7 +1771,7 @@ The following extractors use this feature:
|
||||||
#### youtube
|
#### youtube
|
||||||
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
|
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
|
||||||
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
||||||
* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mediaconnect`, `mweb`, `android_producer`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,web_creator` is used, and `tv_embedded`, `web_creator` and `mediaconnect` are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. Most `android` clients will be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web`
|
* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mediaconnect`, `mweb`, `android_producer`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, and `tv_embedded`, `web_creator` and `mediaconnect` are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. Most `android` clients will be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web`
|
||||||
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
|
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
|
||||||
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
||||||
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
|
||||||
|
@ -1795,6 +1795,7 @@ The following extractors use this feature:
|
||||||
* `key_query`: Passthrough the master m3u8 URL query to its HLS AES-128 decryption key URI if no value is provided, or else apply the query string given as `key_query=VALUE`. Note that this will have no effect if the key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg
|
* `key_query`: Passthrough the master m3u8 URL query to its HLS AES-128 decryption key URI if no value is provided, or else apply the query string given as `key_query=VALUE`. Note that this will have no effect if the key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg
|
||||||
* `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist
|
* `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist
|
||||||
* `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live`
|
* `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live`
|
||||||
|
* `impersonate`: Target(s) to try and impersonate with the initial webpage request; e.g. `safari,chrome-110`. By default any available target will be used. Use `false` to disable impersonation
|
||||||
|
|
||||||
#### funimation
|
#### funimation
|
||||||
* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
|
* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
|
||||||
|
@ -1897,6 +1898,7 @@ In other words, the file structure on the disk looks something like:
|
||||||
myplugin.py
|
myplugin.py
|
||||||
|
|
||||||
yt-dlp looks for these `yt_dlp_plugins` namespace folders in many locations (see below) and loads in plugins from **all** of them.
|
yt-dlp looks for these `yt_dlp_plugins` namespace folders in many locations (see below) and loads in plugins from **all** of them.
|
||||||
|
Set the environment variable `YTDLP_NO_PLUGINS` to something nonempty to disable loading plugins entirely.
|
||||||
|
|
||||||
See the [wiki for some known plugins](https://github.com/yt-dlp/yt-dlp/wiki/Plugins)
|
See the [wiki for some known plugins](https://github.com/yt-dlp/yt-dlp/wiki/Plugins)
|
||||||
|
|
||||||
|
|
|
@ -190,5 +190,11 @@
|
||||||
"action": "add",
|
"action": "add",
|
||||||
"when": "fb8b7f226d251e521a89b23c415e249e5b788e5c",
|
"when": "fb8b7f226d251e521a89b23c415e249e5b788e5c",
|
||||||
"short": "[priority] **The minimum *recommended* Python version has been raised to 3.9**\nSince Python 3.8 will reach end-of-life in October 2024, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)"
|
"short": "[priority] **The minimum *recommended* Python version has been raised to 3.9**\nSince Python 3.8 will reach end-of-life in October 2024, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action": "change",
|
||||||
|
"when": "b31b81d85f00601710d4fac590c3e4efb4133283",
|
||||||
|
"short": "[ci] Rerun failed tests (#11143)",
|
||||||
|
"authors": ["Grub4K"]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
|
|
||||||
# Allow direct execution
|
# Allow direct execution
|
||||||
import os
|
import os
|
||||||
import shutil
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
@ -34,18 +33,14 @@ MODULE_TEMPLATE = read_file('devscripts/lazy_load_template.py')
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
os.environ['YTDLP_NO_PLUGINS'] = 'true'
|
||||||
|
os.environ['YTDLP_NO_LAZY_EXTRACTORS'] = 'true'
|
||||||
|
|
||||||
lazy_extractors_filename = get_filename_args(default_outfile='yt_dlp/extractor/lazy_extractors.py')
|
lazy_extractors_filename = get_filename_args(default_outfile='yt_dlp/extractor/lazy_extractors.py')
|
||||||
if os.path.exists(lazy_extractors_filename):
|
|
||||||
os.remove(lazy_extractors_filename)
|
|
||||||
|
|
||||||
_ALL_CLASSES = get_all_ies() # Must be before import
|
from yt_dlp.extractor.extractors import _ALL_CLASSES
|
||||||
|
|
||||||
import yt_dlp.plugins
|
|
||||||
from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor
|
from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor
|
||||||
|
|
||||||
# Filter out plugins
|
|
||||||
_ALL_CLASSES = [cls for cls in _ALL_CLASSES if not cls.__module__.startswith(f'{yt_dlp.plugins.PACKAGE_NAME}.')]
|
|
||||||
|
|
||||||
DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR})
|
DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR})
|
||||||
module_src = '\n'.join((
|
module_src = '\n'.join((
|
||||||
MODULE_TEMPLATE,
|
MODULE_TEMPLATE,
|
||||||
|
@ -58,20 +53,6 @@ def main():
|
||||||
write_file(lazy_extractors_filename, f'{module_src}\n')
|
write_file(lazy_extractors_filename, f'{module_src}\n')
|
||||||
|
|
||||||
|
|
||||||
def get_all_ies():
|
|
||||||
PLUGINS_DIRNAME = 'ytdlp_plugins'
|
|
||||||
BLOCKED_DIRNAME = f'{PLUGINS_DIRNAME}_blocked'
|
|
||||||
if os.path.exists(PLUGINS_DIRNAME):
|
|
||||||
# os.rename cannot be used, e.g. in Docker. See https://github.com/yt-dlp/yt-dlp/pull/4958
|
|
||||||
shutil.move(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
|
|
||||||
try:
|
|
||||||
from yt_dlp.extractor.extractors import _ALL_CLASSES
|
|
||||||
finally:
|
|
||||||
if os.path.exists(BLOCKED_DIRNAME):
|
|
||||||
shutil.move(BLOCKED_DIRNAME, PLUGINS_DIRNAME)
|
|
||||||
return _ALL_CLASSES
|
|
||||||
|
|
||||||
|
|
||||||
def extra_ie_code(ie, base=None):
|
def extra_ie_code(ie, base=None):
|
||||||
for var in STATIC_CLASS_PROPERTIES:
|
for var in STATIC_CLASS_PROPERTIES:
|
||||||
val = getattr(ie, var)
|
val = getattr(ie, var)
|
||||||
|
|
|
@ -16,7 +16,7 @@ fix_test_name = functools.partial(re.compile(r'IE(_all|_\d+)?$').sub, r'\1')
|
||||||
def parse_args():
|
def parse_args():
|
||||||
parser = argparse.ArgumentParser(description='Run selected yt-dlp tests')
|
parser = argparse.ArgumentParser(description='Run selected yt-dlp tests')
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'test', help='a extractor tests, or one of "core" or "download"', nargs='*')
|
'test', help='an extractor test, test path, or one of "core" or "download"', nargs='*')
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-k', help='run a test matching EXPRESSION. Same as "pytest -k"', metavar='EXPRESSION')
|
'-k', help='run a test matching EXPRESSION. Same as "pytest -k"', metavar='EXPRESSION')
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
@ -27,7 +27,6 @@ def parse_args():
|
||||||
def run_tests(*tests, pattern=None, ci=False):
|
def run_tests(*tests, pattern=None, ci=False):
|
||||||
run_core = 'core' in tests or (not pattern and not tests)
|
run_core = 'core' in tests or (not pattern and not tests)
|
||||||
run_download = 'download' in tests
|
run_download = 'download' in tests
|
||||||
tests = list(map(fix_test_name, tests))
|
|
||||||
|
|
||||||
pytest_args = args.pytest_args or os.getenv('HATCH_TEST_ARGS', '')
|
pytest_args = args.pytest_args or os.getenv('HATCH_TEST_ARGS', '')
|
||||||
arguments = ['pytest', '-Werror', '--tb=short', *shlex.split(pytest_args)]
|
arguments = ['pytest', '-Werror', '--tb=short', *shlex.split(pytest_args)]
|
||||||
|
@ -41,7 +40,9 @@ def run_tests(*tests, pattern=None, ci=False):
|
||||||
arguments.extend(['-m', 'download'])
|
arguments.extend(['-m', 'download'])
|
||||||
else:
|
else:
|
||||||
arguments.extend(
|
arguments.extend(
|
||||||
f'test/test_download.py::TestDownload::test_{test}' for test in tests)
|
test if '/' in test
|
||||||
|
else f'test/test_download.py::TestDownload::test_{fix_test_name(test)}'
|
||||||
|
for test in tests)
|
||||||
|
|
||||||
print(f'Running {arguments}', flush=True)
|
print(f'Running {arguments}', flush=True)
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -4,8 +4,18 @@ import xml.etree.ElementTree
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from yt_dlp.utils import dict_get, int_or_none, str_or_none
|
from yt_dlp.utils import (
|
||||||
from yt_dlp.utils.traversal import traverse_obj
|
ExtractorError,
|
||||||
|
determine_ext,
|
||||||
|
dict_get,
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
)
|
||||||
|
from yt_dlp.utils.traversal import (
|
||||||
|
traverse_obj,
|
||||||
|
require,
|
||||||
|
subs_list_to_dict,
|
||||||
|
)
|
||||||
|
|
||||||
_TEST_DATA = {
|
_TEST_DATA = {
|
||||||
100: 100,
|
100: 100,
|
||||||
|
@ -420,6 +430,71 @@ class TestTraversal:
|
||||||
assert traverse_obj(morsel, [(None,), any]) == morsel, \
|
assert traverse_obj(morsel, [(None,), any]) == morsel, \
|
||||||
'Morsel should not be implicitly changed to dict on usage'
|
'Morsel should not be implicitly changed to dict on usage'
|
||||||
|
|
||||||
|
def test_traversal_filter(self):
|
||||||
|
data = [None, False, True, 0, 1, 0.0, 1.1, '', 'str', {}, {0: 0}, [], [1]]
|
||||||
|
|
||||||
|
assert traverse_obj(data, [..., filter]) == [True, 1, 1.1, 'str', {0: 0}, [1]], \
|
||||||
|
'`filter` should filter falsy values'
|
||||||
|
|
||||||
|
|
||||||
|
class TestTraversalHelpers:
|
||||||
|
def test_traversal_require(self):
|
||||||
|
with pytest.raises(ExtractorError):
|
||||||
|
traverse_obj(_TEST_DATA, ['None', {require('value')}])
|
||||||
|
assert traverse_obj(_TEST_DATA, ['str', {require('value')}]) == 'str', \
|
||||||
|
'`require` should pass through non `None` values'
|
||||||
|
|
||||||
|
def test_subs_list_to_dict(self):
|
||||||
|
assert traverse_obj([
|
||||||
|
{'name': 'de', 'url': 'https://example.com/subs/de.vtt'},
|
||||||
|
{'name': 'en', 'url': 'https://example.com/subs/en1.ass'},
|
||||||
|
{'name': 'en', 'url': 'https://example.com/subs/en2.ass'},
|
||||||
|
], [..., {
|
||||||
|
'id': 'name',
|
||||||
|
'url': 'url',
|
||||||
|
}, all, {subs_list_to_dict}]) == {
|
||||||
|
'de': [{'url': 'https://example.com/subs/de.vtt'}],
|
||||||
|
'en': [
|
||||||
|
{'url': 'https://example.com/subs/en1.ass'},
|
||||||
|
{'url': 'https://example.com/subs/en2.ass'},
|
||||||
|
],
|
||||||
|
}, 'function should build subtitle dict from list of subtitles'
|
||||||
|
assert traverse_obj([
|
||||||
|
{'name': 'de', 'url': 'https://example.com/subs/de.ass'},
|
||||||
|
{'name': 'de'},
|
||||||
|
{'name': 'en', 'content': 'content'},
|
||||||
|
{'url': 'https://example.com/subs/en'},
|
||||||
|
], [..., {
|
||||||
|
'id': 'name',
|
||||||
|
'data': 'content',
|
||||||
|
'url': 'url',
|
||||||
|
}, all, {subs_list_to_dict}]) == {
|
||||||
|
'de': [{'url': 'https://example.com/subs/de.ass'}],
|
||||||
|
'en': [{'data': 'content'}],
|
||||||
|
}, 'subs with mandatory items missing should be filtered'
|
||||||
|
assert traverse_obj([
|
||||||
|
{'url': 'https://example.com/subs/de.ass', 'name': 'de'},
|
||||||
|
{'url': 'https://example.com/subs/en', 'name': 'en'},
|
||||||
|
], [..., {
|
||||||
|
'id': 'name',
|
||||||
|
'ext': ['url', {lambda x: determine_ext(x, default_ext=None)}],
|
||||||
|
'url': 'url',
|
||||||
|
}, all, {subs_list_to_dict(ext='ext')}]) == {
|
||||||
|
'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}],
|
||||||
|
'en': [{'url': 'https://example.com/subs/en', 'ext': 'ext'}],
|
||||||
|
}, '`ext` should set default ext but leave existing value untouched'
|
||||||
|
assert traverse_obj([
|
||||||
|
{'name': 'en', 'url': 'https://example.com/subs/en2', 'prio': True},
|
||||||
|
{'name': 'en', 'url': 'https://example.com/subs/en1', 'prio': False},
|
||||||
|
], [..., {
|
||||||
|
'id': 'name',
|
||||||
|
'quality': ['prio', {int}],
|
||||||
|
'url': 'url',
|
||||||
|
}, all, {subs_list_to_dict(ext='ext')}]) == {'en': [
|
||||||
|
{'url': 'https://example.com/subs/en1', 'ext': 'ext'},
|
||||||
|
{'url': 'https://example.com/subs/en2', 'ext': 'ext'},
|
||||||
|
]}, '`quality` key should sort subtitle list accordingly'
|
||||||
|
|
||||||
|
|
||||||
class TestDictGet:
|
class TestDictGet:
|
||||||
def test_dict_get(self):
|
def test_dict_get(self):
|
||||||
|
|
|
@ -221,9 +221,10 @@ class TestUtil(unittest.TestCase):
|
||||||
self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
|
self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
|
||||||
|
|
||||||
def test_sanitize_path(self):
|
def test_sanitize_path(self):
|
||||||
if sys.platform != 'win32':
|
with unittest.mock.patch('sys.platform', 'win32'):
|
||||||
return
|
self._test_sanitize_path()
|
||||||
|
|
||||||
|
def _test_sanitize_path(self):
|
||||||
self.assertEqual(sanitize_path('abc'), 'abc')
|
self.assertEqual(sanitize_path('abc'), 'abc')
|
||||||
self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
|
self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
|
||||||
self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
|
self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
|
||||||
|
@ -256,6 +257,11 @@ class TestUtil(unittest.TestCase):
|
||||||
self.assertEqual(sanitize_path('./abc'), 'abc')
|
self.assertEqual(sanitize_path('./abc'), 'abc')
|
||||||
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
|
self.assertEqual(sanitize_path('./../abc'), '..\\abc')
|
||||||
|
|
||||||
|
self.assertEqual(sanitize_path('\\abc'), '\\abc')
|
||||||
|
self.assertEqual(sanitize_path('C:abc'), 'C:abc')
|
||||||
|
self.assertEqual(sanitize_path('C:abc\\..\\'), 'C:..')
|
||||||
|
self.assertEqual(sanitize_path('C:\\abc:%(title)s.%(ext)s'), 'C:\\abc#%(title)s.%(ext)s')
|
||||||
|
|
||||||
def test_sanitize_url(self):
|
def test_sanitize_url(self):
|
||||||
self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
|
self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
|
||||||
self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
|
self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
|
||||||
|
|
|
@ -4070,6 +4070,10 @@ class YoutubeDL:
|
||||||
|
|
||||||
write_debug(f'Proxy map: {self.proxies}')
|
write_debug(f'Proxy map: {self.proxies}')
|
||||||
write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
|
write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
|
||||||
|
if os.environ.get('YTDLP_NO_PLUGINS'):
|
||||||
|
write_debug('Plugins are forcibly disabled')
|
||||||
|
return
|
||||||
|
|
||||||
for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
|
for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
|
||||||
display_list = ['{}{}'.format(
|
display_list = ['{}{}'.format(
|
||||||
klass.__name__, '' if klass.__name__ == name else f' as {name}')
|
klass.__name__, '' if klass.__name__ == name else f' as {name}')
|
||||||
|
@ -4120,7 +4124,8 @@ class YoutubeDL:
|
||||||
self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
|
self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
|
||||||
except CookieLoadError as error:
|
except CookieLoadError as error:
|
||||||
cause = error.__context__
|
cause = error.__context__
|
||||||
self.report_error(str(cause), tb=''.join(traceback.format_exception(cause)))
|
# compat: <=py3.9: `traceback.format_exception` has a different signature
|
||||||
|
self.report_error(str(cause), tb=''.join(traceback.format_exception(None, cause, cause.__traceback__)))
|
||||||
raise
|
raise
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
|
@ -1355,6 +1355,7 @@ MSO_INFO = {
|
||||||
class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
|
class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
|
||||||
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
|
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
|
||||||
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
|
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
|
||||||
|
_MODERN_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0'
|
||||||
_MVPD_CACHE = 'ap-mvpd'
|
_MVPD_CACHE = 'ap-mvpd'
|
||||||
|
|
||||||
_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
|
_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
|
||||||
|
@ -1454,7 +1455,11 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||||
'no_iframe': 'false',
|
'no_iframe': 'false',
|
||||||
'domain_name': 'adobe.com',
|
'domain_name': 'adobe.com',
|
||||||
'redirect_url': url,
|
'redirect_url': url,
|
||||||
})
|
}, headers={
|
||||||
|
# yt-dlp's default user-agent is usually too old for Comcast_SSO
|
||||||
|
# See: https://github.com/yt-dlp/yt-dlp/issues/10848
|
||||||
|
'User-Agent': self._MODERN_USER_AGENT,
|
||||||
|
} if mso_id == 'Comcast_SSO' else None)
|
||||||
elif not self._cookies_passed:
|
elif not self._cookies_passed:
|
||||||
raise_mvpd_required()
|
raise_mvpd_required()
|
||||||
|
|
||||||
|
|
|
@ -573,13 +573,13 @@ class InfoExtractor:
|
||||||
|
|
||||||
def _login_hint(self, method=NO_DEFAULT, netrc=None):
|
def _login_hint(self, method=NO_DEFAULT, netrc=None):
|
||||||
password_hint = f'--username and --password, --netrc-cmd, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
|
password_hint = f'--username and --password, --netrc-cmd, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials'
|
||||||
|
cookies_hint = 'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies'
|
||||||
return {
|
return {
|
||||||
None: '',
|
None: '',
|
||||||
'any': f'Use --cookies, --cookies-from-browser, {password_hint}',
|
'any': f'Use --cookies, --cookies-from-browser, {password_hint}. {cookies_hint}',
|
||||||
'password': f'Use {password_hint}',
|
'password': f'Use {password_hint}',
|
||||||
'cookies': (
|
'cookies': f'Use --cookies-from-browser or --cookies for the authentication. {cookies_hint}',
|
||||||
'Use --cookies-from-browser or --cookies for the authentication. '
|
'session_cookies': f'Use --cookies for the authentication (--cookies-from-browser might not work). {cookies_hint}',
|
||||||
'See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies'),
|
|
||||||
}[method if method is not NO_DEFAULT else 'any' if self.supports_login() else 'cookies']
|
}[method if method is not NO_DEFAULT else 'any' if self.supports_login() else 'cookies']
|
||||||
|
|
||||||
def __init__(self, downloader=None):
|
def __init__(self, downloader=None):
|
||||||
|
|
|
@ -6,12 +6,37 @@ from ..utils import (
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CWTVIE(InfoExtractor):
|
class CWTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
|
_VALID_URL = r'https?://(?:www\.)?cw(?:tv(?:pr)?|seed)\.com/(?:shows/)?(?:[^/]+/)+[^?]*\?.*\b(?:play|watch)=(?P<id>[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12})'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'https://www.cwtv.com/shows/all-american-homecoming/ready-or-not/?play=d848488f-f62a-40fd-af1f-6440b1821aab',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'd848488f-f62a-40fd-af1f-6440b1821aab',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Ready Or Not',
|
||||||
|
'description': 'Simone is concerned about changes taking place at Bringston; JR makes a decision about his future.',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||||
|
'duration': 2547,
|
||||||
|
'timestamp': 1720519200,
|
||||||
|
'uploader': 'CWTV',
|
||||||
|
'chapters': 'count:6',
|
||||||
|
'series': 'All American: Homecoming',
|
||||||
|
'season_number': 3,
|
||||||
|
'episode_number': 1,
|
||||||
|
'age_limit': 0,
|
||||||
|
'upload_date': '20240709',
|
||||||
|
'season': 'Season 3',
|
||||||
|
'episode': 'Episode 1',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
|
'url': 'http://cwtv.com/shows/arrow/legends-of-yesterday/?play=6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '6b15e985-9345-4f60-baf8-56e96be57c63',
|
'id': '6b15e985-9345-4f60-baf8-56e96be57c63',
|
||||||
|
@ -69,13 +94,14 @@ class CWTVIE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
|
f'https://images.cwtv.com/feed/mobileapp/video-meta/apiversion_12/guid_{video_id}', video_id)
|
||||||
video_id)
|
|
||||||
if data.get('result') != 'ok':
|
if data.get('result') != 'ok':
|
||||||
raise ExtractorError(data['msg'], expected=True)
|
raise ExtractorError(data['msg'], expected=True)
|
||||||
video_data = data['video']
|
video_data = data['video']
|
||||||
title = video_data['title']
|
title = video_data['title']
|
||||||
mpx_url = video_data.get('mpx_url') or f'http://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}?formats=M3U'
|
mpx_url = update_url_query(
|
||||||
|
video_data.get('mpx_url') or f'https://link.theplatform.com/s/cwtv/media/guid/2703454149/{video_id}',
|
||||||
|
{'formats': 'M3U+none'})
|
||||||
|
|
||||||
season = str_or_none(video_data.get('season'))
|
season = str_or_none(video_data.get('season'))
|
||||||
episode = str_or_none(video_data.get('episode'))
|
episode = str_or_none(video_data.get('episode'))
|
||||||
|
|
|
@ -139,12 +139,11 @@ class DRTVIE(InfoExtractor):
|
||||||
return
|
return
|
||||||
|
|
||||||
token_response = self._download_json(
|
token_response = self._download_json(
|
||||||
'https://production.dr-massive.com/api/authorization/anonymous-sso', None,
|
'https://isl.dr-massive.com/api/authorization/anonymous-sso', None,
|
||||||
note='Downloading anonymous token', headers={
|
note='Downloading anonymous token', headers={
|
||||||
'content-type': 'application/json',
|
'content-type': 'application/json',
|
||||||
}, query={
|
}, query={
|
||||||
'device': 'web_browser',
|
'device': 'phone_android',
|
||||||
'ff': 'idp,ldp,rpt',
|
|
||||||
'lang': 'da',
|
'lang': 'da',
|
||||||
'supportFallbackToken': 'true',
|
'supportFallbackToken': 'true',
|
||||||
}, data=json.dumps({
|
}, data=json.dumps({
|
||||||
|
|
|
@ -8,6 +8,7 @@ from .common import InfoExtractor
|
||||||
from .commonprotocols import RtmpIE
|
from .commonprotocols import RtmpIE
|
||||||
from .youtube import YoutubeIE
|
from .youtube import YoutubeIE
|
||||||
from ..compat import compat_etree_fromstring
|
from ..compat import compat_etree_fromstring
|
||||||
|
from ..networking.impersonate import ImpersonateTarget
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
KNOWN_EXTENSIONS,
|
KNOWN_EXTENSIONS,
|
||||||
MEDIA_EXTENSIONS,
|
MEDIA_EXTENSIONS,
|
||||||
|
@ -2373,6 +2374,12 @@ class GenericIE(InfoExtractor):
|
||||||
else:
|
else:
|
||||||
video_id = self._generic_id(url)
|
video_id = self._generic_id(url)
|
||||||
|
|
||||||
|
# Try to impersonate a web-browser by default if possible
|
||||||
|
# Skip impersonation if not available to omit the warning
|
||||||
|
impersonate = self._configuration_arg('impersonate', [''])
|
||||||
|
if 'false' in impersonate or not self._downloader._impersonate_target_available(ImpersonateTarget()):
|
||||||
|
impersonate = None
|
||||||
|
|
||||||
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
|
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
|
||||||
# making it impossible to download only chunk of the file (yet we need only 512kB to
|
# making it impossible to download only chunk of the file (yet we need only 512kB to
|
||||||
# test whether it's HTML or not). According to yt-dlp default Accept-Encoding
|
# test whether it's HTML or not). According to yt-dlp default Accept-Encoding
|
||||||
|
@ -2384,7 +2391,7 @@ class GenericIE(InfoExtractor):
|
||||||
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
||||||
'Accept-Encoding': 'identity',
|
'Accept-Encoding': 'identity',
|
||||||
'Referer': smuggled_data.get('referer'),
|
'Referer': smuggled_data.get('referer'),
|
||||||
}))
|
}), impersonate=impersonate)
|
||||||
new_url = full_response.url
|
new_url = full_response.url
|
||||||
if new_url != extract_basic_auth(url)[0]:
|
if new_url != extract_basic_auth(url)[0]:
|
||||||
self.report_following_redirect(new_url)
|
self.report_following_redirect(new_url)
|
||||||
|
|
|
@ -48,7 +48,6 @@ class InstagramBaseIE(InfoExtractor):
|
||||||
'X-IG-WWW-Claim': '0',
|
'X-IG-WWW-Claim': '0',
|
||||||
'Origin': 'https://www.instagram.com',
|
'Origin': 'https://www.instagram.com',
|
||||||
'Accept': '*/*',
|
'Accept': '*/*',
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _perform_login(self, username, password):
|
def _perform_login(self, username, password):
|
||||||
|
@ -435,10 +434,10 @@ class InstagramIE(InstagramBaseIE):
|
||||||
'X-Requested-With': 'XMLHttpRequest',
|
'X-Requested-With': 'XMLHttpRequest',
|
||||||
'Referer': url,
|
'Referer': url,
|
||||||
}, query={
|
}, query={
|
||||||
'query_hash': '9f8827793ef34641b2fb195d4d41151c',
|
'doc_id': '8845758582119845',
|
||||||
'variables': json.dumps(variables, separators=(',', ':')),
|
'variables': json.dumps(variables, separators=(',', ':')),
|
||||||
})
|
})
|
||||||
media.update(traverse_obj(general_info, ('data', 'shortcode_media')) or {})
|
media.update(traverse_obj(general_info, ('data', 'xdt_shortcode_media')) or {})
|
||||||
|
|
||||||
if not general_info:
|
if not general_info:
|
||||||
self.report_warning('General metadata extraction failed (some metadata might be missing).', video_id)
|
self.report_warning('General metadata extraction failed (some metadata might be missing).', video_id)
|
||||||
|
|
|
@ -43,14 +43,8 @@ class NoodleMagazineIE(InfoExtractor):
|
||||||
def build_url(url_or_path):
|
def build_url(url_or_path):
|
||||||
return urljoin('https://adult.noodlemagazine.com', url_or_path)
|
return urljoin('https://adult.noodlemagazine.com', url_or_path)
|
||||||
|
|
||||||
headers = {'Referer': url}
|
playlist_info = self._search_json(
|
||||||
player_path = self._html_search_regex(
|
r'window\.playlist\s*=', webpage, video_id, 'playlist info')
|
||||||
r'<iframe[^>]+\bid="iplayer"[^>]+\bsrc="([^"]+)"', webpage, 'player path')
|
|
||||||
player_iframe = self._download_webpage(
|
|
||||||
build_url(player_path), video_id, 'Downloading iframe page', headers=headers)
|
|
||||||
playlist_url = self._search_regex(
|
|
||||||
r'window\.playlistUrl\s*=\s*["\']([^"\']+)["\']', player_iframe, 'playlist url')
|
|
||||||
playlist_info = self._download_json(build_url(playlist_url), video_id, headers=headers)
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for source in traverse_obj(playlist_info, ('sources', lambda _, v: v['file'])):
|
for source in traverse_obj(playlist_info, ('sources', lambda _, v: v['file'])):
|
||||||
|
|
|
@ -55,6 +55,7 @@ class PatreonBaseIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class PatreonIE(PatreonBaseIE):
|
class PatreonIE(PatreonBaseIE):
|
||||||
|
IE_NAME = 'patreon'
|
||||||
_VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.patreon.com/creation?hid=743933',
|
'url': 'http://www.patreon.com/creation?hid=743933',
|
||||||
|
@ -433,8 +434,12 @@ class PatreonIE(PatreonBaseIE):
|
||||||
|
|
||||||
|
|
||||||
class PatreonCampaignIE(PatreonBaseIE):
|
class PatreonCampaignIE(PatreonBaseIE):
|
||||||
|
IE_NAME = 'patreon:campaign'
|
||||||
_VALID_URL = r'https?://(?:www\.)?patreon\.com/(?!rss)(?:(?:m|api/campaigns)/(?P<campaign_id>\d+)|(?P<vanity>[-\w]+))'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://(?:www\.)?patreon\.com/(?:
|
||||||
|
(?:m|api/campaigns)/(?P<campaign_id>\d+)|
|
||||||
|
(?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+)
|
||||||
|
)(?:/posts)?/?(?:$|[?#])'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.patreon.com/dissonancepod/',
|
'url': 'https://www.patreon.com/dissonancepod/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -496,10 +501,6 @@ class PatreonCampaignIE(PatreonBaseIE):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def suitable(cls, url):
|
|
||||||
return False if PatreonIE.suitable(url) else super().suitable(url)
|
|
||||||
|
|
||||||
def _entries(self, campaign_id):
|
def _entries(self, campaign_id):
|
||||||
cursor = None
|
cursor = None
|
||||||
params = {
|
params = {
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import json
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
@ -17,7 +18,7 @@ from ..utils import (
|
||||||
|
|
||||||
class RedditIE(InfoExtractor):
|
class RedditIE(InfoExtractor):
|
||||||
_NETRC_MACHINE = 'reddit'
|
_NETRC_MACHINE = 'reddit'
|
||||||
_VALID_URL = r'https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))'
|
_VALID_URL = r'https?://(?:\w+\.)?reddit(?:media)?\.com/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
|
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -251,15 +252,15 @@ class RedditIE(InfoExtractor):
|
||||||
return {'en': [{'url': caption_url}]}
|
return {'en': [{'url': caption_url}]}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id')
|
slug, video_id = self._match_valid_url(url).group('slug', 'id')
|
||||||
|
|
||||||
|
try:
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
f'https://{host}/{slug}/.json', video_id, fatal=False, expected_status=403)
|
f'https://www.reddit.com/{slug}/.json', video_id, expected_status=403)
|
||||||
if not data:
|
except ExtractorError as e:
|
||||||
fallback_host = 'old.reddit.com' if host != 'old.reddit.com' else 'www.reddit.com'
|
if isinstance(e.cause, json.JSONDecodeError):
|
||||||
self.to_screen(f'{host} request failed, retrying with {fallback_host}')
|
self.raise_login_required('Account authentication is required')
|
||||||
data = self._download_json(
|
raise
|
||||||
f'https://{fallback_host}/{slug}/.json', video_id, expected_status=403)
|
|
||||||
|
|
||||||
if traverse_obj(data, 'error') == 403:
|
if traverse_obj(data, 'error') == 403:
|
||||||
reason = data.get('reason')
|
reason = data.get('reason')
|
||||||
|
|
|
@ -6,11 +6,12 @@ from ..utils import (
|
||||||
str_or_none,
|
str_or_none,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TVerIE(InfoExtractor):
|
class TVerIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video|olympic/paris2024/video)/)+(?P<id>[a-zA-Z0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature)/)+(?P<id>[a-zA-Z0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'skip': 'videos are only available for 7 days',
|
'skip': 'videos are only available for 7 days',
|
||||||
'url': 'https://tver.jp/episodes/ep83nf3w4p',
|
'url': 'https://tver.jp/episodes/ep83nf3w4p',
|
||||||
|
@ -21,80 +22,115 @@ class TVerIE(InfoExtractor):
|
||||||
'episode': '売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!',
|
'episode': '売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!',
|
||||||
'alt_title': '売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!',
|
'alt_title': '売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着!',
|
||||||
'channel': 'テレビ朝日',
|
'channel': 'テレビ朝日',
|
||||||
|
'id': 'ep83nf3w4p',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'onair_label': '5月3日(火)放送分',
|
||||||
|
'ext_title': '家事ヤロウ!!! 売り場席巻のチーズSP&財前直見×森泉親子の脱東京暮らし密着! テレビ朝日 5月3日(火)放送分',
|
||||||
},
|
},
|
||||||
'add_ie': ['BrightcoveNew'],
|
'add_ie': ['BrightcoveNew'],
|
||||||
}, {
|
|
||||||
'url': 'https://tver.jp/olympic/paris2024/video/6359578055112/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '6359578055112',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '堀米雄斗 金メダルで五輪連覇!「みんなの応援が最後に乗れたカギ」',
|
|
||||||
'timestamp': 1722279928,
|
|
||||||
'upload_date': '20240729',
|
|
||||||
'tags': ['20240729', 'japanese', 'japanmedal', 'paris'],
|
|
||||||
'uploader_id': '4774017240001',
|
|
||||||
'thumbnail': r're:https?://[^/?#]+boltdns\.net/[^?#]+/1920x1080/match/image\.jpg',
|
|
||||||
'duration': 670.571,
|
|
||||||
},
|
|
||||||
'params': {'skip_download': 'm3u8'},
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://tver.jp/corner/f0103888',
|
'url': 'https://tver.jp/corner/f0103888',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://tver.jp/lp/f0033031',
|
'url': 'https://tver.jp/lp/f0033031',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://tver.jp/series/srtxft431v',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'srtxft431v',
|
||||||
|
'title': '名探偵コナン',
|
||||||
|
},
|
||||||
|
'playlist': [
|
||||||
|
{
|
||||||
|
'md5': '779ffd97493ed59b0a6277ea726b389e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ref:conan-1137-241005',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '名探偵コナン #1137「行列店、味変の秘密」',
|
||||||
|
'uploader_id': '5330942432001',
|
||||||
|
'tags': [],
|
||||||
|
'channel': '読売テレビ',
|
||||||
|
'series': '名探偵コナン',
|
||||||
|
'description': 'md5:601fccc1d2430d942a2c8068c4b33eb5',
|
||||||
|
'episode': '#1137「行列店、味変の秘密」',
|
||||||
|
'duration': 1469.077,
|
||||||
|
'timestamp': 1728030405,
|
||||||
|
'upload_date': '20241004',
|
||||||
|
'alt_title': '名探偵コナン #1137「行列店、味変の秘密」 読売テレビ 10月5日(土)放送分',
|
||||||
|
'thumbnail': r're:https://.+\.jpg',
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
}, {
|
||||||
|
'url': 'https://tver.jp/series/sru35hwdd2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'sru35hwdd2',
|
||||||
|
'title': '神回だけ見せます!',
|
||||||
|
},
|
||||||
|
'playlist_count': 11,
|
||||||
|
}, {
|
||||||
|
'url': 'https://tver.jp/series/srkq2shp9d',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
|
||||||
_PLATFORM_UID = None
|
_HEADERS = {'x-tver-platform-type': 'web'}
|
||||||
_PLATFORM_TOKEN = None
|
_PLATFORM_QUERY = {}
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
create_response = self._download_json(
|
session_info = self._download_json(
|
||||||
'https://platform-api.tver.jp/v2/api/platform_users/browser/create', None,
|
'https://platform-api.tver.jp/v2/api/platform_users/browser/create',
|
||||||
note='Creating session', data=b'device_type=pc', headers={
|
None, 'Creating session', data=b'device_type=pc')
|
||||||
'Origin': 'https://s.tver.jp',
|
self._PLATFORM_QUERY = traverse_obj(session_info, ('result', {
|
||||||
'Referer': 'https://s.tver.jp/',
|
'platform_uid': 'platform_uid',
|
||||||
'Content-Type': 'application/x-www-form-urlencoded',
|
'platform_token': 'platform_token',
|
||||||
|
}))
|
||||||
|
|
||||||
|
def _call_platform_api(self, path, video_id, note=None, fatal=True, query=None):
|
||||||
|
return self._download_json(
|
||||||
|
f'https://platform-api.tver.jp/service/api/{path}', video_id, note,
|
||||||
|
fatal=fatal, headers=self._HEADERS, query={
|
||||||
|
**self._PLATFORM_QUERY,
|
||||||
|
**(query or {}),
|
||||||
})
|
})
|
||||||
self._PLATFORM_UID = traverse_obj(create_response, ('result', 'platform_uid'))
|
|
||||||
self._PLATFORM_TOKEN = traverse_obj(create_response, ('result', 'platform_token'))
|
def _yield_episode_ids_for_series(self, series_id):
|
||||||
|
seasons_info = self._download_json(
|
||||||
|
f'https://service-api.tver.jp/api/v1/callSeriesSeasons/{series_id}',
|
||||||
|
series_id, 'Downloading seasons info', headers=self._HEADERS)
|
||||||
|
for season_id in traverse_obj(
|
||||||
|
seasons_info, ('result', 'contents', lambda _, v: v['type'] == 'season', 'content', 'id', {str})):
|
||||||
|
episodes_info = self._call_platform_api(
|
||||||
|
f'v1/callSeasonEpisodes/{season_id}', series_id, f'Downloading season {season_id} episodes info')
|
||||||
|
yield from traverse_obj(episodes_info, (
|
||||||
|
'result', 'contents', lambda _, v: v['type'] == 'episode', 'content', 'id', {str}))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id, video_type = self._match_valid_url(url).group('id', 'type')
|
video_id, video_type = self._match_valid_url(url).group('id', 'type')
|
||||||
|
|
||||||
if video_type == 'olympic/paris2024/video':
|
if video_type == 'series':
|
||||||
# Player ID is taken from .content.brightcove.E200.pro.pc.account_id:
|
series_info = self._call_platform_api(
|
||||||
# https://tver.jp/olympic/paris2024/req/api/hook?q=https%3A%2F%2Folympic-assets.tver.jp%2Fweb-static%2Fjson%2Fconfig.json&d=
|
f'v2/callSeries/{video_id}', video_id, 'Downloading series info')
|
||||||
return self.url_result(smuggle_url(
|
return self.playlist_from_matches(
|
||||||
self.BRIGHTCOVE_URL_TEMPLATE % ('4774017240001', video_id),
|
self._yield_episode_ids_for_series(video_id), video_id,
|
||||||
{'geo_countries': ['JP']}), 'BrightcoveNew')
|
traverse_obj(series_info, ('result', 'content', 'content', 'title', {str})),
|
||||||
|
ie=TVerIE, getter=lambda x: f'https://tver.jp/episodes/{x}')
|
||||||
|
|
||||||
elif video_type not in {'series', 'episodes'}:
|
if video_type != 'episodes':
|
||||||
webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
|
webpage = self._download_webpage(url, video_id, note='Resolving to new URL')
|
||||||
video_id = self._match_id(self._search_regex(
|
video_id = self._match_id(self._search_regex(
|
||||||
(r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),
|
(r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'),
|
||||||
webpage, 'url regex'))
|
webpage, 'url regex'))
|
||||||
|
|
||||||
episode_info = self._download_json(
|
episode_info = self._call_platform_api(
|
||||||
f'https://platform-api.tver.jp/service/api/v1/callEpisode/{video_id}?require_data=mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
|
f'v1/callEpisode/{video_id}', video_id, 'Downloading episode info', fatal=False, query={
|
||||||
video_id, fatal=False,
|
'require_data': 'mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]',
|
||||||
query={
|
|
||||||
'platform_uid': self._PLATFORM_UID,
|
|
||||||
'platform_token': self._PLATFORM_TOKEN,
|
|
||||||
}, headers={
|
|
||||||
'x-tver-platform-type': 'web',
|
|
||||||
})
|
})
|
||||||
episode_content = traverse_obj(
|
episode_content = traverse_obj(
|
||||||
episode_info, ('result', 'episode', 'content')) or {}
|
episode_info, ('result', 'episode', 'content')) or {}
|
||||||
|
|
||||||
|
version = traverse_obj(episode_content, ('version', {str_or_none}), default='5')
|
||||||
video_info = self._download_json(
|
video_info = self._download_json(
|
||||||
f'https://statics.tver.jp/content/episode/{video_id}.json', video_id,
|
f'https://statics.tver.jp/content/episode/{video_id}.json', video_id, 'Downloading video info',
|
||||||
query={
|
query={'v': version}, headers={'Referer': 'https://tver.jp/'})
|
||||||
'v': str_or_none(episode_content.get('version')) or '5',
|
|
||||||
}, headers={
|
|
||||||
'Origin': 'https://tver.jp',
|
|
||||||
'Referer': 'https://tver.jp/',
|
|
||||||
})
|
|
||||||
p_id = video_info['video']['accountID']
|
p_id = video_info['video']['accountID']
|
||||||
r_id = traverse_obj(video_info, ('video', ('videoRefID', 'videoID')), get_all=False)
|
r_id = traverse_obj(video_info, ('video', ('videoRefID', 'videoID')), get_all=False)
|
||||||
if not r_id:
|
if not r_id:
|
||||||
|
@ -110,6 +146,23 @@ class TVerIE(InfoExtractor):
|
||||||
provider = str_or_none(episode_content.get('productionProviderName'))
|
provider = str_or_none(episode_content.get('productionProviderName'))
|
||||||
onair_label = str_or_none(episode_content.get('broadcastDateLabel'))
|
onair_label = str_or_none(episode_content.get('broadcastDateLabel'))
|
||||||
|
|
||||||
|
thumbnails = [
|
||||||
|
{
|
||||||
|
'id': quality,
|
||||||
|
'url': update_url_query(
|
||||||
|
f'https://statics.tver.jp/images/content/thumbnail/episode/{quality}/{video_id}.jpg',
|
||||||
|
{'v': version}),
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
}
|
||||||
|
for quality, width, height in [
|
||||||
|
('small', 480, 270),
|
||||||
|
('medium', 640, 360),
|
||||||
|
('large', 960, 540),
|
||||||
|
('xlarge', 1280, 720),
|
||||||
|
]
|
||||||
|
]
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'title': title,
|
'title': title,
|
||||||
|
@ -119,6 +172,7 @@ class TVerIE(InfoExtractor):
|
||||||
'alt_title': join_nonempty(title, provider, onair_label, delim=' '),
|
'alt_title': join_nonempty(title, provider, onair_label, delim=' '),
|
||||||
'channel': provider,
|
'channel': provider,
|
||||||
'description': str_or_none(video_info.get('description')),
|
'description': str_or_none(video_info.get('description')),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
'url': smuggle_url(
|
'url': smuggle_url(
|
||||||
self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), {'geo_countries': ['JP']}),
|
self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), {'geo_countries': ['JP']}),
|
||||||
'ie_key': 'BrightcoveNew',
|
'ie_key': 'BrightcoveNew',
|
||||||
|
|
|
@ -27,8 +27,9 @@ from ..utils import (
|
||||||
|
|
||||||
class WeverseBaseIE(InfoExtractor):
|
class WeverseBaseIE(InfoExtractor):
|
||||||
_NETRC_MACHINE = 'weverse'
|
_NETRC_MACHINE = 'weverse'
|
||||||
_ACCOUNT_API_BASE = 'https://accountapi.weverse.io/web/api/v2'
|
_ACCOUNT_API_BASE = 'https://accountapi.weverse.io/web/api'
|
||||||
_API_HEADERS = {
|
_API_HEADERS = {
|
||||||
|
'Accept': 'application/json',
|
||||||
'Referer': 'https://weverse.io/',
|
'Referer': 'https://weverse.io/',
|
||||||
'WEV-device-Id': str(uuid.uuid4()),
|
'WEV-device-Id': str(uuid.uuid4()),
|
||||||
}
|
}
|
||||||
|
@ -39,14 +40,14 @@ class WeverseBaseIE(InfoExtractor):
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
'x-acc-app-secret': '5419526f1c624b38b10787e5c10b2a7a',
|
'x-acc-app-secret': '5419526f1c624b38b10787e5c10b2a7a',
|
||||||
'x-acc-app-version': '2.2.6',
|
'x-acc-app-version': '3.3.6',
|
||||||
'x-acc-language': 'en',
|
'x-acc-language': 'en',
|
||||||
'x-acc-service-id': 'weverse',
|
'x-acc-service-id': 'weverse',
|
||||||
'x-acc-trace-id': str(uuid.uuid4()),
|
'x-acc-trace-id': str(uuid.uuid4()),
|
||||||
'x-clog-user-device-id': str(uuid.uuid4()),
|
'x-clog-user-device-id': str(uuid.uuid4()),
|
||||||
}
|
}
|
||||||
valid_username = traverse_obj(self._download_json(
|
valid_username = traverse_obj(self._download_json(
|
||||||
f'{self._ACCOUNT_API_BASE}/signup/email/status', None, note='Checking username',
|
f'{self._ACCOUNT_API_BASE}/v2/signup/email/status', None, note='Checking username',
|
||||||
query={'email': username}, headers=headers, expected_status=(400, 404)), 'hasPassword')
|
query={'email': username}, headers=headers, expected_status=(400, 404)), 'hasPassword')
|
||||||
if not valid_username:
|
if not valid_username:
|
||||||
raise ExtractorError('Invalid username provided', expected=True)
|
raise ExtractorError('Invalid username provided', expected=True)
|
||||||
|
@ -54,8 +55,9 @@ class WeverseBaseIE(InfoExtractor):
|
||||||
headers['content-type'] = 'application/json'
|
headers['content-type'] = 'application/json'
|
||||||
try:
|
try:
|
||||||
auth = self._download_json(
|
auth = self._download_json(
|
||||||
f'{self._ACCOUNT_API_BASE}/auth/token/by-credentials', None, data=json.dumps({
|
f'{self._ACCOUNT_API_BASE}/v3/auth/token/by-credentials', None, data=json.dumps({
|
||||||
'email': username,
|
'email': username,
|
||||||
|
'otpSessionId': 'BY_PASS',
|
||||||
'password': password,
|
'password': password,
|
||||||
}, separators=(',', ':')).encode(), headers=headers, note='Logging in')
|
}, separators=(',', ':')).encode(), headers=headers, note='Logging in')
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
|
@ -78,8 +80,10 @@ class WeverseBaseIE(InfoExtractor):
|
||||||
# From https://ssl.pstatic.net/static/wevweb/2_3_2_11101725/public/static/js/main.e206f7c1.js:
|
# From https://ssl.pstatic.net/static/wevweb/2_3_2_11101725/public/static/js/main.e206f7c1.js:
|
||||||
key = b'1b9cb6378d959b45714bec49971ade22e6e24e42'
|
key = b'1b9cb6378d959b45714bec49971ade22e6e24e42'
|
||||||
api_path = update_url_query(ep, {
|
api_path = update_url_query(ep, {
|
||||||
|
# 'gcc': 'US',
|
||||||
'appId': 'be4d79eb8fc7bd008ee82c8ec4ff6fd4',
|
'appId': 'be4d79eb8fc7bd008ee82c8ec4ff6fd4',
|
||||||
'language': 'en',
|
'language': 'en',
|
||||||
|
'os': 'WEB',
|
||||||
'platform': 'WEB',
|
'platform': 'WEB',
|
||||||
'wpf': 'pc',
|
'wpf': 'pc',
|
||||||
})
|
})
|
||||||
|
@ -152,7 +156,7 @@ class WeverseBaseIE(InfoExtractor):
|
||||||
'description': ((('extension', 'mediaInfo', 'body'), 'body'), {str}),
|
'description': ((('extension', 'mediaInfo', 'body'), 'body'), {str}),
|
||||||
'uploader': ('author', 'profileName', {str}),
|
'uploader': ('author', 'profileName', {str}),
|
||||||
'uploader_id': ('author', 'memberId', {str}),
|
'uploader_id': ('author', 'memberId', {str}),
|
||||||
'creator': ('community', 'communityName', {str}),
|
'creators': ('community', 'communityName', {str}, all),
|
||||||
'channel_id': (('community', 'author'), 'communityId', {str_or_none}),
|
'channel_id': (('community', 'author'), 'communityId', {str_or_none}),
|
||||||
'duration': ('extension', 'video', 'playTime', {float_or_none}),
|
'duration': ('extension', 'video', 'playTime', {float_or_none}),
|
||||||
'timestamp': ('publishedAt', {lambda x: int_or_none(x, 1000)}),
|
'timestamp': ('publishedAt', {lambda x: int_or_none(x, 1000)}),
|
||||||
|
@ -196,7 +200,7 @@ class WeverseIE(WeverseBaseIE):
|
||||||
'channel': 'billlie',
|
'channel': 'billlie',
|
||||||
'channel_id': '72',
|
'channel_id': '72',
|
||||||
'channel_url': 'https://weverse.io/billlie',
|
'channel_url': 'https://weverse.io/billlie',
|
||||||
'creator': 'Billlie',
|
'creators': ['Billlie'],
|
||||||
'timestamp': 1666262062,
|
'timestamp': 1666262062,
|
||||||
'upload_date': '20221020',
|
'upload_date': '20221020',
|
||||||
'release_timestamp': 1666262058,
|
'release_timestamp': 1666262058,
|
||||||
|
@ -222,7 +226,7 @@ class WeverseIE(WeverseBaseIE):
|
||||||
'channel': 'lesserafim',
|
'channel': 'lesserafim',
|
||||||
'channel_id': '47',
|
'channel_id': '47',
|
||||||
'channel_url': 'https://weverse.io/lesserafim',
|
'channel_url': 'https://weverse.io/lesserafim',
|
||||||
'creator': 'LE SSERAFIM',
|
'creators': ['LE SSERAFIM'],
|
||||||
'timestamp': 1659353400,
|
'timestamp': 1659353400,
|
||||||
'upload_date': '20220801',
|
'upload_date': '20220801',
|
||||||
'release_timestamp': 1659353400,
|
'release_timestamp': 1659353400,
|
||||||
|
@ -286,7 +290,7 @@ class WeverseIE(WeverseBaseIE):
|
||||||
|
|
||||||
elif live_status == 'is_live':
|
elif live_status == 'is_live':
|
||||||
video_info = self._call_api(
|
video_info = self._call_api(
|
||||||
f'/video/v1.0/lives/{api_video_id}/playInfo?preview.format=json&preview.version=v2',
|
f'/video/v1.2/lives/{api_video_id}/playInfo?preview.format=json&preview.version=v2',
|
||||||
video_id, note='Downloading live JSON')
|
video_id, note='Downloading live JSON')
|
||||||
playback = self._parse_json(video_info['lipPlayback'], video_id)
|
playback = self._parse_json(video_info['lipPlayback'], video_id)
|
||||||
m3u8_url = traverse_obj(playback, (
|
m3u8_url = traverse_obj(playback, (
|
||||||
|
@ -302,7 +306,7 @@ class WeverseIE(WeverseBaseIE):
|
||||||
else:
|
else:
|
||||||
infra_video_id = post['extension']['video']['infraVideoId']
|
infra_video_id = post['extension']['video']['infraVideoId']
|
||||||
in_key = self._call_api(
|
in_key = self._call_api(
|
||||||
f'/video/v1.0/vod/{api_video_id}/inKey?preview=false', video_id,
|
f'/video/v1.1/vod/{api_video_id}/inKey?preview=false', video_id,
|
||||||
data=b'{}', note='Downloading VOD API key')['inKey']
|
data=b'{}', note='Downloading VOD API key')['inKey']
|
||||||
|
|
||||||
video_info = self._download_json(
|
video_info = self._download_json(
|
||||||
|
@ -347,7 +351,6 @@ class WeverseMediaIE(WeverseBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.|m\.)?weverse\.io/(?P<artist>[^/?#]+)/media/(?P<id>[\d-]+)'
|
_VALID_URL = r'https?://(?:www\.|m\.)?weverse\.io/(?P<artist>[^/?#]+)/media/(?P<id>[\d-]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://weverse.io/billlie/media/4-116372884',
|
'url': 'https://weverse.io/billlie/media/4-116372884',
|
||||||
'md5': '8efc9cfd61b2f25209eb1a5326314d28',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'e-C9wLSQs6o',
|
'id': 'e-C9wLSQs6o',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -358,8 +361,9 @@ class WeverseMediaIE(WeverseBaseIE):
|
||||||
'channel_url': 'https://www.youtube.com/channel/UCyc9sUCxELTDK9vELO5Fzeg',
|
'channel_url': 'https://www.youtube.com/channel/UCyc9sUCxELTDK9vELO5Fzeg',
|
||||||
'uploader': 'Billlie',
|
'uploader': 'Billlie',
|
||||||
'uploader_id': '@Billlie',
|
'uploader_id': '@Billlie',
|
||||||
'uploader_url': 'http://www.youtube.com/@Billlie',
|
'uploader_url': 'https://www.youtube.com/@Billlie',
|
||||||
'upload_date': '20230403',
|
'upload_date': '20230403',
|
||||||
|
'timestamp': 1680533992,
|
||||||
'duration': 211,
|
'duration': 211,
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'playable_in_embed': True,
|
'playable_in_embed': True,
|
||||||
|
@ -372,6 +376,8 @@ class WeverseMediaIE(WeverseBaseIE):
|
||||||
'thumbnail': 'https://i.ytimg.com/vi/e-C9wLSQs6o/maxresdefault.jpg',
|
'thumbnail': 'https://i.ytimg.com/vi/e-C9wLSQs6o/maxresdefault.jpg',
|
||||||
'categories': ['Entertainment'],
|
'categories': ['Entertainment'],
|
||||||
'tags': 'count:7',
|
'tags': 'count:7',
|
||||||
|
'channel_is_verified': True,
|
||||||
|
'heatmap': 'count:100',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://weverse.io/billlie/media/3-102914520',
|
'url': 'https://weverse.io/billlie/media/3-102914520',
|
||||||
|
@ -386,7 +392,7 @@ class WeverseMediaIE(WeverseBaseIE):
|
||||||
'channel': 'billlie',
|
'channel': 'billlie',
|
||||||
'channel_id': '72',
|
'channel_id': '72',
|
||||||
'channel_url': 'https://weverse.io/billlie',
|
'channel_url': 'https://weverse.io/billlie',
|
||||||
'creator': 'Billlie',
|
'creators': ['Billlie'],
|
||||||
'timestamp': 1662174000,
|
'timestamp': 1662174000,
|
||||||
'upload_date': '20220903',
|
'upload_date': '20220903',
|
||||||
'release_timestamp': 1662174000,
|
'release_timestamp': 1662174000,
|
||||||
|
@ -432,7 +438,7 @@ class WeverseMomentIE(WeverseBaseIE):
|
||||||
'uploader_id': '66a07e164b56a696ee71c99315ffe27b',
|
'uploader_id': '66a07e164b56a696ee71c99315ffe27b',
|
||||||
'channel': 'secretnumber',
|
'channel': 'secretnumber',
|
||||||
'channel_id': '56',
|
'channel_id': '56',
|
||||||
'creator': 'SECRET NUMBER',
|
'creators': ['SECRET NUMBER'],
|
||||||
'duration': 10,
|
'duration': 10,
|
||||||
'upload_date': '20230405',
|
'upload_date': '20230405',
|
||||||
'timestamp': 1680653968,
|
'timestamp': 1680653968,
|
||||||
|
@ -441,7 +447,6 @@ class WeverseMomentIE(WeverseBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'availability': 'needs_auth',
|
'availability': 'needs_auth',
|
||||||
},
|
},
|
||||||
'skip': 'Moment has expired',
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -571,7 +576,7 @@ class WeverseLiveIE(WeverseBaseIE):
|
||||||
'channel': 'purplekiss',
|
'channel': 'purplekiss',
|
||||||
'channel_id': '35',
|
'channel_id': '35',
|
||||||
'channel_url': 'https://weverse.io/purplekiss',
|
'channel_url': 'https://weverse.io/purplekiss',
|
||||||
'creator': 'PURPLE KISS',
|
'creators': ['PURPLE KISS'],
|
||||||
'timestamp': 1680780892,
|
'timestamp': 1680780892,
|
||||||
'upload_date': '20230406',
|
'upload_date': '20230406',
|
||||||
'release_timestamp': 1680780883,
|
'release_timestamp': 1680780883,
|
||||||
|
@ -584,6 +589,31 @@ class WeverseLiveIE(WeverseBaseIE):
|
||||||
'live_status': 'is_live',
|
'live_status': 'is_live',
|
||||||
},
|
},
|
||||||
'skip': 'Livestream has ended',
|
'skip': 'Livestream has ended',
|
||||||
|
}, {
|
||||||
|
'url': 'https://weverse.io/lesserafim',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4-181521628',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': r're:심심해서요',
|
||||||
|
'description': '',
|
||||||
|
'uploader': '채채🤎',
|
||||||
|
'uploader_id': 'd49b8b06f3cc1d92d655b25ab27ac2e7',
|
||||||
|
'channel': 'lesserafim',
|
||||||
|
'channel_id': '47',
|
||||||
|
'creators': ['LE SSERAFIM'],
|
||||||
|
'channel_url': 'https://weverse.io/lesserafim',
|
||||||
|
'timestamp': 1728570273,
|
||||||
|
'upload_date': '20241010',
|
||||||
|
'release_timestamp': 1728570264,
|
||||||
|
'release_date': '20241010',
|
||||||
|
'thumbnail': r're:https://phinf\.wevpstatic\.net/.+\.png',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'availability': 'needs_auth',
|
||||||
|
'live_status': 'is_live',
|
||||||
|
},
|
||||||
|
'skip': 'Livestream has ended',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://weverse.io/billlie/',
|
'url': 'https://weverse.io/billlie/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
|
|
@ -1357,7 +1357,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
|
'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
|
||||||
}
|
}
|
||||||
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
|
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
|
||||||
_DEFAULT_CLIENTS = ('ios', 'web_creator')
|
_DEFAULT_CLIENTS = ('ios', 'mweb')
|
||||||
|
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,7 @@ import importlib.machinery
|
||||||
import importlib.util
|
import importlib.util
|
||||||
import inspect
|
import inspect
|
||||||
import itertools
|
import itertools
|
||||||
|
import os
|
||||||
import pkgutil
|
import pkgutil
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
|
@ -137,6 +138,8 @@ def load_module(module, module_name, suffix):
|
||||||
|
|
||||||
def load_plugins(name, suffix):
|
def load_plugins(name, suffix):
|
||||||
classes = {}
|
classes = {}
|
||||||
|
if os.environ.get('YTDLP_NO_PLUGINS'):
|
||||||
|
return classes
|
||||||
|
|
||||||
for finder, module_name, _ in iter_modules(name):
|
for finder, module_name, _ in iter_modules(name):
|
||||||
if any(x.startswith('_') for x in module_name.split('.')):
|
if any(x.startswith('_') for x in module_name.split('.')):
|
||||||
|
|
|
@ -26,19 +26,21 @@ class XAttrMetadataPP(PostProcessor):
|
||||||
|
|
||||||
XATTR_MAPPING = {
|
XATTR_MAPPING = {
|
||||||
'user.xdg.referrer.url': 'webpage_url',
|
'user.xdg.referrer.url': 'webpage_url',
|
||||||
# 'user.xdg.comment': 'description',
|
|
||||||
'user.dublincore.title': 'title',
|
'user.dublincore.title': 'title',
|
||||||
'user.dublincore.date': 'upload_date',
|
'user.dublincore.date': 'upload_date',
|
||||||
'user.dublincore.description': 'description',
|
|
||||||
'user.dublincore.contributor': 'uploader',
|
'user.dublincore.contributor': 'uploader',
|
||||||
'user.dublincore.format': 'format',
|
'user.dublincore.format': 'format',
|
||||||
|
# We do this last because it may get us close to the xattr limits
|
||||||
|
# (e.g., 4kB on ext4), and we don't want to have the other ones fail
|
||||||
|
'user.dublincore.description': 'description',
|
||||||
|
# 'user.xdg.comment': 'description',
|
||||||
}
|
}
|
||||||
|
|
||||||
def run(self, info):
|
def run(self, info):
|
||||||
mtime = os.stat(info['filepath']).st_mtime
|
mtime = os.stat(info['filepath']).st_mtime
|
||||||
self.to_screen('Writing metadata to file\'s xattrs')
|
self.to_screen('Writing metadata to file\'s xattrs')
|
||||||
try:
|
|
||||||
for xattrname, infoname in self.XATTR_MAPPING.items():
|
for xattrname, infoname in self.XATTR_MAPPING.items():
|
||||||
|
try:
|
||||||
value = info.get(infoname)
|
value = info.get(infoname)
|
||||||
if value:
|
if value:
|
||||||
if infoname == 'upload_date':
|
if infoname == 'upload_date':
|
||||||
|
@ -51,9 +53,9 @@ class XAttrMetadataPP(PostProcessor):
|
||||||
if e.reason == 'NO_SPACE':
|
if e.reason == 'NO_SPACE':
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. '
|
'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. '
|
||||||
'Some extended attributes are not written')
|
f'Extended attribute "{xattrname}" was not written.')
|
||||||
elif e.reason == 'VALUE_TOO_LONG':
|
elif e.reason == 'VALUE_TOO_LONG':
|
||||||
self.report_warning('Unable to write extended attributes due to too long values.')
|
self.report_warning(f'Unable to write extended attribute "{xattrname}" due to too long values.')
|
||||||
else:
|
else:
|
||||||
tip = ('You need to use NTFS' if compat_os_name == 'nt'
|
tip = ('You need to use NTFS' if compat_os_name == 'nt'
|
||||||
else 'You may have to enable them in your "/etc/fstab"')
|
else 'You may have to enable them in your "/etc/fstab"')
|
||||||
|
|
|
@ -664,31 +664,51 @@ def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize_path_parts(parts):
|
||||||
|
sanitized_parts = []
|
||||||
|
for part in parts:
|
||||||
|
if not part or part == '.':
|
||||||
|
continue
|
||||||
|
elif part == '..':
|
||||||
|
if sanitized_parts and sanitized_parts[-1] != '..':
|
||||||
|
sanitized_parts.pop()
|
||||||
|
sanitized_parts.append('..')
|
||||||
|
continue
|
||||||
|
# Replace invalid segments with `#`
|
||||||
|
# - trailing dots and spaces (`asdf...` => `asdf..#`)
|
||||||
|
# - invalid chars (`<>` => `##`)
|
||||||
|
sanitized_part = re.sub(r'[/<>:"\|\\?\*]|[\s.]$', '#', part)
|
||||||
|
sanitized_parts.append(sanitized_part)
|
||||||
|
|
||||||
|
return sanitized_parts
|
||||||
|
|
||||||
|
|
||||||
def sanitize_path(s, force=False):
|
def sanitize_path(s, force=False):
|
||||||
"""Sanitizes and normalizes path on Windows"""
|
"""Sanitizes and normalizes path on Windows"""
|
||||||
# XXX: this handles drive relative paths (c:sth) incorrectly
|
if sys.platform != 'win32':
|
||||||
if sys.platform == 'win32':
|
if not force:
|
||||||
force = False
|
|
||||||
drive_or_unc, _ = os.path.splitdrive(s)
|
|
||||||
elif force:
|
|
||||||
drive_or_unc = ''
|
|
||||||
else:
|
|
||||||
return s
|
return s
|
||||||
|
root = '/' if s.startswith('/') else ''
|
||||||
|
return root + '/'.join(_sanitize_path_parts(s.split('/')))
|
||||||
|
|
||||||
norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
|
normed = s.replace('/', '\\')
|
||||||
if drive_or_unc:
|
|
||||||
norm_path.pop(0)
|
if normed.startswith('\\\\'):
|
||||||
sanitized_path = [
|
# UNC path (`\\SERVER\SHARE`) or device path (`\\.`, `\\?`)
|
||||||
path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
|
parts = normed.split('\\')
|
||||||
for path_part in norm_path]
|
root = '\\'.join(parts[:4]) + '\\'
|
||||||
if drive_or_unc:
|
parts = parts[4:]
|
||||||
sanitized_path.insert(0, drive_or_unc + os.path.sep)
|
elif normed[1:2] == ':':
|
||||||
elif force and s and s[0] == os.path.sep:
|
# absolute path or drive relative path
|
||||||
sanitized_path.insert(0, os.path.sep)
|
offset = 3 if normed[2:3] == '\\' else 2
|
||||||
# TODO: Fix behavioral differences <3.12
|
root = normed[:offset]
|
||||||
# The workaround using `normpath` only superficially passes tests
|
parts = normed[offset:].split('\\')
|
||||||
# Ref: https://github.com/python/cpython/pull/100351
|
else:
|
||||||
return os.path.normpath(os.path.join(*sanitized_path))
|
# relative/drive root relative path
|
||||||
|
root = '\\' if normed[:1] == '\\' else ''
|
||||||
|
parts = normed.split('\\')
|
||||||
|
|
||||||
|
return root + '\\'.join(_sanitize_path_parts(parts))
|
||||||
|
|
||||||
|
|
||||||
def sanitize_url(url, *, scheme='http'):
|
def sanitize_url(url, *, scheme='http'):
|
||||||
|
@ -804,14 +824,18 @@ class Popen(subprocess.Popen):
|
||||||
_startupinfo = None
|
_startupinfo = None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _fix_pyinstaller_ld_path(env):
|
def _fix_pyinstaller_issues(env):
|
||||||
"""Restore LD_LIBRARY_PATH when using PyInstaller
|
|
||||||
Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
|
|
||||||
https://github.com/yt-dlp/yt-dlp/issues/4573
|
|
||||||
"""
|
|
||||||
if not hasattr(sys, '_MEIPASS'):
|
if not hasattr(sys, '_MEIPASS'):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Force spawning independent subprocesses for exes bundled with PyInstaller>=6.10
|
||||||
|
# Ref: https://pyinstaller.org/en/v6.10.0/CHANGES.html#incompatible-changes
|
||||||
|
# https://github.com/yt-dlp/yt-dlp/issues/11259
|
||||||
|
env['PYINSTALLER_RESET_ENVIRONMENT'] = '1'
|
||||||
|
|
||||||
|
# Restore LD_LIBRARY_PATH when using PyInstaller
|
||||||
|
# Ref: https://pyinstaller.org/en/v6.10.0/runtime-information.html#ld-library-path-libpath-considerations
|
||||||
|
# https://github.com/yt-dlp/yt-dlp/issues/4573
|
||||||
def _fix(key):
|
def _fix(key):
|
||||||
orig = env.get(f'{key}_ORIG')
|
orig = env.get(f'{key}_ORIG')
|
||||||
if orig is None:
|
if orig is None:
|
||||||
|
@ -825,7 +849,7 @@ class Popen(subprocess.Popen):
|
||||||
def __init__(self, args, *remaining, env=None, text=False, shell=False, **kwargs):
|
def __init__(self, args, *remaining, env=None, text=False, shell=False, **kwargs):
|
||||||
if env is None:
|
if env is None:
|
||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
self._fix_pyinstaller_ld_path(env)
|
self._fix_pyinstaller_issues(env)
|
||||||
|
|
||||||
self.__text_mode = kwargs.get('encoding') or kwargs.get('errors') or text or kwargs.get('universal_newlines')
|
self.__text_mode = kwargs.get('encoding') or kwargs.get('errors') or text or kwargs.get('universal_newlines')
|
||||||
if text is True:
|
if text is True:
|
||||||
|
@ -1964,11 +1988,30 @@ def urljoin(base, path):
|
||||||
return urllib.parse.urljoin(base, path)
|
return urllib.parse.urljoin(base, path)
|
||||||
|
|
||||||
|
|
||||||
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
|
def partial_application(func):
    """Decorator allowing `func` to be partially applied by omitting required arguments.

    When a call does not supply every parameter that has no default, a
    `functools.partial` holding the given arguments is returned instead of
    calling `func`. Once all required parameters are present, `func` is
    called directly, so invalid calls (unknown keyword, too many positional
    arguments) raise `TypeError` instead of being silently turned into a
    partial that can never be called successfully.

    @param func     The function to wrap
    @returns        The wrapped function
    """
    parameters = inspect.signature(func).parameters.values()
    # Required positional parameters, in declaration order; they always
    # precede parameters with defaults, so slicing by `len(args)` is safe
    positional_required = [
        param.name for param in parameters
        if param.kind in (inspect.Parameter.POSITIONAL_ONLY, inspect.Parameter.POSITIONAL_OR_KEYWORD)
        and param.default is inspect.Parameter.empty
    ]
    keyword_required = {
        param.name for param in parameters
        if param.kind is inspect.Parameter.KEYWORD_ONLY
        and param.default is inspect.Parameter.empty
    }

    @functools.wraps(func)
    def wrapped(*args, **kwargs):
        missing = keyword_required.union(positional_required[len(args):])
        if missing.difference(kwargs):
            return functools.partial(func, *args, **kwargs)
        return func(*args, **kwargs)

    return wrapped
|
||||||
|
|
||||||
|
|
||||||
|
@partial_application
|
||||||
|
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1, base=None):
|
||||||
if get_attr and v is not None:
|
if get_attr and v is not None:
|
||||||
v = getattr(v, get_attr, None)
|
v = getattr(v, get_attr, None)
|
||||||
|
if invscale == 1 and scale < 1:
|
||||||
|
invscale = int(1 / scale)
|
||||||
|
scale = 1
|
||||||
try:
|
try:
|
||||||
return int(v) * invscale // scale
|
return (int(v) if base is None else int(v, base=base)) * invscale // scale
|
||||||
except (ValueError, TypeError, OverflowError):
|
except (ValueError, TypeError, OverflowError):
|
||||||
return default
|
return default
|
||||||
|
|
||||||
|
@ -1986,9 +2029,13 @@ def str_to_int(int_str):
|
||||||
return int_or_none(int_str)
|
return int_or_none(int_str)
|
||||||
|
|
||||||
|
|
||||||
|
@partial_application
|
||||||
def float_or_none(v, scale=1, invscale=1, default=None):
|
def float_or_none(v, scale=1, invscale=1, default=None):
|
||||||
if v is None:
|
if v is None:
|
||||||
return default
|
return default
|
||||||
|
if invscale == 1 and scale < 1:
|
||||||
|
invscale = int(1 / scale)
|
||||||
|
scale = 1
|
||||||
try:
|
try:
|
||||||
return float(v) * invscale / scale
|
return float(v) * invscale / scale
|
||||||
except (ValueError, TypeError):
|
except (ValueError, TypeError):
|
||||||
|
|
|
@ -1,18 +1,35 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import collections
|
||||||
import collections.abc
|
import collections.abc
|
||||||
import contextlib
|
import contextlib
|
||||||
|
import functools
|
||||||
import http.cookies
|
import http.cookies
|
||||||
import inspect
|
import inspect
|
||||||
import itertools
|
import itertools
|
||||||
import re
|
import re
|
||||||
|
import typing
|
||||||
import xml.etree.ElementTree
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from ._utils import (
|
from ._utils import (
|
||||||
IDENTITY,
|
IDENTITY,
|
||||||
NO_DEFAULT,
|
NO_DEFAULT,
|
||||||
|
ExtractorError,
|
||||||
LazyList,
|
LazyList,
|
||||||
deprecation_warning,
|
deprecation_warning,
|
||||||
|
get_elements_html_by_class,
|
||||||
|
get_elements_html_by_attribute,
|
||||||
|
get_elements_by_attribute,
|
||||||
|
get_element_html_by_attribute,
|
||||||
|
get_element_by_attribute,
|
||||||
|
get_element_html_by_id,
|
||||||
|
get_element_by_id,
|
||||||
|
get_element_html_by_class,
|
||||||
|
get_elements_by_class,
|
||||||
|
get_element_text_and_html_by_tag,
|
||||||
is_iterable_like,
|
is_iterable_like,
|
||||||
try_call,
|
try_call,
|
||||||
|
url_or_none,
|
||||||
variadic,
|
variadic,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -54,6 +71,7 @@ def traverse_obj(
|
||||||
Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
|
Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.
|
||||||
- `any`-builtin: Take the first matching object and return it, resetting branching.
|
- `any`-builtin: Take the first matching object and return it, resetting branching.
|
||||||
- `all`-builtin: Take all matching objects and return them as a list, resetting branching.
|
- `all`-builtin: Take all matching objects and return them as a list, resetting branching.
|
||||||
|
- `filter`-builtin: Return the value if it is truthy, `None` otherwise.
|
||||||
|
|
||||||
`tuple`, `list`, and `dict` all support nested paths and branches.
|
`tuple`, `list`, and `dict` all support nested paths and branches.
|
||||||
|
|
||||||
|
@ -247,6 +265,10 @@ def traverse_obj(
|
||||||
objs = (list(filtered_objs),)
|
objs = (list(filtered_objs),)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if key is filter:
|
||||||
|
objs = filter(None, objs)
|
||||||
|
continue
|
||||||
|
|
||||||
if __debug__ and callable(key):
|
if __debug__ and callable(key):
|
||||||
# Verify function signature
|
# Verify function signature
|
||||||
inspect.signature(key).bind(None, None)
|
inspect.signature(key).bind(None, None)
|
||||||
|
@ -277,13 +299,143 @@ def traverse_obj(
|
||||||
return results[0] if results else {} if allow_empty and is_dict else None
|
return results[0] if results else {} if allow_empty and is_dict else None
|
||||||
|
|
||||||
for index, path in enumerate(paths, 1):
|
for index, path in enumerate(paths, 1):
|
||||||
result = _traverse_obj(obj, path, index == len(paths), True)
|
is_last = index == len(paths)
|
||||||
|
try:
|
||||||
|
result = _traverse_obj(obj, path, is_last, True)
|
||||||
if result is not None:
|
if result is not None:
|
||||||
return result
|
return result
|
||||||
|
except _RequiredError as e:
|
||||||
|
if is_last:
|
||||||
|
# Reraise to get cleaner stack trace
|
||||||
|
raise ExtractorError(e.orig_msg, expected=e.expected) from None
|
||||||
|
|
||||||
return None if default is NO_DEFAULT else default
|
return None if default is NO_DEFAULT else default
|
||||||
|
|
||||||
|
|
||||||
|
def value(value, /):
    """Create a one-argument callable that ignores its input and always returns `value`."""
    def constant(_):
        return value

    return constant
|
||||||
|
|
||||||
|
|
||||||
|
def require(name, /, *, expected=False):
    """Create a callable that passes a value through unless it is `None`.

    @param name         Used in the error message (`Unable to extract {name}`)
    @param expected     Forwarded to the raised `_RequiredError`
    @returns            A one-argument function returning its argument
                        unchanged, or raising `_RequiredError` for `None`
    """
    def checker(candidate):
        if candidate is not None:
            return candidate

        raise _RequiredError(f'Unable to extract {name}', expected=expected)

    return checker
|
||||||
|
|
||||||
|
|
||||||
|
class _RequiredError(ExtractorError):
    """Raised by the callable returned from `require` when its value is `None`."""
|
||||||
|
|
||||||
|
|
||||||
|
@typing.overload
|
||||||
|
def subs_list_to_dict(*, ext: str | None = None) -> collections.abc.Callable[[list[dict]], dict[str, list[dict]]]: ...
|
||||||
|
|
||||||
|
|
||||||
|
@typing.overload
|
||||||
|
def subs_list_to_dict(subs: list[dict] | None, /, *, ext: str | None = None) -> dict[str, list[dict]]: ...
|
||||||
|
|
||||||
|
|
||||||
|
def subs_list_to_dict(subs: list[dict] | None = None, /, *, ext=None):
|
||||||
|
"""
|
||||||
|
Convert subtitles from a traversal into a subtitle dict.
|
||||||
|
The path should have an `all` immediately before this function.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
`ext` The default value for `ext` in the subtitle dict
|
||||||
|
|
||||||
|
In the dict you can set the following additional items:
|
||||||
|
`id` The subtitle id to sort the dict into
|
||||||
|
`quality` The sort order for each subtitle
|
||||||
|
"""
|
||||||
|
if subs is None:
|
||||||
|
return functools.partial(subs_list_to_dict, ext=ext)
|
||||||
|
|
||||||
|
result = collections.defaultdict(list)
|
||||||
|
|
||||||
|
for sub in subs:
|
||||||
|
if not url_or_none(sub.get('url')) and not sub.get('data'):
|
||||||
|
continue
|
||||||
|
sub_id = sub.pop('id', None)
|
||||||
|
if sub_id is None:
|
||||||
|
continue
|
||||||
|
if ext is not None and not sub.get('ext'):
|
||||||
|
sub['ext'] = ext
|
||||||
|
result[sub_id].append(sub)
|
||||||
|
result = dict(result)
|
||||||
|
|
||||||
|
for subs in result.values():
|
||||||
|
subs.sort(key=lambda x: x.pop('quality', 0) or 0)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
@typing.overload
|
||||||
|
def find_element(*, attr: str, value: str, tag: str | None = None, html=False): ...
|
||||||
|
|
||||||
|
|
||||||
|
@typing.overload
|
||||||
|
def find_element(*, cls: str, html=False): ...
|
||||||
|
|
||||||
|
|
||||||
|
@typing.overload
|
||||||
|
def find_element(*, id: str, tag: str | None = None, html=False): ...
|
||||||
|
|
||||||
|
|
||||||
|
@typing.overload
|
||||||
|
def find_element(*, tag: str, html=False): ...
|
||||||
|
|
||||||
|
|
||||||
|
def find_element(*, tag=None, id=None, cls=None, attr=None, value=None, html=False):
|
||||||
|
# deliberately using `id=` and `cls=` for ease of readability
|
||||||
|
assert tag or id or cls or (attr and value), 'One of tag, id, cls or (attr AND value) is required'
|
||||||
|
if not tag:
|
||||||
|
tag = r'[\w:.-]+'
|
||||||
|
|
||||||
|
if attr and value:
|
||||||
|
assert not cls, 'Cannot match both attr and cls'
|
||||||
|
assert not id, 'Cannot match both attr and id'
|
||||||
|
func = get_element_html_by_attribute if html else get_element_by_attribute
|
||||||
|
return functools.partial(func, attr, value, tag=tag)
|
||||||
|
|
||||||
|
elif cls:
|
||||||
|
assert not id, 'Cannot match both cls and id'
|
||||||
|
assert tag is None, 'Cannot match both cls and tag'
|
||||||
|
func = get_element_html_by_class if html else get_elements_by_class
|
||||||
|
return functools.partial(func, cls)
|
||||||
|
|
||||||
|
elif id:
|
||||||
|
func = get_element_html_by_id if html else get_element_by_id
|
||||||
|
return functools.partial(func, id, tag=tag)
|
||||||
|
|
||||||
|
index = int(bool(html))
|
||||||
|
return lambda html: get_element_text_and_html_by_tag(tag, html)[index]
|
||||||
|
|
||||||
|
|
||||||
|
@typing.overload
|
||||||
|
def find_elements(*, cls: str, html=False): ...
|
||||||
|
|
||||||
|
|
||||||
|
@typing.overload
|
||||||
|
def find_elements(*, attr: str, value: str, tag: str | None = None, html=False): ...
|
||||||
|
|
||||||
|
|
||||||
|
def find_elements(*, tag=None, cls=None, attr=None, value=None, html=False):
|
||||||
|
# deliberately using `cls=` for ease of readability
|
||||||
|
assert cls or (attr and value), 'One of cls or (attr AND value) is required'
|
||||||
|
|
||||||
|
if attr and value:
|
||||||
|
assert not cls, 'Cannot match both attr and cls'
|
||||||
|
func = get_elements_html_by_attribute if html else get_elements_by_attribute
|
||||||
|
return functools.partial(func, attr, value, tag=tag or r'[\w:.-]+')
|
||||||
|
|
||||||
|
assert not tag, 'Cannot match both cls and tag'
|
||||||
|
func = get_elements_html_by_class if html else get_elements_by_class
|
||||||
|
return functools.partial(func, cls)
|
||||||
|
|
||||||
|
|
||||||
def get_first(obj, *paths, **kwargs):
|
def get_first(obj, *paths, **kwargs):
|
||||||
return traverse_obj(obj, *((..., *variadic(keys)) for keys in paths), **kwargs, get_all=False)
|
return traverse_obj(obj, *((..., *variadic(keys)) for keys in paths), **kwargs, get_all=False)
|
||||||
|
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
# Autogenerated by devscripts/update-version.py
|
# Autogenerated by devscripts/update-version.py
|
||||||
|
|
||||||
__version__ = '2024.09.27'
|
__version__ = '2024.10.07'
|
||||||
|
|
||||||
RELEASE_GIT_HEAD = 'c6387abc1af9842bb0541288a5610abba9b1ab51'
|
RELEASE_GIT_HEAD = '1a176d874e6772cd898ce507379ea388e96ee3f7'
|
||||||
|
|
||||||
VARIANT = None
|
VARIANT = None
|
||||||
|
|
||||||
|
@ -12,4 +12,4 @@ CHANNEL = 'stable'
|
||||||
|
|
||||||
ORIGIN = 'yt-dlp/yt-dlp'
|
ORIGIN = 'yt-dlp/yt-dlp'
|
||||||
|
|
||||||
_pkg_version = '2024.09.27'
|
_pkg_version = '2024.10.07'
|
||||||
|
|
Loading…
Reference in a new issue