Merge branch 'yt-dlp:master' into ie/shouttv

This commit is contained in:
bashonly 2024-11-17 11:22:20 -06:00
commit 2c7a1dc392
No known key found for this signature in database
GPG key ID: 783F096F253D15B0
153 changed files with 3415 additions and 2162 deletions

View file

@ -63,14 +63,15 @@ body:
placeholder: | placeholder: |
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023)
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Request Handlers: urllib, requests [debug] Request Handlers: urllib, requests, websockets, curl_cffi
[debug] Loaded 1893 extractors [debug] Loaded 1838 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>

View file

@ -75,14 +75,15 @@ body:
placeholder: | placeholder: |
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023)
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Request Handlers: urllib, requests [debug] Request Handlers: urllib, requests, websockets, curl_cffi
[debug] Loaded 1893 extractors [debug] Loaded 1838 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>

View file

@ -71,14 +71,15 @@ body:
placeholder: | placeholder: |
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023)
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Request Handlers: urllib, requests [debug] Request Handlers: urllib, requests, websockets, curl_cffi
[debug] Loaded 1893 extractors [debug] Loaded 1838 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>

View file

@ -56,14 +56,15 @@ body:
placeholder: | placeholder: |
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023)
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Request Handlers: urllib, requests [debug] Request Handlers: urllib, requests, websockets, curl_cffi
[debug] Loaded 1893 extractors [debug] Loaded 1838 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>

View file

@ -52,14 +52,15 @@ body:
placeholder: | placeholder: |
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023)
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Request Handlers: urllib, requests [debug] Request Handlers: urllib, requests, websockets, curl_cffi
[debug] Loaded 1893 extractors [debug] Loaded 1838 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>

View file

@ -58,14 +58,15 @@ body:
placeholder: | placeholder: |
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023)
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Request Handlers: urllib, requests [debug] Request Handlers: urllib, requests, websockets, curl_cffi
[debug] Loaded 1893 extractors [debug] Loaded 1838 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>

View file

@ -411,7 +411,7 @@ jobs:
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
python devscripts/install_deps.py -o --include build python devscripts/install_deps.py -o --include build
python devscripts/install_deps.py --include curl-cffi python devscripts/install_deps.py --include curl-cffi
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.10.0-py3-none-any.whl" python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.11.1-py3-none-any.whl"
- name: Prepare - name: Prepare
run: | run: |
@ -460,7 +460,7 @@ jobs:
run: | run: |
python devscripts/install_deps.py -o --include build python devscripts/install_deps.py -o --include build
python devscripts/install_deps.py python devscripts/install_deps.py
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.10.0-py3-none-any.whl" python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.11.1-py3-none-any.whl"
- name: Prepare - name: Prepare
run: | run: |
@ -504,7 +504,8 @@ jobs:
- windows32 - windows32
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/download-artifact@v4 - name: Download artifacts
uses: actions/download-artifact@v4
with: with:
path: artifact path: artifact
pattern: build-bin-* pattern: build-bin-*

View file

@ -28,3 +28,20 @@ jobs:
actions: write # For cleaning up cache actions: write # For cleaning up cache
id-token: write # mandatory for trusted publishing id-token: write # mandatory for trusted publishing
secrets: inherit secrets: inherit
publish_pypi:
needs: [release]
if: vars.MASTER_PYPI_PROJECT != ''
runs-on: ubuntu-latest
permissions:
id-token: write # mandatory for trusted publishing
steps:
- name: Download artifacts
uses: actions/download-artifact@v4
with:
path: dist
name: build-pypi
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
verbose: true

View file

@ -41,3 +41,20 @@ jobs:
actions: write # For cleaning up cache actions: write # For cleaning up cache
id-token: write # mandatory for trusted publishing id-token: write # mandatory for trusted publishing
secrets: inherit secrets: inherit
publish_pypi:
needs: [release]
if: vars.NIGHTLY_PYPI_PROJECT != ''
runs-on: ubuntu-latest
permissions:
id-token: write # mandatory for trusted publishing
steps:
- name: Download artifacts
uses: actions/download-artifact@v4
with:
path: dist
name: build-pypi
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
verbose: true

View file

@ -2,10 +2,6 @@ name: Release
on: on:
workflow_call: workflow_call:
inputs: inputs:
prerelease:
required: false
default: true
type: boolean
source: source:
required: false required: false
default: '' default: ''
@ -18,6 +14,10 @@ on:
required: false required: false
default: '' default: ''
type: string type: string
prerelease:
required: false
default: true
type: boolean
workflow_dispatch: workflow_dispatch:
inputs: inputs:
source: source:
@ -278,7 +278,17 @@ jobs:
make clean-cache make clean-cache
python -m build --no-isolation . python -m build --no-isolation .
- name: Upload artifacts
if: github.event_name != 'workflow_dispatch'
uses: actions/upload-artifact@v4
with:
name: build-pypi
path: |
dist/*
compression-level: 0
- name: Publish to PyPI - name: Publish to PyPI
if: github.event_name == 'workflow_dispatch'
uses: pypa/gh-action-pypi-publish@release/v1 uses: pypa/gh-action-pypi-publish@release/v1
with: with:
verbose: true verbose: true

View file

@ -688,3 +688,10 @@ KarboniteKream
mikkovedru mikkovedru
pktiuk pktiuk
rubyevadestaxes rubyevadestaxes
avagordon01
CounterPillow
JoseAngelB
KBelmin
kesor
MellowKyler
Wesley107772

View file

@ -4,6 +4,62 @@
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
--> -->
### 2024.11.04
#### Important changes
- **Beginning with this release, yt-dlp's Python dependencies *must* be installed using the `default` group**
If you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's optional dependencies (which were previously included by default). [Read more](https://github.com/yt-dlp/yt-dlp/pull/11255)
- **The minimum *required* Python version has been raised to 3.9**
Python 3.8 reached its end-of-life on 2024.10.07, and yt-dlp has now removed support for it. As an unfortunate side effect, the official `yt-dlp.exe` and `yt-dlp_x86.exe` binaries are no longer supported on Windows 7. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)
#### Core changes
- [Allow thumbnails with `.jpe` extension](https://github.com/yt-dlp/yt-dlp/commit/5bc5fb2835ea59bdf326bd12176d74d2c7348a95) ([#11408](https://github.com/yt-dlp/yt-dlp/issues/11408)) by [bashonly](https://github.com/bashonly)
- [Expand paths in `--plugin-dirs`](https://github.com/yt-dlp/yt-dlp/commit/914af9a0cf51c9a3f74aa88d952bee8334c67511) ([#11334](https://github.com/yt-dlp/yt-dlp/issues/11334)) by [bashonly](https://github.com/bashonly)
- [Fix `--netrc` empty string parsing for Python <=3.10](https://github.com/yt-dlp/yt-dlp/commit/88402b714ec124633933737bc156b172a3dec3d6) ([#11414](https://github.com/yt-dlp/yt-dlp/issues/11414)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
- [Populate format sorting fields before dependent fields](https://github.com/yt-dlp/yt-dlp/commit/5c880ef42e9c2b2fc412f6d69dad37d34fb75a62) ([#11353](https://github.com/yt-dlp/yt-dlp/issues/11353)) by [Grub4K](https://github.com/Grub4K)
- [Prioritize AV1](https://github.com/yt-dlp/yt-dlp/commit/3945677a75e94a1fecc085432d791e1c21220cd3) ([#11153](https://github.com/yt-dlp/yt-dlp/issues/11153)) by [seproDev](https://github.com/seproDev)
- [Remove Python 3.8 support](https://github.com/yt-dlp/yt-dlp/commit/d784464399b600ba9516bbcec6286f11d68974dd) ([#11321](https://github.com/yt-dlp/yt-dlp/issues/11321)) by [bashonly](https://github.com/bashonly)
- **aes**: [Fix GCM pad length calculation](https://github.com/yt-dlp/yt-dlp/commit/beae2db127d3b5017cbcf685da9de7a9ef496541) ([#11438](https://github.com/yt-dlp/yt-dlp/issues/11438)) by [seproDev](https://github.com/seproDev)
- **cookies**: [Support chrome table version 24](https://github.com/yt-dlp/yt-dlp/commit/4613096f2e6eab9dcbac0e98b6cec760bbc99375) ([#11425](https://github.com/yt-dlp/yt-dlp/issues/11425)) by [kesor](https://github.com/kesor), [seproDev](https://github.com/seproDev)
- **utils**
- [Allow partial application for more functions](https://github.com/yt-dlp/yt-dlp/commit/b6dc2c49e8793c6dfa21275e61caf49ec1148b81) ([#11391](https://github.com/yt-dlp/yt-dlp/issues/11391)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) (With fixes in [422195e](https://github.com/yt-dlp/yt-dlp/commit/422195ec70a00b0d2002b238cacbae7790c57fdf) by [Grub4K](https://github.com/Grub4K))
- [Fix `find_element` by class](https://github.com/yt-dlp/yt-dlp/commit/f93c16395cea1fe9ffc3c594d3e019c3b214544c) ([#11402](https://github.com/yt-dlp/yt-dlp/issues/11402)) by [bashonly](https://github.com/bashonly)
- [Fix and improve `find_element` and `find_elements`](https://github.com/yt-dlp/yt-dlp/commit/b103aca24d35b72b405c340357dc01a0ed534281) ([#11443](https://github.com/yt-dlp/yt-dlp/issues/11443)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
#### Extractor changes
- [Resolve `language` to ISO639-2 for ISM formats](https://github.com/yt-dlp/yt-dlp/commit/21cdcf03a237a0c4979c941d5a5385cae44c7906) ([#11359](https://github.com/yt-dlp/yt-dlp/issues/11359)) by [bashonly](https://github.com/bashonly)
- **ardmediathek**: [Extract chapters](https://github.com/yt-dlp/yt-dlp/commit/59f8dd8239c31f00b708da53b39b1e2e9409b6e6) ([#11442](https://github.com/yt-dlp/yt-dlp/issues/11442)) by [iw0nderhow](https://github.com/iw0nderhow)
- **bfmtv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/754940e9a558565d6bd3c0c529802569b1d0ae4e) ([#11444](https://github.com/yt-dlp/yt-dlp/issues/11444)) by [seproDev](https://github.com/seproDev)
- **bluesky**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/5c7a5aaab27e9c3cb367b663a6136ca58866e547) ([#11055](https://github.com/yt-dlp/yt-dlp/issues/11055)) by [MellowKyler](https://github.com/MellowKyler), [seproDev](https://github.com/seproDev)
- **ccma**: [Support new 3cat.cat domain](https://github.com/yt-dlp/yt-dlp/commit/330335386d4f7603d92d6796798375336005275e) ([#11222](https://github.com/yt-dlp/yt-dlp/issues/11222)) by [JoseAngelB](https://github.com/JoseAngelB)
- **chzzk**: video: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/9c6534da81e485b2325b3489ee4128943e6d3e4b) ([#11228](https://github.com/yt-dlp/yt-dlp/issues/11228)) by [hui1601](https://github.com/hui1601)
- **cnn**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/9acf79c91a8c6c55ca972747c6858e784e2da351) ([#10185](https://github.com/yt-dlp/yt-dlp/issues/10185)) by [kylegustavo](https://github.com/kylegustavo), [seproDev](https://github.com/seproDev)
- **dailymotion**
- [Improve embed extraction](https://github.com/yt-dlp/yt-dlp/commit/a403dcf9be20b49cbb3017328f4aaa352fb6d685) ([#10843](https://github.com/yt-dlp/yt-dlp/issues/10843)) by [bashonly](https://github.com/bashonly), [pzhlkj6612](https://github.com/pzhlkj6612)
- [Support shortened URLs](https://github.com/yt-dlp/yt-dlp/commit/d1358231371f20fa23020fa9176be3b56119873e) ([#11374](https://github.com/yt-dlp/yt-dlp/issues/11374)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
- **facebook**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/ec9b25043f399de6a591d8370d32bf0e66c117f2) ([#11343](https://github.com/yt-dlp/yt-dlp/issues/11343)) by [kclauhk](https://github.com/kclauhk)
- **generic**: [Do not impersonate by default](https://github.com/yt-dlp/yt-dlp/commit/c29f5a7fae93a08f3cfbb6127b2faa75145b06a0) ([#11336](https://github.com/yt-dlp/yt-dlp/issues/11336)) by [bashonly](https://github.com/bashonly)
- **nfl**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/838f4385de8300a4dd4e7ffbbf0e5b7b85fb52c2) ([#11409](https://github.com/yt-dlp/yt-dlp/issues/11409)) by [bashonly](https://github.com/bashonly)
- **niconicouser**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6abef74232c0fc695cd803c18ae446cacb129389) ([#11324](https://github.com/yt-dlp/yt-dlp/issues/11324)) by [Wesley107772](https://github.com/Wesley107772)
- **soundcloud**: [Extract artists](https://github.com/yt-dlp/yt-dlp/commit/f101e5d34c97c608156ad5396714c2a2edca966a) ([#11377](https://github.com/yt-dlp/yt-dlp/issues/11377)) by [seproDev](https://github.com/seproDev)
- **tumblr**: [Support more URLs](https://github.com/yt-dlp/yt-dlp/commit/b03267bf0675eeb8df5baf1daac7cf67840c91a5) ([#6057](https://github.com/yt-dlp/yt-dlp/issues/6057)) by [selfisekai](https://github.com/selfisekai), [seproDev](https://github.com/seproDev)
- **twitter**: [Remove cookies migration workaround](https://github.com/yt-dlp/yt-dlp/commit/76802f461332d444e596437c42374fa237fa5174) ([#11392](https://github.com/yt-dlp/yt-dlp/issues/11392)) by [bashonly](https://github.com/bashonly)
- **vimeo**: [Fix API retries](https://github.com/yt-dlp/yt-dlp/commit/57212a5f97ce367590aaa5c3e9a135eead8f81f7) ([#11351](https://github.com/yt-dlp/yt-dlp/issues/11351)) by [bashonly](https://github.com/bashonly)
- **yle_areena**: [Support live events](https://github.com/yt-dlp/yt-dlp/commit/a6783a3b9905e547f6c1d4df9d7c7999feda8afa) ([#11358](https://github.com/yt-dlp/yt-dlp/issues/11358)) by [bashonly](https://github.com/bashonly), [CounterPillow](https://github.com/CounterPillow)
- **youtube**: [Adjust OAuth refresh token handling](https://github.com/yt-dlp/yt-dlp/commit/d569a8845254d90ce13ad74ae76695e8d6441068) ([#11414](https://github.com/yt-dlp/yt-dlp/issues/11414)) by [bashonly](https://github.com/bashonly)
#### Misc. changes
- **build**
- [Disable attestations for trusted publishing](https://github.com/yt-dlp/yt-dlp/commit/428ffb75aa3534b275cf54de42693a4d261519da) ([#11418](https://github.com/yt-dlp/yt-dlp/issues/11418)) by [bashonly](https://github.com/bashonly)
- [Move optional dependencies to the `default` group](https://github.com/yt-dlp/yt-dlp/commit/87884f15580910e4e0fe0e1db73508debc657471) ([#11255](https://github.com/yt-dlp/yt-dlp/issues/11255)) by [bashonly](https://github.com/bashonly)
- [Use Ubuntu 20.04 and Python 3.9 for Linux ARM builds](https://github.com/yt-dlp/yt-dlp/commit/dd2e24446954246a2ec4d4a7e95531f52a14b351) ([#8638](https://github.com/yt-dlp/yt-dlp/issues/8638)) by [bashonly](https://github.com/bashonly)
- **cleanup**
- Miscellaneous
- [ea9e35d](https://github.com/yt-dlp/yt-dlp/commit/ea9e35d85fba5eab341cdcaf1eaed69b57f7e465) by [bashonly](https://github.com/bashonly)
- [c998238](https://github.com/yt-dlp/yt-dlp/commit/c998238c2e76c62d1d29962c6e8ebe916cc7913b) by [bashonly](https://github.com/bashonly), [KBelmin](https://github.com/KBelmin)
- [197d0b0](https://github.com/yt-dlp/yt-dlp/commit/197d0b03b6a3c8fe4fa5ace630eeffec629bf72c) by [avagordon01](https://github.com/avagordon01), [bashonly](https://github.com/bashonly), [grqz](https://github.com/grqz), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev)
- **devscripts**: `make_changelog`: [Parse full commit message for fixes](https://github.com/yt-dlp/yt-dlp/commit/0a3991edae0e10f2ea41ece9fdea5e48f789f1de) ([#11366](https://github.com/yt-dlp/yt-dlp/issues/11366)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
### 2024.10.22 ### 2024.10.22
#### Important changes #### Important changes

View file

@ -479,7 +479,8 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
--no-download-archive Do not use archive file (default) --no-download-archive Do not use archive file (default)
--max-downloads NUMBER Abort after downloading NUMBER files --max-downloads NUMBER Abort after downloading NUMBER files
--break-on-existing Stop the download process when encountering --break-on-existing Stop the download process when encountering
a file that is in the archive a file that is in the archive supplied with
the --download-archive option
--no-break-on-existing Do not stop the download process when --no-break-on-existing Do not stop the download process when
encountering a file that is in the archive encountering a file that is in the archive
(default) (default)
@ -1553,9 +1554,9 @@ The available fields are:
All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. E.g. `+res` prefers the format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. E.g. `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. E.g. `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the values nearest to the one provided by using `~` as the delimiter. E.g. `filesize~1G` prefers the format with filesize closest to 1 GiB. All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. E.g. `+res` prefers the format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. E.g. `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. E.g. `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the values nearest to the one provided by using `~` as the delimiter. E.g. `filesize~1G` prefers the format with filesize closest to 1 GiB.
The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behavior can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,vcodec:vp9.2,channels,acodec,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order. The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behavior can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,vcodec,channels,acodec,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order.
Note that the default has `vcodec:vp9.2`; i.e. `av1` is not preferred. Similarly, the default for hdr is `hdr:12`; i.e. Dolby Vision is not preferred. These choices are made since DV and AV1 formats are not yet fully compatible with most devices. This may be changed in the future as more devices become capable of smoothly playing back these formats. Note that the default for hdr is `hdr:12`; i.e. Dolby Vision is not preferred. This choice was made since DV formats are not yet fully compatible with most devices. This may be changed in the future.
If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`. If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`.
@ -1767,7 +1768,7 @@ The following extractors use this feature:
#### youtube #### youtube
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for the list of supported content language codes * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for the list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mweb`, `mediaconnect`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, and `web_creator,mediaconnect` is added as needed for age-gated videos when account age verification is required. Similarly, the `_music` variants are added for `music.youtube.com` URLs. Some clients, such as `web` and `android`, require a `po_token` for their formats to be downloadable. Some clients, such as the `_creator` variants, will only work with authentication. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web` * `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mweb`, `mediaconnect`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, and `web_creator` is added as needed for age-gated videos when account age verification is required. Similarly, the `_music` variants are added for `music.youtube.com` URLs. Some clients, such as `web` and `android`, require a `po_token` for their formats to be downloadable. Some clients, such as the `_creator` variants, will only work with authentication. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web`
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
@ -2205,7 +2206,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu
* `avconv` is not supported as an alternative to `ffmpeg` * `avconv` is not supported as an alternative to `ffmpeg`
* yt-dlp stores config files in slightly different locations to youtube-dl. See [CONFIGURATION](#configuration) for a list of correct locations * yt-dlp stores config files in slightly different locations to youtube-dl. See [CONFIGURATION](#configuration) for a list of correct locations
* The default [output template](#output-template) is `%(title)s [%(id)s].%(ext)s`. There is no real reason for this change. This was changed before yt-dlp was ever made public and now there are no plans to change it back to `%(title)s-%(id)s.%(ext)s`. Instead, you may use `--compat-options filename` * The default [output template](#output-template) is `%(title)s [%(id)s].%(ext)s`. There is no real reason for this change. This was changed before yt-dlp was ever made public and now there are no plans to change it back to `%(title)s-%(id)s.%(ext)s`. Instead, you may use `--compat-options filename`
* The default [format sorting](#sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order * The default [format sorting](#sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order. Older versions of yt-dlp preferred VP9 due to its broader compatibility; you can use `--compat-options prefer-vp9-sort` to revert to that format sorting preference. These two compat options cannot be used together
* The default format selector is `bv*+ba/b`. This means that if a combined video + audio format that is better than the best video-only format is found, the former will be preferred. Use `-f bv+ba/b` or `--compat-options format-spec` to revert this * The default format selector is `bv*+ba/b`. This means that if a combined video + audio format that is better than the best video-only format is found, the former will be preferred. Use `-f bv+ba/b` or `--compat-options format-spec` to revert this
* Unlike youtube-dlc, yt-dlp does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. You can also use `--compat-options multistreams` to enable both * Unlike youtube-dlc, yt-dlp does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. You can also use `--compat-options multistreams` to enable both
* `--no-abort-on-error` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead * `--no-abort-on-error` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead
@ -2234,11 +2235,11 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu
For ease of use, a few more compat options are available: For ease of use, a few more compat options are available:
* `--compat-options all`: Use all compat options (**Do NOT use this!**) * `--compat-options all`: Use all compat options (**Do NOT use this!**)
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx,-allow-unsafe-ext` * `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx,-allow-unsafe-ext,-prefer-vp9-sort`
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx,-allow-unsafe-ext` * `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx,-allow-unsafe-ext,-prefer-vp9-sort`
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date` * `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx` * `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
* `--compat-options 2023`: Currently does nothing. Use this to enable all future compat options * `--compat-options 2023`: Same as `--compat-options prefer-vp9-sort`. Use this to enable all future compat options
The following compat options restore vulnerable behavior from before security patches: The following compat options restore vulnerable behavior from before security patches:

View file

@ -11,13 +11,12 @@ import codecs
import subprocess import subprocess
from yt_dlp.aes import aes_encrypt, key_expansion from yt_dlp.aes import aes_encrypt, key_expansion
from yt_dlp.utils import intlist_to_bytes
secret_msg = b'Secret message goes here' secret_msg = b'Secret message goes here'
def hex_str(int_list): def hex_str(int_list):
return codecs.encode(intlist_to_bytes(int_list), 'hex') return codecs.encode(bytes(int_list), 'hex')
def openssl_encode(algo, key, iv): def openssl_encode(algo, key, iv):

View file

@ -52,7 +52,7 @@ default = [
"pycryptodomex", "pycryptodomex",
"requests>=2.32.2,<3", "requests>=2.32.2,<3",
"urllib3>=1.26.17,<3", "urllib3>=1.26.17,<3",
"websockets>=13.0", "websockets>=13.0,<14",
] ]
curl-cffi = [ curl-cffi = [
"curl-cffi==0.5.10; os_name=='nt' and implementation_name=='cpython'", "curl-cffi==0.5.10; os_name=='nt' and implementation_name=='cpython'",
@ -83,7 +83,7 @@ test = [
"pytest-rerunfailures~=14.0", "pytest-rerunfailures~=14.0",
] ]
pyinstaller = [ pyinstaller = [
"pyinstaller>=6.10.0", # Windows temp cleanup fixed in 6.10.0 "pyinstaller>=6.11.1", # Windows temp cleanup fixed in 6.11.1
] ]
[project.urls] [project.urls]
@ -313,6 +313,16 @@ banned-from = [
"yt_dlp.compat.compat_urllib_parse_urlparse".msg = "Use `urllib.parse.urlparse` instead." "yt_dlp.compat.compat_urllib_parse_urlparse".msg = "Use `urllib.parse.urlparse` instead."
"yt_dlp.compat.compat_shlex_quote".msg = "Use `yt_dlp.utils.shell_quote` instead." "yt_dlp.compat.compat_shlex_quote".msg = "Use `yt_dlp.utils.shell_quote` instead."
"yt_dlp.utils.error_to_compat_str".msg = "Use `str` instead." "yt_dlp.utils.error_to_compat_str".msg = "Use `str` instead."
"yt_dlp.utils.bytes_to_intlist".msg = "Use `list` instead."
"yt_dlp.utils.intlist_to_bytes".msg = "Use `bytes` instead."
"yt_dlp.utils.decodeArgument".msg = "Do not use"
"yt_dlp.utils.decodeFilename".msg = "Do not use"
"yt_dlp.utils.encodeFilename".msg = "Do not use"
"yt_dlp.compat.compat_os_name".msg = "Use `os.name` instead."
"yt_dlp.compat.compat_realpath".msg = "Use `os.path.realpath` instead."
"yt_dlp.compat.functools".msg = "Use `functools` instead."
"yt_dlp.utils.decodeOption".msg = "Do not use"
"yt_dlp.utils.compiled_regex_type".msg = "Use `re.Pattern` instead."
[tool.autopep8] [tool.autopep8]
max_line_length = 120 max_line_length = 120

View file

@ -190,6 +190,7 @@
- **blerp** - **blerp**
- **blogger.com** - **blogger.com**
- **Bloomberg** - **Bloomberg**
- **Bluesky**
- **BokeCC** - **BokeCC**
- **BongaCams** - **BongaCams**
- **Boosty** - **Boosty**
@ -247,7 +248,7 @@
- **cbsnews:livevideo**: CBS News Live Videos - **cbsnews:livevideo**: CBS News Live Videos
- **cbssports**: (**Currently broken**) - **cbssports**: (**Currently broken**)
- **cbssports:embed**: (**Currently broken**) - **cbssports:embed**: (**Currently broken**)
- **CCMA** - **CCMA**: 3Cat, TV3 and Catalunya Ràdio
- **CCTV**: 央视网 - **CCTV**: 央视网
- **CDA**: [*cdapl*](## "netrc machine") - **CDA**: [*cdapl*](## "netrc machine")
- **CDAFolder** - **CDAFolder**
@ -280,8 +281,6 @@
- **cmt.com**: (**Currently broken**) - **cmt.com**: (**Currently broken**)
- **CNBCVideo** - **CNBCVideo**
- **CNN** - **CNN**
- **CNNArticle**
- **CNNBlogs**
- **CNNIndonesia** - **CNNIndonesia**
- **ComedyCentral** - **ComedyCentral**
- **ComedyCentralTV** - **ComedyCentralTV**
@ -685,9 +684,9 @@
- **LastFMPlaylist** - **LastFMPlaylist**
- **LastFMUser** - **LastFMUser**
- **LaXarxaMes**: [*laxarxames*](## "netrc machine") - **LaXarxaMes**: [*laxarxames*](## "netrc machine")
- **lbry** - **lbry**: odysee.com
- **lbry:channel** - **lbry:channel**: odysee.com channels
- **lbry:playlist** - **lbry:playlist**: odysee.com playlists
- **LCI** - **LCI**
- **Lcp** - **Lcp**
- **LcpPlay** - **LcpPlay**
@ -1446,7 +1445,7 @@
- **TeleQuebecSquat** - **TeleQuebecSquat**
- **TeleQuebecVideo** - **TeleQuebecVideo**
- **TeleTask**: (**Currently broken**) - **TeleTask**: (**Currently broken**)
- **Telewebion** - **Telewebion**: (**Currently broken**)
- **Tempo** - **Tempo**
- **TennisTV**: [*tennistv*](## "netrc machine") - **TennisTV**: [*tennistv*](## "netrc machine")
- **TenPlay**: [*10play*](## "netrc machine") - **TenPlay**: [*10play*](## "netrc machine")

View file

@ -9,7 +9,6 @@ import types
import yt_dlp.extractor import yt_dlp.extractor
from yt_dlp import YoutubeDL from yt_dlp import YoutubeDL
from yt_dlp.compat import compat_os_name
from yt_dlp.utils import preferredencoding, try_call, write_string, find_available_port from yt_dlp.utils import preferredencoding, try_call, write_string, find_available_port
if 'pytest' in sys.modules: if 'pytest' in sys.modules:
@ -49,7 +48,7 @@ def report_warning(message, *args, **kwargs):
Print the message to stderr, it will be prefixed with 'WARNING:' Print the message to stderr, it will be prefixed with 'WARNING:'
If stderr is a tty file the 'WARNING:' will be colored If stderr is a tty file the 'WARNING:' will be colored
""" """
if sys.stderr.isatty() and compat_os_name != 'nt': if sys.stderr.isatty() and os.name != 'nt':
_msg_header = '\033[0;33mWARNING:\033[0m' _msg_header = '\033[0;33mWARNING:\033[0m'
else: else:
_msg_header = 'WARNING:' _msg_header = 'WARNING:'

View file

@ -53,6 +53,18 @@ class TestInfoExtractor(unittest.TestCase):
def test_ie_key(self): def test_ie_key(self):
self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE) self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
def test_get_netrc_login_info(self):
for params in [
{'usenetrc': True, 'netrc_location': './test/testdata/netrc/netrc'},
{'netrc_cmd': f'{sys.executable} ./test/testdata/netrc/print_netrc.py'},
]:
ie = DummyIE(FakeYDL(params))
self.assertEqual(ie._get_netrc_login_info(netrc_machine='normal_use'), ('user', 'pass'))
self.assertEqual(ie._get_netrc_login_info(netrc_machine='empty_user'), ('', 'pass'))
self.assertEqual(ie._get_netrc_login_info(netrc_machine='empty_pass'), ('user', ''))
self.assertEqual(ie._get_netrc_login_info(netrc_machine='both_empty'), ('', ''))
self.assertEqual(ie._get_netrc_login_info(netrc_machine='nonexistent'), (None, None))
def test_html_search_regex(self): def test_html_search_regex(self):
html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>' html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'
search = lambda re, *args: self.ie._html_search_regex(re, html, *args) search = lambda re, *args: self.ie._html_search_regex(re, html, *args)

View file

@ -15,7 +15,6 @@ import json
from test.helper import FakeYDL, assertRegexpMatches, try_rm from test.helper import FakeYDL, assertRegexpMatches, try_rm
from yt_dlp import YoutubeDL from yt_dlp import YoutubeDL
from yt_dlp.compat import compat_os_name
from yt_dlp.extractor import YoutubeIE from yt_dlp.extractor import YoutubeIE
from yt_dlp.extractor.common import InfoExtractor from yt_dlp.extractor.common import InfoExtractor
from yt_dlp.postprocessor.common import PostProcessor from yt_dlp.postprocessor.common import PostProcessor
@ -839,8 +838,8 @@ class TestYoutubeDL(unittest.TestCase):
test('%(filesize)#D', '1Ki') test('%(filesize)#D', '1Ki')
test('%(height)5.2D', ' 1.08k') test('%(height)5.2D', ' 1.08k')
test('%(title4)#S', 'foo_bar_test') test('%(title4)#S', 'foo_bar_test')
test('%(title4).10S', ('foo bar ', 'foo bar' + ('#' if compat_os_name == 'nt' else ' '))) test('%(title4).10S', ('foo bar ', 'foo bar' + ('#' if os.name == 'nt' else ' ')))
if compat_os_name == 'nt': if os.name == 'nt':
test('%(title4)q', ('"foo ""bar"" test"', None)) test('%(title4)q', ('"foo ""bar"" test"', None))
test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', None)) test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', None))
test('%(formats.0.id)#q', ('"id 1"', None)) test('%(formats.0.id)#q', ('"id 1"', None))
@ -903,9 +902,9 @@ class TestYoutubeDL(unittest.TestCase):
# Environment variable expansion for prepare_filename # Environment variable expansion for prepare_filename
os.environ['__yt_dlp_var'] = 'expanded' os.environ['__yt_dlp_var'] = 'expanded'
envvar = '%__yt_dlp_var%' if compat_os_name == 'nt' else '$__yt_dlp_var' envvar = '%__yt_dlp_var%' if os.name == 'nt' else '$__yt_dlp_var'
test(envvar, (envvar, 'expanded')) test(envvar, (envvar, 'expanded'))
if compat_os_name == 'nt': if os.name == 'nt':
test('%s%', ('%s%', '%s%')) test('%s%', ('%s%', '%s%'))
os.environ['s'] = 'expanded' os.environ['s'] = 'expanded'
test('%s%', ('%s%', 'expanded')) # %s% should be expanded before escaping %s test('%s%', ('%s%', 'expanded')) # %s% should be expanded before escaping %s

View file

@ -27,7 +27,6 @@ from yt_dlp.aes import (
pad_block, pad_block,
) )
from yt_dlp.dependencies import Cryptodome from yt_dlp.dependencies import Cryptodome
from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes
# the encrypted data can be generated with 'devscripts/generate_aes_testdata.py' # the encrypted data can be generated with 'devscripts/generate_aes_testdata.py'
@ -40,33 +39,33 @@ class TestAES(unittest.TestCase):
def test_encrypt(self): def test_encrypt(self):
msg = b'message' msg = b'message'
key = list(range(16)) key = list(range(16))
encrypted = aes_encrypt(bytes_to_intlist(msg), key) encrypted = aes_encrypt(list(msg), key)
decrypted = intlist_to_bytes(aes_decrypt(encrypted, key)) decrypted = bytes(aes_decrypt(encrypted, key))
self.assertEqual(decrypted, msg) self.assertEqual(decrypted, msg)
def test_cbc_decrypt(self): def test_cbc_decrypt(self):
data = b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\x27\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd' data = b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\x27\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd'
decrypted = intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(data), self.key, self.iv)) decrypted = bytes(aes_cbc_decrypt(list(data), self.key, self.iv))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
if Cryptodome.AES: if Cryptodome.AES:
decrypted = aes_cbc_decrypt_bytes(data, intlist_to_bytes(self.key), intlist_to_bytes(self.iv)) decrypted = aes_cbc_decrypt_bytes(data, bytes(self.key), bytes(self.iv))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
def test_cbc_encrypt(self): def test_cbc_encrypt(self):
data = bytes_to_intlist(self.secret_msg) data = list(self.secret_msg)
encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv)) encrypted = bytes(aes_cbc_encrypt(data, self.key, self.iv))
self.assertEqual( self.assertEqual(
encrypted, encrypted,
b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd') b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd')
def test_ctr_decrypt(self): def test_ctr_decrypt(self):
data = bytes_to_intlist(b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08') data = list(b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08')
decrypted = intlist_to_bytes(aes_ctr_decrypt(data, self.key, self.iv)) decrypted = bytes(aes_ctr_decrypt(data, self.key, self.iv))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
def test_ctr_encrypt(self): def test_ctr_encrypt(self):
data = bytes_to_intlist(self.secret_msg) data = list(self.secret_msg)
encrypted = intlist_to_bytes(aes_ctr_encrypt(data, self.key, self.iv)) encrypted = bytes(aes_ctr_encrypt(data, self.key, self.iv))
self.assertEqual( self.assertEqual(
encrypted, encrypted,
b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08') b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08')
@ -75,47 +74,59 @@ class TestAES(unittest.TestCase):
data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f.\x08\xb4T\xe4/\x17\xbd' data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f.\x08\xb4T\xe4/\x17\xbd'
authentication_tag = b'\xe8&I\x80rI\x07\x9d}YWuU@:e' authentication_tag = b'\xe8&I\x80rI\x07\x9d}YWuU@:e'
decrypted = intlist_to_bytes(aes_gcm_decrypt_and_verify( decrypted = bytes(aes_gcm_decrypt_and_verify(
bytes_to_intlist(data), self.key, bytes_to_intlist(authentication_tag), self.iv[:12])) list(data), self.key, list(authentication_tag), self.iv[:12]))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
if Cryptodome.AES: if Cryptodome.AES:
decrypted = aes_gcm_decrypt_and_verify_bytes( decrypted = aes_gcm_decrypt_and_verify_bytes(
data, intlist_to_bytes(self.key), authentication_tag, intlist_to_bytes(self.iv[:12])) data, bytes(self.key), authentication_tag, bytes(self.iv[:12]))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
def test_gcm_aligned_decrypt(self):
data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f'
authentication_tag = b'\x08\xb1\x9d!&\x98\xd0\xeaRq\x90\xe6;\xb5]\xd8'
decrypted = bytes(aes_gcm_decrypt_and_verify(
list(data), self.key, list(authentication_tag), self.iv[:12]))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg[:16])
if Cryptodome.AES:
decrypted = aes_gcm_decrypt_and_verify_bytes(
data, bytes(self.key), authentication_tag, bytes(self.iv[:12]))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg[:16])
def test_decrypt_text(self): def test_decrypt_text(self):
password = intlist_to_bytes(self.key).decode() password = bytes(self.key).decode()
encrypted = base64.b64encode( encrypted = base64.b64encode(
intlist_to_bytes(self.iv[:8]) bytes(self.iv[:8])
+ b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae', + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae',
).decode() ).decode()
decrypted = (aes_decrypt_text(encrypted, password, 16)) decrypted = (aes_decrypt_text(encrypted, password, 16))
self.assertEqual(decrypted, self.secret_msg) self.assertEqual(decrypted, self.secret_msg)
password = intlist_to_bytes(self.key).decode() password = bytes(self.key).decode()
encrypted = base64.b64encode( encrypted = base64.b64encode(
intlist_to_bytes(self.iv[:8]) bytes(self.iv[:8])
+ b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83', + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83',
).decode() ).decode()
decrypted = (aes_decrypt_text(encrypted, password, 32)) decrypted = (aes_decrypt_text(encrypted, password, 32))
self.assertEqual(decrypted, self.secret_msg) self.assertEqual(decrypted, self.secret_msg)
def test_ecb_encrypt(self): def test_ecb_encrypt(self):
data = bytes_to_intlist(self.secret_msg) data = list(self.secret_msg)
encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key)) encrypted = bytes(aes_ecb_encrypt(data, self.key))
self.assertEqual( self.assertEqual(
encrypted, encrypted,
b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')
def test_ecb_decrypt(self): def test_ecb_decrypt(self):
data = bytes_to_intlist(b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') data = list(b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')
decrypted = intlist_to_bytes(aes_ecb_decrypt(data, self.key, self.iv)) decrypted = bytes(aes_ecb_decrypt(data, self.key, self.iv))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
def test_key_expansion(self): def test_key_expansion(self):
key = '4f6bdaa39e2f8cb07f5e722d9edef314' key = '4f6bdaa39e2f8cb07f5e722d9edef314'
self.assertEqual(key_expansion(bytes_to_intlist(bytearray.fromhex(key))), [ self.assertEqual(key_expansion(list(bytearray.fromhex(key))), [
0x4F, 0x6B, 0xDA, 0xA3, 0x9E, 0x2F, 0x8C, 0xB0, 0x7F, 0x5E, 0x72, 0x2D, 0x9E, 0xDE, 0xF3, 0x14, 0x4F, 0x6B, 0xDA, 0xA3, 0x9E, 0x2F, 0x8C, 0xB0, 0x7F, 0x5E, 0x72, 0x2D, 0x9E, 0xDE, 0xF3, 0x14,
0x53, 0x66, 0x20, 0xA8, 0xCD, 0x49, 0xAC, 0x18, 0xB2, 0x17, 0xDE, 0x35, 0x2C, 0xC9, 0x2D, 0x21, 0x53, 0x66, 0x20, 0xA8, 0xCD, 0x49, 0xAC, 0x18, 0xB2, 0x17, 0xDE, 0x35, 0x2C, 0xC9, 0x2D, 0x21,
0x8C, 0xBE, 0xDD, 0xD9, 0x41, 0xF7, 0x71, 0xC1, 0xF3, 0xE0, 0xAF, 0xF4, 0xDF, 0x29, 0x82, 0xD5, 0x8C, 0xBE, 0xDD, 0xD9, 0x41, 0xF7, 0x71, 0xC1, 0xF3, 0xE0, 0xAF, 0xF4, 0xDF, 0x29, 0x82, 0xD5,

View file

@ -12,12 +12,7 @@ import struct
from yt_dlp import compat from yt_dlp import compat
from yt_dlp.compat import urllib # isort: split from yt_dlp.compat import urllib # isort: split
from yt_dlp.compat import ( from yt_dlp.compat import compat_etree_fromstring, compat_expanduser
compat_etree_fromstring,
compat_expanduser,
compat_urllib_parse_unquote, # noqa: TID251
compat_urllib_parse_urlencode, # noqa: TID251
)
from yt_dlp.compat.urllib.request import getproxies from yt_dlp.compat.urllib.request import getproxies
@ -43,39 +38,6 @@ class TestCompat(unittest.TestCase):
finally: finally:
os.environ['HOME'] = old_home or '' os.environ['HOME'] = old_home or ''
def test_compat_urllib_parse_unquote(self):
self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def')
self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def')
self.assertEqual(compat_urllib_parse_unquote(''), '')
self.assertEqual(compat_urllib_parse_unquote('%'), '%')
self.assertEqual(compat_urllib_parse_unquote('%%'), '%%')
self.assertEqual(compat_urllib_parse_unquote('%%%'), '%%%')
self.assertEqual(compat_urllib_parse_unquote('%2F'), '/')
self.assertEqual(compat_urllib_parse_unquote('%2f'), '/')
self.assertEqual(compat_urllib_parse_unquote('%E6%B4%A5%E6%B3%A2'), '津波')
self.assertEqual(
compat_urllib_parse_unquote('''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%25%E2%96%85%E2%96%86%E2%96%87%E2%96%88" />
%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a'''),
'''<meta property="og:description" content="▁▂▃▄%▅▆▇█" />
%<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''')
self.assertEqual(
compat_urllib_parse_unquote('''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%'''),
'''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%Things%''')
def test_compat_urllib_parse_unquote_plus(self):
self.assertEqual(urllib.parse.unquote_plus('abc%20def'), 'abc def')
self.assertEqual(urllib.parse.unquote_plus('%7e/abc+def'), '~/abc def')
def test_compat_urllib_parse_urlencode(self):
self.assertEqual(compat_urllib_parse_urlencode({'abc': 'def'}), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode({'abc': b'def'}), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode({b'abc': 'def'}), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode({b'abc': b'def'}), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode([('abc', 'def')]), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode([('abc', b'def')]), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode([(b'abc', 'def')]), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode([(b'abc', b'def')]), 'abc=def')
def test_compat_etree_fromstring(self): def test_compat_etree_fromstring(self):
xml = ''' xml = '''
<root foo="bar" spam="中文"> <root foo="bar" spam="中文">

View file

@ -105,6 +105,13 @@ class TestCookies(unittest.TestCase):
decryptor = LinuxChromeCookieDecryptor('Chrome', Logger()) decryptor = LinuxChromeCookieDecryptor('Chrome', Logger())
self.assertEqual(decryptor.decrypt(encrypted_value), value) self.assertEqual(decryptor.decrypt(encrypted_value), value)
def test_chrome_cookie_decryptor_linux_v10_meta24(self):
with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b''}):
encrypted_value = b'v10\x1f\xe4\x0e[\x83\x0c\xcc*kPi \xce\x8d\x1d\xbb\x80\r\x11\t\xbb\x9e^Hy\x94\xf4\x963\x9f\x82\xba\xfe\xa1\xed\xb9\xf1)\x00710\x92\xc8/<\x96B'
value = 'DE'
decryptor = LinuxChromeCookieDecryptor('Chrome', Logger(), meta_version=24)
self.assertEqual(decryptor.decrypt(encrypted_value), value)
def test_chrome_cookie_decryptor_windows_v10(self): def test_chrome_cookie_decryptor_windows_v10(self):
with MonkeyPatch(cookies, { with MonkeyPatch(cookies, {
'_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2<z\x16]\n\xbb\xb8\xcb\xd7\x9bA\xc3\x14e\x99{\xd6\xf4&', '_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2<z\x16]\n\xbb\xb8\xcb\xd7\x9bA\xc3\x14e\x99{\xd6\xf4&',
@ -114,6 +121,15 @@ class TestCookies(unittest.TestCase):
decryptor = WindowsChromeCookieDecryptor('', Logger()) decryptor = WindowsChromeCookieDecryptor('', Logger())
self.assertEqual(decryptor.decrypt(encrypted_value), value) self.assertEqual(decryptor.decrypt(encrypted_value), value)
def test_chrome_cookie_decryptor_windows_v10_meta24(self):
with MonkeyPatch(cookies, {
'_get_windows_v10_key': lambda *args, **kwargs: b'\xea\x8b\x02\xc3\xc6\xc5\x99\xc3\xa3[ j\xfa\xf6\xfcU\xac\x13u\xdc\x0c\x0e\xf1\x03\x90\xb6\xdf\xbb\x8fL\xb1\xb2',
}):
encrypted_value = b'v10dN\xe1\xacy\x84^\xe1I\xact\x03r\xfb\xe2\xce{^\x0e<(\xb0y\xeb\x01\xfb@"\x9e\x8c\xa53~\xdb*\x8f\xac\x8b\xe3\xfd3\x06\xe5\x93\x19OyOG\xb2\xfb\x1d$\xc0\xda\x13j\x9e\xfe\xc5\xa3\xa8\xfe\xd9'
value = '1234'
decryptor = WindowsChromeCookieDecryptor('', Logger(), meta_version=24)
self.assertEqual(decryptor.decrypt(encrypted_value), value)
def test_chrome_cookie_decryptor_mac_v10(self): def test_chrome_cookie_decryptor_mac_v10(self):
with MonkeyPatch(cookies, {'_get_mac_keyring_password': lambda *args, **kwargs: b'6eIDUdtKAacvlHwBVwvg/Q=='}): with MonkeyPatch(cookies, {'_get_mac_keyring_password': lambda *args, **kwargs: b'6eIDUdtKAacvlHwBVwvg/Q=='}):
encrypted_value = b'v10\xb3\xbe\xad\xa1[\x9fC\xa1\x98\xe0\x9a\x01\xd9\xcf\xbfc' encrypted_value = b'v10\xb3\xbe\xad\xa1[\x9fC\xa1\x98\xe0\x9a\x01\xd9\xcf\xbfc'

View file

@ -15,7 +15,6 @@ import threading
from test.helper import http_server_port, try_rm from test.helper import http_server_port, try_rm
from yt_dlp import YoutubeDL from yt_dlp import YoutubeDL
from yt_dlp.downloader.http import HttpFD from yt_dlp.downloader.http import HttpFD
from yt_dlp.utils import encodeFilename
from yt_dlp.utils._utils import _YDLLogger as FakeLogger from yt_dlp.utils._utils import _YDLLogger as FakeLogger
TEST_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DIR = os.path.dirname(os.path.abspath(__file__))
@ -82,12 +81,12 @@ class TestHttpFD(unittest.TestCase):
ydl = YoutubeDL(params) ydl = YoutubeDL(params)
downloader = HttpFD(ydl, params) downloader = HttpFD(ydl, params)
filename = 'testfile.mp4' filename = 'testfile.mp4'
try_rm(encodeFilename(filename)) try_rm(filename)
self.assertTrue(downloader.real_download(filename, { self.assertTrue(downloader.real_download(filename, {
'url': f'http://127.0.0.1:{self.port}/{ep}', 'url': f'http://127.0.0.1:{self.port}/{ep}',
}), ep) }), ep)
self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE, ep) self.assertEqual(os.path.getsize(filename), TEST_SIZE, ep)
try_rm(encodeFilename(filename)) try_rm(filename)
def download_all(self, params): def download_all(self, params):
for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'): for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):

View file

@ -9,12 +9,17 @@ from yt_dlp.utils import (
determine_ext, determine_ext,
dict_get, dict_get,
int_or_none, int_or_none,
join_nonempty,
str_or_none, str_or_none,
) )
from yt_dlp.utils.traversal import ( from yt_dlp.utils.traversal import (
traverse_obj, find_element,
find_elements,
require, require,
subs_list_to_dict, subs_list_to_dict,
traverse_obj,
trim_str,
unpack,
) )
_TEST_DATA = { _TEST_DATA = {
@ -34,6 +39,14 @@ _TEST_DATA = {
'dict': {}, 'dict': {},
} }
_TEST_HTML = '''<html><body>
<div class="a">1</div>
<div class="a" id="x" custom="z">2</div>
<div class="b" data-id="y" custom="z">3</div>
<p class="a">4</p>
<p id="d" custom="e">5</p>
</body></html>'''
class TestTraversal: class TestTraversal:
def test_traversal_base(self): def test_traversal_base(self):
@ -468,7 +481,7 @@ class TestTraversalHelpers:
'id': 'name', 'id': 'name',
'data': 'content', 'data': 'content',
'url': 'url', 'url': 'url',
}, all, {subs_list_to_dict}]) == { }, all, {subs_list_to_dict(lang=None)}]) == {
'de': [{'url': 'https://example.com/subs/de.ass'}], 'de': [{'url': 'https://example.com/subs/de.ass'}],
'en': [{'data': 'content'}], 'en': [{'data': 'content'}],
}, 'subs with mandatory items missing should be filtered' }, 'subs with mandatory items missing should be filtered'
@ -477,7 +490,7 @@ class TestTraversalHelpers:
{'url': 'https://example.com/subs/en', 'name': 'en'}, {'url': 'https://example.com/subs/en', 'name': 'en'},
], [..., { ], [..., {
'id': 'name', 'id': 'name',
'ext': ['url', {lambda x: determine_ext(x, default_ext=None)}], 'ext': ['url', {determine_ext(default_ext=None)}],
'url': 'url', 'url': 'url',
}, all, {subs_list_to_dict(ext='ext')}]) == { }, all, {subs_list_to_dict(ext='ext')}]) == {
'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}], 'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}],
@ -494,6 +507,121 @@ class TestTraversalHelpers:
{'url': 'https://example.com/subs/en1', 'ext': 'ext'}, {'url': 'https://example.com/subs/en1', 'ext': 'ext'},
{'url': 'https://example.com/subs/en2', 'ext': 'ext'}, {'url': 'https://example.com/subs/en2', 'ext': 'ext'},
]}, '`quality` key should sort subtitle list accordingly' ]}, '`quality` key should sort subtitle list accordingly'
assert traverse_obj([
{'name': 'de', 'url': 'https://example.com/subs/de.ass'},
{'name': 'de'},
{'name': 'en', 'content': 'content'},
{'url': 'https://example.com/subs/en'},
], [..., {
'id': 'name',
'url': 'url',
'data': 'content',
}, all, {subs_list_to_dict(lang='en')}]) == {
'de': [{'url': 'https://example.com/subs/de.ass'}],
'en': [
{'data': 'content'},
{'url': 'https://example.com/subs/en'},
],
}, 'optionally provided lang should be used if no id available'
assert traverse_obj([
{'name': 1, 'url': 'https://example.com/subs/de1'},
{'name': {}, 'url': 'https://example.com/subs/de2'},
{'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
{'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
], [..., {
'id': 'name',
'url': 'url',
'ext': 'ext',
}, all, {subs_list_to_dict(lang=None)}]) == {
'de': [
{'url': 'https://example.com/subs/de3'},
{'url': 'https://example.com/subs/de4'},
],
}, 'non str types should be ignored for id and ext'
assert traverse_obj([
{'name': 1, 'url': 'https://example.com/subs/de1'},
{'name': {}, 'url': 'https://example.com/subs/de2'},
{'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
{'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
], [..., {
'id': 'name',
'url': 'url',
'ext': 'ext',
}, all, {subs_list_to_dict(lang='de')}]) == {
'de': [
{'url': 'https://example.com/subs/de1'},
{'url': 'https://example.com/subs/de2'},
{'url': 'https://example.com/subs/de3'},
{'url': 'https://example.com/subs/de4'},
],
}, 'non str types should be replaced by default id'
def test_trim_str(self):
with pytest.raises(TypeError):
trim_str('positional')
assert callable(trim_str(start='a'))
assert trim_str(start='ab')('abc') == 'c'
assert trim_str(end='bc')('abc') == 'a'
assert trim_str(start='a', end='c')('abc') == 'b'
assert trim_str(start='ab', end='c')('abc') == ''
assert trim_str(start='a', end='bc')('abc') == ''
assert trim_str(start='ab', end='bc')('abc') == ''
assert trim_str(start='abc', end='abc')('abc') == ''
assert trim_str(start='', end='')('abc') == 'abc'
def test_unpack(self):
assert unpack(lambda *x: ''.join(map(str, x)))([1, 2, 3]) == '123'
assert unpack(join_nonempty)([1, 2, 3]) == '1-2-3'
assert unpack(join_nonempty, delim=' ')([1, 2, 3]) == '1 2 3'
with pytest.raises(TypeError):
unpack(join_nonempty)()
with pytest.raises(TypeError):
unpack()
def test_find_element(self):
for improper_kwargs in [
dict(attr='data-id'),
dict(value='y'),
dict(attr='data-id', value='y', cls='a'),
dict(attr='data-id', value='y', id='x'),
dict(cls='a', id='x'),
dict(cls='a', tag='p'),
dict(cls='[ab]', regex=True),
]:
with pytest.raises(AssertionError):
find_element(**improper_kwargs)(_TEST_HTML)
assert find_element(cls='a')(_TEST_HTML) == '1'
assert find_element(cls='a', html=True)(_TEST_HTML) == '<div class="a">1</div>'
assert find_element(id='x')(_TEST_HTML) == '2'
assert find_element(id='[ex]')(_TEST_HTML) is None
assert find_element(id='[ex]', regex=True)(_TEST_HTML) == '2'
assert find_element(id='x', html=True)(_TEST_HTML) == '<div class="a" id="x" custom="z">2</div>'
assert find_element(attr='data-id', value='y')(_TEST_HTML) == '3'
assert find_element(attr='data-id', value='y(?:es)?')(_TEST_HTML) is None
assert find_element(attr='data-id', value='y(?:es)?', regex=True)(_TEST_HTML) == '3'
assert find_element(
attr='data-id', value='y', html=True)(_TEST_HTML) == '<div class="b" data-id="y" custom="z">3</div>'
def test_find_elements(self):
for improper_kwargs in [
dict(tag='p'),
dict(attr='data-id'),
dict(value='y'),
dict(attr='data-id', value='y', cls='a'),
dict(cls='a', tag='div'),
dict(cls='[ab]', regex=True),
]:
with pytest.raises(AssertionError):
find_elements(**improper_kwargs)(_TEST_HTML)
assert find_elements(cls='a')(_TEST_HTML) == ['1', '2', '4']
assert find_elements(cls='a', html=True)(_TEST_HTML) == [
'<div class="a">1</div>', '<div class="a" id="x" custom="z">2</div>', '<p class="a">4</p>']
assert find_elements(attr='custom', value='z')(_TEST_HTML) == ['2', '3']
assert find_elements(attr='custom', value='[ez]')(_TEST_HTML) == []
assert find_elements(attr='custom', value='[ez]', regex=True)(_TEST_HTML) == ['2', '3', '5']
class TestDictGet: class TestDictGet:

View file

@ -4,6 +4,7 @@
import os import os
import sys import sys
import unittest import unittest
import unittest.mock
import warnings import warnings
import datetime as dt import datetime as dt
@ -20,7 +21,6 @@ import xml.etree.ElementTree
from yt_dlp.compat import ( from yt_dlp.compat import (
compat_etree_fromstring, compat_etree_fromstring,
compat_HTMLParseError, compat_HTMLParseError,
compat_os_name,
) )
from yt_dlp.utils import ( from yt_dlp.utils import (
Config, Config,
@ -48,7 +48,6 @@ from yt_dlp.utils import (
dfxp2srt, dfxp2srt,
encode_base_n, encode_base_n,
encode_compat_str, encode_compat_str,
encodeFilename,
expand_path, expand_path,
extract_attributes, extract_attributes,
extract_basic_auth, extract_basic_auth,
@ -68,7 +67,6 @@ from yt_dlp.utils import (
get_elements_html_by_class, get_elements_html_by_class,
get_elements_text_and_html_by_attribute, get_elements_text_and_html_by_attribute,
int_or_none, int_or_none,
intlist_to_bytes,
iri_to_uri, iri_to_uri,
is_html, is_html,
js_to_json, js_to_json,
@ -343,11 +341,13 @@ class TestUtil(unittest.TestCase):
self.assertEqual(remove_start(None, 'A - '), None) self.assertEqual(remove_start(None, 'A - '), None)
self.assertEqual(remove_start('A - B', 'A - '), 'B') self.assertEqual(remove_start('A - B', 'A - '), 'B')
self.assertEqual(remove_start('B - A', 'A - '), 'B - A') self.assertEqual(remove_start('B - A', 'A - '), 'B - A')
self.assertEqual(remove_start('non-empty', ''), 'non-empty')
def test_remove_end(self): def test_remove_end(self):
self.assertEqual(remove_end(None, ' - B'), None) self.assertEqual(remove_end(None, ' - B'), None)
self.assertEqual(remove_end('A - B', ' - B'), 'A') self.assertEqual(remove_end('A - B', ' - B'), 'A')
self.assertEqual(remove_end('B - A', ' - B'), 'B - A') self.assertEqual(remove_end('B - A', ' - B'), 'B - A')
self.assertEqual(remove_end('non-empty', ''), 'non-empty')
def test_remove_quotes(self): def test_remove_quotes(self):
self.assertEqual(remove_quotes(None), None) self.assertEqual(remove_quotes(None), None)
@ -563,10 +563,10 @@ class TestUtil(unittest.TestCase):
self.assertEqual(res_data, {'a': 'b', 'c': 'd'}) self.assertEqual(res_data, {'a': 'b', 'c': 'd'})
def test_shell_quote(self): def test_shell_quote(self):
args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')] args = ['ffmpeg', '-i', 'ñ€ß\'.mp4']
self.assertEqual( self.assertEqual(
shell_quote(args), shell_quote(args),
"""ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''') """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if os.name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')
def test_float_or_none(self): def test_float_or_none(self):
self.assertEqual(float_or_none('42.42'), 42.42) self.assertEqual(float_or_none('42.42'), 42.42)
@ -1306,15 +1306,10 @@ class TestUtil(unittest.TestCase):
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb') self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb')
def test_intlist_to_bytes(self):
self.assertEqual(
intlist_to_bytes([0, 1, 127, 128, 255]),
b'\x00\x01\x7f\x80\xff')
def test_args_to_str(self): def test_args_to_str(self):
self.assertEqual( self.assertEqual(
args_to_str(['foo', 'ba/r', '-baz', '2 be', '']), args_to_str(['foo', 'ba/r', '-baz', '2 be', '']),
'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""', 'foo ba/r -baz \'2 be\' \'\'' if os.name != 'nt' else 'foo ba/r -baz "2 be" ""',
) )
def test_parse_filesize(self): def test_parse_filesize(self):
@ -2114,7 +2109,7 @@ Line 1
assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=') assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=')
assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz') assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')
@unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows') @unittest.skipUnless(os.name == 'nt', 'Only relevant on Windows')
def test_windows_escaping(self): def test_windows_escaping(self):
tests = [ tests = [
'test"&', 'test"&',
@ -2148,6 +2143,12 @@ Line 1
assert run_shell(args) == expected assert run_shell(args) == expected
assert run_shell(shell_quote(args, shell=True)) == expected assert run_shell(shell_quote(args, shell=True)) == expected
def test_partial_application(self):
assert callable(int_or_none(scale=10)), 'missing positional parameter should apply partially'
assert int_or_none(10, scale=0.1) == 100, 'positionally passed argument should call function'
assert int_or_none(v=10) == 10, 'keyword passed positional should call function'
assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function'
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

4
test/testdata/netrc/netrc vendored Normal file
View file

@ -0,0 +1,4 @@
machine normal_use login user password pass
machine empty_user login "" password pass
machine empty_pass login user password ""
machine both_empty login "" password ""

2
test/testdata/netrc/print_netrc.py vendored Normal file
View file

@ -0,0 +1,2 @@
with open('./test/testdata/netrc/netrc', encoding='utf-8') as fp:
print(fp.read())

View file

@ -26,7 +26,7 @@ import unicodedata
from .cache import Cache from .cache import Cache
from .compat import urllib # isort: split from .compat import urllib # isort: split
from .compat import compat_os_name, urllib_req_to_req from .compat import urllib_req_to_req
from .cookies import CookieLoadError, LenientSimpleCookie, load_cookies from .cookies import CookieLoadError, LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version from .downloader.rtmp import rtmpdump_version
@ -109,7 +109,6 @@ from .utils import (
determine_ext, determine_ext,
determine_protocol, determine_protocol,
encode_compat_str, encode_compat_str,
encodeFilename,
escapeHTML, escapeHTML,
expand_path, expand_path,
extract_basic_auth, extract_basic_auth,
@ -167,7 +166,7 @@ from .utils.networking import (
) )
from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__ from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__
if compat_os_name == 'nt': if os.name == 'nt':
import ctypes import ctypes
@ -470,7 +469,7 @@ class YoutubeDL:
The following options do not work when used through the API: The following options do not work when used through the API:
filename, abort-on-error, multistreams, no-live-chat, filename, abort-on-error, multistreams, no-live-chat,
format-sort, no-clean-infojson, no-playlist-metafiles, format-sort, no-clean-infojson, no-playlist-metafiles,
no-keep-subs, no-attach-info-json, allow-unsafe-ext. no-keep-subs, no-attach-info-json, allow-unsafe-ext, prefer-vp9-sort.
Refer __init__.py for their implementation Refer __init__.py for their implementation
progress_template: Dictionary of templates for progress outputs. progress_template: Dictionary of templates for progress outputs.
Allowed keys are 'download', 'postprocess', Allowed keys are 'download', 'postprocess',
@ -643,7 +642,7 @@ class YoutubeDL:
out=stdout, out=stdout,
error=sys.stderr, error=sys.stderr,
screen=sys.stderr if self.params.get('quiet') else stdout, screen=sys.stderr if self.params.get('quiet') else stdout,
console=None if compat_os_name == 'nt' else next( console=None if os.name == 'nt' else next(
filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None), filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None),
) )
@ -952,7 +951,7 @@ class YoutubeDL:
self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once) self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
def _send_console_code(self, code): def _send_console_code(self, code):
if compat_os_name == 'nt' or not self._out_files.console: if os.name == 'nt' or not self._out_files.console:
return return
self._write_string(code, self._out_files.console) self._write_string(code, self._out_files.console)
@ -960,7 +959,7 @@ class YoutubeDL:
if not self.params.get('consoletitle', False): if not self.params.get('consoletitle', False):
return return
message = remove_terminal_sequences(message) message = remove_terminal_sequences(message)
if compat_os_name == 'nt': if os.name == 'nt':
if ctypes.windll.kernel32.GetConsoleWindow(): if ctypes.windll.kernel32.GetConsoleWindow():
# c_wchar_p() might not be necessary if `message` is # c_wchar_p() might not be necessary if `message` is
# already of type unicode() # already of type unicode()
@ -3255,9 +3254,9 @@ class YoutubeDL:
if full_filename is None: if full_filename is None:
return return
if not self._ensure_dir_exists(encodeFilename(full_filename)): if not self._ensure_dir_exists(full_filename):
return return
if not self._ensure_dir_exists(encodeFilename(temp_filename)): if not self._ensure_dir_exists(temp_filename):
return return
if self._write_description('video', info_dict, if self._write_description('video', info_dict,
@ -3289,16 +3288,16 @@ class YoutubeDL:
if self.params.get('writeannotations', False): if self.params.get('writeannotations', False):
annofn = self.prepare_filename(info_dict, 'annotation') annofn = self.prepare_filename(info_dict, 'annotation')
if annofn: if annofn:
if not self._ensure_dir_exists(encodeFilename(annofn)): if not self._ensure_dir_exists(annofn):
return return
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)): if not self.params.get('overwrites', True) and os.path.exists(annofn):
self.to_screen('[info] Video annotations are already present') self.to_screen('[info] Video annotations are already present')
elif not info_dict.get('annotations'): elif not info_dict.get('annotations'):
self.report_warning('There are no annotations to write.') self.report_warning('There are no annotations to write.')
else: else:
try: try:
self.to_screen('[info] Writing video annotations to: ' + annofn) self.to_screen('[info] Writing video annotations to: ' + annofn)
with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: with open(annofn, 'w', encoding='utf-8') as annofile:
annofile.write(info_dict['annotations']) annofile.write(info_dict['annotations'])
except (KeyError, TypeError): except (KeyError, TypeError):
self.report_warning('There are no annotations to write.') self.report_warning('There are no annotations to write.')
@ -3314,14 +3313,14 @@ class YoutubeDL:
f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown') f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
return True return True
linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext')) linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
if not self._ensure_dir_exists(encodeFilename(linkfn)): if not self._ensure_dir_exists(linkfn):
return False return False
if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)): if self.params.get('overwrites', True) and os.path.exists(linkfn):
self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present') self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
return True return True
try: try:
self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}') self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', with open(to_high_limit_path(linkfn), 'w', encoding='utf-8',
newline='\r\n' if link_type == 'url' else '\n') as linkfile: newline='\r\n' if link_type == 'url' else '\n') as linkfile:
template_vars = {'url': url} template_vars = {'url': url}
if link_type == 'desktop': if link_type == 'desktop':
@ -3352,7 +3351,7 @@ class YoutubeDL:
if self.params.get('skip_download'): if self.params.get('skip_download'):
info_dict['filepath'] = temp_filename info_dict['filepath'] = temp_filename
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) info_dict['__finaldir'] = os.path.dirname(os.path.abspath(full_filename))
info_dict['__files_to_move'] = files_to_move info_dict['__files_to_move'] = files_to_move
replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)) replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
info_dict['__write_download_archive'] = self.params.get('force_write_download_archive') info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
@ -3482,7 +3481,7 @@ class YoutubeDL:
self.report_file_already_downloaded(dl_filename) self.report_file_already_downloaded(dl_filename)
dl_filename = dl_filename or temp_filename dl_filename = dl_filename or temp_filename
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) info_dict['__finaldir'] = os.path.dirname(os.path.abspath(full_filename))
except network_exceptions as err: except network_exceptions as err:
self.report_error(f'unable to download video data: {err}') self.report_error(f'unable to download video data: {err}')
@ -4297,7 +4296,7 @@ class YoutubeDL:
else: else:
try: try:
self.to_screen(f'[info] Writing {label} description to: {descfn}') self.to_screen(f'[info] Writing {label} description to: {descfn}')
with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: with open(descfn, 'w', encoding='utf-8') as descfile:
descfile.write(ie_result['description']) descfile.write(ie_result['description'])
except OSError: except OSError:
self.report_error(f'Cannot write {label} description file {descfn}') self.report_error(f'Cannot write {label} description file {descfn}')
@ -4381,7 +4380,9 @@ class YoutubeDL:
return None return None
for idx, t in list(enumerate(thumbnails))[::-1]: for idx, t in list(enumerate(thumbnails))[::-1]:
thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg') thumb_ext = t.get('ext') or determine_ext(t['url'], 'jpg')
if multiple:
thumb_ext = f'{t["id"]}.{thumb_ext}'
thumb_display_id = f'{label} thumbnail {t["id"]}' thumb_display_id = f'{label} thumbnail {t["id"]}'
thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext')) thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext')) thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
@ -4397,7 +4398,7 @@ class YoutubeDL:
try: try:
uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {}))) uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}') self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
with open(encodeFilename(thumb_filename), 'wb') as thumbf: with open(thumb_filename, 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf) shutil.copyfileobj(uf, thumbf)
ret.append((thumb_filename, thumb_filename_final)) ret.append((thumb_filename, thumb_filename_final))
t['filepath'] = thumb_filename t['filepath'] = thumb_filename

View file

@ -14,7 +14,6 @@ import os
import re import re
import traceback import traceback
from .compat import compat_os_name
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError
from .downloader.external import get_external_downloader from .downloader.external import get_external_downloader
from .extractor import list_extractor_classes from .extractor import list_extractor_classes
@ -44,7 +43,6 @@ from .utils import (
GeoUtils, GeoUtils,
PlaylistEntries, PlaylistEntries,
SameFileError, SameFileError,
decodeOption,
download_range_func, download_range_func,
expand_path, expand_path,
float_or_none, float_or_none,
@ -159,6 +157,9 @@ def set_compat_opts(opts):
opts.embed_infojson = False opts.embed_infojson = False
if 'format-sort' in opts.compat_opts: if 'format-sort' in opts.compat_opts:
opts.format_sort.extend(FormatSorter.ytdl_default) opts.format_sort.extend(FormatSorter.ytdl_default)
elif 'prefer-vp9-sort' in opts.compat_opts:
opts.format_sort.extend(FormatSorter._prefer_vp9_sort)
_video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False) _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False)
_audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False) _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False)
if _video_multistreams_set is False and _audio_multistreams_set is False: if _video_multistreams_set is False and _audio_multistreams_set is False:
@ -880,8 +881,8 @@ def parse_options(argv=None):
'listsubtitles': opts.listsubtitles, 'listsubtitles': opts.listsubtitles,
'subtitlesformat': opts.subtitlesformat, 'subtitlesformat': opts.subtitlesformat,
'subtitleslangs': opts.subtitleslangs, 'subtitleslangs': opts.subtitleslangs,
'matchtitle': decodeOption(opts.matchtitle), 'matchtitle': opts.matchtitle,
'rejecttitle': decodeOption(opts.rejecttitle), 'rejecttitle': opts.rejecttitle,
'max_downloads': opts.max_downloads, 'max_downloads': opts.max_downloads,
'prefer_free_formats': opts.prefer_free_formats, 'prefer_free_formats': opts.prefer_free_formats,
'trim_file_name': opts.trim_file_name, 'trim_file_name': opts.trim_file_name,
@ -1050,7 +1051,7 @@ def _real_main(argv=None):
ydl.warn_if_short_id(args) ydl.warn_if_short_id(args)
# Show a useful error message and wait for keypress if not launched from shell on Windows # Show a useful error message and wait for keypress if not launched from shell on Windows
if not args and compat_os_name == 'nt' and getattr(sys, 'frozen', False): if not args and os.name == 'nt' and getattr(sys, 'frozen', False):
import ctypes.wintypes import ctypes.wintypes
import msvcrt import msvcrt

View file

@ -3,7 +3,6 @@ from math import ceil
from .compat import compat_ord from .compat import compat_ord
from .dependencies import Cryptodome from .dependencies import Cryptodome
from .utils import bytes_to_intlist, intlist_to_bytes
if Cryptodome.AES: if Cryptodome.AES:
def aes_cbc_decrypt_bytes(data, key, iv): def aes_cbc_decrypt_bytes(data, key, iv):
@ -17,15 +16,15 @@ if Cryptodome.AES:
else: else:
def aes_cbc_decrypt_bytes(data, key, iv): def aes_cbc_decrypt_bytes(data, key, iv):
""" Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """ """ Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """
return intlist_to_bytes(aes_cbc_decrypt(*map(bytes_to_intlist, (data, key, iv)))) return bytes(aes_cbc_decrypt(*map(list, (data, key, iv))))
def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
""" Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """ """ Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """
return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce)))) return bytes(aes_gcm_decrypt_and_verify(*map(list, (data, key, tag, nonce))))
def aes_cbc_encrypt_bytes(data, key, iv, **kwargs): def aes_cbc_encrypt_bytes(data, key, iv, **kwargs):
return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs)) return bytes(aes_cbc_encrypt(*map(list, (data, key, iv)), **kwargs))
BLOCK_SIZE_BYTES = 16 BLOCK_SIZE_BYTES = 16
@ -221,7 +220,7 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
j0 = [*nonce, 0, 0, 0, 1] j0 = [*nonce, 0, 0, 0, 1]
else: else:
fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8 fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8
ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big')) ghash_in = nonce + [0] * fill + list((8 * len(nonce)).to_bytes(8, 'big'))
j0 = ghash(hash_subkey, ghash_in) j0 = ghash(hash_subkey, ghash_in)
# TODO: add nonce support to aes_ctr_decrypt # TODO: add nonce support to aes_ctr_decrypt
@ -230,13 +229,13 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
iv_ctr = inc(j0) iv_ctr = inc(j0)
decrypted_data = aes_ctr_decrypt(data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr))) decrypted_data = aes_ctr_decrypt(data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr)))
pad_len = len(data) // 16 * 16 pad_len = (BLOCK_SIZE_BYTES - (len(data) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES
s_tag = ghash( s_tag = ghash(
hash_subkey, hash_subkey,
data data
+ [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) # pad + [0] * pad_len # pad
+ bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data + list((0 * 8).to_bytes(8, 'big') # length of associated data
+ ((len(data) * 8).to_bytes(8, 'big'))), # length of data + ((len(data) * 8).to_bytes(8, 'big'))), # length of data
) )
if tag != aes_ctr_encrypt(s_tag, key, j0): if tag != aes_ctr_encrypt(s_tag, key, j0):
@ -300,8 +299,8 @@ def aes_decrypt_text(data, password, key_size_bytes):
""" """
NONCE_LENGTH_BYTES = 8 NONCE_LENGTH_BYTES = 8
data = bytes_to_intlist(base64.b64decode(data)) data = list(base64.b64decode(data))
password = bytes_to_intlist(password.encode()) password = list(password.encode())
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password)) key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES) key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
@ -310,7 +309,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
cipher = data[NONCE_LENGTH_BYTES:] cipher = data[NONCE_LENGTH_BYTES:]
decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)) decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES))
return intlist_to_bytes(decrypted_data) return bytes(decrypted_data)
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)

View file

@ -1,5 +1,4 @@
import os import os
import sys
import xml.etree.ElementTree as etree import xml.etree.ElementTree as etree
from .compat_utils import passthrough_module from .compat_utils import passthrough_module
@ -24,33 +23,14 @@ def compat_etree_fromstring(text):
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
compat_os_name = os._name if os.name == 'java' else os.name
def compat_shlex_quote(s):
from ..utils import shell_quote
return shell_quote(s)
def compat_ord(c): def compat_ord(c):
return c if isinstance(c, int) else ord(c) return c if isinstance(c, int) else ord(c)
if compat_os_name == 'nt' and sys.version_info < (3, 8):
# os.path.realpath on Windows does not follow symbolic links
# prior to Python 3.8 (see https://bugs.python.org/issue9949)
def compat_realpath(path):
while os.path.islink(path):
path = os.path.abspath(os.readlink(path))
return os.path.realpath(path)
else:
compat_realpath = os.path.realpath
# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl # Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl
# See https://github.com/yt-dlp/yt-dlp/issues/792 # See https://github.com/yt-dlp/yt-dlp/issues/792
# https://docs.python.org/3/library/os.path.html#os.path.expanduser # https://docs.python.org/3/library/os.path.html#os.path.expanduser
if compat_os_name in ('nt', 'ce'): if os.name in ('nt', 'ce'):
def compat_expanduser(path): def compat_expanduser(path):
HOME = os.environ.get('HOME') HOME = os.environ.get('HOME')
if not HOME: if not HOME:

View file

@ -8,16 +8,14 @@ passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn(
DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6)) DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6))
del passthrough_module del passthrough_module
import base64 import functools # noqa: F401
import urllib.error import os
import urllib.parse
compat_str = str
compat_b64decode = base64.b64decode compat_os_name = os.name
compat_realpath = os.path.realpath
compat_urlparse = urllib.parse
compat_parse_qs = urllib.parse.parse_qs def compat_shlex_quote(s):
compat_urllib_parse_unquote = urllib.parse.unquote from ..utils import shell_quote
compat_urllib_parse_urlencode = urllib.parse.urlencode return shell_quote(s)
compat_urllib_parse_urlparse = urllib.parse.urlparse

View file

@ -30,7 +30,7 @@ from asyncio import run as compat_asyncio_run # noqa: F401
from re import Pattern as compat_Pattern # noqa: F401 from re import Pattern as compat_Pattern # noqa: F401
from re import match as compat_Match # noqa: F401 from re import match as compat_Match # noqa: F401
from . import compat_expanduser, compat_HTMLParseError, compat_realpath from . import compat_expanduser, compat_HTMLParseError
from .compat_utils import passthrough_module from .compat_utils import passthrough_module
from ..dependencies import brotli as compat_brotli # noqa: F401 from ..dependencies import brotli as compat_brotli # noqa: F401
from ..dependencies import websockets as compat_websockets # noqa: F401 from ..dependencies import websockets as compat_websockets # noqa: F401
@ -78,7 +78,7 @@ compat_kwargs = lambda kwargs: kwargs
compat_map = map compat_map = map
compat_numeric_types = (int, float, complex) compat_numeric_types = (int, float, complex)
compat_os_path_expanduser = compat_expanduser compat_os_path_expanduser = compat_expanduser
compat_os_path_realpath = compat_realpath compat_os_path_realpath = os.path.realpath
compat_print = print compat_print = print
compat_shlex_split = shlex.split compat_shlex_split = shlex.split
compat_socket_create_connection = socket.create_connection compat_socket_create_connection = socket.create_connection
@ -104,5 +104,12 @@ compat_xml_parse_error = compat_xml_etree_ElementTree_ParseError = etree.ParseEr
compat_xpath = lambda xpath: xpath compat_xpath = lambda xpath: xpath
compat_zip = zip compat_zip = zip
workaround_optparse_bug9161 = lambda: None workaround_optparse_bug9161 = lambda: None
compat_str = str
compat_b64decode = base64.b64decode
compat_urlparse = urllib.parse
compat_parse_qs = urllib.parse.parse_qs
compat_urllib_parse_unquote = urllib.parse.unquote
compat_urllib_parse_urlencode = urllib.parse.urlencode
compat_urllib_parse_urlparse = urllib.parse.urlparse
legacy = [] legacy = []

View file

@ -1,7 +0,0 @@
# flake8: noqa: F405
from functools import * # noqa: F403
from .compat_utils import passthrough_module
passthrough_module(__name__, 'functools')
del passthrough_module

View file

@ -7,9 +7,9 @@ passthrough_module(__name__, 'urllib.request')
del passthrough_module del passthrough_module
from .. import compat_os_name import os
if compat_os_name == 'nt': if os.name == 'nt':
# On older Python versions, proxies are extracted from Windows registry erroneously. [1] # On older Python versions, proxies are extracted from Windows registry erroneously. [1]
# If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2] # If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2]
# It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade # It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade
@ -37,4 +37,4 @@ if compat_os_name == 'nt':
def getproxies(): def getproxies():
return getproxies_environment() or getproxies_registry_patched() return getproxies_environment() or getproxies_registry_patched()
del compat_os_name del os

View file

@ -25,7 +25,6 @@ from .aes import (
aes_gcm_decrypt_and_verify_bytes, aes_gcm_decrypt_and_verify_bytes,
unpad_pkcs7, unpad_pkcs7,
) )
from .compat import compat_os_name
from .dependencies import ( from .dependencies import (
_SECRETSTORAGE_UNAVAILABLE_REASON, _SECRETSTORAGE_UNAVAILABLE_REASON,
secretstorage, secretstorage,
@ -302,12 +301,18 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"') raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
logger.debug(f'Extracting cookies from: "{cookie_database_path}"') logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)
with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
cursor = None cursor = None
try: try:
cursor = _open_database_copy(cookie_database_path, tmpdir) cursor = _open_database_copy(cookie_database_path, tmpdir)
# meta_version is necessary to determine if we need to trim the hash prefix from the cookies
# Ref: https://chromium.googlesource.com/chromium/src/+/b02dcebd7cafab92770734dc2bc317bd07f1d891/net/extras/sqlite/sqlite_persistent_cookie_store.cc#223
meta_version = int(cursor.execute('SELECT value FROM meta WHERE key = "version"').fetchone()[0])
decryptor = get_cookie_decryptor(
config['browser_dir'], config['keyring_name'], logger,
keyring=keyring, meta_version=meta_version)
cursor.connection.text_factory = bytes cursor.connection.text_factory = bytes
column_names = _get_column_names(cursor, 'cookies') column_names = _get_column_names(cursor, 'cookies')
secure_column = 'is_secure' if 'is_secure' in column_names else 'secure' secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
@ -337,7 +342,7 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
logger.debug(f'cookie version breakdown: {counts}') logger.debug(f'cookie version breakdown: {counts}')
return jar return jar
except PermissionError as error: except PermissionError as error:
if compat_os_name == 'nt' and error.errno == 13: if os.name == 'nt' and error.errno == 13:
message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info' message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
logger.error(message) logger.error(message)
raise DownloadError(message) # force exit raise DownloadError(message) # force exit
@ -405,22 +410,23 @@ class ChromeCookieDecryptor:
raise NotImplementedError('Must be implemented by sub classes') raise NotImplementedError('Must be implemented by sub classes')
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None): def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None, meta_version=None):
if sys.platform == 'darwin': if sys.platform == 'darwin':
return MacChromeCookieDecryptor(browser_keyring_name, logger) return MacChromeCookieDecryptor(browser_keyring_name, logger, meta_version=meta_version)
elif sys.platform in ('win32', 'cygwin'): elif sys.platform in ('win32', 'cygwin'):
return WindowsChromeCookieDecryptor(browser_root, logger) return WindowsChromeCookieDecryptor(browser_root, logger, meta_version=meta_version)
return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring) return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring, meta_version=meta_version)
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
def __init__(self, browser_keyring_name, logger, *, keyring=None): def __init__(self, browser_keyring_name, logger, *, keyring=None, meta_version=None):
self._logger = logger self._logger = logger
self._v10_key = self.derive_key(b'peanuts') self._v10_key = self.derive_key(b'peanuts')
self._empty_key = self.derive_key(b'') self._empty_key = self.derive_key(b'')
self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0} self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
self._browser_keyring_name = browser_keyring_name self._browser_keyring_name = browser_keyring_name
self._keyring = keyring self._keyring = keyring
self._meta_version = meta_version or 0
@functools.cached_property @functools.cached_property
def _v11_key(self): def _v11_key(self):
@ -449,14 +455,18 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
if version == b'v10': if version == b'v10':
self._cookie_counts['v10'] += 1 self._cookie_counts['v10'] += 1
return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger) return _decrypt_aes_cbc_multi(
ciphertext, (self._v10_key, self._empty_key), self._logger,
hash_prefix=self._meta_version >= 24)
elif version == b'v11': elif version == b'v11':
self._cookie_counts['v11'] += 1 self._cookie_counts['v11'] += 1
if self._v11_key is None: if self._v11_key is None:
self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True) self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
return None return None
return _decrypt_aes_cbc_multi(ciphertext, (self._v11_key, self._empty_key), self._logger) return _decrypt_aes_cbc_multi(
ciphertext, (self._v11_key, self._empty_key), self._logger,
hash_prefix=self._meta_version >= 24)
else: else:
self._logger.warning(f'unknown cookie version: "{version}"', only_once=True) self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
@ -465,11 +475,12 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
class MacChromeCookieDecryptor(ChromeCookieDecryptor): class MacChromeCookieDecryptor(ChromeCookieDecryptor):
def __init__(self, browser_keyring_name, logger): def __init__(self, browser_keyring_name, logger, meta_version=None):
self._logger = logger self._logger = logger
password = _get_mac_keyring_password(browser_keyring_name, logger) password = _get_mac_keyring_password(browser_keyring_name, logger)
self._v10_key = None if password is None else self.derive_key(password) self._v10_key = None if password is None else self.derive_key(password)
self._cookie_counts = {'v10': 0, 'other': 0} self._cookie_counts = {'v10': 0, 'other': 0}
self._meta_version = meta_version or 0
@staticmethod @staticmethod
def derive_key(password): def derive_key(password):
@ -487,7 +498,8 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True) self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
return None return None
return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger) return _decrypt_aes_cbc_multi(
ciphertext, (self._v10_key,), self._logger, hash_prefix=self._meta_version >= 24)
else: else:
self._cookie_counts['other'] += 1 self._cookie_counts['other'] += 1
@ -497,10 +509,11 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor): class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
def __init__(self, browser_root, logger): def __init__(self, browser_root, logger, meta_version=None):
self._logger = logger self._logger = logger
self._v10_key = _get_windows_v10_key(browser_root, logger) self._v10_key = _get_windows_v10_key(browser_root, logger)
self._cookie_counts = {'v10': 0, 'other': 0} self._cookie_counts = {'v10': 0, 'other': 0}
self._meta_version = meta_version or 0
def decrypt(self, encrypted_value): def decrypt(self, encrypted_value):
version = encrypted_value[:3] version = encrypted_value[:3]
@ -524,7 +537,9 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length] ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
authentication_tag = raw_ciphertext[-authentication_tag_length:] authentication_tag = raw_ciphertext[-authentication_tag_length:]
return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger) return _decrypt_aes_gcm(
ciphertext, self._v10_key, nonce, authentication_tag, self._logger,
hash_prefix=self._meta_version >= 24)
else: else:
self._cookie_counts['other'] += 1 self._cookie_counts['other'] += 1
@ -1010,10 +1025,12 @@ def pbkdf2_sha1(password, salt, iterations, key_length):
return hashlib.pbkdf2_hmac('sha1', password, salt, iterations, key_length) return hashlib.pbkdf2_hmac('sha1', password, salt, iterations, key_length)
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16): def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16, hash_prefix=False):
for key in keys: for key in keys:
plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)) plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
try: try:
if hash_prefix:
return plaintext[32:].decode()
return plaintext.decode() return plaintext.decode()
except UnicodeDecodeError: except UnicodeDecodeError:
pass pass
@ -1021,7 +1038,7 @@ def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' '
return None return None
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger): def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger, hash_prefix=False):
try: try:
plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce) plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
except ValueError: except ValueError:
@ -1029,6 +1046,8 @@ def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
return None return None
try: try:
if hash_prefix:
return plaintext[32:].decode()
return plaintext.decode() return plaintext.decode()
except UnicodeDecodeError: except UnicodeDecodeError:
logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)

View file

@ -24,7 +24,7 @@ try:
from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401 from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401
from Crypto.Hash import CMAC, SHA1 # noqa: F401 from Crypto.Hash import CMAC, SHA1 # noqa: F401
from Crypto.PublicKey import RSA # noqa: F401 from Crypto.PublicKey import RSA # noqa: F401
except ImportError: except (ImportError, OSError):
__version__ = f'broken {__version__}'.strip() __version__ = f'broken {__version__}'.strip()

View file

@ -20,9 +20,7 @@ from ..utils import (
Namespace, Namespace,
RetryManager, RetryManager,
classproperty, classproperty,
decodeArgument,
deprecation_warning, deprecation_warning,
encodeFilename,
format_bytes, format_bytes,
join_nonempty, join_nonempty,
parse_bytes, parse_bytes,
@ -219,7 +217,7 @@ class FileDownloader:
def temp_name(self, filename): def temp_name(self, filename):
"""Returns a temporary filename for the given filename.""" """Returns a temporary filename for the given filename."""
if self.params.get('nopart', False) or filename == '-' or \ if self.params.get('nopart', False) or filename == '-' or \
(os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))): (os.path.exists(filename) and not os.path.isfile(filename)):
return filename return filename
return filename + '.part' return filename + '.part'
@ -273,7 +271,7 @@ class FileDownloader:
"""Try to set the last-modified time of the given file.""" """Try to set the last-modified time of the given file."""
if last_modified_hdr is None: if last_modified_hdr is None:
return return
if not os.path.isfile(encodeFilename(filename)): if not os.path.isfile(filename):
return return
timestr = last_modified_hdr timestr = last_modified_hdr
if timestr is None: if timestr is None:
@ -432,13 +430,13 @@ class FileDownloader:
""" """
nooverwrites_and_exists = ( nooverwrites_and_exists = (
not self.params.get('overwrites', True) not self.params.get('overwrites', True)
and os.path.exists(encodeFilename(filename)) and os.path.exists(filename)
) )
if not hasattr(filename, 'write'): if not hasattr(filename, 'write'):
continuedl_and_exists = ( continuedl_and_exists = (
self.params.get('continuedl', True) self.params.get('continuedl', True)
and os.path.isfile(encodeFilename(filename)) and os.path.isfile(filename)
and not self.params.get('nopart', False) and not self.params.get('nopart', False)
) )
@ -448,7 +446,7 @@ class FileDownloader:
self._hook_progress({ self._hook_progress({
'filename': filename, 'filename': filename,
'status': 'finished', 'status': 'finished',
'total_bytes': os.path.getsize(encodeFilename(filename)), 'total_bytes': os.path.getsize(filename),
}, info_dict) }, info_dict)
self._finish_multiline_status() self._finish_multiline_status()
return True, False return True, False
@ -489,9 +487,7 @@ class FileDownloader:
if not self.params.get('verbose', False): if not self.params.get('verbose', False):
return return
str_args = [decodeArgument(a) for a in args]
if exe is None: if exe is None:
exe = os.path.basename(str_args[0]) exe = os.path.basename(args[0])
self.write_debug(f'{exe} command line: {shell_quote(str_args)}') self.write_debug(f'{exe} command line: {shell_quote(args)}')

View file

@ -23,7 +23,6 @@ from ..utils import (
cli_valueless_option, cli_valueless_option,
determine_ext, determine_ext,
encodeArgument, encodeArgument,
encodeFilename,
find_available_port, find_available_port,
remove_end, remove_end,
traverse_obj, traverse_obj,
@ -67,7 +66,7 @@ class ExternalFD(FragmentFD):
'elapsed': time.time() - started, 'elapsed': time.time() - started,
} }
if filename != '-': if filename != '-':
fsize = os.path.getsize(encodeFilename(tmpfilename)) fsize = os.path.getsize(tmpfilename)
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
status.update({ status.update({
'downloaded_bytes': fsize, 'downloaded_bytes': fsize,
@ -184,9 +183,9 @@ class ExternalFD(FragmentFD):
dest.write(decrypt_fragment(fragment, src.read())) dest.write(decrypt_fragment(fragment, src.read()))
src.close() src.close()
if not self.params.get('keep_fragments', False): if not self.params.get('keep_fragments', False):
self.try_remove(encodeFilename(fragment_filename)) self.try_remove(fragment_filename)
dest.close() dest.close()
self.try_remove(encodeFilename(f'{tmpfilename}.frag.urls')) self.try_remove(f'{tmpfilename}.frag.urls')
return 0 return 0
def _call_process(self, cmd, info_dict): def _call_process(self, cmd, info_dict):
@ -620,7 +619,7 @@ class FFmpegFD(ExternalFD):
args += self._configuration_args(('_o1', '_o', '')) args += self._configuration_args(('_o1', '_o', ''))
args = [encodeArgument(opt) for opt in args] args = [encodeArgument(opt) for opt in args]
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) args.append(ffpp._ffmpeg_filename_argument(tmpfilename))
self._debug_cmd(args) self._debug_cmd(args)
piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats) piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats)

View file

@ -9,10 +9,9 @@ import time
from .common import FileDownloader from .common import FileDownloader
from .http import HttpFD from .http import HttpFD
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import compat_os_name
from ..networking import Request from ..networking import Request
from ..networking.exceptions import HTTPError, IncompleteRead from ..networking.exceptions import HTTPError, IncompleteRead
from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj from ..utils import DownloadError, RetryManager, traverse_obj
from ..utils.networking import HTTPHeaderDict from ..utils.networking import HTTPHeaderDict
from ..utils.progress import ProgressCalculator from ..utils.progress import ProgressCalculator
@ -152,7 +151,7 @@ class FragmentFD(FileDownloader):
if self.__do_ytdl_file(ctx): if self.__do_ytdl_file(ctx):
self._write_ytdl_file(ctx) self._write_ytdl_file(ctx)
if not self.params.get('keep_fragments', False): if not self.params.get('keep_fragments', False):
self.try_remove(encodeFilename(ctx['fragment_filename_sanitized'])) self.try_remove(ctx['fragment_filename_sanitized'])
del ctx['fragment_filename_sanitized'] del ctx['fragment_filename_sanitized']
def _prepare_frag_download(self, ctx): def _prepare_frag_download(self, ctx):
@ -188,7 +187,7 @@ class FragmentFD(FileDownloader):
}) })
if self.__do_ytdl_file(ctx): if self.__do_ytdl_file(ctx):
ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))) ytdl_file_exists = os.path.isfile(self.ytdl_filename(ctx['filename']))
continuedl = self.params.get('continuedl', True) continuedl = self.params.get('continuedl', True)
if continuedl and ytdl_file_exists: if continuedl and ytdl_file_exists:
self._read_ytdl_file(ctx) self._read_ytdl_file(ctx)
@ -390,7 +389,7 @@ class FragmentFD(FileDownloader):
def __exit__(self, exc_type, exc_val, exc_tb): def __exit__(self, exc_type, exc_val, exc_tb):
pass pass
if compat_os_name == 'nt': if os.name == 'nt':
def future_result(future): def future_result(future):
while True: while True:
try: try:

View file

@ -15,7 +15,6 @@ from ..utils import (
ThrottledDownload, ThrottledDownload,
XAttrMetadataError, XAttrMetadataError,
XAttrUnavailableError, XAttrUnavailableError,
encodeFilename,
int_or_none, int_or_none,
parse_http_range, parse_http_range,
try_call, try_call,
@ -58,9 +57,8 @@ class HttpFD(FileDownloader):
if self.params.get('continuedl', True): if self.params.get('continuedl', True):
# Establish possible resume length # Establish possible resume length
if os.path.isfile(encodeFilename(ctx.tmpfilename)): if os.path.isfile(ctx.tmpfilename):
ctx.resume_len = os.path.getsize( ctx.resume_len = os.path.getsize(ctx.tmpfilename)
encodeFilename(ctx.tmpfilename))
ctx.is_resume = ctx.resume_len > 0 ctx.is_resume = ctx.resume_len > 0
@ -241,7 +239,7 @@ class HttpFD(FileDownloader):
ctx.resume_len = byte_counter ctx.resume_len = byte_counter
else: else:
try: try:
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename)) ctx.resume_len = os.path.getsize(ctx.tmpfilename)
except FileNotFoundError: except FileNotFoundError:
ctx.resume_len = 0 ctx.resume_len = 0
raise RetryDownload(e) raise RetryDownload(e)

View file

@ -8,7 +8,6 @@ from ..utils import (
Popen, Popen,
check_executable, check_executable,
encodeArgument, encodeArgument,
encodeFilename,
get_exe_version, get_exe_version,
) )
@ -179,7 +178,7 @@ class RtmpFD(FileDownloader):
return False return False
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live: while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
prevsize = os.path.getsize(encodeFilename(tmpfilename)) prevsize = os.path.getsize(tmpfilename)
self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes') self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes')
time.sleep(5.0) # This seems to be needed time.sleep(5.0) # This seems to be needed
args = [*basic_args, '--resume'] args = [*basic_args, '--resume']
@ -187,7 +186,7 @@ class RtmpFD(FileDownloader):
args += ['--skip', '1'] args += ['--skip', '1']
args = [encodeArgument(a) for a in args] args = [encodeArgument(a) for a in args]
retval = run_rtmpdump(args) retval = run_rtmpdump(args)
cursize = os.path.getsize(encodeFilename(tmpfilename)) cursize = os.path.getsize(tmpfilename)
if prevsize == cursize and retval == RD_FAILED: if prevsize == cursize and retval == RD_FAILED:
break break
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
@ -196,7 +195,7 @@ class RtmpFD(FileDownloader):
retval = RD_SUCCESS retval = RD_SUCCESS
break break
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE): if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
fsize = os.path.getsize(encodeFilename(tmpfilename)) fsize = os.path.getsize(tmpfilename)
self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes') self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes')
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
self._hook_progress({ self._hook_progress({

View file

@ -2,7 +2,7 @@ import os
import subprocess import subprocess
from .common import FileDownloader from .common import FileDownloader
from ..utils import check_executable, encodeFilename from ..utils import check_executable
class RtspFD(FileDownloader): class RtspFD(FileDownloader):
@ -26,7 +26,7 @@ class RtspFD(FileDownloader):
retval = subprocess.call(args) retval = subprocess.call(args)
if retval == 0: if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename)) fsize = os.path.getsize(tmpfilename)
self.to_screen(f'\r[{args[0]}] {fsize} bytes') self.to_screen(f'\r[{args[0]}] {fsize} bytes')
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
self._hook_progress({ self._hook_progress({

View file

@ -208,6 +208,10 @@ from .bandcamp import (
BandcampUserIE, BandcampUserIE,
BandcampWeeklyIE, BandcampWeeklyIE,
) )
from .bandlab import (
BandlabIE,
BandlabPlaylistIE,
)
from .bannedvideo import BannedVideoIE from .bannedvideo import BannedVideoIE
from .bbc import ( from .bbc import (
BBCIE, BBCIE,
@ -278,6 +282,7 @@ from .bleacherreport import (
from .blerp import BlerpIE from .blerp import BlerpIE
from .blogger import BloggerIE from .blogger import BloggerIE
from .bloomberg import BloombergIE from .bloomberg import BloombergIE
from .bluesky import BlueskyIE
from .bokecc import BokeCCIE from .bokecc import BokeCCIE
from .bongacams import BongaCamsIE from .bongacams import BongaCamsIE
from .boosty import BoostyIE from .boosty import BoostyIE
@ -707,6 +712,7 @@ from .gab import (
GabTVIE, GabTVIE,
) )
from .gaia import GaiaIE from .gaia import GaiaIE
from .gamedevtv import GameDevTVDashboardIE
from .gamejolt import ( from .gamejolt import (
GameJoltCommunityIE, GameJoltCommunityIE,
GameJoltGameIE, GameJoltGameIE,
@ -940,6 +946,10 @@ from .kaltura import KalturaIE
from .kankanews import KankaNewsIE from .kankanews import KankaNewsIE
from .karaoketv import KaraoketvIE from .karaoketv import KaraoketvIE
from .kelbyone import KelbyOneIE from .kelbyone import KelbyOneIE
from .kenh14 import (
Kenh14PlaylistIE,
Kenh14VideoIE,
)
from .khanacademy import ( from .khanacademy import (
KhanAcademyIE, KhanAcademyIE,
KhanAcademyUnitIE, KhanAcademyUnitIE,
@ -1129,12 +1139,6 @@ from .microsoftembed import (
MicrosoftMediusIE, MicrosoftMediusIE,
) )
from .microsoftstream import MicrosoftStreamIE from .microsoftstream import MicrosoftStreamIE
from .mildom import (
MildomClipIE,
MildomIE,
MildomUserVodIE,
MildomVodIE,
)
from .minds import ( from .minds import (
MindsChannelIE, MindsChannelIE,
MindsGroupIE, MindsGroupIE,
@ -1154,6 +1158,7 @@ from .mitele import MiTeleIE
from .mixch import ( from .mixch import (
MixchArchiveIE, MixchArchiveIE,
MixchIE, MixchIE,
MixchMovieIE,
) )
from .mixcloud import ( from .mixcloud import (
MixcloudIE, MixcloudIE,
@ -1552,10 +1557,6 @@ from .podbayfm import (
) )
from .podchaser import PodchaserIE from .podchaser import PodchaserIE
from .podomatic import PodomaticIE from .podomatic import PodomaticIE
from .pokemon import (
PokemonIE,
PokemonWatchIE,
)
from .pokergo import ( from .pokergo import (
PokerGoCollectionIE, PokerGoCollectionIE,
PokerGoIE, PokerGoIE,
@ -1646,6 +1647,7 @@ from .radiokapital import (
RadioKapitalIE, RadioKapitalIE,
RadioKapitalShowIE, RadioKapitalShowIE,
) )
from .radioradicale import RadioRadicaleIE
from .radiozet import RadioZetPodcastIE from .radiozet import RadioZetPodcastIE
from .radlive import ( from .radlive import (
RadLiveChannelIE, RadLiveChannelIE,
@ -1942,9 +1944,7 @@ from .spotify import (
) )
from .spreaker import ( from .spreaker import (
SpreakerIE, SpreakerIE,
SpreakerPageIE,
SpreakerShowIE, SpreakerShowIE,
SpreakerShowPageIE,
) )
from .springboardplatform import SpringboardPlatformIE from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE from .sprout import SproutIE
@ -2283,10 +2283,6 @@ from .utreon import UtreonIE
from .varzesh3 import Varzesh3IE from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE from .vbox7 import Vbox7IE
from .veo import VeoIE from .veo import VeoIE
from .veoh import (
VeohIE,
VeohUserIE,
)
from .vesti import VestiIE from .vesti import VestiIE
from .vevo import ( from .vevo import (
VevoIE, VevoIE,

View file

@ -6,7 +6,6 @@ import hmac
import io import io
import json import json
import re import re
import struct
import time import time
import urllib.parse import urllib.parse
import uuid import uuid
@ -18,10 +17,8 @@ from ..networking.exceptions import TransportError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
OnDemandPagedList, OnDemandPagedList,
bytes_to_intlist,
decode_base_n, decode_base_n,
int_or_none, int_or_none,
intlist_to_bytes,
time_seconds, time_seconds,
traverse_obj, traverse_obj,
update_url_query, update_url_query,
@ -72,15 +69,15 @@ class AbemaLicenseRH(RequestHandler):
}) })
res = decode_base_n(license_response['k'], table=self._STRTABLE) res = decode_base_n(license_response['k'], table=self._STRTABLE)
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff)) encvideokey = list(res.to_bytes(16, 'big'))
h = hmac.new( h = hmac.new(
binascii.unhexlify(self._HKEY), binascii.unhexlify(self._HKEY),
(license_response['cid'] + self.ie._DEVICE_ID).encode(), (license_response['cid'] + self.ie._DEVICE_ID).encode(),
digestmod=hashlib.sha256) digestmod=hashlib.sha256)
enckey = bytes_to_intlist(h.digest()) enckey = list(h.digest())
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey)) return bytes(aes_ecb_decrypt(encvideokey, enckey))
class AbemaTVBaseIE(InfoExtractor): class AbemaTVBaseIE(InfoExtractor):

View file

@ -11,11 +11,9 @@ from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
ass_subtitles_timecode, ass_subtitles_timecode,
bytes_to_intlist,
bytes_to_long, bytes_to_long,
float_or_none, float_or_none,
int_or_none, int_or_none,
intlist_to_bytes,
join_nonempty, join_nonempty,
long_to_bytes, long_to_bytes,
parse_iso8601, parse_iso8601,
@ -198,16 +196,16 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link') links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
self._K = ''.join(random.choices('0123456789abcdef', k=16)) self._K = ''.join(random.choices('0123456789abcdef', k=16))
message = bytes_to_intlist(json.dumps({ message = list(json.dumps({
'k': self._K, 'k': self._K,
't': token, 't': token,
})) }).encode())
# Sometimes authentication fails for no good reason, retry with # Sometimes authentication fails for no good reason, retry with
# a different random padding # a different random padding
links_data = None links_data = None
for _ in range(3): for _ in range(3):
padded_message = intlist_to_bytes(pkcs1pad(message, 128)) padded_message = bytes(pkcs1pad(message, 128))
n, e = self._RSA_KEY n, e = self._RSA_KEY
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n)) encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
authorization = base64.b64encode(encrypted_message).decode() authorization = base64.b64encode(encrypted_message).decode()

View file

@ -1362,7 +1362,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
def _download_webpage_handle(self, *args, **kwargs): def _download_webpage_handle(self, *args, **kwargs):
headers = self.geo_verification_headers() headers = self.geo_verification_headers()
headers.update(kwargs.get('headers', {})) headers.update(kwargs.get('headers') or {})
kwargs['headers'] = headers kwargs['headers'] = headers
return super()._download_webpage_handle( return super()._download_webpage_handle(
*args, **kwargs) *args, **kwargs)

View file

@ -66,6 +66,14 @@ class AfreecaTVBaseIE(InfoExtractor):
extensions={'legacy_ssl': True}), display_id, extensions={'legacy_ssl': True}), display_id,
'Downloading API JSON', 'Unable to download API JSON') 'Downloading API JSON', 'Unable to download API JSON')
@staticmethod
def _fixup_thumb(thumb_url):
    """Wrap a thumbnail URL in a one-item thumbnails list with an explicit ext.

    Returns None when url_or_none() rejects thumb_url; otherwise returns
    a list holding a single dict with the URL and a hard-coded 'jpg' ext.
    """
    # Core would determine_ext as 'php' from the url, so we need to provide the real ext
    # See: https://github.com/yt-dlp/yt-dlp/issues/11537
    return [{'url': thumb_url, 'ext': 'jpg'}] if url_or_none(thumb_url) else None
class AfreecaTVIE(AfreecaTVBaseIE): class AfreecaTVIE(AfreecaTVBaseIE):
IE_NAME = 'soop' IE_NAME = 'soop'
@ -154,8 +162,8 @@ class AfreecaTVIE(AfreecaTVBaseIE):
'title': ('title', {str}), 'title': ('title', {str}),
'uploader': ('writer_nick', {str}), 'uploader': ('writer_nick', {str}),
'uploader_id': ('bj_id', {str}), 'uploader_id': ('bj_id', {str}),
'duration': ('total_file_duration', {functools.partial(int_or_none, scale=1000)}), 'duration': ('total_file_duration', {int_or_none(scale=1000)}),
'thumbnail': ('thumb', {url_or_none}), 'thumbnails': ('thumb', {self._fixup_thumb}),
}) })
entries = [] entries = []
@ -178,7 +186,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
'title': f'{common_info.get("title") or "Untitled"} (part {file_num})', 'title': f'{common_info.get("title") or "Untitled"} (part {file_num})',
'formats': formats, 'formats': formats,
**traverse_obj(file_element, { **traverse_obj(file_element, {
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}), 'duration': ('duration', {int_or_none(scale=1000)}),
'timestamp': ('file_start', {unified_timestamp}), 'timestamp': ('file_start', {unified_timestamp}),
}), }),
}) })
@ -226,19 +234,18 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
return self.playlist_result(self._entries(data), video_id) return self.playlist_result(self._entries(data), video_id)
@staticmethod def _entries(self, data):
def _entries(data):
# 'files' is always a list with 1 element # 'files' is always a list with 1 element
yield from traverse_obj(data, ( yield from traverse_obj(data, (
'data', lambda _, v: v['story_type'] == 'catch', 'data', lambda _, v: v['story_type'] == 'catch',
'catch_list', lambda _, v: v['files'][0]['file'], { 'catch_list', lambda _, v: v['files'][0]['file'], {
'id': ('files', 0, 'file_info_key', {str}), 'id': ('files', 0, 'file_info_key', {str}),
'url': ('files', 0, 'file', {url_or_none}), 'url': ('files', 0, 'file', {url_or_none}),
'duration': ('files', 0, 'duration', {functools.partial(int_or_none, scale=1000)}), 'duration': ('files', 0, 'duration', {int_or_none(scale=1000)}),
'title': ('title', {str}), 'title': ('title', {str}),
'uploader': ('writer_nick', {str}), 'uploader': ('writer_nick', {str}),
'uploader_id': ('writer_id', {str}), 'uploader_id': ('writer_id', {str}),
'thumbnail': ('thumb', {url_or_none}), 'thumbnails': ('thumb', {self._fixup_thumb}),
'timestamp': ('write_timestamp', {int_or_none}), 'timestamp': ('write_timestamp', {int_or_none}),
})) }))

View file

@ -71,7 +71,7 @@ class AllstarBaseIE(InfoExtractor):
'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {media_url_or_none}}), 'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {media_url_or_none}}),
'duration': ('clipLength', {int_or_none}), 'duration': ('clipLength', {int_or_none}),
'filesize': ('clipSizeBytes', {int_or_none}), 'filesize': ('clipSizeBytes', {int_or_none}),
'timestamp': ('createdDate', {functools.partial(int_or_none, scale=1000)}), 'timestamp': ('createdDate', {int_or_none(scale=1000)}),
'uploader': ('username', {str}), 'uploader': ('username', {str}),
'uploader_id': ('user', '_id', {str}), 'uploader_id': ('user', '_id', {str}),
'view_count': ('views', {int_or_none}), 'view_count': ('views', {int_or_none}),

View file

@ -8,10 +8,8 @@ import time
from .common import InfoExtractor from .common import InfoExtractor
from ..aes import aes_encrypt from ..aes import aes_encrypt
from ..utils import ( from ..utils import (
bytes_to_intlist,
determine_ext, determine_ext,
int_or_none, int_or_none,
intlist_to_bytes,
join_nonempty, join_nonempty,
smuggle_url, smuggle_url,
strip_jsonp, strip_jsonp,
@ -33,24 +31,6 @@ class AnvatoIE(InfoExtractor):
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
_TESTS = [{ _TESTS = [{
# from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14
'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441',
'md5': '921919dab3cd0b849ff3d624831ae3e2',
'info_dict': {
'id': '899441',
'ext': 'mp4',
'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14',
'description': 'md5:85e05a3cc163f8c344340f220521136d',
'upload_date': '20201215',
'timestamp': 1608009755,
'thumbnail': r're:^https?://.*\.jpg',
'uploader': 'NFL',
'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights',
'Player Highlights', 'Cleveland Browns', 'league'],
'duration': 157,
'categories': ['Entertainment', 'Game', 'Highlights'],
},
}, {
# from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/ # from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455', 'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
'md5': '837718bcfb3a7778d022f857f7a9b19e', 'md5': '837718bcfb3a7778d022f857f7a9b19e',
@ -241,31 +221,6 @@ class AnvatoIE(InfoExtractor):
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582', 'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
} }
def _generate_nfl_token(self, anvack, mcp_id):
reroute = self._download_json(
'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials',
headers={'X-Domain-Id': 100}, note='Fetching token info')
token_type = reroute.get('token_type') or 'Bearer'
auth_token = f'{token_type} {reroute["access_token"]}'
response = self._download_json(
'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
'query': '''{
viewer {
mediaToken(anvack: "%s", id: %s) {
token
}
}
}''' % (anvack, mcp_id), # noqa: UP031
}).encode(), headers={
'Authorization': auth_token,
'Content-Type': 'application/json',
}, note='Fetching NFL API token')
return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token'))
_TOKEN_GENERATORS = {
'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token,
}
def _server_time(self, access_key, video_id): def _server_time(self, access_key, video_id):
return int_or_none(traverse_obj(self._download_json( return int_or_none(traverse_obj(self._download_json(
f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key}, f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key},
@ -277,8 +232,8 @@ class AnvatoIE(InfoExtractor):
server_time = self._server_time(access_key, video_id) server_time = self._server_time(access_key, video_id)
input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}' input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}'
auth_secret = intlist_to_bytes(aes_encrypt( auth_secret = bytes(aes_encrypt(
bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY))) list(input_data[:64].encode()), list(self._AUTH_KEY)))
query = { query = {
'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'), 'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'),
'rtyp': 'fp', 'rtyp': 'fp',
@ -290,8 +245,6 @@ class AnvatoIE(InfoExtractor):
} }
if extracted_token is not None: if extracted_token is not None:
api['anvstk2'] = extracted_token api['anvstk2'] = extracted_token
elif self._TOKEN_GENERATORS.get(access_key) is not None:
api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id)
elif self._ANVACK_TABLE.get(access_key) is not None: elif self._ANVACK_TABLE.get(access_key) is not None:
api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}') api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
else: else:

View file

@ -205,6 +205,26 @@ class ArchiveOrgIE(InfoExtractor):
}, },
}, },
], ],
}, {
# The reviewbody is None for one of the reviews; just need to extract data without crashing
'url': 'https://archive.org/details/gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
'info_dict': {
'id': 'gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
'ext': 'mp3',
'title': 'Stuck Inside of Mobile with the Memphis Blues Again',
'creators': ['Grateful Dead'],
'duration': 338.31,
'track': 'Stuck Inside of Mobile with the Memphis Blues Again',
'description': 'md5:764348a470b986f1217ffd38d6ac7b72',
'display_id': 'gd95-04-02d1t04.shn',
'location': 'Pyramid Arena',
'uploader': 'jon@archive.org',
'album': '1995-04-02 - Pyramid Arena',
'upload_date': '20040519',
'track_number': 4,
'release_date': '19950402',
'timestamp': 1084927901,
},
}] }]
@staticmethod @staticmethod
@ -335,7 +355,7 @@ class ArchiveOrgIE(InfoExtractor):
info['comments'].append({ info['comments'].append({
'id': review.get('review_id'), 'id': review.get('review_id'),
'author': review.get('reviewer'), 'author': review.get('reviewer'),
'text': str_or_none(review.get('reviewtitle'), '') + '\n\n' + review.get('reviewbody'), 'text': join_nonempty('reviewtitle', 'reviewbody', from_dict=review, delim='\n\n'),
'timestamp': unified_timestamp(review.get('createdate')), 'timestamp': unified_timestamp(review.get('createdate')),
'parent': 'root'}) 'parent': 'root'})

View file

@ -299,7 +299,7 @@ class ARDBetaMediathekIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '94834686', 'id': '94834686',
'ext': 'mp4', 'ext': 'mp4',
'duration': 2700, 'duration': 2670,
'episode': '7 Tage ... unter harten Jungs', 'episode': '7 Tage ... unter harten Jungs',
'description': 'md5:0f215470dcd2b02f59f4bd10c963f072', 'description': 'md5:0f215470dcd2b02f59f4bd10c963f072',
'upload_date': '20231005', 'upload_date': '20231005',
@ -307,10 +307,28 @@ class ARDBetaMediathekIE(InfoExtractor):
'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3', 'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
'series': '7 Tage ...', 'series': '7 Tage ...',
'channel': 'HR', 'channel': 'HR',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f6e6d5ffac41925c?w=960&ch=fa32ba69bc87989a', 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:430c86d233afa42d?w=960&ch=fa32ba69bc87989a',
'title': '7 Tage ... unter harten Jungs', 'title': '7 Tage ... unter harten Jungs',
'_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'], '_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'],
}, },
}, {
'url': 'https://www.ardmediathek.de/video/lokalzeit-aus-duesseldorf/lokalzeit-aus-duesseldorf-oder-31-10-2024/wdr-duesseldorf/Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
'info_dict': {
'id': '13847165',
'chapters': 'count:8',
'ext': 'mp4',
'channel': 'WDR',
'display_id': 'Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
'episode': 'Lokalzeit aus Düsseldorf | 31.10.2024',
'series': 'Lokalzeit aus Düsseldorf',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f02ec9bd9b7bd5f6?w=960&ch=612491dcd5e09b0c',
'title': 'Lokalzeit aus Düsseldorf | 31.10.2024',
'upload_date': '20241031',
'timestamp': 1730399400,
'description': 'md5:12db30b3b706314efe3778b8df1a7058',
'duration': 1759,
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz'],
},
}, { }, {
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', 'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
'only_matching': True, 'only_matching': True,
@ -455,6 +473,12 @@ class ARDBetaMediathekIE(InfoExtractor):
'subtitles': subtitles, 'subtitles': subtitles,
'is_live': is_live, 'is_live': is_live,
'age_limit': age_limit, 'age_limit': age_limit,
**traverse_obj(media_data, {
'chapters': ('pluginData', 'jumpmarks@all', 'chapterArray', lambda _, v: int_or_none(v['chapterTime']), {
'start_time': ('chapterTime', {int_or_none}),
'title': ('chapterTitle', {str}),
}),
}),
**traverse_obj(media_data, ('meta', { **traverse_obj(media_data, ('meta', {
'title': 'title', 'title': 'title',
'description': 'synopsis', 'description': 'synopsis',

View file

@ -1,4 +1,3 @@
import functools
import json import json
import random import random
import re import re
@ -10,7 +9,6 @@ from ..utils import (
ExtractorError, ExtractorError,
extract_attributes, extract_attributes,
float_or_none, float_or_none,
get_element_html_by_id,
int_or_none, int_or_none,
parse_filesize, parse_filesize,
str_or_none, str_or_none,
@ -21,7 +19,7 @@ from ..utils import (
url_or_none, url_or_none,
urljoin, urljoin,
) )
from ..utils.traversal import traverse_obj from ..utils.traversal import find_element, traverse_obj
class BandcampIE(InfoExtractor): class BandcampIE(InfoExtractor):
@ -45,6 +43,8 @@ class BandcampIE(InfoExtractor):
'uploader_url': 'https://youtube-dl.bandcamp.com', 'uploader_url': 'https://youtube-dl.bandcamp.com',
'uploader_id': 'youtube-dl', 'uploader_id': 'youtube-dl',
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg', 'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
'artists': ['youtube-dl "\'/\\ä↭'],
'album_artists': ['youtube-dl "\'/\\ä↭'],
}, },
'skip': 'There is a limit of 200 free downloads / month for the test song', 'skip': 'There is a limit of 200 free downloads / month for the test song',
}, { }, {
@ -271,6 +271,18 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
'timestamp': 1311756226, 'timestamp': 1311756226,
'upload_date': '20110727', 'upload_date': '20110727',
'uploader': 'Blazo', 'uploader': 'Blazo',
'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg',
'album_artists': ['Blazo'],
'uploader_url': 'https://blazo.bandcamp.com',
'release_date': '20110727',
'release_timestamp': 1311724800.0,
'track': 'Intro',
'uploader_id': 'blazo',
'track_number': 1,
'album': 'Jazz Format Mixtape vol.1',
'artists': ['Blazo'],
'duration': 19.335,
'track_id': '1353101989',
}, },
}, },
{ {
@ -282,6 +294,18 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
'timestamp': 1311757238, 'timestamp': 1311757238,
'upload_date': '20110727', 'upload_date': '20110727',
'uploader': 'Blazo', 'uploader': 'Blazo',
'track': 'Kero One - Keep It Alive (Blazo remix)',
'release_date': '20110727',
'track_id': '38097443',
'track_number': 2,
'duration': 181.467,
'uploader_url': 'https://blazo.bandcamp.com',
'album': 'Jazz Format Mixtape vol.1',
'uploader_id': 'blazo',
'album_artists': ['Blazo'],
'artists': ['Blazo'],
'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg',
'release_timestamp': 1311724800.0,
}, },
}, },
], ],
@ -289,6 +313,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
'title': 'Jazz Format Mixtape vol.1', 'title': 'Jazz Format Mixtape vol.1',
'id': 'jazz-format-mixtape-vol-1', 'id': 'jazz-format-mixtape-vol-1',
'uploader_id': 'blazo', 'uploader_id': 'blazo',
'description': 'md5:38052a93217f3ffdc033cd5dbbce2989',
}, },
'params': { 'params': {
'playlistend': 2, 'playlistend': 2,
@ -363,10 +388,10 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://bandcamp.com/?show=224', 'url': 'https://bandcamp.com/?show=224',
'md5': 'b00df799c733cf7e0c567ed187dea0fd', 'md5': '61acc9a002bed93986b91168aa3ab433',
'info_dict': { 'info_dict': {
'id': '224', 'id': '224',
'ext': 'opus', 'ext': 'mp3',
'title': 'BC Weekly April 4th 2017 - Magic Moments', 'title': 'BC Weekly April 4th 2017 - Magic Moments',
'description': 'md5:5d48150916e8e02d030623a48512c874', 'description': 'md5:5d48150916e8e02d030623a48512c874',
'duration': 5829.77, 'duration': 5829.77,
@ -376,7 +401,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
'episode_id': '224', 'episode_id': '224',
}, },
'params': { 'params': {
'format': 'opus-lo', 'format': 'mp3-128',
}, },
}, { }, {
'url': 'https://bandcamp.com/?blah/blah@&show=228', 'url': 'https://bandcamp.com/?blah/blah@&show=228',
@ -484,7 +509,7 @@ class BandcampUserIE(InfoExtractor):
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage)) or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
yield from traverse_obj(webpage, ( yield from traverse_obj(webpage, (
{functools.partial(get_element_html_by_id, 'music-grid')}, {extract_attributes}, {find_element(id='music-grid', html=True)}, {extract_attributes},
'data-client-items', {json.loads}, ..., 'page_url', {str})) 'data-client-items', {json.loads}, ..., 'page_url', {str}))
def _real_extract(self, url): def _real_extract(self, url):
@ -493,4 +518,4 @@ class BandcampUserIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
self._yield_items(webpage), uploader, f'Discography of {uploader}', self._yield_items(webpage), uploader, f'Discography of {uploader}',
getter=functools.partial(urljoin, url)) getter=urljoin(url))

438
yt_dlp/extractor/bandlab.py Normal file
View file

@ -0,0 +1,438 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
format_field,
int_or_none,
parse_iso8601,
parse_qs,
truncate_string,
url_or_none,
)
from ..utils.traversal import traverse_obj, value
class BandlabBaseIE(InfoExtractor):
    """Shared API access and metadata parsing for the BandLab extractors."""

    def _call_api(self, endpoint, asset_id, **kwargs):
        """Download ``{endpoint}/{asset_id}`` from the v1.3 web API as JSON.

        A caller-supplied ``headers`` mapping is merged over the default
        request headers; every other keyword argument is forwarded to
        ``_download_json`` unchanged.
        """
        extra_headers = kwargs.pop('headers', None) or {}
        request_headers = {
            'accept': 'application/json',
            'referer': 'https://www.bandlab.com/',
            'x-client-id': 'BandLab-Web',
            'x-client-version': '10.1.124',
            # Caller-provided headers come last so they can override the defaults
            **extra_headers,
        }
        api_url = f'https://www.bandlab.com/api/v1.3/{endpoint}/{asset_id}'
        return self._download_json(api_url, asset_id, headers=request_headers, **kwargs)

    def _parse_revision(self, revision_data, url=None):
        """Build the info dict for a revision (audio-only) from its API data.

        ``url`` is offered as the first ``webpage_url`` candidate; a URL
        built from the revision id is the second candidate.
        """
        info = {
            'vcodec': 'none',
            'media_type': 'revision',
            # Entries are always attributed to BandlabIE, also when yielded by the playlist extractor
            'extractor_key': BandlabIE.ie_key(),
            'extractor': BandlabIE.IE_NAME,
        }
        info.update(traverse_obj(revision_data, {
            'webpage_url': (
                'id', ({value(url)}, {format_field(template='https://www.bandlab.com/revision/%s')}), filter, any),
            'id': (('revisionId', 'id'), {str}, any),
            'title': ('song', 'name', {str}),
            'track': ('song', 'name', {str}),
            'url': ('mixdown', 'file', {url_or_none}),
            'thumbnail': ('song', 'picture', 'url', {url_or_none}),
            'description': ('description', {str}),
            'uploader': ('creator', 'name', {str}),
            'uploader_id': ('creator', 'username', {str}),
            'timestamp': ('createdOn', {parse_iso8601}),
            'duration': ('mixdown', 'duration', {float_or_none}),
            'view_count': ('counters', 'plays', {int_or_none}),
            'like_count': ('counters', 'likes', {int_or_none}),
            'comment_count': ('counters', 'comments', {int_or_none}),
            'genres': ('genres', ..., 'name', {str}),
        }))
        return info

    def _parse_track(self, track_data, url=None):
        """Build the info dict for a track post (audio-only) from its API data.

        ``url`` is offered as the first ``webpage_url`` candidate; a URL
        built from the post id is the second candidate.
        """
        info = {
            'vcodec': 'none',
            'media_type': 'track',
            'extractor_key': BandlabIE.ie_key(),
            'extractor': BandlabIE.IE_NAME,
        }
        info.update(traverse_obj(track_data, {
            'webpage_url': (
                'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
            'id': (('revisionId', 'id'), {str}, any),
            'url': ('track', 'sample', 'audioUrl', {url_or_none}),
            'title': ('track', 'name', {str}),
            'track': ('track', 'name', {str}),
            'description': ('caption', {str}),
            'thumbnail': ('track', 'picture', ('original', 'url'), {url_or_none}, any),
            'view_count': ('counters', 'plays', {int_or_none}),
            'like_count': ('counters', 'likes', {int_or_none}),
            'comment_count': ('counters', 'comments', {int_or_none}),
            'duration': ('track', 'sample', 'duration', {float_or_none}),
            'uploader': ('creator', 'name', {str}),
            'uploader_id': ('creator', 'username', {str}),
            'timestamp': ('createdOn', {parse_iso8601}),
        }))
        return info

    def _parse_video(self, video_data, url=None):
        """Build the info dict for a video post from its API data.

        The title is derived from the caption: newlines are replaced by
        spaces and the result is passed to ``truncate_string(left=50)``.
        """
        info = {
            'media_type': 'video',
            'extractor_key': BandlabIE.ie_key(),
            'extractor': BandlabIE.IE_NAME,
        }
        info.update(traverse_obj(video_data, {
            'id': ('id', {str}),
            'webpage_url': (
                'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
            'url': ('video', 'url', {url_or_none}),
            'title': ('caption', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
            'description': ('caption', {str}),
            'thumbnail': ('video', 'picture', 'url', {url_or_none}),
            'view_count': ('video', 'counters', 'plays', {int_or_none}),
            'like_count': ('video', 'counters', 'likes', {int_or_none}),
            'comment_count': ('counters', 'comments', {int_or_none}),
            'duration': ('video', 'duration', {float_or_none}),
            'uploader': ('creator', 'name', {str}),
            'uploader_id': ('creator', 'username', {str}),
        }))
        return info
class BandlabIE(BandlabBaseIE):
    """Extractor for single BandLab tracks, posts, revisions and player embeds."""

    # NOTE: the dot in the hostname is escaped so that only "bandlab.com"
    # matches (an unescaped "." would also accept e.g. "bandlabXcom")
    _VALID_URL = [
        r'https?://(?:www\.)?bandlab\.com/(?P<url_type>track|post|revision)/(?P<id>[\da-f_-]+)',
        r'https?://(?:www\.)?bandlab\.com/(?P<url_type>embed)/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
    ]
    # Only the embed pattern (second entry) is searched for in third-party pages
    _EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
    _TESTS = [{
        'url': 'https://www.bandlab.com/track/04b37e88dba24967b9dac8eb8567ff39_07d7f906fc96ee11b75e000d3a428fff',
        'md5': '46f7b43367dd268bbcf0bbe466753b2c',
        'info_dict': {
            'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
            'ext': 'm4a',
            'uploader_id': 'ender_milze',
            'track': 'sweet black',
            'description': 'composed by juanjn3737',
            'timestamp': 1702171963,
            'view_count': int,
            'like_count': int,
            'duration': 54.629999999999995,
            'title': 'sweet black',
            'upload_date': '20231210',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
            'genres': ['Lofi'],
            'uploader': 'ender milze',
            'comment_count': int,
            'media_type': 'revision',
        },
    }, {
        # Same track as above but post URL
        'url': 'https://www.bandlab.com/post/07d7f906-fc96-ee11-b75e-000d3a428fff',
        'md5': '46f7b43367dd268bbcf0bbe466753b2c',
        'info_dict': {
            'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
            'ext': 'm4a',
            'uploader_id': 'ender_milze',
            'track': 'sweet black',
            'description': 'composed by juanjn3737',
            'timestamp': 1702171973,
            'view_count': int,
            'like_count': int,
            'duration': 54.629999999999995,
            'title': 'sweet black',
            'upload_date': '20231210',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
            'genres': ['Lofi'],
            'uploader': 'ender milze',
            'comment_count': int,
            'media_type': 'revision',
        },
    }, {
        # SharedKey Example
        'url': 'https://www.bandlab.com/track/048916c2-c6da-ee11-85f9-6045bd2e11f9?sharedKey=0NNWX8qYAEmI38lWAzCNDA',
        'md5': '15174b57c44440e2a2008be9cae00250',
        'info_dict': {
            'id': '038916c2-c6da-ee11-85f9-6045bd2e11f9',
            'ext': 'm4a',
            'comment_count': int,
            'genres': ['Other'],
            'uploader_id': 'user8353034818103753',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/51b18363-da23-4b9b-a29c-2933a3e561ca/',
            'timestamp': 1709625771,
            'track': 'PodcastMaerchen4b',
            'duration': 468.14,
            'view_count': int,
            'description': 'Podcast: Neues aus der Märchenwelt',
            'like_count': int,
            'upload_date': '20240305',
            'uploader': 'Erna Wageneder',
            'title': 'PodcastMaerchen4b',
            'media_type': 'revision',
        },
    }, {
        # Different Revision selected
        'url': 'https://www.bandlab.com/track/130343fc-148b-ea11-96d2-0003ffd1fc09?revId=110343fc-148b-ea11-96d2-0003ffd1fc09',
        'md5': '74e055ef9325d63f37088772fbfe4454',
        'info_dict': {
            'id': '110343fc-148b-ea11-96d2-0003ffd1fc09',
            'ext': 'm4a',
            'timestamp': 1588273294,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/b612e533-e4f7-4542-9f50-3fcfd8dd822c/',
            'description': 'Final Revision.',
            'title': 'Replay ( Instrumental)',
            'uploader': 'David R Sparks',
            'uploader_id': 'davesnothome69',
            'view_count': int,
            'comment_count': int,
            'track': 'Replay ( Instrumental)',
            'genres': ['Rock'],
            'upload_date': '20200430',
            'like_count': int,
            'duration': 279.43,
            'media_type': 'revision',
        },
    }, {
        # Video
        'url': 'https://www.bandlab.com/post/5cdf9036-3857-ef11-991a-6045bd36e0d9',
        'md5': '8caa2ef28e86c1dacf167293cfdbeba9',
        'info_dict': {
            'id': '5cdf9036-3857-ef11-991a-6045bd36e0d9',
            'ext': 'mp4',
            'duration': 44.705,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/videos/67c6cef1-cef6-40d3-831e-a55bc1dcb972/',
            'comment_count': int,
            'title': 'backing vocals',
            'uploader_id': 'marliashya',
            'uploader': 'auraa',
            'like_count': int,
            'description': 'backing vocals',
            'media_type': 'video',
        },
    }, {
        # Embed Example
        'url': 'https://www.bandlab.com/embed/?blur=false&id=014de0a4-7d82-ea11-a94c-0003ffd19c0f',
        'md5': 'a4ad05cb68c54faaed9b0a8453a8cf4a',
        'info_dict': {
            'id': '014de0a4-7d82-ea11-a94c-0003ffd19c0f',
            'ext': 'm4a',
            'comment_count': int,
            'genres': ['Electronic'],
            'uploader': 'Charlie Henson',
            'timestamp': 1587328674,
            'upload_date': '20200419',
            'view_count': int,
            'track': 'Positronic Meltdown',
            'duration': 318.55,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/87165bc3-5439-496e-b1f7-a9f13b541ff2/',
            'description': 'Checkout my tracks at AOMX http://aomxsounds.com/',
            'uploader_id': 'microfreaks',
            'title': 'Positronic Meltdown',
            'like_count': int,
            'media_type': 'revision',
        },
    }, {
        # Track without revisions available
        'url': 'https://www.bandlab.com/track/55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
        'md5': 'f05d68a3769952c2d9257c473e14c15f',
        'info_dict': {
            'id': '55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
            'ext': 'm4a',
            'track': 'insame',
            'like_count': int,
            'duration': 84.03,
            'title': 'insame',
            'view_count': int,
            'comment_count': int,
            'uploader': 'Sorakime',
            'uploader_id': 'sorakime',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/572a351a-0f3a-4c6a-ac39-1a5defdeeb1c/',
            'timestamp': 1691162128,
            'upload_date': '20230804',
            'media_type': 'track',
        },
    }, {
        'url': 'https://www.bandlab.com/revision/014de0a4-7d82-ea11-a94c-0003ffd19c0f',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://phantomluigi.github.io/',
        'info_dict': {
            'id': 'e14223c3-7871-ef11-bdfd-000d3a980db3',
            'ext': 'm4a',
            'view_count': int,
            'upload_date': '20240913',
            'uploader_id': 'phantommusicofficial',
            'timestamp': 1726194897,
            'uploader': 'Phantom',
            'comment_count': int,
            'genres': ['Progresive Rock'],
            'description': 'md5:a38cd668f7a2843295ef284114f18429',
            'duration': 225.23,
            'like_count': int,
            'title': 'Vermilion Pt. 2 (Cover)',
            'track': 'Vermilion Pt. 2 (Cover)',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/62b10750-7aef-4f42-ad08-1af52f577e97/',
            'media_type': 'revision',
        },
    }]

    def _real_extract(self, url):
        """Resolve the URL to a revision, track or video and return its info dict.

        A revision id is taken from the ``revId``/``id`` query parameters or
        from the path of a ``/revision/`` URL. Otherwise the post is
        downloaded and either yields a revision (id and/or embedded data) or
        is parsed directly as a video or track post.

        Raises ExtractorError for a post that has no revision and whose
        type is neither 'Video' nor 'Track'.
        """
        display_id, url_type = self._match_valid_url(url).group('id', 'url_type')

        qs = parse_qs(url)
        revision_id = traverse_obj(qs, (('revId', 'id'), 0, any))
        if url_type == 'revision':
            # The path component of a revision URL is the revision id itself
            revision_id = display_id

        revision_data = None
        if not revision_id:
            post_data = self._call_api(
                'posts', display_id, note='Downloading post data',
                query=traverse_obj(qs, {'sharedKey': ('sharedKey', 0)}))

            revision_id = traverse_obj(post_data, (('revisionId', ('revision', 'id')), {str}, any))
            revision_data = traverse_obj(post_data, ('revision', {dict}))

            if not revision_data and not revision_id:
                # No revision attached: the post itself carries the media
                post_type = post_data.get('type')
                if post_type == 'Video':
                    return self._parse_video(post_data, url=url)
                if post_type == 'Track':
                    return self._parse_track(post_data, url=url)
                raise ExtractorError(f'Could not extract data for post type {post_type!r}')

        if not revision_data:
            # Only an id is known so far; fetch the revision itself
            revision_data = self._call_api(
                'revisions', revision_id, note='Downloading revision data', query={'edit': 'false'})

        return self._parse_revision(revision_data, url=url)
class BandlabPlaylistIE(BandlabBaseIE):
    """Extractor for BandLab albums, collections and collection embeds."""

    # NOTE: the dot in the hostname is escaped so that only "bandlab.com"
    # matches (an unescaped "." would also accept e.g. "bandlabXcom")
    _VALID_URL = [
        r'https?://(?:www\.)?bandlab\.com/(?:\w+/)?(?P<type>albums|collections)/(?P<id>[\da-f-]+)',
        r'https?://(?:www\.)?bandlab\.com/(?P<type>embed)/collection/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
    ]
    # Only the embed pattern (second entry) is searched for in third-party pages
    _EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
    _TESTS = [{
        'url': 'https://www.bandlab.com/davesnothome69/albums/89b79ea6-de42-ed11-b495-00224845aac7',
        'info_dict': {
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/69507ff3-579a-45be-afca-9e87eddec944/',
            'release_date': '20221003',
            'title': 'Remnants',
            'album': 'Remnants',
            'like_count': int,
            'album_type': 'LP',
            'description': 'A collection of some feel good, rock hits.',
            'comment_count': int,
            'view_count': int,
            'id': '89b79ea6-de42-ed11-b495-00224845aac7',
            'uploader': 'David R Sparks',
            'uploader_id': 'davesnothome69',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.bandlab.com/slytheband/collections/955102d4-1040-ef11-86c3-000d3a42581b',
        'info_dict': {
            'id': '955102d4-1040-ef11-86c3-000d3a42581b',
            'timestamp': 1720762659,
            'view_count': int,
            'title': 'My Shit 🖤',
            'uploader_id': 'slytheband',
            'uploader': '𝓢𝓛𝓨',
            'upload_date': '20240712',
            'like_count': int,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/collections/2c64ca12-b180-4b76-8587-7a8da76bddc8/',
        },
        'playlist_count': 15,
    }, {
        # Embeds can contain both albums and collections with the same URL pattern. This is an album
        'url': 'https://www.bandlab.com/embed/collection/?id=12cc6f7f-951b-ee11-907c-00224844f303',
        'info_dict': {
            'id': '12cc6f7f-951b-ee11-907c-00224844f303',
            'release_date': '20230706',
            'description': 'This is a collection of songs I created when I had an Amiga computer.',
            'view_count': int,
            'title': 'Mark Salud The Amiga Collection',
            'uploader_id': 'mssirmooth1962',
            'comment_count': int,
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/d618bd7b-0537-40d5-bdd8-61b066e77d59/',
            'like_count': int,
            'uploader': 'Mark Salud',
            'album': 'Mark Salud The Amiga Collection',
            'album_type': 'LP',
        },
        'playlist_count': 24,
    }, {
        # Tracks without revision id
        'url': 'https://www.bandlab.com/embed/collection/?id=e98aafb5-d932-ee11-b8f0-00224844c719',
        'info_dict': {
            'like_count': int,
            'uploader_id': 'sorakime',
            'comment_count': int,
            'uploader': 'Sorakime',
            'view_count': int,
            'description': 'md5:4ec31c568a5f5a5a2b17572ea64c3825',
            'release_date': '20230812',
            'title': 'Art',
            'album': 'Art',
            'album_type': 'Album',
            'id': 'e98aafb5-d932-ee11-b8f0-00224844c719',
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/20c890de-e94a-4422-828a-2da6377a13c8/',
        },
        'playlist_count': 13,
    }, {
        'url': 'https://www.bandlab.com/albums/89b79ea6-de42-ed11-b495-00224845aac7',
        'only_matching': True,
    }]

    def _entries(self, album_data):
        """Yield one info dict per post of the playlist, dispatched on post type.

        Posts without a truthy 'type' are skipped by the traversal filter;
        posts of an unrecognised type are skipped with a warning.
        """
        for post in traverse_obj(album_data, ('posts', lambda _, v: v['type'])):
            post_type = post['type']
            if post_type == 'Revision':
                yield self._parse_revision(post.get('revision'))
            elif post_type == 'Track':
                yield self._parse_track(post)
            elif post_type == 'Video':
                yield self._parse_video(post)
            else:
                self.report_warning(f'Skipping unknown post type: "{post_type}"')

    def _real_extract(self, url):
        """Download the album/collection data and return a playlist result.

        Embed URLs do not say whether the id refers to a collection or an
        album, so both endpoints are tried in turn.

        Raises ExtractorError when no endpoint returns usable data.
        """
        playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')

        endpoints = {
            'albums': ['albums'],
            'collections': ['collections'],
            'embed': ['collections', 'albums'],
        }.get(playlist_type)

        playlist_data = None
        for endpoint in endpoints:
            playlist_data = self._call_api(
                endpoint, playlist_id, note=f'Downloading {endpoint[:-1]} data',
                fatal=False, expected_status=404)
            # The request is non-fatal, so the result is not guaranteed to be
            # a JSON object; guard before calling .get() on it
            if not isinstance(playlist_data, dict):
                continue
            if not playlist_data.get('errorCode'):
                playlist_type = endpoint
                break

        if not isinstance(playlist_data, dict):
            raise ExtractorError('Could not find playlist data')
        if error_code := playlist_data.get('errorCode'):
            raise ExtractorError(f'Could not find playlist data. Error code: "{error_code}"')

        return self.playlist_result(
            self._entries(playlist_data), playlist_id,
            **traverse_obj(playlist_data, {
                'title': ('name', {str}),
                'description': ('description', {str}),
                'uploader': ('creator', 'name', {str}),
                'uploader_id': ('creator', 'username', {str}),
                'timestamp': ('createdOn', {parse_iso8601}),
                'release_date': ('releaseDate', {lambda x: x.replace('-', '')}, filter),
                'thumbnail': ('picture', ('original', 'url'), {url_or_none}, any),
                'like_count': ('counters', 'likes', {int_or_none}),
                'comment_count': ('counters', 'comments', {int_or_none}),
                'view_count': ('counters', 'plays', {int_or_none}),
            }),
            # Album-specific fields are only set when the albums endpoint answered
            **(traverse_obj(playlist_data, {
                'album': ('name', {str}),
                'album_type': ('type', {str}),
            }) if playlist_type == 'albums' else {}))

View file

@ -1284,9 +1284,9 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
**traverse_obj(model, { **traverse_obj(model, {
'title': ('title', {str}), 'title': ('title', {str}),
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}), 'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any), 'description': ('synopses', ('long', 'medium', 'short'), {str}, filter, any),
'duration': ('versions', 0, 'duration', {int}), 'duration': ('versions', 0, 'duration', {int}),
'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}), 'timestamp': ('versions', 0, 'availableFrom', {int_or_none(scale=1000)}),
}), }),
} }
@ -1386,7 +1386,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), { formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
'url': ('url', {url_or_none}), 'url': ('url', {url_or_none}),
'ext': ('format', {str}), 'ext': ('format', {str}),
'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}), 'tbr': ('bitrate', {int_or_none(scale=1000)}),
})) }))
if formats: if formats:
entry = { entry = {
@ -1398,7 +1398,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'title': ('title', {str}), 'title': ('title', {str}),
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}), 'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
'description': ('synopses', ('long', 'medium', 'short'), {str}, any), 'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}), 'timestamp': ('firstPublished', {int_or_none(scale=1000)}),
}), }),
} }
done = True done = True
@ -1428,7 +1428,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
if not entry.get('timestamp'): if not entry.get('timestamp'):
entry['timestamp'] = traverse_obj(next_data, ( entry['timestamp'] = traverse_obj(next_data, (
..., 'contents', is_type('timestamp'), 'model', ..., 'contents', is_type('timestamp'), 'model',
'timestamp', {functools.partial(int_or_none, scale=1000)}, any)) 'timestamp', {int_or_none(scale=1000)}, any))
entries.append(entry) entries.append(entry)
return self.playlist_result( return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description) entries, playlist_id, playlist_title, playlist_description)

View file

@ -1,18 +1,33 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import extract_attributes from ..utils import ExtractorError, extract_attributes
class BFMTVBaseIE(InfoExtractor): class BFMTVBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.|rmc\.)?bfmtv\.com/' _VALID_URL_BASE = r'https?://(?:www\.|rmc\.)?bfmtv\.com/'
_VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html' _VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>)' _VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>.*?</div>)'
_VIDEO_ELEMENT_REGEX = r'(<video-js[^>]+>)'
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
def _brightcove_url_result(self, video_id, video_block): def _extract_video(self, video_block):
account_id = video_block.get('accountid') or '876450612001' video_element = self._search_regex(
player_id = video_block.get('playerid') or 'I2qBTln4u' self._VIDEO_ELEMENT_REGEX, video_block, 'video element', default=None)
if video_element:
video_element_attrs = extract_attributes(video_element)
video_id = video_element_attrs.get('data-video-id')
if not video_id:
return
account_id = video_element_attrs.get('data-account') or '876450610001'
player_id = video_element_attrs.get('adjustplayer') or '19dszYXgm'
else:
video_block_attrs = extract_attributes(video_block)
video_id = video_block_attrs.get('videoid')
if not video_id:
return
account_id = video_block_attrs.get('accountid') or '876630703001'
player_id = video_block_attrs.get('playerid') or 'KbPwEbuHx'
return self.url_result( return self.url_result(
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id), self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
'BrightcoveNew', video_id) 'BrightcoveNew', video_id)
@ -40,23 +55,25 @@ class BFMTVIE(BFMTVBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
bfmtv_id = self._match_id(url) bfmtv_id = self._match_id(url)
webpage = self._download_webpage(url, bfmtv_id) webpage = self._download_webpage(url, bfmtv_id)
video_block = extract_attributes(self._search_regex( video = self._extract_video(self._search_regex(
self._VIDEO_BLOCK_REGEX, webpage, 'video block')) self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
return self._brightcove_url_result(video_block['videoid'], video_block) if not video:
raise ExtractorError('Failed to extract video')
return video
class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE class BFMTVLiveIE(BFMTVBaseIE):
IE_NAME = 'bfmtv:live' IE_NAME = 'bfmtv:live'
_VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)' _VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.bfmtv.com/en-direct/', 'url': 'https://www.bfmtv.com/en-direct/',
'info_dict': { 'info_dict': {
'id': '5615950982001', 'id': '6346069778112',
'ext': 'mp4', 'ext': 'mp4',
'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'title': r're:^Le Live BFM TV \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'uploader_id': '876450610001', 'uploader_id': '876450610001',
'upload_date': '20220926', 'upload_date': '20240202',
'timestamp': 1664207191, 'timestamp': 1706887572,
'live_status': 'is_live', 'live_status': 'is_live',
'thumbnail': r're:https://.+/image\.jpg', 'thumbnail': r're:https://.+/image\.jpg',
'tags': [], 'tags': [],
@ -69,6 +86,15 @@ class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
'only_matching': True, 'only_matching': True,
}] }]
def _real_extract(self, url):
bfmtv_id = self._match_id(url)
webpage = self._download_webpage(url, bfmtv_id)
video = self._extract_video(self._search_regex(
self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
if not video:
raise ExtractorError('Failed to extract video')
return video
class BFMTVArticleIE(BFMTVBaseIE): class BFMTVArticleIE(BFMTVBaseIE):
IE_NAME = 'bfmtv:article' IE_NAME = 'bfmtv:article'
@ -102,18 +128,16 @@ class BFMTVArticleIE(BFMTVBaseIE):
}, },
}] }]
def _entries(self, webpage):
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
video = self._extract_video(video_block_el)
if video:
yield video
def _real_extract(self, url): def _real_extract(self, url):
bfmtv_id = self._match_id(url) bfmtv_id = self._match_id(url)
webpage = self._download_webpage(url, bfmtv_id) webpage = self._download_webpage(url, bfmtv_id)
entries = []
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
video_block = extract_attributes(video_block_el)
video_id = video_block.get('videoid')
if not video_id:
continue
entries.append(self._brightcove_url_result(video_id, video_block))
return self.playlist_result( return self.playlist_result(
entries, bfmtv_id, self._og_search_title(webpage, fatal=False), self._entries(webpage), bfmtv_id, self._og_search_title(webpage, fatal=False),
self._html_search_meta(['og:description', 'description'], webpage)) self._html_search_meta(['og:description', 'description'], webpage))

View file

@ -1,4 +1,3 @@
import functools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -50,7 +49,7 @@ class BibelTVBaseIE(InfoExtractor):
**traverse_obj(data, { **traverse_obj(data, {
'title': 'title', 'title': 'title',
'description': 'description', 'description': 'description',
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}), 'duration': ('duration', {int_or_none(scale=1000)}),
'timestamp': ('schedulingStart', {parse_iso8601}), 'timestamp': ('schedulingStart', {parse_iso8601}),
'season_number': 'seasonNumber', 'season_number': 'seasonNumber',
'episode_number': 'episodeNumber', 'episode_number': 'episodeNumber',

View file

@ -109,7 +109,7 @@ class BilibiliBaseIE(InfoExtractor):
fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), { fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
'url': ('url', {url_or_none}), 'url': ('url', {url_or_none}),
'duration': ('length', {functools.partial(float_or_none, scale=1000)}), 'duration': ('length', {float_or_none(scale=1000)}),
'filesize': ('size', {int_or_none}), 'filesize': ('size', {int_or_none}),
})) }))
if fragments: if fragments:
@ -124,7 +124,7 @@ class BilibiliBaseIE(InfoExtractor):
'quality': ('quality', {int_or_none}), 'quality': ('quality', {int_or_none}),
'format_id': ('quality', {str_or_none}), 'format_id': ('quality', {str_or_none}),
'format_note': ('quality', {lambda x: format_names.get(x)}), 'format_note': ('quality', {lambda x: format_names.get(x)}),
'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}), 'duration': ('timelength', {float_or_none(scale=1000)}),
}), }),
**parse_resolution(format_names.get(play_info.get('quality'))), **parse_resolution(format_names.get(play_info.get('quality'))),
}) })
@ -1585,7 +1585,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
'title': ('title', {str}), 'title': ('title', {str}),
'uploader': ('upper', 'name', {str}), 'uploader': ('upper', 'name', {str}),
'uploader_id': ('upper', 'mid', {str_or_none}), 'uploader_id': ('upper', 'mid', {str_or_none}),
'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}), 'timestamp': ('ctime', {int_or_none}, filter),
'thumbnail': ('cover', {url_or_none}), 'thumbnail': ('cover', {url_or_none}),
})), })),
} }

388
yt_dlp/extractor/bluesky.py Normal file
View file

@ -0,0 +1,388 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
format_field,
int_or_none,
mimetype2ext,
orderedSet,
parse_iso8601,
truncate_string,
update_url_query,
url_basename,
url_or_none,
variadic,
)
from ..utils.traversal import traverse_obj
class BlueskyIE(InfoExtractor):
    # Handles both bsky.app / main.bsky.dev post pages and raw at:// post URIs
    _VALID_URL = [
        r'https?://(?:www\.)?(?:bsky\.app|main\.bsky\.dev)/profile/(?P<handle>[\w.:%-]+)/post/(?P<id>\w+)',
        r'at://(?P<handle>[\w.:%-]+)/app\.bsky\.feed\.post/(?P<id>\w+)',
    ]
    _TESTS = [{
        'url': 'https://bsky.app/profile/blu3blue.bsky.social/post/3l4omssdl632g',
        'md5': '375539c1930ab05d15585ed772ab54fd',
        'info_dict': {
            'id': '3l4omssdl632g',
            'ext': 'mp4',
            'uploader': 'Blu3Blu3Lilith',
            'uploader_id': 'blu3blue.bsky.social',
            'uploader_url': 'https://bsky.app/profile/blu3blue.bsky.social',
            'channel_id': 'did:plc:pzdr5ylumf7vmvwasrpr5bf2',
            'channel_url': 'https://bsky.app/profile/did:plc:pzdr5ylumf7vmvwasrpr5bf2',
            'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
            'title': 'OMG WE HAVE VIDEOS NOW',
            'description': 'OMG WE HAVE VIDEOS NOW',
            'upload_date': '20240921',
            'timestamp': 1726940605,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'tags': [],
        },
    }, {
        'url': 'https://bsky.app/profile/bsky.app/post/3l3vgf77uco2g',
        'md5': 'b9e344fdbce9f2852c668a97efefb105',
        'info_dict': {
            'id': '3l3vgf77uco2g',
            'ext': 'mp4',
            'uploader': 'Bluesky',
            'uploader_id': 'bsky.app',
            'uploader_url': 'https://bsky.app/profile/bsky.app',
            'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
            'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
            'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
            'title': 'Bluesky now has video! Update your app to versi...',
            'alt_title': 'Bluesky video feature announcement',
            'description': r're:(?s)Bluesky now has video! .{239}',
            'upload_date': '20240911',
            'timestamp': 1726074716,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'tags': [],
            'subtitles': {
                'en': 'mincount:1',
            },
        },
    }, {
        'url': 'https://main.bsky.dev/profile/souris.moe/post/3l4qhp7bcs52c',
        'md5': '5f2df8c200b5633eb7fb2c984d29772f',
        'info_dict': {
            'id': '3l4qhp7bcs52c',
            'ext': 'mp4',
            'uploader': 'souris',
            'uploader_id': 'souris.moe',
            'uploader_url': 'https://bsky.app/profile/souris.moe',
            'channel_id': 'did:plc:tj7g244gl5v6ai6cm4f4wlqp',
            'channel_url': 'https://bsky.app/profile/did:plc:tj7g244gl5v6ai6cm4f4wlqp',
            'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
            'title': 'Bluesky video #3l4qhp7bcs52c',
            'upload_date': '20240922',
            'timestamp': 1727003838,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'tags': [],
        },
    }, {
        'url': 'https://bsky.app/profile/de1.pds.tentacle.expert/post/3l3w4tnezek2e',
        'md5': '1af9c7fda061cf7593bbffca89e43d1c',
        'info_dict': {
            'id': '3l3w4tnezek2e',
            'ext': 'mp4',
            'uploader': 'clean',
            'uploader_id': 'de1.pds.tentacle.expert',
            'uploader_url': 'https://bsky.app/profile/de1.pds.tentacle.expert',
            'channel_id': 'did:web:de1.tentacle.expert',
            'channel_url': 'https://bsky.app/profile/did:web:de1.tentacle.expert',
            'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
            'title': 'Bluesky video #3l3w4tnezek2e',
            'upload_date': '20240911',
            'timestamp': 1726098823,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'tags': [],
        },
    }, {
        'url': 'https://bsky.app/profile/yunayuispink.bsky.social/post/3l7gqcfes742o',
        'info_dict': {
            'id': 'XxK3t_5V3ao',
            'ext': 'mp4',
            'uploader': 'yunayu',
            'uploader_id': '@yunayuispink',
            'uploader_url': 'https://www.youtube.com/@yunayuispink',
            'channel': 'yunayu',
            'channel_id': 'UCPLvXnHa7lTyNoR_dGsU14w',
            'channel_url': 'https://www.youtube.com/channel/UCPLvXnHa7lTyNoR_dGsU14w',
            'thumbnail': 'https://i.ytimg.com/vi_webp/XxK3t_5V3ao/maxresdefault.webp',
            'description': r're:Have a good goodx10000day',
            'title': '5min vs 5hours drawing',
            'availability': 'public',
            'live_status': 'not_live',
            'playable_in_embed': True,
            'upload_date': '20241026',
            'timestamp': 1729967784,
            'duration': 321,
            'age_limit': 0,
            'like_count': int,
            'view_count': int,
            'comment_count': int,
            'channel_follower_count': int,
            'categories': ['Entertainment'],
            'tags': [],
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'https://bsky.app/profile/endshark.bsky.social/post/3jzxjkcemae2m',
        'info_dict': {
            'id': '222792849',
            'ext': 'mp3',
            'uploader': 'LASERBAT',
            'uploader_id': 'laserbatx',
            'uploader_url': 'https://laserbatx.bandcamp.com',
            'artists': ['LASERBAT'],
            'album_artists': ['LASERBAT'],
            'album': 'Hari Nezumi [EP]',
            'track': 'Forward to the End',
            'title': 'LASERBAT - Forward to the End',
            'thumbnail': 'https://f4.bcbits.com/img/a2507705510_5.jpg',
            'duration': 228.571,
            'track_id': '222792849',
            'release_date': '20230423',
            'upload_date': '20230423',
            'timestamp': 1682276040.0,
            'release_timestamp': 1682276040.0,
            'track_number': 1,
        },
        'add_ie': ['Bandcamp'],
    }, {
        'url': 'https://bsky.app/profile/dannybhoix.bsky.social/post/3l6oe5mtr2c2j',
        'md5': 'b9e344fdbce9f2852c668a97efefb105',
        'info_dict': {
            'id': '3l3vgf77uco2g',
            'ext': 'mp4',
            'uploader': 'Bluesky',
            'uploader_id': 'bsky.app',
            'uploader_url': 'https://bsky.app/profile/bsky.app',
            'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
            'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
            'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
            'title': 'Bluesky now has video! Update your app to versi...',
            'alt_title': 'Bluesky video feature announcement',
            'description': r're:(?s)Bluesky now has video! .{239}',
            'upload_date': '20240911',
            'timestamp': 1726074716,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'tags': [],
            'subtitles': {
                'en': 'mincount:1',
            },
        },
    }, {
        'url': 'https://bsky.app/profile/alt.bun.how/post/3l7rdfxhyds2f',
        'md5': '8775118b235cf9fa6b5ad30f95cda75c',
        'info_dict': {
            'id': '3l7rdfxhyds2f',
            'ext': 'mp4',
            'uploader': 'cinnamon',
            'uploader_id': 'alt.bun.how',
            'uploader_url': 'https://bsky.app/profile/alt.bun.how',
            'channel_id': 'did:plc:7x6rtuenkuvxq3zsvffp2ide',
            'channel_url': 'https://bsky.app/profile/did:plc:7x6rtuenkuvxq3zsvffp2ide',
            'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
            'title': 'crazy that i look like this tbh',
            'description': 'crazy that i look like this tbh',
            'upload_date': '20241030',
            'timestamp': 1730332128,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'tags': ['sexual'],
            'age_limit': 18,
        },
    }, {
        'url': 'at://did:plc:ia76kvnndjutgedggx2ibrem/app.bsky.feed.post/3l6zrz6zyl2dr',
        'md5': '71b0eb6d85d03145e6af6642c7fc6d78',
        'info_dict': {
            'id': '3l6zrz6zyl2dr',
            'ext': 'mp4',
            'uploader': 'mary🐇',
            'uploader_id': 'mary.my.id',
            'uploader_url': 'https://bsky.app/profile/mary.my.id',
            'channel_id': 'did:plc:ia76kvnndjutgedggx2ibrem',
            'channel_url': 'https://bsky.app/profile/did:plc:ia76kvnndjutgedggx2ibrem',
            'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
            'title': 'Bluesky video #3l6zrz6zyl2dr',
            'upload_date': '20241021',
            'timestamp': 1729523172,
            'like_count': int,
            'repost_count': int,
            'comment_count': int,
            'tags': [],
        },
    }, {
        'url': 'https://bsky.app/profile/purpleicetea.bsky.social/post/3l7gv55dc2o2w',
        'info_dict': {
            'id': '3l7gv55dc2o2w',
        },
        'playlist': [{
            'info_dict': {
                'id': '3l7gv55dc2o2w',
                'ext': 'mp4',
                'upload_date': '20241026',
                'description': 'One of my favorite videos',
                'comment_count': int,
                'uploader_url': 'https://bsky.app/profile/purpleicetea.bsky.social',
                'uploader': 'Purple.Ice.Tea',
                'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
                'channel_url': 'https://bsky.app/profile/did:plc:bjh5ffwya5f53dfy47dezuwx',
                'like_count': int,
                'channel_id': 'did:plc:bjh5ffwya5f53dfy47dezuwx',
                'repost_count': int,
                'timestamp': 1729973202,
                'tags': [],
                'uploader_id': 'purpleicetea.bsky.social',
                'title': 'One of my favorite videos',
            },
        }, {
            'info_dict': {
                'id': '3l77u64l7le2e',
                'ext': 'mp4',
                'title': 'hearing people on twitter say that bluesky isn\'...',
                'like_count': int,
                'uploader_id': 'thafnine.net',
                'uploader_url': 'https://bsky.app/profile/thafnine.net',
                'upload_date': '20241024',
                'channel_url': 'https://bsky.app/profile/did:plc:6ttyq36rhiyed7wu3ws7dmqj',
                'description': r're:(?s)hearing people on twitter say that bluesky .{93}',
                'tags': [],
                'alt_title': 'md5:9b1ee1937fb3d1a81e932f9ec14d560e',
                'uploader': 'T9',
                'channel_id': 'did:plc:6ttyq36rhiyed7wu3ws7dmqj',
                'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
                'timestamp': 1729731642,
                'comment_count': int,
                'repost_count': int,
            },
        }],
    }]
    # Filled with a service endpoint base URL; 'did' and 'cid' are added as query parameters
    _BLOB_URL_TMPL = '{}/xrpc/com.atproto.sync.getBlob'

    def _get_service_endpoint(self, did, video_id):
        """Look up the 'AtprotoPersonalDataServer' service endpoint for `did`.

        did:web identifiers are resolved via the host's /.well-known/did.json,
        everything else via plc.directory. The download is non-fatal; when no
        valid endpoint URL is found, 'https://bsky.social' is returned.
        """
        if did.startswith('did:web:'):
            # Everything after the 8-character 'did:web:' prefix is used as the host
            did_doc_url = f'https://{did[8:]}/.well-known/did.json'
        else:
            did_doc_url = f'https://plc.directory/{did}'

        did_doc = self._download_json(
            did_doc_url, video_id, 'Fetching service endpoint', 'Falling back to bsky.social', fatal=False)
        endpoint = traverse_obj(did_doc, (
            'service', lambda _, x: x['type'] == 'AtprotoPersonalDataServer',
            'serviceEndpoint', {url_or_none}, any))
        return endpoint or 'https://bsky.social'

    def _real_extract(self, url):
        """Fetch the post thread and return its single video, or a playlist if several are found.

        Raises an expected ExtractorError when the post yields no entries.
        """
        handle, video_id = self._match_valid_url(url).group('handle', 'id')

        thread_data = self._download_json(
            'https://public.api.bsky.app/xrpc/app.bsky.feed.getPostThread',
            video_id, query={
                'uri': f'at://{handle}/app.bsky.feed.post/{video_id}',
                'depth': 0,
                'parentHeight': 0,
            })
        post = thread_data['thread']['post']

        entries = [
            # app.bsky.embed.video.view/app.bsky.embed.external.view
            *self._extract_videos(post, video_id),
            # app.bsky.embed.recordWithMedia.view
            *self._extract_videos(
                post, video_id, embed_path=('embed', 'media'), record_subpath=('embed', 'media')),
        ]
        # app.bsky.embed.record.view
        nested_post = traverse_obj(post, ('embed', 'record', ('record', None), {dict}, any))
        if nested_post:
            entries += self._extract_videos(
                nested_post, video_id, embed_path=('embeds', 0), record_path='value')

        if not entries:
            raise ExtractorError('No video could be found in this post', expected=True)
        if len(entries) > 1:
            return self.playlist_result(entries, video_id)
        return entries[0]

    @staticmethod
    def _build_profile_url(path):
        """Format `path` into a bsky.app profile URL via format_field (default=None)."""
        profile_url_tmpl = 'https://bsky.app/profile/%s'
        return format_field(path, None, profile_url_tmpl, default=None)

    def _extract_videos(self, root, video_id, embed_path='embed', record_path='record', record_subpath='embed'):
        """Collect the entries that can be extracted from one post object.

        `embed_path` locates the embed view inside `root`; `record_path` followed by
        `record_subpath` locates the matching embed inside the post record. Each may
        be a single key or a sequence of keys.

        Returns a list holding at most one url_result for an external link and at
        most one info dict for an HLS playlist found in the embed.
        """
        scalar_key_types = (str, bytes, dict, set)
        embed_path = variadic(embed_path, scalar_key_types)
        record_path = variadic(record_path, scalar_key_types)
        record_subpath = variadic(record_subpath, scalar_key_types)
        record_embed_path = (*record_path, *record_subpath)

        entries = []
        # The external link is looked up in the record's embed first, then in the embed view
        external_uri = traverse_obj(root, (
            (record_embed_path, embed_path), 'external', 'uri', {url_or_none}, any))
        if external_uri:
            entries.append(self.url_result(external_uri))

        playlist = traverse_obj(root, (*embed_path, 'playlist', {url_or_none}))
        if not playlist:
            return entries
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            playlist, video_id, 'mp4', m3u8_id='hls', fatal=False)

        video_cid = traverse_obj(
            root, (*embed_path, 'cid', {str}),
            (*record_embed_path, 'video', 'ref', '$link', {str}))
        did = traverse_obj(root, ('author', 'did', {str}))

        if did and video_cid:
            # Blob URLs need the author's service endpoint, so this whole branch
            # (blob format and caption blobs) is skipped without did and cid
            endpoint = self._get_service_endpoint(did, video_id)
            blob_base_url = self._BLOB_URL_TMPL.format(endpoint)

            blob_format = {
                'format_id': 'blob',
                'url': update_url_query(blob_base_url, {'did': did, 'cid': video_cid}),
            }
            blob_format.update(traverse_obj(root, (*embed_path, 'aspectRatio', {
                'width': ('width', {int_or_none}),
                'height': ('height', {int_or_none}),
            })))
            blob_format.update(traverse_obj(root, (*record_embed_path, 'video', {
                'filesize': ('size', {int_or_none}),
                'ext': ('mimeType', {mimetype2ext}),
            })))
            formats.append(blob_format)

            captions = traverse_obj(root, (
                *record_embed_path, 'captions', lambda _, v: v['file']['ref']['$link']))
            for caption in captions:
                lang = caption.get('lang') or 'und'
                subtitles.setdefault(lang, []).append({
                    'url': update_url_query(
                        blob_base_url, {'did': did, 'cid': caption['file']['ref']['$link']}),
                    'ext': traverse_obj(caption, ('file', 'mimeType', {mimetype2ext})),
                })

        info = {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        }
        # An 'id' derived from the post's own URI overrides the requested video_id
        info.update(traverse_obj(root, {
            'id': ('uri', {url_basename}),
            'thumbnail': (*embed_path, 'thumbnail', {url_or_none}),
            'alt_title': (*embed_path, 'alt', {str}, filter),
            'uploader': ('author', 'displayName', {str}),
            'uploader_id': ('author', 'handle', {str}),
            'uploader_url': ('author', 'handle', {self._build_profile_url}),
            'channel_id': ('author', 'did', {str}),
            'channel_url': ('author', 'did', {self._build_profile_url}),
            'like_count': ('likeCount', {int_or_none}),
            'repost_count': ('repostCount', {int_or_none}),
            'comment_count': ('replyCount', {int_or_none}),
            'timestamp': ('indexedAt', {parse_iso8601}),
            'tags': ('labels', ..., 'val', {str}, all, {orderedSet}),
            'age_limit': (
                'labels', ..., 'val', {lambda x: 18 if x in ('sexual', 'porn', 'graphic-media') else None}, any),
            'description': (*record_path, 'text', {str}, filter),
            'title': (*record_path, 'text', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
        }))
        entries.append(info)

        return entries

View file

@ -1,35 +1,20 @@
import functools
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
extract_attributes, extract_attributes,
get_element_text_and_html_by_tag,
get_elements_by_class,
join_nonempty, join_nonempty,
js_to_json, js_to_json,
mimetype2ext, mimetype2ext,
unified_strdate, unified_strdate,
url_or_none, url_or_none,
urljoin, urljoin,
variadic,
) )
from ..utils.traversal import traverse_obj from ..utils.traversal import (
find_element,
traverse_obj,
def html_get_element(tag=None, cls=None): )
assert tag or cls, 'One of tag or class is required'
if cls:
func = functools.partial(get_elements_by_class, cls, tag=tag)
else:
func = functools.partial(get_element_text_and_html_by_tag, tag)
def html_get_element_wrapper(html):
return variadic(func(html))[0]
return html_get_element_wrapper
class BpbIE(InfoExtractor): class BpbIE(InfoExtractor):
@ -41,12 +26,12 @@ class BpbIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '297', 'id': '297',
'ext': 'mp4', 'ext': 'mp4',
'creator': 'Kooperative Berlin', 'creators': ['Kooperative Berlin'],
'description': 'md5:f4f75885ba009d3e2b156247a8941ce6', 'description': r're:Joachim Gauck, .*\n\nKamera: .*',
'release_date': '20160115', 'release_date': '20150716',
'series': 'Interview auf dem Geschichtsforum 1989 | 2009', 'series': 'Interview auf dem Geschichtsforum 1989 | 2009',
'tags': ['Friedliche Revolution', 'Erinnerungskultur', 'Vergangenheitspolitik', 'DDR 1949 - 1990', 'Freiheitsrecht', 'BStU', 'Deutschland'], 'tags': [],
'thumbnail': 'https://www.bpb.de/cache/images/7/297_teaser_16x9_1240.jpg?8839D', 'thumbnail': r're:https?://www\.bpb\.de/cache/images/7/297_teaser_16x9_1240\.jpg.*',
'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR', 'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
'uploader': 'Bundeszentrale für politische Bildung', 'uploader': 'Bundeszentrale für politische Bildung',
}, },
@ -55,11 +40,12 @@ class BpbIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '522184', 'id': '522184',
'ext': 'mp4', 'ext': 'mp4',
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)', 'creators': ['Institute for Strategic Dialogue Germany gGmbH (ISD)'],
'description': 'md5:f83c795ff8f825a69456a9e51fc15903', 'description': 'md5:f83c795ff8f825a69456a9e51fc15903',
'release_date': '20230621', 'release_date': '20230621',
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'], 'series': 'Narrative über den Krieg Russlands gegen die Ukraine (NUK)',
'thumbnail': 'https://www.bpb.de/cache/images/4/522184_teaser_16x9_1240.png?EABFB', 'tags': [],
'thumbnail': r're:https://www\.bpb\.de/cache/images/4/522184_teaser_16x9_1240\.png.*',
'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c', 'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c',
'uploader': 'Bundeszentrale für politische Bildung', 'uploader': 'Bundeszentrale für politische Bildung',
}, },
@ -68,11 +54,12 @@ class BpbIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '518789', 'id': '518789',
'ext': 'mp4', 'ext': 'mp4',
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)', 'creators': ['Institute for Strategic Dialogue Germany gGmbH (ISD)'],
'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8', 'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8',
'release_date': '20230302', 'release_date': '20230302',
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'], 'series': 'Narrative über den Krieg Russlands gegen die Ukraine (NUK)',
'thumbnail': 'https://www.bpb.de/cache/images/9/518789_teaser_16x9_1240.jpeg?56D0D', 'tags': [],
'thumbnail': r're:https://www\.bpb\.de/cache/images/9/518789_teaser_16x9_1240\.jpeg.*',
'title': 'md5:3e956f264bb501f6383f10495a401da4', 'title': 'md5:3e956f264bb501f6383f10495a401da4',
'uploader': 'Bundeszentrale für politische Bildung', 'uploader': 'Bundeszentrale für politische Bildung',
}, },
@ -84,12 +71,12 @@ class BpbIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '315813', 'id': '315813',
'ext': 'mp3', 'ext': 'mp3',
'creator': 'Axel Schröder', 'creators': ['Axel Schröder'],
'description': 'md5:eda9d1af34e5912efef5baf54fba4427', 'description': 'md5:eda9d1af34e5912efef5baf54fba4427',
'release_date': '20200921', 'release_date': '20200921',
'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager', 'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager',
'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'], 'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'],
'thumbnail': 'https://www.bpb.de/cache/images/3/315813_teaser_16x9_1240.png?92A94', 'thumbnail': r're:https://www\.bpb\.de/cache/images/3/315813_teaser_16x9_1240\.png.*',
'title': 'Folge 1: Eine Einführung', 'title': 'Folge 1: Eine Einführung',
'uploader': 'Bundeszentrale für politische Bildung', 'uploader': 'Bundeszentrale für politische Bildung',
}, },
@ -98,12 +85,12 @@ class BpbIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '517806', 'id': '517806',
'ext': 'mp3', 'ext': 'mp3',
'creator': 'Bundeszentrale für politische Bildung', 'creators': ['Bundeszentrale für politische Bildung'],
'description': 'md5:594689600e919912aade0b2871cc3fed', 'description': 'md5:594689600e919912aade0b2871cc3fed',
'release_date': '20230127', 'release_date': '20230127',
'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"', 'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"',
'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'], 'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'],
'thumbnail': 'https://www.bpb.de/cache/images/6/517806_teaser_16x9_1240.png?7A7A0', 'thumbnail': r're:https://www\.bpb\.de/cache/images/6/517806_teaser_16x9_1240\.png.*',
'title': 'Die Weltanschauung der "Neuen Rechten"', 'title': 'Die Weltanschauung der "Neuen Rechten"',
'uploader': 'Bundeszentrale für politische Bildung', 'uploader': 'Bundeszentrale für politische Bildung',
}, },
@ -147,7 +134,7 @@ class BpbIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title_result = traverse_obj(webpage, ({html_get_element(cls='opening-header__title')}, {self._TITLE_RE.match})) title_result = traverse_obj(webpage, ({find_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False)) json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False))
return { return {
@ -156,15 +143,15 @@ class BpbIE(InfoExtractor):
# This metadata could be interpreted otherwise, but it fits "series" the most # This metadata could be interpreted otherwise, but it fits "series" the most
'series': traverse_obj(title_result, ('series', {str.strip})) or None, 'series': traverse_obj(title_result, ('series', {str.strip})) or None,
'description': join_nonempty(*traverse_obj(webpage, [( 'description': join_nonempty(*traverse_obj(webpage, [(
{html_get_element(cls='opening-intro')}, {find_element(cls='opening-intro')},
[{html_get_element(tag='bpb-accordion-item')}, {html_get_element(cls='text-content')}], [{find_element(tag='bpb-accordion-item')}, {find_element(cls='text-content')}],
), {clean_html}]), delim='\n\n') or None, ), {clean_html}]), delim='\n\n') or None,
'creator': self._html_search_meta('author', webpage), 'creators': traverse_obj(self._html_search_meta('author', webpage), all),
'uploader': self._html_search_meta('publisher', webpage), 'uploader': self._html_search_meta('publisher', webpage),
'release_date': unified_strdate(self._html_search_meta('date', webpage)), 'release_date': unified_strdate(self._html_search_meta('date', webpage)),
'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)), 'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
**traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), { **traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
'formats': (':sources', ..., {self._process_source}), 'formats': (':sources', ..., {self._process_source}),
'thumbnail': ('poster', {lambda x: urljoin(url, x)}), 'thumbnail': ('poster', {urljoin(url)}),
}), }),
} }

View file

@ -145,10 +145,9 @@ class BravoTVIE(AdobePassIE):
tp_metadata = self._download_json( tp_metadata = self._download_json(
update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False) update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)
seconds_or_none = lambda x: float_or_none(x, 1000)
chapters = traverse_obj(tp_metadata, ('chapters', ..., { chapters = traverse_obj(tp_metadata, ('chapters', ..., {
'start_time': ('startTime', {seconds_or_none}), 'start_time': ('startTime', {float_or_none(scale=1000)}),
'end_time': ('endTime', {seconds_or_none}), 'end_time': ('endTime', {float_or_none(scale=1000)}),
})) }))
# prune pointless single chapters that span the entire duration from short videos # prune pointless single chapters that span the entire duration from short videos
if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')): if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
@ -168,8 +167,8 @@ class BravoTVIE(AdobePassIE):
**merge_dicts(traverse_obj(tp_metadata, { **merge_dicts(traverse_obj(tp_metadata, {
'title': 'title', 'title': 'title',
'description': 'description', 'description': 'description',
'duration': ('duration', {seconds_or_none}), 'duration': ('duration', {float_or_none(scale=1000)}),
'timestamp': ('pubDate', {seconds_or_none}), 'timestamp': ('pubDate', {float_or_none(scale=1000)}),
'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}), 'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}), 'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}), 'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),

View file

@ -8,11 +8,13 @@ from ..utils import (
bug_reports_message, bug_reports_message,
clean_html, clean_html,
format_field, format_field,
get_element_text_and_html_by_tag,
int_or_none, int_or_none,
url_or_none, url_or_none,
) )
from ..utils.traversal import traverse_obj from ..utils.traversal import (
find_element,
traverse_obj,
)
class BundestagIE(InfoExtractor): class BundestagIE(InfoExtractor):
@ -115,9 +117,8 @@ class BundestagIE(InfoExtractor):
note='Downloading metadata overlay', fatal=False, note='Downloading metadata overlay', fatal=False,
), { ), {
'title': ( 'title': (
{functools.partial(get_element_text_and_html_by_tag, 'h3')}, 0, {find_element(tag='h3')}, {functools.partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
{functools.partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}), 'description': ({find_element(tag='p')}, {clean_html}),
'description': ({functools.partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
})) }))
return result return result

View file

@ -53,7 +53,7 @@ class CaffeineTVIE(InfoExtractor):
'like_count': ('like_count', {int_or_none}), 'like_count': ('like_count', {int_or_none}),
'view_count': ('view_count', {int_or_none}), 'view_count': ('view_count', {int_or_none}),
'comment_count': ('comment_count', {int_or_none}), 'comment_count': ('comment_count', {int_or_none}),
'tags': ('tags', ..., {str}, {lambda x: x or None}), 'tags': ('tags', ..., {str}, filter),
'uploader': ('user', 'name', {str}), 'uploader': ('user', 'name', {str}),
'uploader_id': (((None, 'user'), 'username'), {str}, any), 'uploader_id': (((None, 'user'), 'username'), {str}, any),
'is_live': ('is_live', {bool}), 'is_live': ('is_live', {bool}),
@ -62,7 +62,7 @@ class CaffeineTVIE(InfoExtractor):
'title': ('broadcast_title', {str}), 'title': ('broadcast_title', {str}),
'duration': ('content_duration', {int_or_none}), 'duration': ('content_duration', {int_or_none}),
'timestamp': ('broadcast_start_time', {parse_iso8601}), 'timestamp': ('broadcast_start_time', {parse_iso8601}),
'thumbnail': ('preview_image_path', {lambda x: urljoin(url, x)}), 'thumbnail': ('preview_image_path', {urljoin(url)}),
}), }),
'age_limit': { 'age_limit': {
# assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system # assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system

View file

@ -453,8 +453,8 @@ class CBCPlayerIE(InfoExtractor):
chapters = traverse_obj(data, ( chapters = traverse_obj(data, (
'media', 'chapters', lambda _, v: float(v['startTime']) is not None, { 'media', 'chapters', lambda _, v: float(v['startTime']) is not None, {
'start_time': ('startTime', {functools.partial(float_or_none, scale=1000)}), 'start_time': ('startTime', {float_or_none(scale=1000)}),
'end_time': ('endTime', {functools.partial(float_or_none, scale=1000)}), 'end_time': ('endTime', {float_or_none(scale=1000)}),
'title': ('name', {str}), 'title': ('name', {str}),
})) }))
# Filter out pointless single chapters with start_time==0 and no end_time # Filter out pointless single chapters with start_time==0 and no end_time
@ -465,8 +465,8 @@ class CBCPlayerIE(InfoExtractor):
**traverse_obj(data, { **traverse_obj(data, {
'title': ('title', {str}), 'title': ('title', {str}),
'description': ('description', {str.strip}), 'description': ('description', {str.strip}),
'thumbnail': ('image', 'url', {url_or_none}, {functools.partial(update_url, query=None)}), 'thumbnail': ('image', 'url', {url_or_none}, {update_url(query=None)}),
'timestamp': ('publishedAt', {functools.partial(float_or_none, scale=1000)}), 'timestamp': ('publishedAt', {float_or_none(scale=1000)}),
'media_type': ('media', 'clipType', {str}), 'media_type': ('media', 'clipType', {str}),
'series': ('showName', {str}), 'series': ('showName', {str}),
'season_number': ('media', 'season', {int_or_none}), 'season_number': ('media', 'season', {int_or_none}),

View file

@ -96,7 +96,7 @@ class CBSNewsBaseIE(InfoExtractor):
**traverse_obj(item, { **traverse_obj(item, {
'title': (None, ('fulltitle', 'title')), 'title': (None, ('fulltitle', 'title')),
'description': 'dek', 'description': 'dek',
'timestamp': ('timestamp', {lambda x: float_or_none(x, 1000)}), 'timestamp': ('timestamp', {float_or_none(scale=1000)}),
'duration': ('duration', {float_or_none}), 'duration': ('duration', {float_or_none}),
'subtitles': ('captions', {get_subtitles}), 'subtitles': ('captions', {get_subtitles}),
'thumbnail': ('images', ('hd', 'sd'), {url_or_none}), 'thumbnail': ('images', ('hd', 'sd'), {url_or_none}),

View file

@ -5,11 +5,12 @@ from ..utils import (
ExtractorError, ExtractorError,
lowercase_escape, lowercase_escape,
url_or_none, url_or_none,
urlencode_postdata,
) )
class ChaturbateIE(InfoExtractor): class ChaturbateIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)' _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.(?P<tld>com|eu|global)/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.chaturbate.com/siswet19/', 'url': 'https://www.chaturbate.com/siswet19/',
'info_dict': { 'info_dict': {
@ -29,16 +30,59 @@ class ChaturbateIE(InfoExtractor):
}, { }, {
'url': 'https://en.chaturbate.com/siswet19/', 'url': 'https://en.chaturbate.com/siswet19/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://chaturbate.eu/siswet19/',
'only_matching': True,
}, {
'url': 'https://chaturbate.eu/fullvideo/?b=caylin',
'only_matching': True,
}, {
'url': 'https://chaturbate.global/siswet19/',
'only_matching': True,
}] }]
_ROOM_OFFLINE = 'Room is currently offline' _ERROR_MAP = {
'offline': 'Room is currently offline',
'private': 'Room is currently in a private show',
'away': 'Performer is currently away',
'password protected': 'Room is password protected',
'hidden': 'Hidden session in progress',
}
def _real_extract(self, url): def _extract_from_api(self, video_id, tld):
video_id = self._match_id(url) response = self._download_json(
f'https://chaturbate.{tld}/get_edge_hls_url_ajax/', video_id,
data=urlencode_postdata({'room_slug': video_id}),
headers={
**self.geo_verification_headers(),
'X-Requested-With': 'XMLHttpRequest',
'Accept': 'application/json',
}, fatal=False, impersonate=True) or {}
status = response.get('room_status')
if status != 'public':
if error := self._ERROR_MAP.get(status):
raise ExtractorError(error, expected=True)
self.report_warning('Falling back to webpage extraction')
return None
m3u8_url = response.get('url')
if not m3u8_url:
self.raise_geo_restricted()
return {
'id': video_id,
'title': video_id,
'thumbnail': f'https://roomimg.stream.highwebmedia.com/ri/{video_id}.jpg',
'is_live': True,
'age_limit': 18,
'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True),
}
def _extract_from_html(self, video_id, tld):
webpage = self._download_webpage( webpage = self._download_webpage(
f'https://chaturbate.com/{video_id}/', video_id, f'https://chaturbate.{tld}/{video_id}/', video_id,
headers=self.geo_verification_headers()) headers=self.geo_verification_headers(), impersonate=True)
found_m3u8_urls = [] found_m3u8_urls = []
@ -76,8 +120,8 @@ class ChaturbateIE(InfoExtractor):
webpage, 'error', group='error', default=None) webpage, 'error', group='error', default=None)
if not error: if not error:
if any(p in webpage for p in ( if any(p in webpage for p in (
self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')): self._ERROR_MAP['offline'], 'offline_tipping', 'tip_offline')):
error = self._ROOM_OFFLINE error = self._ERROR_MAP['offline']
if error: if error:
raise ExtractorError(error, expected=True) raise ExtractorError(error, expected=True)
raise ExtractorError('Unable to find stream URL') raise ExtractorError('Unable to find stream URL')
@ -104,3 +148,7 @@ class ChaturbateIE(InfoExtractor):
'is_live': True, 'is_live': True,
'formats': formats, 'formats': formats,
} }
def _real_extract(self, url):
video_id, tld = self._match_valid_url(url).group('id', 'tld')
return self._extract_from_api(video_id, tld) or self._extract_from_html(video_id, tld)

View file

@ -1,5 +1,3 @@
import functools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
UserNotLive, UserNotLive,
@ -77,7 +75,7 @@ class CHZZKLiveIE(InfoExtractor):
'thumbnails': thumbnails, 'thumbnails': thumbnails,
**traverse_obj(live_detail, { **traverse_obj(live_detail, {
'title': ('liveTitle', {str}), 'title': ('liveTitle', {str}),
'timestamp': ('openDate', {functools.partial(parse_iso8601, delimiter=' ')}), 'timestamp': ('openDate', {parse_iso8601(delimiter=' ')}),
'concurrent_view_count': ('concurrentUserCount', {int_or_none}), 'concurrent_view_count': ('concurrentUserCount', {int_or_none}),
'view_count': ('accumulateCount', {int_or_none}), 'view_count': ('accumulateCount', {int_or_none}),
'channel': ('channel', 'channelName', {str}), 'channel': ('channel', 'channelName', {str}),
@ -146,23 +144,37 @@ class CHZZKVideoIE(InfoExtractor):
video_meta = self._download_json( video_meta = self._download_json(
f'https://api.chzzk.naver.com/service/v3/videos/{video_id}', video_id, f'https://api.chzzk.naver.com/service/v3/videos/{video_id}', video_id,
note='Downloading video info', errnote='Unable to download video info')['content'] note='Downloading video info', errnote='Unable to download video info')['content']
formats, subtitles = self._extract_mpd_formats_and_subtitles(
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id, live_status = 'was_live' if video_meta.get('liveOpenDate') else 'not_live'
query={ video_status = video_meta.get('vodStatus')
'key': video_meta['inKey'], if video_status == 'UPLOAD':
'env': 'real', playback = self._parse_json(video_meta['liveRewindPlaybackJson'], video_id)
'lc': 'en_US', formats, subtitles = self._extract_m3u8_formats_and_subtitles(
'cpl': 'en_US', playback['media'][0]['path'], video_id, 'mp4', m3u8_id='hls')
}, note='Downloading video playback', errnote='Unable to download video playback') elif video_status == 'ABR_HLS':
formats, subtitles = self._extract_mpd_formats_and_subtitles(
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}',
video_id, query={
'key': video_meta['inKey'],
'env': 'real',
'lc': 'en_US',
'cpl': 'en_US',
})
else:
self.raise_no_formats(
f'Unknown video status detected: "{video_status}"', expected=True, video_id=video_id)
formats, subtitles = [], {}
live_status = 'post_live' if live_status == 'was_live' else None
return { return {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'live_status': live_status,
**traverse_obj(video_meta, { **traverse_obj(video_meta, {
'title': ('videoTitle', {str}), 'title': ('videoTitle', {str}),
'thumbnail': ('thumbnailImageUrl', {url_or_none}), 'thumbnail': ('thumbnailImageUrl', {url_or_none}),
'timestamp': ('publishDateAt', {functools.partial(float_or_none, scale=1000)}), 'timestamp': ('publishDateAt', {float_or_none(scale=1000)}),
'view_count': ('readCount', {int_or_none}), 'view_count': ('readCount', {int_or_none}),
'duration': ('duration', {int_or_none}), 'duration': ('duration', {int_or_none}),
'channel': ('channel', 'channelName', {str}), 'channel': ('channel', 'channelName', {str}),

View file

@ -3,6 +3,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
filter_dict, filter_dict,
float_or_none,
int_or_none, int_or_none,
parse_age_limit, parse_age_limit,
smuggle_url, smuggle_url,
@ -85,7 +86,7 @@ class CineverseIE(CineverseBaseIE):
'title': 'title', 'title': 'title',
'id': ('details', 'item_id'), 'id': ('details', 'item_id'),
'description': ('details', 'description'), 'description': ('details', 'description'),
'duration': ('duration', {lambda x: x / 1000}), 'duration': ('duration', {float_or_none(scale=1000)}),
'cast': ('details', 'cast', {lambda x: x.split(', ')}), 'cast': ('details', 'cast', {lambda x: x.split(', ')}),
'modified_timestamp': ('details', 'updated_by', 0, 'update_time', 'time', {int_or_none}), 'modified_timestamp': ('details', 'updated_by', 0, 'update_time', 'time', {int_or_none}),
'season_number': ('details', 'season', {int_or_none}), 'season_number': ('details', 'season', {int_or_none}),

View file

@ -8,7 +8,7 @@ class CloudflareStreamIE(InfoExtractor):
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)' _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
_EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video=' _EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video='
_ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+' _ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+'
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})' _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}(?P<domain>{_DOMAIN_RE})/|{_EMBED_RE})(?P<id>{_ID_RE})'
_EMBED_REGEX = [ _EMBED_REGEX = [
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1', rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1',
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})', rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
@ -19,7 +19,7 @@ class CloudflareStreamIE(InfoExtractor):
'id': '31c9291ab41fac05471db4e73aa11717', 'id': '31c9291ab41fac05471db4e73aa11717',
'ext': 'mp4', 'ext': 'mp4',
'title': '31c9291ab41fac05471db4e73aa11717', 'title': '31c9291ab41fac05471db4e73aa11717',
'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg', 'thumbnail': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
}, },
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
@ -30,7 +30,7 @@ class CloudflareStreamIE(InfoExtractor):
'id': '0e8e040aec776862e1d632a699edf59e', 'id': '0e8e040aec776862e1d632a699edf59e',
'ext': 'mp4', 'ext': 'mp4',
'title': '0e8e040aec776862e1d632a699edf59e', 'title': '0e8e040aec776862e1d632a699edf59e',
'thumbnail': 'https://videodelivery.net/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg', 'thumbnail': 'https://cloudflarestream.com/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg',
}, },
}, { }, {
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1', 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
@ -54,7 +54,7 @@ class CloudflareStreamIE(InfoExtractor):
'id': 'eaef9dea5159cf968be84241b5cedfe7', 'id': 'eaef9dea5159cf968be84241b5cedfe7',
'ext': 'mp4', 'ext': 'mp4',
'title': 'eaef9dea5159cf968be84241b5cedfe7', 'title': 'eaef9dea5159cf968be84241b5cedfe7',
'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg', 'thumbnail': 'https://cloudflarestream.com/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
}, },
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
@ -62,8 +62,9 @@ class CloudflareStreamIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id, domain = self._match_valid_url(url).group('id', 'domain')
domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net' if domain != 'bytehighway.net':
domain = 'cloudflarestream.com'
base_url = f'https://{domain}/{video_id}/' base_url = f'https://{domain}/{video_id}/'
if '.' in video_id: if '.' in video_id:
video_id = self._parse_json(base64.urlsafe_b64decode( video_id = self._parse_json(base64.urlsafe_b64decode(

View file

@ -1,4 +1,3 @@
import functools
import json import json
import re import re
@ -199,7 +198,7 @@ class CNNIE(InfoExtractor):
'timestamp': ('data-publish-date', {parse_iso8601}), 'timestamp': ('data-publish-date', {parse_iso8601}),
'thumbnail': ( 'thumbnail': (
'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none}, 'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none},
{functools.partial(update_url, query='c=original')}), {update_url(query='c=original')}),
'display_id': 'data-video-slug', 'display_id': 'data-video-slug',
}), }),
**traverse_obj(video_data, { **traverse_obj(video_data, {

View file

@ -25,7 +25,6 @@ import xml.etree.ElementTree
from ..compat import ( from ..compat import (
compat_etree_fromstring, compat_etree_fromstring,
compat_expanduser, compat_expanduser,
compat_os_name,
urllib_req_to_req, urllib_req_to_req,
) )
from ..cookies import LenientSimpleCookie from ..cookies import LenientSimpleCookie
@ -279,6 +278,7 @@ class InfoExtractor:
thumbnails: A list of dictionaries, with the following entries: thumbnails: A list of dictionaries, with the following entries:
* "id" (optional, string) - Thumbnail format ID * "id" (optional, string) - Thumbnail format ID
* "url" * "url"
* "ext" (optional, string) - actual image extension if not given in URL
* "preference" (optional, int) - quality of the image * "preference" (optional, int) - quality of the image
* "width" (optional, int) * "width" (optional, int)
* "height" (optional, int) * "height" (optional, int)
@ -1028,7 +1028,7 @@ class InfoExtractor:
filename = sanitize_filename(f'{basen}.dump', restricted=True) filename = sanitize_filename(f'{basen}.dump', restricted=True)
# Working around MAX_PATH limitation on Windows (see # Working around MAX_PATH limitation on Windows (see
# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx) # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
if compat_os_name == 'nt': if os.name == 'nt':
absfilepath = os.path.abspath(filename) absfilepath = os.path.abspath(filename)
if len(absfilepath) > 259: if len(absfilepath) > 259:
filename = fR'\\?\{absfilepath}' filename = fR'\\?\{absfilepath}'
@ -1409,6 +1409,13 @@ class InfoExtractor:
return None, None return None, None
self.write_debug(f'Using netrc for {netrc_machine} authentication') self.write_debug(f'Using netrc for {netrc_machine} authentication')
# compat: <=py3.10: netrc cannot parse tokens as empty strings, will return `""` instead
# Ref: https://github.com/yt-dlp/yt-dlp/issues/11413
# https://github.com/python/cpython/commit/15409c720be0503131713e3d3abc1acd0da07378
if sys.version_info < (3, 11):
return tuple(x if x != '""' else '' for x in info[::2])
return info[0], info[2] return info[0], info[2]
def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None): def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
@ -1571,7 +1578,9 @@ class InfoExtractor:
if default is not NO_DEFAULT: if default is not NO_DEFAULT:
fatal = False fatal = False
for mobj in re.finditer(JSON_LD_RE, html): for mobj in re.finditer(JSON_LD_RE, html):
json_ld_item = self._parse_json(mobj.group('json_ld'), video_id, fatal=fatal) json_ld_item = self._parse_json(
mobj.group('json_ld'), video_id, fatal=fatal,
errnote=False if default is not NO_DEFAULT else None)
for json_ld in variadic(json_ld_item): for json_ld in variadic(json_ld_item):
if isinstance(json_ld, dict): if isinstance(json_ld, dict):
yield json_ld yield json_ld

View file

@ -12,6 +12,7 @@ from ..utils import (
parse_iso8601, parse_iso8601,
strip_or_none, strip_or_none,
try_get, try_get,
urljoin,
) )
@ -112,8 +113,7 @@ class CondeNastIE(InfoExtractor):
m_paths = re.finditer( m_paths = re.finditer(
r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage) r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage)
paths = orderedSet(m.group(1) for m in m_paths) paths = orderedSet(m.group(1) for m in m_paths)
build_url = lambda path: urllib.parse.urljoin(base_url, path) entries = [self.url_result(urljoin(base_url, path), 'CondeNast') for path in paths]
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
return self.playlist_result(entries, playlist_title=title) return self.playlist_result(entries, playlist_title=title)
def _extract_video_params(self, webpage, display_id): def _extract_video_params(self, webpage, display_id):

View file

@ -456,7 +456,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
}), }),
}), }),
**traverse_obj(metadata, { **traverse_obj(metadata, {
'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}), 'duration': ('duration_ms', {float_or_none(scale=1000)}),
'timestamp': ('upload_date', {parse_iso8601}), 'timestamp': ('upload_date', {parse_iso8601}),
'series': ('series_title', {str}), 'series': ('series_title', {str}),
'series_id': ('series_id', {str}), 'series_id': ('series_id', {str}),
@ -484,7 +484,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
}), }),
}), }),
**traverse_obj(metadata, { **traverse_obj(metadata, {
'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}), 'duration': ('duration_ms', {float_or_none(scale=1000)}),
'age_limit': ('maturity_ratings', -1, {parse_age_limit}), 'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
}), }),
} }

View file

@ -10,11 +10,14 @@ from ..utils import (
OnDemandPagedList, OnDemandPagedList,
age_restricted, age_restricted,
clean_html, clean_html,
extract_attributes,
int_or_none, int_or_none,
traverse_obj, traverse_obj,
try_get, try_get,
unescapeHTML, unescapeHTML,
unsmuggle_url, unsmuggle_url,
update_url,
url_or_none,
urlencode_postdata, urlencode_postdata,
) )
@ -98,12 +101,20 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
class DailymotionIE(DailymotionBaseInfoExtractor): class DailymotionIE(DailymotionBaseInfoExtractor):
_VALID_URL = r'''(?ix) _VALID_URL = r'''(?ix)
https?:// https?://
(?:
dai\.ly/|
(?: (?:
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:(?:embed|swf|\#)/)|player(?:/\w+)?\.html\?)?video|swf)| (?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}|
(?:www\.)?lequipe\.fr/video (?:www\.)?lequipe\.fr
)/
(?:
swf/(?!video)|
(?:(?:crawler|embed|swf)/)?video/|
player(?:/[\da-z]+)?\.html\?(?:video|(?P<is_playlist>playlist))=
) )
[/=](?P<id>[^/?_&]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))? )
''' (?P<id>[^/?_&#]+)(?:[\w-]*\?playlist=(?P<playlist_id>x[0-9a-z]+))?
'''
IE_NAME = 'dailymotion' IE_NAME = 'dailymotion'
_EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1'] _EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1']
_TESTS = [{ _TESTS = [{
@ -123,7 +134,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'tags': ['hollywood', 'celeb', 'celebrity', 'movies', 'red carpet'], 'tags': ['hollywood', 'celeb', 'celebrity', 'movies', 'red carpet'],
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1aXqIx58LKWQ/x1080', 'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1cmt4ZcZ9KiM/x1080',
}, },
}, { }, {
'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true', 'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true',
@ -142,7 +153,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'tags': ['en_quete_d_esprit'], 'tags': ['en_quete_d_esprit'],
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1YNg_RUl7ueu/x1080', 'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1clTH6StrxMP/x1080',
}, },
}, { }, {
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames', 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
@ -217,6 +228,66 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
}, { }, {
'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video', 'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video',
'only_matching': True, 'only_matching': True,
}, { # playlist-only
'url': 'https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj',
'only_matching': True,
}, {
'url': 'https://geo.dailymotion.com/player/xmyye.html?video=x93blhi',
'only_matching': True,
}, {
'url': 'https://www.dailymotion.com/crawler/video/x8u4owg',
'only_matching': True,
}, {
'url': 'https://www.dailymotion.com/embed/video/x8u4owg',
'only_matching': True,
}, {
'url': 'https://dai.ly/x94cnnk',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
# https://geo.dailymotion.com/player/xmyye.html?video=x93blhi
'url': 'https://www.financialounge.com/video/2024/08/01/borse-europee-in-rosso-dopo-la-fed-a-milano-volano-mediobanca-e-tim-edizione-del-1-agosto/',
'info_dict': {
'id': 'x93blhi',
'ext': 'mp4',
'title': 'OnAir - 01/08/24',
'description': '',
'duration': 217,
'timestamp': 1722505658,
'upload_date': '20240801',
'uploader': 'Financialounge',
'uploader_id': 'x2vtgmm',
'age_limit': 0,
'tags': [],
'view_count': int,
'like_count': int,
},
}, {
# https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj
'url': 'https://www.cycleworld.com/blogs/ask-kevin/ducati-continues-to-evolve-with-v4/',
'info_dict': {
'id': 'x7wdsj',
},
'playlist_mincount': 50,
}, {
# https://www.dailymotion.com/crawler/video/x8u4owg
'url': 'https://www.leparisien.fr/environnement/video-le-veloto-la-voiture-a-pedales-qui-aimerait-se-faire-une-place-sur-les-routes-09-03-2024-KCYMCPM4WFHJXMSKBUI66UNFPU.php',
'info_dict': {
'id': 'x8u4owg',
'ext': 'mp4',
'like_count': int,
'uploader': 'Le Parisien',
'thumbnail': 'https://www.leparisien.fr/resizer/ho_GwveeYftNkLwg_cEta--5Bv4=/1200x675/cloudfront-eu-central-1.images.arcpublishing.com/leparisien/BFXJNEBN75EUNHGYJLORUC3TX4.jpg',
'upload_date': '20240309',
'view_count': int,
'timestamp': 1709997866,
'age_limit': 0,
'uploader_id': 'x32f7b',
'title': 'VIDÉO. Le «\xa0véloto\xa0», la voiture à pédales qui aimerait se faire une place sur les routes',
'duration': 428.0,
'description': 'À bord du « véloto », lalternative à la voiture pour la campagne',
'tags': ['biclou', 'vélo', 'véloto', 'campagne', 'voiture', 'environnement', 'véhicules intermédiaires'],
},
}] }]
_GEO_BYPASS = False _GEO_BYPASS = False
_COMMON_MEDIA_FIELDS = '''description _COMMON_MEDIA_FIELDS = '''description
@ -232,16 +303,35 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
for mobj in re.finditer( for mobj in re.finditer(
r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage): r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id') yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
for mobj in re.finditer(
r'(?s)<script [^>]*\bsrc=(["\'])(?:https?:)?//[\w-]+\.dailymotion\.com/player/(?:(?!\1).)+\1[^>]*>', webpage):
attrs = extract_attributes(mobj.group(0))
player_url = url_or_none(attrs.get('src'))
if not player_url:
continue
player_url = player_url.replace('.js', '.html')
if player_url.startswith('//'):
player_url = f'https:{player_url}'
if video_id := attrs.get('data-video'):
query_string = f'video={video_id}'
elif playlist_id := attrs.get('data-playlist'):
query_string = f'playlist={playlist_id}'
else:
continue
yield update_url(player_url, query=query_string)
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url) url, smuggled_data = unsmuggle_url(url)
video_id, playlist_id = self._match_valid_url(url).groups() video_id, is_playlist, playlist_id = self._match_valid_url(url).group('id', 'is_playlist', 'playlist_id')
if playlist_id: if is_playlist: # We matched the playlist query param as video_id
if self._yes_playlist(playlist_id, video_id): playlist_id = video_id
return self.url_result( video_id = None
'http://www.dailymotion.com/playlist/' + playlist_id,
'DailymotionPlaylist', playlist_id) if self._yes_playlist(playlist_id, video_id):
return self.url_result(
f'http://www.dailymotion.com/playlist/{playlist_id}',
'DailymotionPlaylist', playlist_id)
password = self.get_param('videopassword') password = self.get_param('videopassword')
media = self._call_api( media = self._call_api(
@ -282,6 +372,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
title = metadata['title'] title = metadata['title']
is_live = media.get('isOnAir') is_live = media.get('isOnAir')
formats = [] formats = []
subtitles = {}
for quality, media_list in metadata['qualities'].items(): for quality, media_list in metadata['qualities'].items():
for m in media_list: for m in media_list:
media_url = m.get('url') media_url = m.get('url')
@ -289,8 +381,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
if not media_url or media_type == 'application/vnd.lumberjack.manifest': if not media_url or media_type == 'application/vnd.lumberjack.manifest':
continue continue
if media_type == 'application/x-mpegURL': if media_type == 'application/x-mpegURL':
formats.extend(self._extract_m3u8_formats( fmt, subs = self._extract_m3u8_formats_and_subtitles(
media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False)) media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
formats.extend(fmt)
self._merge_subtitles(subs, target=subtitles)
else: else:
f = { f = {
'url': media_url, 'url': media_url,
@ -310,20 +404,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
if not f.get('fps') and f['format_id'].endswith('@60'): if not f.get('fps') and f['format_id'].endswith('@60'):
f['fps'] = 60 f['fps'] = 60
subtitles = {}
subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {} subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
for subtitle_lang, subtitle in subtitles_data.items(): for subtitle_lang, subtitle in subtitles_data.items():
subtitles[subtitle_lang] = [{ subtitles[subtitle_lang] = [{
'url': subtitle_url, 'url': subtitle_url,
} for subtitle_url in subtitle.get('urls', [])] } for subtitle_url in subtitle.get('urls', [])]
thumbnails = [] thumbnails = traverse_obj(metadata, (
for height, poster_url in metadata.get('posters', {}).items(): ('posters', 'thumbnails'), {dict.items}, lambda _, v: url_or_none(v[1]), {
thumbnails.append({ 'height': (0, {int_or_none}),
'height': int_or_none(height), 'id': (0, {str}),
'id': height, 'url': 1,
'url': poster_url, }))
})
owner = metadata.get('owner') or {} owner = metadata.get('owner') or {}
stats = media.get('stats') or {} stats = media.get('stats') or {}
@ -447,7 +539,7 @@ class DailymotionSearchIE(DailymotionPlaylistBaseIE):
class DailymotionUserIE(DailymotionPlaylistBaseIE): class DailymotionUserIE(DailymotionPlaylistBaseIE):
IE_NAME = 'dailymotion:user' IE_NAME = 'dailymotion:user'
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search|crawler)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.dailymotion.com/user/nqtv', 'url': 'https://www.dailymotion.com/user/nqtv',
'info_dict': { 'info_dict': {

View file

@ -40,7 +40,7 @@ class DangalPlayBaseIE(InfoExtractor):
'id': ('content_id', {str}), 'id': ('content_id', {str}),
'title': ('display_title', {str}), 'title': ('display_title', {str}),
'episode': ('title', {str}), 'episode': ('title', {str}),
'series': ('show_name', {str}, {lambda x: x or None}), 'series': ('show_name', {str}, filter),
'series_id': ('catalog_id', {str}), 'series_id': ('catalog_id', {str}),
'duration': ('duration', {int_or_none}), 'duration': ('duration', {int_or_none}),
'release_timestamp': ('release_date_uts', {int_or_none}), 'release_timestamp': ('release_date_uts', {int_or_none}),

View file

@ -207,7 +207,7 @@ class ERRJupiterIE(InfoExtractor):
**traverse_obj(data, { **traverse_obj(data, {
'title': ('heading', {str}), 'title': ('heading', {str}),
'alt_title': ('subHeading', {str}), 'alt_title': ('subHeading', {str}),
'description': (('lead', 'body'), {clean_html}, {lambda x: x or None}), 'description': (('lead', 'body'), {clean_html}, filter),
'timestamp': ('created', {int_or_none}), 'timestamp': ('created', {int_or_none}),
'modified_timestamp': ('updated', {int_or_none}), 'modified_timestamp': ('updated', {int_or_none}),
'release_timestamp': (('scheduleStart', 'publicStart'), {int_or_none}), 'release_timestamp': (('scheduleStart', 'publicStart'), {int_or_none}),

View file

@ -563,13 +563,13 @@ class FacebookIE(InfoExtractor):
return extract_video_data(try_get( return extract_video_data(try_get(
js_data, lambda x: x['jsmods']['instances'], list) or []) js_data, lambda x: x['jsmods']['instances'], list) or [])
def extract_dash_manifest(video, formats): def extract_dash_manifest(vid_data, formats, mpd_url=None):
dash_manifest = traverse_obj( dash_manifest = traverse_obj(
video, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', expected_type=str) vid_data, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', 'manifest_xml', expected_type=str)
if dash_manifest: if dash_manifest:
formats.extend(self._parse_mpd_formats( formats.extend(self._parse_mpd_formats(
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)), compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
mpd_url=url_or_none(video.get('dash_manifest_url')))) mpd_url=url_or_none(video.get('dash_manifest_url')) or mpd_url))
def process_formats(info): def process_formats(info):
# Downloads with browser's User-Agent are rate limited. Working around # Downloads with browser's User-Agent are rate limited. Working around
@ -619,9 +619,12 @@ class FacebookIE(InfoExtractor):
video = video['creation_story'] video = video['creation_story']
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner')) video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
video.update(reel_info) video.update(reel_info)
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
formats = [] formats = []
q = qualities(['sd', 'hd']) q = qualities(['sd', 'hd'])
# Legacy formats extraction
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'), for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'), ('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
('browser_native_sd_url', 'sd')): ('browser_native_sd_url', 'sd')):
@ -629,7 +632,7 @@ class FacebookIE(InfoExtractor):
if not playable_url: if not playable_url:
continue continue
if determine_ext(playable_url) == 'mpd': if determine_ext(playable_url) == 'mpd':
formats.extend(self._extract_mpd_formats(playable_url, video_id)) formats.extend(self._extract_mpd_formats(playable_url, video_id, fatal=False))
else: else:
formats.append({ formats.append({
'format_id': format_id, 'format_id': format_id,
@ -638,6 +641,28 @@ class FacebookIE(InfoExtractor):
'url': playable_url, 'url': playable_url,
}) })
extract_dash_manifest(fmt_data, formats) extract_dash_manifest(fmt_data, formats)
# New videoDeliveryResponse formats extraction
fmt_data = traverse_obj(video, ('videoDeliveryResponseFragment', 'videoDeliveryResponseResult'))
mpd_urls = traverse_obj(fmt_data, ('dash_manifest_urls', ..., 'manifest_url', {url_or_none}))
dash_manifests = traverse_obj(fmt_data, ('dash_manifests', lambda _, v: v['manifest_xml']))
for idx, dash_manifest in enumerate(dash_manifests):
extract_dash_manifest(dash_manifest, formats, mpd_url=traverse_obj(mpd_urls, idx))
if not dash_manifests:
# Only extract from MPD URLs if the manifests are not already provided
for mpd_url in mpd_urls:
formats.extend(self._extract_mpd_formats(mpd_url, video_id, fatal=False))
for prog_fmt in traverse_obj(fmt_data, ('progressive_urls', lambda _, v: v['progressive_url'])):
format_id = traverse_obj(prog_fmt, ('metadata', 'quality', {str.lower}))
formats.append({
'format_id': format_id,
# sd, hd formats w/o resolution info should be deprioritized below DASH
'quality': q(format_id) - 3,
'url': prog_fmt['progressive_url'],
})
for m3u8_url in traverse_obj(fmt_data, ('hls_playlist_urls', ..., 'hls_playlist_url', {url_or_none})):
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False, m3u8_id='hls'))
if not formats: if not formats:
# Do not append false positive entry w/o any formats # Do not append false positive entry w/o any formats
return return

View file

@ -0,0 +1,141 @@
import json
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
clean_html,
int_or_none,
join_nonempty,
parse_iso8601,
str_or_none,
url_or_none,
)
from ..utils.traversal import traverse_obj
class GameDevTVDashboardIE(InfoExtractor):
    # Handles gamedev.tv dashboard URLs: a bare course URL yields a playlist of
    # its lectures, a course URL with a trailing lecture id yields that lecture.
    _VALID_URL = r'https?://(?:www\.)?gamedev\.tv/dashboard/courses/(?P<course_id>\d+)(?:/(?P<lecture_id>\d+))?'
    _NETRC_MACHINE = 'gamedevtv'
    _TESTS = [{
        'url': 'https://www.gamedev.tv/dashboard/courses/25',
        'info_dict': {
            'id': '25',
            'title': 'Complete Blender Creator 3: Learn 3D Modelling for Beginners',
            'tags': ['blender', 'course', 'all', 'box modelling', 'sculpting'],
            'categories': ['Blender', '3D Art'],
            'thumbnail': 'https://gamedev-files.b-cdn.net/courses/qisc9pmu1jdc.jpg',
            'upload_date': '20220516',
            'timestamp': 1652694420,
            'modified_date': '20241027',
            'modified_timestamp': 1730049658,
        },
        'playlist_count': 100,
    }, {
        'url': 'https://www.gamedev.tv/dashboard/courses/63/2279',
        'info_dict': {
            'id': 'df04f4d8-68a4-4756-a71b-9ca9446c3a01',
            'ext': 'mp4',
            'modified_timestamp': 1701695752,
            'upload_date': '20230504',
            'episode': 'MagicaVoxel Community Course Introduction',
            'series_id': '63',
            'title': 'MagicaVoxel Community Course Introduction',
            'timestamp': 1683195397,
            'modified_date': '20231204',
            'categories': ['3D Art', 'MagicaVoxel'],
            'season': 'MagicaVoxel Community Course',
            'tags': ['MagicaVoxel', 'all', 'course'],
            'series': 'MagicaVoxel 3D Art Mini Course',
            'duration': 1405,
            'episode_number': 1,
            'season_number': 1,
            'season_id': '219',
            'description': 'md5:a378738c5bbec1c785d76c067652d650',
            'display_id': '63-219-2279',
            'alt_title': '1_CC_MVX MagicaVoxel Community Course Introduction.mp4',
            'thumbnail': 'https://vz-23691c65-6fa.b-cdn.net/df04f4d8-68a4-4756-a71b-9ca9446c3a01/thumbnail.jpg',
        },
    }]
    # Class-level dict that is mutated in place by _perform_login, so the
    # Authorization header stored there is seen by every instance of the class.
    _API_HEADERS = {}

    def _perform_login(self, username, password):
        """Log in with email/password and store the returned token as the Authorization header."""
        credentials = {
            'email': username,
            'password': password,
            'cart_items': [],
        }
        try:
            response = self._download_json(
                'https://api.gamedev.tv/api/students/login', None, 'Logging in',
                headers={'Content-Type': 'application/json'},
                data=json.dumps(credentials).encode())
        except ExtractorError as e:
            # Anything other than an HTTP 401 is propagated untouched
            if not isinstance(e.cause, HTTPError) or e.cause.status != 401:
                raise
            raise ExtractorError('Invalid username/password', expected=True)

        token_type = response['token_type']
        access_token = response['access_token']
        self._API_HEADERS['Authorization'] = f'{token_type} {access_token}'

    def _real_initialize(self):
        """Require that a login has already populated the Authorization header."""
        if self._API_HEADERS.get('Authorization'):
            return
        self.raise_login_required(
            'This content is only available with purchase', method='password')

    def _entries(self, data, course_id, course_info, selected_lecture):
        """Yield one info dict per lecture that has a valid playlist URL.

        When selected_lecture is truthy, only lectures whose stringified id
        equals it are yielded.
        """
        for section in traverse_obj(data, ('sections', ..., {dict})):
            section_info = traverse_obj(section, {
                'season_id': ('id', {str_or_none}),
                'season': ('title', {str}),
                'season_number': ('order', {int_or_none}),
            })
            playable_lectures = traverse_obj(
                section, ('lectures', lambda _, v: url_or_none(v['video']['playListUrl'])))
            for lecture in playable_lectures:
                lecture_id = lecture.get('id')
                if selected_lecture and str(lecture_id) != selected_lecture:
                    continue

                display_id = join_nonempty(course_id, section_info.get('season_id'), lecture_id)
                playlist_url = lecture['video']['playListUrl']
                formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                    playlist_url, display_id, 'mp4', m3u8_id='hls')

                lecture_info = traverse_obj(lecture, {
                    'id': ('video', 'guid', {str}),
                    'title': ('title', {str}),
                    'alt_title': ('video', 'title', {str}),
                    'description': ('description', {clean_html}),
                    'episode': ('title', {str}),
                    'episode_number': ('order', {int_or_none}),
                    'duration': ('video', 'duration_in_sec', {int_or_none}),
                    'timestamp': ('video', 'created_at', {parse_iso8601}),
                    'modified_timestamp': ('video', 'updated_at', {parse_iso8601}),
                    'thumbnail': ('video', 'thumbnailUrl', {url_or_none}),
                })

                yield {
                    **course_info,
                    **section_info,
                    # Used as the id only when lecture_info carries no video guid
                    'id': display_id,
                    'display_id': display_id,
                    'formats': formats,
                    'subtitles': subtitles,
                    'series': course_info.get('title'),
                    'series_id': course_id,
                    # Lecture-level values go last so they override the course-level ones
                    **lecture_info,
                }

    def _real_extract(self, url):
        """Return a single lecture when the URL names one, otherwise a playlist of the course."""
        mobj = self._match_valid_url(url)
        course_id = mobj.group('course_id')
        lecture_id = mobj.group('lecture_id')

        response = self._download_json(
            f'https://api.gamedev.tv/api/courses/my/{course_id}', course_id,
            headers=self._API_HEADERS)
        data = response['data']

        course_info = traverse_obj(data, {
            'title': ('title', {str}),
            'tags': ('tags', ..., 'name', {str}),
            'categories': ('categories', ..., 'title', {str}),
            'timestamp': ('created_at', {parse_iso8601}),
            'modified_timestamp': ('updated_at', {parse_iso8601}),
            'thumbnail': ('image', {url_or_none}),
        })

        entries = self._entries(data, course_id, course_info, lecture_id)
        if not lecture_id:
            return self.playlist_result(entries, course_id, **course_info)

        # Only the first matching lecture is taken from the generator
        lecture = next(entries, None)
        if not lecture:
            raise ExtractorError('Lecture not found')
        return lecture

View file

@ -5,56 +5,63 @@ import hashlib
import hmac import hmac
import json import json
import os import os
import re
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none,
js_to_json,
remove_end,
traverse_obj, traverse_obj,
unescapeHTML,
) )
class GoPlayIE(InfoExtractor): class GoPlayIE(InfoExtractor):
_VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/]+/[^/]+/|)(?P<display_id>[^/#]+)' _VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/?#]+/[^/?#]+/|)(?P<id>[^/#]+)'
_NETRC_MACHINE = 'goplay' _NETRC_MACHINE = 'goplay'
_TESTS = [{ _TESTS = [{
'url': 'https://www.goplay.be/video/de-container-cup/de-container-cup-s3/de-container-cup-s3-aflevering-2#autoplay', 'url': 'https://www.goplay.be/video/de-slimste-mens-ter-wereld/de-slimste-mens-ter-wereld-s22/de-slimste-mens-ter-wereld-s22-aflevering-1',
'info_dict': { 'info_dict': {
'id': '9c4214b8-e55d-4e4b-a446-f015f6c6f811', 'id': '2baa4560-87a0-421b-bffc-359914e3c387',
'ext': 'mp4', 'ext': 'mp4',
'title': 'S3 - Aflevering 2', 'title': 'S22 - Aflevering 1',
'series': 'De Container Cup', 'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}',
'season': 'Season 3', 'series': 'De Slimste Mens ter Wereld',
'season_number': 3, 'episode': 'Episode 1',
'episode': 'Episode 2', 'season_number': 22,
'episode_number': 2, 'episode_number': 1,
'season': 'Season 22',
}, },
'params': {'skip_download': True},
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
}, { }, {
'url': 'https://www.goplay.be/video/a-family-for-thr-holidays-s1-aflevering-1#autoplay', 'url': 'https://www.goplay.be/video/1917',
'info_dict': { 'info_dict': {
'id': '74e3ed07-748c-49e4-85a0-393a93337dbf', 'id': '40cac41d-8d29-4ef5-aa11-75047b9f0907',
'ext': 'mp4', 'ext': 'mp4',
'title': 'A Family for the Holidays', 'title': '1917',
'description': r're:Op het hoogtepunt van de Eerste Wereldoorlog krijgen twee jonge .{94}',
}, },
'params': {'skip_download': True},
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
}, { }, {
'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay', 'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
'info_dict': { 'info_dict': {
'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656', 'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee',
'ext': 'mp4', 'ext': 'mp4',
'title': 'S11 - Aflevering 1', 'title': 'S11 - Aflevering 1',
'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten .{102}',
'episode': 'Episode 1', 'episode': 'Episode 1',
'series': 'De Mol', 'series': 'De Mol',
'season_number': 11, 'season_number': 11,
'episode_number': 1, 'episode_number': 1,
'season': 'Season 11', 'season': 'Season 11',
}, },
'params': { 'params': {'skip_download': True},
'skip_download': True,
},
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
}] }]
@ -69,27 +76,42 @@ class GoPlayIE(InfoExtractor):
if not self._id_token: if not self._id_token:
raise self.raise_login_required(method='password') raise self.raise_login_required(method='password')
def _real_extract(self, url): def _find_json(self, s):
url, display_id = self._match_valid_url(url).group(0, 'display_id') return self._search_json(
webpage = self._download_webpage(url, display_id) r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
video_data_json = self._html_search_regex(r'<div\s+data-hero="([^"]+)"', webpage, 'video_data')
video_data = self._parse_json(unescapeHTML(video_data_json), display_id).get('data')
movie = video_data.get('movie') def _real_extract(self, url):
if movie: display_id = self._match_id(url)
video_id = movie['videoUuid'] webpage = self._download_webpage(url, display_id)
info_dict = {
'title': movie.get('title'), nextjs_data = traverse_obj(
} re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
else: (..., {js_to_json}, {json.loads}, ..., {self._find_json}, ...))
episode = traverse_obj(video_data, ('playlists', ..., 'episodes', lambda _, v: v['pageInfo']['url'] == url), get_all=False) meta = traverse_obj(nextjs_data, (
video_id = episode['videoUuid'] ..., lambda _, v: v['meta']['path'] == urllib.parse.urlparse(url).path, 'meta', any))
info_dict = {
'title': episode.get('episodeTitle'), video_id = meta['uuid']
'series': traverse_obj(episode, ('program', 'title')), info_dict = traverse_obj(meta, {
'season_number': episode.get('seasonNumber'), 'title': ('title', {str}),
'episode_number': episode.get('episodeNumber'), 'description': ('description', {str.strip}),
} })
if traverse_obj(meta, ('program', 'subtype')) != 'movie':
for season_data in traverse_obj(nextjs_data, (..., 'children', ..., 'playlists', ...)):
episode_data = traverse_obj(
season_data, ('videos', lambda _, v: v['videoId'] == video_id, any))
if not episode_data:
continue
episode_title = traverse_obj(
episode_data, 'contextualTitle', 'episodeTitle', expected_type=str)
info_dict.update({
'title': episode_title or info_dict.get('title'),
'series': remove_end(info_dict.get('title'), f' - {episode_title}'),
'season_number': traverse_obj(season_data, ('season', {int_or_none})),
'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})),
})
break
api = self._download_json( api = self._download_json(
f'https://api.goplay.be/web/v1/videos/long-form/{video_id}', f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',

View file

@ -1,4 +1,3 @@
import functools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -63,7 +62,7 @@ class IlPostIE(InfoExtractor):
'url': ('podcast_raw_url', {url_or_none}), 'url': ('podcast_raw_url', {url_or_none}),
'thumbnail': ('image', {url_or_none}), 'thumbnail': ('image', {url_or_none}),
'timestamp': ('timestamp', {int_or_none}), 'timestamp': ('timestamp', {int_or_none}),
'duration': ('milliseconds', {functools.partial(float_or_none, scale=1000)}), 'duration': ('milliseconds', {float_or_none(scale=1000)}),
'availability': ('free', {lambda v: 'public' if v else 'subscriber_only'}), 'availability': ('free', {lambda v: 'public' if v else 'subscriber_only'}),
}), }),
} }

View file

@ -326,11 +326,11 @@ class JioCinemaIE(JioCinemaBaseIE):
# fallback metadata # fallback metadata
'title': ('name', {str}), 'title': ('name', {str}),
'description': ('fullSynopsis', {str}), 'description': ('fullSynopsis', {str}),
'series': ('show', 'name', {str}, {lambda x: x or None}), 'series': ('show', 'name', {str}, filter),
'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}), 'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}),
'season_number': ('episode', 'season', {int_or_none}, {lambda x: x or None}), 'season_number': ('episode', 'season', {int_or_none}, filter),
'episode': ('fullTitle', {str}), 'episode': ('fullTitle', {str}),
'episode_number': ('episode', 'episodeNo', {int_or_none}, {lambda x: x or None}), 'episode_number': ('episode', 'episodeNo', {int_or_none}, filter),
'age_limit': ('ageNemonic', {parse_age_limit}), 'age_limit': ('ageNemonic', {parse_age_limit}),
'duration': ('totalDuration', {float_or_none}), 'duration': ('totalDuration', {float_or_none}),
'thumbnail': ('images', {url_or_none}), 'thumbnail': ('images', {url_or_none}),
@ -338,10 +338,10 @@ class JioCinemaIE(JioCinemaBaseIE):
**traverse_obj(metadata, ('result', 0, { **traverse_obj(metadata, ('result', 0, {
'title': ('fullTitle', {str}), 'title': ('fullTitle', {str}),
'description': ('fullSynopsis', {str}), 'description': ('fullSynopsis', {str}),
'series': ('showName', {str}, {lambda x: x or None}), 'series': ('showName', {str}, filter),
'season': ('seasonName', {str}, {lambda x: x or None}), 'season': ('seasonName', {str}, filter),
'season_number': ('season', {int_or_none}), 'season_number': ('season', {int_or_none}),
'season_id': ('seasonId', {str}, {lambda x: x or None}), 'season_id': ('seasonId', {str}, filter),
'episode': ('fullTitle', {str}), 'episode': ('fullTitle', {str}),
'episode_number': ('episode', {int_or_none}), 'episode_number': ('episode', {int_or_none}),
'timestamp': ('uploadTime', {int_or_none}), 'timestamp': ('uploadTime', {int_or_none}),

160
yt_dlp/extractor/kenh14.py Normal file
View file

@ -0,0 +1,160 @@
from .common import InfoExtractor
from ..utils import (
clean_html,
extract_attributes,
get_element_by_class,
get_element_html_by_attribute,
get_elements_html_by_class,
int_or_none,
parse_duration,
parse_iso8601,
remove_start,
strip_or_none,
unescapeHTML,
update_url,
url_or_none,
)
from ..utils.traversal import traverse_obj
class Kenh14VideoIE(InfoExtractor):
    # Single-video pages on video.kenh14.vn; the numeric suffix of the URL is the id.
    _VALID_URL = r'https?://video\.kenh14\.vn/(?:video/)?[\w-]+-(?P<id>[0-9]+)\.chn'
    _TESTS = [{
        'url': 'https://video.kenh14.vn/video/mo-hop-iphone-14-pro-max-nguon-unbox-therapy-316173.chn',
        'md5': '1ed67f9c3a1e74acf15db69590cf6210',
        'info_dict': {
            'id': '316173',
            'ext': 'mp4',
            'title': 'Video mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
            'description': 'Video mở hộp iPhone 14 Pro MaxVideo mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)',
            'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
            'tags': [],
            'uploader': 'Unbox Therapy',
            'upload_date': '20220517',
            'view_count': int,
            'duration': 722.86,
            'timestamp': 1652764468,
        },
    }, {
        'url': 'https://video.kenh14.vn/video-316174.chn',
        'md5': '2b41877d2afaf4a3f487ceda8e5c7cbd',
        'info_dict': {
            'id': '316174',
            'ext': 'mp4',
            'title': 'Khoảnh khắc VĐV nằm gục khóc sau chiến thắng: 7 năm trời Việt Nam mới có HCV kiếm chém nữ, chỉ có 8 tháng để khổ luyện trước khi lên sàn đấu',
            'description': 'md5:de86aa22e143e2b277bce8ec9c6f17dc',
            'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$',
            'tags': [],
            'upload_date': '20220517',
            'view_count': int,
            'duration': 70.04,
            'timestamp': 1652766021,
        },
    }, {
        'url': 'https://video.kenh14.vn/0-344740.chn',
        'md5': 'b843495d5e728142c8870c09b46df2a9',
        'info_dict': {
            'id': '344740',
            'ext': 'mov',
            'title': 'Kỳ Duyên đầy căng thẳng trong buổi ra quân đi Miss Universe, nghi thức tuyên thuệ lần đầu xuất hiện gây nhiều tranh cãi',
            'description': 'md5:2a2dbb4a7397169fb21ee68f09160497',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.jpg$',
            'tags': ['kỳ duyên', 'Kỳ Duyên tuyên thuệ', 'miss universe'],
            'uploader': 'Quang Vũ',
            'upload_date': '20241024',
            'view_count': int,
            'duration': 198.88,
            'timestamp': 1729741590,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict from the page's VideoStream element, the metadata API and the page markup."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        player_html = get_element_html_by_attribute('type', 'VideoStream', webpage)
        attrs = extract_attributes(player_html or '')
        # Mandatory: a missing 'data-vid' attribute raises here
        direct_url = attrs['data-vid']

        file_name = remove_start(direct_url, 'kenh14cdn.com/')
        metadata = self._download_json(
            f'https://api.kinghub.vn/video/api/v1/detailVideoByGet?FileName={file_name}',
            video_id, fatal=False)

        # The direct file URL is always offered as a plain HTTP format
        formats = [{'url': f'https://{direct_url}', 'format_id': 'http', 'quality': 1}]
        subtitles = {}

        video_data = self._download_json(
            f'https://{direct_url}.json', video_id, note='Downloading video data', fatal=False)
        # Optional manifests, processed HLS first and DASH second
        manifest_extractors = (
            ('hls', self._extract_m3u8_formats_and_subtitles, {'m3u8_id': 'hls'}),
            ('mpd', self._extract_mpd_formats_and_subtitles, {'mpd_id': 'dash'}),
        )
        for data_key, extract_manifest, id_kwarg in manifest_extractors:
            manifest_url = traverse_obj(video_data, (data_key, {url_or_none}))
            if not manifest_url:
                continue
            fmts, subs = extract_manifest(manifest_url, video_id, fatal=False, **id_kwarg)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        metadata_fields = traverse_obj(metadata, {
            'duration': ('duration', {parse_duration}),
            'uploader': ('author', {strip_or_none}),
            'timestamp': ('uploadtime', {parse_iso8601(delimiter=' ')}),
            'view_count': ('views', {int_or_none}),
        })

        # Title: API metadata first, then OpenGraph, then the 'vdbw-title' element
        title = traverse_obj(metadata, ('title', {strip_or_none}))
        if not title:
            title = clean_html(self._og_search_title(webpage))
        if not title:
            title = clean_html(get_element_by_class('vdbw-title', webpage))

        # Description: OpenGraph first, then the 'vdbw-sapo' element
        description = clean_html(self._og_search_description(webpage))
        if not description:
            description = clean_html(get_element_by_class('vdbw-sapo', webpage))

        thumbnail = self._og_search_thumbnail(webpage) or attrs.get('data-thumb')

        # The keywords meta tag is split on ';' and empty pieces are dropped
        keywords = self._html_search_meta('keywords', webpage)
        tags = traverse_obj(keywords, ({lambda x: x.split(';')}, ..., filter))

        return {
            **metadata_fields,
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'description': description,
            'thumbnail': thumbnail,
            'tags': tags,
        }
class Kenh14PlaylistIE(InfoExtractor):
    # Playlist pages on video.kenh14.vn; each 'video-item' element becomes a Kenh14VideoIE entry.
    _VALID_URL = r'https?://video\.kenh14\.vn/playlist/[\w-]+-(?P<id>[0-9]+)\.chn'
    _TESTS = [{
        'url': 'https://video.kenh14.vn/playlist/tran-tinh-naked-love-mua-2-71.chn',
        'info_dict': {
            'id': '71',
            'title': 'Trần Tình (Naked love) mùa 2',
            'description': 'md5:e9522339304956dea931722dd72eddb2',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
        },
        'playlist_count': 9,
    }, {
        'url': 'https://video.kenh14.vn/playlist/0-72.chn',
        'info_dict': {
            'id': '72',
            'title': 'Lau Lại Đầu Từ',
            'description': 'Cùng xem xưa và nay có gì khác biệt nhé!',
            'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$',
        },
        'playlist_count': 6,
    }]

    def _real_extract(self, url):
        """Collect the playlist's video items plus title, description and thumbnail from the page."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        category_detail = get_element_by_class('category-detail', webpage) or ''
        # First JSON-LD object that has both a name and an alternateName, else an empty dict
        embed_info = traverse_obj(
            self._yield_json_ld(webpage, playlist_id),
            (lambda _, v: v['name'] and v['alternateName'], any)) or {}

        video_items = get_elements_html_by_class('video-item', webpage)

        # Title and description prefer the 'category-detail' markup and fall back to JSON-LD
        title = clean_html(get_element_by_class('name', category_detail))
        if not title:
            title = unescapeHTML(embed_info.get('name'))

        description = clean_html(get_element_by_class('description', category_detail))
        if not description:
            description = unescapeHTML(embed_info.get('alternateName'))

        # The OpenGraph thumbnail is kept only if it is a valid URL; its query string is removed
        thumbnail = traverse_obj(
            self._og_search_thumbnail(webpage),
            ({url_or_none}, {update_url(query=None)}))

        def video_page_url(item_html):
            # Each item's 'data-id' attribute is substituted into the video page URL
            return 'https://video.kenh14.vn/video/video-{}.chn'.format(extract_attributes(item_html)['data-id'])

        return self.playlist_from_matches(
            video_items, playlist_id, title,
            getter=video_page_url, ie=Kenh14VideoIE,
            playlist_description=description, thumbnail=thumbnail)

View file

@ -1,4 +1,3 @@
import functools
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest from ..networking import HEADRequest
@ -137,7 +136,7 @@ class KickVODIE(KickBaseIE):
'uploader': ('livestream', 'channel', 'user', 'username', {str}), 'uploader': ('livestream', 'channel', 'user', 'username', {str}),
'uploader_id': ('livestream', 'channel', 'user_id', {int}, {str_or_none}), 'uploader_id': ('livestream', 'channel', 'user_id', {int}, {str_or_none}),
'timestamp': ('created_at', {parse_iso8601}), 'timestamp': ('created_at', {parse_iso8601}),
'duration': ('livestream', 'duration', {functools.partial(float_or_none, scale=1000)}), 'duration': ('livestream', 'duration', {float_or_none(scale=1000)}),
'thumbnail': ('livestream', 'thumbnail', {url_or_none}), 'thumbnail': ('livestream', 'thumbnail', {url_or_none}),
'categories': ('livestream', 'categories', ..., 'name', {str}), 'categories': ('livestream', 'categories', ..., 'name', {str}),
'view_count': ('views', {int_or_none}), 'view_count': ('views', {int_or_none}),

View file

@ -119,7 +119,7 @@ class KikaIE(InfoExtractor):
'width': ('frameWidth', {int_or_none}), 'width': ('frameWidth', {int_or_none}),
'height': ('frameHeight', {int_or_none}), 'height': ('frameHeight', {int_or_none}),
# NB: filesize is 0 if unknown, bitrate is -1 if unknown # NB: filesize is 0 if unknown, bitrate is -1 if unknown
'filesize': ('fileSize', {int_or_none}, {lambda x: x or None}), 'filesize': ('fileSize', {int_or_none}, filter),
'abr': ('bitrateAudio', {int_or_none}, {lambda x: None if x == -1 else x}), 'abr': ('bitrateAudio', {int_or_none}, {lambda x: None if x == -1 else x}),
'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}), 'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}),
}), }),

View file

@ -32,7 +32,7 @@ class LaracastsBaseIE(InfoExtractor):
VimeoIE, url_transparent=True, VimeoIE, url_transparent=True,
**traverse_obj(episode, { **traverse_obj(episode, {
'id': ('id', {int}, {str_or_none}), 'id': ('id', {int}, {str_or_none}),
'webpage_url': ('path', {lambda x: urljoin('https://laracasts.com', x)}), 'webpage_url': ('path', {urljoin('https://laracasts.com')}),
'title': ('title', {clean_html}), 'title': ('title', {clean_html}),
'season_number': ('chapter', {int_or_none}), 'season_number': ('chapter', {int_or_none}),
'episode_number': ('position', {int_or_none}), 'episode_number': ('position', {int_or_none}),
@ -104,7 +104,7 @@ class LaracastsPlaylistIE(LaracastsBaseIE):
'description': ('body', {clean_html}), 'description': ('body', {clean_html}),
'thumbnail': (('large_thumbnail', 'thumbnail'), {url_or_none}, any), 'thumbnail': (('large_thumbnail', 'thumbnail'), {url_or_none}, any),
'duration': ('runTime', {parse_duration}), 'duration': ('runTime', {parse_duration}),
'categories': ('taxonomy', 'name', {str}, {lambda x: x and [x]}), 'categories': ('taxonomy', 'name', {str}, all, filter),
'tags': ('topics', ..., 'name', {str}), 'tags': ('topics', ..., 'name', {str}),
'modified_date': ('lastUpdated', {unified_strdate}), 'modified_date': ('lastUpdated', {unified_strdate}),
}), }),

View file

@ -66,7 +66,7 @@ class LBRYBaseIE(InfoExtractor):
'license': ('value', 'license', {str}), 'license': ('value', 'license', {str}),
'timestamp': ('timestamp', {int_or_none}), 'timestamp': ('timestamp', {int_or_none}),
'release_timestamp': ('value', 'release_time', {int_or_none}), 'release_timestamp': ('value', 'release_time', {int_or_none}),
'tags': ('value', 'tags', ..., {lambda x: x or None}), 'tags': ('value', 'tags', ..., filter),
'duration': ('value', stream_type, 'duration', {int_or_none}), 'duration': ('value', stream_type, 'duration', {int_or_none}),
'channel': ('signing_channel', 'value', 'title', {str}), 'channel': ('signing_channel', 'value', 'title', {str}),
'channel_id': ('signing_channel', 'claim_id', {str}), 'channel_id': ('signing_channel', 'claim_id', {str}),

View file

@ -6,13 +6,11 @@ from ..utils import (
ExtractorError, ExtractorError,
clean_html, clean_html,
extract_attributes, extract_attributes,
get_element_by_class,
get_element_html_by_id,
join_nonempty, join_nonempty,
parse_duration, parse_duration,
unified_timestamp, unified_timestamp,
) )
from ..utils.traversal import traverse_obj from ..utils.traversal import find_element, traverse_obj
class LearningOnScreenIE(InfoExtractor): class LearningOnScreenIE(InfoExtractor):
@ -32,28 +30,24 @@ class LearningOnScreenIE(InfoExtractor):
def _real_initialize(self): def _real_initialize(self):
if not self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-BOB-LIVE'): if not self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-BOB-LIVE'):
self.raise_login_required( self.raise_login_required(method='session_cookies')
'Use --cookies for authentication. See '
' https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp '
'for how to manually pass cookies', method=None)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
details = traverse_obj(webpage, ( details = traverse_obj(webpage, (
{functools.partial(get_element_html_by_id, 'programme-details')}, { {find_element(id='programme-details', html=True)}, {
'title': ({functools.partial(re.search, r'<h2>([^<]+)</h2>')}, 1, {clean_html}), 'title': ({find_element(tag='h2')}, {clean_html}),
'timestamp': ( 'timestamp': (
{functools.partial(get_element_by_class, 'broadcast-date')}, {find_element(cls='broadcast-date')},
{functools.partial(re.match, r'([^<]+)')}, 1, {unified_timestamp}), {functools.partial(re.match, r'([^<]+)')}, 1, {unified_timestamp}),
'duration': ( 'duration': (
{functools.partial(get_element_by_class, 'prog-running-time')}, {find_element(cls='prog-running-time')}, {clean_html}, {parse_duration}),
{clean_html}, {parse_duration}),
})) }))
title = details.pop('title', None) or traverse_obj(webpage, ( title = details.pop('title', None) or traverse_obj(webpage, (
{functools.partial(get_element_html_by_id, 'add-to-existing-playlist')}, {find_element(id='add-to-existing-playlist', html=True)},
{extract_attributes}, 'data-record-title', {clean_html})) {extract_attributes}, 'data-record-title', {clean_html}))
entries = self._parse_html5_media_entries( entries = self._parse_html5_media_entries(

View file

@ -6,12 +6,10 @@ from ..utils import (
extract_attributes, extract_attributes,
get_element_by_class, get_element_by_class,
get_element_html_by_id, get_element_html_by_id,
get_element_text_and_html_by_tag,
parse_duration, parse_duration,
strip_or_none, strip_or_none,
traverse_obj,
try_call,
) )
from ..utils.traversal import find_element, traverse_obj
class ListenNotesIE(InfoExtractor): class ListenNotesIE(InfoExtractor):
@ -22,14 +20,14 @@ class ListenNotesIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'KrDgvNb_u1n', 'id': 'KrDgvNb_u1n',
'ext': 'mp3', 'ext': 'mp3',
'title': 'md5:32236591a921adf17bbdbf0441b6c0e9', 'title': r're:Tim OReilly on noticing things other people .{113}',
'description': 'md5:c581ed197eeddcee55a67cdb547c8cbd', 'description': r're:(?s)We shape reality by what we notice and .{27459}',
'duration': 2148.0, 'duration': 2215.0,
'channel': 'Thriving on Overload', 'channel': 'Amplifying Cognition',
'channel_id': 'ed84wITivxF', 'channel_id': 'ed84wITivxF',
'episode_id': 'e1312583fa7b4e24acfbb5131050be00', 'episode_id': 'e1312583fa7b4e24acfbb5131050be00',
'thumbnail': 'https://production.listennotes.com/podcasts/thriving-on-overload-ross-dawson-1wb_KospA3P-ed84wITivxF.300x300.jpg', 'thumbnail': 'https://cdn-images-3.listennotes.com/podcasts/amplifying-cognition-ross-dawson-Iemft4Gdr0k-ed84wITivxF.300x300.jpg',
'channel_url': 'https://www.listennotes.com/podcasts/thriving-on-overload-ross-dawson-ed84wITivxF/', 'channel_url': 'https://www.listennotes.com/podcasts/amplifying-cognition-ross-dawson-ed84wITivxF/',
'cast': ['Tim OReilly', 'Cookie Monster', 'Lao Tzu', 'Wallace Steven', 'Eric Raymond', 'Christine Peterson', 'John Maynard Keyne', 'Ross Dawson'], 'cast': ['Tim OReilly', 'Cookie Monster', 'Lao Tzu', 'Wallace Steven', 'Eric Raymond', 'Christine Peterson', 'John Maynard Keyne', 'Ross Dawson'],
}, },
}, { }, {
@ -39,13 +37,13 @@ class ListenNotesIE(InfoExtractor):
'id': 'lwEA3154JzG', 'id': 'lwEA3154JzG',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Episode 177: WireGuard with Jason Donenfeld', 'title': 'Episode 177: WireGuard with Jason Donenfeld',
'description': 'md5:24744f36456a3e95f83c1193a3458594', 'description': r're:(?s)Jason Donenfeld lead developer joins us this hour to discuss WireGuard, .{3169}',
'duration': 3861.0, 'duration': 3861.0,
'channel': 'Ask Noah Show', 'channel': 'Ask Noah Show',
'channel_id': '4DQTzdS5-j7', 'channel_id': '4DQTzdS5-j7',
'episode_id': '8c8954b95e0b4859ad1eecec8bf6d3a4', 'episode_id': '8c8954b95e0b4859ad1eecec8bf6d3a4',
'channel_url': 'https://www.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-4DQTzdS5-j7/', 'channel_url': 'https://www.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-4DQTzdS5-j7/',
'thumbnail': 'https://production.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-cfbRUw9Gs3F-4DQTzdS5-j7.300x300.jpg', 'thumbnail': 'https://cdn-images-3.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-gD7vG150cxf-4DQTzdS5-j7.300x300.jpg',
'cast': ['noah showlink', 'noah show', 'noah dashboard', 'jason donenfeld'], 'cast': ['noah showlink', 'noah show', 'noah dashboard', 'jason donenfeld'],
}, },
}] }]
@ -70,7 +68,7 @@ class ListenNotesIE(InfoExtractor):
'id': audio_id, 'id': audio_id,
'url': data['audio'], 'url': data['audio'],
'title': (data.get('data-title') 'title': (data.get('data-title')
or try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0]) or traverse_obj(webpage, ({find_element(tag='h1')}, {clean_html}))
or self._html_search_meta(('og:title', 'title', 'twitter:title'), webpage, 'title')), or self._html_search_meta(('og:title', 'title', 'twitter:title'), webpage, 'title')),
'description': (self._clean_description(get_element_by_class('ln-text-p', webpage)) 'description': (self._clean_description(get_element_by_class('ln-text-p', webpage))
or strip_or_none(description)), or strip_or_none(description)),

View file

@ -114,7 +114,7 @@ class LSMLREmbedIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
query = parse_qs(url) query = parse_qs(url)
video_id = traverse_obj(query, ( video_id = traverse_obj(query, (
('show', 'id'), 0, {int_or_none}, {lambda x: x or None}, {str_or_none}), get_all=False) ('show', 'id'), 0, {int_or_none}, filter, {str_or_none}), get_all=False)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
player_data, media_data = self._search_regex( player_data, media_data = self._search_regex(

View file

@ -57,6 +57,6 @@ class MagentaMusikIE(InfoExtractor):
'duration': ('runtimeInSeconds', {int_or_none}), 'duration': ('runtimeInSeconds', {int_or_none}),
'location': ('countriesOfProduction', {list}, {lambda x: join_nonempty(*x, delim=', ')}), 'location': ('countriesOfProduction', {list}, {lambda x: join_nonempty(*x, delim=', ')}),
'release_year': ('yearOfProduction', {int_or_none}), 'release_year': ('yearOfProduction', {int_or_none}),
'categories': ('mainGenre', {str}, {lambda x: x and [x]}), 'categories': ('mainGenre', {str}, all, filter),
})), })),
} }

View file

@ -17,7 +17,7 @@ class MediaStreamBaseIE(InfoExtractor):
_BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)' _BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)'
def _extract_mediastream_urls(self, webpage): def _extract_mediastream_urls(self, webpage):
yield from traverse_obj(list(self._yield_json_ld(webpage, None, fatal=False)), ( yield from traverse_obj(list(self._yield_json_ld(webpage, None, default={})), (
lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'), lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'),
{lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None})) {lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None}))

View file

@ -1,291 +0,0 @@
import functools
import json
import uuid
from .common import InfoExtractor
from ..utils import (
ExtractorError,
OnDemandPagedList,
determine_ext,
dict_get,
float_or_none,
traverse_obj,
)
class MildomBaseIE(InfoExtractor):
    """Common base for the Mildom extractors, providing the JSON API helper."""

    # Guest identifier sent with every API request; created lazily on first call
    _GUEST_ID = None

    def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None):
        """Request a Mildom API endpoint and return the ``body`` of its JSON reply.

        @param url       API endpoint to request
        @param video_id  ID passed through to ``_download_json``
        @param query     Extra query parameters, merged over the guest/platform defaults
        @param note      Progress note passed through to ``_download_json``
        @param body      Optional object; when truthy it is JSON-encoded and sent as
                         request data with a JSON ``Content-Type`` header
        @raises ExtractorError  (expected) when the reply's ``code`` is not 0
        """
        if not self._GUEST_ID:
            self._GUEST_ID = f'pc-gp-{uuid.uuid4()}'

        # Only attach a payload (and its content type) when a body was given
        if body:
            payload = json.dumps(body).encode()
            request_headers = {'Content-Type': 'application/json'}
        else:
            payload = None
            request_headers = {}

        api_query = {
            '__guest_id': self._GUEST_ID,
            '__platform': 'web',
            **(query or {}),
        }
        content = self._download_json(
            url, video_id, note=note, data=payload,
            headers=request_headers, query=api_query)

        if content['code'] == 0:
            return content['body']

        raise ExtractorError(
            f'Mildom says: {content["message"]} (code {content["code"]})',
            expected=True)
class MildomIE(MildomBaseIE):
    """Extractor for the ongoing live stream of a Mildom user."""

    IE_NAME = 'mildom'
    IE_DESC = 'Record ongoing live by specific user in Mildom'
    # NOTE(review): the "www."/"m." prefix group is not optional in this pattern - confirm that is intended
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/(?P<id>\d+)'

    def _real_extract(self, url):
        """Fetch live metadata, the server list and a playback token, then build HLS formats."""
        user_id = self._match_id(url)
        webpage = self._download_webpage(f'https://www.mildom.com/{user_id}', user_id)

        studio = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', user_id,
            note='Downloading live metadata', query={'user_id': user_id})

        # Use the API's log_id as the resulting ID, falling back to the user ID from the URL
        live_id = studio.get('log_id', user_id)

        server_info = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', live_id,
            note='Downloading live server list', query={
                'user_id': user_id,
                'live_server_type': 'hls',
            })

        token_reply = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/live/token', live_id,
            note='Obtaining live playback token', body={'host_id': user_id, 'type': 'hls'})
        playback_token = traverse_obj(token_reply, ('data', ..., 'token'), get_all=False)
        if not playback_token:
            raise ExtractorError('Failed to obtain live playback token')

        manifest_headers = {
            'Referer': 'https://www.mildom.com/',
            'Origin': 'https://www.mildom.com',
        }
        formats = self._extract_m3u8_formats(
            f'{server_info["stream_server"]}/{user_id}_master.m3u8?{playback_token}',
            live_id, 'mp4', headers=manifest_headers)

        # Also record the Referer on every format's own http_headers
        for fmt in formats:
            fmt.setdefault('http_headers', {})['Referer'] = 'https://www.mildom.com/'

        # Page meta tags are preferred; the API fields act as fallbacks
        title = (self._html_search_meta('twitter:description', webpage, default=None)
                 or traverse_obj(studio, 'anchor_intro'))
        description = traverse_obj(studio, 'intro', 'live_intro', expected_type=str)
        timestamp = float_or_none(studio.get('live_start_ms'), scale=1000)
        uploader = (self._html_search_meta('twitter:title', webpage, default=None)
                    or traverse_obj(studio, 'loginname'))

        return {
            'id': live_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'uploader': uploader,
            'uploader_id': user_id,
            'formats': formats,
            'is_live': True,
        }
class MildomVodIE(MildomBaseIE):
    """Extractor for a single Mildom VOD (playback) page."""

    IE_NAME = 'mildom:vod'
    IE_DESC = 'VOD in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P<user_id>\d+)/(?P<id>(?P=user_id)-[a-zA-Z0-9]+-?[0-9]*)'
    _TESTS = [{
        'url': 'https://www.mildom.com/playback/10882672/10882672-1597662269',
        'info_dict': {
            'id': '10882672-1597662269',
            'ext': 'mp4',
            'title': '始めてのミルダム配信じゃぃ!',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'upload_date': '20200817',
            'duration': 4138.37,
            'description': 'ゲームをしたくて!',
            'timestamp': 1597662269.0,
            'uploader_id': '10882672',
            'uploader': 'kson組長(けいそん)',
        },
    }, {
        'url': 'https://www.mildom.com/playback/10882672/10882672-1597758589870-477',
        'info_dict': {
            'id': '10882672-1597758589870-477',
            'ext': 'mp4',
            'title': '【kson】感染メイズ麻酔銃で無双する',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'timestamp': 1597759093.0,
            'uploader': 'kson組長(けいそん)',
            'duration': 4302.58,
            'uploader_id': '10882672',
            'description': 'このステージ絶対乗り越えたい',
            'upload_date': '20200818',
        },
    }, {
        'url': 'https://www.mildom.com/playback/10882672/10882672-buha9td2lrn97fk2jme0',
        'info_dict': {
            'id': '10882672-buha9td2lrn97fk2jme0',
            'ext': 'mp4',
            'title': '【kson組長】CART RACER!!!',
            'thumbnail': r're:^https?://.*\.(png|jpg)$',
            'uploader_id': '10882672',
            'uploader': 'kson組長(けいそん)',
            'upload_date': '20201104',
            'timestamp': 1604494797.0,
            'duration': 4657.25,
            'description': 'WTF',
        },
    }]

    def _real_extract(self, url):
        """Build one audio-only format plus one video format per entry in ``video_link``.

        The stream URLs are mandatory and still accessed strictly. The width is
        optional metadata, so it is left as None when the source dimensions are
        missing or zero rather than failing the whole extraction.
        """
        user_id, video_id = self._match_valid_url(url).group('user_id', 'id')
        webpage = self._download_webpage(f'https://www.mildom.com/playback/{user_id}/{video_id}', video_id)

        autoplay = self._call_api(
            'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id,
            note='Downloading playback metadata', query={
                'v_id': video_id,
            })['playback']

        # Source dimensions are only needed to derive each rendition's width
        source_width = autoplay.get('video_width')
        source_height = autoplay.get('video_height')

        formats = [{
            'url': autoplay['audio_url'],
            'format_id': 'audio',
            'protocol': 'm3u8_native',
            'vcodec': 'none',
            'acodec': 'aac',
            'ext': 'm4a',
        }]
        for fmt in autoplay['video_link']:
            height = fmt['level']
            # Scale the width from the source aspect ratio; skip it when the
            # dimensions are absent or zero (avoids KeyError/ZeroDivisionError)
            width = (height * source_width // source_height
                     if source_width and source_height else None)
            formats.append({
                'format_id': 'video-{}'.format(fmt['name']),
                'url': fmt['url'],
                'protocol': 'm3u8_native',
                'width': width,
                'height': height,
                'vcodec': 'h264',
                'acodec': 'aac',
                'ext': 'mp4',
            })

        return {
            'id': video_id,
            'title': self._html_search_meta(('og:description', 'description'), webpage, default=None) or autoplay.get('title'),
            'description': traverse_obj(autoplay, 'video_intro'),
            'timestamp': float_or_none(autoplay.get('publish_time'), scale=1000),
            'duration': float_or_none(autoplay.get('video_length'), scale=1000),
            'thumbnail': dict_get(autoplay, ('upload_pic', 'video_pic')),
            'uploader': traverse_obj(autoplay, ('author_info', 'login_name')),
            'uploader_id': user_id,
            'formats': formats,
        }
class MildomClipIE(MildomBaseIE):
    """Extractor for a single Mildom clip."""

    IE_NAME = 'mildom:clip'
    IE_DESC = 'Clip in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/clip/(?P<id>(?P<user_id>\d+)-[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'https://www.mildom.com/clip/10042245-63921673e7b147ebb0806d42b5ba5ce9',
        'info_dict': {
            'id': '10042245-63921673e7b147ebb0806d42b5ba5ce9',
            'title': '全然違ったよ',
            'timestamp': 1619181890,
            'duration': 59,
            'thumbnail': r're:https?://.+',
            'uploader': 'ざきんぽ',
            'uploader_id': '10042245',
        },
    }, {
        'url': 'https://www.mildom.com/clip/10111524-ebf4036e5aa8411c99fb3a1ae0902864',
        'info_dict': {
            'id': '10111524-ebf4036e5aa8411c99fb3a1ae0902864',
            'title': 'かっこいい',
            'timestamp': 1621094003,
            'duration': 59,
            'thumbnail': r're:https?://.+',
            'uploader': '(ルーキー',
            'uploader_id': '10111524',
        },
    }, {
        'url': 'https://www.mildom.com/clip/10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
        'info_dict': {
            'id': '10660174-2c539e6e277c4aaeb4b1fbe8d22cb902',
            'title': '',
            'timestamp': 1614769431,
            'duration': 31,
            'thumbnail': r're:https?://.+',
            'uploader': 'ドルゴルスレンギーン=ダグワドルジ',
            'uploader_id': '10660174',
        },
    }]

    def _real_extract(self, url):
        """Return a single-URL info dict built from the clip detail API reply."""
        matched = self._match_valid_url(url)
        user_id, video_id = matched.group('user_id', 'id')
        webpage = self._download_webpage(f'https://www.mildom.com/clip/{video_id}', video_id)

        detail = self._call_api(
            'https://cloudac-cf-jp.mildom.com/nonolive/videocontent/clip/detail', video_id,
            note='Downloading playback metadata', query={
                'clip_id': video_id,
            })

        # The page's description meta tag is preferred over the API title
        page_title = self._html_search_meta(
            ('og:description', 'description'), webpage, default=None)

        return {
            'id': video_id,
            'title': page_title or detail.get('title'),
            'timestamp': float_or_none(detail.get('create_time')),
            'duration': float_or_none(detail.get('length')),
            'thumbnail': detail.get('cover'),
            'uploader': traverse_obj(detail, ('user_info', 'loginname')),
            'uploader_id': user_id,
            'url': detail['url'],
            # Extension is taken from the clip URL, defaulting to mp4
            'ext': determine_ext(detail.get('url'), 'mp4'),
        }
class MildomUserVodIE(MildomBaseIE):
    """Playlist extractor covering every VOD listed on a Mildom user profile."""

    IE_NAME = 'mildom:user:vod'
    IE_DESC = 'Download all VODs from specific user in Mildom'
    _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/profile/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.mildom.com/profile/10093333',
        'info_dict': {
            'id': '10093333',
            'title': 'Uploads from ねこばたけ',
        },
        'playlist_mincount': 732,
    }, {
        'url': 'https://www.mildom.com/profile/10882672',
        'info_dict': {
            'id': '10882672',
            'title': 'Uploads from kson組長(けいそん)',
        },
        'playlist_mincount': 201,
    }]

    def _fetch_page(self, user_id, page):
        """Yield a url_result for each VOD on one page of the user's playback list.

        ``page`` is incremented by one before being sent to the API. Entries
        without a ``v_id`` are skipped.
        """
        page_number = page + 1
        reply = self._call_api(
            'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList',
            user_id, note=f'Downloading page {page_number}', query={
                'user_id': user_id,
                'page': page_number,
                'limit': '30',
            })

        # A falsy reply yields nothing
        for item in reply or ():
            v_id = item.get('v_id')
            if v_id:
                yield self.url_result(f'https://www.mildom.com/playback/{user_id}/{v_id}')

    def _real_extract(self, url):
        """Return a lazily paged playlist titled after the profile's login name."""
        user_id = self._match_id(url)
        self.to_screen(f'This will download all VODs belonging to user. To download ongoing live video, use "https://www.mildom.com/{user_id}" instead')

        profile = self._call_api(
            'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id,
            query={'user_id': user_id}, note='Downloading user profile')['user_info']

        # Page size of 30 matches the 'limit' requested in _fetch_page
        entries = OnDemandPagedList(functools.partial(self._fetch_page, user_id), 30)
        playlist_title = f'Uploads from {profile["loginname"]}'
        return self.playlist_result(entries, user_id, playlist_title)

View file

@ -12,7 +12,7 @@ from ..utils.traversal import traverse_obj
class MixchIE(InfoExtractor): class MixchIE(InfoExtractor):
IE_NAME = 'mixch' IE_NAME = 'mixch'
_VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P<id>\d+)' _VALID_URL = r'https?://mixch\.tv/u/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://mixch.tv/u/16943797/live', 'url': 'https://mixch.tv/u/16943797/live',
@ -66,7 +66,7 @@ class MixchIE(InfoExtractor):
note='Downloading comments', errnote='Failed to download comments'), (..., { note='Downloading comments', errnote='Failed to download comments'), (..., {
'author': ('name', {str}), 'author': ('name', {str}),
'author_id': ('user_id', {str_or_none}), 'author_id': ('user_id', {str_or_none}),
'id': ('message_id', {str}, {lambda x: x or None}), 'id': ('message_id', {str}, filter),
'text': ('body', {str}), 'text': ('body', {str}),
'timestamp': ('created', {int}), 'timestamp': ('created', {int}),
})) }))
@ -74,7 +74,7 @@ class MixchIE(InfoExtractor):
class MixchArchiveIE(InfoExtractor): class MixchArchiveIE(InfoExtractor):
IE_NAME = 'mixch:archive' IE_NAME = 'mixch:archive'
_VALID_URL = r'https?://(?:www\.)?mixch\.tv/archive/(?P<id>\d+)' _VALID_URL = r'https?://mixch\.tv/archive/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://mixch.tv/archive/421', 'url': 'https://mixch.tv/archive/421',
@ -116,3 +116,56 @@ class MixchArchiveIE(InfoExtractor):
'formats': self._extract_m3u8_formats(info_json['archiveURL'], video_id), 'formats': self._extract_m3u8_formats(info_json['archiveURL'], video_id),
'thumbnail': traverse_obj(info_json, ('thumbnailURL', {url_or_none})), 'thumbnail': traverse_obj(info_json, ('thumbnailURL', {url_or_none})),
} }
class MixchMovieIE(InfoExtractor):
    """Extractor for mixch.tv movie pages (``/m/<id>``)."""

    IE_NAME = 'mixch:movie'
    _VALID_URL = r'https?://mixch\.tv/m/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://mixch.tv/m/Ve8KNkJ5',
        'info_dict': {
            'id': 'Ve8KNkJ5',
            'title': '夏☀️\nムービーへのポイントは本イベントに加算されないので配信にてお願い致します🙇🏻\u200d♀️\n#TGCCAMPUS #ミス東大 #ミス東大2024 ',
            'ext': 'mp4',
            'uploader': 'ミス東大No.5 松藤百香🍑💫',
            'uploader_id': '12299174',
            'channel_follower_count': int,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'timestamp': 1724070828,
            'uploader_url': 'https://mixch.tv/u/12299174',
            'live_status': 'not_live',
            'upload_date': '20240819',
        },
    }, {
        'url': 'https://mixch.tv/m/61DzpIKE',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the movie JSON and return a single mp4 format plus metadata."""
        video_id = self._match_id(url)
        data = self._download_json(
            f'https://mixch.tv/api-web/movies/{video_id}', video_id)

        # The file URL is mandatory, so it is accessed strictly
        mp4_format = {
            'format_id': 'mp4',
            'url': data['movie']['file'],
            'ext': 'mp4',
        }

        # Optional metadata; each value passes through the type filter in its path
        metadata = traverse_obj(data, {
            'title': ('movie', 'title', {str}),
            'thumbnail': ('movie', 'thumbnailURL', {url_or_none}),
            'uploader': ('ownerInfo', 'name', {str}),
            'uploader_id': ('ownerInfo', 'id', {int}, {str_or_none}),
            'channel_follower_count': ('ownerInfo', 'fan', {int_or_none}),
            'view_count': ('ownerInfo', 'view', {int_or_none}),
            'like_count': ('movie', 'favCount', {int_or_none}),
            'comment_count': ('movie', 'commentCount', {int_or_none}),
            'timestamp': ('movie', 'published', {int_or_none}),
            'uploader_url': ('ownerInfo', 'id', {lambda x: x and f'https://mixch.tv/u/{x}'}, filter),
        })

        return {
            'id': video_id,
            'formats': [mp4_format],
            **metadata,
            'live_status': 'not_live',
        }

View file

@ -4,15 +4,11 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
extract_attributes, extract_attributes,
get_element_by_class,
get_element_html_by_class,
get_element_text_and_html_by_tag,
int_or_none, int_or_none,
strip_or_none, strip_or_none,
traverse_obj,
try_call,
unified_strdate, unified_strdate,
) )
from ..utils.traversal import find_element, traverse_obj
class MonstercatIE(InfoExtractor): class MonstercatIE(InfoExtractor):
@ -26,19 +22,21 @@ class MonstercatIE(InfoExtractor):
'thumbnail': 'https://www.monstercat.com/release/742779548009/cover', 'thumbnail': 'https://www.monstercat.com/release/742779548009/cover',
'release_date': '20230711', 'release_date': '20230711',
'album': 'The Secret Language of Trees', 'album': 'The Secret Language of Trees',
'album_artist': 'BT', 'album_artists': ['BT'],
}, },
}] }]
def _extract_tracks(self, table, album_meta): def _extract_tracks(self, table, album_meta):
for td in re.findall(r'<tr[^<]*>((?:(?!</tr>)[\w\W])+)', table): # regex by chatgpt due to lack of get_elements_by_tag for td in re.findall(r'<tr[^<]*>((?:(?!</tr>)[\w\W])+)', table): # regex by chatgpt due to lack of get_elements_by_tag
title = clean_html(try_call( title = traverse_obj(td, (
lambda: get_element_by_class('d-inline-flex flex-column', td).partition(' <span')[0])) {find_element(cls='d-inline-flex flex-column')},
ids = extract_attributes(try_call(lambda: get_element_html_by_class('btn-play cursor-pointer mr-small', td)) or '') {lambda x: x.partition(' <span')}, 0, {clean_html}))
ids = traverse_obj(td, (
{find_element(cls='btn-play cursor-pointer mr-small', html=True)}, {extract_attributes})) or {}
track_id = ids.get('data-track-id') track_id = ids.get('data-track-id')
release_id = ids.get('data-release-id') release_id = ids.get('data-release-id')
track_number = int_or_none(try_call(lambda: get_element_by_class('py-xsmall', td))) track_number = traverse_obj(td, ({find_element(cls='py-xsmall')}, {int_or_none}))
if not track_id or not release_id: if not track_id or not release_id:
self.report_warning(f'Skipping track {track_number}, ID(s) not found') self.report_warning(f'Skipping track {track_number}, ID(s) not found')
self.write_debug(f'release_id={release_id!r} track_id={track_id!r}') self.write_debug(f'release_id={release_id!r} track_id={track_id!r}')
@ -48,7 +46,7 @@ class MonstercatIE(InfoExtractor):
'title': title, 'title': title,
'track': title, 'track': title,
'track_number': track_number, 'track_number': track_number,
'artist': clean_html(try_call(lambda: get_element_by_class('d-block fs-xxsmall', td))), 'artists': traverse_obj(td, ({find_element(cls='d-block fs-xxsmall')}, {clean_html}, all)),
'url': f'https://www.monstercat.com/api/release/{release_id}/track-stream/{track_id}', 'url': f'https://www.monstercat.com/api/release/{release_id}/track-stream/{track_id}',
'id': track_id, 'id': track_id,
'ext': 'mp3', 'ext': 'mp3',
@ -57,20 +55,19 @@ class MonstercatIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
url_id = self._match_id(url) url_id = self._match_id(url)
html = self._download_webpage(url, url_id) html = self._download_webpage(url, url_id)
# wrap all `get_elements` in `try_call`, HTMLParser has problems with site's html # NB: HTMLParser may choke on this html; use {find_element} or try_call(lambda: get_element...)
tracklist_table = try_call(lambda: get_element_by_class('table table-small', html)) or '' tracklist_table = traverse_obj(html, {find_element(cls='table table-small')}) or ''
title = traverse_obj(html, ({find_element(tag='h1')}, {clean_html}))
title = try_call(lambda: get_element_text_and_html_by_tag('h1', html)[0])
date = traverse_obj(html, ({lambda html: get_element_by_class('font-italic mb-medium d-tablet-none d-phone-block',
html).partition('Released ')}, 2, {strip_or_none}, {unified_strdate}))
album_meta = { album_meta = {
'title': title, 'title': title,
'album': title, 'album': title,
'thumbnail': f'https://www.monstercat.com/release/{url_id}/cover', 'thumbnail': f'https://www.monstercat.com/release/{url_id}/cover',
'album_artist': try_call( 'album_artists': traverse_obj(html, (
lambda: get_element_by_class('h-normal text-uppercase mb-desktop-medium mb-smallish', html)), {find_element(cls='h-normal text-uppercase mb-desktop-medium mb-smallish')}, {clean_html}, all)),
'release_date': date, 'release_date': traverse_obj(html, (
{find_element(cls='font-italic mb-medium d-tablet-none d-phone-block')},
{lambda x: x.partition('Released ')}, 2, {strip_or_none}, {unified_strdate})),
} }
return self.playlist_result( return self.playlist_result(

View file

@ -86,7 +86,7 @@ class NebulaBaseIE(InfoExtractor):
def _extract_video_metadata(self, episode): def _extract_video_metadata(self, episode):
channel_url = traverse_obj( channel_url = traverse_obj(
episode, (('channel_slug', 'class_slug'), {lambda x: urljoin('https://nebula.tv/', x)}), get_all=False) episode, (('channel_slug', 'class_slug'), {urljoin('https://nebula.tv/')}), get_all=False)
return { return {
'id': episode['id'].partition(':')[2], 'id': episode['id'].partition(':')[2],
**traverse_obj(episode, { **traverse_obj(episode, {

View file

@ -6,12 +6,10 @@ from ..utils import (
determine_ext, determine_ext,
extract_attributes, extract_attributes,
get_element_by_class, get_element_by_class,
get_element_text_and_html_by_tag,
parse_duration, parse_duration,
traverse_obj,
try_call,
url_or_none, url_or_none,
) )
from ..utils.traversal import find_element, traverse_obj
class NekoHackerIE(InfoExtractor): class NekoHackerIE(InfoExtractor):
@ -35,7 +33,7 @@ class NekoHackerIE(InfoExtractor):
'acodec': 'mp3', 'acodec': 'mp3',
'release_date': '20221101', 'release_date': '20221101',
'album': 'Nekoverse', 'album': 'Nekoverse',
'artist': 'Neko Hacker', 'artists': ['Neko Hacker'],
'track': 'Spaceship', 'track': 'Spaceship',
'track_number': 1, 'track_number': 1,
'duration': 195.0, 'duration': 195.0,
@ -53,7 +51,7 @@ class NekoHackerIE(InfoExtractor):
'acodec': 'mp3', 'acodec': 'mp3',
'release_date': '20221101', 'release_date': '20221101',
'album': 'Nekoverse', 'album': 'Nekoverse',
'artist': 'Neko Hacker', 'artists': ['Neko Hacker'],
'track': 'City Runner', 'track': 'City Runner',
'track_number': 2, 'track_number': 2,
'duration': 148.0, 'duration': 148.0,
@ -71,7 +69,7 @@ class NekoHackerIE(InfoExtractor):
'acodec': 'mp3', 'acodec': 'mp3',
'release_date': '20221101', 'release_date': '20221101',
'album': 'Nekoverse', 'album': 'Nekoverse',
'artist': 'Neko Hacker', 'artists': ['Neko Hacker'],
'track': 'Nature Talk', 'track': 'Nature Talk',
'track_number': 3, 'track_number': 3,
'duration': 174.0, 'duration': 174.0,
@ -89,7 +87,7 @@ class NekoHackerIE(InfoExtractor):
'acodec': 'mp3', 'acodec': 'mp3',
'release_date': '20221101', 'release_date': '20221101',
'album': 'Nekoverse', 'album': 'Nekoverse',
'artist': 'Neko Hacker', 'artists': ['Neko Hacker'],
'track': 'Crystal World', 'track': 'Crystal World',
'track_number': 4, 'track_number': 4,
'duration': 199.0, 'duration': 199.0,
@ -115,7 +113,7 @@ class NekoHackerIE(InfoExtractor):
'acodec': 'mp3', 'acodec': 'mp3',
'release_date': '20210115', 'release_date': '20210115',
'album': '進め!むじなカンパニー', 'album': '進め!むじなカンパニー',
'artist': 'Neko Hacker', 'artists': ['Neko Hacker'],
'track': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0', 'track': 'md5:1a5fcbc96ca3c3265b1c6f9f79f30fd0',
'track_number': 1, 'track_number': 1,
}, },
@ -132,7 +130,7 @@ class NekoHackerIE(InfoExtractor):
'acodec': 'mp3', 'acodec': 'mp3',
'release_date': '20210115', 'release_date': '20210115',
'album': '進め!むじなカンパニー', 'album': '進め!むじなカンパニー',
'artist': 'Neko Hacker', 'artists': ['Neko Hacker'],
'track': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )', 'track': 'むじな de なじむ feat. 六科なじむ (CV: 日高里菜 )',
'track_number': 2, 'track_number': 2,
}, },
@ -149,7 +147,7 @@ class NekoHackerIE(InfoExtractor):
'acodec': 'mp3', 'acodec': 'mp3',
'release_date': '20210115', 'release_date': '20210115',
'album': '進め!むじなカンパニー', 'album': '進め!むじなカンパニー',
'artist': 'Neko Hacker', 'artists': ['Neko Hacker'],
'track': '進め!むじなカンパニー (instrumental)', 'track': '進め!むじなカンパニー (instrumental)',
'track_number': 3, 'track_number': 3,
}, },
@ -166,7 +164,7 @@ class NekoHackerIE(InfoExtractor):
'acodec': 'mp3', 'acodec': 'mp3',
'release_date': '20210115', 'release_date': '20210115',
'album': '進め!むじなカンパニー', 'album': '進め!むじなカンパニー',
'artist': 'Neko Hacker', 'artists': ['Neko Hacker'],
'track': 'むじな de なじむ (instrumental)', 'track': 'むじな de なじむ (instrumental)',
'track_number': 4, 'track_number': 4,
}, },
@ -181,14 +179,17 @@ class NekoHackerIE(InfoExtractor):
playlist = get_element_by_class('playlist', webpage) playlist = get_element_by_class('playlist', webpage)
if not playlist: if not playlist:
iframe = try_call(lambda: get_element_text_and_html_by_tag('iframe', webpage)[1]) or '' iframe_src = traverse_obj(webpage, (
iframe_src = url_or_none(extract_attributes(iframe).get('src')) {find_element(tag='iframe', html=True)}, {extract_attributes}, 'src', {url_or_none}))
if not iframe_src: if not iframe_src:
raise ExtractorError('No playlist or embed found in webpage') raise ExtractorError('No playlist or embed found in webpage')
elif re.match(r'https?://(?:\w+\.)?spotify\.com/', iframe_src): elif re.match(r'https?://(?:\w+\.)?spotify\.com/', iframe_src):
raise ExtractorError('Spotify embeds are not supported', expected=True) raise ExtractorError('Spotify embeds are not supported', expected=True)
return self.url_result(url, 'Generic') return self.url_result(url, 'Generic')
player_params = self._search_json(
r'var srp_player_params_[\da-f]+\s*=', webpage, 'player params', playlist_id, default={})
entries = [] entries = []
for track_number, track in enumerate(re.findall(r'(<li[^>]+data-audiopath[^>]+>)', playlist), 1): for track_number, track in enumerate(re.findall(r'(<li[^>]+data-audiopath[^>]+>)', playlist), 1):
entry = traverse_obj(extract_attributes(track), { entry = traverse_obj(extract_attributes(track), {
@ -200,12 +201,12 @@ class NekoHackerIE(InfoExtractor):
'album': 'data-albumtitle', 'album': 'data-albumtitle',
'duration': ('data-tracktime', {parse_duration}), 'duration': ('data-tracktime', {parse_duration}),
'release_date': ('data-releasedate', {lambda x: re.match(r'\d{8}', x.replace('.', ''))}, 0), 'release_date': ('data-releasedate', {lambda x: re.match(r'\d{8}', x.replace('.', ''))}, 0),
'thumbnail': ('data-albumart', {url_or_none}),
}) })
entries.append({ entries.append({
**entry, **entry,
'thumbnail': url_or_none(player_params.get('artwork')),
'track_number': track_number, 'track_number': track_number,
'artist': 'Neko Hacker', 'artists': ['Neko Hacker'],
'vcodec': 'none', 'vcodec': 'none',
'acodec': 'mp3' if entry['ext'] == 'mp3' else None, 'acodec': 'mp3' if entry['ext'] == 'mp3' else None,
}) })

View file

@ -36,10 +36,6 @@ class NetEaseMusicBaseIE(InfoExtractor):
_API_BASE = 'http://music.163.com/api/' _API_BASE = 'http://music.163.com/api/'
_GEO_BYPASS = False _GEO_BYPASS = False
@staticmethod
def _kilo_or_none(value):
return int_or_none(value, scale=1000)
def _create_eapi_cipher(self, api_path, query_body, cookies): def _create_eapi_cipher(self, api_path, query_body, cookies):
request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':')) request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':'))
@ -101,7 +97,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
'vcodec': 'none', 'vcodec': 'none',
**traverse_obj(song, { **traverse_obj(song, {
'ext': ('type', {str}), 'ext': ('type', {str}),
'abr': ('br', {self._kilo_or_none}), 'abr': ('br', {int_or_none(scale=1000)}),
'filesize': ('size', {int_or_none}), 'filesize': ('size', {int_or_none}),
}), }),
}) })
@ -282,9 +278,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
**lyric_data, **lyric_data,
**traverse_obj(info, { **traverse_obj(info, {
'title': ('name', {str}), 'title': ('name', {str}),
'timestamp': ('album', 'publishTime', {self._kilo_or_none}), 'timestamp': ('album', 'publishTime', {int_or_none(scale=1000)}),
'thumbnail': ('album', 'picUrl', {url_or_none}), 'thumbnail': ('album', 'picUrl', {url_or_none}),
'duration': ('duration', {self._kilo_or_none}), 'duration': ('duration', {int_or_none(scale=1000)}),
'album': ('album', 'name', {str}), 'album': ('album', 'name', {str}),
'average_rating': ('score', {int_or_none}), 'average_rating': ('score', {int_or_none}),
}), }),
@ -440,7 +436,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
'tags': ('tags', ..., {str}), 'tags': ('tags', ..., {str}),
'uploader': ('creator', 'nickname', {str}), 'uploader': ('creator', 'nickname', {str}),
'uploader_id': ('creator', 'userId', {str_or_none}), 'uploader_id': ('creator', 'userId', {str_or_none}),
'timestamp': ('updateTime', {self._kilo_or_none}), 'timestamp': ('updateTime', {int_or_none(scale=1000)}),
})) }))
if traverse_obj(info, ('playlist', 'specialType')) == 10: if traverse_obj(info, ('playlist', 'specialType')) == 10:
metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}' metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}'
@ -517,10 +513,10 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
'creators': traverse_obj(info, ('artists', ..., 'name')) or [info.get('artistName')], 'creators': traverse_obj(info, ('artists', ..., 'name')) or [info.get('artistName')],
**traverse_obj(info, { **traverse_obj(info, {
'title': ('name', {str}), 'title': ('name', {str}),
'description': (('desc', 'briefDesc'), {str}, {lambda x: x or None}), 'description': (('desc', 'briefDesc'), {str}, filter),
'upload_date': ('publishTime', {unified_strdate}), 'upload_date': ('publishTime', {unified_strdate}),
'thumbnail': ('cover', {url_or_none}), 'thumbnail': ('cover', {url_or_none}),
'duration': ('duration', {self._kilo_or_none}), 'duration': ('duration', {int_or_none(scale=1000)}),
'view_count': ('playCount', {int_or_none}), 'view_count': ('playCount', {int_or_none}),
'like_count': ('likeCount', {int_or_none}), 'like_count': ('likeCount', {int_or_none}),
'comment_count': ('commentCount', {int_or_none}), 'comment_count': ('commentCount', {int_or_none}),
@ -588,7 +584,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
'description': ('description', {str}), 'description': ('description', {str}),
'creator': ('dj', 'brand', {str}), 'creator': ('dj', 'brand', {str}),
'thumbnail': ('coverUrl', {url_or_none}), 'thumbnail': ('coverUrl', {url_or_none}),
'timestamp': ('createTime', {self._kilo_or_none}), 'timestamp': ('createTime', {int_or_none(scale=1000)}),
}) })
if not self._yes_playlist( if not self._yes_playlist(
@ -598,7 +594,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
return { return {
'id': str(info['mainSong']['id']), 'id': str(info['mainSong']['id']),
'formats': formats, 'formats': formats,
'duration': traverse_obj(info, ('mainSong', 'duration', {self._kilo_or_none})), 'duration': traverse_obj(info, ('mainSong', 'duration', {int_or_none(scale=1000)})),
**metainfo, **metainfo,
} }

View file

@ -11,9 +11,12 @@ from ..utils import (
clean_html, clean_html,
determine_ext, determine_ext,
get_element_by_class, get_element_by_class,
traverse_obj, int_or_none,
make_archive_id,
url_or_none,
urlencode_postdata, urlencode_postdata,
) )
from ..utils.traversal import traverse_obj
class NFLBaseIE(InfoExtractor): class NFLBaseIE(InfoExtractor):
@ -75,22 +78,15 @@ class NFLBaseIE(InfoExtractor):
'osVersion': '10.0', 'osVersion': '10.0',
}, separators=(',', ':')).encode()).decode(), }, separators=(',', ':')).encode()).decode(),
'networkType': 'other', 'networkType': 'other',
'nflClaimGroupsToAdd': [], 'peacockUUID': 'undefined',
'nflClaimGroupsToRemove': [],
} }
_ACCOUNT_INFO = {} _ACCOUNT_INFO = {}
_API_KEY = None _API_KEY = '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f'
_TOKEN = None _TOKEN = None
_TOKEN_EXPIRY = 0 _TOKEN_EXPIRY = 0
def _get_account_info(self, url, slug): def _get_account_info(self):
if not self._API_KEY:
webpage = self._download_webpage(url, slug, fatal=False) or ''
self._API_KEY = self._search_regex(
r'window\.gigyaApiKey\s*=\s*["\'](\w+)["\'];', webpage, 'API key',
fatal=False) or '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f'
cookies = self._get_cookies('https://auth-id.nfl.com/') cookies = self._get_cookies('https://auth-id.nfl.com/')
login_token = traverse_obj(cookies, ( login_token = traverse_obj(cookies, (
(f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False) (f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False)
@ -103,7 +99,7 @@ class NFLBaseIE(InfoExtractor):
'or else try using --cookies-from-browser instead', expected=True) 'or else try using --cookies-from-browser instead', expected=True)
account = self._download_json( account = self._download_json(
'https://auth-id.nfl.com/accounts.getAccountInfo', slug, 'https://auth-id.nfl.com/accounts.getAccountInfo', None,
note='Downloading account info', data=urlencode_postdata({ note='Downloading account info', data=urlencode_postdata({
'include': 'profile,data', 'include': 'profile,data',
'lang': 'en', 'lang': 'en',
@ -111,7 +107,7 @@ class NFLBaseIE(InfoExtractor):
'sdk': 'js_latest', 'sdk': 'js_latest',
'login_token': login_token, 'login_token': login_token,
'authMode': 'cookie', 'authMode': 'cookie',
'pageURL': url, 'pageURL': 'https://www.nfl.com/',
'sdkBuild': traverse_obj(cookies, ( 'sdkBuild': traverse_obj(cookies, (
'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'), 'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'),
'format': 'json', 'format': 'json',
@ -126,55 +122,78 @@ class NFLBaseIE(InfoExtractor):
if len(self._ACCOUNT_INFO) != 3: if len(self._ACCOUNT_INFO) != 3:
raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True) raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True)
def _get_auth_token(self, url, slug): def _get_auth_token(self):
if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30): if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30):
return return
if not self._ACCOUNT_INFO:
self._get_account_info(url, slug)
token = self._download_json( token = self._download_json(
'https://api.nfl.com/identity/v3/token%s' % ( 'https://api.nfl.com/identity/v3/token%s' % (
'/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''), '/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''),
slug, headers={'Content-Type': 'application/json'}, note='Downloading access token', None, headers={'Content-Type': 'application/json'}, note='Downloading access token',
data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode()) data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode())
self._TOKEN = token['accessToken'] self._TOKEN = token['accessToken']
self._TOKEN_EXPIRY = token['expiresIn'] self._TOKEN_EXPIRY = token['expiresIn']
self._ACCOUNT_INFO['refreshToken'] = token['refreshToken'] self._ACCOUNT_INFO['refreshToken'] = token['refreshToken']
def _extract_video(self, mcp_id, is_live=False):
self._get_auth_token()
data = self._download_json(
f'https://api.nfl.com/play/v1/asset/{mcp_id}', mcp_id, headers={
'Authorization': f'Bearer {self._TOKEN}',
'Accept': 'application/json',
'Content-Type': 'application/json',
}, data=json.dumps({'init': True, 'live': is_live}, separators=(',', ':')).encode())
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
data['accessUrl'], mcp_id, 'mp4', m3u8_id='hls')
return {
'id': mcp_id,
'formats': formats,
'subtitles': subtitles,
'is_live': is_live,
'_old_archive_ids': [make_archive_id(AnvatoIE, mcp_id)],
**traverse_obj(data, ('metadata', {
'title': ('event', ('def_title', 'friendlyName'), {str}, any),
'description': ('event', 'def_description', {str}),
'duration': ('event', 'duration', {int_or_none}),
'thumbnails': ('thumbnails', ..., 'url', {'url': {url_or_none}}),
})),
}
def _parse_video_config(self, video_config, display_id): def _parse_video_config(self, video_config, display_id):
video_config = self._parse_json(video_config, display_id) video_config = self._parse_json(video_config, display_id)
is_live = traverse_obj(video_config, ('live', {bool})) or False
item = video_config['playlist'][0] item = video_config['playlist'][0]
mcp_id = item.get('mcpID') if mcp_id := item.get('mcpID'):
if mcp_id: return self._extract_video(mcp_id, is_live=is_live)
info = self.url_result(f'{self._ANVATO_PREFIX}{mcp_id}', AnvatoIE, mcp_id)
info = {'id': item.get('id') or item['entityId']}
item_url = item['url']
ext = determine_ext(item_url)
if ext == 'm3u8':
info['formats'] = self._extract_m3u8_formats(item_url, info['id'], 'mp4')
else: else:
media_id = item.get('id') or item['entityId'] info['url'] = item_url
title = item.get('title') if item.get('audio') is True:
item_url = item['url'] info['vcodec'] = 'none'
info = {'id': media_id}
ext = determine_ext(item_url) thumbnails = None
if ext == 'm3u8': if image_url := traverse_obj(item, 'imageSrc', 'posterImage', expected_type=url_or_none):
info['formats'] = self._extract_m3u8_formats(item_url, media_id, 'mp4') thumbnails = [{
else: 'url': image_url,
info['url'] = item_url 'ext': determine_ext(image_url, 'jpg'),
if item.get('audio') is True: }]
info['vcodec'] = 'none'
is_live = video_config.get('live') is True info.update({
thumbnails = None **traverse_obj(item, {
image_url = item.get(item.get('imageSrc')) or item.get(item.get('posterImage')) 'title': ('title', {str}),
if image_url: 'description': ('description', {clean_html}),
thumbnails = [{ }),
'url': image_url, 'is_live': is_live,
'ext': determine_ext(image_url, 'jpg'), 'thumbnails': thumbnails,
}] })
info.update({
'title': title,
'is_live': is_live,
'description': clean_html(item.get('description')),
'thumbnails': thumbnails,
})
return info return info
@ -188,24 +207,20 @@ class NFLIE(NFLBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14", 'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14",
'description': 'md5:85e05a3cc163f8c344340f220521136d', 'description': 'md5:85e05a3cc163f8c344340f220521136d',
'upload_date': '20201215', 'thumbnail': r're:https?://.+\.jpg',
'timestamp': 1608009755,
'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'NFL',
'tags': 'count:6',
'duration': 157, 'duration': 157,
'categories': 'count:3', '_old_archive_ids': ['anvato 899441'],
}, },
}, { }, {
'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown', 'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown',
'md5': '6886b32c24b463038c760ceb55a34566', 'md5': '92a517f05bd3eb50fe50244bc621aec8',
'info_dict': { 'info_dict': {
'id': 'd87e8790-3e14-11eb-8ceb-ff05c2867f99', 'id': '8b7c3625-a461-4751-8db4-85f536f2bbd0',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown', 'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown',
'description': 'md5:12ada8ee70e6762658c30e223e095075', 'description': 'md5:12ada8ee70e6762658c30e223e095075',
'thumbnail': 'https://static.clubs.nfl.com/image/private/t_editorial_landscape_12_desktop/v1571153441/chiefs/rfljejccnyhhkpkfq855',
}, },
'skip': 'HTTP Error 404: Not Found',
}, { }, {
'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14', 'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14',
'only_matching': True, 'only_matching': True,
@ -236,13 +251,16 @@ class NFLArticleIE(NFLBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
entries = []
for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage): def entries():
entries.append(self._parse_video_config(video_config, display_id)) for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
yield self._parse_video_config(video_config, display_id)
title = clean_html(get_element_by_class( title = clean_html(get_element_by_class(
'nfl-c-article__title', webpage)) or self._html_search_meta( 'nfl-c-article__title', webpage)) or self._html_search_meta(
['og:title', 'twitter:title'], webpage) ['og:title', 'twitter:title'], webpage)
return self.playlist_result(entries, display_id, title)
return self.playlist_result(entries(), display_id, title)
class NFLPlusReplayIE(NFLBaseIE): class NFLPlusReplayIE(NFLBaseIE):
@ -307,6 +325,9 @@ class NFLPlusReplayIE(NFLBaseIE):
'all_22': 'All-22', 'all_22': 'All-22',
} }
def _real_initialize(self):
self._get_account_info()
def _real_extract(self, url): def _real_extract(self, url):
slug, video_id = self._match_valid_url(url).group('slug', 'id') slug, video_id = self._match_valid_url(url).group('slug', 'id')
requested_types = self._configuration_arg('type', ['all']) requested_types = self._configuration_arg('type', ['all'])
@ -315,7 +336,7 @@ class NFLPlusReplayIE(NFLBaseIE):
requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types)) requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types))
if not video_id: if not video_id:
self._get_auth_token(url, slug) self._get_auth_token()
headers = {'Authorization': f'Bearer {self._TOKEN}'} headers = {'Authorization': f'Bearer {self._TOKEN}'}
game_id = self._download_json( game_id = self._download_json(
f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug, f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug,
@ -328,14 +349,13 @@ class NFLPlusReplayIE(NFLBaseIE):
'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False) 'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False)
if video_id: if video_id:
return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id) return self._extract_video(video_id)
def entries(): def entries():
for replay in traverse_obj( for replay in traverse_obj(
replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types), replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types),
): ):
video_id = replay['mcpPlaybackId'] yield self._extract_video(replay['mcpPlaybackId'])
yield self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id)
return self.playlist_result(entries(), slug) return self.playlist_result(entries(), slug)
@ -362,12 +382,15 @@ class NFLPlusEpisodeIE(NFLBaseIE):
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}] }]
def _real_initialize(self):
self._get_account_info()
def _real_extract(self, url): def _real_extract(self, url):
slug = self._match_id(url) slug = self._match_id(url)
self._get_auth_token(url, slug) self._get_auth_token()
video_id = self._download_json( video_id = self._download_json(
f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={ f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={
'Authorization': f'Bearer {self._TOKEN}', 'Authorization': f'Bearer {self._TOKEN}',
})['mcpPlaybackId'] })['mcpPlaybackId']
return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id) return self._extract_video(video_id)

View file

@ -371,11 +371,11 @@ class NiconicoIE(InfoExtractor):
'acodec': 'aac', 'acodec': 'aac',
'vcodec': 'h264', 'vcodec': 'h264',
**traverse_obj(audio_quality, ('metadata', { **traverse_obj(audio_quality, ('metadata', {
'abr': ('bitrate', {functools.partial(float_or_none, scale=1000)}), 'abr': ('bitrate', {float_or_none(scale=1000)}),
'asr': ('samplingRate', {int_or_none}), 'asr': ('samplingRate', {int_or_none}),
})), })),
**traverse_obj(video_quality, ('metadata', { **traverse_obj(video_quality, ('metadata', {
'vbr': ('bitrate', {functools.partial(float_or_none, scale=1000)}), 'vbr': ('bitrate', {float_or_none(scale=1000)}),
'height': ('resolution', 'height', {int_or_none}), 'height': ('resolution', 'height', {int_or_none}),
'width': ('resolution', 'width', {int_or_none}), 'width': ('resolution', 'width', {int_or_none}),
})), })),
@ -428,7 +428,7 @@ class NiconicoIE(InfoExtractor):
**audio_fmt, **audio_fmt,
**traverse_obj(audios, (lambda _, v: audio_fmt['format_id'].startswith(v['id']), { **traverse_obj(audios, (lambda _, v: audio_fmt['format_id'].startswith(v['id']), {
'format_id': ('id', {str}), 'format_id': ('id', {str}),
'abr': ('bitRate', {functools.partial(float_or_none, scale=1000)}), 'abr': ('bitRate', {float_or_none(scale=1000)}),
'asr': ('samplingRate', {int_or_none}), 'asr': ('samplingRate', {int_or_none}),
}), get_all=False), }), get_all=False),
'acodec': 'aac', 'acodec': 'aac',

View file

@ -10,10 +10,10 @@ from ..utils import (
get_element_html_by_class, get_element_html_by_class,
get_elements_by_class, get_elements_by_class,
int_or_none, int_or_none,
try_call,
unified_timestamp, unified_timestamp,
urlencode_postdata, urlencode_postdata,
) )
from ..utils.traversal import find_element, find_elements, traverse_obj
class NubilesPornIE(InfoExtractor): class NubilesPornIE(InfoExtractor):
@ -70,9 +70,8 @@ class NubilesPornIE(InfoExtractor):
url, get_element_by_class('watch-page-video-wrapper', page), video_id)[0] url, get_element_by_class('watch-page-video-wrapper', page), video_id)[0]
channel_id, channel_name = self._search_regex( channel_id, channel_name = self._search_regex(
r'/video/website/(?P<id>\d+).+>(?P<name>\w+).com', get_element_html_by_class('site-link', page), r'/video/website/(?P<id>\d+).+>(?P<name>\w+).com', get_element_html_by_class('site-link', page) or '',
'channel', fatal=False, group=('id', 'name')) or (None, None) 'channel', fatal=False, group=('id', 'name')) or (None, None)
channel_name = re.sub(r'([^A-Z]+)([A-Z]+)', r'\1 \2', channel_name)
return { return {
'id': video_id, 'id': video_id,
@ -82,14 +81,14 @@ class NubilesPornIE(InfoExtractor):
'thumbnail': media_entries.get('thumbnail'), 'thumbnail': media_entries.get('thumbnail'),
'description': clean_html(get_element_html_by_class('content-pane-description', page)), 'description': clean_html(get_element_html_by_class('content-pane-description', page)),
'timestamp': unified_timestamp(get_element_by_class('date', page)), 'timestamp': unified_timestamp(get_element_by_class('date', page)),
'channel': channel_name, 'channel': re.sub(r'([^A-Z]+)([A-Z]+)', r'\1 \2', channel_name) if channel_name else None,
'channel_id': channel_id, 'channel_id': channel_id,
'channel_url': format_field(channel_id, None, 'https://members.nubiles-porn.com/video/website/%s'), 'channel_url': format_field(channel_id, None, 'https://members.nubiles-porn.com/video/website/%s'),
'like_count': int_or_none(get_element_by_id('likecount', page)), 'like_count': int_or_none(get_element_by_id('likecount', page)),
'average_rating': float_or_none(get_element_by_class('score', page)), 'average_rating': float_or_none(get_element_by_class('score', page)),
'age_limit': 18, 'age_limit': 18,
'categories': try_call(lambda: list(map(clean_html, get_elements_by_class('btn', get_element_by_class('categories', page))))), 'categories': traverse_obj(page, ({find_element(cls='categories')}, {find_elements(cls='btn')}, ..., {clean_html})),
'tags': try_call(lambda: list(map(clean_html, get_elements_by_class('btn', get_elements_by_class('tags', page)[1])))), 'tags': traverse_obj(page, ({find_elements(cls='tags')}, 1, {find_elements(cls='btn')}, ..., {clean_html})),
'cast': get_elements_by_class('content-pane-performer', page), 'cast': get_elements_by_class('content-pane-performer', page),
'availability': 'needs_auth', 'availability': 'needs_auth',
'series': channel_name, 'series': channel_name,

Some files were not shown because too many files have changed in this diff Show more