Merge branch 'yt-dlp:master' into fix/foxsports

This commit is contained in:
bashonly 2024-07-01 22:38:34 -05:00
commit 8cbf4659bd
No known key found for this signature in database
GPG key ID: 783F096F253D15B0
95 changed files with 2735 additions and 1195 deletions

View file

@ -237,27 +237,43 @@ jobs:
macos: macos:
needs: process needs: process
if: inputs.macos if: inputs.macos
permissions:
contents: read
actions: write # For cleaning up cache
runs-on: macos-12 runs-on: macos-12
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
# NB: Building universal2 does not work with python from actions/setup-python # NB: Building universal2 does not work with python from actions/setup-python
- name: Restore cached requirements
id: restore-cache
uses: actions/cache/restore@v4
env:
SEGMENT_DOWNLOAD_TIMEOUT_MINS: 1
with:
path: |
~/yt-dlp-build-venv
key: cache-reqs-${{ github.job }}
- name: Install Requirements - name: Install Requirements
run: | run: |
brew install coreutils brew install coreutils
python3 devscripts/install_deps.py --user -o --include build python3 -m venv ~/yt-dlp-build-venv
source ~/yt-dlp-build-venv/bin/activate
python3 devscripts/install_deps.py -o --include build
python3 devscripts/install_deps.py --print --include pyinstaller > requirements.txt python3 devscripts/install_deps.py --print --include pyinstaller > requirements.txt
# We need to ignore wheels otherwise we break universal2 builds # We need to ignore wheels otherwise we break universal2 builds
python3 -m pip install -U --user --no-binary :all: -r requirements.txt python3 -m pip install -U --no-binary :all: -r requirements.txt
# We need to fuse our own universal2 wheels for curl_cffi # We need to fuse our own universal2 wheels for curl_cffi
python3 -m pip install -U --user delocate python3 -m pip install -U delocate
mkdir curl_cffi_whls curl_cffi_universal2 mkdir curl_cffi_whls curl_cffi_universal2
python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt
for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do
python3 -m pip download \ python3 -m pip download \
--only-binary=:all: \ --only-binary=:all: \
--platform "${platform}" \ --platform "${platform}" \
--pre -d curl_cffi_whls \ -d curl_cffi_whls \
-r requirements.txt -r requirements.txt
done done
( # Overwrite x86_64-only libs with fat/universal2 libs or else Pyinstaller will do the opposite ( # Overwrite x86_64-only libs with fat/universal2 libs or else Pyinstaller will do the opposite
@ -274,9 +290,10 @@ jobs:
) )
python3 -m delocate.cmd.delocate_fuse curl_cffi_whls/curl_cffi*.whl -w curl_cffi_universal2 python3 -m delocate.cmd.delocate_fuse curl_cffi_whls/curl_cffi*.whl -w curl_cffi_universal2
python3 -m delocate.cmd.delocate_fuse curl_cffi_whls/cffi*.whl -w curl_cffi_universal2 python3 -m delocate.cmd.delocate_fuse curl_cffi_whls/cffi*.whl -w curl_cffi_universal2
cd curl_cffi_universal2 for wheel in curl_cffi_universal2/*cffi*.whl; do
for wheel in ./*cffi*.whl; do mv -n -- "${wheel}" "${wheel/x86_64/universal2}"; done mv -n -- "${wheel}" "${wheel/x86_64/universal2}"
python3 -m pip install -U --user ./*cffi*.whl done
python3 -m pip install --force-reinstall -U curl_cffi_universal2/*cffi*.whl
- name: Prepare - name: Prepare
run: | run: |
@ -284,6 +301,7 @@ jobs:
python3 devscripts/make_lazy_extractors.py python3 devscripts/make_lazy_extractors.py
- name: Build - name: Build
run: | run: |
source ~/yt-dlp-build-venv/bin/activate
python3 -m bundle.pyinstaller --target-architecture universal2 --onedir python3 -m bundle.pyinstaller --target-architecture universal2 --onedir
(cd ./dist/yt-dlp_macos && zip -r ../yt-dlp_macos.zip .) (cd ./dist/yt-dlp_macos && zip -r ../yt-dlp_macos.zip .)
python3 -m bundle.pyinstaller --target-architecture universal2 python3 -m bundle.pyinstaller --target-architecture universal2
@ -307,6 +325,24 @@ jobs:
dist/yt-dlp_macos.zip dist/yt-dlp_macos.zip
compression-level: 0 compression-level: 0
- name: Cleanup cache
if: steps.restore-cache.outputs.cache-hit == 'true'
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
cache_key: cache-reqs-${{ github.job }}
repository: ${{ github.repository }}
branch: ${{ github.ref }}
run: |
gh extension install actions/gh-actions-cache
gh actions-cache delete "${cache_key}" -R "${repository}" -B "${branch}" --confirm
- name: Cache requirements
uses: actions/cache/save@v4
with:
path: |
~/yt-dlp-build-venv
key: cache-reqs-${{ github.job }}
macos_legacy: macos_legacy:
needs: process needs: process
if: inputs.macos_legacy if: inputs.macos_legacy
@ -489,6 +525,10 @@ jobs:
# make sure SHA sums are also printed to stdout # make sure SHA sums are also printed to stdout
sha256sum -- * | tee ../SHA2-256SUMS sha256sum -- * | tee ../SHA2-256SUMS
sha512sum -- * | tee ../SHA2-512SUMS sha512sum -- * | tee ../SHA2-512SUMS
# also print as permanent annotations to the summary page
while read -r shasum; do
echo "::notice title=${shasum##* }::sha256: ${shasum% *}"
done < ../SHA2-256SUMS
- name: Make Update spec - name: Make Update spec
run: | run: |

View file

@ -24,6 +24,7 @@ jobs:
source: master source: master
permissions: permissions:
contents: write contents: write
packages: write packages: write # For package cache
actions: write # For cleaning up cache
id-token: write # mandatory for trusted publishing id-token: write # mandatory for trusted publishing
secrets: inherit secrets: inherit

View file

@ -37,6 +37,7 @@ jobs:
source: nightly source: nightly
permissions: permissions:
contents: write contents: write
packages: write packages: write # For package cache
actions: write # For cleaning up cache
id-token: write # mandatory for trusted publishing id-token: write # mandatory for trusted publishing
secrets: inherit secrets: inherit

View file

@ -229,6 +229,7 @@ jobs:
permissions: permissions:
contents: read contents: read
packages: write # For package cache packages: write # For package cache
actions: write # For cleaning up cache
secrets: secrets:
GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }} GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }}

View file

@ -631,3 +631,16 @@ voidful
vtexier vtexier
WyohKnott WyohKnott
trueauracoral trueauracoral
ASertacAkkaya
axpauls
chilinux
hafeoz
JSubelj
jucor
megumintyan
mgedmin
Niluge-KiWi
peisenwang
TheZ3ro
tippfehlr
varunchopra

View file

@ -4,6 +4,87 @@
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
--> -->
### 2024.07.01
#### Important changes
- Security: [[CVE-2024-38519](https://nvd.nist.gov/vuln/detail/CVE-2024-38519)] [Properly sanitize file-extension to prevent file system modification and RCE](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j)
- Unsafe extensions are now blocked from being downloaded
#### Core changes
- [Add `playlist_channel` and `playlist_channel_id` fields](https://github.com/yt-dlp/yt-dlp/commit/55e3e6fd21e741ec5ae3d8624de5e5ea345810eb) ([#10266](https://github.com/yt-dlp/yt-dlp/issues/10266)) by [bashonly](https://github.com/bashonly)
- [Disallow unsafe extensions (CVE-2024-38519)](https://github.com/yt-dlp/yt-dlp/commit/5ce582448ececb8d9c30c8c31f58330090ced03a) by [Grub4K](https://github.com/Grub4K)
- **cookies**: [Fix `--cookies-from-browser` DE detection on Linux](https://github.com/yt-dlp/yt-dlp/commit/a8520244b8642880e4d35925e9e49eff94d548de) ([#10237](https://github.com/yt-dlp/yt-dlp/issues/10237)) by [peisenwang](https://github.com/peisenwang)
#### Extractor changes
- **afreecatv**
- [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/e8352ad6599de7b5371dc39a1a1edc7890aaedb4) ([#10174](https://github.com/yt-dlp/yt-dlp/issues/10174)) by [hui1601](https://github.com/hui1601)
- catchstory: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/054a3ba7d1293f9fbe21800d62d1e5ddcbded238) ([#10235](https://github.com/yt-dlp/yt-dlp/issues/10235)) by [hui1601](https://github.com/hui1601)
- **bilibili**: [Support legacy formats](https://github.com/yt-dlp/yt-dlp/commit/1d6ab17d0752ee9cf19e3e63c7dec7b600d3f228) ([#9117](https://github.com/yt-dlp/yt-dlp/issues/9117)) by [c-basalt](https://github.com/c-basalt), [GD-Slime](https://github.com/GD-Slime)
- **bitchute**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/5b1a2aa978d0074cee278e7659f32f52ecc4ab53) ([#10301](https://github.com/yt-dlp/yt-dlp/issues/10301)) by [seproDev](https://github.com/seproDev)
- **brightcove**: [Upgrade requests to HTTPS](https://github.com/yt-dlp/yt-dlp/commit/90c3721a322756bb7f4ca10ceb73744500bee37e) ([#10202](https://github.com/yt-dlp/yt-dlp/issues/10202)) by [bashonly](https://github.com/bashonly)
- **cloudflarestream**: [Fix `_VALID_URL` and embed extraction](https://github.com/yt-dlp/yt-dlp/commit/7aa322c02cec54eb77154a89da7e400194f0bd03) ([#10215](https://github.com/yt-dlp/yt-dlp/issues/10215)) by [bashonly](https://github.com/bashonly)
- **cloudycdn**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/b758877afa225747fba81c8a580e27583a231734) ([#10271](https://github.com/yt-dlp/yt-dlp/issues/10271)) by [Caesim404](https://github.com/Caesim404)
- **digitalconcerthall**: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/2a4f2e82dbeeb0c9130883c83dac689d5260c871) ([#10152](https://github.com/yt-dlp/yt-dlp/issues/10152)) by [seproDev](https://github.com/seproDev), [tippfehlr](https://github.com/tippfehlr)
- **facebook**: reel: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/8ca1d57ed08d00efa117820a5a82f763b20e2d1d) ([#10232](https://github.com/yt-dlp/yt-dlp/issues/10232)) by [bashonly](https://github.com/bashonly)
- **francetv**
- [Detect and raise errors for DRM](https://github.com/yt-dlp/yt-dlp/commit/3690c2f59827c79a1bbe388a7c1ae75db7477db2) ([#10165](https://github.com/yt-dlp/yt-dlp/issues/10165)) by [bashonly](https://github.com/bashonly)
- [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/081708d6074dfbb907e25af61ba530bba0d4b31d) ([#10177](https://github.com/yt-dlp/yt-dlp/issues/10177)) by [bashonly](https://github.com/bashonly)
- **generic**: [Add `key_query` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/5dbac313ae4e3e8521dfe2e1a6a048a98ff4b4fe) by [bashonly](https://github.com/bashonly)
- **graspop**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1d369b4096d79233e0ac2c93762746a64d7a69c8) ([#10268](https://github.com/yt-dlp/yt-dlp/issues/10268)) by [Niluge-KiWi](https://github.com/Niluge-KiWi)
- **jiocinema**: series: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/61714f46956f61612032bba857aed7ad1387eccd) ([#10139](https://github.com/yt-dlp/yt-dlp/issues/10139)) by [varunchopra](https://github.com/varunchopra)
- **khanacademy**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/4093eb1fcc29a0e2aea9adfcba479787d9ae0c0c) ([#9136](https://github.com/yt-dlp/yt-dlp/issues/9136)) by [c-basalt](https://github.com/c-basalt)
- **laracasts**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b8da8a98f897599095d4ef1644b8c5fd39921118) ([#10055](https://github.com/yt-dlp/yt-dlp/issues/10055)) by [ASertacAkkaya](https://github.com/ASertacAkkaya), [seproDev](https://github.com/seproDev)
- **matchtv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f3411af12e209bc5624e1ac31271b8aabe2d3c90) ([#10190](https://github.com/yt-dlp/yt-dlp/issues/10190)) by [megumintyan](https://github.com/megumintyan)
- **mediasite**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/0953209a857c51648aee89d205c086b0e1dd3864) ([#10273](https://github.com/yt-dlp/yt-dlp/issues/10273)) by [bashonly](https://github.com/bashonly)
- **microsoftembed**: [Add extractors for dev materials](https://github.com/yt-dlp/yt-dlp/commit/9200bc70c94546b2191bb6fbfc9cea98a919cc56) ([#9177](https://github.com/yt-dlp/yt-dlp/issues/9177)) by [c-basalt](https://github.com/c-basalt)
- **mlbtv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/61edf57f8f13f6dfd81154174e647eb5fdd26089) ([#10296](https://github.com/yt-dlp/yt-dlp/issues/10296)) by [bashonly](https://github.com/bashonly)
- **neteasemusic**: [Extract more formats from new API](https://github.com/yt-dlp/yt-dlp/commit/7a03f88c40b80d3cf54f68edd9d4bdd6aa527570) ([#10258](https://github.com/yt-dlp/yt-dlp/issues/10258)) by [hafeoz](https://github.com/hafeoz)
- **nhkradiru**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b8e2a5e0e1030076f833917906e19bb6c7b318f6) ([#10106](https://github.com/yt-dlp/yt-dlp/issues/10106)) by [garret1317](https://github.com/garret1317)
- **nuum**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/aefede25561a06cba398d4f593eee2fbe942693b) ([#10316](https://github.com/yt-dlp/yt-dlp/issues/10316)) by [DmitryScaletta](https://github.com/DmitryScaletta)
- **orf**
- on
- [Add `prefer_segments_playlist` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/e6a22834df1776ec4e486526f6df2bf53cb7e06f) ([#10314](https://github.com/yt-dlp/yt-dlp/issues/10314)) by [seproDev](https://github.com/seproDev)
- [Support segmented episodes](https://github.com/yt-dlp/yt-dlp/commit/8b46ad4d8b8ee8c5472af0cde863baa89ca3f425) ([#10053](https://github.com/yt-dlp/yt-dlp/issues/10053)) by [seproDev](https://github.com/seproDev)
- **patreoncampaign**: [Fix `campaign_id` extraction](https://github.com/yt-dlp/yt-dlp/commit/2e5a47da400b645aadbda6afd1156bd89c744f48) ([#10070](https://github.com/yt-dlp/yt-dlp/issues/10070)) by [bashonly](https://github.com/bashonly)
- **podbayfm**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/d4b52ce3fcb8d9578ed12365648eaba8718c603e) ([#10195](https://github.com/yt-dlp/yt-dlp/issues/10195)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
- **pokergo**: [Make metadata extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/36e8dd832579b5375a0f6626af4268b86b4eb21a) ([#10319](https://github.com/yt-dlp/yt-dlp/issues/10319)) by [axpauls](https://github.com/axpauls)
- **qqmusic**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/4f5d7be3c5590bb257d8ff521572aee9839ab754) ([#9768](https://github.com/yt-dlp/yt-dlp/issues/9768)) by [c-basalt](https://github.com/c-basalt)
- **rtvslo.si**: show: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/92a1c4abaeeba9a69d611c57b73555cb1a1f00ad) ([#8418](https://github.com/yt-dlp/yt-dlp/issues/8418)) by [JSubelj](https://github.com/JSubelj), [seproDev](https://github.com/seproDev)
- **soundcloud**: [Fix `download` format extraction](https://github.com/yt-dlp/yt-dlp/commit/e53e56b73543799638fa6abb0c78f8b091aa84e1) ([#10125](https://github.com/yt-dlp/yt-dlp/issues/10125)) by [bashonly](https://github.com/bashonly)
- **sproutvideo**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/d6c2c2bc84f1434255be5c73baeb17d893d2c0d4) ([#10098](https://github.com/yt-dlp/yt-dlp/issues/10098)) by [bashonly](https://github.com/bashonly), [TheZ3ro](https://github.com/TheZ3ro)
- **tiktok**
- [Detect and raise when login is required](https://github.com/yt-dlp/yt-dlp/commit/ea88129784fcbb6987161df9ba05909325d8e2e9) ([#10124](https://github.com/yt-dlp/yt-dlp/issues/10124)) by [bashonly](https://github.com/bashonly)
- [Fix API extraction](https://github.com/yt-dlp/yt-dlp/commit/96472d72f29550c25c5dcedcde02c38c192b0011) ([#10216](https://github.com/yt-dlp/yt-dlp/issues/10216)) by [bashonly](https://github.com/bashonly)
- **tubitv**
- [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/bef9a9e5361fd7a72e21d0f1a8c8afb70d89e8c5) ([#9975](https://github.com/yt-dlp/yt-dlp/issues/9975)) by [chilinux](https://github.com/chilinux)
- series: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d7d861811c15585a4f7ec9d5ae68d2ac28de28a0) ([#10116](https://github.com/yt-dlp/yt-dlp/issues/10116)) by [bashonly](https://github.com/bashonly)
- **vimeo**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/d4b99a233314bf31f9c842035ea9884673d5313a) ([#10327](https://github.com/yt-dlp/yt-dlp/issues/10327)) by [bashonly](https://github.com/bashonly)
- **youtube**
- [Extract all formats from multi-language m3u8s](https://github.com/yt-dlp/yt-dlp/commit/9bd85019931927a99b0fe0dc58ac51acca9fbe72) ([#9875](https://github.com/yt-dlp/yt-dlp/issues/9875)) by [bashonly](https://github.com/bashonly), [clienthax](https://github.com/clienthax)
- [Skip formats if nsig decoding fails](https://github.com/yt-dlp/yt-dlp/commit/800ec085ccf98420584d8bb38c20a2c079669b09) ([#10223](https://github.com/yt-dlp/yt-dlp/issues/10223)) by [bashonly](https://github.com/bashonly)
- [Suppress "Unavailable videos are hidden" warning](https://github.com/yt-dlp/yt-dlp/commit/24f3097ea9a470a984d0454dc013cafa2325f5f8) ([#10159](https://github.com/yt-dlp/yt-dlp/issues/10159)) by [mgedmin](https://github.com/mgedmin)
- tab: [Fix channel metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/a0d9967f6822fc279e86bce33464194985148727) ([#10071](https://github.com/yt-dlp/yt-dlp/issues/10071)) by [bashonly](https://github.com/bashonly), [shoxie007](https://github.com/shoxie007)
#### Downloader changes
- **hls**: [Apply `extra_param_to_key_url` from info dict](https://github.com/yt-dlp/yt-dlp/commit/ca8885edd93bdf8912af6c22ee335b6222cb9ba9) by [bashonly](https://github.com/bashonly)
#### Postprocessor changes
- **embedthumbnail**: [Fix postprocessor](https://github.com/yt-dlp/yt-dlp/commit/f2a4ea1794718e4dc0148bc172cb877f1080903b) ([#10248](https://github.com/yt-dlp/yt-dlp/issues/10248)) by [Grub4K](https://github.com/Grub4K)
#### Networking changes
- **Request Handler**: requests: [Bump minimum `requests` version to 2.32.2](https://github.com/yt-dlp/yt-dlp/commit/db50f19d76c6870a5a13d0cab9287d684fd7449a) ([#10079](https://github.com/yt-dlp/yt-dlp/issues/10079)) by [bashonly](https://github.com/bashonly)
#### Misc. changes
- **build**
- [Bump Pyinstaller to `>=6.7.0` for all builds](https://github.com/yt-dlp/yt-dlp/commit/5fdd13006a1c5d78642c8d3c4c7df0448273c2ae) ([#10069](https://github.com/yt-dlp/yt-dlp/issues/10069)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
- [Cache dependencies for `macos` job](https://github.com/yt-dlp/yt-dlp/commit/46c1b7cfec1d0e6155083ca7e6948674c64ecb97) ([#10088](https://github.com/yt-dlp/yt-dlp/issues/10088)) by [bashonly](https://github.com/bashonly)
- [Use `macos-12` image for `yt-dlp_macos`](https://github.com/yt-dlp/yt-dlp/commit/03334d639d5282cd4107edb32c623ba400262fc4) ([#10063](https://github.com/yt-dlp/yt-dlp/issues/10063)) by [bashonly](https://github.com/bashonly)
- **cleanup**
- [Add more ruff rules](https://github.com/yt-dlp/yt-dlp/commit/add96eb9f84cfffe85682bf2fb85135746994ee8) ([#10149](https://github.com/yt-dlp/yt-dlp/issues/10149)) by [seproDev](https://github.com/seproDev)
- [Bump ruff to 0.5.x](https://github.com/yt-dlp/yt-dlp/commit/7814c50948a2b9a4c746441ecbc509ae563d5d1f) ([#10282](https://github.com/yt-dlp/yt-dlp/issues/10282)) by [seproDev](https://github.com/seproDev)
- Miscellaneous: [6aaf96a](https://github.com/yt-dlp/yt-dlp/commit/6aaf96a3d6e7d0d426e97e11a2fcf52fda00e733) by [bashonly](https://github.com/bashonly), [c-basalt](https://github.com/c-basalt), [jucor](https://github.com/jucor), [seproDev](https://github.com/seproDev)
- **test**: download: [Raise on network errors](https://github.com/yt-dlp/yt-dlp/commit/54a63e80af82791d2f0985bd0176bb182963fd5f) ([#10283](https://github.com/yt-dlp/yt-dlp/issues/10283)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
### 2024.05.27 ### 2024.05.27
#### Extractor changes #### Extractor changes

View file

@ -61,3 +61,10 @@ You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [autho
* Reworked internals like `traverse_obj`, various core refactors and bug fixes * Reworked internals like `traverse_obj`, various core refactors and bug fixes
* Implemented proper progress reporting for parallel downloads * Implemented proper progress reporting for parallel downloads
* Improved/fixed/added Bundestag, crunchyroll, pr0gramm, Twitter, WrestleUniverse etc * Improved/fixed/added Bundestag, crunchyroll, pr0gramm, Twitter, WrestleUniverse etc
## [sepro](https://github.com/seproDev)
* UX improvements: Warn when ffmpeg is missing, warn when double-clicking exe
* Code cleanup: Remove dead extractors, mark extractors as broken, enable/apply ruff rules
* Improved/fixed/added ArdMediathek, DRTV, Floatplane, MagentaMusik, Naver, Nebula, OnDemandKorea, Vbox7 etc

View file

@ -141,7 +141,7 @@ You can use `yt-dlp -U` to update if you are using the [release binaries](#relea
If you [installed with pip](https://github.com/yt-dlp/yt-dlp/wiki/Installation#with-pip), simply re-run the same command that was used to install the program If you [installed with pip](https://github.com/yt-dlp/yt-dlp/wiki/Installation#with-pip), simply re-run the same command that was used to install the program
For other third-party package managers, see [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation#third-party-package-managers) or refer their documentation For other third-party package managers, see [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation#third-party-package-managers) or refer to their documentation
<a id="update-channels"></a> <a id="update-channels"></a>
@ -184,7 +184,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly
### Strongly recommended ### Strongly recommended
* [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html) * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection), as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html)
There are bugs in ffmpeg that cause various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for some of these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds There are bugs in ffmpeg that cause various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for some of these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds
@ -275,7 +275,7 @@ py -m bundle.py2exe
### Related scripts ### Related scripts
* **`devscripts/install_deps.py`** - Install dependencies for yt-dlp. * **`devscripts/install_deps.py`** - Install dependencies for yt-dlp.
* **`devscripts/update-version.py`** - Update the version number based on current date. * **`devscripts/update-version.py`** - Update the version number based on the current date.
* **`devscripts/set-variant.py`** - Set the build variant of the executable. * **`devscripts/set-variant.py`** - Set the build variant of the executable.
* **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file. * **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file.
* **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading. * **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading.
@ -456,8 +456,8 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
is not present, and "&" to check multiple is not present, and "&" to check multiple
conditions. Use a "\" to escape "&" or conditions. Use a "\" to escape "&" or
quotes if needed. If used multiple times, quotes if needed. If used multiple times,
the filter matches if atleast one of the the filter matches if at least one of the
conditions are met. E.g. --match-filter conditions is met. E.g. --match-filter
!is_live --match-filter "like_count>?100 & !is_live --match-filter "like_count>?100 &
description~='(?i)\bcats \& dogs\b'" matches description~='(?i)\bcats \& dogs\b'" matches
only videos that are not live OR those that only videos that are not live OR those that
@ -674,7 +674,7 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
PROFILE to load cookies from, and the PROFILE to load cookies from, and the
CONTAINER name (if Firefox) ("none" for no CONTAINER name (if Firefox) ("none" for no
container) can be given with their container) can be given with their
respective seperators. By default, all respective separators. By default, all
containers of the most recently accessed containers of the most recently accessed
profile are used. Currently supported profile are used. Currently supported
keyrings are: basictext, gnomekeyring, keyrings are: basictext, gnomekeyring,
@ -1036,7 +1036,7 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
--print/--output), "before_dl" (before each --print/--output), "before_dl" (before each
video download), "post_process" (after each video download), "post_process" (after each
video download; default), "after_move" video download; default), "after_move"
(after moving video file to it's final (after moving video file to its final
locations), "after_video" (after downloading locations), "after_video" (after downloading
and processing all formats of a video), or and processing all formats of a video), or
"playlist" (at end of playlist). This option "playlist" (at end of playlist). This option
@ -1125,7 +1125,7 @@ You can configure yt-dlp by placing any supported command line option to a confi
* `/etc/yt-dlp/config` * `/etc/yt-dlp/config`
* `/etc/yt-dlp/config.txt` * `/etc/yt-dlp/config.txt`
E.g. with the following configuration file yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory: E.g. with the following configuration file, yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory:
``` ```
# Lines starting with # are comments # Lines starting with # are comments
@ -1142,7 +1142,7 @@ E.g. with the following configuration file yt-dlp will always extract the audio,
-o ~/YouTube/%(title)s.%(ext)s -o ~/YouTube/%(title)s.%(ext)s
``` ```
**Note**: Options in configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary as-if it were a UNIX shell. **Note**: Options in configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary, as if it were a UNIX shell.
You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded. You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded.
@ -1154,12 +1154,12 @@ If you want your file to be decoded differently, add `# coding: ENCODING` to the
### Authentication with netrc ### Authentication with netrc
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every yt-dlp execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per-extractor basis. For that you will need to create a `.netrc` file in `--netrc-location` and restrict permissions to read/write by only you: You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every yt-dlp execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per-extractor basis. For that, you will need to create a `.netrc` file in `--netrc-location` and restrict permissions to read/write by only you:
``` ```
touch ${HOME}/.netrc touch ${HOME}/.netrc
chmod a-rwx,u+rw ${HOME}/.netrc chmod a-rwx,u+rw ${HOME}/.netrc
``` ```
After that you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase: After that, you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase:
``` ```
machine <extractor> login <username> password <password> machine <extractor> login <username> password <password>
``` ```
@ -1201,7 +1201,7 @@ It may however also contain special sequences that will be replaced when downloa
The field names themselves (the part inside the parenthesis) can also have some special formatting: The field names themselves (the part inside the parenthesis) can also have some special formatting:
1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a dot `.` separator; e.g. `%(tags.0)s`, `%(subtitles.en.-1.ext)s`. You can do Python slicing with colon `:`; E.g. `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. Curly braces `{}` can be used to build dictionaries with only specific keys; e.g. `%(formats.:.{format_id,height})#j`. An empty field name `%()s` refers to the entire infodict; e.g. `%(.{id,title})s`. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields 1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a dot `.` separator; e.g. `%(tags.0)s`, `%(subtitles.en.-1.ext)s`. You can do Python slicing with colon `:`; E.g. `%(id.3:7)s`, `%(id.6:2:-1)s`, `%(formats.:.format_id)s`. Curly braces `{}` can be used to build dictionaries with only specific keys; e.g. `%(formats.:.{format_id,height})#j`. An empty field name `%()s` refers to the entire infodict; e.g. `%(.{id,title})s`. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields
1. **Arithmetic**: Simple arithmetic can be done on numeric fields using `+`, `-` and `*`. E.g. `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d` 1. **Arithmetic**: Simple arithmetic can be done on numeric fields using `+`, `-` and `*`. E.g. `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
@ -1282,13 +1282,15 @@ The available fields are:
- `n_entries` (numeric): Total number of extracted items in the playlist - `n_entries` (numeric): Total number of extracted items in the playlist
- `playlist_id` (string): Identifier of the playlist that contains the video - `playlist_id` (string): Identifier of the playlist that contains the video
- `playlist_title` (string): Name of the playlist that contains the video - `playlist_title` (string): Name of the playlist that contains the video
- `playlist` (string): `playlist_id` or `playlist_title` - `playlist` (string): `playlist_title` if available or else `playlist_id`
- `playlist_count` (numeric): Total number of items in the playlist. May not be known if entire playlist is not extracted - `playlist_count` (numeric): Total number of items in the playlist. May not be known if entire playlist is not extracted
- `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the final index - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the final index
- `playlist_autonumber` (numeric): Position of the video in the playlist download queue padded with leading zeros according to the total length of the playlist - `playlist_autonumber` (numeric): Position of the video in the playlist download queue padded with leading zeros according to the total length of the playlist
- `playlist_uploader` (string): Full name of the playlist uploader - `playlist_uploader` (string): Full name of the playlist uploader
- `playlist_uploader_id` (string): Nickname or id of the playlist uploader - `playlist_uploader_id` (string): Nickname or id of the playlist uploader
- `webpage_url` (string): A URL to the video webpage which if given to yt-dlp should allow to get the same result again - `playlist_channel` (string): Display name of the channel that uploaded the playlist
- `playlist_channel_id` (string): Identifier of the channel that uploaded the playlist
- `webpage_url` (string): A URL to the video webpage which, if given to yt-dlp, should yield the same result again
- `webpage_url_basename` (string): The basename of the webpage URL - `webpage_url_basename` (string): The basename of the webpage URL
- `webpage_url_domain` (string): The domain of the webpage URL - `webpage_url_domain` (string): The domain of the webpage URL
- `original_url` (string): The URL given by the user (or same as `webpage_url` for playlist entries) - `original_url` (string): The URL given by the user (or same as `webpage_url` for playlist entries)
@ -1304,10 +1306,10 @@ Available for the video that belongs to some logical chapter or section:
- `chapter_number` (numeric): Number of the chapter the video belongs to - `chapter_number` (numeric): Number of the chapter the video belongs to
- `chapter_id` (string): Id of the chapter the video belongs to - `chapter_id` (string): Id of the chapter the video belongs to
Available for the video that is an episode of some series or programme: Available for the video that is an episode of some series or program:
- `series` (string): Title of the series or programme the video episode belongs to - `series` (string): Title of the series or program the video episode belongs to
- `series_id` (string): Id of the series or programme the video episode belongs to - `series_id` (string): Id of the series or program the video episode belongs to
- `season` (string): Title of the season the video episode belongs to - `season` (string): Title of the season the video episode belongs to
- `season_number` (numeric): Number of the season the video episode belongs to - `season_number` (numeric): Number of the season the video episode belongs to
- `season_id` (string): Id of the season the video episode belongs to - `season_id` (string): Id of the season the video episode belongs to
@ -1364,7 +1366,7 @@ Available only in `--sponsorblock-chapter-title`:
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. E.g. for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory. Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. E.g. for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory.
**Note**: Some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default). **Note**: Some of the sequences are not guaranteed to be present, since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default).
**Tip**: Look at the `-j` output to identify which fields are available for the particular URL **Tip**: Look at the `-j` output to identify which fields are available for the particular URL
@ -1442,7 +1444,7 @@ You can also use special names to select particular edge case formats:
- `all`: Select **all formats** separately - `all`: Select **all formats** separately
- `mergeall`: Select and **merge all formats** (Must be used with `--audio-multistreams`, `--video-multistreams` or both) - `mergeall`: Select and **merge all formats** (Must be used with `--audio-multistreams`, `--video-multistreams` or both)
- `b*`, `best*`: Select the best quality format that **contains either** a video or an audio or both (ie; `vcodec!=none or acodec!=none`) - `b*`, `best*`: Select the best quality format that **contains either** a video or an audio or both (i.e.; `vcodec!=none or acodec!=none`)
- `b`, `best`: Select the best quality format that **contains both** video and audio. Equivalent to `best*[vcodec!=none][acodec!=none]` - `b`, `best`: Select the best quality format that **contains both** video and audio. Equivalent to `best*[vcodec!=none][acodec!=none]`
- `bv`, `bestvideo`: Select the best quality **video-only** format. Equivalent to `best*[acodec=none]` - `bv`, `bestvideo`: Select the best quality **video-only** format. Equivalent to `best*[acodec=none]`
- `bv*`, `bestvideo*`: Select the best quality format that **contains video**. It may also contain audio. Equivalent to `best*[vcodec!=none]` - `bv*`, `bestvideo*`: Select the best quality format that **contains video**. It may also contain audio. Equivalent to `best*[vcodec!=none]`
@ -1455,7 +1457,7 @@ You can also use special names to select particular edge case formats:
- `wa`, `worstaudio`: Select the worst quality audio-only format. Equivalent to `worst*[vcodec=none]` - `wa`, `worstaudio`: Select the worst quality audio-only format. Equivalent to `worst*[vcodec=none]`
- `wa*`, `worstaudio*`: Select the worst quality format that contains audio. It may also contain video. Equivalent to `worst*[acodec!=none]` - `wa*`, `worstaudio*`: Select the worst quality format that contains audio. It may also contain video. Equivalent to `worst*[acodec!=none]`
For example, to download the worst quality video-only format you can use `-f worstvideo`. It is however recommended not to use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-S +size` or more rigorously, `-S +size,+br,+res,+fps` instead of `-f worst`. See [Sorting Formats](#sorting-formats) for more details. For example, to download the worst quality video-only format you can use `-f worstvideo`. It is, however, recommended not to use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-S +size` or more rigorously, `-S +size,+br,+res,+fps` instead of `-f worst`. See [Sorting Formats](#sorting-formats) for more details.
You can select the n'th best format of a type by using `best<type>.<n>`. For example, `best.2` will select the 2nd best combined format. Similarly, `bv*.3` will select the 3rd best format that contains a video stream. You can select the n'th best format of a type by using `best<type>.<n>`. For example, `best.2` will select the 2nd best combined format. Similarly, `bv*.3` will select the 3rd best format that contains a video stream.
@ -1505,7 +1507,7 @@ Also filtering work for comparisons `=` (equals), `^=` (starts with), `$=` (ends
Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). The comparand of a string comparison needs to be quoted with either double or single quotes if it contains spaces or special characters other than `._-`. Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). The comparand of a string comparison needs to be quoted with either double or single quotes if it contains spaces or special characters other than `._-`.
**Note**: None of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering. **Note**: None of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by the particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering.
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "bv[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 kbps. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats. Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "bv[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 kbps. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.
@ -1549,9 +1551,9 @@ The available fields are:
All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. E.g. `+res` prefers format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. E.g. `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. E.g. `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. E.g. `filesize~1G` prefers the format with filesize closest to 1 GiB. All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. E.g. `+res` prefers format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. E.g. `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. E.g. `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. E.g. `filesize~1G` prefers the format with filesize closest to 1 GiB.
The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,vcodec:vp9.2,channels,acodec,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order. The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behavior can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,vcodec:vp9.2,channels,acodec,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order.
Note that the default has `vcodec:vp9.2`; i.e. `av1` is not preferred. Similarly, the default for hdr is `hdr:12`; i.e. dolby vision is not preferred. These choices are made since DV and AV1 formats are not yet fully compatible with most devices. This may be changed in the future as more devices become capable of smoothly playing back these formats. Note that the default has `vcodec:vp9.2`; i.e. `av1` is not preferred. Similarly, the default for hdr is `hdr:12`; i.e. Dolby Vision is not preferred. These choices are made since DV and AV1 formats are not yet fully compatible with most devices. This may be changed in the future as more devices become capable of smoothly playing back these formats.
If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`. If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`.
@ -1763,7 +1765,7 @@ The following extractors use this feature:
#### youtube #### youtube
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. The `android` clients will always be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients. * `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mediaconnect`, `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. The `android` clients will always be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients.
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
@ -1779,8 +1781,9 @@ The following extractors use this feature:
* `approximate_date`: Extract approximate `upload_date` and `timestamp` in flat-playlist. This may cause date-based filters to be slightly off * `approximate_date`: Extract approximate `upload_date` and `timestamp` in flat-playlist. This may cause date-based filters to be slightly off
#### generic #### generic
* `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments if no value is provided, or else apply the query string given as `fragment_query=VALUE`. Does not apply to ffmpeg * `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments if no value is provided, or else apply the query string given as `fragment_query=VALUE`. Note that if the stream has an HLS AES-128 key, then the query parameters will be passed to the key URI as well, unless the `key_query` extractor-arg is passed, or unless an external key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg
* `variant_query`: Passthrough the master m3u8 URL query to its variant playlist URLs if no value is provided, or else apply the query string given as `variant_query=VALUE` * `variant_query`: Passthrough the master m3u8 URL query to its variant playlist URLs if no value is provided, or else apply the query string given as `variant_query=VALUE`
* `key_query`: Passthrough the master m3u8 URL query to its HLS AES-128 decryption key URI if no value is provided, or else apply the query string given as `key_query=VALUE`. Note that this will have no effect if the key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg
* `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist * `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist
* `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live` * `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live`
@ -1848,7 +1851,13 @@ The following extractors use this feature:
* `cdn`: One or more CDN IDs to use with the API call for stream URLs, e.g. `gcp_cdn`, `gs_cdn_pc_app`, `gs_cdn_mobile_web`, `gs_cdn_pc_web` * `cdn`: One or more CDN IDs to use with the API call for stream URLs, e.g. `gcp_cdn`, `gs_cdn_pc_app`, `gs_cdn_mobile_web`, `gs_cdn_pc_web`
#### soundcloud #### soundcloud
* `formats`: Formats to request from the API. Requested values should be in the format of `{protocol}_{extension}` (omitting the bitrate), e.g. `hls_opus,http_aac`. The `*` character functions as a wildcard, e.g. `*_mp3`, and can passed by itself to request all formats. Known protocols include `http`, `hls` and `hls-aes`; known extensions include `aac`, `opus` and `mp3`. Original `download` formats are always extracted. Default is `http_aac,hls_aac,http_opus,hls_opus,http_mp3,hls_mp3` * `formats`: Formats to request from the API. Requested values should be in the format of `{protocol}_{extension}` (omitting the bitrate), e.g. `hls_opus,http_aac`. The `*` character functions as a wildcard, e.g. `*_mp3`, and can be passed by itself to request all formats. Known protocols include `http`, `hls` and `hls-aes`; known extensions include `aac`, `opus` and `mp3`. Original `download` formats are always extracted. Default is `http_aac,hls_aac,http_opus,hls_opus,http_mp3,hls_mp3`
#### orfon (orf:on)
* `prefer_segments_playlist`: Prefer a playlist of program segments instead of a single complete video when available. If individual segments are desired, use `--concat-playlist never --extractor-args "orfon:prefer_segments_playlist"`
#### bilibili
* `prefer_multi_flv`: Prefer extracting flv formats over mp4 for older videos that still provide legacy formats
**Note**: These options may be changed/removed in the future without concern for backward compatibility **Note**: These options may be changed/removed in the future without concern for backward compatibility
@ -1861,7 +1870,7 @@ Note that **all** plugins are imported even if not invoked, and that **there are
Plugins can be of `<type>`s `extractor` or `postprocessor`. Plugins can be of `<type>`s `extractor` or `postprocessor`.
- Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for it. - Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for it.
- Extractor plugins take priority over builtin extractors. - Extractor plugins take priority over built-in extractors.
- Postprocessor plugins can be invoked using `--use-postprocessor NAME`. - Postprocessor plugins can be invoked using `--use-postprocessor NAME`.
@ -1916,7 +1925,7 @@ Run yt-dlp with `--verbose` to check if the plugin has been loaded.
See the [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) repo for a template plugin package and the [Plugin Development](https://github.com/yt-dlp/yt-dlp/wiki/Plugin-Development) section of the wiki for a plugin development guide. See the [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) repo for a template plugin package and the [Plugin Development](https://github.com/yt-dlp/yt-dlp/wiki/Plugin-Development) section of the wiki for a plugin development guide.
All public classes with a name ending in `IE`/`PP` are imported from each file for extractors and postprocessors repectively. This respects underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`). All public classes with a name ending in `IE`/`PP` are imported from each file for extractors and postprocessors respectively. This respects underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`).
To replace an existing extractor with a subclass of one, set the `plugin_name` class keyword argument (e.g. `class MyPluginIE(ABuiltInIE, plugin_name='myplugin')` will replace `ABuiltInIE` with `MyPluginIE`). Since the extractor replaces the parent, you should exclude the subclass extractor from being imported separately by making it private using one of the methods described above. To replace an existing extractor with a subclass of one, set the `plugin_name` class keyword argument (e.g. `class MyPluginIE(ABuiltInIE, plugin_name='myplugin')` will replace `ABuiltInIE` with `MyPluginIE`). Since the extractor replaces the parent, you should exclude the subclass extractor from being imported separately by making it private using one of the methods described above.
@ -1928,7 +1937,7 @@ See the [Developer Instructions](https://github.com/yt-dlp/yt-dlp/blob/master/CO
yt-dlp makes the best effort to be a good command-line program, and thus should be callable from any programming language. yt-dlp makes the best effort to be a good command-line program, and thus should be callable from any programming language.
Your program should avoid parsing the normal stdout since it may change in future versions. Instead it should use options such as `-J`, `--print`, `--progress-template`, `--exec` etc to create console output that you can reliably reproduce and parse. Your program should avoid parsing the normal stdout since it may change in future versions. Instead, it should use options such as `-J`, `--print`, `--progress-template`, `--exec` etc to create console output that you can reliably reproduce and parse.
From a Python program, you can embed yt-dlp in a more powerful fashion, like this: From a Python program, you can embed yt-dlp in a more powerful fashion, like this:
@ -2220,6 +2229,14 @@ For ease of use, a few more compat options are available:
* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx` * `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
* `--compat-options 2023`: Currently does nothing. Use this to enable all future compat options * `--compat-options 2023`: Currently does nothing. Use this to enable all future compat options
The following compat options restore vulnerable behavior from before security patches:
* `--compat-options allow-unsafe-ext`: Allow files with any extension (including unsafe ones) to be downloaded ([GHSA-79w7-vh3h-8g4j](<https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j>))
> :warning: Only use if a valid file download is rejected because its extension is detected as uncommon
>
> **This option can enable remote code execution! Consider [opening an issue](<https://github.com/yt-dlp/yt-dlp/issues/new/choose>) instead!**
### Deprecated options ### Deprecated options
These are all the deprecated options and the current alternative to achieve the same effect These are all the deprecated options and the current alternative to achieve the same effect

View file

@ -169,5 +169,16 @@
"when": "5c019f6328ad40d66561eac3c4de0b3cd070d0f6", "when": "5c019f6328ad40d66561eac3c4de0b3cd070d0f6",
"short": "[cleanup] Misc (#9765)", "short": "[cleanup] Misc (#9765)",
"authors": ["bashonly", "Grub4K", "seproDev"] "authors": ["bashonly", "Grub4K", "seproDev"]
},
{
"action": "change",
"when": "e6a22834df1776ec4e486526f6df2bf53cb7e06f",
"short": "[ie/orf:on] Add `prefer_segments_playlist` extractor-arg (#10314)",
"authors": ["seproDev"]
},
{
"action": "add",
"when": "6aaf96a3d6e7d0d426e97e11a2fcf52fda00e733",
"short": "[priority] Security: [[CVE-2024-38519](https://nvd.nist.gov/vuln/detail/CVE-2024-38519)] [Properly sanitize file-extension to prevent file system modification and RCE](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j)\n - Unsafe extensions are now blocked from being downloaded"
} }
] ]

2
devscripts/cli_to_api.py Normal file → Executable file
View file

@ -1,3 +1,5 @@
#!/usr/bin/env python3
# Allow direct execution # Allow direct execution
import os import os
import sys import sys

View file

@ -72,7 +72,7 @@ dev = [
] ]
static-analysis = [ static-analysis = [
"autopep8~=2.0", "autopep8~=2.0",
"ruff~=0.4.4", "ruff~=0.5.0",
] ]
test = [ test = [
"pytest~=8.1", "pytest~=8.1",
@ -211,6 +211,7 @@ ignore = [
"TD002", # missing-todo-author "TD002", # missing-todo-author
"TD003", # missing-todo-link "TD003", # missing-todo-link
"PLE0604", # invalid-all-object (false positives) "PLE0604", # invalid-all-object (false positives)
"PLE0643", # potential-index-error (false positives)
"PLW0603", # global-statement "PLW0603", # global-statement
"PLW1510", # subprocess-run-without-check "PLW1510", # subprocess-run-without-check
"PLW2901", # redefined-loop-name "PLW2901", # redefined-loop-name
@ -298,7 +299,7 @@ banned-from = [
"string", "string",
"sys", "sys",
"time", "time",
"urllib", "urllib.parse",
"uuid", "uuid",
"xml", "xml",
] ]

View file

@ -46,6 +46,7 @@
- **aenetworks:show** - **aenetworks:show**
- **AeonCo** - **AeonCo**
- **afreecatv**: [*afreecatv*](## "netrc machine") afreecatv.com - **afreecatv**: [*afreecatv*](## "netrc machine") afreecatv.com
- **afreecatv:catchstory**: [*afreecatv*](## "netrc machine") afreecatv.com catch story
- **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com livestreams - **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com livestreams
- **afreecatv:user** - **afreecatv:user**
- **AirTV** - **AirTV**
@ -542,6 +543,7 @@
- **Goshgay** - **Goshgay**
- **GoToStage** - **GoToStage**
- **GPUTechConf** - **GPUTechConf**
- **Graspop**
- **Gronkh** - **Gronkh**
- **gronkh:feed** - **gronkh:feed**
- **gronkh:vods** - **gronkh:vods**
@ -678,6 +680,8 @@
- **la7.it** - **la7.it**
- **la7.it:pod:episode** - **la7.it:pod:episode**
- **la7.it:podcast** - **la7.it:podcast**
- **laracasts**
- **laracasts:series**
- **LastFM** - **LastFM**
- **LastFMPlaylist** - **LastFMPlaylist**
- **LastFMUser** - **LastFMUser**
@ -775,7 +779,12 @@
- **MelonVOD** - **MelonVOD**
- **Metacritic** - **Metacritic**
- **mewatch** - **mewatch**
- **MicrosoftBuild**
- **MicrosoftEmbed** - **MicrosoftEmbed**
- **MicrosoftLearnEpisode**
- **MicrosoftLearnPlaylist**
- **MicrosoftLearnSession**
- **MicrosoftMedius**
- **microsoftstream**: Microsoft Stream - **microsoftstream**: Microsoft Stream
- **mildom**: Record ongoing live by specific user in Mildom - **mildom**: Record ongoing live by specific user in Mildom
- **mildom:clip**: Clip in Mildom - **mildom:clip**: Clip in Mildom
@ -838,8 +847,6 @@
- **MusicdexArtist** - **MusicdexArtist**
- **MusicdexPlaylist** - **MusicdexPlaylist**
- **MusicdexSong** - **MusicdexSong**
- **mva**: Microsoft Virtual Academy videos
- **mva:course**: Microsoft Virtual Academy courses
- **Mx3** - **Mx3**
- **Mx3Neo** - **Mx3Neo**
- **Mx3Volksmusik** - **Mx3Volksmusik**
@ -1131,6 +1138,7 @@
- **QingTing** - **QingTing**
- **qqmusic**: QQ音乐 - **qqmusic**: QQ音乐
- **qqmusic:album**: QQ音乐 - 专辑 - **qqmusic:album**: QQ音乐 - 专辑
- **qqmusic:mv**: QQ音乐 - MV
- **qqmusic:playlist**: QQ音乐 - 歌单 - **qqmusic:playlist**: QQ音乐 - 歌单
- **qqmusic:singer**: QQ音乐 - 歌手 - **qqmusic:singer**: QQ音乐 - 歌手
- **qqmusic:toplist**: QQ音乐 - 排行榜 - **qqmusic:toplist**: QQ音乐 - 排行榜
@ -1237,6 +1245,7 @@
- **rtve.es:television** - **rtve.es:television**
- **RTVS** - **RTVS**
- **rtvslo.si** - **rtvslo.si**
- **rtvslo.si:show**
- **RudoVideo** - **RudoVideo**
- **Rule34Video** - **Rule34Video**
- **Rumble** - **Rumble**
@ -1360,6 +1369,7 @@
- **SpreakerShowPage** - **SpreakerShowPage**
- **SpringboardPlatform** - **SpringboardPlatform**
- **Sprout** - **Sprout**
- **SproutVideo**
- **sr:mediathek**: Saarländischer Rundfunk (**Currently broken**) - **sr:mediathek**: Saarländischer Rundfunk (**Currently broken**)
- **SRGSSR** - **SRGSSR**
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
@ -1494,8 +1504,8 @@
- **Tube8**: (**Currently broken**) - **Tube8**: (**Currently broken**)
- **TubeTuGraz**: [*tubetugraz*](## "netrc machine") tube.tugraz.at - **TubeTuGraz**: [*tubetugraz*](## "netrc machine") tube.tugraz.at
- **TubeTuGrazSeries**: [*tubetugraz*](## "netrc machine") - **TubeTuGrazSeries**: [*tubetugraz*](## "netrc machine")
- **TubiTv**: [*tubitv*](## "netrc machine") - **tubitv**: [*tubitv*](## "netrc machine")
- **TubiTvShow** - **tubitv:series**
- **Tumblr**: [*tumblr*](## "netrc machine") - **Tumblr**: [*tumblr*](## "netrc machine")
- **TuneInPodcast** - **TuneInPodcast**
- **TuneInPodcastEpisode** - **TuneInPodcastEpisode**
@ -1607,6 +1617,7 @@
- **VidioPremier**: [*vidio*](## "netrc machine") - **VidioPremier**: [*vidio*](## "netrc machine")
- **VidLii** - **VidLii**
- **Vidly** - **Vidly**
- **vids.io**
- **viewlift** - **viewlift**
- **viewlift:embed** - **viewlift:embed**
- **Viidea** - **Viidea**

View file

@ -67,6 +67,7 @@ class TestCookies(unittest.TestCase):
({'XDG_CURRENT_DESKTOP': 'GNOME'}, _LinuxDesktopEnvironment.GNOME), ({'XDG_CURRENT_DESKTOP': 'GNOME'}, _LinuxDesktopEnvironment.GNOME),
({'XDG_CURRENT_DESKTOP': 'GNOME:GNOME-Classic'}, _LinuxDesktopEnvironment.GNOME), ({'XDG_CURRENT_DESKTOP': 'GNOME:GNOME-Classic'}, _LinuxDesktopEnvironment.GNOME),
({'XDG_CURRENT_DESKTOP': 'GNOME : GNOME-Classic'}, _LinuxDesktopEnvironment.GNOME), ({'XDG_CURRENT_DESKTOP': 'GNOME : GNOME-Classic'}, _LinuxDesktopEnvironment.GNOME),
({'XDG_CURRENT_DESKTOP': 'ubuntu:GNOME'}, _LinuxDesktopEnvironment.GNOME),
({'XDG_CURRENT_DESKTOP': 'Unity', 'DESKTOP_SESSION': 'gnome-fallback'}, _LinuxDesktopEnvironment.GNOME), ({'XDG_CURRENT_DESKTOP': 'Unity', 'DESKTOP_SESSION': 'gnome-fallback'}, _LinuxDesktopEnvironment.GNOME),
({'XDG_CURRENT_DESKTOP': 'KDE', 'KDE_SESSION_VERSION': '5'}, _LinuxDesktopEnvironment.KDE5), ({'XDG_CURRENT_DESKTOP': 'KDE', 'KDE_SESSION_VERSION': '5'}, _LinuxDesktopEnvironment.KDE5),

View file

@ -20,7 +20,6 @@ from test.helper import (
gettestcases, gettestcases,
getwebpagetestcases, getwebpagetestcases,
is_download_test, is_download_test,
report_warning,
try_rm, try_rm,
) )
@ -178,8 +177,7 @@ def generator(test_case, tname):
raise raise
if try_num == RETRIES: if try_num == RETRIES:
report_warning(f'{tname} failed due to network errors, skipping...') raise
return
print(f'Retrying: {try_num} failed tries\n\n##########\n\n') print(f'Retrying: {try_num} failed tries\n\n##########\n\n')

View file

@ -92,6 +92,7 @@ class TestJSInterpreter(unittest.TestCase):
self._test('function f(){return 0 && 1 || 2;}', 2) self._test('function f(){return 0 && 1 || 2;}', 2)
self._test('function f(){return 0 ?? 42;}', 0) self._test('function f(){return 0 ?? 42;}', 0)
self._test('function f(){return "life, the universe and everything" < 42;}', False) self._test('function f(){return "life, the universe and everything" < 42;}', False)
self._test('function f(){return 0 - 7 * - 6;}', 42)
def test_array_access(self): def test_array_access(self):
self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7]) self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7])

View file

@ -130,6 +130,7 @@ from yt_dlp.utils import (
xpath_text, xpath_text,
xpath_with_ns, xpath_with_ns,
) )
from yt_dlp.utils._utils import _UnsafeExtensionError
from yt_dlp.utils.networking import ( from yt_dlp.utils.networking import (
HTTPHeaderDict, HTTPHeaderDict,
escape_rfc3986, escape_rfc3986,
@ -281,6 +282,13 @@ class TestUtil(unittest.TestCase):
finally: finally:
os.environ['HOME'] = old_home or '' os.environ['HOME'] = old_home or ''
_uncommon_extensions = [
('exe', 'abc.exe.ext'),
('de', 'abc.de.ext'),
('../.mp4', None),
('..\\.mp4', None),
]
def test_prepend_extension(self): def test_prepend_extension(self):
self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext') self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext') self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
@ -289,6 +297,19 @@ class TestUtil(unittest.TestCase):
self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp') self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp')
self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext') self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext')
# Test uncommon extensions
self.assertEqual(prepend_extension('abc.ext', 'bin'), 'abc.bin.ext')
for ext, result in self._uncommon_extensions:
with self.assertRaises(_UnsafeExtensionError):
prepend_extension('abc', ext)
if result:
self.assertEqual(prepend_extension('abc.ext', ext, 'ext'), result)
else:
with self.assertRaises(_UnsafeExtensionError):
prepend_extension('abc.ext', ext, 'ext')
with self.assertRaises(_UnsafeExtensionError):
prepend_extension('abc.unexpected_ext', ext, 'ext')
def test_replace_extension(self): def test_replace_extension(self):
self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp') self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp')
self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp') self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp')
@ -297,6 +318,16 @@ class TestUtil(unittest.TestCase):
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp') self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp') self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
# Test uncommon extensions
self.assertEqual(replace_extension('abc.ext', 'bin'), 'abc.unknown_video')
for ext, _ in self._uncommon_extensions:
with self.assertRaises(_UnsafeExtensionError):
replace_extension('abc', ext)
with self.assertRaises(_UnsafeExtensionError):
replace_extension('abc.ext', ext, 'ext')
with self.assertRaises(_UnsafeExtensionError):
replace_extension('abc.unexpected_ext', ext, 'ext')
def test_subtitles_filename(self): def test_subtitles_filename(self):
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt') self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt') self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')

View file

@ -163,6 +163,10 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js', 'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js',
'_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ', '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ',
), ),
(
'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js',
'1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg',
),
] ]

View file

@ -4,6 +4,7 @@ import copy
import datetime as dt import datetime as dt
import errno import errno
import fileinput import fileinput
import functools
import http.cookiejar import http.cookiejar
import io import io
import itertools import itertools
@ -24,7 +25,7 @@ import traceback
import unicodedata import unicodedata
from .cache import Cache from .cache import Cache
from .compat import functools, urllib # isort: split from .compat import urllib # isort: split
from .compat import compat_os_name, urllib_req_to_req from .compat import compat_os_name, urllib_req_to_req
from .cookies import LenientSimpleCookie, load_cookies from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
@ -158,7 +159,7 @@ from .utils import (
write_json_file, write_json_file,
write_string, write_string,
) )
from .utils._utils import _YDLLogger from .utils._utils import _UnsafeExtensionError, _YDLLogger
from .utils.networking import ( from .utils.networking import (
HTTPHeaderDict, HTTPHeaderDict,
clean_headers, clean_headers,
@ -171,6 +172,20 @@ if compat_os_name == 'nt':
import ctypes import ctypes
def _catch_unsafe_extension_error(func):
@functools.wraps(func)
def wrapper(self, *args, **kwargs):
try:
return func(self, *args, **kwargs)
except _UnsafeExtensionError as error:
self.report_error(
f'The extracted extension ({error.extension!r}) is unusual '
'and will be skipped for safety reasons. '
f'If you believe this is an error{bug_reports_message(",")}')
return wrapper
class YoutubeDL: class YoutubeDL:
"""YoutubeDL class. """YoutubeDL class.
@ -453,8 +468,9 @@ class YoutubeDL:
Set the value to 'native' to use the native downloader Set the value to 'native' to use the native downloader
compat_opts: Compatibility options. See "Differences in default behavior". compat_opts: Compatibility options. See "Differences in default behavior".
The following options do not work when used through the API: The following options do not work when used through the API:
filename, abort-on-error, multistreams, no-live-chat, format-sort filename, abort-on-error, multistreams, no-live-chat,
no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json. format-sort, no-clean-infojson, no-playlist-metafiles,
no-keep-subs, no-attach-info-json, allow-unsafe-ext.
Refer __init__.py for their implementation Refer __init__.py for their implementation
progress_template: Dictionary of templates for progress outputs. progress_template: Dictionary of templates for progress outputs.
Allowed keys are 'download', 'postprocess', Allowed keys are 'download', 'postprocess',
@ -581,8 +597,9 @@ class YoutubeDL:
'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns', 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data', 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data',
'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies', 'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options', 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'extra_param_to_key_url',
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time', 'hls_aes', 'downloader_options', 'page_url', 'app', 'play_path', 'tc_url', 'flash_version',
'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
} }
_deprecated_multivalue_fields = { _deprecated_multivalue_fields = {
'album_artist': 'album_artists', 'album_artist': 'album_artists',
@ -1398,6 +1415,7 @@ class YoutubeDL:
outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs) outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
return self.escape_outtmpl(outtmpl) % info_dict return self.escape_outtmpl(outtmpl) % info_dict
@_catch_unsafe_extension_error
def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None): def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive' assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
if outtmpl is None: if outtmpl is None:
@ -1925,6 +1943,8 @@ class YoutubeDL:
'playlist_title': ie_result.get('title'), 'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'), 'playlist_uploader': ie_result.get('uploader'),
'playlist_uploader_id': ie_result.get('uploader_id'), 'playlist_uploader_id': ie_result.get('uploader_id'),
'playlist_channel': ie_result.get('channel'),
'playlist_channel_id': ie_result.get('channel_id'),
**kwargs, **kwargs,
} }
if strict: if strict:
@ -3188,6 +3208,7 @@ class YoutubeDL:
os.remove(file) os.remove(file)
return None return None
@_catch_unsafe_extension_error
def process_info(self, info_dict): def process_info(self, info_dict):
"""Process a single resolved IE result. (Modifies it in-place)""" """Process a single resolved IE result. (Modifies it in-place)"""

View file

@ -64,6 +64,7 @@ from .utils import (
write_string, write_string,
) )
from .utils.networking import std_headers from .utils.networking import std_headers
from .utils._utils import _UnsafeExtensionError
from .YoutubeDL import YoutubeDL from .YoutubeDL import YoutubeDL
_IN_CLI = False _IN_CLI = False
@ -593,6 +594,13 @@ def validate_options(opts):
if opts.ap_username is not None and opts.ap_password is None: if opts.ap_username is not None and opts.ap_password is None:
opts.ap_password = getpass.getpass('Type TV provider account password and press [Return]: ') opts.ap_password = getpass.getpass('Type TV provider account password and press [Return]: ')
# compat option changes global state destructively; only allow from cli
if 'allow-unsafe-ext' in opts.compat_opts:
warnings.append(
'Using allow-unsafe-ext opens you up to potential attacks. '
'Use with great care!')
_UnsafeExtensionError.sanitize_extension = lambda x: x
return warnings, deprecation_warnings return warnings, deprecation_warnings

View file

@ -1,16 +1,22 @@
tests = {
'webp': lambda h: h[0:4] == b'RIFF' and h[8:] == b'WEBP',
'png': lambda h: h[:8] == b'\211PNG\r\n\032\n',
'jpeg': lambda h: h[6:10] in (b'JFIF', b'Exif'),
'gif': lambda h: h[:6] in (b'GIF87a', b'GIF89a'),
}
def what(file=None, h=None): def what(file=None, h=None):
"""Detect format of image (Currently supports jpeg, png, webp, gif only) """Detect format of image (Currently supports jpeg, png, webp, gif only)
Ref: https://github.com/python/cpython/blob/3.10/Lib/imghdr.py Ref: https://github.com/python/cpython/blob/3.11/Lib/imghdr.py
Ref: https://www.w3.org/Graphics/JPEG/itu-t81.pdf
""" """
if h is None: if h is None:
with open(file, 'rb') as f: with open(file, 'rb') as f:
h = f.read(12) h = f.read(12)
return next((type_ for type_, test in tests.items() if test(h)), None)
if h.startswith(b'RIFF') and h.startswith(b'WEBP', 8):
return 'webp'
if h.startswith(b'\x89PNG'):
return 'png'
if h.startswith(b'\xFF\xD8\xFF'):
return 'jpeg'
if h.startswith(b'GIF'):
return 'gif'
return None

View file

@ -2,7 +2,9 @@ import base64
import collections import collections
import contextlib import contextlib
import datetime as dt import datetime as dt
import functools
import glob import glob
import hashlib
import http.cookiejar import http.cookiejar
import http.cookies import http.cookies
import io import io
@ -17,14 +19,12 @@ import tempfile
import time import time
import urllib.request import urllib.request
from enum import Enum, auto from enum import Enum, auto
from hashlib import pbkdf2_hmac
from .aes import ( from .aes import (
aes_cbc_decrypt_bytes, aes_cbc_decrypt_bytes,
aes_gcm_decrypt_and_verify_bytes, aes_gcm_decrypt_and_verify_bytes,
unpad_pkcs7, unpad_pkcs7,
) )
from .compat import functools # isort: split
from .compat import compat_os_name from .compat import compat_os_name
from .dependencies import ( from .dependencies import (
_SECRETSTORAGE_UNAVAILABLE_REASON, _SECRETSTORAGE_UNAVAILABLE_REASON,
@ -740,20 +740,19 @@ def _get_linux_desktop_environment(env, logger):
xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None) xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
desktop_session = env.get('DESKTOP_SESSION', None) desktop_session = env.get('DESKTOP_SESSION', None)
if xdg_current_desktop is not None: if xdg_current_desktop is not None:
xdg_current_desktop = xdg_current_desktop.split(':')[0].strip() for part in map(str.strip, xdg_current_desktop.split(':')):
if part == 'Unity':
if xdg_current_desktop == 'Unity':
if desktop_session is not None and 'gnome-fallback' in desktop_session: if desktop_session is not None and 'gnome-fallback' in desktop_session:
return _LinuxDesktopEnvironment.GNOME return _LinuxDesktopEnvironment.GNOME
else: else:
return _LinuxDesktopEnvironment.UNITY return _LinuxDesktopEnvironment.UNITY
elif xdg_current_desktop == 'Deepin': elif part == 'Deepin':
return _LinuxDesktopEnvironment.DEEPIN return _LinuxDesktopEnvironment.DEEPIN
elif xdg_current_desktop == 'GNOME': elif part == 'GNOME':
return _LinuxDesktopEnvironment.GNOME return _LinuxDesktopEnvironment.GNOME
elif xdg_current_desktop == 'X-Cinnamon': elif part == 'X-Cinnamon':
return _LinuxDesktopEnvironment.CINNAMON return _LinuxDesktopEnvironment.CINNAMON
elif xdg_current_desktop == 'KDE': elif part == 'KDE':
kde_version = env.get('KDE_SESSION_VERSION', None) kde_version = env.get('KDE_SESSION_VERSION', None)
if kde_version == '5': if kde_version == '5':
return _LinuxDesktopEnvironment.KDE5 return _LinuxDesktopEnvironment.KDE5
@ -764,15 +763,14 @@ def _get_linux_desktop_environment(env, logger):
else: else:
logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4') logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
return _LinuxDesktopEnvironment.KDE4 return _LinuxDesktopEnvironment.KDE4
elif xdg_current_desktop == 'Pantheon': elif part == 'Pantheon':
return _LinuxDesktopEnvironment.PANTHEON return _LinuxDesktopEnvironment.PANTHEON
elif xdg_current_desktop == 'XFCE': elif part == 'XFCE':
return _LinuxDesktopEnvironment.XFCE return _LinuxDesktopEnvironment.XFCE
elif xdg_current_desktop == 'UKUI': elif part == 'UKUI':
return _LinuxDesktopEnvironment.UKUI return _LinuxDesktopEnvironment.UKUI
elif xdg_current_desktop == 'LXQt': elif part == 'LXQt':
return _LinuxDesktopEnvironment.LXQT return _LinuxDesktopEnvironment.LXQT
else:
logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"') logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
elif desktop_session is not None: elif desktop_session is not None:
@ -1001,7 +999,7 @@ def _get_windows_v10_key(browser_root, logger):
def pbkdf2_sha1(password, salt, iterations, key_length): def pbkdf2_sha1(password, salt, iterations, key_length):
return pbkdf2_hmac('sha1', password, salt, iterations, key_length) return hashlib.pbkdf2_hmac('sha1', password, salt, iterations, key_length)
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16): def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):

View file

@ -1,4 +1,5 @@
import enum import enum
import functools
import json import json
import os import os
import re import re
@ -9,7 +10,6 @@ import time
import uuid import uuid
from .fragment import FragmentFD from .fragment import FragmentFD
from ..compat import functools
from ..networking import Request from ..networking import Request
from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
from ..utils import ( from ..utils import (
@ -108,7 +108,7 @@ class ExternalFD(FragmentFD):
return all(( return all((
not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES, not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES,
'+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES, '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES,
not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url'), not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url', 'extra_param_to_key_url'),
all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')), all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')),
)) ))

View file

@ -160,10 +160,12 @@ class HlsFD(FragmentFD):
extra_state = ctx.setdefault('extra_state', {}) extra_state = ctx.setdefault('extra_state', {})
format_index = info_dict.get('format_index') format_index = info_dict.get('format_index')
extra_query = None extra_segment_query = None
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') if extra_param_to_segment_url := info_dict.get('extra_param_to_segment_url'):
if extra_param_to_segment_url: extra_segment_query = urllib.parse.parse_qs(extra_param_to_segment_url)
extra_query = urllib.parse.parse_qs(extra_param_to_segment_url) extra_key_query = None
if extra_param_to_key_url := info_dict.get('extra_param_to_key_url'):
extra_key_query = urllib.parse.parse_qs(extra_param_to_key_url)
i = 0 i = 0
media_sequence = 0 media_sequence = 0
decrypt_info = {'METHOD': 'NONE'} decrypt_info = {'METHOD': 'NONE'}
@ -190,8 +192,8 @@ class HlsFD(FragmentFD):
if frag_index <= ctx['fragment_index']: if frag_index <= ctx['fragment_index']:
continue continue
frag_url = urljoin(man_url, line) frag_url = urljoin(man_url, line)
if extra_query: if extra_segment_query:
frag_url = update_url_query(frag_url, extra_query) frag_url = update_url_query(frag_url, extra_segment_query)
fragments.append({ fragments.append({
'frag_index': frag_index, 'frag_index': frag_index,
@ -212,8 +214,8 @@ class HlsFD(FragmentFD):
frag_index += 1 frag_index += 1
map_info = parse_m3u8_attributes(line[11:]) map_info = parse_m3u8_attributes(line[11:])
frag_url = urljoin(man_url, map_info.get('URI')) frag_url = urljoin(man_url, map_info.get('URI'))
if extra_query: if extra_segment_query:
frag_url = update_url_query(frag_url, extra_query) frag_url = update_url_query(frag_url, extra_segment_query)
if map_info.get('BYTERANGE'): if map_info.get('BYTERANGE'):
splitted_byte_range = map_info.get('BYTERANGE').split('@') splitted_byte_range = map_info.get('BYTERANGE').split('@')
@ -244,8 +246,10 @@ class HlsFD(FragmentFD):
decrypt_info['KEY'] = external_aes_key decrypt_info['KEY'] = external_aes_key
else: else:
decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI']) decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI'])
if extra_query: if extra_key_query or extra_segment_query:
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) # Fall back to extra_segment_query to key for backwards compat
decrypt_info['URI'] = update_url_query(
decrypt_info['URI'], extra_key_query or extra_segment_query)
if decrypt_url != decrypt_info['URI']: if decrypt_url != decrypt_info['URI']:
decrypt_info['KEY'] = None decrypt_info['KEY'] = None

View file

@ -76,6 +76,7 @@ from .aenetworks import (
) )
from .aeonco import AeonCoIE from .aeonco import AeonCoIE
from .afreecatv import ( from .afreecatv import (
AfreecaTVCatchStoryIE,
AfreecaTVIE, AfreecaTVIE,
AfreecaTVLiveIE, AfreecaTVLiveIE,
AfreecaTVUserIE, AfreecaTVUserIE,
@ -779,6 +780,7 @@ from .gopro import GoProIE
from .goshgay import GoshgayIE from .goshgay import GoshgayIE
from .gotostage import GoToStageIE from .gotostage import GoToStageIE
from .gputechconf import GPUTechConfIE from .gputechconf import GPUTechConfIE
from .graspop import GraspopIE
from .gronkh import ( from .gronkh import (
GronkhFeedIE, GronkhFeedIE,
GronkhIE, GronkhIE,
@ -969,6 +971,10 @@ from .la7 import (
LA7PodcastEpisodeIE, LA7PodcastEpisodeIE,
LA7PodcastIE, LA7PodcastIE,
) )
from .laracasts import (
LaracastsIE,
LaracastsPlaylistIE,
)
from .lastfm import ( from .lastfm import (
LastFMIE, LastFMIE,
LastFMPlaylistIE, LastFMPlaylistIE,
@ -1113,12 +1119,15 @@ from .meipai import MeipaiIE
from .melonvod import MelonVODIE from .melonvod import MelonVODIE
from .metacritic import MetacriticIE from .metacritic import MetacriticIE
from .mgtv import MGTVIE from .mgtv import MGTVIE
from .microsoftembed import MicrosoftEmbedIE from .microsoftembed import (
from .microsoftstream import MicrosoftStreamIE MicrosoftBuildIE,
from .microsoftvirtualacademy import ( MicrosoftEmbedIE,
MicrosoftVirtualAcademyCourseIE, MicrosoftLearnEpisodeIE,
MicrosoftVirtualAcademyIE, MicrosoftLearnPlaylistIE,
MicrosoftLearnSessionIE,
MicrosoftMediusIE,
) )
from .microsoftstream import MicrosoftStreamIE
from .mildom import ( from .mildom import (
MildomClipIE, MildomClipIE,
MildomIE, MildomIE,
@ -1603,6 +1612,7 @@ from .qqmusic import (
QQMusicPlaylistIE, QQMusicPlaylistIE,
QQMusicSingerIE, QQMusicSingerIE,
QQMusicToplistIE, QQMusicToplistIE,
QQMusicVideoIE,
) )
from .r7 import ( from .r7 import (
R7IE, R7IE,
@ -1755,7 +1765,10 @@ from .rtve import (
RTVETelevisionIE, RTVETelevisionIE,
) )
from .rtvs import RTVSIE from .rtvs import RTVSIE
from .rtvslo import RTVSLOIE from .rtvslo import (
RTVSLOIE,
RTVSLOShowIE,
)
from .rudovideo import RudoVideoIE from .rudovideo import RudoVideoIE
from .rule34video import Rule34VideoIE from .rule34video import Rule34VideoIE
from .rumble import ( from .rumble import (
@ -1925,6 +1938,10 @@ from .spreaker import (
) )
from .springboardplatform import SpringboardPlatformIE from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE from .sprout import SproutIE
from .sproutvideo import (
SproutVideoIE,
VidsIoIE,
)
from .srgssr import ( from .srgssr import (
SRGSSRIE, SRGSSRIE,
SRGSSRPlayIE, SRGSSRPlayIE,

View file

@ -72,7 +72,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
)\?.*?\bnTitleNo=| )\?.*?\bnTitleNo=|
vod\.afreecatv\.com/(PLAYER/STATION|player)/ vod\.afreecatv\.com/(PLAYER/STATION|player)/
) )
(?P<id>\d+) (?P<id>\d+)/?(?:$|[?#&])
''' '''
_TESTS = [{ _TESTS = [{
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=', 'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
@ -189,7 +189,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
headers={'Referer': url}, data=urlencode_postdata({ headers={'Referer': url}, data=urlencode_postdata({
'nTitleNo': video_id, 'nTitleNo': video_id,
'nApiLevel': 10, 'nApiLevel': 10,
}))['data'] }), impersonate=True)['data']
error_code = traverse_obj(data, ('code', {int})) error_code = traverse_obj(data, ('code', {int}))
if error_code == -6221: if error_code == -6221:
@ -253,6 +253,43 @@ class AfreecaTVIE(AfreecaTVBaseIE):
return self.playlist_result(entries, video_id, multi_video=True, **common_info) return self.playlist_result(entries, video_id, multi_video=True, **common_info)
class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
IE_NAME = 'afreecatv:catchstory'
IE_DESC = 'afreecatv.com catch story'
_VALID_URL = r'https?://vod\.afreecatv\.com/player/(?P<id>\d+)/catchstory'
_TESTS = [{
'url': 'https://vod.afreecatv.com/player/103247/catchstory',
'info_dict': {
'id': '103247',
},
'playlist_count': 2,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
data = self._download_json(
'https://api.m.afreecatv.com/catchstory/a/view', video_id, headers={'Referer': url},
query={'aStoryListIdx': '', 'nStoryIdx': video_id}, impersonate=True)
return self.playlist_result(self._entries(data), video_id)
@staticmethod
def _entries(data):
# 'files' is always a list with 1 element
yield from traverse_obj(data, (
'data', lambda _, v: v['story_type'] == 'catch',
'catch_list', lambda _, v: v['files'][0]['file'], {
'id': ('files', 0, 'file_info_key', {str}),
'url': ('files', 0, 'file', {url_or_none}),
'duration': ('files', 0, 'duration', {functools.partial(int_or_none, scale=1000)}),
'title': ('title', {str}),
'uploader': ('writer_nick', {str}),
'uploader_id': ('writer_id', {str}),
'thumbnail': ('thumb', {url_or_none}),
'timestamp': ('write_timestamp', {int_or_none}),
}))
class AfreecaTVLiveIE(AfreecaTVBaseIE): class AfreecaTVLiveIE(AfreecaTVBaseIE):
IE_NAME = 'afreecatv:live' IE_NAME = 'afreecatv:live'
IE_DESC = 'afreecatv.com livestreams' IE_DESC = 'afreecatv.com livestreams'

View file

@ -4,6 +4,7 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
extract_attributes, extract_attributes,
int_or_none, int_or_none,
join_nonempty,
parse_iso8601, parse_iso8601,
try_get, try_get,
) )
@ -136,7 +137,7 @@ class ArcPublishingIE(InfoExtractor):
else: else:
vbr = int_or_none(s.get('bitrate')) vbr = int_or_none(s.get('bitrate'))
formats.append({ formats.append({
'format_id': f'{stream_type}-{vbr}' if vbr else stream_type, 'format_id': join_nonempty(stream_type, vbr),
'vbr': vbr, 'vbr': vbr,
'width': int_or_none(s.get('width')), 'width': int_or_none(s.get('width')),
'height': int_or_none(s.get('height')), 'height': int_or_none(s.get('height')),

View file

@ -131,8 +131,8 @@ class ArkenaIE(InfoExtractor):
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
href, video_id, f4m_id='hds', fatal=False)) href, video_id, f4m_id='hds', fatal=False))
elif mime_type == 'application/dash+xml': elif mime_type == 'application/dash+xml':
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_mpd_formats(
href, video_id, f4m_id='hds', fatal=False)) href, video_id, mpd_id='dash', fatal=False))
elif mime_type == 'application/vnd.ms-sstr+xml': elif mime_type == 'application/vnd.ms-sstr+xml':
formats.extend(self._extract_ism_formats( formats.extend(self._extract_ism_formats(
href, video_id, ism_id='mss', fatal=False)) href, video_id, ism_id='mss', fatal=False))

View file

@ -33,14 +33,6 @@ class AtresPlayerIE(InfoExtractor):
] ]
_API_BASE = 'https://api.atresplayer.com/' _API_BASE = 'https://api.atresplayer.com/'
def _handle_error(self, e, code):
if isinstance(e.cause, HTTPError) and e.cause.status == code:
error = self._parse_json(e.cause.response.read(), None)
if error.get('error') == 'required_registered':
self.raise_login_required()
raise ExtractorError(error['error_description'], expected=True)
raise
def _perform_login(self, username, password): def _perform_login(self, username, password):
self._request_webpage( self._request_webpage(
self._API_BASE + 'login', None, 'Downloading login page') self._API_BASE + 'login', None, 'Downloading login page')
@ -55,7 +47,9 @@ class AtresPlayerIE(InfoExtractor):
'password': password, 'password': password,
}))['targetUrl'] }))['targetUrl']
except ExtractorError as e: except ExtractorError as e:
self._handle_error(e, 400) if isinstance(e.cause, HTTPError) and e.cause.status == 400:
raise ExtractorError('Invalid username and/or password', expected=True)
raise
self._request_webpage(target_url, None, 'Following Target URL') self._request_webpage(target_url, None, 'Following Target URL')
@ -66,7 +60,12 @@ class AtresPlayerIE(InfoExtractor):
episode = self._download_json( episode = self._download_json(
self._API_BASE + 'client/v1/player/episode/' + video_id, video_id) self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
except ExtractorError as e: except ExtractorError as e:
self._handle_error(e, 403) if isinstance(e.cause, HTTPError) and e.cause.status == 403:
error = self._parse_json(e.cause.response.read(), None)
if error.get('error') == 'required_registered':
self.raise_login_required()
raise ExtractorError(error['error_description'], expected=True)
raise
title = episode['titulo'] title = episode['titulo']

View file

@ -41,7 +41,7 @@ class BandcampIE(InfoExtractor):
'uploader_id': 'youtube-dl', 'uploader_id': 'youtube-dl',
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg', 'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
}, },
'_skip': 'There is a limit of 200 free downloads / month for the test song', 'skip': 'There is a limit of 200 free downloads / month for the test song',
}, { }, {
# free download # free download
'url': 'http://benprunty.bandcamp.com/track/lanius-battle', 'url': 'http://benprunty.bandcamp.com/track/lanius-battle',

View file

@ -31,12 +31,12 @@ from ..utils import (
mimetype2ext, mimetype2ext,
parse_count, parse_count,
parse_qs, parse_qs,
parse_resolution,
qualities, qualities,
smuggle_url, smuggle_url,
srt_subtitles_timecode, srt_subtitles_timecode,
str_or_none, str_or_none,
traverse_obj, traverse_obj,
try_call,
unified_timestamp, unified_timestamp,
unsmuggle_url, unsmuggle_url,
url_or_none, url_or_none,
@ -47,6 +47,23 @@ from ..utils import (
class BilibiliBaseIE(InfoExtractor): class BilibiliBaseIE(InfoExtractor):
_FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?') _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
_WBI_KEY_CACHE_TIMEOUT = 30 # exact expire timeout is unclear, use 30s for one session
_wbi_key_cache = {}
@property
def is_logged_in(self):
return bool(self._get_cookies('https://api.bilibili.com').get('SESSDATA'))
def _check_missing_formats(self, play_info, formats):
parsed_qualities = set(traverse_obj(formats, (..., 'quality')))
missing_formats = join_nonempty(*[
traverse_obj(fmt, 'new_description', 'display_desc', 'quality')
for fmt in traverse_obj(play_info, (
'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
if missing_formats:
self.to_screen(
f'Format(s) {missing_formats} are missing; you have to login or '
f'become a premium member to download them. {self._login_hint()}')
def extract_formats(self, play_info): def extract_formats(self, play_info):
format_names = { format_names = {
@ -86,18 +103,75 @@ class BilibiliBaseIE(InfoExtractor):
'format': format_names.get(video.get('id')), 'format': format_names.get(video.get('id')),
} for video in traverse_obj(play_info, ('dash', 'video', ...))) } for video in traverse_obj(play_info, ('dash', 'video', ...)))
missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality'))) if formats:
if missing_formats: self._check_missing_formats(play_info, formats)
self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
f'you have to login or become premium member to download them. {self._login_hint()}')
fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
'url': ('url', {url_or_none}),
'duration': ('length', {functools.partial(float_or_none, scale=1000)}),
'filesize': ('size', {int_or_none}),
}))
if fragments:
formats.append({
'url': fragments[0]['url'],
'filesize': sum(traverse_obj(fragments, (..., 'filesize'))),
**({
'fragments': fragments,
'protocol': 'http_dash_segments',
} if len(fragments) > 1 else {}),
**traverse_obj(play_info, {
'quality': ('quality', {int_or_none}),
'format_id': ('quality', {str_or_none}),
'format_note': ('quality', {lambda x: format_names.get(x)}),
'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}),
}),
**parse_resolution(format_names.get(play_info.get('quality'))),
})
return formats return formats
def _download_playinfo(self, video_id, cid, headers=None): def _get_wbi_key(self, video_id):
if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
return self._wbi_key_cache['key']
session_data = self._download_json(
'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')
lookup = ''.join(traverse_obj(session_data, (
'data', 'wbi_img', ('img_url', 'sub_url'),
{lambda x: x.rpartition('/')[2].partition('.')[0]})))
# from getMixinKey() in the vendor js
mixin_key_enc_tab = [
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
36, 20, 34, 44, 52,
]
self._wbi_key_cache.update({
'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32],
'ts': time.time(),
})
return self._wbi_key_cache['key']
def _sign_wbi(self, params, video_id):
params['wts'] = round(time.time())
params = {
k: ''.join(filter(lambda char: char not in "!'()*", str(v)))
for k, v in sorted(params.items())
}
query = urllib.parse.urlencode(params)
params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
return params
def _download_playinfo(self, bvid, cid, headers=None, qn=None):
params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
if qn:
params['qn'] = qn
return self._download_json( return self._download_json(
'https://api.bilibili.com/x/player/playurl', video_id, 'https://api.bilibili.com/x/player/wbi/playurl', bvid,
query={'bvid': video_id, 'cid': cid, 'fnval': 4048}, query=self._sign_wbi(params, bvid), headers=headers,
note=f'Downloading video formats for cid {cid}', headers=headers)['data'] note=f'Downloading video formats for cid {cid} {qn or ""}')['data']
def json2srt(self, json_data): def json2srt(self, json_data):
srt_data = '' srt_data = ''
@ -115,15 +189,15 @@ class BilibiliBaseIE(InfoExtractor):
}], }],
} }
subtitle_info = traverse_obj(self._download_json( video_info = self._download_json(
'https://api.bilibili.com/x/player/v2', video_id, 'https://api.bilibili.com/x/player/v2', video_id,
query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid}, query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
note=f'Extracting subtitle info {cid}'), ('data', 'subtitle')) note=f'Extracting subtitle info {cid}')
subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan'])) if traverse_obj(video_info, ('data', 'need_login_subtitle')):
if not subs_list and traverse_obj(subtitle_info, 'allow_submit'): self.report_warning(
if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True) for s in traverse_obj(video_info, (
for s in subs_list: 'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan'])):
subtitles.setdefault(s['lan'], []).append({ subtitles.setdefault(s['lan'], []).append({
'ext': 'srt', 'ext': 'srt',
'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)), 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
@ -203,15 +277,15 @@ class BilibiliBaseIE(InfoExtractor):
self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges) self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
return cid_edges return cid_edges
def _get_interactive_entries(self, video_id, cid, metainfo): def _get_interactive_entries(self, video_id, cid, metainfo, headers=None):
graph_version = traverse_obj( graph_version = traverse_obj(
self._download_json( self._download_json(
'https://api.bilibili.com/x/player/wbi/v2', video_id, 'https://api.bilibili.com/x/player/wbi/v2', video_id,
'Extracting graph version', query={'bvid': video_id, 'cid': cid}), 'Extracting graph version', query={'bvid': video_id, 'cid': cid}, headers=headers),
('data', 'interaction', 'graph_version', {int_or_none})) ('data', 'interaction', 'graph_version', {int_or_none}))
cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1) cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
for cid, edges in cid_edges.items(): for cid, edges in cid_edges.items():
play_info = self._download_playinfo(video_id, cid) play_info = self._download_playinfo(video_id, cid, headers=headers)
yield { yield {
**metainfo, **metainfo,
'id': f'{video_id}_{cid}', 'id': f'{video_id}_{cid}',
@ -243,17 +317,17 @@ class BiliBiliIE(BilibiliBaseIE):
'timestamp': 1488353834, 'timestamp': 1488353834,
'like_count': int, 'like_count': int,
'view_count': int, 'view_count': int,
'_old_archive_ids': ['bilibili 8903802_part1'],
}, },
}, { }, {
'note': 'old av URL version', 'note': 'old av URL version',
'url': 'http://www.bilibili.com/video/av1074402/', 'url': 'http://www.bilibili.com/video/av1074402/',
'info_dict': { 'info_dict': {
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$', 'id': 'BV11x411K7CN',
'ext': 'mp4', 'ext': 'mp4',
'title': '【金坷垃】金泡沫',
'uploader': '菊子桑', 'uploader': '菊子桑',
'uploader_id': '156160', 'uploader_id': '156160',
'id': 'BV11x411K7CN',
'title': '【金坷垃】金泡沫',
'duration': 308.36, 'duration': 308.36,
'upload_date': '20140420', 'upload_date': '20140420',
'timestamp': 1397983878, 'timestamp': 1397983878,
@ -262,6 +336,8 @@ class BiliBiliIE(BilibiliBaseIE):
'comment_count': int, 'comment_count': int,
'view_count': int, 'view_count': int,
'tags': list, 'tags': list,
'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
'_old_archive_ids': ['bilibili 1074402_part1'],
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
}, { }, {
@ -288,6 +364,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int, 'view_count': int,
'description': 'md5:e3c401cf7bc363118d1783dd74068a68', 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
'duration': 90.314, 'duration': 90.314,
'_old_archive_ids': ['bilibili 498159642_part1'],
}, },
}], }],
}, { }, {
@ -308,28 +385,8 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int, 'view_count': int,
'description': 'md5:e3c401cf7bc363118d1783dd74068a68', 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
'duration': 90.314, 'duration': 90.314,
'_old_archive_ids': ['bilibili 498159642_part1'],
}, },
}, {
'note': 'video has subtitles',
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
'info_dict': {
'id': 'BV12N4y1M7rh',
'ext': 'mp4',
'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
'tags': list,
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
'duration': 313.557,
'upload_date': '20220709',
'uploader': '小夫太渴',
'timestamp': 1657347907,
'uploader_id': '1326814124',
'comment_count': int,
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'subtitles': 'count:2',
},
'params': {'listsubtitles': True},
}, { }, {
'url': 'https://www.bilibili.com/video/av8903802/', 'url': 'https://www.bilibili.com/video/av8903802/',
'info_dict': { 'info_dict': {
@ -347,6 +404,7 @@ class BiliBiliIE(BilibiliBaseIE):
'comment_count': int, 'comment_count': int,
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'_old_archive_ids': ['bilibili 8903802_part1'],
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -370,6 +428,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 463665680_part1'],
}, },
'params': {'skip_download': True}, 'params': {'skip_download': True},
}, { }, {
@ -388,8 +447,8 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 893839363_part1'],
}, },
'params': {'skip_download': True},
}, { }, {
'note': 'newer festival video', 'note': 'newer festival video',
'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f', 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
@ -406,8 +465,57 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 778246196_part1'],
},
}, {
'note': 'legacy flv/mp4 video',
'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4',
'info_dict': {
'id': 'BV1ms411Q7vw_p4',
'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
'timestamp': 1458222815,
'upload_date': '20160317',
'description': '云南方言快乐生产线出品',
'duration': float,
'uploader': '一笑颠天',
'uploader_id': '3916081',
'view_count': int,
'comment_count': int,
'like_count': int,
'tags': list,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 4120229_part4'],
},
'params': {'extractor_args': {'bilibili': {'prefer_multi_flv': ['32']}}},
'playlist_count': 19,
'playlist': [{
'info_dict': {
'id': 'BV1ms411Q7vw_p4_0',
'ext': 'flv',
'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
'duration': 399.102,
},
}],
}, {
'note': 'legacy mp4-only video',
'url': 'https://www.bilibili.com/video/BV1nx411u79K',
'info_dict': {
'id': 'BV1nx411u79K',
'ext': 'mp4',
'title': '【练习室】201603声乐练习《No Air》with VigoVan',
'timestamp': 1508893551,
'upload_date': '20171025',
'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van',
'duration': 80.384,
'uploader': '伯远',
'uploader_id': '10584494',
'comment_count': int,
'view_count': int,
'like_count': int,
'tags': list,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 15700301_part1'],
}, },
'params': {'skip_download': True},
}, { }, {
'note': 'interactive/split-path video', 'note': 'interactive/split-path video',
'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/', 'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
@ -425,6 +533,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 292734508_part1'],
}, },
'playlist_count': 33, 'playlist_count': 33,
'playlist': [{ 'playlist': [{
@ -443,6 +552,7 @@ class BiliBiliIE(BilibiliBaseIE):
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'_old_archive_ids': ['bilibili 292734508_part1'],
}, },
}], }],
}, { }, {
@ -465,6 +575,29 @@ class BiliBiliIE(BilibiliBaseIE):
'upload_date': '20191021', 'upload_date': '20191021',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
}, },
}, {
'note': 'video has subtitles, which requires login',
'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
'info_dict': {
'id': 'BV12N4y1M7rh',
'ext': 'mp4',
'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
'tags': list,
'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
'duration': 313.557,
'upload_date': '20220709',
'uploader': '小夫太渴',
'timestamp': 1657347907,
'uploader_id': '1326814124',
'comment_count': int,
'view_count': int,
'like_count': int,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
'subtitles': 'count:2', # login required for CC subtitle
'_old_archive_ids': ['bilibili 898179753_part1'],
},
'params': {'listsubtitles': True},
'skip': 'login required for subtitle',
}, { }, {
'url': 'https://www.bilibili.com/video/BV1jL41167ZG/', 'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
'info_dict': { 'info_dict': {
@ -498,8 +631,9 @@ class BiliBiliIE(BilibiliBaseIE):
if not self._match_valid_url(urlh.url): if not self._match_valid_url(urlh.url):
return self.url_result(urlh.url) return self.url_result(urlh.url)
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) headers['Referer'] = url
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
is_festival = 'videoData' not in initial_state is_festival = 'videoData' not in initial_state
if is_festival: if is_festival:
video_data = initial_state['videoInfo'] video_data = initial_state['videoInfo']
@ -548,7 +682,6 @@ class BiliBiliIE(BilibiliBaseIE):
aid = video_data.get('aid') aid = video_data.get('aid')
old_video_id = format_field(aid, None, f'%s_part{part_id or 1}') old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid') cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
festival_info = {} festival_info = {}
@ -586,16 +719,63 @@ class BiliBiliIE(BilibiliBaseIE):
is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate')) is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
if is_interactive: if is_interactive:
return self.playlist_result( return self.playlist_result(
self._get_interactive_entries(video_id, cid, metainfo), **metainfo, self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})), duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
__post_extractor=self.extract_comments(aid)) __post_extractor=self.extract_comments(aid))
else: else:
formats = self.extract_formats(play_info)
if not traverse_obj(play_info, ('dash')):
# we only have legacy formats and need additional work
has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
formats.extend(traverse_obj(
self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
lambda _, v: not has_qn(v['quality'])))
self._check_missing_formats(play_info, formats)
flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
if flv_formats and len(flv_formats) < len(formats):
# Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
if not self._configuration_arg('prefer_multi_flv'):
dropped_fmts = ', '.join(
f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
if dropped_fmts:
self.to_screen(
f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
else:
formats = traverse_obj(
# XXX: Filtering by extractor-arg is for testing purposes
formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
) or [max(flv_formats, key=lambda x: x['quality'])]
if traverse_obj(formats, (0, 'fragments')):
# We have flv formats, which are individual short videos with their own timestamps and metainfo
# Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
return { return {
**metainfo, **metainfo,
'_type': 'multi_video',
'entries': [{
'id': f'{metainfo["id"]}_{idx}',
'title': metainfo['title'],
'http_headers': metainfo['http_headers'],
'formats': [{
**fragment,
'format_id': formats[0].get('format_id'),
}],
'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
'__post_extractor': self.extract_comments(aid) if idx == 0 else None,
} for idx, fragment in enumerate(formats[0]['fragments'])],
'duration': float_or_none(play_info.get('timelength'), scale=1000),
}
else:
return {
**metainfo,
'formats': formats,
'duration': float_or_none(play_info.get('timelength'), scale=1000), 'duration': float_or_none(play_info.get('timelength'), scale=1000),
'chapters': self._get_chapters(aid, cid), 'chapters': self._get_chapters(aid, cid),
'subtitles': self.extract_subtitles(video_id, cid), 'subtitles': self.extract_subtitles(video_id, cid),
'formats': self.extract_formats(play_info),
'__post_extractor': self.extract_comments(aid), '__post_extractor': self.extract_comments(aid),
} }
@ -968,7 +1148,7 @@ class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
})) }))
class BilibiliSpaceBaseIE(InfoExtractor): class BilibiliSpaceBaseIE(BilibiliBaseIE):
def _extract_playlist(self, fetch_page, get_metadata, get_entries): def _extract_playlist(self, fetch_page, get_metadata, get_entries):
first_page = fetch_page(0) first_page = fetch_page(0)
metadata = get_metadata(first_page) metadata = get_metadata(first_page)
@ -988,73 +1168,53 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
'id': '3985676', 'id': '3985676',
}, },
'playlist_mincount': 178, 'playlist_mincount': 178,
'skip': 'login required',
}, { }, {
'url': 'https://space.bilibili.com/313580179/video', 'url': 'https://space.bilibili.com/313580179/video',
'info_dict': { 'info_dict': {
'id': '313580179', 'id': '313580179',
}, },
'playlist_mincount': 92, 'playlist_mincount': 92,
'skip': 'login required',
}] }]
def _extract_signature(self, playlist_id):
session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
img_key = traverse_obj(
session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
sub_key = traverse_obj(
session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
session_key = img_key + sub_key
signature_values = []
for position in (
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
57, 62, 11, 36, 20, 34, 44, 52,
):
char_at_position = try_call(lambda: session_key[position])
if char_at_position:
signature_values.append(char_at_position)
return ''.join(signature_values)[:32]
def _real_extract(self, url): def _real_extract(self, url):
playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video') playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
if not is_video_url: if not is_video_url:
self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. ' self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
'To download audios, add a "/audio" to the URL') 'To download audios, add a "/audio" to the URL')
signature = self._extract_signature(playlist_id)
def fetch_page(page_idx): def fetch_page(page_idx):
query = { query = {
'keyword': '', 'keyword': '',
'mid': playlist_id, 'mid': playlist_id,
'order': 'pubdate', 'order': traverse_obj(parse_qs(url), ('order', 0)) or 'pubdate',
'order_avoided': 'true', 'order_avoided': 'true',
'platform': 'web', 'platform': 'web',
'pn': page_idx + 1, 'pn': page_idx + 1,
'ps': 30, 'ps': 30,
'tid': 0, 'tid': 0,
'web_location': 1550101, 'web_location': 1550101,
'wts': int(time.time()),
} }
query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
try: try:
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search', response = self._download_json(
playlist_id, note=f'Downloading page {page_idx}', query=query, 'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
headers={'referer': url}) query=self._sign_wbi(query, playlist_id),
note=f'Downloading space page {page_idx}', headers={'Referer': url})
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 412: if isinstance(e.cause, HTTPError) and e.cause.status == 412:
raise ExtractorError( raise ExtractorError(
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True) 'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
raise raise
if response['code'] in (-352, -401): status_code = response['code']
if status_code == -401:
raise ExtractorError( raise ExtractorError(
f'Request is blocked by server ({-response["code"]}), ' 'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
'please add cookies, wait and try later.', expected=True) elif status_code == -352 and not self.is_logged_in:
self.raise_login_required('Request is rejected, you need to login to access playlist')
elif status_code != 0:
raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}')
return response['data'] return response['data']
def get_metadata(page_data): def get_metadata(page_data):
@ -1280,7 +1440,10 @@ class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
_VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)' _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.bilibili.com/watchlater/#/list', 'url': 'https://www.bilibili.com/watchlater/#/list',
'info_dict': {'id': 'watchlater'}, 'info_dict': {
'id': r're:\d+',
'title': '稍后再看',
},
'playlist_mincount': 0, 'playlist_mincount': 0,
'skip': 'login required', 'skip': 'login required',
}] }]
@ -1356,14 +1519,19 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
'skip': 'redirect url', 'skip': 'redirect url',
}, { }, {
'url': 'https://www.bilibili.com/list/watchlater', 'url': 'https://www.bilibili.com/list/watchlater',
'info_dict': {'id': 'watchlater'}, 'info_dict': {
'id': r're:2_\d+',
'title': '稍后再看',
'uploader': str,
'uploader_id': str,
},
'playlist_mincount': 0, 'playlist_mincount': 0,
'skip': 'login required', 'skip': 'login required',
}, { }, {
'url': 'https://www.bilibili.com/medialist/play/watchlater', 'url': 'https://www.bilibili.com/medialist/play/watchlater',
'info_dict': {'id': 'watchlater'}, 'info_dict': {'id': 'watchlater'},
'playlist_mincount': 0, 'playlist_mincount': 0,
'skip': 'login required', 'skip': 'redirect url & login required',
}] }]
def _extract_medialist(self, query, list_id): def _extract_medialist(self, query, list_id):
@ -1414,7 +1582,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
'title': ('title', {str}), 'title': ('title', {str}),
'uploader': ('upper', 'name', {str}), 'uploader': ('upper', 'name', {str}),
'uploader_id': ('upper', 'mid', {str_or_none}), 'uploader_id': ('upper', 'mid', {str_or_none}),
'timestamp': ('ctime', {int_or_none}), 'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
'thumbnail': ('cover', {url_or_none}), 'thumbnail': ('cover', {url_or_none}),
})), })),
} }
@ -1808,7 +1976,8 @@ class BiliIntlBaseIE(InfoExtractor):
public_key = Cryptodome.RSA.importKey(key_data['key']) public_key = Cryptodome.RSA.importKey(key_data['key'])
password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode()) password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
login_post = self._download_json( login_post = self._download_json(
'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({ 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
data=urlencode_postdata({
'username': username, 'username': username,
'password': base64.b64encode(password_hash).decode('ascii'), 'password': base64.b64encode(password_hash).decode('ascii'),
'keep_me': 'true', 'keep_me': 'true',
@ -2140,7 +2309,8 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
series_id = self._match_id(url) series_id = self._match_id(url)
series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {} series_info = self._call_api(
f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
return self.playlist_result( return self.playlist_result(
self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'), self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none), categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),

View file

@ -24,7 +24,7 @@ from ..utils import (
class BitChuteIE(InfoExtractor): class BitChuteIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
_EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})'] _EMBED_REGEX = [rf'<(?:script|iframe)[^>]+\bsrc=(["\'])(?P<url>{_VALID_URL})']
_TESTS = [{ _TESTS = [{
'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/', 'url': 'https://www.bitchute.com/video/UGlrF9o9b-Q/',
@ -91,6 +91,9 @@ class BitChuteIE(InfoExtractor):
}, { }, {
'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent', 'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://old.bitchute.com/video/UGlrF9o9b-Q/',
'only_matching': True,
}] }]
_GEO_BYPASS = False _GEO_BYPASS = False
@ -132,7 +135,7 @@ class BitChuteIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( webpage = self._download_webpage(
f'https://www.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS) f'https://old.bitchute.com/video/{video_id}', video_id, headers=self._HEADERS)
self._raise_if_restricted(webpage) self._raise_if_restricted(webpage)
publish_date = clean_html(get_element_by_class('video-publish-date', webpage)) publish_date = clean_html(get_element_by_class('video-publish-date', webpage))
@ -171,13 +174,13 @@ class BitChuteIE(InfoExtractor):
class BitChuteChannelIE(InfoExtractor): class BitChuteChannelIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:(?:www|old)\.)?bitchute\.com/(?P<type>channel|playlist)/(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.bitchute.com/channel/bitchute/', 'url': 'https://www.bitchute.com/channel/bitchute/',
'info_dict': { 'info_dict': {
'id': 'bitchute', 'id': 'bitchute',
'title': 'BitChute', 'title': 'BitChute',
'description': 'md5:5329fb3866125afa9446835594a9b138', 'description': 'md5:2134c37d64fc3a4846787c402956adac',
}, },
'playlist': [ 'playlist': [
{ {
@ -210,6 +213,9 @@ class BitChuteChannelIE(InfoExtractor):
'title': 'Bruce MacDonald and "The Light of Darkness"', 'title': 'Bruce MacDonald and "The Light of Darkness"',
'description': 'md5:747724ef404eebdfc04277714f81863e', 'description': 'md5:747724ef404eebdfc04277714f81863e',
}, },
}, {
'url': 'https://old.bitchute.com/playlist/wV9Imujxasw9/',
'only_matching': True,
}] }]
_TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7' _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
@ -230,7 +236,7 @@ class BitChuteChannelIE(InfoExtractor):
@staticmethod @staticmethod
def _make_url(playlist_id, playlist_type): def _make_url(playlist_id, playlist_type):
return f'https://www.bitchute.com/{playlist_type}/{playlist_id}/' return f'https://old.bitchute.com/{playlist_type}/{playlist_id}/'
def _fetch_page(self, playlist_id, playlist_type, page_num): def _fetch_page(self, playlist_id, playlist_type, page_num):
playlist_url = self._make_url(playlist_id, playlist_type) playlist_url = self._make_url(playlist_id, playlist_type)

View file

@ -18,6 +18,7 @@ from ..utils import (
fix_xml_ampersands, fix_xml_ampersands,
float_or_none, float_or_none,
int_or_none, int_or_none,
join_nonempty,
js_to_json, js_to_json,
mimetype2ext, mimetype2ext,
parse_iso8601, parse_iso8601,
@ -386,7 +387,7 @@ class BrightcoveLegacyIE(InfoExtractor):
@classmethod @classmethod
def _make_brightcove_url(cls, params): def _make_brightcove_url(cls, params):
return update_url_query( return update_url_query(
'http://c.brightcove.com/services/viewer/htmlFederated', params) 'https://c.brightcove.com/services/viewer/htmlFederated', params)
@classmethod @classmethod
def _extract_brightcove_url(cls, webpage): def _extract_brightcove_url(cls, webpage):
@ -470,7 +471,7 @@ class BrightcoveLegacyIE(InfoExtractor):
if referer: if referer:
headers['Referer'] = referer headers['Referer'] = referer
player_page = self._download_webpage( player_page = self._download_webpage(
'http://link.brightcove.com/services/player/bcpid' + player_id[0], 'https://link.brightcove.com/services/player/bcpid' + player_id[0],
video_id, headers=headers, fatal=False) video_id, headers=headers, fatal=False)
if player_page: if player_page:
player_key = self._search_regex( player_key = self._search_regex(
@ -480,7 +481,7 @@ class BrightcoveLegacyIE(InfoExtractor):
enc_pub_id = player_key.split(',')[1].replace('~', '=') enc_pub_id = player_key.split(',')[1].replace('~', '=')
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0] publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
if publisher_id: if publisher_id:
brightcove_new_url = f'http://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}' brightcove_new_url = f'https://players.brightcove.net/{publisher_id}/default_default/index.html?videoId={video_id}'
if referer: if referer:
brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer}) brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id) return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
@ -538,12 +539,7 @@ class BrightcoveNewBaseIE(AdobePassIE):
}) })
def build_format_id(kind): def build_format_id(kind):
format_id = kind return join_nonempty(kind, tbr and f'{int(tbr)}k', height and f'{height}p')
if tbr:
format_id += f'-{int(tbr)}k'
if height:
format_id += f'-{height}p'
return format_id
if src or streaming_src: if src or streaming_src:
f.update({ f.update({
@ -801,7 +797,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
# Look for iframe embeds [1] # Look for iframe embeds [1]
for _, url in re.findall( for _, url in re.findall(
r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage): r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
entries.append(url if url.startswith('http') else 'http:' + url) entries.append(url if url.startswith(('http:', 'https:')) else 'https:' + url)
# Look for <video> tags [2] and embed_in_page embeds [3] # Look for <video> tags [2] and embed_in_page embeds [3]
# [2] looks like: # [2] looks like:
@ -830,7 +826,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
player_id = player_id or attrs.get('data-player') or 'default' player_id = player_id or attrs.get('data-player') or 'default'
embed = embed or attrs.get('data-embed') or 'default' embed = embed or attrs.get('data-embed') or 'default'
bc_url = f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}' bc_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}'
# Some brightcove videos may be embedded with video tag only and # Some brightcove videos may be embedded with video tag only and
# without script tag or any mentioning of brightcove at all. Such # without script tag or any mentioning of brightcove at all. Such
@ -867,7 +863,7 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
store_pk = lambda x: self.cache.store('brightcove', policy_key_id, x) store_pk = lambda x: self.cache.store('brightcove', policy_key_id, x)
def extract_policy_key(): def extract_policy_key():
base_url = f'http://players.brightcove.net/{account_id}/{player_id}_{embed}/' base_url = f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/'
config = self._download_json( config = self._download_json(
base_url + 'config.json', video_id, fatal=False) or {} base_url + 'config.json', video_id, fatal=False) or {}
policy_key = try_get( policy_key = try_get(

View file

@ -455,10 +455,8 @@ class CBCGemIE(InfoExtractor):
def claims_token_expired(self): def claims_token_expired(self):
exp = self._get_claims_token_expiry() exp = self._get_claims_token_expiry()
if exp - time.time() < 10:
# It will expire in less than 10 seconds, or has already expired # It will expire in less than 10 seconds, or has already expired
return True return exp - time.time() < 10
return False
def claims_token_valid(self): def claims_token_valid(self):
return self._claims_token is not None and not self.claims_token_expired() return self._claims_token is not None and not self.claims_token_expired()

View file

@ -1,6 +1,5 @@
import base64 import base64
import re import re
import urllib.error
import urllib.parse import urllib.parse
import zlib import zlib

View file

@ -6,11 +6,11 @@ from .common import InfoExtractor
class CloudflareStreamIE(InfoExtractor): class CloudflareStreamIE(InfoExtractor):
_SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?' _SUBDOMAIN_RE = r'(?:(?:watch|iframe|customer-\w+)\.)?'
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)' _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
_EMBED_RE = rf'embed\.{_DOMAIN_RE}/embed/[^/]+\.js\?.*?\bvideo=' _EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video='
_ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+' _ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+'
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})' _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})'
_EMBED_REGEX = [ _EMBED_REGEX = [
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE}).*?)\1', rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1',
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})', rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
] ]
_TESTS = [{ _TESTS = [{
@ -24,6 +24,14 @@ class CloudflareStreamIE(InfoExtractor):
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
}, },
}, {
'url': 'https://watch.cloudflarestream.com/embed/sdk-iframe-integration.fla9.latest.js?video=0e8e040aec776862e1d632a699edf59e',
'info_dict': {
'id': '0e8e040aec776862e1d632a699edf59e',
'ext': 'mp4',
'title': '0e8e040aec776862e1d632a699edf59e',
'thumbnail': 'https://videodelivery.net/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg',
},
}, { }, {
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1', 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
'only_matching': True, 'only_matching': True,
@ -36,6 +44,9 @@ class CloudflareStreamIE(InfoExtractor):
}, { }, {
'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe', 'url': 'https://customer-aw5py76sw8wyqzmh.cloudflarestream.com/2463f6d3e06fa29710a337f5f5389fd8/iframe',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://watch.cloudflarestream.com/eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJraWQiOiJmYTA0YjViMzQ2NDkwYTM5NWJiNzQ1NWFhZTA2YzYwZSIsInN1YiI6Ijg4ZDQxMDhhMzY0MjA3M2VhYmFhZjg3ZGExODJkMjYzIiwiZXhwIjoxNjAwNjA5MzE5fQ.xkRJwLGkt0nZ%5F0BlPiwU7iW4pqb4lKkznbKfAhGg0tGcxSS6ZBA3lcTUwu7W%2DyCFbnAl%2Dhqk3Fn%5FqeQS%5FQydP27qTHpB9iIFFsMtk1tqzGZV5v4yrYDnwLSKzEKvVd6QwJnfABtxH2JdpSNuWlMUiVXFxGWgjOw6QeTNDDklTQYXV%5FNLV7sErSn5CeOPeRRkdXb%2D8ip%5FVOcfk1nDsFoOo4fctFtGP0wYMyY5ae8nhhatydHwevuvJCcEvEfh%2D4qjq9mCZOodevmtSQ4YWmggf4BxtWnDWYrGW8Otp6oqezrR8oY4%2DbKdV6PaqBj49aJdcls6xK7PmM8%5Fvjy3xfm0Mg',
'only_matching': True,
}] }]
_WEBPAGE_TESTS = [{ _WEBPAGE_TESTS = [{
'url': 'https://upride.cc/incident/shoulder-pass-at-light/', 'url': 'https://upride.cc/incident/shoulder-pass-at-light/',

View file

@ -1,3 +1,5 @@
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
@ -35,6 +37,20 @@ class CloudyCDNIE(InfoExtractor):
'duration': 1205, 'duration': 1205,
'upload_date': '20221130', 'upload_date': '20221130',
}, },
}, {
# Video-only m3u8 formats need manual fixup
'url': 'https://embed.cloudycdn.services/ltv/media/08j_d24-6000-074',
'md5': 'fc472e40f6e6238446509be411c920e2',
'info_dict': {
'id': '08j_d24-6000-074',
'ext': 'mp4',
'upload_date': '20240620',
'duration': 1673,
'title': 'D24-6000-074-cetstud',
'timestamp': 1718902233,
'thumbnail': 'https://store.cloudycdn.services/tmsp00060/assets/media/788392/placeholder1718903938.jpg',
},
'params': {'format': 'bv'},
}] }]
_WEBPAGE_TESTS = [{ _WEBPAGE_TESTS = [{
'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/', 'url': 'https://www.tavaklase.lv/video/es-esmu-mina-um-2/',
@ -63,6 +79,9 @@ class CloudyCDNIE(InfoExtractor):
formats, subtitles = [], {} formats, subtitles = [], {}
for m3u8_url in traverse_obj(data, ('source', 'sources', ..., 'src', {url_or_none})): for m3u8_url in traverse_obj(data, ('source', 'sources', ..., 'src', {url_or_none})):
fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False) fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False)
for fmt in fmts:
if re.search(r'chunklist_b\d+_vo_', fmt['url']):
fmt['acodec'] = 'none'
formats.extend(fmts) formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles) self._merge_subtitles(subs, target=subtitles)

View file

@ -234,7 +234,14 @@ class InfoExtractor:
'maybe' if the format may have DRM and has to be tested before download. 'maybe' if the format may have DRM and has to be tested before download.
* extra_param_to_segment_url A query string to append to each * extra_param_to_segment_url A query string to append to each
fragment's URL, or to update each existing query string fragment's URL, or to update each existing query string
with. Only applied by the native HLS/DASH downloaders. with. If it is an HLS stream with an AES-128 decryption key,
the query parameters will be passed to the key URI as well,
unless there is an `extra_param_to_key_url` given,
or unless an external key URI is provided via `hls_aes`.
Only applied by the native HLS/DASH downloaders.
* extra_param_to_key_url A query string to append to the URL
of the format's HLS AES-128 decryption key.
Only applied by the native HLS downloader.
* hls_aes A dictionary of HLS AES-128 decryption information * hls_aes A dictionary of HLS AES-128 decryption information
used by the native HLS downloader to override the used by the native HLS downloader to override the
values in the media playlist when an '#EXT-X-KEY' tag values in the media playlist when an '#EXT-X-KEY' tag
@ -2215,6 +2222,11 @@ class InfoExtractor:
'quality': quality, 'quality': quality,
'has_drm': has_drm, 'has_drm': has_drm,
} }
# YouTube-specific
if yt_audio_content_id := last_stream_inf.get('YT-EXT-AUDIO-CONTENT-ID'):
f['language'] = yt_audio_content_id.split('.')[0]
resolution = last_stream_inf.get('RESOLUTION') resolution = last_stream_inf.get('RESOLUTION')
if resolution: if resolution:
mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution) mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)

View file

@ -2,6 +2,7 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_protocol, determine_protocol,
int_or_none, int_or_none,
join_nonempty,
try_get, try_get,
unescapeHTML, unescapeHTML,
) )
@ -52,7 +53,7 @@ class DailyMailIE(InfoExtractor):
is_hls = container == 'M2TS' is_hls = container == 'M2TS'
protocol = 'm3u8_native' if is_hls else determine_protocol({'url': rendition_url}) protocol = 'm3u8_native' if is_hls else determine_protocol({'url': rendition_url})
formats.append({ formats.append({
'format_id': ('hls' if is_hls else protocol) + (f'-{tbr}' if tbr else ''), 'format_id': join_nonempty('hls' if is_hls else protocol, tbr),
'url': rendition_url, 'url': rendition_url,
'width': int_or_none(rendition.get('frameWidth')), 'width': int_or_none(rendition.get('frameWidth')),
'height': int_or_none(rendition.get('frameHeight')), 'height': int_or_none(rendition.get('frameHeight')),

View file

@ -1,16 +1,16 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
parse_resolution,
traverse_obj,
try_get, try_get,
url_or_none,
urlencode_postdata, urlencode_postdata,
) )
from ..utils.traversal import traverse_obj
class DigitalConcertHallIE(InfoExtractor): class DigitalConcertHallIE(InfoExtractor):
IE_DESC = 'DigitalConcertHall extractor' IE_DESC = 'DigitalConcertHall extractor'
_VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert)/(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert|work)/(?P<id>[0-9]+)-?(?P<part>[0-9]+)?'
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token' _OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
_ACCESS_TOKEN = None _ACCESS_TOKEN = None
_NETRC_MACHINE = 'digitalconcerthall' _NETRC_MACHINE = 'digitalconcerthall'
@ -26,7 +26,8 @@ class DigitalConcertHallIE(InfoExtractor):
'upload_date': '20210624', 'upload_date': '20210624',
'timestamp': 1624548600, 'timestamp': 1624548600,
'duration': 2798, 'duration': 2798,
'album_artist': 'Members of the Berliner Philharmoniker / Simon Rössler', 'album_artists': ['Members of the Berliner Philharmoniker', 'Simon Rössler'],
'composers': ['Kurt Weill'],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
@ -34,8 +35,9 @@ class DigitalConcertHallIE(InfoExtractor):
'url': 'https://www.digitalconcerthall.com/en/concert/53785', 'url': 'https://www.digitalconcerthall.com/en/concert/53785',
'info_dict': { 'info_dict': {
'id': '53785', 'id': '53785',
'album_artist': 'Berliner Philharmoniker / Kirill Petrenko', 'album_artists': ['Berliner Philharmoniker', 'Kirill Petrenko'],
'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich', 'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'playlist_count': 3, 'playlist_count': 3,
@ -49,9 +51,20 @@ class DigitalConcertHallIE(InfoExtractor):
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$', 'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
'upload_date': '20220714', 'upload_date': '20220714',
'timestamp': 1657785600, 'timestamp': 1657785600,
'album_artist': 'Frank Peter Zimmermann / Benedikt von Bernstorff / Jakob von Bernstorff', 'album_artists': ['Frank Peter Zimmermann', 'Benedikt von Bernstorff', 'Jakob von Bernstorff'],
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, {
'note': 'Concert with several works and an interview',
'url': 'https://www.digitalconcerthall.com/en/work/53785-1',
'info_dict': {
'id': '53785',
'album_artists': ['Berliner Philharmoniker', 'Kirill Petrenko'],
'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
'thumbnail': r're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
},
'params': {'skip_download': 'm3u8'},
'playlist_count': 1,
}] }]
def _perform_login(self, username, password): def _perform_login(self, username, password):
@ -97,15 +110,14 @@ class DigitalConcertHallIE(InfoExtractor):
'Accept-Language': language, 'Accept-Language': language,
}) })
m3u8_url = traverse_obj( formats = []
stream_info, ('channel', lambda k, _: k.startswith('vod_mixed'), 'stream', 0, 'url'), get_all=False) for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})):
formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False) formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False))
yield { yield {
'id': video_id, 'id': video_id,
'title': item.get('title'), 'title': item.get('title'),
'composer': item.get('name_composer'), 'composer': item.get('name_composer'),
'url': m3u8_url,
'formats': formats, 'formats': formats,
'duration': item.get('duration_total'), 'duration': item.get('duration_total'),
'timestamp': traverse_obj(item, ('date', 'published')), 'timestamp': traverse_obj(item, ('date', 'published')),
@ -119,31 +131,32 @@ class DigitalConcertHallIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
language, type_, video_id = self._match_valid_url(url).group('language', 'type', 'id') language, type_, video_id, part = self._match_valid_url(url).group('language', 'type', 'id', 'part')
if not language: if not language:
language = 'en' language = 'en'
thumbnail_url = self._html_search_regex( api_type = 'concert' if type_ == 'work' else type_
r'(https?://images\.digitalconcerthall\.com/cms/thumbnails/.*\.jpg)',
self._download_webpage(url, video_id), 'thumbnail')
thumbnails = [{
'url': thumbnail_url,
**parse_resolution(thumbnail_url),
}]
vid_info = self._download_json( vid_info = self._download_json(
f'https://api.digitalconcerthall.com/v2/{type_}/{video_id}', video_id, headers={ f'https://api.digitalconcerthall.com/v2/{api_type}/{video_id}', video_id, headers={
'Accept': 'application/json', 'Accept': 'application/json',
'Accept-Language': language, 'Accept-Language': language,
}) })
album_artist = ' / '.join(traverse_obj(vid_info, ('_links', 'artist', ..., 'name')) or '') album_artists = traverse_obj(vid_info, ('_links', 'artist', ..., 'name'))
videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...)) videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...))
if type_ == 'work':
videos = [videos[int(part) - 1]]
thumbnail = traverse_obj(vid_info, (
'image', ..., {self._proto_relative_url}, {url_or_none},
{lambda x: x.format(width=0, height=0)}, any)) # NB: 0x0 is the original size
return { return {
'_type': 'playlist', '_type': 'playlist',
'id': video_id, 'id': video_id,
'title': vid_info.get('title'), 'title': vid_info.get('title'),
'entries': self._entries(videos, language, thumbnails=thumbnails, album_artist=album_artist, type_=type_), 'entries': self._entries(
'thumbnails': thumbnails, videos, language, type_, thumbnail=thumbnail, album_artists=album_artists),
'album_artist': album_artist, 'thumbnail': thumbnail,
'album_artists': album_artists,
} }

View file

@ -1,6 +1,11 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import Request from ..networking import Request
from ..utils import float_or_none, int_or_none, parse_iso8601 from ..utils import (
float_or_none,
int_or_none,
join_nonempty,
parse_iso8601,
)
class EitbIE(InfoExtractor): class EitbIE(InfoExtractor):
@ -37,12 +42,9 @@ class EitbIE(InfoExtractor):
if not video_url: if not video_url:
continue continue
tbr = float_or_none(rendition.get('ENCODING_RATE'), 1000) tbr = float_or_none(rendition.get('ENCODING_RATE'), 1000)
format_id = 'http'
if tbr:
format_id += f'-{int(tbr)}'
formats.append({ formats.append({
'url': rendition['PMD_URL'], 'url': rendition['PMD_URL'],
'format_id': format_id, 'format_id': join_nonempty('http', int_or_none(tbr)),
'width': int_or_none(rendition.get('FRAME_WIDTH')), 'width': int_or_none(rendition.get('FRAME_WIDTH')),
'height': int_or_none(rendition.get('FRAME_HEIGHT')), 'height': int_or_none(rendition.get('FRAME_HEIGHT')),
'tbr': tbr, 'tbr': tbr,

View file

@ -29,9 +29,6 @@ class EpornerIE(InfoExtractor):
'view_count': int, 'view_count': int,
'age_limit': 18, 'age_limit': 18,
}, },
'params': {
'proxy': '127.0.0.1:8118',
},
}, { }, {
# New (May 2016) URL layout # New (May 2016) URL layout
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/', 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',

View file

@ -621,6 +621,9 @@ class FacebookIE(InfoExtractor):
'url': playable_url, 'url': playable_url,
}) })
extract_dash_manifest(video, formats) extract_dash_manifest(video, formats)
if not formats:
# Do not append false positive entry w/o any formats
return
automatic_captions, subtitles = {}, {} automatic_captions, subtitles = {}, {}
is_broadcast = traverse_obj(video, ('is_video_broadcast', {bool})) is_broadcast = traverse_obj(video, ('is_video_broadcast', {bool}))

View file

@ -5,6 +5,7 @@ from .common import InfoExtractor
from .dailymotion import DailymotionIE from .dailymotion import DailymotionIE
from ..networking import HEADRequest from ..networking import HEADRequest
from ..utils import ( from ..utils import (
clean_html,
determine_ext, determine_ext,
filter_dict, filter_dict,
format_field, format_field,
@ -33,6 +34,7 @@ class FranceTVIE(InfoExtractor):
_GEO_BYPASS = False _GEO_BYPASS = False
_TESTS = [{ _TESTS = [{
# tokenized url is in dinfo['video']['token']
'url': 'francetv:ec217ecc-0733-48cf-ac06-af1347b849d1', 'url': 'francetv:ec217ecc-0733-48cf-ac06-af1347b849d1',
'info_dict': { 'info_dict': {
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
@ -44,6 +46,19 @@ class FranceTVIE(InfoExtractor):
'upload_date': '20170813', 'upload_date': '20170813',
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, {
# tokenized url is in dinfo['video']['token']['akamai']
'url': 'francetv:c5bda21d-2c6f-4470-8849-3d8327adb2ba',
'info_dict': {
'id': 'c5bda21d-2c6f-4470-8849-3d8327adb2ba',
'ext': 'mp4',
'title': '13h15, le dimanche... - Les mystères de Jésus',
'timestamp': 1514118300,
'duration': 2880,
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20171224',
},
'params': {'skip_download': 'm3u8'},
}, { }, {
'url': 'francetv:162311093', 'url': 'francetv:162311093',
'only_matching': True, 'only_matching': True,
@ -68,6 +83,7 @@ class FranceTVIE(InfoExtractor):
def _extract_video(self, video_id, hostname=None): def _extract_video(self, video_id, hostname=None):
is_live = None is_live = None
videos = [] videos = []
drm_formats = False
title = None title = None
subtitle = None subtitle = None
episode_number = None episode_number = None
@ -85,13 +101,12 @@ class FranceTVIE(InfoExtractor):
'device_type': device_type, 'device_type': device_type,
'browser': browser, 'browser': browser,
'domain': hostname, 'domain': hostname,
}), fatal=False) }), fatal=False, expected_status=422) # 422 json gives detailed error code/message
if not dinfo: if not dinfo:
continue continue
video = traverse_obj(dinfo, ('video', {dict})) if video := traverse_obj(dinfo, ('video', {dict})):
if video:
videos.append(video) videos.append(video)
if duration is None: if duration is None:
duration = video.get('duration') duration = video.get('duration')
@ -99,9 +114,19 @@ class FranceTVIE(InfoExtractor):
is_live = video.get('is_live') is_live = video.get('is_live')
if spritesheets is None: if spritesheets is None:
spritesheets = video.get('spritesheets') spritesheets = video.get('spritesheets')
elif code := traverse_obj(dinfo, ('code', {int})):
if code == 2009:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
elif code in (2015, 2017):
# 2015: L'accès à cette vidéo est impossible. (DRM-only)
# 2017: Cette vidéo n'est pas disponible depuis le site web mobile (b/c DRM)
drm_formats = True
continue
self.report_warning(
f'{self.IE_NAME} said: {code} "{clean_html(dinfo.get("message"))}"')
continue
meta = traverse_obj(dinfo, ('meta', {dict})) if meta := traverse_obj(dinfo, ('meta', {dict})):
if meta:
if title is None: if title is None:
title = meta.get('title') title = meta.get('title')
# meta['pre_title'] contains season and episode number for series in format "S<ID> E<ID>" # meta['pre_title'] contains season and episode number for series in format "S<ID> E<ID>"
@ -114,12 +139,15 @@ class FranceTVIE(InfoExtractor):
if timestamp is None: if timestamp is None:
timestamp = parse_iso8601(meta.get('broadcasted_at')) timestamp = parse_iso8601(meta.get('broadcasted_at'))
if not videos and drm_formats:
self.report_drm(video_id)
formats, subtitles, video_url = [], {}, None formats, subtitles, video_url = [], {}, None
for video in traverse_obj(videos, lambda _, v: url_or_none(v['url'])): for video in traverse_obj(videos, lambda _, v: url_or_none(v['url'])):
video_url = video['url'] video_url = video['url']
format_id = video.get('format') format_id = video.get('format')
if token_url := url_or_none(video.get('token')): if token_url := traverse_obj(video, ('token', (None, 'akamai'), {url_or_none}, any)):
tokenized_url = traverse_obj(self._download_json( tokenized_url = traverse_obj(self._download_json(
token_url, video_id, f'Downloading signed {format_id} manifest URL', token_url, video_id, f'Downloading signed {format_id} manifest URL',
fatal=False, query={ fatal=False, query={
@ -225,13 +253,13 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html', 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
'info_dict': { 'info_dict': {
'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', 'id': 'c5bda21d-2c6f-4470-8849-3d8327adb2ba',
'ext': 'mp4', 'ext': 'mp4',
'title': '13h15, le dimanche... - Les mystères de Jésus', 'title': '13h15, le dimanche... - Les mystères de Jésus',
'timestamp': 1502623500, 'timestamp': 1514118300,
'duration': 2580, 'duration': 2880,
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20170813', 'upload_date': '20171224',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,

View file

@ -2167,7 +2167,15 @@ class GenericIE(InfoExtractor):
urllib.parse.urlparse(fragment_query).query or fragment_query urllib.parse.urlparse(fragment_query).query or fragment_query
or urllib.parse.urlparse(manifest_url).query or None) or urllib.parse.urlparse(manifest_url).query or None)
hex_or_none = lambda x: x if re.fullmatch(r'(0x)?[\da-f]+', x, re.IGNORECASE) else None key_query = self._configuration_arg('key_query', [None], casesense=True)[0]
if key_query is not None:
info['extra_param_to_key_url'] = (
urllib.parse.urlparse(key_query).query or key_query
or urllib.parse.urlparse(manifest_url).query or None)
def hex_or_none(value):
return value if re.fullmatch(r'(0x)?[\da-f]+', value, re.IGNORECASE) else None
info['hls_aes'] = traverse_obj(self._configuration_arg('hls_key', casesense=True), { info['hls_aes'] = traverse_obj(self._configuration_arg('hls_key', casesense=True), {
'uri': (0, {url_or_none}), 'key': (0, {hex_or_none}), 'iv': (1, {hex_or_none}), 'uri': (0, {url_or_none}), 'key': (0, {hex_or_none}), 'iv': (1, {hex_or_none}),
}) or None }) or None

View file

@ -5,6 +5,7 @@ from ..utils import (
ExtractorError, ExtractorError,
determine_ext, determine_ext,
int_or_none, int_or_none,
join_nonempty,
parse_age_limit, parse_age_limit,
remove_end, remove_end,
remove_start, remove_start,
@ -287,7 +288,7 @@ class GoIE(AdobePassIE):
if mobj: if mobj:
height = int(mobj.group(2)) height = int(mobj.group(2))
f.update({ f.update({
'format_id': (f'{format_id}-' if format_id else '') + f'{height}P', 'format_id': join_nonempty(format_id, f'{height}P'),
'width': int(mobj.group(1)), 'width': int(mobj.group(1)),
'height': height, 'height': height,
}) })

View file

@ -0,0 +1,32 @@
from .common import InfoExtractor
from ..utils import update_url, url_or_none
from ..utils.traversal import traverse_obj
class GraspopIE(InfoExtractor):
    """Extractor for video-on-demand pages on vod.graspop.be.

    The numeric ID from the page URL is used to query the Proximus
    "videocenter" stream endpoint, whose JSON response supplies the HLS
    manifest location as well as the title and poster image.
    """

    _VALID_URL = r'https?://vod\.graspop\.be/[a-z]{2}/(?P<id>\d+)/'
    _TESTS = [{
        'url': 'https://vod.graspop.be/fr/101556/thy-art-is-murder-concert/',
        'info_dict': {
            'id': '101556',
            'ext': 'mp4',
            'title': 'Thy Art Is Murder',
            'thumbnail': r're:https://cdn-mds\.pickx\.be/festivals/v3/global/original/.+\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict (id, formats, title, thumbnail) for ``url``."""
        video_id = self._match_id(url)
        stream_api_url = f'https://tv.proximus.be/MWC/videocenter/festivals/{video_id}/stream'
        metadata = self._download_json(stream_api_url, video_id)

        # Downgrade manifest request to avoid incomplete certificate chain error
        manifest_url = update_url(metadata['source']['assetUri'], scheme='http')
        formats = self._extract_m3u8_formats(manifest_url, video_id, 'mp4')

        # Optional fields; traverse_obj is given a type/URL filter for each one
        optional_fields = traverse_obj(metadata, {
            'title': ('name', {str}),
            'thumbnail': ('source', 'poster', {url_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            **optional_fields,
        }

View file

@ -3,6 +3,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
join_nonempty,
parse_duration, parse_duration,
urljoin, urljoin,
xpath_element, xpath_element,
@ -69,7 +70,7 @@ class HBOBaseIE(InfoExtractor):
height = format_info.get('height') height = format_info.get('height')
# Format entry for a direct HTTP rendition; width/height come from format_info.
fmt = {
    'url': path,
    # The format id is 'http', with a '-<height>p' suffix only when a height
    # is known. The arguments must be comma-separated: writing
    # `'http'. height` is parsed as the attribute access `'http'.height`
    # and raises AttributeError at runtime.
    'format_id': join_nonempty('http', height and f'{height}p'),
    'width': format_info.get('width'),
    'height': height,
}

View file

@ -44,9 +44,6 @@ class HKETVIE(InfoExtractor):
'duration': 907, 'duration': 907,
'subtitles': {}, 'subtitles': {},
}, },
'params': {
'geo_verification_proxy': '<HK proxy here>',
},
'skip': 'Geo restricted to HK', 'skip': 'Geo restricted to HK',
}] }]

View file

@ -453,7 +453,7 @@ class InstagramIE(InstagramBaseIE):
else: else:
self.report_warning('Main webpage is locked behind the login page. Retrying with embed webpage (some metadata might be missing).') self.report_warning('Main webpage is locked behind the login page. Retrying with embed webpage (some metadata might be missing).')
webpage = self._download_webpage( webpage = self._download_webpage(
f'{url}/embed/', video_id, note='Downloading embed webpage', fatal=False) f'{url}/embed/', video_id, note='Downloading embed webpage', fatal=False) or ''
additional_data = self._search_json( additional_data = self._search_json(
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,', webpage, 'additional data', video_id, fatal=False) r'window\.__additionalDataLoaded\s*\(\s*[^,]+,', webpage, 'additional data', video_id, fatal=False)
if not additional_data and not media: if not additional_data and not media:

View file

@ -2,7 +2,6 @@ import functools
import hashlib import hashlib
import json import json
import time import time
import urllib.error
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor

View file

@ -364,20 +364,25 @@ class JioCinemaSeriesIE(JioCinemaBaseIE):
'title': 'naagin', 'title': 'naagin',
}, },
'playlist_mincount': 120, 'playlist_mincount': 120,
}, {
'url': 'https://www.jiocinema.com/tv-shows/mtv-splitsvilla-x5/3499820',
'info_dict': {
'id': '3499820',
'title': 'mtv-splitsvilla-x5',
},
'playlist_mincount': 310,
}] }]
def _entries(self, series_id): def _entries(self, series_id):
seasons = self._download_json( seasons = traverse_obj(self._download_json(
f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/season-by-show', series_id, f'{self._METADATA_API_BASE}/voot/v1/voot-web/view/show/{series_id}', series_id,
'Downloading series metadata JSON', query={ 'Downloading series metadata JSON', query={'responseType': 'common'}), (
'sort': 'season:asc', 'trays', lambda _, v: v['trayId'] == 'season-by-show-multifilter',
'id': series_id, 'trayTabs', lambda _, v: v['id']))
'responseType': 'common',
})
for season_num, season in enumerate(traverse_obj(seasons, ('result', lambda _, v: v['id'])), 1): for season_num, season in enumerate(seasons, start=1):
season_id = season['id'] season_id = season['id']
label = season.get('season') or season_num label = season.get('label') or season_num
for page_num in itertools.count(1): for page_num in itertools.count(1):
episodes = traverse_obj(self._download_json( episodes = traverse_obj(self._download_json(
f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode', f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode',

View file

@ -3,43 +3,52 @@ import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
make_archive_id,
parse_iso8601, parse_iso8601,
try_get, str_or_none,
traverse_obj,
url_or_none,
urljoin,
) )
class KhanAcademyBaseIE(InfoExtractor): class KhanAcademyBaseIE(InfoExtractor):
_VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)' _VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
_PUBLISHED_CONTENT_VERSION = '171419ab20465d931b356f22d20527f13969bb70'
def _parse_video(self, video): def _parse_video(self, video):
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': video['youtubeId'], 'url': video['youtubeId'],
'id': video.get('slug'), 'id': video['youtubeId'],
'title': video.get('title'),
'thumbnail': video.get('imageUrl') or video.get('thumbnailUrl'),
'duration': int_or_none(video.get('duration')),
'description': video.get('description'),
'ie_key': 'Youtube', 'ie_key': 'Youtube',
**traverse_obj(video, {
'display_id': ('id', {str_or_none}),
'title': ('translatedTitle', {str}),
'thumbnail': ('thumbnailUrls', ..., 'url', {url_or_none}),
'duration': ('duration', {int_or_none}),
'description': ('description', {str}),
}, get_all=False),
} }
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
content = self._download_json( content = self._download_json(
'https://www.khanacademy.org/api/internal/graphql/FetchContentData', 'https://www.khanacademy.org/api/internal/graphql/ContentForPath', display_id,
display_id, query={ query={
'fastly_cacheable': 'persist_until_publish', 'fastly_cacheable': 'persist_until_publish',
'hash': '4134764944', 'pcv': self._PUBLISHED_CONTENT_VERSION,
'lang': 'en', 'hash': '1242644265',
'variables': json.dumps({ 'variables': json.dumps({
'path': display_id, 'path': display_id,
'queryParams': 'lang=en',
'isModal': False,
'followRedirects': True,
'countryCode': 'US', 'countryCode': 'US',
'kaLocale': 'en',
'clientPublishedContentVersion': self._PUBLISHED_CONTENT_VERSION,
}), }),
})['data']['contentJson'] 'lang': 'en',
return self._parse_component_props(self._parse_json(content, display_id)['componentProps']) })['data']['contentRoute']['listedPathData']
return self._parse_component_props(content, display_id)
class KhanAcademyIE(KhanAcademyBaseIE): class KhanAcademyIE(KhanAcademyBaseIE):
@ -47,64 +56,98 @@ class KhanAcademyIE(KhanAcademyBaseIE):
_VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/') _VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
_TEST = { _TEST = {
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad', 'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0', 'md5': '1d5c2e70fa6aa29c38eca419f12515ce',
'info_dict': { 'info_dict': {
'id': 'FlIG3TvQCBQ', 'id': 'FlIG3TvQCBQ',
'ext': 'mp4', 'ext': 'mp4',
'title': 'The one-time pad', 'title': 'The one-time pad',
'description': 'The perfect cipher', 'description': 'The perfect cipher',
'display_id': '716378217',
'duration': 176, 'duration': 176,
'uploader': 'Brit Cruise', 'uploader': 'Khan Academy',
'uploader_id': 'khanacademy', 'uploader_id': '@khanacademy',
'uploader_url': 'https://www.youtube.com/@khanacademy',
'upload_date': '20120411', 'upload_date': '20120411',
'timestamp': 1334170113, 'timestamp': 1334170113,
'license': 'cc-by-nc-sa', 'license': 'cc-by-nc-sa',
'live_status': 'not_live',
'channel': 'Khan Academy',
'channel_id': 'UC4a-Gbdw7vOaccHmFo40b9g',
'channel_url': 'https://www.youtube.com/channel/UC4a-Gbdw7vOaccHmFo40b9g',
'channel_is_verified': True,
'playable_in_embed': True,
'categories': ['Education'],
'creators': ['Brit Cruise'],
'tags': [],
'age_limit': 0,
'availability': 'public',
'comment_count': int,
'channel_follower_count': int,
'thumbnail': str,
'view_count': int,
'like_count': int,
'heatmap': list,
}, },
'add_ie': ['Youtube'], 'add_ie': ['Youtube'],
} }
def _parse_component_props(self, component_props): def _parse_component_props(self, component_props, display_id):
video = component_props['tutorialPageData']['contentModel'] video = component_props['content']
info = self._parse_video(video) return {
author_names = video.get('authorNames') **self._parse_video(video),
info.update({ **traverse_obj(video, {
'uploader': ', '.join(author_names) if author_names else None, 'creators': ('authorNames', ..., {str}),
'timestamp': parse_iso8601(video.get('dateAdded')), 'timestamp': ('dateAdded', {parse_iso8601}),
'license': video.get('kaUserLicense'), 'license': ('kaUserLicense', {str}),
}) }),
return info }
class KhanAcademyUnitIE(KhanAcademyBaseIE): class KhanAcademyUnitIE(KhanAcademyBaseIE):
IE_NAME = 'khanacademy:unit' IE_NAME = 'khanacademy:unit'
_VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)' _VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('1,2', '')) + '/?(?:[?#&]|$)'
_TEST = { _TESTS = [{
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography', 'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
'info_dict': { 'info_dict': {
'id': 'cryptography', 'id': 'x48c910b6',
'title': 'Cryptography', 'title': 'Cryptography',
'description': 'How have humans protected their secret messages through history? What has changed today?', 'description': 'How have humans protected their secret messages through history? What has changed today?',
'display_id': 'computing/computer-science/cryptography',
'_old_archive_ids': ['khanacademyunit cryptography'],
}, },
'playlist_mincount': 31, 'playlist_mincount': 31,
} }, {
'url': 'https://www.khanacademy.org/computing/computer-science',
'info_dict': {
'id': 'x301707a0',
'title': 'Computer science theory',
'description': 'md5:4b472a4646e6cf6ec4ccb52c4062f8ba',
'display_id': 'computing/computer-science',
'_old_archive_ids': ['khanacademyunit computer-science'],
},
'playlist_mincount': 50,
}]
def _parse_component_props(self, component_props): def _parse_component_props(self, component_props, display_id):
curation = component_props['curation'] course = component_props['course']
selected_unit = traverse_obj(course, (
'unitChildren', lambda _, v: v['relativeUrl'] == f'/{display_id}', any)) or course
entries = [] def build_entry(entry):
tutorials = try_get(curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or [] return self.url_result(urljoin(
for tutorial_number, tutorial in enumerate(tutorials, 1): 'https://www.khanacademy.org', entry['canonicalUrl']),
chapter_info = { KhanAcademyIE, title=entry.get('translatedTitle'))
'chapter': tutorial.get('title'),
'chapter_number': tutorial_number, entries = traverse_obj(selected_unit, (
'chapter_id': tutorial.get('id'), (('unitChildren', ...), None), 'allOrderedChildren', ..., 'curatedChildren',
} lambda _, v: v['contentKind'] == 'Video' and v['canonicalUrl'], {build_entry}))
for content_item in (tutorial.get('contentItems') or []):
if content_item.get('kind') == 'Video':
info = self._parse_video(content_item)
info.update(chapter_info)
entries.append(info)
return self.playlist_result( return self.playlist_result(
entries, curation.get('unit'), curation.get('title'), entries,
curation.get('description')) display_id=display_id,
**traverse_obj(selected_unit, {
'id': ('id', {str}),
'title': ('translatedTitle', {str}),
'description': ('translatedDescription', {str}),
'_old_archive_ids': ('slug', {str}, {lambda x: [make_archive_id(self, x)] if x else None}),
}))

View file

@ -0,0 +1,114 @@
import json
from .common import InfoExtractor
from .vimeo import VimeoIE
from ..utils import (
clean_html,
extract_attributes,
get_element_html_by_id,
int_or_none,
parse_duration,
str_or_none,
unified_strdate,
url_or_none,
urljoin,
)
from ..utils.traversal import traverse_obj
class LaracastsBaseIE(InfoExtractor):
    """Shared helpers for the Laracasts extractors."""

    def _get_prop_data(self, url, display_id):
        """Download `url` and return the 'props' object of the page data.

        The data is read from the JSON stored in the `data-page` attribute
        of the element with id `app`. Returns None when any step of that
        lookup yields nothing.
        """
        webpage = self._download_webpage(url, display_id)
        app_element = get_element_html_by_id('app', webpage)
        props_path = ({extract_attributes}, 'data-page', {json.loads}, 'props')
        return traverse_obj(app_element, props_path)

    def _parse_episode(self, episode):
        """Turn an episode object into a url_transparent result for VimeoIE.

        Episodes without a `vimeoId` are reported as requiring a login.
        """
        has_video = traverse_obj(episode, 'vimeoId')
        if not has_video:
            self.raise_login_required('This video is only available for subscribers.')

        # The player URL is smuggled with a laracasts.com referrer
        player_url = VimeoIE._smuggle_referrer(
            f'https://player.vimeo.com/video/{episode["vimeoId"]}', 'https://laracasts.com/')
        # NOTE(review): 'date' is not one of the usual *_date info fields
        # (e.g. upload_date) - confirm this key is intended
        episode_info = traverse_obj(episode, {
            'id': ('id', {int}, {str_or_none}),
            'webpage_url': ('path', {lambda x: urljoin('https://laracasts.com', x)}),
            'title': ('title', {clean_html}),
            'season_number': ('chapter', {int_or_none}),
            'episode_number': ('position', {int_or_none}),
            'description': ('body', {clean_html}),
            'thumbnail': ('largeThumbnail', {url_or_none}),
            'duration': ('length', {int_or_none}),
            'date': ('dateSegments', 'published', {unified_strdate}),
        })
        return self.url_result(player_url, VimeoIE, url_transparent=True, **episode_info)
class LaracastsIE(LaracastsBaseIE):
    """Extractor for a single episode of a Laracasts series."""

    IE_NAME = 'laracasts'
    _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<id>[\w-]+/episodes/\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11/episodes/1',
        'md5': 'c8f5e7b02ad0e438ef9280a08c8493dc',
        'info_dict': {
            'id': '922040563',
            'title': 'Hello, Laravel',
            'ext': 'mp4',
            'duration': 519,
            'date': '20240312',
            'thumbnail': 'https://laracasts.s3.amazonaws.com/videos/thumbnails/youtube/30-days-to-learn-laravel-11-1.png',
            'description': 'md5:ddd658bb241975871d236555657e1dd1',
            'season_number': 1,
            'season': 'Season 1',
            'episode_number': 1,
            'episode': 'Episode 1',
            'uploader': 'Laracasts',
            'uploader_id': 'user20182673',
            'uploader_url': 'https://vimeo.com/user20182673',
        },
        'expected_warnings': ['Failed to parse XML'],  # TODO: Remove when vimeo extractor is fixed
    }]

    def _real_extract(self, url):
        # The matched id is "<series-slug>/episodes/<number>"
        display_id = self._match_id(url)
        page_props = self._get_prop_data(url, display_id)
        lesson = page_props['lesson']
        return self._parse_episode(lesson)
class LaracastsPlaylistIE(LaracastsBaseIE):
    """Extractor for a whole Laracasts series (playlist of episodes)."""

    IE_NAME = 'laracasts:series'
    _VALID_URL = r'https?://(?:www\.)?laracasts\.com/series/(?P<id>[\w-]+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://laracasts.com/series/30-days-to-learn-laravel-11',
        'info_dict': {
            'title': '30 Days to Learn Laravel',
            'id': '210',
            'thumbnail': 'https://laracasts.s3.amazonaws.com/series/thumbnails/social-cards/30-days-to-learn-laravel-11.png?v=2',
            'duration': 30600.0,
            'modified_date': '20240511',
            'description': 'md5:27c260a1668a450984e8f901579912dd',
            'categories': ['Frameworks'],
            'tags': ['Laravel'],
            'display_id': '30-days-to-learn-laravel-11',
        },
        'playlist_count': 30,
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        series = self._get_prop_data(url, display_id)['series']

        series_info = traverse_obj(series, {
            'title': ('title', {str}),
            'id': ('id', {int}, {str_or_none}),
            'description': ('body', {clean_html}),
            'thumbnail': (('large_thumbnail', 'thumbnail'), {url_or_none}, any),
            'duration': ('runTime', {parse_duration}),
            'categories': ('taxonomy', 'name', {str}, {lambda x: x and [x]}),
            'tags': ('topics', ..., 'name', {str}),
            'modified_date': ('lastUpdated', {unified_strdate}),
        })
        playlist_info = {'display_id': display_id, **series_info}

        # Only episodes carrying a vimeoId are turned into entries
        episode_path = ('chapters', ..., 'episodes', lambda _, v: v['vimeoId'], {self._parse_episode})
        episodes = traverse_obj(series, episode_path)
        return self.playlist_result(episodes, **playlist_info)

View file

@ -1,51 +1,35 @@
import random
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import xpath_text
class MatchTVIE(InfoExtractor): class MatchTVIE(InfoExtractor):
_VALID_URL = r'https?://matchtv\.ru(?:/on-air|/?#live-player)' _VALID_URL = [
r'https?://matchtv\.ru/on-air/?(?:$|[?#])',
r'https?://video\.matchtv\.ru/iframe/channel/106/?(?:$|[?#])',
]
_TESTS = [{ _TESTS = [{
'url': 'http://matchtv.ru/#live-player', 'url': 'http://matchtv.ru/on-air/',
'info_dict': { 'info_dict': {
'id': 'matchtv-live', 'id': 'matchtv-live',
'ext': 'flv', 'ext': 'mp4',
'title': r're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'title': r're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True, 'live_status': 'is_live',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, { }, {
'url': 'http://matchtv.ru/on-air/', 'url': 'https://video.matchtv.ru/iframe/channel/106',
'only_matching': True, 'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = 'matchtv-live' video_id = 'matchtv-live'
video_url = self._download_json( webpage = self._download_webpage('https://video.matchtv.ru/iframe/channel/106', video_id)
'http://player.matchtv.ntvplus.tv/player/smil', video_id, video_url = self._html_search_regex(
query={ r'data-config="config=(https?://[^?"]+)[?"]', webpage, 'video URL').replace('/feed/', '/media/') + '.m3u8'
'ts': '',
'quality': 'SD',
'contentId': '561d2c0df7159b37178b4567',
'sign': '',
'includeHighlights': '0',
'userId': '',
'sessionId': random.randint(1, 1000000000),
'contentType': 'channel',
'timeShift': '0',
'platform': 'portal',
},
headers={
'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf',
})['data']['videoUrl']
f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
formats = self._extract_f4m_formats(f4m_url, video_id)
return { return {
'id': video_id, 'id': video_id,
'title': 'Матч ТВ - Прямой эфир', 'title': 'Матч ТВ - Прямой эфир',
'is_live': True, 'is_live': True,
'formats': formats, 'formats': self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True),
} }

View file

@ -15,6 +15,7 @@ from ..utils import (
url_or_none, url_or_none,
urljoin, urljoin,
) )
from ..utils.traversal import traverse_obj
_ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})' _ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})'
@ -212,13 +213,14 @@ class MediasiteIE(InfoExtractor):
stream_type, 'type%u' % stream_type) stream_type, 'type%u' % stream_type)
stream_formats = [] stream_formats = []
for unum, video_url in enumerate(video_urls): for unum, video in enumerate(video_urls):
video_url = url_or_none(video_url.get('Location')) video_url = url_or_none(video.get('Location'))
if not video_url: if not video_url:
continue continue
# XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS # XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS
media_type = video_url.get('MediaType') media_type = video.get('MediaType')
ext = mimetype2ext(video.get('MimeType'))
if media_type == 'SS': if media_type == 'SS':
stream_formats.extend(self._extract_ism_formats( stream_formats.extend(self._extract_ism_formats(
video_url, resource_id, video_url, resource_id,
@ -229,15 +231,20 @@ class MediasiteIE(InfoExtractor):
video_url, resource_id, video_url, resource_id,
mpd_id=f'{stream_id}-{snum}.{unum}', mpd_id=f'{stream_id}-{snum}.{unum}',
fatal=False)) fatal=False))
elif ext in ('m3u', 'm3u8'):
stream_formats.extend(self._extract_m3u8_formats(
video_url, resource_id,
m3u8_id=f'{stream_id}-{snum}.{unum}',
fatal=False))
else: else:
stream_formats.append({ stream_formats.append({
'format_id': f'{stream_id}-{snum}.{unum}', 'format_id': f'{stream_id}-{snum}.{unum}',
'url': video_url, 'url': video_url,
'ext': mimetype2ext(video_url.get('MimeType')), 'ext': ext,
}) })
if stream.get('HasSlideContent', False): images = traverse_obj(player_options, ('PlayerLayoutOptions', 'Images', {dict}))
images = player_options['PlayerLayoutOptions']['Images'] if stream.get('HasSlideContent') and images:
stream_formats.append(self.__extract_slides( stream_formats.append(self.__extract_slides(
stream_id=stream_id, stream_id=stream_id,
snum=snum, snum=snum,

View file

@ -1,5 +1,14 @@
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none, traverse_obj, unified_timestamp from ..utils import (
int_or_none,
parse_iso8601,
traverse_obj,
unified_timestamp,
url_basename,
url_or_none,
)
class MicrosoftEmbedIE(InfoExtractor): class MicrosoftEmbedIE(InfoExtractor):
@ -63,3 +72,250 @@ class MicrosoftEmbedIE(InfoExtractor):
'subtitles': subtitles, 'subtitles': subtitles,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
} }
class MicrosoftMediusBaseIE(InfoExtractor):
    """Helpers shared by the Microsoft Medius and Microsoft Learn extractors."""

    @staticmethod
    def _sub_to_dict(subtitle_list):
        """Group subtitle entries into a subtitles dict keyed by their 'tag'.

        Each entry is a dict; its 'tag' value selects the key it is filed
        under ('und' when the entry has no 'tag'), and the stored entry is a
        copy without the 'tag' key. The dicts in `subtitle_list` are left
        unmodified, and an empty or None list yields an empty dict.
        """
        subtitles = {}
        for sub in subtitle_list or []:
            # Copy rather than pop() so the caller's dicts are not mutated
            entry = {key: value for key, value in sub.items() if key != 'tag'}
            subtitles.setdefault(sub.get('tag', 'und'), []).append(entry)
        return subtitles

    def _extract_ism(self, ism_url, video_id):
        """Extract ISM formats and lower the preference of non-English ones.

        A format is treated as English when its 'language' is 'eng' or its
        'format_id' contains 'English'; every other format gets
        language_preference -10.
        """
        formats = self._extract_ism_formats(ism_url, video_id)
        for fmt in formats:
            # .get() keeps a format lacking either key from raising KeyError;
            # such a format is simply treated as non-English
            is_english = fmt.get('language') == 'eng' or 'English' in (fmt.get('format_id') or '')
            if not is_english:
                fmt['language_preference'] = -10
        return formats
class MicrosoftMediusIE(MicrosoftMediusBaseIE):
    """Extractor for medius.microsoft.com embed pages."""

    _VALID_URL = r'https?://medius\.microsoft\.com/Embed/(?:Video\?id=|video-nc/|VideoDetails/)(?P<id>[\da-f-]+)'
    _TESTS = [{
        'url': 'https://medius.microsoft.com/Embed/video-nc/9640d86c-f513-4889-959e-5dace86e7d2b',
        'info_dict': {
            'id': '9640d86c-f513-4889-959e-5dace86e7d2b',
            'ext': 'ismv',
            'title': 'Rapidly code, test and ship from secure cloud developer environments',
            'description': 'md5:33c8e4facadc438613476eea24165f71',
            'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
            'subtitles': 'count:30',
        },
    }, {
        'url': 'https://medius.microsoft.com/Embed/video-nc/81215af5-c813-4dcd-aede-94f4e1a7daa3',
        'info_dict': {
            'id': '81215af5-c813-4dcd-aede-94f4e1a7daa3',
            'ext': 'ismv',
            'title': 'Microsoft Build opening',
            'description': 'md5:43455096141077a1f23144cab8cec1cb',
            'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
            'subtitles': 'count:31',
        },
    }, {
        'url': 'https://medius.microsoft.com/Embed/VideoDetails/78493569-9b3b-4a85-a409-ee76e789e25c',
        'info_dict': {
            'id': '78493569-9b3b-4a85-a409-ee76e789e25c',
            'ext': 'ismv',
            'title': ' Anomaly Detection & Root cause at Edge',
            'description': 'md5:f8f1ad93d7918649bfb97fa081b03b83',
            'thumbnail': r're:https://mediusdownload.event.microsoft.com/asset.*\.jpg.*',
            'subtitles': 'count:17',
        },
    }, {
        'url': 'https://medius.microsoft.com/Embed/Video?id=0dc69bda-079b-4070-a7db-a8da1a06a9c7',
        'only_matching': True,
    }, {
        'url': 'https://medius.microsoft.com/Embed/video-nc/fe823a91-959c-465b-96d4-8f4db624f72c',
        'only_matching': True,
    }]

    def _extract_subtitle(self, webpage, video_id):
        """Collect captions from the embed page as a subtitles dict.

        The `captionsConfiguration` JSON is tried first; when it gives no
        captions, .vtt URLs found in `var file = {...}` blocks are used and
        the tag is taken from the last '_'-separated part of the file name.
        """
        captions_config = self._search_json(
            r'const\s+captionsConfiguration\s*=', webpage, 'captions', video_id, default=None)
        captions = traverse_obj(captions_config, (
            'languageList', lambda _, v: url_or_none(v['src']), {
                'url': 'src',
                'tag': ('srclang', {str}),
                'name': ('kind', {str}),
            }))
        if not captions:
            vtt_urls = re.findall(r'var\s+file\s+=\s+\{[^}]+\'(https://[^\']+\.vtt\?[^\']+)', webpage)
            captions = [{
                'url': vtt_url,
                'tag': url_basename(vtt_url).split('.vtt')[0].split('_')[-1],
            } for vtt_url in vtt_urls]
        return self._sub_to_dict(captions)

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # Every accepted URL form is fetched through the video-nc embed page
        webpage = self._download_webpage(f'https://medius.microsoft.com/Embed/video-nc/{video_id}', video_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        ism_url = self._search_regex(r'StreamUrl\s*=\s*"([^"]+manifest)"', webpage, 'ism url')
        formats = self._extract_ism(ism_url, video_id)
        thumbnail = self._og_search_thumbnail(webpage)
        subtitles = self._extract_subtitle(webpage, video_id)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'thumbnail': thumbnail,
            'subtitles': subtitles,
        }
class MicrosoftLearnPlaylistIE(InfoExtractor):
    """Extractor for learn.microsoft.com show and event listings."""

    _VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?(?P<type>shows|events)/(?P<id>[\w-]+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://learn.microsoft.com/en-us/shows/bash-for-beginners',
        'info_dict': {
            'id': 'bash-for-beginners',
            'title': 'Bash for Beginners',
            'description': 'md5:16a91c07222117d1e00912f0dbc02c2c',
        },
        'playlist_count': 20,
    }, {
        'url': 'https://learn.microsoft.com/en-us/events/build-2022',
        'info_dict': {
            'id': 'build-2022',
            'title': 'Microsoft Build 2022 - Events',
            'description': 'md5:c16b43848027df837b22c6fbac7648d3',
        },
        'playlist_count': 201,
    }]

    def _entries(self, url_base, video_id):
        """Yield url results for every entry, paging through the API with `$skip`.

        Paging stops once the number of entries seen reaches the reported
        'count' or a page comes back without any URLs.
        """
        offset = 0
        has_more = True
        while has_more:
            page = self._download_json(url_base, video_id, f'Downloading entries {offset}', query={
                'locale': 'en-us',
                '$skip': offset,
            })
            paths = traverse_obj(page, ('results', ..., 'url', {str}))
            yield from (
                self.url_result(f'https://learn.microsoft.com/en-us{path}') for path in paths)
            offset += len(paths)
            has_more = offset < page.get('count', 0) and bool(paths)

    def _real_extract(self, url):
        playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')
        webpage = self._download_webpage(url, playlist_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)

        # Shows list "episodes"; anything else (events) lists "sessions"
        sub_type = {'shows': 'episodes'}.get(playlist_type, 'sessions')
        url_base = f'https://learn.microsoft.com/api/contentbrowser/search/{playlist_type}/{playlist_id}/{sub_type}'

        return self.playlist_result(
            self._entries(url_base, playlist_id), playlist_id, title=title, description=description)
class MicrosoftLearnEpisodeIE(MicrosoftMediusBaseIE):
    """Extractor for a single episode of a learn.microsoft.com show."""

    _VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?shows/[\w-]+/(?P<id>[^?#/]+)'
    _TESTS = [{
        'url': 'https://learn.microsoft.com/en-us/shows/bash-for-beginners/what-is-the-difference-between-a-terminal-and-a-shell-2-of-20-bash-for-beginners/',
        'info_dict': {
            'id': 'd44e1a03-a0e5-45c2-9496-5c9fa08dc94c',
            'ext': 'ismv',
            'title': 'What is the Difference Between a Terminal and a Shell? (Part 2 of 20)',
            'description': 'md5:7bbbfb593d21c2cf2babc3715ade6b88',
            'timestamp': 1676339547,
            'upload_date': '20230214',
            'thumbnail': r're:https://learn\.microsoft\.com/video/media/.*\.png',
            'subtitles': 'count:14',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The page's "entryId" meta tag keys the public video API
        entry_id = self._html_search_meta('entryId', webpage, 'entryId', fatal=True)
        video_info = self._download_json(
            f'https://learn.microsoft.com/api/video/public/v1/entries/{entry_id}', video_id)

        formats = self._extract_ism(video_info['publicVideo']['adaptiveVideoUrl'], video_id)
        captions = traverse_obj(video_info, (
            'publicVideo', 'captions', lambda _, v: url_or_none(v['url']), {
                'tag': ('language', {str}),
                'url': 'url',
            }))
        subtitles = self._sub_to_dict(captions)
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        extra_info = traverse_obj(video_info, {
            'timestamp': ('createTime', {parse_iso8601}),
            'thumbnails': ('publicVideo', 'thumbnailOtherSizes', ..., {'url': {url_or_none}}),
        })

        return {
            'id': entry_id,
            'formats': formats,
            'subtitles': subtitles,
            'title': title,
            'description': description,
            **extra_info,
        }
class MicrosoftLearnSessionIE(InfoExtractor):
    """Extractor for a single session of a learn.microsoft.com event."""

    _VALID_URL = r'https?://learn\.microsoft\.com/(?:[\w-]+/)?events/[\w-]+/(?P<id>[^?#/]+)'
    _TESTS = [{
        'url': 'https://learn.microsoft.com/en-us/events/build-2022/ts01-rapidly-code-test-ship-from-secure-cloud-developer-environments',
        'info_dict': {
            'id': '9640d86c-f513-4889-959e-5dace86e7d2b',
            'ext': 'ismv',
            'title': 'Rapidly code, test and ship from secure cloud developer environments - Events',
            'description': 'md5:f26c1a85d41c1cffd27a0279254a25c3',
            'timestamp': 1653408600,
            'upload_date': '20220524',
            'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        start_date = self._html_search_meta('startDate', webpage, 'startDate')
        timestamp = parse_iso8601(start_date)

        # The video itself is handed to MicrosoftMediusIE via the page's
        # "externalVideoUrl" meta tag; the metadata above is passed along
        external_url = self._html_search_meta('externalVideoUrl', webpage, 'videoUrl', fatal=True)
        return self.url_result(
            external_url, url_transparent=True, ie=MicrosoftMediusIE,
            title=title, description=description, timestamp=timestamp)
class MicrosoftBuildIE(InfoExtractor):
    """Extractor for build.microsoft.com sessions (single session or full list)."""

    _VALID_URL = [
        r'https?://build\.microsoft\.com/[\w-]+/sessions/(?P<id>[\da-f-]+)',
        r'https?://build\.microsoft\.com/[\w-]+/(?P<id>sessions)/?(?:[?#]|$)',
    ]
    _TESTS = [{
        'url': 'https://build.microsoft.com/en-US/sessions/b49feb31-afcd-4217-a538-d3ca1d171198?source=sessions',
        'info_dict': {
            'id': 'aee55fb5-fcf9-4b38-b764-a3527cb57554',
            'ext': 'ismv',
            'title': 'Microsoft Build opening keynote',
            'description': 'md5:d38338f336ef4b6ef9ad2a7466a76655',
            'timestamp': 1716307200,
            'upload_date': '20240521',
            'thumbnail': r're:https://mediusimg\.event\.microsoft\.com/video-\d+/thumbnail\.jpg.*',
        },
    }, {
        'url': 'https://build.microsoft.com/en-US/sessions',
        'info_dict': {
            'id': 'sessions',
        },
        'playlist_mincount': 418,
    }]

    def _real_extract(self, url):
        """Return a playlist of all sessions, or the one session matching the URL.

        The matched id is either the literal 'sessions' (playlist) or a
        session id. Raises ExtractorError when a session id has no entry
        with an on-demand video.
        """
        # Imported locally: the module-level import list does not provide it
        from ..utils import ExtractorError

        video_id = self._match_id(url)

        sessions = self._download_json(
            'https://api-v2.build.microsoft.com/api/session/all/en-US', video_id, 'Downloading video info')
        entries = [
            self.url_result(
                session['onDemand'], ie=MicrosoftMediusIE, url_transparent=True, **traverse_obj(session, {
                    'id': ('sessionId', {str}),
                    'title': ('title', {str}),
                    'description': ('description', {str}),
                    'timestamp': ('startDateTime', {parse_iso8601}),
                }))
            # Skip sessions lacking a valid on-demand URL: indexing them would
            # raise KeyError or produce an entry with no URL to extract
            for session in traverse_obj(sessions, lambda _, v: url_or_none(v['onDemand']))
        ]

        if video_id == 'sessions':
            return self.playlist_result(entries, video_id)

        entry = traverse_obj(entries, (lambda _, v: v['id'] == video_id), get_all=False)
        if not entry:
            # Fail with a clear message rather than returning None
            raise ExtractorError(
                f'No on-demand video found for session {video_id}', expected=True, video_id=video_id)
        return entry

View file

@ -1,188 +0,0 @@
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
smuggle_url,
unsmuggle_url,
xpath_text,
)
class MicrosoftVirtualAcademyBaseIE(InfoExtractor):
    """Shared helpers for the Microsoft Virtual Academy extractors."""

    def _extract_base_url(self, course_id, display_id):
        """Fetch the JSON response of the products API for `course_id`."""
        api_url = f'https://api-mlxprod.microsoft.com/services/products/anonymous/{course_id}'
        return self._download_json(api_url, display_id, 'Downloading course base URL')

    def _extract_chapter_and_title(self, title):
        """Split a "<number> | <title>" string into (chapter number, title).

        Returns (None, None) for an empty title and (None, title) when the
        title does not carry a chapter prefix.
        """
        if not title:
            return None, None
        match = re.search(r'(?P<chapter>\d+)\s*\|\s*(?P<title>.+)', title)
        if not match:
            return None, title
        return int(match.group('chapter')), match.group('title')
class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE):
    """Extractor for a single Microsoft Virtual Academy video."""

    IE_NAME = 'mva'
    IE_DESC = 'Microsoft Virtual Academy videos'

    _VALID_URL = rf'(?:{IE_NAME}:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)'

    _TESTS = [{
        'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382',
        'md5': '7826c44fc31678b12ad8db11f6b5abb9',
        'info_dict': {
            'id': 'gfVXISmEB_6804984382',
            'ext': 'mp4',
            'title': 'Course Introduction',
            'formats': 'mincount:3',
            'subtitles': {
                'en': [{
                    'ext': 'ttml',
                }],
            },
        },
    }, {
        'url': 'mva:11788:gfVXISmEB_6804984382',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})

        match = self._match_valid_url(url)
        course_id, video_id = match.group('course_id'), match.group('id')

        # A base URL smuggled in by the course extractor saves one API request
        base_url = smuggled_data.get('base_url') or self._extract_base_url(course_id, video_id)

        settings = self._download_xml(
            f'{base_url}/content/content_{video_id}/videosettings.xml?v=1',
            video_id, 'Downloading video settings XML')

        _, title = self._extract_chapter_and_title(xpath_text(
            settings, './/Title', 'title', fatal=True))

        formats = []
        for sources in settings.findall('.//MediaSources'):
            is_smooth = sources.get('videoType') == 'smoothstreaming'
            for source in sources.findall('./MediaSource'):
                video_url = source.text
                if not (video_url and video_url.startswith('http')):
                    continue
                if is_smooth:
                    formats.extend(self._extract_ism_formats(
                        video_url, video_id, 'mss', fatal=False))
                    continue

                video_mode = source.get('videoMode')
                # videoMode values such as "720p" give the height
                height = int_or_none(self._search_regex(
                    r'^(\d+)[pP]$', video_mode or '', 'height', default=None))

                # "codec" is either "<acodec>,<vcodec>" or a lone video codec
                acodec = vcodec = None
                codec = source.get('codec')
                codec_parts = codec.split(',') if codec else []
                if len(codec_parts) == 2:
                    acodec, vcodec = codec_parts
                elif len(codec_parts) == 1:
                    vcodec = codec_parts[0]

                formats.append({
                    'url': video_url,
                    'format_id': video_mode,
                    'height': height,
                    'acodec': acodec,
                    'vcodec': vcodec,
                })

        # Every marker resource is filed under 'en'
        english_subs = [{
            'url': f'{base_url}/{source.text}',
            'ext': source.get('type'),
        } for source in settings.findall('.//MarkerResourceSource') if source.text]
        subtitles = {'en': english_subs} if english_subs else {}

        return {
            'id': video_id,
            'title': title,
            'subtitles': subtitles,
            'formats': formats,
        }
class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE):
    """Extractor for a whole Microsoft Virtual Academy course (playlist)."""

    IE_NAME = 'mva:course'
    IE_DESC = 'Microsoft Virtual Academy courses'

    _VALID_URL = rf'(?:{IE_NAME}:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
        'info_dict': {
            'id': '11788',
            'title': 'Microsoft Azure Fundamentals: Virtual Machines',
        },
        'playlist_count': 36,
    }, {
        # with emphasized chapters
        'url': 'https://mva.microsoft.com/en-US/training-courses/developing-windows-10-games-with-construct-2-16335',
        'info_dict': {
            'id': '16335',
            'title': 'Developing Windows 10 Games with Construct 2',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.microsoftvirtualacademy.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788',
        'only_matching': True,
    }, {
        'url': 'mva:course:11788',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # URLs accepted by the single-video extractor are never handled here
        if MicrosoftVirtualAcademyIE.suitable(url):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        match = self._match_valid_url(url)
        course_id, display_id = match.group('id'), match.group('display_id')

        base_url = self._extract_base_url(course_id, display_id)

        manifest = self._download_json(
            f'{base_url}/imsmanifestlite.json',
            display_id, 'Downloading course manifest JSON')['manifest']

        organization = manifest['organizations']['organization'][0]

        entries = []
        for chapter in organization['item']:
            chapter_number, chapter_title = self._extract_chapter_and_title(chapter.get('title'))
            chapter_id = chapter.get('@identifier')
            for item in chapter.get('item', []):
                item_id = item.get('@identifier')
                if not item_id:
                    continue
                metadata = item.get('resource', {}).get('metadata') or {}
                # Only items whose resource type is "Video" become entries
                if metadata.get('learningresourcetype') != 'Video':
                    continue
                _, item_title = self._extract_chapter_and_title(item.get('title'))
                entries.append({
                    '_type': 'url_transparent',
                    # The base URL is smuggled along for the video extractor
                    'url': smuggle_url(
                        f'mva:{course_id}:{item_id}', {'base_url': base_url}),
                    'title': item_title,
                    'description': metadata.get('description'),
                    'duration': parse_duration(metadata.get('duration')),
                    'chapter': chapter_title,
                    'chapter_number': chapter_number,
                    'chapter_id': chapter_id,
                })

        course_title = organization.get('title') or manifest.get('metadata', {}).get('title')

        return self.playlist_result(entries, course_id, course_title)

View file

@ -9,9 +9,10 @@ from ..utils import (
join_nonempty, join_nonempty,
parse_duration, parse_duration,
parse_iso8601, parse_iso8601,
traverse_obj,
try_get, try_get,
url_or_none,
) )
from ..utils.traversal import traverse_obj
class MLBBaseIE(InfoExtractor): class MLBBaseIE(InfoExtractor):
@ -326,15 +327,20 @@ class MLBTVIE(InfoExtractor):
video_id)['data']['Airings'] video_id)['data']['Airings']
formats, subtitles = [], {} formats, subtitles = [], {}
for airing in airings: for airing in traverse_obj(airings, lambda _, v: v['playbackUrls'][0]['href']):
m3u8_url = self._download_json( format_id = join_nonempty('feedType', 'feedLanguage', from_dict=airing)
m3u8_url = traverse_obj(self._download_json(
airing['playbackUrls'][0]['href'].format(scenario='browser~csai'), video_id, airing['playbackUrls'][0]['href'].format(scenario='browser~csai'), video_id,
headers={ note=f'Downloading {format_id} stream info JSON',
errnote=f'Failed to download {format_id} stream info, skipping',
fatal=False, headers={
'Authorization': self._access_token, 'Authorization': self._access_token,
'Accept': 'application/vnd.media-service+json; version=2', 'Accept': 'application/vnd.media-service+json; version=2',
})['stream']['complete'] }), ('stream', 'complete', {url_or_none}))
if not m3u8_url:
continue
f, s = self._extract_m3u8_formats_and_subtitles( f, s = self._extract_m3u8_formats_and_subtitles(
m3u8_url, video_id, 'mp4', m3u8_id=join_nonempty(airing.get('feedType'), airing.get('feedLanguage'))) m3u8_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
formats.extend(f) formats.extend(f)
self._merge_subtitles(s, target=subtitles) self._merge_subtitles(s, target=subtitles)

View file

@ -16,6 +16,7 @@ from ..utils import (
determine_ext, determine_ext,
float_or_none, float_or_none,
int_or_none, int_or_none,
join_nonempty,
mimetype2ext, mimetype2ext,
parse_age_limit, parse_age_limit,
parse_duration, parse_duration,
@ -498,10 +499,8 @@ class NBCNewsIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
m3u8_id=format_id, fatal=False)) m3u8_id=format_id, fatal=False))
continue continue
tbr = int_or_none(va.get('bitrate'), 1000) tbr = int_or_none(va.get('bitrate'), 1000)
if tbr:
format_id += f'-{tbr}'
formats.append({ formats.append({
'format_id': format_id, 'format_id': join_nonempty(format_id, tbr),
'url': public_url, 'url': public_url,
'width': int_or_none(va.get('width')), 'width': int_or_none(va.get('width')),
'height': int_or_none(va.get('height')), 'height': int_or_none(va.get('height')),

View file

@ -22,12 +22,22 @@ from ..utils import (
class NetEaseMusicBaseIE(InfoExtractor): class NetEaseMusicBaseIE(InfoExtractor):
_FORMATS = ['bMusic', 'mMusic', 'hMusic'] # XXX: _extract_formats logic depends on the order of the levels in each tier
_LEVELS = (
'standard', # free tier; 标准; 128kbps mp3 or aac
'higher', # free tier; 192kbps mp3 or aac
'exhigh', # free tier; 极高 (HQ); 320kbps mp3 or aac
'lossless', # VIP tier; 无损 (SQ); 48kHz/16bit flac
'hires', # VIP tier; 高解析度无损 (Hi-Res); 192kHz/24bit flac
'jyeffect', # VIP tier; 高清臻音 (Spatial Audio); 96kHz/24bit flac
'jymaster', # SVIP tier; 超清母带 (Master); 192kHz/24bit flac
'sky', # SVIP tier; 沉浸环绕声 (Surround Audio); flac
)
_API_BASE = 'http://music.163.com/api/' _API_BASE = 'http://music.163.com/api/'
_GEO_BYPASS = False _GEO_BYPASS = False
@staticmethod @staticmethod
def kilo_or_none(value): def _kilo_or_none(value):
return int_or_none(value, scale=1000) return int_or_none(value, scale=1000)
def _create_eapi_cipher(self, api_path, query_body, cookies): def _create_eapi_cipher(self, api_path, query_body, cookies):
@ -66,45 +76,43 @@ class NetEaseMusicBaseIE(InfoExtractor):
**headers, **headers,
}, **kwargs) }, **kwargs)
def _call_player_api(self, song_id, bitrate): def _call_player_api(self, song_id, level):
return self._download_eapi_json( return self._download_eapi_json(
'/song/enhance/player/url', song_id, {'ids': f'[{song_id}]', 'br': bitrate}, '/song/enhance/player/url/v1', song_id,
note=f'Downloading song URL info: bitrate {bitrate}') {'ids': f'[{song_id}]', 'level': level, 'encodeType': 'flac'},
note=f'Downloading song URL info: level {level}')
def extract_formats(self, info): def _extract_formats(self, info):
err = 0
formats = [] formats = []
song_id = info['id'] song_id = info['id']
for song_format in self._FORMATS: for level in self._LEVELS:
details = info.get(song_format) song = traverse_obj(
if not details: self._call_player_api(song_id, level), ('data', lambda _, v: url_or_none(v['url']), any))
if not song:
break # Media is not available due to removal or geo-restriction
actual_level = song.get('level')
if actual_level and actual_level != level:
if level in ('lossless', 'jymaster'):
break # We've already extracted the highest level of the user's account tier
continue continue
bitrate = int_or_none(details.get('bitrate')) or 999000
for song in traverse_obj(self._call_player_api(song_id, bitrate), ('data', lambda _, v: url_or_none(v['url']))):
song_url = song['url']
if self._is_valid_url(song_url, info['id'], 'song'):
formats.append({ formats.append({
'url': song_url, 'url': song['url'],
'format_id': song_format, 'format_id': level,
'asr': traverse_obj(details, ('sr', {int_or_none})), 'vcodec': 'none',
**traverse_obj(song, { **traverse_obj(song, {
'ext': ('type', {str}), 'ext': ('type', {str}),
'abr': ('br', {self.kilo_or_none}), 'abr': ('br', {self._kilo_or_none}),
'filesize': ('size', {int_or_none}), 'filesize': ('size', {int_or_none}),
}), }),
}) })
elif err == 0: if not actual_level:
err = traverse_obj(song, ('code', {int})) or 0 break # Only 1 level is available if API does not return a value (netease:program)
if not formats: if not formats:
if err != 0 and (err < 200 or err >= 400):
raise ExtractorError(f'No media links found (site code {err})', expected=True)
else:
self.raise_geo_restricted( self.raise_geo_restricted(
'No media links found: probably due to geo restriction.', countries=['CN']) 'No media links found; possibly due to geo restriction', countries=['CN'])
return formats return formats
def query_api(self, endpoint, video_id, note): def _query_api(self, endpoint, video_id, note):
result = self._download_json( result = self._download_json(
f'{self._API_BASE}{endpoint}', video_id, note, headers={'Referer': self._API_BASE}) f'{self._API_BASE}{endpoint}', video_id, note, headers={'Referer': self._API_BASE})
code = traverse_obj(result, ('code', {int})) code = traverse_obj(result, ('code', {int}))
@ -128,32 +136,29 @@ class NetEaseMusicBaseIE(InfoExtractor):
class NetEaseMusicIE(NetEaseMusicBaseIE): class NetEaseMusicIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:song' IE_NAME = 'netease:song'
IE_DESC = '网易云音乐' IE_DESC = '网易云音乐'
_VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://music.163.com/#/song?id=548648087', 'url': 'https://music.163.com/#/song?id=550136151',
'info_dict': { 'info_dict': {
'id': '548648087', 'id': '550136151',
'ext': 'mp3', 'ext': 'mp3',
'title': '戒烟 (Live)', 'title': 'It\'s Ok (Live)',
'creator': '李荣浩 / 朱正廷 / 陈立农 / 尤长靖 / ONER灵超 / ONER木子洋 / 杨非同 / 陆定昊', 'creators': 'count:10',
'timestamp': 1522944000, 'timestamp': 1522944000,
'upload_date': '20180405', 'upload_date': '20180405',
'description': 'md5:3650af9ee22c87e8637cb2dde22a765c', 'description': 'md5:9fd07059c2ccee3950dc8363429a3135',
'subtitles': {'lyrics': [{'ext': 'lrc'}]}, 'duration': 197,
'duration': 256,
'thumbnail': r're:^http.*\.jpg', 'thumbnail': r're:^http.*\.jpg',
'album': '偶像练习生 表演曲目合集', 'album': '偶像练习生 表演曲目合集',
'average_rating': int, 'average_rating': int,
'album_artist': '偶像练习生', 'album_artists': ['偶像练习生'],
}, },
}, { }, {
'note': 'No lyrics.',
'url': 'http://music.163.com/song?id=17241424', 'url': 'http://music.163.com/song?id=17241424',
'info_dict': { 'info_dict': {
'id': '17241424', 'id': '17241424',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Opus 28', 'title': 'Opus 28',
'creator': 'Dustin O\'Halloran',
'upload_date': '20080211', 'upload_date': '20080211',
'timestamp': 1202745600, 'timestamp': 1202745600,
'duration': 263, 'duration': 263,
@ -161,15 +166,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'album': 'Piano Solos Vol. 2', 'album': 'Piano Solos Vol. 2',
'album_artist': 'Dustin O\'Halloran', 'album_artist': 'Dustin O\'Halloran',
'average_rating': int, 'average_rating': int,
'description': '[00:05.00]纯音乐,请欣赏\n',
'album_artists': ['Dustin O\'Halloran'],
'creators': ['Dustin O\'Halloran'],
'subtitles': {'lyrics': [{'ext': 'lrc'}]},
}, },
}, { }, {
'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846', 'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
'md5': '95826c73ea50b1c288b22180ec9e754d', 'md5': 'b896be78d8d34bd7bb665b26710913ff',
'info_dict': { 'info_dict': {
'id': '95670', 'id': '95670',
'ext': 'mp3', 'ext': 'mp3',
'title': '国际歌', 'title': '国际歌',
'creator': '马备',
'upload_date': '19911130', 'upload_date': '19911130',
'timestamp': 691516800, 'timestamp': 691516800,
'description': 'md5:1ba2f911a2b0aa398479f595224f2141', 'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
@ -180,6 +188,8 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'average_rating': int, 'average_rating': int,
'album': '红色摇滚', 'album': '红色摇滚',
'album_artist': '侯牧人', 'album_artist': '侯牧人',
'creators': ['马备'],
'album_artists': ['侯牧人'],
}, },
}, { }, {
'url': 'http://music.163.com/#/song?id=32102397', 'url': 'http://music.163.com/#/song?id=32102397',
@ -188,7 +198,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'id': '32102397', 'id': '32102397',
'ext': 'mp3', 'ext': 'mp3',
'title': 'Bad Blood', 'title': 'Bad Blood',
'creator': 'Taylor Swift / Kendrick Lamar', 'creators': ['Taylor Swift', 'Kendrick Lamar'],
'upload_date': '20150516', 'upload_date': '20150516',
'timestamp': 1431792000, 'timestamp': 1431792000,
'description': 'md5:21535156efb73d6d1c355f95616e285a', 'description': 'md5:21535156efb73d6d1c355f95616e285a',
@ -207,7 +217,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'id': '22735043', 'id': '22735043',
'ext': 'mp3', 'ext': 'mp3',
'title': '소원을 말해봐 (Genie)', 'title': '소원을 말해봐 (Genie)',
'creator': '少女时代', 'creators': ['少女时代'],
'upload_date': '20100127', 'upload_date': '20100127',
'timestamp': 1264608000, 'timestamp': 1264608000,
'description': 'md5:03d1ffebec3139aa4bafe302369269c5', 'description': 'md5:03d1ffebec3139aa4bafe302369269c5',
@ -251,12 +261,12 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
song_id = self._match_id(url) song_id = self._match_id(url)
info = self.query_api( info = self._query_api(
f'song/detail?id={song_id}&ids=%5B{song_id}%5D', song_id, 'Downloading song info')['songs'][0] f'song/detail?id={song_id}&ids=%5B{song_id}%5D', song_id, 'Downloading song info')['songs'][0]
formats = self.extract_formats(info) formats = self._extract_formats(info)
lyrics = self._process_lyrics(self.query_api( lyrics = self._process_lyrics(self._query_api(
f'song/lyric?id={song_id}&lv=-1&tv=-1', song_id, 'Downloading lyrics data')) f'song/lyric?id={song_id}&lv=-1&tv=-1', song_id, 'Downloading lyrics data'))
lyric_data = { lyric_data = {
'description': traverse_obj(lyrics, (('lyrics_merged', 'lyrics'), 0, 'data'), get_all=False), 'description': traverse_obj(lyrics, (('lyrics_merged', 'lyrics'), 0, 'data'), get_all=False),
@ -267,14 +277,14 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
'id': song_id, 'id': song_id,
'formats': formats, 'formats': formats,
'alt_title': '/'.join(traverse_obj(info, (('transNames', 'alias'), ...))) or None, 'alt_title': '/'.join(traverse_obj(info, (('transNames', 'alias'), ...))) or None,
'creator': ' / '.join(traverse_obj(info, ('artists', ..., 'name'))) or None, 'creators': traverse_obj(info, ('artists', ..., 'name')) or None,
'album_artist': ' / '.join(traverse_obj(info, ('album', 'artists', ..., 'name'))) or None, 'album_artists': traverse_obj(info, ('album', 'artists', ..., 'name')) or None,
**lyric_data, **lyric_data,
**traverse_obj(info, { **traverse_obj(info, {
'title': ('name', {str}), 'title': ('name', {str}),
'timestamp': ('album', 'publishTime', {self.kilo_or_none}), 'timestamp': ('album', 'publishTime', {self._kilo_or_none}),
'thumbnail': ('album', 'picUrl', {url_or_none}), 'thumbnail': ('album', 'picUrl', {url_or_none}),
'duration': ('duration', {self.kilo_or_none}), 'duration': ('duration', {self._kilo_or_none}),
'album': ('album', 'name', {str}), 'album': ('album', 'name', {str}),
'average_rating': ('score', {int_or_none}), 'average_rating': ('score', {int_or_none}),
}), }),
@ -284,7 +294,7 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
class NetEaseMusicAlbumIE(NetEaseMusicBaseIE): class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:album' IE_NAME = 'netease:album'
IE_DESC = '网易云音乐 - 专辑' IE_DESC = '网易云音乐 - 专辑'
_VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)' _VALID_URL = r'https?://music\.163\.com/(?:#/)?album\?id=(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://music.163.com/#/album?id=133153666', 'url': 'https://music.163.com/#/album?id=133153666',
'info_dict': { 'info_dict': {
@ -294,7 +304,7 @@ class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
'description': '桃几2021年翻唱合集', 'description': '桃几2021年翻唱合集',
'thumbnail': r're:^http.*\.jpg', 'thumbnail': r're:^http.*\.jpg',
}, },
'playlist_mincount': 13, 'playlist_mincount': 12,
}, { }, {
'url': 'http://music.163.com/#/album?id=220780', 'url': 'http://music.163.com/#/album?id=220780',
'info_dict': { 'info_dict': {
@ -328,7 +338,7 @@ class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
class NetEaseMusicSingerIE(NetEaseMusicBaseIE): class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:singer' IE_NAME = 'netease:singer'
IE_DESC = '网易云音乐 - 歌手' IE_DESC = '网易云音乐 - 歌手'
_VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)' _VALID_URL = r'https?://music\.163\.com/(?:#/)?artist\?id=(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'note': 'Singer has aliases.', 'note': 'Singer has aliases.',
'url': 'http://music.163.com/#/artist?id=10559', 'url': 'http://music.163.com/#/artist?id=10559',
@ -358,7 +368,7 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
singer_id = self._match_id(url) singer_id = self._match_id(url)
info = self.query_api( info = self._query_api(
f'artist/{singer_id}?id={singer_id}', singer_id, note='Downloading singer data') f'artist/{singer_id}?id={singer_id}', singer_id, note='Downloading singer data')
name = join_nonempty( name = join_nonempty(
@ -372,7 +382,7 @@ class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
class NetEaseMusicListIE(NetEaseMusicBaseIE): class NetEaseMusicListIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:playlist' IE_NAME = 'netease:playlist'
IE_DESC = '网易云音乐 - 歌单' IE_DESC = '网易云音乐 - 歌单'
_VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)' _VALID_URL = r'https?://music\.163\.com/(?:#/)?(?:playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://music.163.com/#/playlist?id=79177352', 'url': 'http://music.163.com/#/playlist?id=79177352',
'info_dict': { 'info_dict': {
@ -405,11 +415,15 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
'url': 'http://music.163.com/#/discover/toplist?id=3733003', 'url': 'http://music.163.com/#/discover/toplist?id=3733003',
'info_dict': { 'info_dict': {
'id': '3733003', 'id': '3733003',
'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}', 'title': 're:韩国Melon排行榜周榜(?: [0-9]{4}-[0-9]{2}-[0-9]{2})?',
'description': 'md5:73ec782a612711cadc7872d9c1e134fc', 'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
'upload_date': '20200109',
'uploader_id': '2937386',
'tags': ['韩语', '榜单'],
'uploader': 'Melon榜单',
'timestamp': 1578569373,
}, },
'playlist_count': 50, 'playlist_count': 50,
'skip': 'Blocked outside Mainland China',
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -426,7 +440,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
'tags': ('tags', ..., {str}), 'tags': ('tags', ..., {str}),
'uploader': ('creator', 'nickname', {str}), 'uploader': ('creator', 'nickname', {str}),
'uploader_id': ('creator', 'userId', {str_or_none}), 'uploader_id': ('creator', 'userId', {str_or_none}),
'timestamp': ('updateTime', {self.kilo_or_none}), 'timestamp': ('updateTime', {self._kilo_or_none}),
})) }))
if traverse_obj(info, ('playlist', 'specialType')) == 10: if traverse_obj(info, ('playlist', 'specialType')) == 10:
metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}' metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}'
@ -437,7 +451,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE):
class NetEaseMusicMvIE(NetEaseMusicBaseIE): class NetEaseMusicMvIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:mv' IE_NAME = 'netease:mv'
IE_DESC = '网易云音乐 - MV' IE_DESC = '网易云音乐 - MV'
_VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)' _VALID_URL = r'https?://music\.163\.com/(?:#/)?mv\?id=(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://music.163.com/#/mv?id=10958064', 'url': 'https://music.163.com/#/mv?id=10958064',
'info_dict': { 'info_dict': {
@ -445,7 +459,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': '交换余生', 'title': '交换余生',
'description': 'md5:e845872cff28820642a2b02eda428fea', 'description': 'md5:e845872cff28820642a2b02eda428fea',
'creator': '林俊杰', 'creators': ['林俊杰'],
'upload_date': '20200916', 'upload_date': '20200916',
'thumbnail': r're:http.*\.jpg', 'thumbnail': r're:http.*\.jpg',
'duration': 364, 'duration': 364,
@ -460,7 +474,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': '이럴거면 그러지말지', 'title': '이럴거면 그러지말지',
'description': '白雅言自作曲唱甜蜜爱情', 'description': '白雅言自作曲唱甜蜜爱情',
'creator': '白娥娟', 'creators': ['白娥娟'],
'upload_date': '20150520', 'upload_date': '20150520',
'thumbnail': r're:http.*\.jpg', 'thumbnail': r're:http.*\.jpg',
'duration': 216, 'duration': 216,
@ -468,12 +482,28 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
'like_count': int, 'like_count': int,
'comment_count': int, 'comment_count': int,
}, },
'skip': 'Blocked outside Mainland China',
}, {
'note': 'This MV has multiple creators.',
'url': 'https://music.163.com/#/mv?id=22593543',
'info_dict': {
'id': '22593543',
'ext': 'mp4',
'title': '老北京杀器',
'creators': ['秃子2z', '辉子', 'Saber梁维嘉'],
'duration': 206,
'upload_date': '20240618',
'like_count': int,
'comment_count': int,
'thumbnail': r're:http.*\.jpg',
'view_count': int,
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mv_id = self._match_id(url) mv_id = self._match_id(url)
info = self.query_api( info = self._query_api(
f'mv/detail?id={mv_id}&type=mp4', mv_id, 'Downloading mv info')['data'] f'mv/detail?id={mv_id}&type=mp4', mv_id, 'Downloading mv info')['data']
formats = [ formats = [
@ -484,13 +514,13 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
return { return {
'id': mv_id, 'id': mv_id,
'formats': formats, 'formats': formats,
'creators': traverse_obj(info, ('artists', ..., 'name')) or [info.get('artistName')],
**traverse_obj(info, { **traverse_obj(info, {
'title': ('name', {str}), 'title': ('name', {str}),
'description': (('desc', 'briefDesc'), {str}, {lambda x: x or None}), 'description': (('desc', 'briefDesc'), {str}, {lambda x: x or None}),
'creator': ('artistName', {str}),
'upload_date': ('publishTime', {unified_strdate}), 'upload_date': ('publishTime', {unified_strdate}),
'thumbnail': ('cover', {url_or_none}), 'thumbnail': ('cover', {url_or_none}),
'duration': ('duration', {self.kilo_or_none}), 'duration': ('duration', {self._kilo_or_none}),
'view_count': ('playCount', {int_or_none}), 'view_count': ('playCount', {int_or_none}),
'like_count': ('likeCount', {int_or_none}), 'like_count': ('likeCount', {int_or_none}),
'comment_count': ('commentCount', {int_or_none}), 'comment_count': ('commentCount', {int_or_none}),
@ -501,7 +531,7 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE):
class NetEaseMusicProgramIE(NetEaseMusicBaseIE): class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:program' IE_NAME = 'netease:program'
IE_DESC = '网易云音乐 - 电台节目' IE_DESC = '网易云音乐 - 电台节目'
_VALID_URL = r'https?://music\.163\.com/(#/?)program\?id=(?P<id>[0-9]+)' _VALID_URL = r'https?://music\.163\.com/(?:#/)?program\?id=(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://music.163.com/#/program?id=10109055', 'url': 'http://music.163.com/#/program?id=10109055',
'info_dict': { 'info_dict': {
@ -509,7 +539,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
'ext': 'mp3', 'ext': 'mp3',
'title': '不丹足球背后的故事', 'title': '不丹足球背后的故事',
'description': '喜马拉雅人的足球梦 ...', 'description': '喜马拉雅人的足球梦 ...',
'creator': '大话西藏', 'creators': ['大话西藏'],
'timestamp': 1434179287, 'timestamp': 1434179287,
'upload_date': '20150613', 'upload_date': '20150613',
'thumbnail': r're:http.*\.jpg', 'thumbnail': r're:http.*\.jpg',
@ -522,7 +552,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
'id': '10141022', 'id': '10141022',
'title': '滚滚电台的有声节目', 'title': '滚滚电台的有声节目',
'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b', 'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
'creator': '滚滚电台ORZ', 'creators': ['滚滚电台ORZ'],
'timestamp': 1434450733, 'timestamp': 1434450733,
'upload_date': '20150616', 'upload_date': '20150616',
'thumbnail': r're:http.*\.jpg', 'thumbnail': r're:http.*\.jpg',
@ -536,7 +566,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
'ext': 'mp3', 'ext': 'mp3',
'title': '滚滚电台的有声节目', 'title': '滚滚电台的有声节目',
'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b', 'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
'creator': '滚滚电台ORZ', 'creators': ['滚滚电台ORZ'],
'timestamp': 1434450733, 'timestamp': 1434450733,
'upload_date': '20150616', 'upload_date': '20150616',
'thumbnail': r're:http.*\.jpg', 'thumbnail': r're:http.*\.jpg',
@ -550,7 +580,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
program_id = self._match_id(url) program_id = self._match_id(url)
info = self.query_api( info = self._query_api(
f'dj/program/detail?id={program_id}', program_id, note='Downloading program info')['program'] f'dj/program/detail?id={program_id}', program_id, note='Downloading program info')['program']
metainfo = traverse_obj(info, { metainfo = traverse_obj(info, {
@ -558,17 +588,17 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
'description': ('description', {str}), 'description': ('description', {str}),
'creator': ('dj', 'brand', {str}), 'creator': ('dj', 'brand', {str}),
'thumbnail': ('coverUrl', {url_or_none}), 'thumbnail': ('coverUrl', {url_or_none}),
'timestamp': ('createTime', {self.kilo_or_none}), 'timestamp': ('createTime', {self._kilo_or_none}),
}) })
if not self._yes_playlist( if not self._yes_playlist(
info['songs'] and program_id, info['mainSong']['id'], playlist_label='program', video_label='song'): info['songs'] and program_id, info['mainSong']['id'], playlist_label='program', video_label='song'):
formats = self.extract_formats(info['mainSong']) formats = self._extract_formats(info['mainSong'])
return { return {
'id': str(info['mainSong']['id']), 'id': str(info['mainSong']['id']),
'formats': formats, 'formats': formats,
'duration': traverse_obj(info, ('mainSong', 'duration', {self.kilo_or_none})), 'duration': traverse_obj(info, ('mainSong', 'duration', {self._kilo_or_none})),
**metainfo, **metainfo,
} }
@ -579,7 +609,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE): class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
IE_NAME = 'netease:djradio' IE_NAME = 'netease:djradio'
IE_DESC = '网易云音乐 - 电台' IE_DESC = '网易云音乐 - 电台'
_VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)' _VALID_URL = r'https?://music\.163\.com/(?:#/)?djradio\?id=(?P<id>[0-9]+)'
_TEST = { _TEST = {
'url': 'http://music.163.com/#/djradio?id=42', 'url': 'http://music.163.com/#/djradio?id=42',
'info_dict': { 'info_dict': {
@ -597,7 +627,7 @@ class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
metainfo = {} metainfo = {}
entries = [] entries = []
for offset in itertools.count(start=0, step=self._PAGE_SIZE): for offset in itertools.count(start=0, step=self._PAGE_SIZE):
info = self.query_api( info = self._query_api(
f'dj/program/byradio?asc=false&limit={self._PAGE_SIZE}&radioId={dj_id}&offset={offset}', f'dj/program/byradio?asc=false&limit={self._PAGE_SIZE}&radioId={dj_id}&offset={offset}',
dj_id, note=f'Downloading dj programs - {offset}') dj_id, note=f'Downloading dj programs - {offset}')

View file

@ -4,6 +4,7 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
clean_html, clean_html,
filter_dict,
get_element_by_class, get_element_by_class,
int_or_none, int_or_none,
join_nonempty, join_nonempty,
@ -590,21 +591,22 @@ class NhkRadiruIE(InfoExtractor):
IE_DESC = 'NHK らじる (Radiru/Rajiru)' IE_DESC = 'NHK らじる (Radiru/Rajiru)'
_VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?' _VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
_TESTS = [{ _TESTS = [{
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3926210', 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_4003239',
'skip': 'Episode expired on 2024-02-24', 'skip': 'Episode expired on 2024-06-09',
'info_dict': { 'info_dict': {
'title': 'ジャズ・トゥナイト シリーズJAZZジャイアンツ 56 ジョニー・ホッジス', 'title': 'ジャズ・トゥナイト ジャズ「Night and Day」特集',
'id': '0449_01_3926210', 'id': '0449_01_4003239',
'ext': 'm4a', 'ext': 'm4a',
'uploader': 'NHK FM 東京',
'description': 'md5:ad05f3c3f3f6e99b2e69f9b5e49551dc',
'series': 'ジャズ・トゥナイト', 'series': 'ジャズ・トゥナイト',
'uploader': 'NHK-FM', 'channel': 'NHK FM 東京',
'channel': 'NHK-FM',
'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg', 'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
'release_date': '20240217', 'upload_date': '20240601',
'description': 'md5:a456ee8e5e59e6dd2a7d32e62386e811', 'series_id': '0449_01',
'timestamp': 1708185600, 'release_date': '20240601',
'release_timestamp': 1708178400, 'timestamp': 1717257600,
'upload_date': '20240217', 'release_timestamp': 1717250400,
}, },
}, { }, {
# playlist, airs every weekday so it should _hopefully_ be okay forever # playlist, airs every weekday so it should _hopefully_ be okay forever
@ -613,71 +615,145 @@ class NhkRadiruIE(InfoExtractor):
'id': '0458_01', 'id': '0458_01',
'title': 'ベストオブクラシック', 'title': 'ベストオブクラシック',
'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。', 'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。',
'channel': 'NHK-FM',
'uploader': 'NHK-FM',
'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg', 'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg',
'series_id': '0458_01',
'uploader': 'NHK FM',
'channel': 'NHK FM',
'series': 'ベストオブクラシック',
}, },
'playlist_mincount': 3, 'playlist_mincount': 3,
}, { }, {
# one with letters in the id # one with letters in the id
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F300_06_3738470', 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F683_01_3910688',
'note': 'Expires on 2024-03-31', 'note': 'Expires on 2025-03-31',
'info_dict': { 'info_dict': {
'id': 'F300_06_3738470', 'id': 'F683_01_3910688',
'ext': 'm4a', 'ext': 'm4a',
'title': '有島武郎「一房のぶどう」', 'title': '夏目漱石「文鳥」第1回',
'description': '朗読:川野一宇(ラジオ深夜便アンカー)\r\n\r\n2016年12月8日放送「ラジオ深夜便『アンカー朗読シリーズ』」より', 'series': '【らじる文庫】夏目漱石「文鳥」全4回',
'channel': 'NHKラジオ第1、NHK-FM', 'series_id': 'F683_01',
'uploader': 'NHKラジオ第1、NHK-FM', 'description': '朗読:浅井理アナウンサー',
'timestamp': 1635757200, 'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F683/img/roudoku_05_rod_640.jpg',
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F300/img/corner/box_109_thumbnail.jpg', 'upload_date': '20240106',
'release_date': '20161207', 'release_date': '20240106',
'series': 'らじる文庫 by ラジオ深夜便 ', 'uploader': 'NHK R1',
'release_timestamp': 1481126700, 'release_timestamp': 1704511800,
'upload_date': '20211101', 'channel': 'NHK R1',
'timestamp': 1704512700,
}, },
'expected_warnings': ['Unable to download JSON metadata', 'Failed to get extended description'], 'expected_warnings': ['Unable to download JSON metadata',
'Failed to get extended metadata. API returned Error 1: Invalid parameters'],
}, { }, {
# news # news
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109', 'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_4012173',
'skip': 'Expires on 2023-04-17',
'info_dict': { 'info_dict': {
'id': 'F261_01_3855109', 'id': 'F261_01_4012173',
'ext': 'm4a', 'ext': 'm4a',
'channel': 'NHKラジオ第1', 'channel': 'NHKラジオ第1',
'uploader': 'NHKラジオ第1', 'uploader': 'NHKラジオ第1',
'timestamp': 1681635900,
'release_date': '20230416',
'series': 'NHKラジオニュース', 'series': 'NHKラジオニュース',
'title': '後6時のNHKニュース', 'title': '午前時のNHKニュース',
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg', 'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
'upload_date': '20230416', 'release_timestamp': 1718290800,
'release_timestamp': 1681635600, 'release_date': '20240613',
'timestamp': 1718291400,
'upload_date': '20240613',
}, },
}, {
# fallback when extended metadata fails
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=2834_01_4009298',
'skip': 'Expires on 2024-06-07',
'info_dict': {
'id': '2834_01_4009298',
'title': 'まち☆キラ!開成町特集',
'ext': 'm4a',
'release_date': '20240531',
'upload_date': '20240531',
'series': 'はま☆キラ!',
'thumbnail': 'https://www.nhk.or.jp/prog/img/2834/g2834.jpg',
'channel': 'NHK R1,FM',
'description': '',
'timestamp': 1717123800,
'uploader': 'NHK R1,FM',
'release_timestamp': 1717120800,
'series_id': '2834_01',
},
'expected_warnings': ['Failed to get extended metadata. API returned empty list.'],
}] }]
_API_URL_TMPL = None _API_URL_TMPL = None
def _extract_extended_description(self, episode_id, episode): def _extract_extended_metadata(self, episode_id, aa_vinfo):
service, _, area = traverse_obj(episode, ('aa_vinfo2', {str}, {lambda x: (x or '').partition(',')})) service, _, area = traverse_obj(aa_vinfo, (2, {str}, {lambda x: (x or '').partition(',')}))
aa_vinfo3 = traverse_obj(episode, ('aa_vinfo3', {str}))
detail_url = try_call( detail_url = try_call(
lambda: self._API_URL_TMPL.format(service=service, area=area, dateid=aa_vinfo3)) lambda: self._API_URL_TMPL.format(area=area, service=service, dateid=aa_vinfo[3]))
if not detail_url: if not detail_url:
return return {}
full_meta = traverse_obj( response = self._download_json(
self._download_json(detail_url, episode_id, 'Downloading extended metadata', fatal=False), detail_url, episode_id, 'Downloading extended metadata',
('list', service, 0, {dict})) or {} 'Failed to download extended metadata', fatal=False, expected_status=400)
return join_nonempty('subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta) if not response:
return {}
def _extract_episode_info(self, headline, programme_id, series_meta): if error := traverse_obj(response, ('error', {dict})):
self.report_warning(
'Failed to get extended metadata. API returned '
f'Error {join_nonempty("code", "message", from_dict=error, delim=": ")}')
return {}
full_meta = traverse_obj(response, ('list', service, 0, {dict}))
if not full_meta:
self.report_warning('Failed to get extended metadata. API returned empty list.')
return {}
station = ' '.join(traverse_obj(full_meta, (('service', 'area'), 'name', {str}))) or None
thumbnails = [{
'id': str(id_),
'preference': 1 if id_.startswith('thumbnail') else -2 if id_.startswith('logo') else -1,
**traverse_obj(thumb, {
'url': 'url',
'width': ('width', {int_or_none}),
'height': ('height', {int_or_none}),
}),
} for id_, thumb in traverse_obj(full_meta, ('images', {dict.items}, lambda _, v: v[1]['url']))]
return filter_dict({
'channel': station,
'uploader': station,
'description': join_nonempty(
'subtitle', 'content', 'act', 'music', delim='\n\n', from_dict=full_meta),
'thumbnails': thumbnails,
**traverse_obj(full_meta, {
'title': ('title', {str}),
'timestamp': ('end_time', {unified_timestamp}),
'release_timestamp': ('start_time', {unified_timestamp}),
}),
})
def _extract_episode_info(self, episode, programme_id, series_meta):
episode_id = f'{programme_id}_{episode["id"]}'
aa_vinfo = traverse_obj(episode, ('aa_contents_id', {lambda x: x.split(';')}))
extended_metadata = self._extract_extended_metadata(episode_id, aa_vinfo)
fallback_start_time, _, fallback_end_time = traverse_obj(
aa_vinfo, (4, {str}, {lambda x: (x or '').partition('_')}))
return {
**series_meta,
'id': episode_id,
'formats': self._extract_m3u8_formats(episode.get('stream_url'), episode_id, fatal=False),
'container': 'm4a_dash', # force fixup, AAC-only HLS
'was_live': True,
'title': episode.get('program_title'),
'description': episode.get('program_sub_title'), # fallback
'timestamp': unified_timestamp(fallback_end_time),
'release_timestamp': unified_timestamp(fallback_start_time),
**extended_metadata,
}
def _extract_news_info(self, headline, programme_id, series_meta):
episode_id = f'{programme_id}_{headline["headline_id"]}' episode_id = f'{programme_id}_{headline["headline_id"]}'
episode = traverse_obj(headline, ('file_list', 0, {dict})) episode = traverse_obj(headline, ('file_list', 0, {dict}))
description = self._extract_extended_description(episode_id, episode)
if not description:
self.report_warning('Failed to get extended description, falling back to summary')
description = traverse_obj(episode, ('file_title_sub', {str}))
return { return {
**series_meta, **series_meta,
@ -687,9 +763,9 @@ class NhkRadiruIE(InfoExtractor):
'was_live': True, 'was_live': True,
'series': series_meta.get('title'), 'series': series_meta.get('title'),
'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'), 'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'),
'description': description,
**traverse_obj(episode, { **traverse_obj(episode, {
'title': 'file_title', 'title': ('file_title', {str}),
'description': ('file_title_sub', {str}),
'timestamp': ('open_time', {unified_timestamp}), 'timestamp': ('open_time', {unified_timestamp}),
'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}), 'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}),
}), }),
@ -706,32 +782,58 @@ class NhkRadiruIE(InfoExtractor):
site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline') site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
programme_id = f'{site_id}_{corner_id}' programme_id = f'{site_id}_{corner_id}'
if site_id == 'F261': if site_id == 'F261': # XXX: News programmes use old API (for now?)
json_url = 'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json' meta = self._download_json(
else: 'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json', programme_id)['main']
json_url = f'https://www.nhk.or.jp/radioondemand/json/{site_id}/bangumi_{programme_id}.json'
meta = self._download_json(json_url, programme_id)['main']
series_meta = traverse_obj(meta, { series_meta = traverse_obj(meta, {
'title': 'program_name', 'title': ('program_name', {str}),
'channel': 'media_name', 'channel': ('media_name', {str}),
'uploader': 'media_name', 'uploader': ('media_name', {str}),
'thumbnail': (('thumbnail_c', 'thumbnail_p'), {url_or_none}), 'thumbnail': (('thumbnail_c', 'thumbnail_p'), {url_or_none}),
}, get_all=False) }, get_all=False)
if headline_id: if headline_id:
return self._extract_episode_info( headline = traverse_obj(
traverse_obj(meta, ( meta, ('detail_list', lambda _, v: v['headline_id'] == headline_id, any))
'detail_list', lambda _, v: v['headline_id'] == headline_id), get_all=False), if not headline:
programme_id, series_meta) raise ExtractorError('Content not found; it has most likely expired', expected=True)
return self._extract_news_info(headline, programme_id, series_meta)
def entries(): def news_entries():
for headline in traverse_obj(meta, ('detail_list', ..., {dict})): for headline in traverse_obj(meta, ('detail_list', ..., {dict})):
yield self._extract_episode_info(headline, programme_id, series_meta) yield self._extract_news_info(headline, programme_id, series_meta)
return self.playlist_result( return self.playlist_result(
entries(), programme_id, playlist_description=meta.get('site_detail'), **series_meta) news_entries(), programme_id, description=meta.get('site_detail'), **series_meta)
meta = self._download_json(
'https://www.nhk.or.jp/radio-api/app/v1/web/ondemand/series', programme_id, query={
'site_id': site_id,
'corner_site_id': corner_id,
})
fallback_station = join_nonempty('NHK', traverse_obj(meta, ('radio_broadcast', {str})), delim=' ')
series_meta = {
'series': join_nonempty('title', 'corner_name', delim=' ', from_dict=meta),
'series_id': programme_id,
'thumbnail': traverse_obj(meta, ('thumbnail_url', {url_or_none})),
'channel': fallback_station,
'uploader': fallback_station,
}
if headline_id:
episode = traverse_obj(meta, ('episodes', lambda _, v: v['id'] == int(headline_id), any))
if not episode:
raise ExtractorError('Content not found; it has most likely expired', expected=True)
return self._extract_episode_info(episode, programme_id, series_meta)
def entries():
for episode in traverse_obj(meta, ('episodes', ..., {dict})):
yield self._extract_episode_info(episode, programme_id, series_meta)
return self.playlist_result(
entries(), programme_id, title=series_meta.get('series'),
description=meta.get('series_description'), **series_meta)
class NhkRadioNewsPageIE(InfoExtractor): class NhkRadioNewsPageIE(InfoExtractor):

View file

@ -2,6 +2,7 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
int_or_none, int_or_none,
join_nonempty,
parse_duration, parse_duration,
parse_iso8601, parse_iso8601,
) )
@ -41,7 +42,7 @@ class NHLBaseIE(InfoExtractor):
else: else:
height = int_or_none(playback.get('height')) height = int_or_none(playback.get('height'))
formats.append({ formats.append({
'format_id': playback.get('name', 'http' + (f'-{height}p' if height else '')), 'format_id': playback.get('name') or join_nonempty('http', height and f'{height}p'),
'url': playback_url, 'url': playback_url,
'width': int_or_none(playback.get('width')), 'width': int_or_none(playback.get('width')),
'height': height, 'height': height,

View file

@ -43,15 +43,17 @@ class NuumBaseIE(InfoExtractor):
is_live = media.get('media_status') == 'RUNNING' is_live = media.get('media_status') == 'RUNNING'
formats, subtitles = None, None formats, subtitles = None, None
headers = {'Referer': 'https://nuum.ru/'}
if extract_formats: if extract_formats:
formats, subtitles = self._extract_m3u8_formats_and_subtitles( formats, subtitles = self._extract_m3u8_formats_and_subtitles(
media_url, video_id, 'mp4', live=is_live) media_url, video_id, 'mp4', live=is_live, headers=headers)
return filter_dict({ return filter_dict({
'id': video_id, 'id': video_id,
'is_live': is_live, 'is_live': is_live,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'http_headers': headers,
**traverse_obj(container, { **traverse_obj(container, {
'title': ('media_container_name', {str}), 'title': ('media_container_name', {str}),
'description': ('media_container_description', {str}), 'description': ('media_container_description', {str}),
@ -78,7 +80,7 @@ class NuumMediaIE(NuumBaseIE):
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://nuum.ru/videos/1567547-toxi-hurtz', 'url': 'https://nuum.ru/videos/1567547-toxi-hurtz',
'md5': 'f1d9118a30403e32b702a204eb03aca3', 'md5': 'ce28837a5bbffe6952d7bfd3d39811b0',
'info_dict': { 'info_dict': {
'id': '1567547', 'id': '1567547',
'ext': 'mp4', 'ext': 'mp4',

View file

@ -550,7 +550,8 @@ class ORFONIE(InfoExtractor):
return self._extract_video_info(segment_id, selected_segment) return self._extract_video_info(segment_id, selected_segment)
# Even some segmented videos have an unsegmented version available in API response root # Even some segmented videos have an unsegmented version available in API response root
if not traverse_obj(api_json, ('sources', ..., ..., 'src', {url_or_none})): if (self._configuration_arg('prefer_segments_playlist')
or not traverse_obj(api_json, ('sources', ..., ..., 'src', {url_or_none}))):
return self.playlist_result( return self.playlist_result(
(self._extract_video_info(str(segment['id']), segment) for segment in segments), (self._extract_video_info(str(segment['id']), segment) for segment in segments),
video_id, **self._parse_metadata(api_json), multi_video=True) video_id, **self._parse_metadata(api_json), multi_video=True)

View file

@ -2,6 +2,7 @@ import itertools
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from .sproutvideo import VidsIoIE
from .vimeo import VimeoIE from .vimeo import VimeoIE
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
@ -12,6 +13,7 @@ from ..utils import (
int_or_none, int_or_none,
mimetype2ext, mimetype2ext,
parse_iso8601, parse_iso8601,
smuggle_url,
str_or_none, str_or_none,
traverse_obj, traverse_obj,
url_or_none, url_or_none,
@ -305,22 +307,28 @@ class PatreonIE(PatreonBaseIE):
'channel_follower_count': ('attributes', 'patron_count', {int_or_none}), 'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
})) }))
# all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo
headers = {'referer': 'https://patreon.com/'}
# handle Vimeo embeds # handle Vimeo embeds
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo': if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
v_url = urllib.parse.unquote(self._html_search_regex( v_url = urllib.parse.unquote(self._html_search_regex(
r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '') traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
if url_or_none(v_url) and self._request_webpage( if url_or_none(v_url) and self._request_webpage(
v_url, video_id, 'Checking Vimeo embed URL', v_url, video_id, 'Checking Vimeo embed URL', headers=headers,
headers={'Referer': 'https://patreon.com/'}, fatal=False, errnote=False, expected_status=429): # 429 is TLS fingerprint rejection
fatal=False, errnote=False):
entries.append(self.url_result( entries.append(self.url_result(
VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'), VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
VimeoIE, url_transparent=True)) VimeoIE, url_transparent=True))
embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none})) embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False): if embed_url and (urlh := self._request_webpage(
entries.append(self.url_result(embed_url)) embed_url, video_id, 'Checking embed URL', headers=headers,
fatal=False, errnote=False, expected_status=403)):
# Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie
if urlh.status != 403 or VidsIoIE.suitable(embed_url):
entries.append(self.url_result(smuggle_url(embed_url, headers)))
post_file = traverse_obj(attributes, ('post_file', {dict})) post_file = traverse_obj(attributes, ('post_file', {dict}))
if post_file: if post_file:

View file

@ -41,7 +41,7 @@ class PelotonIE(InfoExtractor):
}, 'params': { }, 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
}, },
'_skip': 'Account needed', 'skip': 'Account needed',
}, { }, {
'url': 'https://members.onepeloton.com/classes/player/26603d53d6bb4de1b340514864a6a6a8', 'url': 'https://members.onepeloton.com/classes/player/26603d53d6bb4de1b340514864a6a6a8',
'info_dict': { 'info_dict': {
@ -61,7 +61,7 @@ class PelotonIE(InfoExtractor):
}, 'params': { }, 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
}, },
'_skip': 'Account needed', 'skip': 'Account needed',
}] }]
_MANIFEST_URL_TEMPLATE = '%s?hdnea=%s' _MANIFEST_URL_TEMPLATE = '%s?hdnea=%s'
@ -199,7 +199,7 @@ class PelotonLiveIE(InfoExtractor):
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
}, },
'_skip': 'Account needed', 'skip': 'Account needed',
} }
def _real_extract(self, url): def _real_extract(self, url):

View file

@ -1,5 +1,5 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none from ..utils import int_or_none, join_nonempty
class PerformGroupIE(InfoExtractor): class PerformGroupIE(InfoExtractor):
@ -50,11 +50,8 @@ class PerformGroupIE(InfoExtractor):
if not c_url: if not c_url:
continue continue
tbr = int_or_none(c.get('bitrate'), 1000) tbr = int_or_none(c.get('bitrate'), 1000)
format_id = 'http'
if tbr:
format_id += f'-{tbr}'
formats.append({ formats.append({
'format_id': format_id, 'format_id': join_nonempty('http', tbr),
'url': c_url, 'url': c_url,
'tbr': tbr, 'tbr': tbr,
'width': int_or_none(c.get('width')), 'width': int_or_none(c.get('width')),

View file

@ -1,28 +1,40 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import OnDemandPagedList, int_or_none, jwt_decode_hs256, try_call from ..utils import (
OnDemandPagedList,
clean_html,
int_or_none,
jwt_decode_hs256,
url_or_none,
)
from ..utils.traversal import traverse_obj
def result_from_props(props, episode_id=None): def result_from_props(props):
return { return {
'id': props.get('podcast_id') or episode_id, **traverse_obj(props, {
'title': props.get('title'), 'id': ('_id', {str}),
'url': props['mediaURL'], 'title': ('title', {str}),
'url': ('mediaURL', {url_or_none}),
'description': ('description', {clean_html}),
'thumbnail': ('image', {jwt_decode_hs256}, 'url', {url_or_none}),
'timestamp': ('timestamp', {int_or_none}),
'duration': ('duration', {int_or_none}),
}),
'ext': 'mp3', 'ext': 'mp3',
'thumbnail': try_call(lambda: jwt_decode_hs256(props['image'])['url']), 'vcodec': 'none',
'timestamp': props.get('timestamp'),
'duration': int_or_none(props.get('duration')),
} }
class PodbayFMIE(InfoExtractor): class PodbayFMIE(InfoExtractor):
_VALID_URL = r'https?://podbay\.fm/p/[^/]*/e/(?P<id>[^/]*)/?(?:[\?#].*)?$' _VALID_URL = r'https?://podbay\.fm/p/[^/?#]+/e/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://podbay.fm/p/behind-the-bastards/e/1647338400', 'url': 'https://podbay.fm/p/behind-the-bastards/e/1647338400',
'md5': '98b41285dcf7989d105a4ed0404054cf', 'md5': '895ac8505de349515f5ee8a4a3195c93',
'info_dict': { 'info_dict': {
'id': '1647338400', 'id': '62306451f4a48e58d0c4d6a8',
'title': 'Part One: Kissinger', 'title': 'Part One: Kissinger',
'ext': 'mp3', 'ext': 'mp3',
'description': r're:^We begin our epic six part series on Henry Kissinger.+',
'thumbnail': r're:^https?://.*\.jpg', 'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1647338400, 'timestamp': 1647338400,
'duration': 5001, 'duration': 5001,
@ -34,24 +46,25 @@ class PodbayFMIE(InfoExtractor):
episode_id = self._match_id(url) episode_id = self._match_id(url)
webpage = self._download_webpage(url, episode_id) webpage = self._download_webpage(url, episode_id)
data = self._search_nextjs_data(webpage, episode_id) data = self._search_nextjs_data(webpage, episode_id)
return result_from_props(data['props']['pageProps']['episode'], episode_id) return result_from_props(data['props']['pageProps']['episode'])
class PodbayFMChannelIE(InfoExtractor): class PodbayFMChannelIE(InfoExtractor):
_VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/]*)/?(?:[\?#].*)?$' _VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/?#]+)/?(?:$|[?#])'
_TESTS = [{ _TESTS = [{
'url': 'https://podbay.fm/p/behind-the-bastards', 'url': 'https://podbay.fm/p/behind-the-bastards',
'info_dict': { 'info_dict': {
'id': 'behind-the-bastards', 'id': 'behind-the-bastards',
'title': 'Behind the Bastards', 'title': 'Behind the Bastards',
}, },
'playlist_mincount': 21,
}] }]
_PAGE_SIZE = 10 _PAGE_SIZE = 10
def _fetch_page(self, channel_id, pagenum): def _fetch_page(self, channel_id, pagenum):
return self._download_json( return self._download_json(
f'https://podbay.fm/api/podcast?reverse=true&page={pagenum}&slug={channel_id}', f'https://podbay.fm/api/podcast?reverse=true&page={pagenum}&slug={channel_id}',
channel_id)['podcast'] f'Downloading channel JSON page {pagenum + 1}', channel_id)['podcast']
@staticmethod @staticmethod
def _results_from_page(channel_id, page): def _results_from_page(channel_id, page):

View file

@ -5,6 +5,7 @@ from ..utils import (
ExtractorError, ExtractorError,
try_get, try_get,
) )
from ..utils.traversal import traverse_obj
class PokerGoBaseIE(InfoExtractor): class PokerGoBaseIE(InfoExtractor):
@ -65,7 +66,7 @@ class PokerGoIE(PokerGoBaseIE):
'width': image.get('width'), 'width': image.get('width'),
'height': image.get('height'), 'height': image.get('height'),
} for image in data_json.get('images') or [] if image.get('url')] } for image in data_json.get('images') or [] if image.get('url')]
series_json = next(dct for dct in data_json.get('show_tags') or [] if dct.get('video_id') == video_id) or {} series_json = traverse_obj(data_json, ('show_tags', lambda _, v: v['video_id'] == video_id, any)) or {}
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',

View file

@ -1,9 +1,9 @@
import datetime as dt import datetime as dt
import functools
import json import json
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import functools
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,

View file

@ -7,6 +7,7 @@ from ..utils import (
determine_ext, determine_ext,
float_or_none, float_or_none,
int_or_none, int_or_none,
join_nonempty,
merge_dicts, merge_dicts,
unified_strdate, unified_strdate,
) )
@ -147,13 +148,13 @@ class ProSiebenSat1BaseIE(InfoExtractor):
'page_url': 'http://www.prosieben.de', 'page_url': 'http://www.prosieben.de',
'tbr': tbr, 'tbr': tbr,
'ext': 'flv', 'ext': 'flv',
'format_id': 'rtmp{}'.format(f'-{tbr}' if tbr else ''), 'format_id': join_nonempty('rtmp', tbr),
}) })
else: else:
formats.append({ formats.append({
'url': source_url, 'url': source_url,
'tbr': tbr, 'tbr': tbr,
'format_id': 'http{}'.format(f'-{tbr}' if tbr else ''), 'format_id': join_nonempty('http', tbr),
}) })
return { return {

View file

@ -1,48 +1,125 @@
import base64
import functools
import json
import random import random
import re
import time import time
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
OnDemandPagedList,
clean_html, clean_html,
int_or_none,
join_nonempty,
js_to_json,
str_or_none,
strip_jsonp, strip_jsonp,
traverse_obj,
unescapeHTML, unescapeHTML,
url_or_none,
urljoin,
) )
class QQMusicIE(InfoExtractor): class QQMusicBaseIE(InfoExtractor):
def _get_cookie(self, key, default=None):
return getattr(self._get_cookies('https://y.qq.com').get(key), 'value', default)
def _get_g_tk(self):
n = 5381
for c in self._get_cookie('qqmusic_key', ''):
n += (n << 5) + ord(c)
return n & 2147483647
def _get_uin(self):
    """Return the 'uin' cookie as an integer, or 0 when it is missing,
    not convertible, or zero."""
    uin = int_or_none(self._get_cookie('uin'))
    return uin if uin else 0
@property
def is_logged_in(self):
    """True only when both a non-zero uin and a truthy 'fqm_pvqid'
    cookie are present."""
    if not self._get_uin():
        return False
    return bool(self._get_cookie('fqm_pvqid'))
# Reference: m_r_GetRUin() in top_player.js
# http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
@staticmethod
def _m_r_get_ruin():
cur_ms = int(time.time() * 1000) % 1000
return int(round(random.random() * 2147483647) * cur_ms % 1E10)
def _download_init_data(self, url, mid, fatal=True):
    """Download ``url`` and extract the JSON assigned to
    ``window.__INITIAL_DATA__`` in the page.

    ``fatal`` is forwarded to both the download and the JSON search.
    """
    page = self._download_webpage(url, mid, fatal=fatal)
    return self._search_json(
        r'window\.__INITIAL_DATA__\s*=', page, 'init data', mid,
        transform_source=js_to_json, fatal=fatal)
def _make_fcu_req(self, req_dict, mid, headers={}, **kwargs):
return self._download_json(
'https://u.y.qq.com/cgi-bin/musicu.fcg', mid, data=json.dumps({
'comm': {
'cv': 0,
'ct': 24,
'format': 'json',
'uin': self._get_uin(),
},
**req_dict,
}, separators=(',', ':')).encode(), headers=headers, **kwargs)
class QQMusicIE(QQMusicBaseIE):
IE_NAME = 'qqmusic' IE_NAME = 'qqmusic'
IE_DESC = 'QQ音乐' IE_DESC = 'QQ音乐'
_VALID_URL = r'https?://y\.qq\.com/n/yqq/song/(?P<id>[0-9A-Za-z]+)\.html' _VALID_URL = r'https?://y\.qq\.com/n/ryqq/songDetail/(?P<id>[0-9A-Za-z]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://y.qq.com/n/yqq/song/004295Et37taLD.html', 'url': 'https://y.qq.com/n/ryqq/songDetail/004Ti8rT003TaZ',
'md5': 'd7adc5c438d12e2cb648cca81593fd47',
'info_dict': {
'id': '004Ti8rT003TaZ',
'ext': 'mp3',
'title': '永夜のパレード (永夜的游行)',
'album': '幻想遊園郷 -Fantastic Park-',
'release_date': '20111230',
'duration': 281,
'creators': ['ケーキ姫', 'JUMA'],
'genres': ['Pop'],
'description': 'md5:b5261f3d595657ae561e9e6aee7eb7d9',
'size': 4501244,
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
'subtitles': 'count:1',
},
}, {
'url': 'https://y.qq.com/n/ryqq/songDetail/004295Et37taLD',
'md5': '5f1e6cea39e182857da7ffc5ef5e6bb8', 'md5': '5f1e6cea39e182857da7ffc5ef5e6bb8',
'info_dict': { 'info_dict': {
'id': '004295Et37taLD', 'id': '004295Et37taLD',
'ext': 'mp3', 'ext': 'mp3',
'title': '可惜没如果', 'title': '可惜没如果',
'release_date': '20141227', 'album': '新地球 - 人 (Special Edition)',
'creator': '林俊杰', 'release_date': '20150129',
'description': 'md5:d85afb3051952ecc50a1ee8a286d1eac', 'duration': 298,
'thumbnail': r're:^https?://.*\.jpg$', 'creators': ['林俊杰'],
'genres': ['Pop'],
'description': 'md5:f568421ff618d2066e74b65a04149c4e',
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
}, },
'skip': 'premium member only',
}, { }, {
'note': 'There is no mp3-320 version of this song.', 'note': 'There is no mp3-320 version of this song.',
'url': 'https://y.qq.com/n/yqq/song/004MsGEo3DdNxV.html', 'url': 'https://y.qq.com/n/ryqq/songDetail/004MsGEo3DdNxV',
'md5': 'fa3926f0c585cda0af8fa4f796482e3e', 'md5': '028aaef1ae13d8a9f4861a92614887f9',
'info_dict': { 'info_dict': {
'id': '004MsGEo3DdNxV', 'id': '004MsGEo3DdNxV',
'ext': 'mp3', 'ext': 'mp3',
'title': '如果', 'title': '如果',
'album': '新传媒电视连续剧金曲系列II',
'release_date': '20050626', 'release_date': '20050626',
'creator': '李季美', 'duration': 220,
'description': 'md5:46857d5ed62bc4ba84607a805dccf437', 'creators': ['李季美'],
'thumbnail': r're:^https?://.*\.jpg$', 'genres': [],
'description': 'md5:fc711212aa623b28534954dc4bd67385',
'size': 3535730,
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
}, },
}, { }, {
'note': 'lyrics not in .lrc format', 'note': 'lyrics not in .lrc format',
'url': 'https://y.qq.com/n/yqq/song/001JyApY11tIp6.html', 'url': 'https://y.qq.com/n/ryqq/songDetail/001JyApY11tIp6',
'info_dict': { 'info_dict': {
'id': '001JyApY11tIp6', 'id': '001JyApY11tIp6',
'ext': 'mp3', 'ext': 'mp3',
@ -50,185 +127,193 @@ class QQMusicIE(InfoExtractor):
'release_date': '19970225', 'release_date': '19970225',
'creator': 'Dark Funeral', 'creator': 'Dark Funeral',
'description': 'md5:c9b20210587cbcd6836a1c597bab4525', 'description': 'md5:c9b20210587cbcd6836a1c597bab4525',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
},
'params': {
'skip_download': True,
}, },
'params': {'skip_download': True},
'skip': 'no longer available',
}] }]
_FORMATS = { _FORMATS = {
'mp3-320': {'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320}, 'F000': {'name': 'flac', 'prefix': 'F000', 'ext': 'flac', 'preference': 60},
'mp3-128': {'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128}, 'A000': {'name': 'ape', 'prefix': 'A000', 'ext': 'ape', 'preference': 50},
'm4a': {'prefix': 'C200', 'ext': 'm4a', 'preference': 10}, 'M800': {'name': '320mp3', 'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320},
'M500': {'name': '128mp3', 'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128},
'C400': {'name': '96aac', 'prefix': 'C400', 'ext': 'm4a', 'preference': 20, 'abr': 96},
'C200': {'name': '48aac', 'prefix': 'C200', 'ext': 'm4a', 'preference': 20, 'abr': 48},
} }
# Reference: m_r_GetRUin() in top_player.js
# http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
@staticmethod
def m_r_get_ruin():
cur_ms = int(time.time() * 1000) % 1000
return int(round(random.random() * 2147483647) * cur_ms % 1E10)
def _real_extract(self, url): def _real_extract(self, url):
mid = self._match_id(url) mid = self._match_id(url)
detail_info_page = self._download_webpage( init_data = self._download_init_data(url, mid, fatal=False)
f'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid={mid}&play=0', info_data = self._make_fcu_req({'info': {
mid, note='Download song detail info', 'module': 'music.pf_song_detail_svr',
errnote='Unable to get song detail info', encoding='gbk') 'method': 'get_song_detail_yqq',
'param': {
'song_mid': mid,
'song_type': 0,
},
}}, mid, note='Downloading song info')['info']['data']['track_info']
song_name = self._html_search_regex( media_mid = info_data['file']['media_mid']
r"songname:\s*'([^']+)'", detail_info_page, 'song name')
publish_time = self._html_search_regex( data = self._make_fcu_req({
r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page, 'req_1': {
'publish time', default=None) 'module': 'vkey.GetVkeyServer',
if publish_time: 'method': 'CgiGetVkey',
publish_time = publish_time.replace('-', '') 'param': {
'guid': str(self._m_r_get_ruin()),
singer = self._html_search_regex( 'songmid': [mid] * len(self._FORMATS),
r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None) 'songtype': [0] * len(self._FORMATS),
'uin': str(self._get_uin()),
lrc_content = self._html_search_regex( 'loginflag': 1,
r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>', 'platform': '20',
detail_info_page, 'LRC lyrics', default=None) 'filename': [f'{f["prefix"]}{media_mid}.{f["ext"]}' for f in self._FORMATS.values()],
if lrc_content: },
lrc_content = lrc_content.replace('\\n', '\n') },
'req_2': {
thumbnail_url = None 'module': 'music.musichallSong.PlayLyricInfo',
albummid = self._search_regex( 'method': 'GetPlayLyricInfo',
[r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'], 'param': {'songMID': mid},
detail_info_page, 'album mid', default=None) },
if albummid: }, mid, note='Downloading formats and lyric', headers=self.geo_verification_headers())
thumbnail_url = f'http://i.gtimg.cn/music/photo/mid_album_500/{albummid[-2:-1]}/{albummid[-1]}/{albummid}.jpg'
guid = self.m_r_get_ruin()
vkey = self._download_json(
f'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid={guid}',
mid, note='Retrieve vkey', errnote='Unable to get vkey',
transform_source=strip_jsonp)['key']
code = traverse_obj(data, ('req_1', 'code', {int}))
if code != 0:
raise ExtractorError(f'Failed to download format info, error code {code or "unknown"}')
formats = [] formats = []
for format_id, details in self._FORMATS.items(): for media_info in traverse_obj(data, (
'req_1', 'data', 'midurlinfo', lambda _, v: v['songmid'] == mid and v['purl']),
):
format_key = traverse_obj(media_info, ('filename', {str}, {lambda x: x[:4]}))
format_info = self._FORMATS.get(format_key) or {}
format_id = format_info.get('name')
formats.append({ formats.append({
'url': 'http://cc.stream.qqmusic.qq.com/{}{}.{}?vkey={}&guid={}&fromtag=0'.format( 'url': urljoin('https://dl.stream.qqmusic.qq.com', media_info['purl']),
details['prefix'], mid, details['ext'], vkey, guid),
'format': format_id, 'format': format_id,
'format_id': format_id, 'format_id': format_id,
'quality': details['preference'], 'size': traverse_obj(info_data, ('file', f'size_{format_id}', {int_or_none})),
'abr': details.get('abr'), 'quality': format_info.get('preference'),
'abr': format_info.get('abr'),
'ext': format_info.get('ext'),
'vcodec': 'none',
}) })
self._check_formats(formats, mid)
actual_lrc_lyrics = ''.join( if not formats and not self.is_logged_in:
line + '\n' for line in re.findall( self.raise_login_required()
r'(?m)^(\[[0-9]{2}:[0-9]{2}(?:\.[0-9]{2,})?\][^\n]*|\[[^\]]*\])', lrc_content))
if traverse_obj(data, ('req_2', 'code')):
self.report_warning(f'Failed to download lyric, error {data["req_2"]["code"]!r}')
lrc_content = traverse_obj(data, ('req_2', 'data', 'lyric', {lambda x: base64.b64decode(x).decode('utf-8')}))
info_dict = { info_dict = {
'id': mid, 'id': mid,
'formats': formats, 'formats': formats,
'title': song_name, **traverse_obj(info_data, {
'release_date': publish_time, 'title': ('title', {str}),
'creator': singer, 'album': ('album', 'title', {str}, {lambda x: x or None}),
'description': lrc_content, 'release_date': ('time_public', {lambda x: x.replace('-', '') or None}),
'thumbnail': thumbnail_url, 'creators': ('singer', ..., 'name', {str}),
} 'alt_title': ('subtitle', {str}, {lambda x: x or None}),
if actual_lrc_lyrics: 'duration': ('interval', {int_or_none}),
info_dict['subtitles'] = { }),
'origin': [{ **traverse_obj(init_data, ('detail', {
'ext': 'lrc', 'thumbnail': ('picurl', {url_or_none}),
'data': actual_lrc_lyrics, 'description': ('info', 'intro', 'content', ..., 'value', {str}),
}], 'genres': ('info', 'genre', 'content', ..., 'value', {str}, all),
}), get_all=False),
} }
if lrc_content:
info_dict['subtitles'] = {'origin': [{'ext': 'lrc', 'data': lrc_content}]}
info_dict['description'] = join_nonempty(info_dict.get('description'), lrc_content, delim='\n')
return info_dict return info_dict
class QQPlaylistBaseIE(InfoExtractor): class QQMusicSingerIE(QQMusicBaseIE):
@staticmethod
def qq_static_url(category, mid):
return f'http://y.qq.com/y/static/{category}/{mid[-2]}/{mid[-1]}/{mid}.html'
def get_singer_all_songs(self, singmid, num):
return self._download_webpage(
r'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg', singmid,
query={
'format': 'json',
'inCharset': 'utf8',
'outCharset': 'utf-8',
'platform': 'yqq',
'needNewCode': 0,
'singermid': singmid,
'order': 'listen',
'begin': 0,
'num': num,
'songstatus': 1,
})
def get_entries_from_page(self, singmid):
entries = []
default_num = 1
json_text = self.get_singer_all_songs(singmid, default_num)
json_obj_all_songs = self._parse_json(json_text, singmid)
if json_obj_all_songs['code'] == 0:
total = json_obj_all_songs['data']['total']
json_text = self.get_singer_all_songs(singmid, total)
json_obj_all_songs = self._parse_json(json_text, singmid)
for item in json_obj_all_songs['data']['list']:
if item['musicData'].get('songmid') is not None:
songmid = item['musicData']['songmid']
entries.append(self.url_result(
rf'https://y.qq.com/n/yqq/song/{songmid}.html', 'QQMusic', songmid))
return entries
class QQMusicSingerIE(QQPlaylistBaseIE):
IE_NAME = 'qqmusic:singer' IE_NAME = 'qqmusic:singer'
IE_DESC = 'QQ音乐 - 歌手' IE_DESC = 'QQ音乐 - 歌手'
_VALID_URL = r'https?://y\.qq\.com/n/yqq/singer/(?P<id>[0-9A-Za-z]+)\.html' _VALID_URL = r'https?://y\.qq\.com/n/ryqq/singer/(?P<id>[0-9A-Za-z]+)'
_TEST = { _TESTS = [{
'url': 'https://y.qq.com/n/yqq/singer/001BLpXF2DyJe2.html', 'url': 'https://y.qq.com/n/ryqq/singer/001BLpXF2DyJe2',
'info_dict': { 'info_dict': {
'id': '001BLpXF2DyJe2', 'id': '001BLpXF2DyJe2',
'title': '林俊杰', 'title': '林俊杰',
'description': 'md5:870ec08f7d8547c29c93010899103751', 'description': 'md5:10624ce73b06fa400bc846f59b0305fa',
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
}, },
'playlist_mincount': 12, 'playlist_mincount': 100,
} }, {
'url': 'https://y.qq.com/n/ryqq/singer/000Q00f213YzNV',
'info_dict': {
'id': '000Q00f213YzNV',
'title': '桃几OvO',
'description': '小破站小唱见~希望大家喜欢听我唱歌~',
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
},
'playlist_count': 12,
'playlist': [{
'info_dict': {
'id': '0016cvsy02mmCl',
'ext': 'mp3',
'title': '群青',
'album': '桃几2021年翻唱集',
'release_date': '20210913',
'duration': 248,
'creators': ['桃几OvO'],
'genres': ['Pop'],
'description': 'md5:4296005a04edcb5cdbe0889d5055a7ae',
'size': 3970822,
'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
},
}],
}]
_PAGE_SIZE = 50
def _fetch_page(self, mid, page_size, page_num):
    """Yield url results for one page of a singer's song list.

    @param mid        Singer mid
    @param page_size  Number of songs requested per page
    @param page_num   Zero-based page index; the request offset ('sin')
                      is ``page_num * page_size``
    """
    request = {
        'module': 'music.web_singer_info_svr',
        'method': 'get_singer_detail_info',
        'param': {
            'sort': 5,
            'singermid': mid,
            'sin': page_num * page_size,
            'num': page_size,
        },
    }
    response = self._make_fcu_req(
        {'req_1': request}, mid, note=f'Downloading page {page_num}')

    def song_to_entry(song):
        # Runs as a traverse_obj transform, exactly like the inline lambda it replaces
        return self.url_result(
            f'https://y.qq.com/n/ryqq/songDetail/{song["mid"]}', QQMusicIE, song['mid'], song.get('title'))

    for entry in traverse_obj(response, ('req_1', 'data', 'songlist', ..., {song_to_entry})):
        yield entry
def _real_extract(self, url): def _real_extract(self, url):
mid = self._match_id(url) mid = self._match_id(url)
init_data = self._download_init_data(url, mid, fatal=False)
entries = self.get_entries_from_page(mid) return self.playlist_result(
singer_page = self._download_webpage(url, mid, 'Download singer page') OnDemandPagedList(functools.partial(self._fetch_page, mid, self._PAGE_SIZE), self._PAGE_SIZE),
singer_name = self._html_search_regex( mid, **traverse_obj(init_data, ('singerDetail', {
r"singername\s*:\s*'(.*?)'", singer_page, 'singer name', default=None) 'title': ('basic_info', 'name', {str}),
singer_desc = None 'description': ('ex_info', 'desc', {str}),
'thumbnail': ('pic', 'pic', {url_or_none}),
})))
if mid:
singer_desc_page = self._download_xml(
'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg', mid,
'Donwload singer description XML',
query={'utf8': 1, 'outCharset': 'utf-8', 'format': 'xml', 'singermid': mid},
headers={'Referer': 'https://y.qq.com/n/yqq/singer/'})
singer_desc = singer_desc_page.find('./data/info/desc').text class QQPlaylistBaseIE(InfoExtractor):
def _extract_entries(self, info_json, path):
return self.playlist_result(entries, mid, singer_name, singer_desc) for song in traverse_obj(info_json, path):
song_mid = song.get('songmid')
if not song_mid:
continue
yield self.url_result(
f'https://y.qq.com/n/ryqq/songDetail/{song_mid}',
QQMusicIE, song_mid, song.get('songname'))
class QQMusicAlbumIE(QQPlaylistBaseIE): class QQMusicAlbumIE(QQPlaylistBaseIE):
IE_NAME = 'qqmusic:album' IE_NAME = 'qqmusic:album'
IE_DESC = 'QQ音乐 - 专辑' IE_DESC = 'QQ音乐 - 专辑'
_VALID_URL = r'https?://y\.qq\.com/n/yqq/album/(?P<id>[0-9A-Za-z]+)\.html' _VALID_URL = r'https?://y\.qq\.com/n/ryqq/albumDetail/(?P<id>[0-9A-Za-z]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://y.qq.com/n/yqq/album/000gXCTb2AhRR1.html', 'url': 'https://y.qq.com/n/ryqq/albumDetail/000gXCTb2AhRR1',
'info_dict': { 'info_dict': {
'id': '000gXCTb2AhRR1', 'id': '000gXCTb2AhRR1',
'title': '我们都是这样长大的', 'title': '我们都是这样长大的',
@ -236,10 +321,10 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
}, },
'playlist_count': 4, 'playlist_count': 4,
}, { }, {
'url': 'https://y.qq.com/n/yqq/album/002Y5a3b3AlCu3.html', 'url': 'https://y.qq.com/n/ryqq/albumDetail/002Y5a3b3AlCu3',
'info_dict': { 'info_dict': {
'id': '002Y5a3b3AlCu3', 'id': '002Y5a3b3AlCu3',
'title': '그리고...', 'title': '그리고',
'description': 'md5:a48823755615508a95080e81b51ba729', 'description': 'md5:a48823755615508a95080e81b51ba729',
}, },
'playlist_count': 8, 'playlist_count': 8,
@ -248,49 +333,45 @@ class QQMusicAlbumIE(QQPlaylistBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
mid = self._match_id(url) mid = self._match_id(url)
album = self._download_json( album_json = self._download_json(
f'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid={mid}&format=json', 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg',
mid, 'Download album page')['data'] mid, 'Download album page',
query={'albummid': mid, 'format': 'json'})['data']
entries = [ entries = self._extract_entries(album_json, ('list', ...))
self.url_result(
'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'],
) for song in album['list']
]
album_name = album.get('name')
album_detail = album.get('desc')
if album_detail is not None:
album_detail = album_detail.strip()
return self.playlist_result(entries, mid, album_name, album_detail) return self.playlist_result(entries, mid, **traverse_obj(album_json, {
'title': ('name', {str}),
'description': ('desc', {str.strip}),
}))
class QQMusicToplistIE(QQPlaylistBaseIE): class QQMusicToplistIE(QQPlaylistBaseIE):
IE_NAME = 'qqmusic:toplist' IE_NAME = 'qqmusic:toplist'
IE_DESC = 'QQ音乐 - 排行榜' IE_DESC = 'QQ音乐 - 排行榜'
_VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P<id>[0-9]+)\.html' _VALID_URL = r'https?://y\.qq\.com/n/ryqq/toplist/(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://y.qq.com/n/yqq/toplist/123.html', 'url': 'https://y.qq.com/n/ryqq/toplist/123',
'info_dict': { 'info_dict': {
'id': '123', 'id': '123',
'title': '美国iTunes榜', 'title': r're:美国热门音乐榜 \d{4}-\d{2}-\d{2}',
'description': 'md5:89db2335fdbb10678dee2d43fe9aba08', 'description': '美国热门音乐榜,每周一更新。',
}, },
'playlist_count': 100, 'playlist_count': 95,
}, { }, {
'url': 'https://y.qq.com/n/yqq/toplist/3.html', 'url': 'https://y.qq.com/n/ryqq/toplist/3',
'info_dict': { 'info_dict': {
'id': '3', 'id': '3',
'title': '巅峰榜·欧美', 'title': r're:巅峰榜·欧美 \d{4}-\d{2}-\d{2}',
'description': 'md5:5a600d42c01696b26b71f8c4d43407da', 'description': 'md5:4def03b60d3644be4c9a36f21fd33857',
}, },
'playlist_count': 100, 'playlist_count': 100,
}, { }, {
'url': 'https://y.qq.com/n/yqq/toplist/106.html', 'url': 'https://y.qq.com/n/ryqq/toplist/106',
'info_dict': { 'info_dict': {
'id': '106', 'id': '106',
'title': '韩国Mnet榜', 'title': r're:韩国Mnet榜 \d{4}-\d{2}-\d{2}',
'description': 'md5:cb84b325215e1d21708c615cac82a6e7', 'description': 'md5:cb84b325215e1d21708c615cac82a6e7',
}, },
'playlist_count': 50, 'playlist_count': 50,
@ -304,33 +385,20 @@ class QQMusicToplistIE(QQPlaylistBaseIE):
note='Download toplist page', note='Download toplist page',
query={'type': 'toplist', 'topid': list_id, 'format': 'json'}) query={'type': 'toplist', 'topid': list_id, 'format': 'json'})
entries = [self.url_result( return self.playlist_result(
'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic', self._extract_entries(toplist_json, ('songlist', ..., 'data')), list_id,
song['data']['songmid']) playlist_title=join_nonempty(*traverse_obj(
for song in toplist_json['songlist']] toplist_json, ((('topinfo', 'ListName'), 'update_time'), None)), delim=' '),
playlist_description=traverse_obj(toplist_json, ('topinfo', 'info')))
topinfo = toplist_json.get('topinfo', {})
list_name = topinfo.get('ListName')
list_description = topinfo.get('info')
return self.playlist_result(entries, list_id, list_name, list_description)
class QQMusicPlaylistIE(QQPlaylistBaseIE): class QQMusicPlaylistIE(QQPlaylistBaseIE):
IE_NAME = 'qqmusic:playlist' IE_NAME = 'qqmusic:playlist'
IE_DESC = 'QQ音乐 - 歌单' IE_DESC = 'QQ音乐 - 歌单'
_VALID_URL = r'https?://y\.qq\.com/n/yqq/playlist/(?P<id>[0-9]+)\.html' _VALID_URL = r'https?://y\.qq\.com/n/ryqq/playlist/(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://y.qq.com/n/yqq/playlist/3462654915.html', 'url': 'https://y.qq.com/n/ryqq/playlist/1374105607',
'info_dict': {
'id': '3462654915',
'title': '韩国5月新歌精选下旬',
'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4',
},
'playlist_count': 40,
'skip': 'playlist gone',
}, {
'url': 'https://y.qq.com/n/yqq/playlist/1374105607.html',
'info_dict': { 'info_dict': {
'id': '1374105607', 'id': '1374105607',
'title': '易入人心的华语民谣', 'title': '易入人心的华语民谣',
@ -346,19 +414,83 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg', 'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg',
list_id, 'Download list page', list_id, 'Download list page',
query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id}, query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id},
transform_source=strip_jsonp) transform_source=strip_jsonp, headers={'Referer': url})
if not len(list_json.get('cdlist', [])): if not len(list_json.get('cdlist', [])):
if list_json.get('code'): raise ExtractorError(join_nonempty(
raise ExtractorError( 'Unable to get playlist info',
'QQ Music said: error %d in fetching playlist info' % list_json['code'], join_nonempty('code', 'subcode', from_dict=list_json),
expected=True) list_json.get('msg'), delim=': '))
raise ExtractorError('Unable to get playlist info')
cdlist = list_json['cdlist'][0] entries = self._extract_entries(list_json, ('cdlist', 0, 'songlist', ...))
entries = [self.url_result(
'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid'])
for song in cdlist['songlist']]
list_name = cdlist.get('dissname') return self.playlist_result(entries, list_id, **traverse_obj(list_json, ('cdlist', 0, {
list_description = clean_html(unescapeHTML(cdlist.get('desc'))) 'title': ('dissname', {str}),
return self.playlist_result(entries, list_id, list_name, list_description) 'description': ('desc', {unescapeHTML}, {clean_html}),
})))
class QQMusicVideoIE(QQMusicBaseIE):
    # Extractor for QQ Music MV pages (y.qq.com/n/ryqq/mv/<id>)
    IE_NAME = 'qqmusic:mv'
    IE_DESC = 'QQ音乐 - MV'
    _VALID_URL = r'https?://y\.qq\.com/n/ryqq/mv/(?P<id>[0-9A-Za-z]+)'
    _TESTS = [{
        'url': 'https://y.qq.com/n/ryqq/mv/002Vsarh3SVU8K',
        'info_dict': {
            'id': '002Vsarh3SVU8K',
            'ext': 'mp4',
            'title': 'The Chant (Extended Mix / Audio)',
            'description': '',
            'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])',
            'release_timestamp': 1688918400,
            'release_date': '20230709',
            'duration': 313,
            'creators': ['Duke Dumont'],
            'view_count': int,
        },
    }]

    def _parse_url_formats(self, url_data):
        # Build the formats list from the 'mp4' entries of the MV URL data.
        # Entries without a truthy 'freeflow_url' are filtered out; for the
        # remaining ones the first item of 'freeflow_url' is used as the URL.
        format_fields = {
            'url': ('freeflow_url', 0, {url_or_none}),
            'filesize': ('fileSize', {int_or_none}),
            'format_id': ('newFileType', {str_or_none}),
        }
        return traverse_obj(
            url_data, ('mp4', lambda _, v: v['freeflow_url'], format_fields))

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # Both the metadata and the stream URLs are requested in a single call
        info_request = {
            'module': 'music.video.VideoData',
            'method': 'get_video_info_batch',
            'param': {
                'vidlist': [video_id],
                'required': [
                    'vid', 'type', 'sid', 'cover_pic', 'duration', 'singers',
                    'video_pay', 'hint', 'code', 'msg', 'name', 'desc',
                    'playcnt', 'pubdate', 'play_forbid_reason'],
            },
        }
        url_request = {
            'module': 'music.stream.MvUrlProxy',
            'method': 'GetMvUrls',
            'param': {'vids': [video_id]},
        }
        video_info = self._make_fcu_req(
            {'mvInfo': info_request, 'mvUrl': url_request},
            video_id, headers=self.geo_verification_headers())

        # Location of this video's metadata inside the response
        info_path = ('mvInfo', 'data', video_id)

        # A 'play_forbid_reason' of 3 is treated as a geo restriction
        if traverse_obj(video_info, (*info_path, 'play_forbid_reason')) == 3:
            self.raise_geo_restricted()

        formats = self._parse_url_formats(
            traverse_obj(video_info, ('mvUrl', 'data', video_id)))
        metadata = traverse_obj(video_info, (*info_path, {
            'title': ('name', {str}),
            'description': ('desc', {str}),
            'thumbnail': ('cover_pic', {url_or_none}),
            'release_timestamp': ('pubdate', {int_or_none}),
            'duration': ('duration', {int_or_none}),
            'creators': ('singers', ..., 'name', {str}),
            'view_count': ('playcnt', {int_or_none}),
        }))

        return {
            'id': video_id,
            'formats': formats,
            **metadata,
        }

View file

@ -1,3 +1,5 @@
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
@ -6,6 +8,7 @@ from ..utils import (
traverse_obj, traverse_obj,
unified_timestamp, unified_timestamp,
url_or_none, url_or_none,
urljoin,
) )
@ -21,8 +24,7 @@ class RTVSLOIE(InfoExtractor):
_API_BASE = 'https://api.rtvslo.si/ava/{}/{}?client_id=82013fb3a531d5414f478747c1aca622' _API_BASE = 'https://api.rtvslo.si/ava/{}/{}?client_id=82013fb3a531d5414f478747c1aca622'
SUB_LANGS_MAP = {'Slovenski': 'sl'} SUB_LANGS_MAP = {'Slovenski': 'sl'}
_TESTS = [ _TESTS = [{
{
'url': 'https://www.rtvslo.si/rtv365/arhiv/174842550?s=tv', 'url': 'https://www.rtvslo.si/rtv365/arhiv/174842550?s=tv',
'info_dict': { 'info_dict': {
'id': '174842550', 'id': '174842550',
@ -88,8 +90,7 @@ class RTVSLOIE(InfoExtractor):
}, { }, {
'url': 'https://4d.rtvslo.si/arhiv/dnevnik/174842550', 'url': 'https://4d.rtvslo.si/arhiv/dnevnik/174842550',
'only_matching': True, 'only_matching': True,
}, }]
]
def _real_extract(self, url): def _real_extract(self, url):
v_id = self._match_id(url) v_id = self._match_id(url)
@ -164,3 +165,26 @@ class RTVSLOIE(InfoExtractor):
'series': meta.get('showName'), 'series': meta.get('showName'),
'series_id': meta.get('showId'), 'series_id': meta.get('showId'),
} }
class RTVSLOShowIE(InfoExtractor):
    # Playlist extractor for show pages: collects every /arhiv/ link found on
    # the show page and hands each one to RTVSLOIE.
    IE_NAME = 'rtvslo.si:show'
    # Both dots of the hostname are escaped so that only the literal
    # "rtvslo.si" domain matches (an unescaped "." would accept any character)
    _VALID_URL = r'https?://(?:365|4d)\.rtvslo\.si/oddaja/[^/?#&]+/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://365.rtvslo.si/oddaja/ekipa-bled/173250997',
        'info_dict': {
            'id': '173250997',
            'title': 'Ekipa Bled',
        },
        'playlist_count': 18,
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        # The page links to episodes with site-relative /arhiv/ paths; the
        # getter resolves each of them against the 365.rtvslo.si host.
        return self.playlist_from_matches(
            re.findall(r'<a [^>]*\bhref="(/arhiv/[^"]+)"', webpage),
            playlist_id, self._html_extract_title(webpage),
            getter=lambda x: urljoin('https://365.rtvslo.si', x), ie=RTVSLOIE)

View file

@ -95,7 +95,7 @@ class SoundcloudBaseIE(InfoExtractor):
return return
raise ExtractorError('Unable to extract client id') raise ExtractorError('Unable to extract client id')
def _download_json(self, *args, **kwargs): def _call_api(self, *args, **kwargs):
non_fatal = kwargs.get('fatal') is False non_fatal = kwargs.get('fatal') is False
if non_fatal: if non_fatal:
del kwargs['fatal'] del kwargs['fatal']
@ -104,7 +104,7 @@ class SoundcloudBaseIE(InfoExtractor):
query['client_id'] = self._CLIENT_ID query['client_id'] = self._CLIENT_ID
kwargs['query'] = query kwargs['query'] = query
try: try:
return super()._download_json(*args, **kwargs) return self._download_json(*args, **kwargs)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403): if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
self._store_client_id(None) self._store_client_id(None)
@ -163,7 +163,7 @@ class SoundcloudBaseIE(InfoExtractor):
'user_agent': self._USER_AGENT 'user_agent': self._USER_AGENT
} }
response = self._download_json( response = self._call_api(
self._API_AUTH_URL_PW % (self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID), self._API_AUTH_URL_PW % (self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID),
None, note='Verifying login token...', fatal=False, None, note='Verifying login token...', fatal=False,
data=json.dumps(payload).encode()) data=json.dumps(payload).encode())
@ -217,12 +217,26 @@ class SoundcloudBaseIE(InfoExtractor):
query['secret_token'] = secret_token query['secret_token'] = secret_token
if not extract_flat and info.get('downloadable') and info.get('has_downloads_left'): if not extract_flat and info.get('downloadable') and info.get('has_downloads_left'):
download_url = update_url_query( try:
self._API_V2_BASE + 'tracks/' + track_id + '/download', query) # Do not use _call_api(); HTTP Error codes have different meanings for this request
redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri') download_data = self._download_json(
if redirect_url: f'{self._API_V2_BASE}tracks/{track_id}/download', track_id,
'Downloading original download format info JSON', query=query, headers=self._HEADERS)
except ExtractorError as e:
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
self.report_warning(
'Original download format is only available '
f'for registered users. {self._login_hint()}')
elif isinstance(e.cause, HTTPError) and e.cause.status == 403:
self.write_debug('Original download format is not available for this client')
else:
self.report_warning(e.msg)
download_data = None
if redirect_url := traverse_obj(download_data, ('redirectUri', {url_or_none})):
urlh = self._request_webpage( urlh = self._request_webpage(
HEADRequest(redirect_url), track_id, 'Checking for original download format', fatal=False) HEADRequest(redirect_url), track_id, 'Checking original download format availability',
'Original download format is not available', fatal=False)
if urlh: if urlh:
format_url = urlh.url format_url = urlh.url
format_urls.add(format_url) format_urls.add(format_url)
@ -303,7 +317,7 @@ class SoundcloudBaseIE(InfoExtractor):
stream = None stream = None
for retry in self.RetryManager(fatal=False): for retry in self.RetryManager(fatal=False):
try: try:
stream = self._download_json( stream = self._call_api(
format_url, track_id, f'Downloading {identifier} format info JSON', format_url, track_id, f'Downloading {identifier} format info JSON',
query=query, headers=self._HEADERS) query=query, headers=self._HEADERS)
except ExtractorError as e: except ExtractorError as e:
@ -630,7 +644,7 @@ class SoundcloudIE(SoundcloudBaseIE):
resolve_title += f'/{token}' resolve_title += f'/{token}'
info_json_url = self._resolv_url(self._BASE_URL + resolve_title) info_json_url = self._resolv_url(self._BASE_URL + resolve_title)
info = self._download_json( info = self._call_api(
info_json_url, full_title, 'Downloading info JSON', query=query, headers=self._HEADERS) info_json_url, full_title, 'Downloading info JSON', query=query, headers=self._HEADERS)
return self._extract_info_dict(info, full_title, token) return self._extract_info_dict(info, full_title, token)
@ -641,7 +655,7 @@ class SoundcloudPlaylistBaseIE(SoundcloudBaseIE):
playlist_id = str(playlist['id']) playlist_id = str(playlist['id'])
tracks = playlist.get('tracks') or [] tracks = playlist.get('tracks') or []
if not all(t.get('permalink_url') for t in tracks) and token: if not all(t.get('permalink_url') for t in tracks) and token:
tracks = self._download_json( tracks = self._call_api(
self._API_V2_BASE + 'tracks', playlist_id, self._API_V2_BASE + 'tracks', playlist_id,
'Downloading tracks', query={ 'Downloading tracks', query={
'ids': ','.join([str(t['id']) for t in tracks]), 'ids': ','.join([str(t['id']) for t in tracks]),
@ -699,7 +713,7 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
if token: if token:
full_title += '/' + token full_title += '/' + token
info = self._download_json(self._resolv_url( info = self._call_api(self._resolv_url(
self._BASE_URL + full_title), full_title, headers=self._HEADERS) self._BASE_URL + full_title), full_title, headers=self._HEADERS)
if 'errors' in info: if 'errors' in info:
@ -730,7 +744,7 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudBaseIE):
for i in itertools.count(): for i in itertools.count():
for retry in self.RetryManager(): for retry in self.RetryManager():
try: try:
response = self._download_json( response = self._call_api(
url, playlist_id, query=query, headers=self._HEADERS, url, playlist_id, query=query, headers=self._HEADERS,
note=f'Downloading track page {i + 1}') note=f'Downloading track page {i + 1}')
break break
@ -838,7 +852,7 @@ class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url)
uploader = mobj.group('user') uploader = mobj.group('user')
user = self._download_json( user = self._call_api(
self._resolv_url(self._BASE_URL + uploader), self._resolv_url(self._BASE_URL + uploader),
uploader, 'Downloading user info', headers=self._HEADERS) uploader, 'Downloading user info', headers=self._HEADERS)
@ -864,7 +878,7 @@ class SoundcloudUserPermalinkIE(SoundcloudPagedPlaylistBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
user_id = self._match_id(url) user_id = self._match_id(url)
user = self._download_json( user = self._call_api(
self._resolv_url(url), user_id, 'Downloading user info', headers=self._HEADERS) self._resolv_url(url), user_id, 'Downloading user info', headers=self._HEADERS)
return self._extract_playlist( return self._extract_playlist(
@ -886,7 +900,7 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
track_name = self._match_id(url) track_name = self._match_id(url)
track = self._download_json(self._resolv_url(url), track_name, headers=self._HEADERS) track = self._call_api(self._resolv_url(url), track_name, headers=self._HEADERS)
track_id = self._search_regex( track_id = self._search_regex(
r'soundcloud:track-stations:(\d+)', track['id'], 'track id') r'soundcloud:track-stations:(\d+)', track['id'], 'track id')
@ -930,7 +944,7 @@ class SoundcloudRelatedIE(SoundcloudPagedPlaylistBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
slug, relation = self._match_valid_url(url).group('slug', 'relation') slug, relation = self._match_valid_url(url).group('slug', 'relation')
track = self._download_json( track = self._call_api(
self._resolv_url(self._BASE_URL + slug), self._resolv_url(self._BASE_URL + slug),
slug, 'Downloading track info', headers=self._HEADERS) slug, 'Downloading track info', headers=self._HEADERS)
@ -965,7 +979,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
if token: if token:
query['secret_token'] = token query['secret_token'] = token
data = self._download_json( data = self._call_api(
self._API_V2_BASE + 'playlists/' + playlist_id, self._API_V2_BASE + 'playlists/' + playlist_id,
playlist_id, 'Downloading playlist', query=query, headers=self._HEADERS) playlist_id, 'Downloading playlist', query=query, headers=self._HEADERS)
@ -1000,7 +1014,7 @@ class SoundcloudSearchIE(SoundcloudBaseIE, SearchInfoExtractor):
next_url = update_url_query(self._API_V2_BASE + endpoint, query) next_url = update_url_query(self._API_V2_BASE + endpoint, query)
for i in itertools.count(1): for i in itertools.count(1):
response = self._download_json( response = self._call_api(
next_url, collection_id, f'Downloading page {i}', next_url, collection_id, f'Downloading page {i}',
'Unable to download API page', headers=self._HEADERS) 'Unable to download API page', headers=self._HEADERS)

View file

@ -0,0 +1,198 @@
import base64
import urllib.parse
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
int_or_none,
qualities,
remove_start,
smuggle_url,
unsmuggle_url,
update_url_query,
url_or_none,
urlencode_postdata,
)
from ..utils.traversal import traverse_obj
class SproutVideoIE(InfoExtractor):
    # Extractor for SproutVideo embed player pages; also detects such embeds
    # in third-party webpages through _EMBED_REGEX.
    _NO_SCHEME_RE = r'//videos\.sproutvideo\.com/embed/(?P<id>[\da-f]+)/[\da-f]+'
    _VALID_URL = rf'https?:{_NO_SCHEME_RE}'
    _EMBED_REGEX = [rf'<iframe [^>]*\bsrc=["\'](?P<url>(?:https?:)?{_NO_SCHEME_RE}[^"\']*)["\']']
    _TESTS = [{
        'url': 'https://videos.sproutvideo.com/embed/4c9dddb01910e3c9c4/0fc24387c4f24ee3',
        'md5': '1343ce1a6cb39d67889bfa07c7b02b0e',
        'info_dict': {
            'id': '4c9dddb01910e3c9c4',
            'ext': 'mp4',
            'title': 'Adrien Labaeye : Berlin, des communautés aux communs',
            'duration': 576,
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }, {
        'url': 'https://videos.sproutvideo.com/embed/a79fdcb21f1be2c62e/93bf31e41e39ca27',
        'md5': 'cebae5cf558cca83271917cf4ec03f26',
        'info_dict': {
            'id': 'a79fdcb21f1be2c62e',
            'ext': 'mp4',
            'title': 'HS_01_Live Stream 2023-01-14 10:00',
            'duration': 703,
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }, {
        # http formats 'sd' and 'hd' are available
        'url': 'https://videos.sproutvideo.com/embed/119cd6bc1a18e6cd98/30751a1761ae5b90',
        'md5': 'f368c78df07e78a749508b221528672c',
        'info_dict': {
            'id': '119cd6bc1a18e6cd98',
            'ext': 'mp4',
            'title': '3. Updating your Partner details',
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
            'duration': 60,
        },
        'params': {'format': 'hd'},
    }, {
        # subtitles
        'url': 'https://videos.sproutvideo.com/embed/119dd8ba121ee0cc98/4ee50c88a343215d?type=hd',
        'md5': '7f6798f037d7a3e3e07e67959de68fc6',
        'info_dict': {
            'id': '119dd8ba121ee0cc98',
            'ext': 'mp4',
            'title': 'Recipients Setup - Domestic Wire Only',
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
            'duration': 77,
            'subtitles': {'en': 'count:1'},
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.solidarum.org/vivre-ensemble/adrien-labaeye-berlin-des-communautes-aux-communs',
        'info_dict': {
            'id': '4c9dddb01910e3c9c4',
            'ext': 'mp4',
            'title': 'Adrien Labaeye : Berlin, des communautés aux communs',
            'duration': 576,
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }]
    _M3U8_URL_TMPL = 'https://{base}.videos.sproutvideo.com/{s3_user_hash}/{s3_video_hash}/video/index.m3u8'
    _QUALITIES = ('hd', 'uhd', 'source')  # Exclude 'sd' to prioritize hls formats above it

    @staticmethod
    def _policy_to_qs(policy, signature_key, as_string=False):
        # Turn policy['signatures'][signature_key] into query parameters:
        # a leading 'CloudFront-' is dropped from every key and the policy's
        # 'sessionID' is added. Returns a dict, or an urlencoded string when
        # as_string is true.
        query = {
            remove_start(name, 'CloudFront-'): value
            for name, value in policy['signatures'][signature_key].items()
        }
        query['sessionID'] = policy['sessionID']
        if as_string:
            return urllib.parse.urlencode(query, doseq=True)
        return query

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        # Scheme-relative embed URLs are completed with https, and the
        # embedding page is smuggled along as the referer.
        for found in super()._extract_embed_urls(url, webpage):
            absolute = f'https:{found}' if found.startswith('//') else found
            yield smuggle_url(absolute, {'referer': url})

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
        video_id = self._match_id(url)
        referer_header = traverse_obj(smuggled_data, {'Referer': 'referer'})
        webpage = self._download_webpage(url, video_id, headers=referer_header)

        # The player data is a base64-encoded JSON string assigned to `dat`
        data = self._search_json(
            r'var\s+dat\s*=\s*["\']', webpage, 'data', video_id, contains_pattern=r'[A-Za-z0-9+/=]+',
            end_pattern=r'["\'];', transform_source=lambda x: base64.b64decode(x).decode())

        formats, subtitles = [], {}
        headers = {
            'Accept': '*/*',
            'Origin': 'https://videos.sproutvideo.com',
            'Referer': url,
        }

        # HLS extraction is fatal; only attempt it if the JSON data says it's available
        if traverse_obj(data, 'hls'):
            manifest_query = self._policy_to_qs(data, 'm')
            fragment_query = self._policy_to_qs(data, 't', as_string=True)
            key_query = self._policy_to_qs(data, 'k', as_string=True)

            hls_formats = self._extract_m3u8_formats(
                self._M3U8_URL_TMPL.format(**data), video_id, 'mp4',
                m3u8_id='hls', headers=headers, query=manifest_query)
            # Manifest, segment and key requests each get their own signed query
            for hls_format in hls_formats:
                hls_format.update({
                    'url': update_url_query(hls_format['url'], manifest_query),
                    'extra_param_to_segment_url': fragment_query,
                    'extra_param_to_key_url': key_query,
                })
            formats.extend(hls_formats)

        # Direct downloads: (format_id, url) pairs whose URL is valid
        downloads = traverse_obj(data, ('downloads', {dict.items}, lambda _, v: url_or_none(v[1])))
        if downloads:
            quality = qualities(self._QUALITIES)
            acodec = 'none' if data.get('has_audio') is False else None
            for format_id, format_url in downloads:
                formats.append({
                    'format_id': str(format_id),
                    'url': format_url,
                    'ext': 'mp4',
                    'quality': quality(format_id),
                    'acodec': acodec,
                })

        # Subtitle tracks without a 'srclang' key are filed under 'en'
        for track in traverse_obj(data, ('subtitleData', lambda _, v: url_or_none(v['src']))):
            language = track.get('srclang', 'en')
            subtitles.setdefault(language, []).append({
                'url': track['src'],
            })

        metadata = traverse_obj(data, {
            'title': ('title', {str}),
            'duration': ('duration', {int_or_none}),
            'thumbnail': ('posterframe_url', {url_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'http_headers': headers,
            **metadata,
        }
class VidsIoIE(InfoExtractor):
    # Extractor for *.vids.io video pages; it only locates the SproutVideo
    # embed on the page (submitting the video password first when the page
    # answers with HTTP 403) and delegates to SproutVideoIE.
    IE_NAME = 'vids.io'
    _VALID_URL = r'https?://[\w-]+\.vids\.io/videos/(?P<id>[\da-f]+)/(?P<display_id>[\w-]+)'
    _TESTS = [{
        'url': 'https://how-to-video.vids.io/videos/799cd8b11c10efc1f0/how-to-video-live-streaming',
        'md5': '9bbbb2c0c0739eb163b80f87b8d77c9e',
        'info_dict': {
            'id': '799cd8b11c10efc1f0',
            'ext': 'mp4',
            'title': 'How to Video: Live Streaming',
            'duration': 2787,
            'thumbnail': r're:https?://images\.sproutvideo\.com/.+\.jpg',
        },
    }]

    def _real_extract(self, url):
        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
        # A 403 response is accepted here because it carries the password form
        webpage, urlh = self._download_webpage_handle(url, display_id, expected_status=403)

        if urlh.status == 403:
            password = self.get_param('videopassword')
            if not password:
                raise ExtractorError(
                    'This video is password-protected; use the --video-password option', expected=True)
            try:
                # The password is posted together with the form's hidden inputs
                form_data = urlencode_postdata({
                    'password': password,
                    **self._hidden_inputs(webpage),
                })
                webpage = self._download_webpage(
                    url, display_id, 'Submitting video password', data=form_data)
                # Requests with user's session cookie `_sproutvideo_session` are now authorized
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                    raise ExtractorError('Incorrect password', expected=True)
                raise

        embed_url = next(SproutVideoIE._extract_embed_urls(url, webpage), None)
        if not embed_url:
            raise ExtractorError('Unable to extract any SproutVideo embed url')
        return self.url_result(embed_url, SproutVideoIE, video_id)

View file

@ -30,6 +30,7 @@ from ..utils import (
try_call, try_call,
try_get, try_get,
url_or_none, url_or_none,
urlencode_postdata,
) )
@ -43,8 +44,8 @@ class TikTokBaseIE(InfoExtractor):
'iid': None, 'iid': None,
# TikTok (KR/PH/TW/TH/VN) = trill, TikTok (rest of world) = musical_ly, Douyin = aweme # TikTok (KR/PH/TW/TH/VN) = trill, TikTok (rest of world) = musical_ly, Douyin = aweme
'app_name': 'musical_ly', 'app_name': 'musical_ly',
'app_version': '34.1.2', 'app_version': '35.1.3',
'manifest_app_version': '2023401020', 'manifest_app_version': '2023501030',
# "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0 # "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0
'aid': '0', 'aid': '0',
} }
@ -114,7 +115,7 @@ class TikTokBaseIE(InfoExtractor):
'universal data', display_id, end_pattern=r'</script>', default={}), 'universal data', display_id, end_pattern=r'</script>', default={}),
('__DEFAULT_SCOPE__', {dict})) or {} ('__DEFAULT_SCOPE__', {dict})) or {}
def _call_api_impl(self, ep, query, video_id, fatal=True, def _call_api_impl(self, ep, video_id, query=None, data=None, headers=None, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'): note='Downloading API JSON', errnote='Unable to download API page'):
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', k=160))) self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', k=160)))
webpage_cookies = self._get_cookies(self._WEBPAGE_HOST) webpage_cookies = self._get_cookies(self._WEBPAGE_HOST)
@ -125,7 +126,8 @@ class TikTokBaseIE(InfoExtractor):
fatal=fatal, note=note, errnote=errnote, headers={ fatal=fatal, note=note, errnote=errnote, headers={
'User-Agent': self._APP_USER_AGENT, 'User-Agent': self._APP_USER_AGENT,
'Accept': 'application/json', 'Accept': 'application/json',
}, query=query) **(headers or {}),
}, query=query, data=data)
def _build_api_query(self, query): def _build_api_query(self, query):
return filter_dict({ return filter_dict({
@ -174,7 +176,7 @@ class TikTokBaseIE(InfoExtractor):
'openudid': ''.join(random.choices('0123456789abcdef', k=16)), 'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
}) })
def _call_api(self, ep, query, video_id, fatal=True, def _call_api(self, ep, video_id, query=None, data=None, headers=None, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'): note='Downloading API JSON', errnote='Unable to download API page'):
if not self._APP_INFO and not self._get_next_app_info(): if not self._APP_INFO and not self._get_next_app_info():
message = 'No working app info is available' message = 'No working app info is available'
@ -187,9 +189,11 @@ class TikTokBaseIE(InfoExtractor):
max_tries = len(self._APP_INFO_POOL) + 1 # _APP_INFO_POOL + _APP_INFO max_tries = len(self._APP_INFO_POOL) + 1 # _APP_INFO_POOL + _APP_INFO
for count in itertools.count(1): for count in itertools.count(1):
self.write_debug(str(self._APP_INFO)) self.write_debug(str(self._APP_INFO))
real_query = self._build_api_query(query) real_query = self._build_api_query(query or {})
try: try:
return self._call_api_impl(ep, real_query, video_id, fatal, note, errnote) return self._call_api_impl(
ep, video_id, query=real_query, data=data, headers=headers,
fatal=fatal, note=note, errnote=errnote)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0: if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
message = str(e.cause or e.msg) message = str(e.cause or e.msg)
@ -204,17 +208,29 @@ class TikTokBaseIE(InfoExtractor):
raise raise
def _extract_aweme_app(self, aweme_id): def _extract_aweme_app(self, aweme_id):
feed_list = self._call_api( aweme_detail = traverse_obj(
'feed', {'aweme_id': aweme_id}, aweme_id, note='Downloading video feed', self._call_api('multi/aweme/detail', aweme_id, data=urlencode_postdata({
errnote='Unable to download video feed').get('aweme_list') or [] 'aweme_ids': f'[{aweme_id}]',
aweme_detail = next((aweme for aweme in feed_list if str(aweme.get('aweme_id')) == aweme_id), None) 'request_source': '0',
}), headers={'X-Argus': ''}), ('aweme_details', 0, {dict}))
if not aweme_detail: if not aweme_detail:
raise ExtractorError('Unable to find video in feed', video_id=aweme_id) raise ExtractorError('Unable to extract aweme detail info', video_id=aweme_id)
return self._parse_aweme_video_app(aweme_detail) return self._parse_aweme_video_app(aweme_detail)
def _extract_web_data_and_status(self, url, video_id, fatal=True): def _extract_web_data_and_status(self, url, video_id, fatal=True):
webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'}, fatal=fatal) or '' video_data, status = {}, -1
video_data, status = {}, None
res = self._download_webpage_handle(url, video_id, fatal=fatal, headers={'User-Agent': 'Mozilla/5.0'})
if res is False:
return video_data, status
webpage, urlh = res
if urllib.parse.urlparse(urlh.url).path == '/login':
message = 'TikTok is requiring login for access to this content'
if fatal:
self.raise_login_required(message)
self.report_warning(f'{message}. {self._login_hint()}')
return video_data, status
if universal_data := self._get_universal_data(webpage, video_id): if universal_data := self._get_universal_data(webpage, video_id):
self.write_debug('Found universal data for rehydration') self.write_debug('Found universal data for rehydration')
@ -1026,7 +1042,8 @@ class TikTokBaseListIE(TikTokBaseIE): # XXX: Conventionally, base classes shoul
for retry in self.RetryManager(): for retry in self.RetryManager():
try: try:
post_list = self._call_api( post_list = self._call_api(
self._API_ENDPOINT, query, display_id, note=f'Downloading video list page {page}', self._API_ENDPOINT, display_id, query=query,
note=f'Downloading video list page {page}',
errnote='Unable to download video list') errnote='Unable to download video list')
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0: if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:

View file

@ -21,7 +21,7 @@ class TubeTuGrazBaseIE(InfoExtractor):
if not urlh: if not urlh:
return return
content, urlh = self._download_webpage_handle( response = self._download_webpage_handle(
urlh.url, None, fatal=False, headers={'referer': urlh.url}, urlh.url, None, fatal=False, headers={'referer': urlh.url},
note='logging in', errnote='unable to log in', note='logging in', errnote='unable to log in',
data=urlencode_postdata({ data=urlencode_postdata({
@ -30,7 +30,11 @@ class TubeTuGrazBaseIE(InfoExtractor):
'j_username': username, 'j_username': username,
'j_password': password, 'j_password': password,
})) }))
if not urlh or urlh.url == 'https://tube.tugraz.at/paella/ui/index.html': if not response:
return
content, urlh = response
if urlh.url == 'https://tube.tugraz.at/paella/ui/index.html':
return return
if not self._html_search_regex( if not self._html_search_regex(
@ -39,7 +43,7 @@ class TubeTuGrazBaseIE(InfoExtractor):
self.report_warning('unable to login: incorrect password') self.report_warning('unable to login: incorrect password')
return return
content, urlh = self._download_webpage_handle( urlh = self._request_webpage(
urlh.url, None, fatal=False, headers={'referer': urlh.url}, urlh.url, None, fatal=False, headers={'referer': urlh.url},
note='logging in with TFA', errnote='unable to log in with TFA', note='logging in with TFA', errnote='unable to log in with TFA',
data=urlencode_postdata({ data=urlencode_postdata({

View file

@ -13,6 +13,7 @@ from ..utils import (
class TubiTvIE(InfoExtractor): class TubiTvIE(InfoExtractor):
IE_NAME = 'tubitv'
_VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?P<type>video|movies|tv-shows)/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?P<type>video|movies|tv-shows)/(?P<id>\d+)'
_LOGIN_URL = 'http://tubitv.com/login' _LOGIN_URL = 'http://tubitv.com/login'
_NETRC_MACHINE = 'tubitv' _NETRC_MACHINE = 'tubitv'
@ -148,30 +149,54 @@ class TubiTvIE(InfoExtractor):
class TubiTvShowIE(InfoExtractor): class TubiTvShowIE(InfoExtractor):
_WORKING = False IE_NAME = 'tubitv:series'
_VALID_URL = r'https?://(?:www\.)?tubitv\.com/series/[0-9]+/(?P<show_name>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?tubitv\.com/series/\d+/(?P<show_name>[^/?#]+)(?:/season-(?P<season>\d+))?'
_TESTS = [{ _TESTS = [{
'url': 'https://tubitv.com/series/3936/the-joy-of-painting-with-bob-ross?start=true', 'url': 'https://tubitv.com/series/3936/the-joy-of-painting-with-bob-ross?start=true',
'playlist_mincount': 390, 'playlist_mincount': 389,
'info_dict': { 'info_dict': {
'id': 'the-joy-of-painting-with-bob-ross', 'id': 'the-joy-of-painting-with-bob-ross',
}, },
}, {
'url': 'https://tubitv.com/series/2311/the-saddle-club/season-1',
'playlist_count': 26,
'info_dict': {
'id': 'the-saddle-club-season-1',
},
}, {
'url': 'https://tubitv.com/series/2311/the-saddle-club/season-3',
'playlist_count': 19,
'info_dict': {
'id': 'the-saddle-club-season-3',
},
}, {
'url': 'https://tubitv.com/series/2311/the-saddle-club/',
'playlist_mincount': 71,
'info_dict': {
'id': 'the-saddle-club',
},
}] }]
def _entries(self, show_url, show_name): def _entries(self, show_url, playlist_id, selected_season):
show_webpage = self._download_webpage(show_url, show_name) webpage = self._download_webpage(show_url, playlist_id)
show_json = self._parse_json(self._search_regex( data = self._search_json(
r'window\.__data\s*=\s*({[^<]+});\s*</script>', r'window\.__data\s*=', webpage, 'data', playlist_id,
show_webpage, 'data'), show_name, transform_source=js_to_json)['video'] transform_source=js_to_json)['video']
for episode_id in show_json['fullContentById']: # v['number'] is already a decimal string, but stringify to protect against API changes
if traverse_obj(show_json, ('byId', episode_id, 'type')) == 's': path = [lambda _, v: str(v['number']) == selected_season] if selected_season else [..., {dict}]
continue
for season in traverse_obj(data, ('byId', lambda _, v: v['type'] == 's', 'seasons', *path)):
season_number = int_or_none(season.get('number'))
for episode in traverse_obj(season, ('episodes', lambda _, v: v['id'])):
episode_id = episode['id']
yield self.url_result( yield self.url_result(
f'https://tubitv.com/tv-shows/{episode_id}/', f'https://tubitv.com/tv-shows/{episode_id}/', TubiTvIE, episode_id,
ie=TubiTvIE.ie_key(), video_id=episode_id) season_number=season_number, episode_number=int_or_none(episode.get('num')))
def _real_extract(self, url): def _real_extract(self, url):
show_name = self._match_valid_url(url).group('show_name') playlist_id, selected_season = self._match_valid_url(url).group('show_name', 'season')
return self.playlist_result(self._entries(url, show_name), playlist_id=show_name) if selected_season:
playlist_id = f'{playlist_id}-season-{selected_season}'
return self.playlist_result(self._entries(url, playlist_id, selected_season), playlist_id)

View file

@ -14,6 +14,7 @@ from ..utils import (
float_or_none, float_or_none,
format_field, format_field,
int_or_none, int_or_none,
join_nonempty,
make_archive_id, make_archive_id,
remove_end, remove_end,
str_or_none, str_or_none,
@ -107,7 +108,7 @@ class TwitterBaseIE(InfoExtractor):
tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
f = { f = {
'url': variant_url, 'url': variant_url,
'format_id': 'http' + (f'-{tbr}' if tbr else ''), 'format_id': join_nonempty('http', tbr),
'tbr': tbr, 'tbr': tbr,
} }
self._search_dimensions_in_video_url(f, variant_url) self._search_dimensions_in_video_url(f, variant_url)

View file

@ -5,6 +5,7 @@ from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
join_nonempty,
parse_age_limit, parse_age_limit,
traverse_obj, traverse_obj,
) )
@ -120,7 +121,7 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
'height', default=None)) 'height', default=None))
formats.append({ formats.append({
'url': video_asset_url, 'url': video_asset_url,
'format_id': 'http{}'.format(f'-{bitrate}' if bitrate else ''), 'format_id': join_nonempty('http', bitrate),
'tbr': bitrate, 'tbr': bitrate,
'height': height, 'height': height,
'vcodec': video_asset.get('codec'), 'vcodec': video_asset.get('codec'),

View file

@ -829,21 +829,33 @@ class VimeoIE(VimeoBaseInfoExtractor):
url = 'https://vimeo.com/' + video_id url = 'https://vimeo.com/' + video_id
self._try_album_password(url) self._try_album_password(url)
is_secure = urllib.parse.urlparse(url).scheme == 'https'
try: try:
# Retrieve video webpage to extract further information # Retrieve video webpage to extract further information
webpage, urlh = self._download_webpage_handle( webpage, urlh = self._download_webpage_handle(
url, video_id, headers=headers) url, video_id, headers=headers, impersonate=is_secure)
redirect_url = urlh.url redirect_url = urlh.url
except ExtractorError as ee: except ExtractorError as error:
if isinstance(ee.cause, HTTPError) and ee.cause.status == 403: if not isinstance(error.cause, HTTPError) or error.cause.status not in (403, 429):
errmsg = ee.cause.response.read() raise
errmsg = error.cause.response.read()
if b'Because of its privacy settings, this video cannot be played here' in errmsg: if b'Because of its privacy settings, this video cannot be played here' in errmsg:
raise ExtractorError( raise ExtractorError(
'Cannot download embed-only video without embedding ' 'Cannot download embed-only video without embedding URL. Please call yt-dlp '
'URL. Please call yt-dlp with the URL of the page ' 'with the URL of the page that embeds this video.', expected=True)
'that embeds this video.', # 403 == vimeo.com TLS fingerprint or DC IP block; 429 == player.vimeo.com TLS FP block
expected=True) status = error.cause.status
raise dcip_msg = 'If you are using a data center IP or VPN/proxy, your IP may be blocked'
if target := error.cause.response.extensions.get('impersonate'):
raise ExtractorError(
f'Got HTTP Error {status} when using impersonate target "{target}". {dcip_msg}')
elif not is_secure:
raise ExtractorError(f'Got HTTP Error {status}. {dcip_msg}', expected=True)
raise ExtractorError(
'This request has been blocked due to its TLS fingerprint. Install a '
'required impersonation dependency if possible, or else if you are okay with '
f'{self._downloader._format_err("compromising your security/cookies", "light red")}, '
f'try replacing "https:" with "http:" in the input URL. {dcip_msg}.', expected=True)
if '://player.vimeo.com/video/' in url: if '://player.vimeo.com/video/' in url:
config = self._search_json( config = self._search_json(

View file

@ -52,6 +52,7 @@ class WeiboBaseIE(InfoExtractor):
}) })
def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs): def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs):
# XXX: Always fatal; _download_webpage_handle only returns False (not a tuple) on error
webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs) webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs)
if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com': if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com':
self._update_visitor_cookies(urlh.url, video_id) self._update_visitor_cookies(urlh.url, video_id)

View file

@ -2,6 +2,7 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
float_or_none, float_or_none,
int_or_none, int_or_none,
join_nonempty,
unified_strdate, unified_strdate,
) )
@ -76,7 +77,7 @@ class WSJIE(InfoExtractor):
tbr = int_or_none(v.get('bitrate')) tbr = int_or_none(v.get('bitrate'))
formats.append({ formats.append({
'url': mp4_url, 'url': mp4_url,
'format_id': 'http' + (f'-{tbr}' if tbr else ''), 'format_id': join_nonempty('http', tbr),
'tbr': tbr, 'tbr': tbr,
'width': int_or_none(v.get('width')), 'width': int_or_none(v.get('width')),
'height': int_or_none(v.get('height')), 'height': int_or_none(v.get('height')),

View file

@ -8,6 +8,7 @@ from ..utils import (
ExtractorError, ExtractorError,
clean_html, clean_html,
int_or_none, int_or_none,
join_nonempty,
mimetype2ext, mimetype2ext,
parse_iso8601, parse_iso8601,
traverse_obj, traverse_obj,
@ -213,7 +214,7 @@ class YahooIE(InfoExtractor):
tbr = int_or_none(s.get('bitrate')) tbr = int_or_none(s.get('bitrate'))
formats.append({ formats.append({
'url': s_url, 'url': s_url,
'format_id': fmt + (f'-{tbr}' if tbr else ''), 'format_id': join_nonempty(fmt, tbr),
'width': int_or_none(s.get('width')), 'width': int_or_none(s.get('width')),
'height': int_or_none(s.get('height')), 'height': int_or_none(s.get('height')),
'tbr': tbr, 'tbr': tbr,
@ -371,12 +372,13 @@ class YahooJapanNewsIE(InfoExtractor):
url, content_id, 'mp4', 'm3u8_native', url, content_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)) m3u8_id='hls', fatal=False))
else: else:
bitrate = int_or_none(vid.get('bitrate'))
formats.append({ formats.append({
'url': url, 'url': url,
'format_id': f'http-{vid.get("bitrate")}', 'format_id': join_nonempty('http', bitrate),
'height': int_or_none(vid.get('height')), 'height': int_or_none(vid.get('height')),
'width': int_or_none(vid.get('width')), 'width': int_or_none(vid.get('width')),
'tbr': int_or_none(vid.get('bitrate')), 'tbr': bitrate,
}) })
self._remove_duplicate_formats(formats) self._remove_duplicate_formats(formats)

View file

@ -5,6 +5,7 @@ from ..utils import (
determine_ext, determine_ext,
float_or_none, float_or_none,
int_or_none, int_or_none,
join_nonempty,
mimetype2ext, mimetype2ext,
try_get, try_get,
urljoin, urljoin,
@ -116,12 +117,9 @@ class YandexDiskIE(InfoExtractor):
else: else:
size = video.get('size') or {} size = video.get('size') or {}
height = int_or_none(size.get('height')) height = int_or_none(size.get('height'))
format_id = 'hls'
if height:
format_id += f'-{height}p'
formats.append({ formats.append({
'ext': 'mp4', 'ext': 'mp4',
'format_id': format_id, 'format_id': join_nonempty('hls', height and f'{height}p'),
'height': height, 'height': height,
'protocol': 'm3u8_native', 'protocol': 'm3u8_native',
'url': format_url, 'url': format_url,

View file

@ -4,6 +4,7 @@ import collections
import copy import copy
import datetime as dt import datetime as dt
import enum import enum
import functools
import hashlib import hashlib
import itertools import itertools
import json import json
@ -20,7 +21,6 @@ import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from .openload import PhantomJSwrapper from .openload import PhantomJSwrapper
from ..compat import functools
from ..jsinterp import JSInterpreter from ..jsinterp import JSInterpreter
from ..networking.exceptions import HTTPError, network_exceptions from ..networking.exceptions import HTTPError, network_exceptions
from ..utils import ( from ..utils import (
@ -468,7 +468,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko', 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko',
] ]
_IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'} _IGNORED_WARNINGS = {
'Unavailable videos will be hidden during playback',
'Unavailable videos are hidden',
}
_YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en
_YT_CHANNEL_UCID_RE = r'UC[\w-]{22}' _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}'
@ -885,14 +888,14 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return count return count
@staticmethod @staticmethod
def _extract_thumbnails(data, *path_list): def _extract_thumbnails(data, *path_list, final_key='thumbnails'):
""" """
Extract thumbnails from thumbnails dict Extract thumbnails from thumbnails dict
@param path_list: path list to level that contains 'thumbnails' key @param path_list: path list to level that contains 'thumbnails' key
""" """
thumbnails = [] thumbnails = []
for path in path_list or [()]: for path in path_list or [()]:
for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...)): for thumbnail in traverse_obj(data, (*variadic(path), final_key, ...)):
thumbnail_url = url_or_none(thumbnail.get('url')) thumbnail_url = url_or_none(thumbnail.get('url'))
if not thumbnail_url: if not thumbnail_url:
continue continue
@ -3797,6 +3800,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration): def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
CHUNK_SIZE = 10 << 20 CHUNK_SIZE = 10 << 20
PREFERRED_LANG_VALUE = 10
original_language = None
itags, stream_ids = collections.defaultdict(set), [] itags, stream_ids = collections.defaultdict(set), []
itag_qualities, res_qualities = {}, {0: None} itag_qualities, res_qualities = {}, {0: None}
q = qualities([ q = qualities([
@ -3845,6 +3850,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
itag_qualities[itag] = quality itag_qualities[itag] = quality
if height: if height:
res_qualities[height] = quality res_qualities[height] = quality
is_default = audio_track.get('audioIsDefault')
is_descriptive = 'descriptive' in (audio_track.get('displayName') or '').lower()
language_code = audio_track.get('id', '').split('.')[0]
if language_code and is_default:
original_language = language_code
# FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
# (adding `&sq=0` to the URL) and parsing emsg box to determine the # (adding `&sq=0` to the URL) and parsing emsg box to determine the
# number of fragment that would subsequently requested with (`&sq=N`) # number of fragment that would subsequently requested with (`&sq=N`)
@ -3870,7 +3882,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
continue continue
query = parse_qs(fmt_url) query = parse_qs(fmt_url)
throttled = False
if query.get('n'): if query.get('n'):
try: try:
decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0]) decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
@ -3884,20 +3895,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n') f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
if player_url: if player_url:
self.report_warning( self.report_warning(
f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}' f'nsig extraction failed: Some formats may be missing\n{phantomjs_hint}'
f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True) f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
self.write_debug(e, only_once=True) self.write_debug(e, only_once=True)
else: else:
self.report_warning( self.report_warning(
'Cannot decrypt nsig without player_url: You may experience throttling for some formats', 'Cannot decrypt nsig without player_url: Some formats may be missing',
video_id=video_id, only_once=True) video_id=video_id, only_once=True)
throttled = True continue
tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
language_preference = (
10 if audio_track.get('audioIsDefault') and 10
else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
else -1)
format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)})) format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)}))
# Some formats may have much smaller duration than others (possibly damaged during encoding) # Some formats may have much smaller duration than others (possibly damaged during encoding)
# E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823 # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
@ -3924,17 +3931,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'filesize': int_or_none(fmt.get('contentLength')), 'filesize': int_or_none(fmt.get('contentLength')),
'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}', 'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}',
'format_note': join_nonempty( 'format_note': join_nonempty(
join_nonempty(audio_track.get('displayName'), join_nonempty(audio_track.get('displayName'), is_default and ' (default)', delim=''),
language_preference > 0 and ' (default)', delim=''),
name, fmt.get('isDrc') and 'DRC', name, fmt.get('isDrc') and 'DRC',
try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()), try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()), try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
throttled and 'THROTTLED', is_damaged and 'DAMAGED', is_broken and 'BROKEN', is_damaged and 'DAMAGED', is_broken and 'BROKEN',
(self.get_param('verbose') or all_formats) and client_name, (self.get_param('verbose') or all_formats) and client_name,
delim=', '), delim=', '),
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372 # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
'source_preference': ((-10 if throttled else -5 if itag == '22' else -1) 'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0),
+ (100 if 'Premium' in name else 0)),
'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1 'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1
'audio_channels': fmt.get('audioChannels'), 'audio_channels': fmt.get('audioChannels'),
'height': height, 'height': height,
@ -3944,9 +3949,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'filesize_approx': filesize_from_tbr(tbr, format_duration), 'filesize_approx': filesize_from_tbr(tbr, format_duration),
'url': fmt_url, 'url': fmt_url,
'width': int_or_none(fmt.get('width')), 'width': int_or_none(fmt.get('width')),
'language': join_nonempty(audio_track.get('id', '').split('.')[0], 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None,
'desc' if language_preference < -1 else '') or None, 'language_preference': PREFERRED_LANG_VALUE if is_default else -10 if is_descriptive else -1,
'language_preference': language_preference,
# Strictly de-prioritize broken, damaged and 3gp formats # Strictly de-prioritize broken, damaged and 3gp formats
'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None, 'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None,
} }
@ -4007,6 +4011,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif itag: elif itag:
f['format_id'] = itag f['format_id'] = itag
if original_language and f.get('language') == original_language:
f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ')
f['language_preference'] = PREFERRED_LANG_VALUE
if f.get('source_preference') is None: if f.get('source_preference') is None:
f['source_preference'] = -1 f['source_preference'] = -1
@ -4351,7 +4359,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'), 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
'live_status': live_status, 'live_status': live_status,
'release_timestamp': live_start_time, 'release_timestamp': live_start_time,
'_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats '_format_sort_fields': ( # source_preference is lower for potentially damaged formats
'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'), 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'),
} }
@ -5124,6 +5132,10 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
else: else:
metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict) metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict)
# pageHeaderViewModel slow rollout began April 2024
page_header_view_model = traverse_obj(data, (
'header', 'pageHeaderRenderer', 'content', 'pageHeaderViewModel', {dict}))
# We can get the uncropped banner/avatar by replacing the crop params with '=s0' # We can get the uncropped banner/avatar by replacing the crop params with '=s0'
# See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714 # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714
def _get_uncropped(url): def _get_uncropped(url):
@ -5139,8 +5151,10 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
'preference': 1, 'preference': 1,
}) })
channel_banners = self._extract_thumbnails( channel_banners = (
data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner'))) self._extract_thumbnails(data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner')))
or self._extract_thumbnails(
page_header_view_model, ('banner', 'imageBannerViewModel', 'image'), final_key='sources'))
for banner in channel_banners: for banner in channel_banners:
banner['preference'] = -10 banner['preference'] = -10
@ -5167,7 +5181,11 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag'))
or info['id']), or info['id']),
'availability': self._extract_availability(data), 'availability': self._extract_availability(data),
'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')), 'channel_follower_count': (
self._get_count(data, ('header', ..., 'subscriberCountText'))
or traverse_obj(page_header_view_model, (
'metadata', 'contentMetadataViewModel', 'metadataRows', ..., 'metadataParts',
lambda _, v: 'subscribers' in v['text']['content'], 'text', 'content', {parse_count}, any))),
'description': try_get(metadata_renderer, lambda x: x.get('description', '')), 'description': try_get(metadata_renderer, lambda x: x.get('description', '')),
'tags': (traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str})) 'tags': (traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str}))
or traverse_obj(metadata_renderer, ('keywords', {lambda x: x and shlex.split(x)}, ...))), or traverse_obj(metadata_renderer, ('keywords', {lambda x: x and shlex.split(x)}, ...))),

View file

@ -667,12 +667,12 @@ class JSInterpreter:
self.interpret_expression(v, local_vars, allow_recursion) self.interpret_expression(v, local_vars, allow_recursion)
for v in self._separate(arg_str)] for v in self._separate(arg_str)]
if obj == str: if obj is str:
if member == 'fromCharCode': if member == 'fromCharCode':
assertion(argvals, 'takes one or more arguments') assertion(argvals, 'takes one or more arguments')
return ''.join(map(chr, argvals)) return ''.join(map(chr, argvals))
raise self.Exception(f'Unsupported String method {member}', expr) raise self.Exception(f'Unsupported String method {member}', expr)
elif obj == float: elif obj is float:
if member == 'pow': if member == 'pow':
assertion(len(argvals) == 2, 'takes two arguments') assertion(len(argvals) == 2, 'takes two arguments')
return argvals[0] ** argvals[1] return argvals[0] ** argvals[1]

View file

@ -230,9 +230,7 @@ class Urllib3LoggingFilter(logging.Filter):
def filter(self, record): def filter(self, record):
# Ignore HTTP request messages since HTTPConnection prints those # Ignore HTTP request messages since HTTPConnection prints those
if record.msg == '%s://%s:%s "%s %s %s" %s %s': return record.msg != '%s://%s:%s "%s %s %s" %s %s'
return False
return True
class Urllib3LoggingHandler(logging.Handler): class Urllib3LoggingHandler(logging.Handler):

View file

@ -1,6 +1,7 @@
from __future__ import annotations from __future__ import annotations
import contextlib import contextlib
import functools
import io import io
import logging import logging
import ssl import ssl
@ -22,7 +23,6 @@ from .exceptions import (
TransportError, TransportError,
) )
from .websocket import WebSocketRequestHandler, WebSocketResponse from .websocket import WebSocketRequestHandler, WebSocketResponse
from ..compat import functools
from ..dependencies import websockets from ..dependencies import websockets
from ..socks import ProxyError as SocksProxyError from ..socks import ProxyError as SocksProxyError
from ..utils import int_or_none from ..utils import int_or_none

View file

@ -474,7 +474,7 @@ def create_parser():
'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress', 'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'no-external-downloader-progress',
'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi', 'embed-metadata', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date', 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-youtube-prefer-utc-upload-date',
'prefer-legacy-http-handler', 'manifest-filesize-approx', 'prefer-legacy-http-handler', 'manifest-filesize-approx', 'allow-unsafe-ext',
}, 'aliases': { }, 'aliases': {
'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'], 'youtube-dl': ['all', '-multistreams', '-playlist-match-filter', '-manifest-filesize-approx'],
'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'], 'youtube-dlc': ['all', '-no-youtube-channel-redirect', '-no-live-chat', '-playlist-match-filter', '-manifest-filesize-approx'],
@ -646,7 +646,7 @@ def create_parser():
'You can also simply specify a field to match if the field is present, ' 'You can also simply specify a field to match if the field is present, '
'use "!field" to check if the field is not present, and "&" to check multiple conditions. ' 'use "!field" to check if the field is not present, and "&" to check multiple conditions. '
'Use a "\\" to escape "&" or quotes if needed. If used multiple times, ' 'Use a "\\" to escape "&" or quotes if needed. If used multiple times, '
'the filter matches if atleast one of the conditions are met. E.g. --match-filter ' 'the filter matches if at least one of the conditions is met. E.g. --match-filter '
'!is_live --match-filter "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" ' '!is_live --match-filter "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
'matches only videos that are not live OR those that have a like count more than 100 ' 'matches only videos that are not live OR those that have a like count more than 100 '
'(or the like field is not available) and also has a description ' '(or the like field is not available) and also has a description '
@ -1479,7 +1479,7 @@ def create_parser():
'Optionally, the KEYRING used for decrypting Chromium cookies on Linux, ' 'Optionally, the KEYRING used for decrypting Chromium cookies on Linux, '
'the name/path of the PROFILE to load cookies from, ' 'the name/path of the PROFILE to load cookies from, '
'and the CONTAINER name (if Firefox) ("none" for no container) ' 'and the CONTAINER name (if Firefox) ("none" for no container) '
'can be given with their respective seperators. ' 'can be given with their respective separators. '
'By default, all containers of the most recently accessed profile are used. ' 'By default, all containers of the most recently accessed profile are used. '
f'Currently supported keyrings are: {", ".join(map(str.lower, sorted(SUPPORTED_KEYRINGS)))}')) f'Currently supported keyrings are: {", ".join(map(str.lower, sorted(SUPPORTED_KEYRINGS)))}'))
filesystem.add_option( filesystem.add_option(
@ -1781,7 +1781,7 @@ def create_parser():
'It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), ' 'It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), '
'"video" (after --format; before --print/--output), "before_dl" (before each video download), ' '"video" (after --format; before --print/--output), "before_dl" (before each video download), '
'"post_process" (after each video download; default), ' '"post_process" (after each video download; default), '
'"after_move" (after moving video file to it\'s final locations), ' '"after_move" (after moving video file to its final locations), '
'"after_video" (after downloading and processing all formats of a video), ' '"after_video" (after downloading and processing all formats of a video), '
'or "playlist" (at end of playlist). ' 'or "playlist" (at end of playlist). '
'This option can be used multiple times to add different postprocessors')) 'This option can be used multiple times to add different postprocessors'))

View file

@ -119,14 +119,21 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
if not mutagen or prefer_atomicparsley: if not mutagen or prefer_atomicparsley:
success = False success = False
else: else:
try:
self._report_run('mutagen', filename) self._report_run('mutagen', filename)
f = {'jpeg': MP4Cover.FORMAT_JPEG, 'png': MP4Cover.FORMAT_PNG}
try:
with open(thumbnail_filename, 'rb') as thumbfile:
thumb_data = thumbfile.read()
type_ = imghdr.what(h=thumb_data)
if not type_:
raise ValueError('could not determine image type')
elif type_ not in f:
raise ValueError(f'incompatible image type: {type_}')
meta = MP4(filename) meta = MP4(filename)
# NOTE: the 'covr' atom is a non-standard MPEG-4 atom, # NOTE: the 'covr' atom is a non-standard MPEG-4 atom,
# Apple iTunes 'M4A' files include the 'moov.udta.meta.ilst' atom. # Apple iTunes 'M4A' files include the 'moov.udta.meta.ilst' atom.
f = {'jpeg': MP4Cover.FORMAT_JPEG, 'png': MP4Cover.FORMAT_PNG}[imghdr.what(thumbnail_filename)]
with open(thumbnail_filename, 'rb') as thumbfile:
thumb_data = thumbfile.read()
meta.tags['covr'] = [MP4Cover(data=thumb_data, imageformat=f)] meta.tags['covr'] = [MP4Cover(data=thumb_data, imageformat=f)]
meta.save() meta.save()
temp_filename = filename temp_filename = filename
@ -160,9 +167,10 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
stdout, stderr, returncode = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr, returncode = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if returncode: if returncode:
self.report_warning(f'Unable to embed thumbnails using AtomicParsley; {stderr.strip()}') self.report_warning(f'Unable to embed thumbnails using AtomicParsley; {stderr.strip()}')
success = False
# for formats that don't support thumbnails (like 3gp) AtomicParsley # for formats that don't support thumbnails (like 3gp) AtomicParsley
# won't create to the temporary file # won't create to the temporary file
if 'No changes' in stdout: elif 'No changes' in stdout:
self.report_warning('The file format doesn\'t support embedding a thumbnail') self.report_warning('The file format doesn\'t support embedding a thumbnail')
success = False success = False

View file

@ -1,5 +1,6 @@
import collections import collections
import contextvars import contextvars
import functools
import itertools import itertools
import json import json
import os import os
@ -8,7 +9,7 @@ import subprocess
import time import time
from .common import PostProcessor from .common import PostProcessor
from ..compat import functools, imghdr from ..compat import imghdr
from ..utils import ( from ..utils import (
MEDIA_EXTENSIONS, MEDIA_EXTENSIONS,
ISO639Utils, ISO639Utils,

View file

@ -2085,17 +2085,20 @@ def parse_duration(s):
(days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1))) (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
def prepend_extension(filename, ext, expected_real_ext=None): def _change_extension(prepend, filename, ext, expected_real_ext=None):
name, real_ext = os.path.splitext(filename) name, real_ext = os.path.splitext(filename)
return (
f'{name}.{ext}{real_ext}' if not expected_real_ext or real_ext[1:] == expected_real_ext:
if not expected_real_ext or real_ext[1:] == expected_real_ext filename = name
else f'{filename}.{ext}') if prepend and real_ext:
_UnsafeExtensionError.sanitize_extension(ext, prepend=True)
return f'{filename}.{ext}{real_ext}'
return f'{filename}.{_UnsafeExtensionError.sanitize_extension(ext)}'
def replace_extension(filename, ext, expected_real_ext=None): prepend_extension = functools.partial(_change_extension, True)
name, real_ext = os.path.splitext(filename) replace_extension = functools.partial(_change_extension, False)
return f'{name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename}.{ext}'
def check_executable(exe, args=[]): def check_executable(exe, args=[]):
@ -5035,6 +5038,101 @@ MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio
KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests) KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
class _UnsafeExtensionError(Exception):
    """
    Raised when a file extension is rejected as uncommon or potentially malicious.

    Intended to be caught in YoutubeDL.py, where a warning should be reported.

    Ref: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j
    """
    # Allow-list that the final dot-separated component of an extension is
    # checked against (after lowercasing) by sanitize_extension().
    ALLOWED_EXTENSIONS = frozenset({
        # internal
        'description',
        'json',
        'meta',
        'orig',
        'part',
        'temp',
        'uncut',
        'unknown_video',
        'ytdl',

        # video
        *MEDIA_EXTENSIONS.video,
        'avif',
        'ismv',
        'm2ts',
        'm4s',
        'mng',
        'mpeg',
        'qt',
        'swf',
        'ts',
        'vp9',
        'wvm',

        # audio
        *MEDIA_EXTENSIONS.audio,
        'isma',
        'mid',
        'mpga',
        'ra',

        # image
        *MEDIA_EXTENSIONS.thumbnails,
        'bmp',
        'gif',
        'heic',
        'ico',
        'jng',
        'jpeg',
        'jxl',
        'svg',
        'tif',
        'wbmp',

        # subtitle
        *MEDIA_EXTENSIONS.subtitles,
        'dfxp',
        'fs',
        'ismt',
        'sami',
        'scc',
        'ssa',
        'tt',
        'ttml',

        # others
        *MEDIA_EXTENSIONS.manifests,
        *MEDIA_EXTENSIONS.storyboards,
        'desktop',
        'ism',
        'm3u',
        'sbv',
        'url',
        'webloc',
        'xml',
    })

    def __init__(self, extension, /):
        # Keep the offending extension available to whoever catches the error
        self.extension = extension
        super().__init__(f'unsafe file extension: {extension!r}')

    @classmethod
    def sanitize_extension(cls, extension, /, *, prepend=False):
        """
        Validate `extension` and return the extension that should be used.

        @param extension  The extension to check (positional-only)
        @param prepend    If true, only the path separator check is applied
                          and the allow-list is not consulted
        @returns          `extension` unchanged, or 'unknown_video' when the
                          part after the last '.' is exactly 'bin'
        @raises           _UnsafeExtensionError if `extension` contains '/' or
                          '\\', or (when not prepending) its final part,
                          lowercased, is not in ALLOWED_EXTENSIONS
        """
        # Path separators are rejected regardless of `prepend`
        if any(separator in extension for separator in ('/', '\\')):
            raise cls(extension)

        if prepend:
            return extension

        # Only the part after the last '.' is validated;
        # without a '.' this is the whole string
        final_part = extension.rpartition('.')[2]
        if final_part == 'bin':
            # NB: this replaces the entire extension, not just the final part
            extension = final_part = 'unknown_video'
        if final_part.lower() not in cls.ALLOWED_EXTENSIONS:
            raise cls(extension)

        return extension
class RetryManager: class RetryManager:
"""Usage: """Usage:
for retry in RetryManager(...): for retry in RetryManager(...):

View file

@ -1,8 +1,8 @@
# Autogenerated by devscripts/update-version.py # Autogenerated by devscripts/update-version.py
__version__ = '2024.05.27' __version__ = '2024.07.01'
RELEASE_GIT_HEAD = '12b248ce60be1aa1362edd839d915bba70dbee4b' RELEASE_GIT_HEAD = '5ce582448ececb8d9c30c8c31f58330090ced03a'
VARIANT = None VARIANT = None
@ -12,4 +12,4 @@ CHANNEL = 'stable'
ORIGIN = 'yt-dlp/yt-dlp' ORIGIN = 'yt-dlp/yt-dlp'
_pkg_version = '2024.05.27' _pkg_version = '2024.07.01'