Mirror of https://github.com/yt-dlp/yt-dlp (synced 2024-12-28 22:24:34 +01:00)

Merge branch 'yt-dlp:master' into master

Commit 9483bb86df: 126 changed files with 5013 additions and 2795 deletions
.github/workflows/build.yml (vendored): 111 changes

@@ -12,6 +12,9 @@ on:
       unix:
         default: true
         type: boolean
+      linux_static:
+        default: true
+        type: boolean
       linux_arm:
         default: true
         type: boolean
@@ -27,9 +30,6 @@ on:
       windows32:
         default: true
         type: boolean
-      meta_files:
-        default: true
-        type: boolean
       origin:
         required: false
         default: ''
@@ -52,7 +52,11 @@ on:
         default: stable
         type: string
       unix:
-        description: yt-dlp, yt-dlp.tar.gz, yt-dlp_linux, yt-dlp_linux.zip
+        description: yt-dlp, yt-dlp.tar.gz
+        default: true
+        type: boolean
+      linux_static:
+        description: yt-dlp_linux
         default: true
         type: boolean
       linux_arm:
@@ -75,10 +79,6 @@ on:
         description: yt-dlp_x86.exe
         default: true
         type: boolean
-      meta_files:
-        description: SHA2-256SUMS, SHA2-512SUMS, _update_spec
-        default: true
-        type: boolean
       origin:
         description: Origin
         required: false
@@ -112,26 +112,9 @@ jobs:
       - uses: actions/setup-python@v5
         with:
           python-version: "3.10"
-      - uses: conda-incubator/setup-miniconda@v3
-        with:
-          miniforge-variant: Mambaforge
-          use-mamba: true
-          channels: conda-forge
-          auto-update-conda: true
-          activate-environment: ""
-          auto-activate-base: false
       - name: Install Requirements
         run: |
           sudo apt -y install zip pandoc man sed
-          cat > ./requirements.txt << EOF
-          python=3.10.*
-          brotli-python
-          EOF
-          python devscripts/install_deps.py --print \
-            --exclude brotli --exclude brotlicffi \
-            --include secretstorage --include pyinstaller >> ./requirements.txt
-          mamba create -n build --file ./requirements.txt
-
       - name: Prepare
         run: |
           python devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
@@ -140,30 +123,15 @@ jobs:
       - name: Build Unix platform-independent binary
         run: |
           make all tar
-      - name: Build Unix standalone binary
-        shell: bash -l {0}
-        run: |
-          unset LD_LIBRARY_PATH # Harmful; set by setup-python
-          conda activate build
-          python -m bundle.pyinstaller --onedir
-          (cd ./dist/yt-dlp_linux && zip -r ../yt-dlp_linux.zip .)
-          python -m bundle.pyinstaller
-          mv ./dist/yt-dlp_linux ./yt-dlp_linux
-          mv ./dist/yt-dlp_linux.zip ./yt-dlp_linux.zip
-
       - name: Verify --update-to
         if: vars.UPDATE_TO_VERIFICATION
         run: |
-          binaries=("yt-dlp" "yt-dlp_linux")
-          for binary in "${binaries[@]}"; do
-            chmod +x ./${binary}
-            cp ./${binary} ./${binary}_downgraded
-            version="$(./${binary} --version)"
-            ./${binary}_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
-            downgraded_version="$(./${binary}_downgraded --version)"
-            [[ "$version" != "$downgraded_version" ]]
-          done
+          chmod +x ./yt-dlp
+          cp ./yt-dlp ./yt-dlp_downgraded
+          version="$(./yt-dlp --version)"
+          ./yt-dlp_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
+          downgraded_version="$(./yt-dlp_downgraded --version)"
+          [[ "$version" != "$downgraded_version" ]]
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
@@ -171,8 +139,39 @@ jobs:
           path: |
             yt-dlp
             yt-dlp.tar.gz
-            yt-dlp_linux
-            yt-dlp_linux.zip
+          compression-level: 0
+
+  linux_static:
+    needs: process
+    if: inputs.linux_static
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Build static executable
+        env:
+          channel: ${{ inputs.channel }}
+          origin: ${{ needs.process.outputs.origin }}
+          version: ${{ inputs.version }}
+        run: |
+          mkdir ~/build
+          cd bundle/docker
+          docker compose up --build static
+          sudo chown "${USER}:docker" ~/build/yt-dlp_linux
+      - name: Verify --update-to
+        if: vars.UPDATE_TO_VERIFICATION
+        run: |
+          chmod +x ~/build/yt-dlp_linux
+          cp ~/build/yt-dlp_linux ~/build/yt-dlp_linux_downgraded
+          version="$(~/build/yt-dlp_linux --version)"
+          ~/build/yt-dlp_linux_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
+          downgraded_version="$(~/build/yt-dlp_linux_downgraded --version)"
+          [[ "$version" != "$downgraded_version" ]]
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: build-bin-${{ github.job }}
+          path: |
+            ~/build/yt-dlp_linux
           compression-level: 0

  linux_arm:
@@ -247,13 +246,13 @@ jobs:
         run: |
           brew install coreutils
           python3 devscripts/install_deps.py --user -o --include build
-          python3 devscripts/install_deps.py --print --include pyinstaller_macos > requirements.txt
+          python3 devscripts/install_deps.py --print --include pyinstaller > requirements.txt
           # We need to ignore wheels otherwise we break universal2 builds
           python3 -m pip install -U --user --no-binary :all: -r requirements.txt
           # We need to fuse our own universal2 wheels for curl_cffi
           python3 -m pip install -U --user delocate
           mkdir curl_cffi_whls curl_cffi_universal2
-          python3 devscripts/install_deps.py --print -o --include curl_cffi > requirements.txt
+          python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt
           for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do
             python3 -m pip download \
               --only-binary=:all: \
@@ -299,7 +298,7 @@ jobs:
   macos_legacy:
     needs: process
     if: inputs.macos_legacy
-    runs-on: macos-latest
+    runs-on: macos-12

     steps:
       - uses: actions/checkout@v4
@@ -319,7 +318,7 @@ jobs:
         run: |
           brew install coreutils
           python3 devscripts/install_deps.py --user -o --include build
-          python3 devscripts/install_deps.py --user --include pyinstaller_macos --include curl_cffi
+          python3 devscripts/install_deps.py --user --include pyinstaller

       - name: Prepare
         run: |
@@ -361,7 +360,7 @@ jobs:
       - name: Install Requirements
         run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
           python devscripts/install_deps.py -o --include build
-          python devscripts/install_deps.py --include py2exe --include curl_cffi
+          python devscripts/install_deps.py --include py2exe --include curl-cffi
           python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl"

       - name: Prepare
@@ -446,10 +445,11 @@ jobs:
           compression-level: 0

   meta_files:
-    if: inputs.meta_files && always() && !cancelled()
+    if: always() && !cancelled()
     needs:
       - process
       - unix
+      - linux_static
       - linux_arm
       - macos
       - macos_legacy
@@ -466,8 +466,9 @@ jobs:
       - name: Make SHA2-SUMS files
         run: |
           cd ./artifact/
-          sha256sum * > ../SHA2-256SUMS
-          sha512sum * > ../SHA2-512SUMS
+          # make sure SHA sums are also printed to stdout
+          sha256sum * | tee ../SHA2-256SUMS
+          sha512sum * | tee ../SHA2-512SUMS

       - name: Make Update spec
         run: |
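For reference, the new `linux_static` job drives the whole static build through Docker Compose. Below is a minimal local sketch of the same flow, assuming Docker Compose v2 is available and that leaving the `channel`, `origin` and `version` environment variables unset is acceptable for a throwaway build (the workflow exports them explicitly):

```sh
# Reproduce the linux_static job's build step on a local machine
mkdir -p ~/build
cd bundle/docker
docker compose up --build static    # runs bundle/docker/static/entrypoint.sh inside the Alpine image
ls -l ~/build/yt-dlp_linux          # staticx writes the final binary into the ~/build volume
```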
.github/workflows/core.yml (vendored): 2 changes

@@ -53,7 +53,7 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install test requirements
-        run: python3 ./devscripts/install_deps.py --include dev --include curl_cffi
+        run: python3 ./devscripts/install_deps.py --include dev --include curl-cffi
       - name: Run tests
         continue-on-error: False
         run: |
.github/workflows/quick-test.yml (vendored): 2 changes

@@ -27,6 +27,8 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
+        with:
+          python-version: '3.8'
       - name: Install flake8
         run: python3 ./devscripts/install_deps.py -o --include dev
       - name: Make lazy extractors
CONTRIBUTORS: 10 changes

@@ -600,3 +600,13 @@ xpadev-net
 Xpl0itU
 YoshichikaAAA
 zhijinwuu
+alb
+hruzgar
+kasper93
+leoheitmannruiz
+luiso1979
+nipotan
+Offert4324
+sta1us
+Tomoka1
+trwstin
Changelog.md: 95 changes

@@ -4,6 +4,101 @@
 # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
 -->

+### 2024.04.09
+
+#### Important changes
+- Security: [[CVE-2024-22423](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-22423)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-hjq6-52gw-2g7p)
+    - The shell escape function now properly escapes `%`, `\` and `\n`.
+    - `utils.Popen` has been patched accordingly.
+
+#### Core changes
+- [Add new option `--progress-delta`](https://github.com/yt-dlp/yt-dlp/commit/9590cc6b4768e190183d7d071a6c78170889116a) ([#9082](https://github.com/yt-dlp/yt-dlp/issues/9082)) by [Grub4K](https://github.com/Grub4K)
+- [Add new options `--impersonate` and `--list-impersonate-targets`](https://github.com/yt-dlp/yt-dlp/commit/0b81d4d252bd065ccd352722987ea34fe17f9244) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
+- [Add option `--no-break-on-existing`](https://github.com/yt-dlp/yt-dlp/commit/16be117729150b2784f3b17755c886cb0cf73374) ([#9610](https://github.com/yt-dlp/yt-dlp/issues/9610)) by [bashonly](https://github.com/bashonly)
+- [Fix `filesize_approx` calculation](https://github.com/yt-dlp/yt-dlp/commit/86e3b82261e8ebc6c6707c09544c9dfb8907c0fd) ([#9560](https://github.com/yt-dlp/yt-dlp/issues/9560)) by [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
+- [Infer `acodec` for single-codec containers](https://github.com/yt-dlp/yt-dlp/commit/86a972033e05fea80e5fe7f2aff6723dbe2f3952) by [pukkandan](https://github.com/pukkandan)
+- [Prevent RCE when using `--exec` with `%q` (CVE-2024-22423)](https://github.com/yt-dlp/yt-dlp/commit/ff07792676f404ffff6ee61b5638c9dc1a33a37a) by [Grub4K](https://github.com/Grub4K)
+- **cookies**: [Add `--cookies-from-browser` support for Firefox Flatpak](https://github.com/yt-dlp/yt-dlp/commit/2ab2651a4a7be18939e2b4cb21be79fe477c797a) ([#9619](https://github.com/yt-dlp/yt-dlp/issues/9619)) by [un-def](https://github.com/un-def)
+- **utils**
+    - `traverse_obj`
+        - [Allow unbranching using `all` and `any`](https://github.com/yt-dlp/yt-dlp/commit/3699eeb67cad333272b14a42dd3843d93fda1a2e) ([#9571](https://github.com/yt-dlp/yt-dlp/issues/9571)) by [Grub4K](https://github.com/Grub4K)
+        - [Convenience improvements](https://github.com/yt-dlp/yt-dlp/commit/32abfb00bdbd119ca675fdc6d1719331f0a2741a) ([#9577](https://github.com/yt-dlp/yt-dlp/issues/9577)) by [Grub4K](https://github.com/Grub4K)
+
+#### Extractor changes
+- [Add extractor impersonate API](https://github.com/yt-dlp/yt-dlp/commit/50c29352312f5662acf9a64b0012766f5c40af61) ([#9474](https://github.com/yt-dlp/yt-dlp/issues/9474)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
+- **afreecatv**
+    - [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/9415f1a5ef88482ebafe3083e8bcb778ac512df7) ([#9566](https://github.com/yt-dlp/yt-dlp/issues/9566)) by [bashonly](https://github.com/bashonly), [Tomoka1](https://github.com/Tomoka1)
+    - live: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/9073ae6458f4c6a832aa832c67174c61852869be) ([#9348](https://github.com/yt-dlp/yt-dlp/issues/9348)) by [hui1601](https://github.com/hui1601)
+- **asobistage**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/0284f1fee202302a78888420f933deae19d9f4e1) ([#8735](https://github.com/yt-dlp/yt-dlp/issues/8735)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **box**: [Support URLs without file IDs](https://github.com/yt-dlp/yt-dlp/commit/07f5b2f7570fd9ac85aed17f4c0118f6eac77beb) ([#9504](https://github.com/yt-dlp/yt-dlp/issues/9504)) by [shreyasminocha](https://github.com/shreyasminocha)
+- **cbc.ca**: player: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/b49d5ffc53a72d8245ba319ff07bdc5b8c6a4f0c) ([#9561](https://github.com/yt-dlp/yt-dlp/issues/9561)) by [trainman261](https://github.com/trainman261)
+- **crunchyroll**
+    - [Extract `vo_adaptive_hls` formats by default](https://github.com/yt-dlp/yt-dlp/commit/be77923ffe842f667971019460f6005f3cad01eb) ([#9447](https://github.com/yt-dlp/yt-dlp/issues/9447)) by [bashonly](https://github.com/bashonly)
+    - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/954e57e405f79188450eb30103a9308732cd318f) ([#9615](https://github.com/yt-dlp/yt-dlp/issues/9615)) by [bytedream](https://github.com/bytedream)
+- **dropbox**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/a48cc86d6f6b20427553620c2ddb990ede6a4b41) ([#9627](https://github.com/yt-dlp/yt-dlp/issues/9627)) by [bashonly](https://github.com/bashonly)
+- **fathom**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/bc2b8c0596fd6b75af24822c4f0f1da6783d71f7) ([#9495](https://github.com/yt-dlp/yt-dlp/issues/9495)) by [src-tinkerer](https://github.com/src-tinkerer)
+- **gofile**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0da66980d3193cad3dae0120cddddbfcabddf7a1) ([#9446](https://github.com/yt-dlp/yt-dlp/issues/9446)) by [jazz1611](https://github.com/jazz1611)
+- **imgur**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/86d2f4d24849af0d1f3af7c0e2ac43bf8a058f74) ([#9471](https://github.com/yt-dlp/yt-dlp/issues/9471)) by [trwstin](https://github.com/trwstin)
+- **jiosaavn**
+    - [Extract artists](https://github.com/yt-dlp/yt-dlp/commit/0ae16ceb1846cc4e609b70ce7c5d8e7458efceb2) ([#9612](https://github.com/yt-dlp/yt-dlp/issues/9612)) by [bashonly](https://github.com/bashonly)
+    - [Fix format extensions](https://github.com/yt-dlp/yt-dlp/commit/443e206ec41e64ca2aef61d8ef91640fb69b3113) ([#9609](https://github.com/yt-dlp/yt-dlp/issues/9609)) by [bashonly](https://github.com/bashonly)
+    - [Support playlists](https://github.com/yt-dlp/yt-dlp/commit/2e94602f241f6e41bdc48576c61089435529339b) ([#9622](https://github.com/yt-dlp/yt-dlp/issues/9622)) by [bashonly](https://github.com/bashonly)
+- **joqrag**: [Fix live status detection](https://github.com/yt-dlp/yt-dlp/commit/f2fd449b46c4058222e1744f7a35caa20b2d003d) ([#9624](https://github.com/yt-dlp/yt-dlp/issues/9624)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **kick**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/c8a61a910096c77ce08dad5e1b2fbda5eb964156) ([#9611](https://github.com/yt-dlp/yt-dlp/issues/9611)) by [bashonly](https://github.com/bashonly)
+- **loom**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/f859ed3ba1e8b129ae6a467592c65687e73fbca1) ([#8686](https://github.com/yt-dlp/yt-dlp/issues/8686)) by [bashonly](https://github.com/bashonly), [hruzgar](https://github.com/hruzgar)
+- **medici**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4cd9e251b9abada107b10830de997bf4d79ca369) ([#9518](https://github.com/yt-dlp/yt-dlp/issues/9518)) by [Offert4324](https://github.com/Offert4324)
+- **mixch**
+    - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4c3b7a0769706f7f0ea24adf1f219d5ae82d2b07) ([#9608](https://github.com/yt-dlp/yt-dlp/issues/9608)) by [bashonly](https://github.com/bashonly), [nipotan](https://github.com/nipotan)
+    - archive: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/c59de48e2bb4c681b03b93b584a05f52609ce4a0) ([#8761](https://github.com/yt-dlp/yt-dlp/issues/8761)) by [pzhlkj6612](https://github.com/pzhlkj6612)
+- **nhk**: [Fix NHK World extractors](https://github.com/yt-dlp/yt-dlp/commit/4af9d5c2f6aa81403ae2a8a5ae3cc824730f0b86) ([#9623](https://github.com/yt-dlp/yt-dlp/issues/9623)) by [bashonly](https://github.com/bashonly)
+- **patreon**: [Do not extract dead embed URLs](https://github.com/yt-dlp/yt-dlp/commit/36b240f9a72af57eb2c9d927ebb7fd1c917ebf18) ([#9613](https://github.com/yt-dlp/yt-dlp/issues/9613)) by [johnvictorfs](https://github.com/johnvictorfs)
+- **radio1be**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/36baaa10e06715ccba06b78885b2042c4844c826) ([#9122](https://github.com/yt-dlp/yt-dlp/issues/9122)) by [HobbyistDev](https://github.com/HobbyistDev)
+- **sharepoint**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/ff349ff94aae0b2b148bd3670f7c91d39c2f1d8e) ([#6531](https://github.com/yt-dlp/yt-dlp/issues/6531)) by [bashonly](https://github.com/bashonly), [C0D3D3V](https://github.com/C0D3D3V)
+- **sonylivseries**: [Fix season extraction](https://github.com/yt-dlp/yt-dlp/commit/f2868b26e917354203f82a370ad2396646edb813) ([#9423](https://github.com/yt-dlp/yt-dlp/issues/9423)) by [bashonly](https://github.com/bashonly)
+- **soundcloud**
+    - [Adjust format sorting](https://github.com/yt-dlp/yt-dlp/commit/a2d0840739cddd585d24e0ce4796394fc8a4fa2e) ([#9584](https://github.com/yt-dlp/yt-dlp/issues/9584)) by [bashonly](https://github.com/bashonly)
+    - [Support cookies](https://github.com/yt-dlp/yt-dlp/commit/97362712a1f2b04e735bdf54f749ad99165a62fe) ([#9586](https://github.com/yt-dlp/yt-dlp/issues/9586)) by [bashonly](https://github.com/bashonly)
+    - [Support retries for API rate-limit](https://github.com/yt-dlp/yt-dlp/commit/246571ae1d867df8bf31a056bdf3bbbfd398366a) ([#9585](https://github.com/yt-dlp/yt-dlp/issues/9585)) by [bashonly](https://github.com/bashonly)
+- **thisoldhouse**: [Support Brightcove embeds](https://github.com/yt-dlp/yt-dlp/commit/0df63cce69026d2f4c0cbb4dd36163e83eac93dc) ([#9576](https://github.com/yt-dlp/yt-dlp/issues/9576)) by [bashonly](https://github.com/bashonly)
+- **tiktok**
+    - [Fix API extraction](https://github.com/yt-dlp/yt-dlp/commit/cb61e20c266facabb7a30f9ce53bd79dfc158475) ([#9548](https://github.com/yt-dlp/yt-dlp/issues/9548)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
+    - [Prefer non-bytevc2 formats](https://github.com/yt-dlp/yt-dlp/commit/63f685f341f35f6f02b0368d1ba53bdb5b520410) ([#9575](https://github.com/yt-dlp/yt-dlp/issues/9575)) by [bashonly](https://github.com/bashonly)
+    - [Restore `carrier_region` API parameter](https://github.com/yt-dlp/yt-dlp/commit/fc53ec13ff1ee926a3e533a68cfca8acc887b661) ([#9637](https://github.com/yt-dlp/yt-dlp/issues/9637)) by [bashonly](https://github.com/bashonly)
+    - [Update API hostname](https://github.com/yt-dlp/yt-dlp/commit/8c05b3ebae23c5b444857549a85b84004c01a536) ([#9444](https://github.com/yt-dlp/yt-dlp/issues/9444)) by [bashonly](https://github.com/bashonly)
+- **twitch**: [Extract AV1 and HEVC formats](https://github.com/yt-dlp/yt-dlp/commit/02f93ff51b3ff9436d60c4993562b366eaae8851) ([#9158](https://github.com/yt-dlp/yt-dlp/issues/9158)) by [kasper93](https://github.com/kasper93)
+- **vkplay**: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/b15b0c1d2106437ec61a5c436c543e8760eac160) ([#9636](https://github.com/yt-dlp/yt-dlp/issues/9636)) by [bashonly](https://github.com/bashonly)
+- **xvideos**: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/aa7e9ae4f48276bd5d0173966c77db9484f65a0a) ([#9502](https://github.com/yt-dlp/yt-dlp/issues/9502)) by [sta1us](https://github.com/sta1us)
+- **youtube**
+    - [Calculate more accurate `filesize`](https://github.com/yt-dlp/yt-dlp/commit/a25a424323267e3f6f9f63c0b62df499bd7b8d46) by [pukkandan](https://github.com/pukkandan)
+    - [Update `android` params](https://github.com/yt-dlp/yt-dlp/commit/e7b17fce14775bd2448695c8eb7379b8d31d3537) by [pukkandan](https://github.com/pukkandan)
+    - search: [Fix params for uncensored results](https://github.com/yt-dlp/yt-dlp/commit/17d248a58781e2588d18a5ebe00c441d10011fcd) ([#9456](https://github.com/yt-dlp/yt-dlp/issues/9456)) by [alb](https://github.com/alb), [pukkandan](https://github.com/pukkandan)
+
+#### Downloader changes
+- **ffmpeg**: [Accept output args from info dict](https://github.com/yt-dlp/yt-dlp/commit/9c42b7eef547e826e9fcc7beb6706a2523949d05) ([#9278](https://github.com/yt-dlp/yt-dlp/issues/9278)) by [bashonly](https://github.com/bashonly)
+
+#### Networking changes
+- [Respect `SSLKEYLOGFILE` environment variable](https://github.com/yt-dlp/yt-dlp/commit/79a451e5763eda8b10d00684d5d3378f3255ee01) ([#9543](https://github.com/yt-dlp/yt-dlp/issues/9543)) by [luiso1979](https://github.com/luiso1979)
+- **Request Handler**
+    - curlcffi: [Add support for `curl_cffi`](https://github.com/yt-dlp/yt-dlp/commit/52f5be1f1e0dc45bb397ab950f564721976a39bf) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
+    - websockets: [Workaround race condition causing issues on PyPy](https://github.com/yt-dlp/yt-dlp/commit/e5d4f11104ce7ea1717a90eea82c0f7d230ea5d5) ([#9514](https://github.com/yt-dlp/yt-dlp/issues/9514)) by [coletdjnz](https://github.com/coletdjnz)
+
+#### Misc. changes
+- **build**
+    - [Do not include `curl_cffi` in `macos_legacy`](https://github.com/yt-dlp/yt-dlp/commit/b19ae095fdddd43c2a2c67d10fbe0d9a645bb98f) ([#9653](https://github.com/yt-dlp/yt-dlp/issues/9653)) by [bashonly](https://github.com/bashonly)
+    - [Optional dependencies cleanup](https://github.com/yt-dlp/yt-dlp/commit/58dd0f8d1eee6bc9fdc57f1923bed772fa3c946d) ([#9550](https://github.com/yt-dlp/yt-dlp/issues/9550)) by [bashonly](https://github.com/bashonly)
+    - [Print SHA sums to GHA logs](https://github.com/yt-dlp/yt-dlp/commit/e8032503b9517465b0e86d776fc1e60d8795d673) ([#9582](https://github.com/yt-dlp/yt-dlp/issues/9582)) by [bashonly](https://github.com/bashonly)
+    - [Update changelog for tarball and sdist](https://github.com/yt-dlp/yt-dlp/commit/17b96974a334688f76b57d350e07cae8cda46877) ([#9425](https://github.com/yt-dlp/yt-dlp/issues/9425)) by [bashonly](https://github.com/bashonly)
+- **cleanup**
+    - [Standardize `import datetime as dt`](https://github.com/yt-dlp/yt-dlp/commit/c305a25c1b16bcf7a5ec499c3b786ed1e2c748da) ([#8978](https://github.com/yt-dlp/yt-dlp/issues/8978)) by [pukkandan](https://github.com/pukkandan)
+    - ie: [No `from` stdlib imports in extractors](https://github.com/yt-dlp/yt-dlp/commit/e3a3ed8a981d9395c4859b6ef56cd02bc3148db2) by [pukkandan](https://github.com/pukkandan)
+    - Miscellaneous: [216f6a3](https://github.com/yt-dlp/yt-dlp/commit/216f6a3cb57824e6a3c859649ce058c199b1b247) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
+- **docs**
+    - [Update yt-dlp tagline](https://github.com/yt-dlp/yt-dlp/commit/388c979ac63a8774339fac2516fe1cc852b4276e) ([#9481](https://github.com/yt-dlp/yt-dlp/issues/9481)) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
+    - [Various manpage fixes](https://github.com/yt-dlp/yt-dlp/commit/df0e138fc02ae2764a44f2f59fc93c756c4d3ee2) by [leoheitmannruiz](https://github.com/leoheitmannruiz)
+- **test**
+    - [Workaround websocket server hanging](https://github.com/yt-dlp/yt-dlp/commit/f849d77ab54788446b995d256e1ee0894c4fb927) ([#9467](https://github.com/yt-dlp/yt-dlp/issues/9467)) by [coletdjnz](https://github.com/coletdjnz)
+    - `traversal`: [Separate traversal tests](https://github.com/yt-dlp/yt-dlp/commit/979ce2e786f2ee3fc783b6dc1ef4188d8805c923) ([#9574](https://github.com/yt-dlp/yt-dlp/issues/9574)) by [Grub4K](https://github.com/Grub4K)
+
 ### 2024.03.10

 #### Core changes
Makefile: 5 changes

@@ -12,7 +12,10 @@ tar: yt-dlp.tar.gz
 pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \
 	completions yt-dlp.1 pyproject.toml setup.cfg devscripts/* test/*

-.PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites
+.PHONY: all clean clean-all clean-test clean-dist clean-cache \
+	completions completion-bash completion-fish completion-zsh \
+	doc issuetemplates supportedsites ot offlinetest codetest test \
+	tar pypi-files lazy-extractors install uninstall

 clean-test:
 	rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
README.md: 45 changes

@@ -158,6 +158,7 @@ When using `--update`/`-U`, a release binary will only update to its current cha
 You may also use `--update-to <repository>` (`<owner>/<repository>`) to update to a channel on a completely different repository. Be careful with what repository you are updating to though, there is no verification done for binaries from different repositories.

 Example usage:

 * `yt-dlp --update-to master` switch to the `master` channel and update to its latest release
 * `yt-dlp --update-to stable@2023.07.06` upgrade/downgrade to release to `stable` channel tag `2023.07.06`
 * `yt-dlp --update-to 2023.10.07` upgrade/downgrade to tag `2023.10.07` if it exists on the current channel
@@ -201,8 +202,8 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly
 The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting.

 * [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE)
-    * Can be installed with the `curl_cffi` group, e.g. `pip install yt-dlp[default,curl_cffi]`
-    * Only included in `yt-dlp.exe`, `yt-dlp_macos` and `yt-dlp_macos_legacy` builds
+    * Can be installed with the `curl-cffi` group, e.g. `pip install yt-dlp[default,curl-cffi]`
+    * Currently only included in `yt-dlp.exe` and `yt-dlp_macos` builds


 ### Metadata
@@ -481,6 +482,9 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
     --max-downloads NUMBER          Abort after downloading NUMBER files
     --break-on-existing             Stop the download process when encountering
                                     a file that is in the archive
+    --no-break-on-existing          Do not stop the download process when
+                                    encountering a file that is in the archive
+                                    (default)
     --break-per-input               Alters --max-downloads, --break-on-existing,
                                     --break-match-filter, and autonumber to
                                     reset per input URL
@@ -662,7 +666,7 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
                                     The name of the browser to load cookies
                                     from. Currently supported browsers are:
                                     brave, chrome, chromium, edge, firefox,
-                                    opera, safari, vivaldi. Optionally, the
+                                    opera, safari, vivaldi, whale. Optionally, the
                                     KEYRING used for decrypting Chromium cookies
                                     on Linux, the name/path of the PROFILE to
                                     load cookies from, and the CONTAINER name
@@ -754,6 +758,7 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
                                     accessible under "progress" key. E.g.
                                     --console-title --progress-template
                                     "download-title:%(info.id)s-%(progress.eta)s"
+    --progress-delta SECONDS        Time between progress output (default: 0)
     -v, --verbose                   Print various debugging information
     --dump-pages                    Print downloaded pages encoded using base64
                                     to debug problems (very verbose)
@@ -1472,9 +1477,9 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `
 - `width`: Width of the video, if known
 - `height`: Height of the video, if known
 - `aspect_ratio`: Aspect ratio of the video, if known
-- `tbr`: Average bitrate of audio and video in KBit/s
-- `abr`: Average audio bitrate in KBit/s
-- `vbr`: Average video bitrate in KBit/s
+- `tbr`: Average bitrate of audio and video in [kbps](## "1000 bits/sec")
+- `abr`: Average audio bitrate in [kbps](## "1000 bits/sec")
+- `vbr`: Average video bitrate in [kbps](## "1000 bits/sec")
 - `asr`: Audio sampling rate in Hertz
 - `fps`: Frame rate
 - `audio_channels`: The number of audio channels
@@ -1499,7 +1504,7 @@ Any string comparison may be prefixed with negation `!` in order to produce an o
 **Note**: None of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering.

-Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "bv[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.
+Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "bv[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 kbps. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.

 Format selectors can also be grouped using parentheses; e.g. `-f "(mp4,webm)[height<480]"` will download the best pre-merged mp4 and webm formats with a height lower than 480.

@@ -1531,10 +1536,10 @@ The available fields are:
 - `fps`: Framerate of video
 - `hdr`: The dynamic range of the video (`DV` > `HDR12` > `HDR10+` > `HDR10` > `HLG` > `SDR`)
 - `channels`: The number of audio channels
-- `tbr`: Total average bitrate in KBit/s
-- `vbr`: Average video bitrate in KBit/s
-- `abr`: Average audio bitrate in KBit/s
-- `br`: Average bitrate in KBit/s, `tbr`/`vbr`/`abr`
+- `tbr`: Total average bitrate in [kbps](## "1000 bits/sec")
+- `vbr`: Average video bitrate in [kbps](## "1000 bits/sec")
+- `abr`: Average audio bitrate in [kbps](## "1000 bits/sec")
+- `br`: Average bitrate in [kbps](## "1000 bits/sec"), `tbr`/`vbr`/`abr`
 - `asr`: Audio sample rate in Hz

 **Deprecation warning**: Many of these fields have (currently undocumented) aliases, that may be removed in a future version. It is recommended to use only the documented field names.
@@ -1755,7 +1760,7 @@ The following extractors use this feature:
 #### youtube
 * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
 * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
-* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients.
+* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. The `android` clients will always be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients.
 * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
 * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
 * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
@@ -1781,8 +1786,7 @@ The following extractors use this feature:
 * `version`: The video version to extract - `uncut` or `simulcast`

 #### crunchyrollbeta (Crunchyroll)
-* `format`: Which stream type(s) to extract (default: `adaptive_hls`). Potentially useful values include `adaptive_hls`, `adaptive_dash`, `vo_adaptive_hls`, `vo_adaptive_dash`, `download_hls`, `download_dash`, `multitrack_adaptive_hls_v2`
-* `hardsub`: Preference order for which hardsub versions to extract, or `all` (default: `None` = no hardsubs), e.g. `crunchyrollbeta:hardsub=en-US,None`
+* `hardsub`: One or more hardsub versions to extract (in order of preference), or `all` (default: `None` = no hardsubs will be extracted), e.g. `crunchyrollbeta:hardsub=en-US,de-DE`

 #### vikichannel
 * `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`
@@ -1805,9 +1809,12 @@ The following extractors use this feature:
 * `max_comments`: Maximum number of comments to extract - default is `120`

 #### tiktok
-* `api_hostname`: Hostname to use for mobile API requests, e.g. `api-h2.tiktokv.com`
-* `app_version`: App version to call mobile APIs with - should be set along with `manifest_app_version`, e.g. `20.2.1`
-* `manifest_app_version`: Numeric app version to call mobile APIs with, e.g. `221`
+* `api_hostname`: Hostname to use for mobile API calls, e.g. `api22-normal-c-alisg.tiktokv.com`
+* `app_name`: Default app name to use with mobile API calls, e.g. `trill`
+* `app_version`: Default app version to use with mobile API calls - should be set along with `manifest_app_version`, e.g. `34.1.2`
+* `manifest_app_version`: Default numeric app version to use with mobile API calls, e.g. `2023401020`
+* `aid`: Default app ID to use with mobile API calls, e.g. `1180`
+* `app_info`: Enable mobile API extraction with one or more app info strings in the format of `<iid>/[app_name]/[app_version]/[manifest_app_version]/[aid]`, where `iid` is the unique app install ID. `iid` is the only required value; all other values and their `/` separators can be omitted, e.g. `tiktok:app_info=1234567890123456789` or `tiktok:app_info=123,456/trill///1180,789//34.0.1/340001`

 #### rokfinchannel
 * `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
@@ -1830,6 +1837,9 @@ The following extractors use this feature:
 #### jiosaavn
 * `bitrate`: Audio bitrates to request. One or more of `16`, `32`, `64`, `128`, `320`. Default is `128,320`

+#### afreecatvlive
+* `cdn`: One or more CDN IDs to use with the API call for stream URLs, e.g. `gcp_cdn`, `gs_cdn_pc_app`, `gs_cdn_mobile_web`, `gs_cdn_pc_web`
+
 **Note**: These options may be changed/removed in the future without concern for backward compatibility

 <!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->
@@ -1887,6 +1897,7 @@ Plugins can be installed using various methods and locations.


 `.zip`, `.egg` and `.whl` archives containing a `yt_dlp_plugins` namespace folder in their root are also supported as plugin packages.

 * e.g. `${XDG_CONFIG_HOME}/yt-dlp/plugins/mypluginpkg.zip` where `mypluginpkg.zip` contains `yt_dlp_plugins/<type>/myplugin.py`

 Run yt-dlp with `--verbose` to check if the plugin has been loaded.
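The README hunks above document two new user-facing switches, `--progress-delta` and `--no-break-on-existing`. As a rough illustration only (not taken from the docs verbatim; the URL is a placeholder and `--download-archive` is simply the option these flags interact with), they might be combined like this:

```sh
# Print progress at most every 5 seconds and keep downloading past entries
# already recorded in the archive file
yt-dlp --progress-delta 5 --no-break-on-existing \
    --download-archive archive.txt "https://example.com/watch?v=placeholder"
```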
bundle/docker/compose.yml (new file): 10 lines

@@ -0,0 +1,10 @@
+services:
+  static:
+    build: static
+    environment:
+      channel: ${channel}
+      origin: ${origin}
+      version: ${version}
+    volumes:
+      - ~/build:/build
+      - ../..:/yt-dlp
bundle/docker/static/Dockerfile (new file): 21 lines

@@ -0,0 +1,21 @@
+FROM alpine:3.19 as base
+
+RUN apk --update add --no-cache \
+    build-base \
+    python3 \
+    pipx \
+    ;
+
+RUN pipx install pyinstaller
+# Requires above step to prepare the shared venv
+RUN ~/.local/share/pipx/shared/bin/python -m pip install -U wheel
+RUN apk --update add --no-cache \
+    scons \
+    patchelf \
+    binutils \
+    ;
+RUN pipx install staticx
+
+WORKDIR /yt-dlp
+COPY entrypoint.sh /entrypoint.sh
+ENTRYPOINT /entrypoint.sh
bundle/docker/static/entrypoint.sh (new executable file): 13 lines

@@ -0,0 +1,13 @@
+#!/bin/ash
+set -e
+
+source ~/.local/share/pipx/venvs/pyinstaller/bin/activate
+python -m devscripts.install_deps --include secretstorage
+python -m devscripts.make_lazy_extractors
+python devscripts/update-version.py -c "${channel}" -r "${origin}" "${version}"
+python -m bundle.pyinstaller
+deactivate
+
+source ~/.local/share/pipx/venvs/staticx/bin/activate
+staticx /yt-dlp/dist/yt-dlp_linux /build/yt-dlp_linux
+deactivate
@@ -126,5 +126,26 @@
         "when": "4ce57d3b873c2887814cbec03d029533e82f7db5",
         "short": "[ie] Support multi-period MPD streams (#6654)",
         "authors": ["alard", "pukkandan"]
+    },
+    {
+        "action": "change",
+        "when": "aa7e9ae4f48276bd5d0173966c77db9484f65a0a",
+        "short": "[ie/xvideos] Support new URL format (#9502)",
+        "authors": ["sta1us"]
+    },
+    {
+        "action": "remove",
+        "when": "22e4dfacb61f62dfbb3eb41b31c7b69ba1059b80"
+    },
+    {
+        "action": "change",
+        "when": "e3a3ed8a981d9395c4859b6ef56cd02bc3148db2",
+        "short": "[cleanup:ie] No `from` stdlib imports in extractors",
+        "authors": ["pukkandan"]
+    },
+    {
+        "action": "add",
+        "when": "9590cc6b4768e190183d7d071a6c78170889116a",
+        "short": "[priority] Security: [[CVE-2024-22423](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-22423)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-hjq6-52gw-2g7p)\n - The shell escape function now properly escapes `%`, `\\` and `\\n`.\n - `utils.Popen` has been patched accordingly."
     }
 ]
@@ -10,6 +10,8 @@ import argparse
 import re
 import subprocess

+from pathlib import Path
+
 from devscripts.tomlparse import parse_toml
 from devscripts.utils import read_file

@@ -17,17 +19,23 @@ from devscripts.utils import read_file
 def parse_args():
     parser = argparse.ArgumentParser(description='Install dependencies for yt-dlp')
     parser.add_argument(
-        'input', nargs='?', metavar='TOMLFILE', default='pyproject.toml', help='Input file (default: %(default)s)')
+        'input', nargs='?', metavar='TOMLFILE', default=Path(__file__).parent.parent / 'pyproject.toml',
+        help='input file (default: %(default)s)')
     parser.add_argument(
-        '-e', '--exclude', metavar='DEPENDENCY', action='append', help='Exclude a dependency')
+        '-e', '--exclude', metavar='DEPENDENCY', action='append',
+        help='exclude a dependency')
     parser.add_argument(
-        '-i', '--include', metavar='GROUP', action='append', help='Include an optional dependency group')
+        '-i', '--include', metavar='GROUP', action='append',
+        help='include an optional dependency group')
     parser.add_argument(
-        '-o', '--only-optional', action='store_true', help='Only install optional dependencies')
+        '-o', '--only-optional', action='store_true',
+        help='only install optional dependencies')
     parser.add_argument(
-        '-p', '--print', action='store_true', help='Only print a requirements.txt to stdout')
+        '-p', '--print', action='store_true',
+        help='only print requirements to stdout')
     parser.add_argument(
-        '-u', '--user', action='store_true', help='Install with pip as --user')
+        '-u', '--user', action='store_true',
+        help='install with pip as --user')
     return parser.parse_args()

@@ -37,24 +45,16 @@ def main():
     optional_groups = project_table['optional-dependencies']
     excludes = args.exclude or []

-    deps = []
+    targets = []
     if not args.only_optional:  # `-o` should exclude 'dependencies' and the 'default' group
-        deps.extend(project_table['dependencies'])
+        targets.extend(project_table['dependencies'])
         if 'default' not in excludes:  # `--exclude default` should exclude entire 'default' group
-            deps.extend(optional_groups['default'])
-
-    def name(dependency):
-        return re.match(r'[\w-]+', dependency)[0].lower()
-
-    target_map = {name(dep): dep for dep in deps}
+            targets.extend(optional_groups['default'])

     for include in filter(None, map(optional_groups.get, args.include or [])):
-        target_map.update(zip(map(name, include), include))
+        targets.extend(include)

-    for exclude in map(name, excludes):
-        target_map.pop(exclude, None)
-
-    targets = list(target_map.values())
+    targets = [t for t in targets if re.match(r'[\w-]+', t).group(0).lower() not in excludes]

     if args.print:
         for target in targets:
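The workflow changes earlier in this commit exercise the reworked `devscripts/install_deps.py` CLI. A few representative invocations, taken from the workflow files shown above (the dependency groups that are valid depend on `pyproject.toml`):

```sh
# Print a requirements list instead of installing (used by the macOS jobs)
python3 devscripts/install_deps.py --print --include pyinstaller > requirements.txt

# Install only optional dependencies from the "build" group, as --user
python3 devscripts/install_deps.py --user -o --include build

# Install the dev and curl-cffi groups for the test suite (core.yml)
python3 devscripts/install_deps.py --include dev --include curl-cffi
```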
@@ -43,6 +43,27 @@ def filter_excluded_sections(readme):
         '', readme)


+def _convert_code_blocks(readme):
+    current_code_block = None
+
+    for line in readme.splitlines(True):
+        if current_code_block:
+            if line == current_code_block:
+                current_code_block = None
+                yield '\n'
+            else:
+                yield f'    {line}'
+        elif line.startswith('```'):
+            current_code_block = line.count('`') * '`' + '\n'
+            yield '\n'
+        else:
+            yield line
+
+
+def convert_code_blocks(readme):
+    return ''.join(_convert_code_blocks(readme))
+
+
 def move_sections(readme):
     MOVE_TAG_TEMPLATE = '<!-- MANPAGE: MOVE "%s" SECTION HERE -->'
     sections = re.findall(r'(?m)^%s$' % (
@@ -65,8 +86,10 @@ def move_sections(readme):


 def filter_options(readme):
     section = re.search(r'(?sm)^# USAGE AND OPTIONS\n.+?(?=^# )', readme).group(0)
+    section_new = section.replace('*', R'\*')
+
     options = '# OPTIONS\n'
-    for line in section.split('\n')[1:]:
+    for line in section_new.split('\n')[1:]:
         mobj = re.fullmatch(r'''(?x)
             \s{4}(?P<opt>-(?:,\s|[^\s])+)
             (?:\s(?P<meta>(?:[^\s]|\s(?!\s))+))?
@@ -86,7 +109,7 @@ def filter_options(readme):
     return readme.replace(section, options, 1)


-TRANSFORM = compose_functions(filter_excluded_sections, move_sections, filter_options)
+TRANSFORM = compose_functions(filter_excluded_sections, convert_code_blocks, move_sections, filter_options)


 def main():
@@ -11,7 +11,7 @@ IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED!
 
 from __future__ import annotations
 
-import datetime
+import datetime as dt
 import json
 import re
 
@@ -115,9 +115,9 @@ def parse_value(data: str, index: int):
     for func in [
             int,
             float,
-            datetime.time.fromisoformat,
-            datetime.date.fromisoformat,
-            datetime.datetime.fromisoformat,
+            dt.time.fromisoformat,
+            dt.date.fromisoformat,
+            dt.datetime.fromisoformat,
             {'true': True, 'false': False}.get,
     ]:
         try:
@@ -179,7 +179,7 @@ def main():
         data = file.read()
 
     def default(obj):
-        if isinstance(obj, (datetime.date, datetime.time, datetime.datetime)):
+        if isinstance(obj, (dt.date, dt.time, dt.datetime)):
             return obj.isoformat()
 
     print(json.dumps(parse_toml(data), default=default))
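Note (not part of the patch): the converter list above is tried in order, and the first parser that succeeds wins. A self-contained sketch of that coercion pattern, written only to illustrate the ordering:

import datetime as dt

def coerce(text):
    # Try each converter in turn; fall back to the raw string if none applies
    for func in (int, float, dt.time.fromisoformat, dt.date.fromisoformat,
                 dt.datetime.fromisoformat, {'true': True, 'false': False}.get):
        try:
            value = func(text)
        except (ValueError, TypeError):
            continue
        if value is not None:
            return value
    return text

print(coerce('2024-03-10'))  # datetime.date(2024, 3, 10)
print(coerce('true'))        # True
print(coerce('2024.03.10'))  # stays a plain string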
@@ -9,15 +9,15 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 import argparse
 import contextlib
+import datetime as dt
 import sys
-from datetime import datetime, timezone
 
 from devscripts.utils import read_version, run_process, write_file
 
 
 def get_new_version(version, revision):
     if not version:
-        version = datetime.now(timezone.utc).strftime('%Y.%m.%d')
+        version = dt.datetime.now(dt.timezone.utc).strftime('%Y.%m.%d')
 
     if revision:
         assert revision.isdecimal(), 'Revision must be a number'
@@ -53,7 +53,7 @@ dependencies = [
 
 [project.optional-dependencies]
 default = []
-curl_cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
+curl-cffi = ["curl-cffi==0.5.10; implementation_name=='cpython'"]
 secretstorage = [
     "cffi",
     "secretstorage",
@@ -69,8 +69,10 @@ dev = [
     "isort",
     "pytest",
 ]
-pyinstaller = ["pyinstaller>=6.3"]
-pyinstaller_macos = ["pyinstaller==5.13.2"]  # needed for curl_cffi builds
+pyinstaller = [
+    "pyinstaller>=6.3; sys_platform!='darwin'",
+    "pyinstaller==5.13.2; sys_platform=='darwin'",  # needed for curl_cffi
+]
 py2exe = ["py2exe>=0.12"]
 
 [project.urls]
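Note (not part of the patch): the merged pyinstaller group relies on standard PEP 508 environment markers, so a single optional group resolves to different pins per platform. A small illustration of how such a marker evaluates, using the third-party packaging library (an assumption for this sketch, not something yt-dlp needs at runtime):

from packaging.markers import Marker

marker = Marker("sys_platform == 'darwin'")
print(marker.evaluate())                           # True only when evaluated on macOS
print(marker.evaluate({'sys_platform': 'linux'}))  # False for a Linux environment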
@@ -47,7 +47,7 @@
 - **aenetworks:show**
 - **AeonCo**
 - **afreecatv**: [*afreecatv*](## "netrc machine") afreecatv.com
-- **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com
+- **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com livestreams
 - **afreecatv:user**
 - **AirTV**
 - **AitubeKZVideo**
@@ -105,6 +105,7 @@
 - **ArteTVPlaylist**
 - **asobichannel**: ASOBI CHANNEL
 - **asobichannel:tag**: ASOBI CHANNEL
+- **AsobiStage**: ASOBISTAGE (アソビステージ)
 - **AtresPlayer**: [*atresplayer*](## "netrc machine")
 - **AtScaleConfEvent**
 - **ATVAt**
@@ -436,6 +437,7 @@
 - **FacebookPluginsVideo**
 - **fancode:live**: [*fancode*](## "netrc machine") (**Currently broken**)
 - **fancode:vod**: [*fancode*](## "netrc machine") (**Currently broken**)
+- **Fathom**
 - **faz.net**
 - **fc2**: [*fc2*](## "netrc machine")
 - **fc2:embed**
@@ -633,8 +635,9 @@
 - **Jamendo**
 - **JamendoAlbum**
 - **JeuxVideo**: (**Currently broken**)
-- **JioSaavnAlbum**
-- **JioSaavnSong**
+- **jiosaavn:album**
+- **jiosaavn:playlist**
+- **jiosaavn:song**
 - **Joj**
 - **JoqrAg**: 超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)
 - **Jove**
@@ -716,6 +719,8 @@
 - **Lnk**
 - **LnkGo**
 - **loc**: Library of Congress
+- **loom**
+- **loom:folder**
 - **LoveHomePorn**
 - **LRTStream**
 - **LRTVOD**
@@ -1136,6 +1141,7 @@
 - **Radiko**
 - **RadikoRadio**
 - **radio.de**: (**Currently broken**)
+- **Radio1Be**
 - **radiocanada**
 - **radiocanada:audiovideo**
 - **RadioComercial**
@@ -1288,6 +1294,7 @@
 - **SeznamZpravyArticle**
 - **Shahid**: [*shahid*](## "netrc machine")
 - **ShahidShow**
+- **SharePoint**
 - **ShareVideosEmbed**
 - **ShemarooMe**
 - **ShowRoomLive**
@@ -1,4 +1,3 @@
-import functools
 import inspect
 
 import pytest
@@ -10,7 +9,9 @@ from yt_dlp.utils._utils import _YDLLogger as FakeLogger
 
 @pytest.fixture
 def handler(request):
-    RH_KEY = request.param
+    RH_KEY = getattr(request, 'param', None)
+    if not RH_KEY:
+        return
     if inspect.isclass(RH_KEY) and issubclass(RH_KEY, RequestHandler):
         handler = RH_KEY
     elif RH_KEY in _REQUEST_HANDLERS:
@@ -18,9 +19,46 @@ def handler(request):
     else:
         pytest.skip(f'{RH_KEY} request handler is not available')
 
-    return functools.partial(handler, logger=FakeLogger)
-
-
-def validate_and_send(rh, req):
-    rh.validate(req)
-    return rh.send(req)
+    class HandlerWrapper(handler):
+        RH_KEY = handler.RH_KEY
+
+        def __init__(self, *args, **kwargs):
+            super().__init__(logger=FakeLogger, *args, **kwargs)
+
+    return HandlerWrapper
+
+
+@pytest.fixture(autouse=True)
+def skip_handler(request, handler):
+    """usage: pytest.mark.skip_handler('my_handler', 'reason')"""
+    for marker in request.node.iter_markers('skip_handler'):
+        if marker.args[0] == handler.RH_KEY:
+            pytest.skip(marker.args[1] if len(marker.args) > 1 else '')
+
+
+@pytest.fixture(autouse=True)
+def skip_handler_if(request, handler):
+    """usage: pytest.mark.skip_handler_if('my_handler', lambda request: True, 'reason')"""
+    for marker in request.node.iter_markers('skip_handler_if'):
+        if marker.args[0] == handler.RH_KEY and marker.args[1](request):
+            pytest.skip(marker.args[2] if len(marker.args) > 2 else '')
+
+
+@pytest.fixture(autouse=True)
+def skip_handlers_if(request, handler):
+    """usage: pytest.mark.skip_handlers_if(lambda request, handler: True, 'reason')"""
+    for marker in request.node.iter_markers('skip_handlers_if'):
+        if handler and marker.args[0](request, handler):
+            pytest.skip(marker.args[1] if len(marker.args) > 1 else '')
+
+
+def pytest_configure(config):
+    config.addinivalue_line(
+        "markers", "skip_handler(handler): skip test for the given handler",
+    )
+    config.addinivalue_line(
+        "markers", "skip_handler_if(handler): skip test for the given handler if condition is true"
+    )
+    config.addinivalue_line(
+        "markers", "skip_handlers_if(handler): skip test for handlers when the condition is true"
+    )
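Note (not part of the patch): the autouse fixtures above make handler-specific skips declarative. A hedged sketch of how a test module could combine the indirect handler fixture with the new marker (test name and body are illustrative only):

import pytest


@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
@pytest.mark.skip_handler('CurlCFFI', 'feature not supported by curl-cffi')
def test_something(handler):
    with handler() as rh:  # HandlerWrapper already injects the fake logger
        ...  # exercise rh; the CurlCFFI run is skipped by the autouse skip_handler fixture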
@@ -338,3 +338,8 @@ def http_server_port(httpd):
 def verify_address_availability(address):
     if find_available_port(address) is None:
         pytest.skip(f'Unable to bind to source address {address} (address may not exist)')
+
+
+def validate_and_send(rh, req):
+    rh.validate(req)
+    return rh.send(req)
@@ -1906,6 +1906,15 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
             expected_status=TEAPOT_RESPONSE_STATUS)
         self.assertEqual(content, TEAPOT_RESPONSE_BODY)
 
+    def test_search_nextjs_data(self):
+        data = '<script id="__NEXT_DATA__" type="application/json">{"props":{}}</script>'
+        self.assertEqual(self.ie._search_nextjs_data(data, None), {'props': {}})
+        self.assertEqual(self.ie._search_nextjs_data('', None, fatal=False), {})
+        self.assertEqual(self.ie._search_nextjs_data('', None, default=None), None)
+        self.assertEqual(self.ie._search_nextjs_data('', None, default={}), {})
+        with self.assertRaises(DeprecationWarning):
+            self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {})
+
 
 if __name__ == '__main__':
     unittest.main()
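Note (not part of the patch): the new test exercises extraction of the JSON payload from a Next.js __NEXT_DATA__ script tag. A rough standalone sketch of that behaviour, not yt-dlp's actual implementation:

import json
import re


def search_nextjs_data(webpage, default=None):
    # Find the __NEXT_DATA__ script tag and parse its JSON body
    mobj = re.search(
        r'<script[^>]+id=["\']__NEXT_DATA__["\'][^>]*>(?P<json>.+?)</script>',
        webpage, flags=re.DOTALL)
    return json.loads(mobj.group('json')) if mobj else default


page = '<script id="__NEXT_DATA__" type="application/json">{"props":{}}</script>'
print(search_nextjs_data(page))  # {'props': {}}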
@@ -183,7 +183,7 @@ class TestFormatSelection(unittest.TestCase):
         ]
 
         info_dict = _make_result(formats)
-        ydl = YDL({'format': 'best'})
+        ydl = YDL({'format': 'best', 'format_sort': ['abr', 'ext']})
         ydl.sort_formats(info_dict)
         ydl.process_ie_result(copy.deepcopy(info_dict))
         downloaded = ydl.downloaded_info_dicts[0]
@@ -195,7 +195,7 @@ class TestFormatSelection(unittest.TestCase):
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['format_id'], 'mp3-64')
 
-        ydl = YDL({'prefer_free_formats': True})
+        ydl = YDL({'prefer_free_formats': True, 'format_sort': ['abr', 'ext']})
         ydl.sort_formats(info_dict)
         ydl.process_ie_result(copy.deepcopy(info_dict))
         downloaded = ydl.downloaded_info_dicts[0]
@@ -1,5 +1,5 @@
+import datetime as dt
 import unittest
-from datetime import datetime, timezone
 
 from yt_dlp import cookies
 from yt_dlp.cookies import (
@@ -138,7 +138,7 @@ class TestCookies(unittest.TestCase):
         self.assertEqual(cookie.name, 'foo')
         self.assertEqual(cookie.value, 'test%20%3Bcookie')
         self.assertFalse(cookie.secure)
-        expected_expiration = datetime(2021, 6, 18, 21, 39, 19, tzinfo=timezone.utc)
+        expected_expiration = dt.datetime(2021, 6, 18, 21, 39, 19, tzinfo=dt.timezone.utc)
         self.assertEqual(cookie.expires, int(expected_expiration.timestamp()))
 
     def test_pbkdf2_sha1(self):
379  test/test_http_proxy.py  (new file)
@@ -0,0 +1,379 @@
import abc
import base64
import contextlib
import functools
import json
import os
import random
import ssl
import threading
from http.server import BaseHTTPRequestHandler
from socketserver import ThreadingTCPServer

import pytest

from test.helper import http_server_port, verify_address_availability
from test.test_networking import TEST_DIR
from test.test_socks import IPv6ThreadingTCPServer
from yt_dlp.dependencies import urllib3
from yt_dlp.networking import Request
from yt_dlp.networking.exceptions import HTTPError, ProxyError, SSLError


class HTTPProxyAuthMixin:

    def proxy_auth_error(self):
        self.send_response(407)
        self.send_header('Proxy-Authenticate', 'Basic realm="test http proxy"')
        self.end_headers()
        return False

    def do_proxy_auth(self, username, password):
        if username is None and password is None:
            return True

        proxy_auth_header = self.headers.get('Proxy-Authorization', None)
        if proxy_auth_header is None:
            return self.proxy_auth_error()

        if not proxy_auth_header.startswith('Basic '):
            return self.proxy_auth_error()

        auth = proxy_auth_header[6:]

        try:
            auth_username, auth_password = base64.b64decode(auth).decode().split(':', 1)
        except Exception:
            return self.proxy_auth_error()

        if auth_username != (username or '') or auth_password != (password or ''):
            return self.proxy_auth_error()
        return True


class HTTPProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin):
    def __init__(self, *args, proxy_info=None, username=None, password=None, request_handler=None, **kwargs):
        self.username = username
        self.password = password
        self.proxy_info = proxy_info
        super().__init__(*args, **kwargs)

    def do_GET(self):
        if not self.do_proxy_auth(self.username, self.password):
            self.server.close_request(self.request)
            return
        if self.path.endswith('/proxy_info'):
            payload = json.dumps(self.proxy_info or {
                'client_address': self.client_address,
                'connect': False,
                'connect_host': None,
                'connect_port': None,
                'headers': dict(self.headers),
                'path': self.path,
                'proxy': ':'.join(str(y) for y in self.connection.getsockname()),
            })
            self.send_response(200)
            self.send_header('Content-Type', 'application/json; charset=utf-8')
            self.send_header('Content-Length', str(len(payload)))
            self.end_headers()
            self.wfile.write(payload.encode())
        else:
            self.send_response(404)
            self.end_headers()

        self.server.close_request(self.request)


if urllib3:
    import urllib3.util.ssltransport

    class SSLTransport(urllib3.util.ssltransport.SSLTransport):
        """
        Modified version of urllib3 SSLTransport to support server side SSL

        This allows us to chain multiple TLS connections.
        """
        def __init__(self, socket, ssl_context, server_hostname=None, suppress_ragged_eofs=True, server_side=False):
            self.incoming = ssl.MemoryBIO()
            self.outgoing = ssl.MemoryBIO()

            self.suppress_ragged_eofs = suppress_ragged_eofs
            self.socket = socket

            self.sslobj = ssl_context.wrap_bio(
                self.incoming,
                self.outgoing,
                server_hostname=server_hostname,
                server_side=server_side
            )
            self._ssl_io_loop(self.sslobj.do_handshake)

        @property
        def _io_refs(self):
            return self.socket._io_refs

        @_io_refs.setter
        def _io_refs(self, value):
            self.socket._io_refs = value

        def shutdown(self, *args, **kwargs):
            self.socket.shutdown(*args, **kwargs)
else:
    SSLTransport = None


class HTTPSProxyHandler(HTTPProxyHandler):
    def __init__(self, request, *args, **kwargs):
        certfn = os.path.join(TEST_DIR, 'testcert.pem')
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        sslctx.load_cert_chain(certfn, None)
        if isinstance(request, ssl.SSLSocket):
            request = SSLTransport(request, ssl_context=sslctx, server_side=True)
        else:
            request = sslctx.wrap_socket(request, server_side=True)
        super().__init__(request, *args, **kwargs)


class HTTPConnectProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin):
    protocol_version = 'HTTP/1.1'
    default_request_version = 'HTTP/1.1'

    def __init__(self, *args, username=None, password=None, request_handler=None, **kwargs):
        self.username = username
        self.password = password
        self.request_handler = request_handler
        super().__init__(*args, **kwargs)

    def do_CONNECT(self):
        if not self.do_proxy_auth(self.username, self.password):
            self.server.close_request(self.request)
            return
        self.send_response(200)
        self.end_headers()
        proxy_info = {
            'client_address': self.client_address,
            'connect': True,
            'connect_host': self.path.split(':')[0],
            'connect_port': int(self.path.split(':')[1]),
            'headers': dict(self.headers),
            'path': self.path,
            'proxy': ':'.join(str(y) for y in self.connection.getsockname()),
        }
        self.request_handler(self.request, self.client_address, self.server, proxy_info=proxy_info)
        self.server.close_request(self.request)


class HTTPSConnectProxyHandler(HTTPConnectProxyHandler):
    def __init__(self, request, *args, **kwargs):
        certfn = os.path.join(TEST_DIR, 'testcert.pem')
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        sslctx.load_cert_chain(certfn, None)
        request = sslctx.wrap_socket(request, server_side=True)
        self._original_request = request
        super().__init__(request, *args, **kwargs)

    def do_CONNECT(self):
        super().do_CONNECT()
        self.server.close_request(self._original_request)


@contextlib.contextmanager
def proxy_server(proxy_server_class, request_handler, bind_ip=None, **proxy_server_kwargs):
    server = server_thread = None
    try:
        bind_address = bind_ip or '127.0.0.1'
        server_type = ThreadingTCPServer if '.' in bind_address else IPv6ThreadingTCPServer
        server = server_type(
            (bind_address, 0), functools.partial(proxy_server_class, request_handler=request_handler, **proxy_server_kwargs))
        server_port = http_server_port(server)
        server_thread = threading.Thread(target=server.serve_forever)
        server_thread.daemon = True
        server_thread.start()
        if '.' not in bind_address:
            yield f'[{bind_address}]:{server_port}'
        else:
            yield f'{bind_address}:{server_port}'
    finally:
        server.shutdown()
        server.server_close()
        server_thread.join(2.0)


class HTTPProxyTestContext(abc.ABC):
    REQUEST_HANDLER_CLASS = None
    REQUEST_PROTO = None

    def http_server(self, server_class, *args, **kwargs):
        return proxy_server(server_class, self.REQUEST_HANDLER_CLASS, *args, **kwargs)

    @abc.abstractmethod
    def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs) -> dict:
        """return a dict of proxy_info"""


class HTTPProxyHTTPTestContext(HTTPProxyTestContext):
    # Standard HTTP Proxy for http requests
    REQUEST_HANDLER_CLASS = HTTPProxyHandler
    REQUEST_PROTO = 'http'

    def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
        request = Request(f'http://{target_domain or "127.0.0.1"}:{target_port or "40000"}/proxy_info', **req_kwargs)
        handler.validate(request)
        return json.loads(handler.send(request).read().decode())


class HTTPProxyHTTPSTestContext(HTTPProxyTestContext):
    # HTTP Connect proxy, for https requests
    REQUEST_HANDLER_CLASS = HTTPSProxyHandler
    REQUEST_PROTO = 'https'

    def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
        request = Request(f'https://{target_domain or "127.0.0.1"}:{target_port or "40000"}/proxy_info', **req_kwargs)
        handler.validate(request)
        return json.loads(handler.send(request).read().decode())


CTX_MAP = {
    'http': HTTPProxyHTTPTestContext,
    'https': HTTPProxyHTTPSTestContext,
}


@pytest.fixture(scope='module')
def ctx(request):
    return CTX_MAP[request.param]()


@pytest.mark.parametrize(
    'handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
@pytest.mark.parametrize('ctx', ['http'], indirect=True)  # pure http proxy can only support http
class TestHTTPProxy:
    def test_http_no_auth(self, handler, ctx):
        with ctx.http_server(HTTPProxyHandler) as server_address:
            with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh:
                proxy_info = ctx.proxy_info_request(rh)
                assert proxy_info['proxy'] == server_address
                assert proxy_info['connect'] is False
                assert 'Proxy-Authorization' not in proxy_info['headers']

    def test_http_auth(self, handler, ctx):
        with ctx.http_server(HTTPProxyHandler, username='test', password='test') as server_address:
            with handler(proxies={ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}) as rh:
                proxy_info = ctx.proxy_info_request(rh)
                assert proxy_info['proxy'] == server_address
                assert 'Proxy-Authorization' in proxy_info['headers']

    def test_http_bad_auth(self, handler, ctx):
        with ctx.http_server(HTTPProxyHandler, username='test', password='test') as server_address:
            with handler(proxies={ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}) as rh:
                with pytest.raises(HTTPError) as exc_info:
                    ctx.proxy_info_request(rh)
                assert exc_info.value.response.status == 407
                exc_info.value.response.close()

    def test_http_source_address(self, handler, ctx):
        with ctx.http_server(HTTPProxyHandler) as server_address:
            source_address = f'127.0.0.{random.randint(5, 255)}'
            verify_address_availability(source_address)
            with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'},
                         source_address=source_address) as rh:
                proxy_info = ctx.proxy_info_request(rh)
                assert proxy_info['proxy'] == server_address
                assert proxy_info['client_address'][0] == source_address

    @pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies')
    def test_https(self, handler, ctx):
        with ctx.http_server(HTTPSProxyHandler) as server_address:
            with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
                proxy_info = ctx.proxy_info_request(rh)
                assert proxy_info['proxy'] == server_address
                assert proxy_info['connect'] is False
                assert 'Proxy-Authorization' not in proxy_info['headers']

    @pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies')
    def test_https_verify_failed(self, handler, ctx):
        with ctx.http_server(HTTPSProxyHandler) as server_address:
            with handler(verify=True, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
                # Accept SSLError as may not be feasible to tell if it is proxy or request error.
                # note: if request proto also does ssl verification, this may also be the error of the request.
                # Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases.
                with pytest.raises((ProxyError, SSLError)):
                    ctx.proxy_info_request(rh)

    def test_http_with_idn(self, handler, ctx):
        with ctx.http_server(HTTPProxyHandler) as server_address:
            with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh:
                proxy_info = ctx.proxy_info_request(rh, target_domain='中文.tw')
                assert proxy_info['proxy'] == server_address
                assert proxy_info['path'].startswith('http://xn--fiq228c.tw')
                assert proxy_info['headers']['Host'].split(':', 1)[0] == 'xn--fiq228c.tw'


@pytest.mark.parametrize(
    'handler,ctx', [
        ('Requests', 'https'),
        ('CurlCFFI', 'https'),
    ], indirect=True)
class TestHTTPConnectProxy:
    def test_http_connect_no_auth(self, handler, ctx):
        with ctx.http_server(HTTPConnectProxyHandler) as server_address:
            with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh:
                proxy_info = ctx.proxy_info_request(rh)
                assert proxy_info['proxy'] == server_address
                assert proxy_info['connect'] is True
                assert 'Proxy-Authorization' not in proxy_info['headers']

    def test_http_connect_auth(self, handler, ctx):
        with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address:
            with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}) as rh:
                proxy_info = ctx.proxy_info_request(rh)
                assert proxy_info['proxy'] == server_address
                assert 'Proxy-Authorization' in proxy_info['headers']

    @pytest.mark.skip_handler(
        'Requests',
        'bug in urllib3 causes unclosed socket: https://github.com/urllib3/urllib3/issues/3374'
    )
    def test_http_connect_bad_auth(self, handler, ctx):
        with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address:
            with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}) as rh:
                with pytest.raises(ProxyError):
                    ctx.proxy_info_request(rh)

    def test_http_connect_source_address(self, handler, ctx):
        with ctx.http_server(HTTPConnectProxyHandler) as server_address:
            source_address = f'127.0.0.{random.randint(5, 255)}'
            verify_address_availability(source_address)
            with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'},
                         source_address=source_address,
                         verify=False) as rh:
                proxy_info = ctx.proxy_info_request(rh)
                assert proxy_info['proxy'] == server_address
                assert proxy_info['client_address'][0] == source_address

    @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
    def test_https_connect_proxy(self, handler, ctx):
        with ctx.http_server(HTTPSConnectProxyHandler) as server_address:
            with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
                proxy_info = ctx.proxy_info_request(rh)
                assert proxy_info['proxy'] == server_address
                assert proxy_info['connect'] is True
                assert 'Proxy-Authorization' not in proxy_info['headers']

    @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
    def test_https_connect_verify_failed(self, handler, ctx):
        with ctx.http_server(HTTPSConnectProxyHandler) as server_address:
            with handler(verify=True, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
                # Accept SSLError as may not be feasible to tell if it is proxy or request error.
                # note: if request proto also does ssl verification, this may also be the error of the request.
                # Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases.
                with pytest.raises((ProxyError, SSLError)):
                    ctx.proxy_info_request(rh)

    @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
    def test_https_connect_proxy_auth(self, handler, ctx):
        with ctx.http_server(HTTPSConnectProxyHandler, username='test', password='test') as server_address:
            with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://test:test@{server_address}'}) as rh:
                proxy_info = ctx.proxy_info_request(rh)
                assert proxy_info['proxy'] == server_address
                assert 'Proxy-Authorization' in proxy_info['headers']
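Note (not part of the patch): do_proxy_auth above expects the standard Basic scheme on the Proxy-Authorization header. A small sketch of how a client builds that header and how the mixin decodes it:

import base64

username, password = 'test', 'test'
token = base64.b64encode(f'{username}:{password}'.encode()).decode()
header = f'Basic {token}'            # what a client would send as Proxy-Authorization
print(header)                        # Basic dGVzdDp0ZXN0

# Server side, mirroring do_proxy_auth: strip 'Basic ' and split on the first ':'
decoded_user, decoded_pass = base64.b64decode(header[6:]).decode().split(':', 1)
assert (decoded_user, decoded_pass) == (username, password)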
@@ -6,6 +6,8 @@ import sys
 
 import pytest
 
+from yt_dlp.networking.common import Features, DEFAULT_TIMEOUT
+
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 import gzip
@@ -27,8 +29,12 @@ import zlib
 from email.message import Message
 from http.cookiejar import CookieJar
 
-from test.conftest import validate_and_send
-from test.helper import FakeYDL, http_server_port, verify_address_availability
+from test.helper import (
+    FakeYDL,
+    http_server_port,
+    validate_and_send,
+    verify_address_availability,
+)
 from yt_dlp.cookies import YoutubeDLCookieJar
 from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3
 from yt_dlp.networking import (
@@ -62,21 +68,6 @@ from yt_dlp.utils.networking import HTTPHeaderDict, std_headers
 TEST_DIR = os.path.dirname(os.path.abspath(__file__))
 
 
-def _build_proxy_handler(name):
-    class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
-        proxy_name = name
-
-        def log_message(self, format, *args):
-            pass
-
-        def do_GET(self):
-            self.send_response(200)
-            self.send_header('Content-Type', 'text/plain; charset=utf-8')
-            self.end_headers()
-            self.wfile.write(f'{self.proxy_name}: {self.path}'.encode())
-    return HTTPTestRequestHandler
-
-
 class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
     protocol_version = 'HTTP/1.1'
     default_request_version = 'HTTP/1.1'
@@ -317,8 +308,9 @@ class TestRequestHandlerBase:
         cls.https_server_thread.start()
 
 
+@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
 class TestHTTPRequestHandler(TestRequestHandlerBase):
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
+
     def test_verify_cert(self, handler):
         with handler() as rh:
             with pytest.raises(CertificateVerifyError):
@@ -329,7 +321,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
             assert r.status == 200
             r.close()
 
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
     def test_ssl_error(self, handler):
         # HTTPS server with too old TLS version
         # XXX: is there a better way to test this than to create a new server?
@@ -347,7 +338,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
                 validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers'))
             assert not issubclass(exc_info.type, CertificateVerifyError)
 
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
    def test_percent_encode(self, handler):
         with handler() as rh:
             # Unicode characters should be encoded with uppercase percent-encoding
@@ -359,7 +349,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
             assert res.status == 200
             res.close()
 
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
     @pytest.mark.parametrize('path', [
         '/a/b/./../../headers',
         '/redirect_dotsegments',
@@ -375,15 +364,13 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
             assert res.url == f'http://127.0.0.1:{self.http_port}/headers'
             res.close()
 
-    # Not supported by CurlCFFI (non-standard)
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+    @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi (non-standard)')
     def test_unicode_path_redirection(self, handler):
         with handler() as rh:
             r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
             assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
             r.close()
 
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
     def test_raise_http_error(self, handler):
         with handler() as rh:
             for bad_status in (400, 500, 599, 302):
@@ -393,7 +380,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
             # Should not raise an error
             validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close()
 
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
     def test_response_url(self, handler):
         with handler() as rh:
             # Response url should be that of the last url in redirect chain
@@ -405,7 +391,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
             res2.close()
 
     # Covers some basic cases we expect some level of consistency between request handlers for
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
     @pytest.mark.parametrize('redirect_status,method,expected', [
         # A 303 must either use GET or HEAD for subsequent request
         (303, 'POST', ('', 'GET', False)),
@@ -447,7 +432,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
             assert expected[1] == res.headers.get('method')
             assert expected[2] == ('content-length' in headers.decode().lower())
 
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
     def test_request_cookie_header(self, handler):
         # We should accept a Cookie header being passed as in normal headers and handle it appropriately.
         with handler() as rh:
@@ -480,19 +464,16 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
             assert b'cookie: test=ytdlp' not in data.lower()
             assert b'cookie: test=test3' in data.lower()
 
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
     def test_redirect_loop(self, handler):
         with handler() as rh:
             with pytest.raises(HTTPError, match='redirect loop'):
                 validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop'))
 
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
     def test_incompleteread(self, handler):
         with handler(timeout=2) as rh:
             with pytest.raises(IncompleteRead, match='13 bytes read, 234221 more expected'):
                 validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read()
 
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
     def test_cookies(self, handler):
         cookiejar = YoutubeDLCookieJar()
         cookiejar.set_cookie(http.cookiejar.Cookie(
@@ -509,7 +490,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
                 rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read()
             assert b'cookie: test=ytdlp' in data.lower()
 
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
     def test_headers(self, handler):
 
         with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
@@ -525,7 +505,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
             assert b'test2: test2' not in data
             assert b'test3: test3' in data
 
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
     def test_read_timeout(self, handler):
         with handler() as rh:
             # Default timeout is 20 seconds, so this should go through
@@ -541,26 +520,21 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
                 validate_and_send(
                     rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4}))
 
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
     def test_connect_timeout(self, handler):
         # nothing should be listening on this port
         connect_timeout_url = 'http://10.255.255.255'
-        with handler(timeout=0.01) as rh:
+        with handler(timeout=0.01) as rh, pytest.raises(TransportError):
             now = time.time()
-            with pytest.raises(TransportError):
-                validate_and_send(
-                    rh, Request(connect_timeout_url))
-            assert 0.01 <= time.time() - now < 20
+            validate_and_send(rh, Request(connect_timeout_url))
+        assert time.time() - now < DEFAULT_TIMEOUT
 
-        with handler() as rh:
-            with pytest.raises(TransportError):
-                # Per request timeout, should override handler timeout
-                now = time.time()
-                validate_and_send(
-                    rh, Request(connect_timeout_url, extensions={'timeout': 0.01}))
-            assert 0.01 <= time.time() - now < 20
+        # Per request timeout, should override handler timeout
+        request = Request(connect_timeout_url, extensions={'timeout': 0.01})
+        with handler() as rh, pytest.raises(TransportError):
+            now = time.time()
+            validate_and_send(rh, request)
+        assert time.time() - now < DEFAULT_TIMEOUT
 
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
     def test_source_address(self, handler):
         source_address = f'127.0.0.{random.randint(5, 255)}'
         # on some systems these loopback addresses we need for testing may not be available
@@ -572,13 +546,13 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
             assert source_address == data
 
     # Not supported by CurlCFFI
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+    @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi')
    def test_gzip_trailing_garbage(self, handler):
         with handler() as rh:
             data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode()
             assert data == '<html><video src="/vid.mp4" /></html>'
 
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+    @pytest.mark.skip_handler('CurlCFFI', 'not applicable to curl-cffi')
     @pytest.mark.skipif(not brotli, reason='brotli support is not installed')
     def test_brotli(self, handler):
         with handler() as rh:
@@ -589,7 +563,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
             assert res.headers.get('Content-Encoding') == 'br'
             assert res.read() == b'<html><video src="/vid.mp4" /></html>'
 
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
     def test_deflate(self, handler):
         with handler() as rh:
             res = validate_and_send(
@@ -599,7 +572,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
             assert res.headers.get('Content-Encoding') == 'deflate'
             assert res.read() == b'<html><video src="/vid.mp4" /></html>'
 
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
     def test_gzip(self, handler):
         with handler() as rh:
             res = validate_and_send(
@@ -609,7 +581,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
             assert res.headers.get('Content-Encoding') == 'gzip'
             assert res.read() == b'<html><video src="/vid.mp4" /></html>'
 
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
     def test_multiple_encodings(self, handler):
         with handler() as rh:
             for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
@@ -620,8 +591,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
                 assert res.headers.get('Content-Encoding') == pair
                 assert res.read() == b'<html><video src="/vid.mp4" /></html>'
 
-    # Not supported by curl_cffi
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
+    @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi')
     def test_unsupported_encoding(self, handler):
         with handler() as rh:
             res = validate_and_send(
@@ -631,7 +601,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
             assert res.headers.get('Content-Encoding') == 'unsupported'
             assert res.read() == b'raw'
 
-    @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
     def test_read(self, handler):
         with handler() as rh:
             res = validate_and_send(
@ -642,83 +611,48 @@ class TestHTTPRequestHandler(TestRequestHandlerBase):
|
||||||
assert res.read().decode().endswith('\n\n')
|
assert res.read().decode().endswith('\n\n')
|
||||||
assert res.read() == b''
|
assert res.read() == b''
|
||||||
|
|
||||||
|
def test_request_disable_proxy(self, handler):
|
||||||
class TestHTTPProxy(TestRequestHandlerBase):
|
for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['http']:
|
||||||
# Note: this only tests http urls over non-CONNECT proxy
|
# Given the handler is configured with a proxy
|
||||||
@classmethod
|
with handler(proxies={'http': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
|
||||||
def setup_class(cls):
|
# When a proxy is explicitly set to None for the request
|
||||||
super().setup_class()
|
|
||||||
# HTTP Proxy server
|
|
||||||
cls.proxy = http.server.ThreadingHTTPServer(
|
|
||||||
('127.0.0.1', 0), _build_proxy_handler('normal'))
|
|
||||||
cls.proxy_port = http_server_port(cls.proxy)
|
|
||||||
cls.proxy_thread = threading.Thread(target=cls.proxy.serve_forever)
|
|
||||||
cls.proxy_thread.daemon = True
|
|
||||||
cls.proxy_thread.start()
|
|
||||||
|
|
||||||
# Geo proxy server
|
|
||||||
cls.geo_proxy = http.server.ThreadingHTTPServer(
|
|
||||||
('127.0.0.1', 0), _build_proxy_handler('geo'))
|
|
||||||
cls.geo_port = http_server_port(cls.geo_proxy)
|
|
||||||
cls.geo_proxy_thread = threading.Thread(target=cls.geo_proxy.serve_forever)
|
|
||||||
cls.geo_proxy_thread.daemon = True
|
|
||||||
cls.geo_proxy_thread.start()
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_http_proxy(self, handler):
|
|
||||||
http_proxy = f'http://127.0.0.1:{self.proxy_port}'
|
|
||||||
geo_proxy = f'http://127.0.0.1:{self.geo_port}'
|
|
||||||
|
|
||||||
# Test global http proxy
|
|
||||||
# Test per request http proxy
|
|
||||||
# Test per request http proxy disables proxy
|
|
||||||
url = 'http://foo.com/bar'
|
|
||||||
|
|
||||||
# Global HTTP proxy
|
|
||||||
with handler(proxies={'http': http_proxy}) as rh:
|
|
||||||
res = validate_and_send(rh, Request(url)).read().decode()
|
|
||||||
assert res == f'normal: {url}'
|
|
||||||
|
|
||||||
# Per request proxy overrides global
|
|
||||||
res = validate_and_send(rh, Request(url, proxies={'http': geo_proxy})).read().decode()
|
|
||||||
assert res == f'geo: {url}'
|
|
||||||
|
|
||||||
# and setting to None disables all proxies for that request
|
|
||||||
real_url = f'http://127.0.0.1:{self.http_port}/headers'
|
|
||||||
res = validate_and_send(
|
res = validate_and_send(
|
||||||
rh, Request(real_url, proxies={'http': None})).read().decode()
|
rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'http': None}))
|
||||||
assert res != f'normal: {real_url}'
|
# Then no proxy should be used
|
||||||
assert 'Accept' in res
|
res.close()
|
||||||
|
assert res.status == 200
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
@pytest.mark.skip_handlers_if(
|
||||||
|
lambda _, handler: Features.NO_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support NO_PROXY')
|
||||||
def test_noproxy(self, handler):
|
def test_noproxy(self, handler):
|
||||||
with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh:
|
for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['http']:
|
||||||
# NO_PROXY
|
# Given the handler is configured with a proxy
|
||||||
|
with handler(proxies={'http': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
|
||||||
for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
|
for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'):
|
||||||
|
# When request no proxy includes the request url host
|
||||||
nop_response = validate_and_send(
|
nop_response = validate_and_send(
|
||||||
rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy})).read().decode(
|
rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy}))
|
||||||
'utf-8')
|
# Then the proxy should not be used
|
||||||
assert 'Accept' in nop_response
|
assert nop_response.status == 200
|
||||||
|
nop_response.close()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
@pytest.mark.skip_handlers_if(
|
||||||
|
lambda _, handler: Features.ALL_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support ALL_PROXY')
|
||||||
def test_allproxy(self, handler):
|
def test_allproxy(self, handler):
|
||||||
url = 'http://foo.com/bar'
|
# This is a bit of a hacky test, but it should be enough to check whether the handler is using the proxy.
|
||||||
with handler() as rh:
|
# 0.1s might not be enough of a timeout if proxy is not used in all cases, but should still get failures.
|
||||||
response = validate_and_send(rh, Request(url, proxies={'all': f'http://127.0.0.1:{self.proxy_port}'})).read().decode(
|
with handler(proxies={'all': 'http://10.255.255.255'}, timeout=0.1) as rh:
|
||||||
'utf-8')
|
with pytest.raises(TransportError):
|
||||||
assert response == f'normal: {url}'
|
validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).close()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
with handler(timeout=0.1) as rh:
|
||||||
def test_http_proxy_with_idn(self, handler):
|
with pytest.raises(TransportError):
|
||||||
with handler(proxies={
|
validate_and_send(
|
||||||
'http': f'http://127.0.0.1:{self.proxy_port}',
|
rh, Request(
|
||||||
}) as rh:
|
f'http://127.0.0.1:{self.http_port}/headers', proxies={'all': 'http://10.255.255.255'})).close()
|
||||||
url = 'http://中文.tw/'
|
|
||||||
response = rh.send(Request(url)).read().decode()
|
|
||||||
# b'xn--fiq228c' is '中文'.encode('idna')
|
|
||||||
assert response == 'normal: http://xn--fiq228c.tw/'
|
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
class TestClientCertificate:
@classmethod
def setup_class(cls):
@@ -745,27 +679,23 @@ class TestClientCertificate:
) as rh:
validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html')).read().decode()
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
def test_certificate_combined_nopass(self, handler):
self._run_test(handler, client_cert={
'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'),
})
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
def test_certificate_nocombined_nopass(self, handler):
self._run_test(handler, client_cert={
'client_certificate': os.path.join(self.certdir, 'client.crt'),
'client_certificate_key': os.path.join(self.certdir, 'client.key'),
})
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
def test_certificate_combined_pass(self, handler):
self._run_test(handler, client_cert={
'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'),
'client_certificate_password': 'foobar',
})
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
def test_certificate_nocombined_pass(self, handler):
self._run_test(handler, client_cert={
'client_certificate': os.path.join(self.certdir, 'client.crt'),
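# A minimal sketch (not part of the diff) of the client_cert mapping the cases above
# exercise; the paths are placeholders and verify=False is an assumption for a
# self-signed test server. 'client_certificate' may be a combined cert+key file,
# 'client_certificate_key' a separate key, 'client_certificate_password' unlocks an
# encrypted key.
# client_cert = {
#     'client_certificate': '/path/to/client.crt',
#     'client_certificate_key': '/path/to/client.key',
#     'client_certificate_password': 'foobar',
# }
# with handler(client_cert=client_cert, verify=False) as rh:
#     validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html'))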
@@ -785,6 +715,25 @@ class TestHTTPImpersonateRequestHandler(TestRequestHandlerBase):
assert res.status == 200
assert std_headers['user-agent'].lower() not in res.read().decode().lower()
def test_response_extensions(self, handler):
with handler() as rh:
for target in rh.supported_targets:
request = Request(
f'http://127.0.0.1:{self.http_port}/gen_200', extensions={'impersonate': target})
res = validate_and_send(rh, request)
assert res.extensions['impersonate'] == rh._get_request_target(request)
def test_http_error_response_extensions(self, handler):
with handler() as rh:
for target in rh.supported_targets:
request = Request(
f'http://127.0.0.1:{self.http_port}/gen_404', extensions={'impersonate': target})
try:
validate_and_send(rh, request)
except HTTPError as e:
res = e.response
assert res.extensions['impersonate'] == rh._get_request_target(request)
class TestRequestHandlerMisc:
"""Misc generic tests for request handlers, not related to request or validation testing"""
@@ -805,8 +754,8 @@ class TestRequestHandlerMisc:
assert len(logging_handlers) == before_count
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
class TestUrllibRequestHandler(TestRequestHandlerBase):
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
def test_file_urls(self, handler):
# See https://github.com/ytdl-org/youtube-dl/issues/8227
tf = tempfile.NamedTemporaryFile(delete=False)
@@ -828,7 +777,6 @@ class TestUrllibRequestHandler(TestRequestHandlerBase):
os.unlink(tf.name)
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
def test_http_error_returns_content(self, handler):
# urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost
def get_response():
@@ -841,7 +789,6 @@ class TestUrllibRequestHandler(TestRequestHandlerBase):
assert get_response().read() == b'<html></html>'
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
def test_verify_cert_error_text(self, handler):
# Check the output of the error message
with handler() as rh:
@@ -851,7 +798,6 @@ class TestUrllibRequestHandler(TestRequestHandlerBase):
):
validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers'))
@pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
@pytest.mark.parametrize('req,match,version_check', [
# https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256
# bpo-39603: Check implemented in 3.7.9+, 3.8.5+
@@ -1183,7 +1129,7 @@ class TestRequestHandlerValidation:
]
PROXY_SCHEME_TESTS = [
# scheme, expected to fail
# proxy scheme, expected to fail
('Urllib', 'http', [
('http', False),
('https', UnsupportedRequest),
@@ -1209,30 +1155,41 @@
('socks5', False),
('socks5h', False),
]),
('Websockets', 'ws', [
('http', UnsupportedRequest),
('https', UnsupportedRequest),
('socks4', False),
('socks4a', False),
('socks5', False),
('socks5h', False),
]),
(NoCheckRH, 'http', [('http', False)]),
(HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]),
('Websockets', 'ws', [('http', UnsupportedRequest)]),
]
PROXY_KEY_TESTS = [
# key, expected to fail
# proxy key, proxy scheme, expected to fail
('Urllib', [
('Urllib', 'http', [
('all', False),
('all', 'http', False),
('unrelated', False),
('unrelated', 'http', False),
]),
('Requests', [
('Requests', 'http', [
('all', False),
('all', 'http', False),
('unrelated', False),
('unrelated', 'http', False),
]),
('CurlCFFI', [
('CurlCFFI', 'http', [
('all', False),
('all', 'http', False),
('unrelated', False),
('unrelated', 'http', False),
]),
(NoCheckRH, [('all', False)]),
('Websockets', 'ws', [
(HTTPSupportedRH, [('all', UnsupportedRequest)]),
('all', 'socks5', False),
(HTTPSupportedRH, [('no', UnsupportedRequest)]),
('unrelated', 'socks5', False),
]),
(NoCheckRH, 'http', [('all', 'http', False)]),
(HTTPSupportedRH, 'http', [('all', 'http', UnsupportedRequest)]),
(HTTPSupportedRH, 'http', [('no', 'http', UnsupportedRequest)]),
]
EXTENSION_TESTS = [
|
EXTENSION_TESTS = [
|
||||||
|
@ -1274,28 +1231,54 @@ class TestRequestHandlerValidation:
|
||||||
]),
|
]),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('handler,fail,scheme', [
|
||||||
|
('Urllib', False, 'http'),
|
||||||
|
('Requests', False, 'http'),
|
||||||
|
('CurlCFFI', False, 'http'),
|
||||||
|
('Websockets', False, 'ws')
|
||||||
|
], indirect=['handler'])
|
||||||
|
def test_no_proxy(self, handler, fail, scheme):
|
||||||
|
run_validation(handler, fail, Request(f'{scheme}://', proxies={'no': '127.0.0.1,github.com'}))
|
||||||
|
run_validation(handler, fail, Request(f'{scheme}://'), proxies={'no': '127.0.0.1,github.com'})
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('handler,scheme', [
|
||||||
|
('Urllib', 'http'),
|
||||||
|
(HTTPSupportedRH, 'http'),
|
||||||
|
('Requests', 'http'),
|
||||||
|
('CurlCFFI', 'http'),
|
||||||
|
('Websockets', 'ws')
|
||||||
|
], indirect=['handler'])
|
||||||
|
def test_empty_proxy(self, handler, scheme):
|
||||||
|
run_validation(handler, False, Request(f'{scheme}://', proxies={scheme: None}))
|
||||||
|
run_validation(handler, False, Request(f'{scheme}://'), proxies={scheme: None})
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
|
||||||
|
@pytest.mark.parametrize('handler,scheme', [
|
||||||
|
('Urllib', 'http'),
|
||||||
|
(HTTPSupportedRH, 'http'),
|
||||||
|
('Requests', 'http'),
|
||||||
|
('CurlCFFI', 'http'),
|
||||||
|
('Websockets', 'ws')
|
||||||
|
], indirect=['handler'])
|
||||||
|
def test_invalid_proxy_url(self, handler, scheme, proxy_url):
|
||||||
|
run_validation(handler, UnsupportedRequest, Request(f'{scheme}://', proxies={scheme: proxy_url}))
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
|
@pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [
|
||||||
(handler_tests[0], scheme, fail, handler_kwargs)
|
(handler_tests[0], scheme, fail, handler_kwargs)
|
||||||
for handler_tests in URL_SCHEME_TESTS
|
for handler_tests in URL_SCHEME_TESTS
|
||||||
for scheme, fail, handler_kwargs in handler_tests[1]
|
for scheme, fail, handler_kwargs in handler_tests[1]
|
||||||
|
|
||||||
], indirect=['handler'])
|
], indirect=['handler'])
|
||||||
def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
|
def test_url_scheme(self, handler, scheme, fail, handler_kwargs):
|
||||||
run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))
|
run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {}))
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler,fail', [('Urllib', False), ('Requests', False), ('CurlCFFI', False)], indirect=['handler'])
|
@pytest.mark.parametrize('handler,scheme,proxy_key,proxy_scheme,fail', [
|
||||||
def test_no_proxy(self, handler, fail):
|
(handler_tests[0], handler_tests[1], proxy_key, proxy_scheme, fail)
|
||||||
run_validation(handler, fail, Request('http://', proxies={'no': '127.0.0.1,github.com'}))
|
|
||||||
run_validation(handler, fail, Request('http://'), proxies={'no': '127.0.0.1,github.com'})
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler,proxy_key,fail', [
|
|
||||||
(handler_tests[0], proxy_key, fail)
|
|
||||||
for handler_tests in PROXY_KEY_TESTS
|
for handler_tests in PROXY_KEY_TESTS
|
||||||
for proxy_key, fail in handler_tests[1]
|
for proxy_key, proxy_scheme, fail in handler_tests[2]
|
||||||
], indirect=['handler'])
|
], indirect=['handler'])
|
||||||
def test_proxy_key(self, handler, proxy_key, fail):
|
def test_proxy_key(self, handler, scheme, proxy_key, proxy_scheme, fail):
|
||||||
run_validation(handler, fail, Request('http://', proxies={proxy_key: 'http://example.com'}))
|
run_validation(handler, fail, Request(f'{scheme}://', proxies={proxy_key: f'{proxy_scheme}://example.com'}))
|
||||||
run_validation(handler, fail, Request('http://'), proxies={proxy_key: 'http://example.com'})
|
run_validation(handler, fail, Request(f'{scheme}://'), proxies={proxy_key: f'{proxy_scheme}://example.com'})
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler,req_scheme,scheme,fail', [
|
@pytest.mark.parametrize('handler,req_scheme,scheme,fail', [
|
||||||
(handler_tests[0], handler_tests[1], scheme, fail)
|
(handler_tests[0], handler_tests[1], scheme, fail)
|
||||||
|
@ -1306,16 +1289,6 @@ class TestRequestHandlerValidation:
|
||||||
run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'}))
|
run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'}))
|
||||||
run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'})
|
run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'})
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_empty_proxy(self, handler):
|
|
||||||
run_validation(handler, False, Request('http://', proxies={'http': None}))
|
|
||||||
run_validation(handler, False, Request('http://'), proxies={'http': None})
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
|
|
||||||
@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
|
|
||||||
def test_invalid_proxy_url(self, handler, proxy_url):
|
|
||||||
run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler,scheme,extensions,fail', [
|
@pytest.mark.parametrize('handler,scheme,extensions,fail', [
|
||||||
(handler_tests[0], handler_tests[1], extensions, fail)
|
(handler_tests[0], handler_tests[1], extensions, fail)
|
||||||
for handler_tests in EXTENSION_TESTS
|
for handler_tests in EXTENSION_TESTS
|
||||||
|
|
444
test/test_traversal.py
Normal file
@@ -0,0 +1,444 @@
import http.cookies
import re
import xml.etree.ElementTree

import pytest

from yt_dlp.utils import dict_get, int_or_none, str_or_none
from yt_dlp.utils.traversal import traverse_obj

_TEST_DATA = {
    100: 100,
    1.2: 1.2,
    'str': 'str',
    'None': None,
    '...': ...,
    'urls': [
        {'index': 0, 'url': 'https://www.example.com/0'},
        {'index': 1, 'url': 'https://www.example.com/1'},
    ],
    'data': (
        {'index': 2},
        {'index': 3},
    ),
    'dict': {},
}

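# A brief usage sketch (not part of the new file) of the traversal semantics verified
# below, using only the _TEST_DATA defined above: paths are key sequences, `...` and
# tuples branch into lists, and `default` is returned when a path fails.
assert traverse_obj(_TEST_DATA, ('urls', 0, 'url')) == 'https://www.example.com/0'
assert traverse_obj(_TEST_DATA, ('urls', ..., 'url')) == [
    'https://www.example.com/0', 'https://www.example.com/1']
assert traverse_obj(_TEST_DATA, 'missing', default='fallback') == 'fallback'
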
class TestTraversal:
|
||||||
|
def test_traversal_base(self):
|
||||||
|
assert traverse_obj(_TEST_DATA, ('str',)) == 'str', \
|
||||||
|
'allow tuple path'
|
||||||
|
assert traverse_obj(_TEST_DATA, ['str']) == 'str', \
|
||||||
|
'allow list path'
|
||||||
|
assert traverse_obj(_TEST_DATA, (value for value in ("str",))) == 'str', \
|
||||||
|
'allow iterable path'
|
||||||
|
assert traverse_obj(_TEST_DATA, 'str') == 'str', \
|
||||||
|
'single items should be treated as a path'
|
||||||
|
assert traverse_obj(_TEST_DATA, 100) == 100, \
|
||||||
|
'allow int path'
|
||||||
|
assert traverse_obj(_TEST_DATA, 1.2) == 1.2, \
|
||||||
|
'allow float path'
|
||||||
|
assert traverse_obj(_TEST_DATA, None) == _TEST_DATA, \
|
||||||
|
'`None` should not perform any modification'
|
||||||
|
|
||||||
|
def test_traversal_ellipsis(self):
|
||||||
|
assert traverse_obj(_TEST_DATA, ...) == [x for x in _TEST_DATA.values() if x not in (None, {})], \
|
||||||
|
'`...` should give all non discarded values'
|
||||||
|
assert traverse_obj(_TEST_DATA, ('urls', 0, ...)) == list(_TEST_DATA['urls'][0].values()), \
|
||||||
|
'`...` selection for dicts should select all values'
|
||||||
|
assert traverse_obj(_TEST_DATA, (..., ..., 'url')) == ['https://www.example.com/0', 'https://www.example.com/1'], \
|
||||||
|
'nested `...` queries should work'
|
||||||
|
assert traverse_obj(_TEST_DATA, (..., ..., 'index')) == list(range(4)), \
|
||||||
|
'`...` query result should be flattened'
|
||||||
|
assert traverse_obj(iter(range(4)), ...) == list(range(4)), \
|
||||||
|
'`...` should accept iterables'
|
||||||
|
|
||||||
|
def test_traversal_function(self):
|
||||||
|
filter_func = lambda x, y: x == 'urls' and isinstance(y, list)
|
||||||
|
assert traverse_obj(_TEST_DATA, filter_func) == [_TEST_DATA['urls']], \
|
||||||
|
'function as query key should perform a filter based on (key, value)'
|
||||||
|
assert traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)) == ['str'], \
|
||||||
|
'exceptions in the query function should be catched'
|
||||||
|
assert traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0) == [0, 2], \
|
||||||
|
'function key should accept iterables'
|
||||||
|
# Wrong function signature should raise (debug mode)
|
||||||
|
with pytest.raises(Exception):
|
||||||
|
traverse_obj(_TEST_DATA, lambda a: ...)
|
||||||
|
with pytest.raises(Exception):
|
||||||
|
traverse_obj(_TEST_DATA, lambda a, b, c: ...)
|
||||||
|
|
||||||
|
def test_traversal_set(self):
|
||||||
|
# transformation/type, like `expected_type`
|
||||||
|
assert traverse_obj(_TEST_DATA, (..., {str.upper}, )) == ['STR'], \
|
||||||
|
'Function in set should be a transformation'
|
||||||
|
assert traverse_obj(_TEST_DATA, (..., {str})) == ['str'], \
|
||||||
|
'Type in set should be a type filter'
|
||||||
|
assert traverse_obj(_TEST_DATA, (..., {str, int})) == [100, 'str'], \
|
||||||
|
'Multiple types in set should be a type filter'
|
||||||
|
assert traverse_obj(_TEST_DATA, {dict}) == _TEST_DATA, \
|
||||||
|
'A single set should be wrapped into a path'
|
||||||
|
assert traverse_obj(_TEST_DATA, (..., {str.upper})) == ['STR'], \
|
||||||
|
'Transformation function should not raise'
|
||||||
|
expected = [x for x in map(str_or_none, _TEST_DATA.values()) if x is not None]
|
||||||
|
assert traverse_obj(_TEST_DATA, (..., {str_or_none})) == expected, \
|
||||||
|
'Function in set should be a transformation'
|
||||||
|
assert traverse_obj(_TEST_DATA, ('fail', {lambda _: 'const'})) == 'const', \
|
||||||
|
'Function in set should always be called'
|
||||||
|
# Sets with length < 1 or > 1 not including only types should raise
|
||||||
|
with pytest.raises(Exception):
|
||||||
|
traverse_obj(_TEST_DATA, set())
|
||||||
|
with pytest.raises(Exception):
|
||||||
|
traverse_obj(_TEST_DATA, {str.upper, str})
|
||||||
|
|
||||||
|
def test_traversal_slice(self):
|
||||||
|
_SLICE_DATA = [0, 1, 2, 3, 4]
|
||||||
|
|
||||||
|
assert traverse_obj(_TEST_DATA, ('dict', slice(1))) is None, \
|
||||||
|
'slice on a dictionary should not throw'
|
||||||
|
assert traverse_obj(_SLICE_DATA, slice(1)) == _SLICE_DATA[:1], \
|
||||||
|
'slice key should apply slice to sequence'
|
||||||
|
assert traverse_obj(_SLICE_DATA, slice(1, 2)) == _SLICE_DATA[1:2], \
|
||||||
|
'slice key should apply slice to sequence'
|
||||||
|
assert traverse_obj(_SLICE_DATA, slice(1, 4, 2)) == _SLICE_DATA[1:4:2], \
|
||||||
|
'slice key should apply slice to sequence'
|
||||||
|
|
||||||
|
def test_traversal_alternatives(self):
|
||||||
|
assert traverse_obj(_TEST_DATA, 'fail', 'str') == 'str', \
|
||||||
|
'multiple `paths` should be treated as alternative paths'
|
||||||
|
assert traverse_obj(_TEST_DATA, 'str', 100) == 'str', \
|
||||||
|
'alternatives should exit early'
|
||||||
|
assert traverse_obj(_TEST_DATA, 'fail', 'fail') is None, \
|
||||||
|
'alternatives should return `default` if exhausted'
|
||||||
|
assert traverse_obj(_TEST_DATA, (..., 'fail'), 100) == 100, \
|
||||||
|
'alternatives should track their own branching return'
|
||||||
|
assert traverse_obj(_TEST_DATA, ('dict', ...), ('data', ...)) == list(_TEST_DATA['data']), \
|
||||||
|
'alternatives on empty objects should search further'
|
||||||
|
|
||||||
|
def test_traversal_branching_nesting(self):
|
||||||
|
assert traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')) == ['https://www.example.com/0'], \
|
||||||
|
'tuple as key should be treated as branches'
|
||||||
|
assert traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')) == ['https://www.example.com/0'], \
|
||||||
|
'list as key should be treated as branches'
|
||||||
|
assert traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))) == ['https://www.example.com/0'], \
|
||||||
|
'double nesting in path should be treated as paths'
|
||||||
|
assert traverse_obj(['0', [1, 2]], [(0, 1), 0]) == [1], \
|
||||||
|
'do not fail early on branching'
|
||||||
|
expected = ['https://www.example.com/0', 'https://www.example.com/1']
|
||||||
|
assert traverse_obj(_TEST_DATA, ('urls', ((0, ('fail', 'url')), (1, 'url')))) == expected, \
|
||||||
|
'tripple nesting in path should be treated as branches'
|
||||||
|
assert traverse_obj(_TEST_DATA, ('urls', ('fail', (..., 'url')))) == expected, \
|
||||||
|
'ellipsis as branch path start gets flattened'
|
||||||
|
|
||||||
|
def test_traversal_dict(self):
|
||||||
|
assert traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}) == {0: 100, 1: 1.2}, \
|
||||||
|
'dict key should result in a dict with the same keys'
|
||||||
|
expected = {0: 'https://www.example.com/0'}
|
||||||
|
assert traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}) == expected, \
|
||||||
|
'dict key should allow paths'
|
||||||
|
expected = {0: ['https://www.example.com/0']}
|
||||||
|
assert traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}) == expected, \
|
||||||
|
'tuple in dict path should be treated as branches'
|
||||||
|
assert traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}) == expected, \
|
||||||
|
'double nesting in dict path should be treated as paths'
|
||||||
|
expected = {0: ['https://www.example.com/1', 'https://www.example.com/0']}
|
||||||
|
assert traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}) == expected, \
|
||||||
|
'tripple nesting in dict path should be treated as branches'
|
||||||
|
assert traverse_obj(_TEST_DATA, {0: 'fail'}) == {}, \
|
||||||
|
'remove `None` values when top level dict key fails'
|
||||||
|
assert traverse_obj(_TEST_DATA, {0: 'fail'}, default=...) == {0: ...}, \
|
||||||
|
'use `default` if key fails and `default`'
|
||||||
|
assert traverse_obj(_TEST_DATA, {0: 'dict'}) == {}, \
|
||||||
|
'remove empty values when dict key'
|
||||||
|
assert traverse_obj(_TEST_DATA, {0: 'dict'}, default=...) == {0: ...}, \
|
||||||
|
'use `default` when dict key and `default`'
|
||||||
|
assert traverse_obj(_TEST_DATA, {0: {0: 'fail'}}) == {}, \
|
||||||
|
'remove empty values when nested dict key fails'
|
||||||
|
assert traverse_obj(None, {0: 'fail'}) == {}, \
|
||||||
|
'default to dict if pruned'
|
||||||
|
assert traverse_obj(None, {0: 'fail'}, default=...) == {0: ...}, \
|
||||||
|
'default to dict if pruned and default is given'
|
||||||
|
assert traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=...) == {0: {0: ...}}, \
|
||||||
|
'use nested `default` when nested dict key fails and `default`'
|
||||||
|
assert traverse_obj(_TEST_DATA, {0: ('dict', ...)}) == {}, \
|
||||||
|
'remove key if branch in dict key not successful'
|
||||||
|
|
||||||
|
def test_traversal_default(self):
|
||||||
|
_DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
|
||||||
|
|
||||||
|
assert traverse_obj(_DEFAULT_DATA, 'fail') is None, \
|
||||||
|
'default value should be `None`'
|
||||||
|
assert traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=...) == ..., \
|
||||||
|
'chained fails should result in default'
|
||||||
|
assert traverse_obj(_DEFAULT_DATA, 'None', 'int') == 0, \
|
||||||
|
'should not short cirquit on `None`'
|
||||||
|
assert traverse_obj(_DEFAULT_DATA, 'fail', default=1) == 1, \
|
||||||
|
'invalid dict key should result in `default`'
|
||||||
|
assert traverse_obj(_DEFAULT_DATA, 'None', default=1) == 1, \
|
||||||
|
'`None` is a deliberate sentinel and should become `default`'
|
||||||
|
assert traverse_obj(_DEFAULT_DATA, ('list', 10)) is None, \
|
||||||
|
'`IndexError` should result in `default`'
|
||||||
|
assert traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=1) == 1, \
|
||||||
|
'if branched but not successful return `default` if defined, not `[]`'
|
||||||
|
assert traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=None) is None, \
|
||||||
|
'if branched but not successful return `default` even if `default` is `None`'
|
||||||
|
assert traverse_obj(_DEFAULT_DATA, (..., 'fail')) == [], \
|
||||||
|
'if branched but not successful return `[]`, not `default`'
|
||||||
|
assert traverse_obj(_DEFAULT_DATA, ('list', ...)) == [], \
|
||||||
|
'if branched but object is empty return `[]`, not `default`'
|
||||||
|
assert traverse_obj(None, ...) == [], \
|
||||||
|
'if branched but object is `None` return `[]`, not `default`'
|
||||||
|
assert traverse_obj({0: None}, (0, ...)) == [], \
|
||||||
|
'if branched but state is `None` return `[]`, not `default`'
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('path', [
|
||||||
|
('fail', ...),
|
||||||
|
(..., 'fail'),
|
||||||
|
100 * ('fail',) + (...,),
|
||||||
|
(...,) + 100 * ('fail',),
|
||||||
|
])
|
||||||
|
def test_traversal_branching(self, path):
|
||||||
|
assert traverse_obj({}, path) == [], \
|
||||||
|
'if branched but state is `None`, return `[]` (not `default`)'
|
||||||
|
assert traverse_obj({}, 'fail', path) == [], \
|
||||||
|
'if branching in last alternative and previous did not match, return `[]` (not `default`)'
|
||||||
|
assert traverse_obj({0: 'x'}, 0, path) == 'x', \
|
||||||
|
'if branching in last alternative and previous did match, return single value'
|
||||||
|
assert traverse_obj({0: 'x'}, path, 0) == 'x', \
|
||||||
|
'if branching in first alternative and non-branching path does match, return single value'
|
||||||
|
assert traverse_obj({}, path, 'fail') is None, \
|
||||||
|
'if branching in first alternative and non-branching path does not match, return `default`'
|
||||||
|
|
||||||
|
def test_traversal_expected_type(self):
|
||||||
|
_EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
|
||||||
|
|
||||||
|
assert traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str) == 'str', \
|
||||||
|
'accept matching `expected_type` type'
|
||||||
|
assert traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int) is None, \
|
||||||
|
'reject non matching `expected_type` type'
|
||||||
|
assert traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)) == '0', \
|
||||||
|
'transform type using type function'
|
||||||
|
assert traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0) is None, \
|
||||||
|
'wrap expected_type fuction in try_call'
|
||||||
|
assert traverse_obj(_EXPECTED_TYPE_DATA, ..., expected_type=str) == ['str'], \
|
||||||
|
'eliminate items that expected_type fails on'
|
||||||
|
assert traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int) == {0: 100}, \
|
||||||
|
'type as expected_type should filter dict values'
|
||||||
|
assert traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none) == {0: '100', 1: '1.2'}, \
|
||||||
|
'function as expected_type should transform dict values'
|
||||||
|
assert traverse_obj(_TEST_DATA, ({0: 1.2}, 0, {int_or_none}), expected_type=int) == 1, \
|
||||||
|
'expected_type should not filter non final dict values'
|
||||||
|
assert traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int) == {0: {0: 100}}, \
|
||||||
|
'expected_type should transform deep dict values'
|
||||||
|
assert traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(...)) == [{0: ...}, {0: ...}], \
|
||||||
|
'expected_type should transform branched dict values'
|
||||||
|
assert traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int) == [4], \
|
||||||
|
'expected_type regression for type matching in tuple branching'
|
||||||
|
assert traverse_obj(_TEST_DATA, ['data', ...], expected_type=int) == [], \
|
||||||
|
'expected_type regression for type matching in dict result'
|
||||||
|
|
||||||
|
def test_traversal_get_all(self):
|
||||||
|
_GET_ALL_DATA = {'key': [0, 1, 2]}
|
||||||
|
|
||||||
|
assert traverse_obj(_GET_ALL_DATA, ('key', ...), get_all=False) == 0, \
|
||||||
|
'if not `get_all`, return only first matching value'
|
||||||
|
assert traverse_obj(_GET_ALL_DATA, ..., get_all=False) == [0, 1, 2], \
|
||||||
|
'do not overflatten if not `get_all`'
|
||||||
|
|
||||||
|
def test_traversal_casesense(self):
|
||||||
|
_CASESENSE_DATA = {
|
||||||
|
'KeY': 'value0',
|
||||||
|
0: {
|
||||||
|
'KeY': 'value1',
|
||||||
|
0: {'KeY': 'value2'},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
assert traverse_obj(_CASESENSE_DATA, 'key') is None, \
|
||||||
|
'dict keys should be case sensitive unless `casesense`'
|
||||||
|
assert traverse_obj(_CASESENSE_DATA, 'keY', casesense=False) == 'value0', \
|
||||||
|
'allow non matching key case if `casesense`'
|
||||||
|
assert traverse_obj(_CASESENSE_DATA, [0, ('keY',)], casesense=False) == ['value1'], \
|
||||||
|
'allow non matching key case in branch if `casesense`'
|
||||||
|
assert traverse_obj(_CASESENSE_DATA, [0, ([0, 'keY'],)], casesense=False) == ['value2'], \
|
||||||
|
'allow non matching key case in branch path if `casesense`'
|
||||||
|
|
||||||
|
def test_traversal_traverse_string(self):
|
||||||
|
_TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}
|
||||||
|
|
||||||
|
assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)) is None, \
|
||||||
|
'do not traverse into string if not `traverse_string`'
|
||||||
|
assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0), traverse_string=True) == 's', \
|
||||||
|
'traverse into string if `traverse_string`'
|
||||||
|
assert traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1), traverse_string=True) == '.', \
|
||||||
|
'traverse into converted data if `traverse_string`'
|
||||||
|
assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', ...), traverse_string=True) == 'str', \
|
||||||
|
'`...` should result in string (same value) if `traverse_string`'
|
||||||
|
assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)), traverse_string=True) == 'sr', \
|
||||||
|
'`slice` should result in string if `traverse_string`'
|
||||||
|
assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == "s"), traverse_string=True) == 'str', \
|
||||||
|
'function should result in string if `traverse_string`'
|
||||||
|
assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)), traverse_string=True) == ['s', 'r'], \
|
||||||
|
'branching should result in list if `traverse_string`'
|
||||||
|
assert traverse_obj({}, (0, ...), traverse_string=True) == [], \
|
||||||
|
'branching should result in list if `traverse_string`'
|
||||||
|
assert traverse_obj({}, (0, lambda x, y: True), traverse_string=True) == [], \
|
||||||
|
'branching should result in list if `traverse_string`'
|
||||||
|
assert traverse_obj({}, (0, slice(1)), traverse_string=True) == [], \
|
||||||
|
'branching should result in list if `traverse_string`'
|
||||||
|
|
||||||
|
def test_traversal_re(self):
|
||||||
|
mobj = re.fullmatch(r'0(12)(?P<group>3)(4)?', '0123')
|
||||||
|
assert traverse_obj(mobj, ...) == [x for x in mobj.groups() if x is not None], \
|
||||||
|
'`...` on a `re.Match` should give its `groups()`'
|
||||||
|
assert traverse_obj(mobj, lambda k, _: k in (0, 2)) == ['0123', '3'], \
|
||||||
|
'function on a `re.Match` should give groupno, value starting at 0'
|
||||||
|
assert traverse_obj(mobj, 'group') == '3', \
|
||||||
|
'str key on a `re.Match` should give group with that name'
|
||||||
|
assert traverse_obj(mobj, 2) == '3', \
|
||||||
|
'int key on a `re.Match` should give group with that name'
|
||||||
|
assert traverse_obj(mobj, 'gRoUp', casesense=False) == '3', \
|
||||||
|
'str key on a `re.Match` should respect casesense'
|
||||||
|
assert traverse_obj(mobj, 'fail') is None, \
|
||||||
|
'failing str key on a `re.Match` should return `default`'
|
||||||
|
assert traverse_obj(mobj, 'gRoUpS', casesense=False) is None, \
|
||||||
|
'failing str key on a `re.Match` should return `default`'
|
||||||
|
assert traverse_obj(mobj, 8) is None, \
|
||||||
|
'failing int key on a `re.Match` should return `default`'
|
||||||
|
assert traverse_obj(mobj, lambda k, _: k in (0, 'group')) == ['0123', '3'], \
|
||||||
|
'function on a `re.Match` should give group name as well'
|
||||||
|
|
||||||
|
def test_traversal_xml_etree(self):
|
||||||
|
etree = xml.etree.ElementTree.fromstring('''<?xml version="1.0"?>
|
||||||
|
<data>
|
||||||
|
<country name="Liechtenstein">
|
||||||
|
<rank>1</rank>
|
||||||
|
<year>2008</year>
|
||||||
|
<gdppc>141100</gdppc>
|
||||||
|
<neighbor name="Austria" direction="E"/>
|
||||||
|
<neighbor name="Switzerland" direction="W"/>
|
||||||
|
</country>
|
||||||
|
<country name="Singapore">
|
||||||
|
<rank>4</rank>
|
||||||
|
<year>2011</year>
|
||||||
|
<gdppc>59900</gdppc>
|
||||||
|
<neighbor name="Malaysia" direction="N"/>
|
||||||
|
</country>
|
||||||
|
<country name="Panama">
|
||||||
|
<rank>68</rank>
|
||||||
|
<year>2011</year>
|
||||||
|
<gdppc>13600</gdppc>
|
||||||
|
<neighbor name="Costa Rica" direction="W"/>
|
||||||
|
<neighbor name="Colombia" direction="E"/>
|
||||||
|
</country>
|
||||||
|
</data>''')
|
||||||
|
assert traverse_obj(etree, '') == etree, \
|
||||||
|
'empty str key should return the element itself'
|
||||||
|
assert traverse_obj(etree, 'country') == list(etree), \
|
||||||
|
'str key should lead all children with that tag name'
|
||||||
|
assert traverse_obj(etree, ...) == list(etree), \
|
||||||
|
'`...` as key should return all children'
|
||||||
|
assert traverse_obj(etree, lambda _, x: x[0].text == '4') == [etree[1]], \
|
||||||
|
'function as key should get element as value'
|
||||||
|
assert traverse_obj(etree, lambda i, _: i == 1) == [etree[1]], \
|
||||||
|
'function as key should get index as key'
|
||||||
|
assert traverse_obj(etree, 0) == etree[0], \
|
||||||
|
'int key should return the nth child'
|
||||||
|
expected = ['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia']
|
||||||
|
assert traverse_obj(etree, './/neighbor/@name') == expected, \
|
||||||
|
'`@<attribute>` at end of path should give that attribute'
|
||||||
|
assert traverse_obj(etree, '//neighbor/@fail') == [None, None, None, None, None], \
|
||||||
|
'`@<nonexistant>` at end of path should give `None`'
|
||||||
|
assert traverse_obj(etree, ('//neighbor/@', 2)) == {'name': 'Malaysia', 'direction': 'N'}, \
|
||||||
|
'`@` should give the full attribute dict'
|
||||||
|
assert traverse_obj(etree, '//year/text()') == ['2008', '2011', '2011'], \
|
||||||
|
'`text()` at end of path should give the inner text'
|
||||||
|
assert traverse_obj(etree, '//*[@direction]/@direction') == ['E', 'W', 'N', 'W', 'E'], \
|
||||||
|
'full Python xpath features should be supported'
|
||||||
|
assert traverse_obj(etree, (0, '@name')) == 'Liechtenstein', \
|
||||||
|
'special transformations should act on current element'
|
||||||
|
assert traverse_obj(etree, ('country', 0, ..., 'text()', {int_or_none})) == [1, 2008, 141100], \
|
||||||
|
'special transformations should act on current element'
|
||||||
|
|
||||||
|
def test_traversal_unbranching(self):
|
||||||
|
assert traverse_obj(_TEST_DATA, [(100, 1.2), all]) == [100, 1.2], \
|
||||||
|
'`all` should give all results as list'
|
||||||
|
assert traverse_obj(_TEST_DATA, [(100, 1.2), any]) == 100, \
|
||||||
|
'`any` should give the first result'
|
||||||
|
assert traverse_obj(_TEST_DATA, [100, all]) == [100], \
|
||||||
|
'`all` should give list if non branching'
|
||||||
|
assert traverse_obj(_TEST_DATA, [100, any]) == 100, \
|
||||||
|
'`any` should give single item if non branching'
|
||||||
|
assert traverse_obj(_TEST_DATA, [('dict', 'None', 100), all]) == [100], \
|
||||||
|
'`all` should filter `None` and empty dict'
|
||||||
|
assert traverse_obj(_TEST_DATA, [('dict', 'None', 100), any]) == 100, \
|
||||||
|
'`any` should filter `None` and empty dict'
|
||||||
|
assert traverse_obj(_TEST_DATA, [{
|
||||||
|
'all': [('dict', 'None', 100, 1.2), all],
|
||||||
|
'any': [('dict', 'None', 100, 1.2), any],
|
||||||
|
}]) == {'all': [100, 1.2], 'any': 100}, \
|
||||||
|
'`all`/`any` should apply to each dict path separately'
|
||||||
|
assert traverse_obj(_TEST_DATA, [{
|
||||||
|
'all': [('dict', 'None', 100, 1.2), all],
|
||||||
|
'any': [('dict', 'None', 100, 1.2), any],
|
||||||
|
}], get_all=False) == {'all': [100, 1.2], 'any': 100}, \
|
||||||
|
'`all`/`any` should apply to dict regardless of `get_all`'
|
||||||
|
assert traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), all, {float}]) is None, \
|
||||||
|
'`all` should reset branching status'
|
||||||
|
assert traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), any, {float}]) is None, \
|
||||||
|
'`any` should reset branching status'
|
||||||
|
assert traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), all, ..., {float}]) == [1.2], \
|
||||||
|
'`all` should allow further branching'
|
||||||
|
assert traverse_obj(_TEST_DATA, [('dict', 'None', 'urls', 'data'), any, ..., 'index']) == [0, 1], \
|
||||||
|
'`any` should allow further branching'
|
||||||
|
|
||||||
|
def test_traversal_morsel(self):
|
||||||
|
values = {
|
||||||
|
'expires': 'a',
|
||||||
|
'path': 'b',
|
||||||
|
'comment': 'c',
|
||||||
|
'domain': 'd',
|
||||||
|
'max-age': 'e',
|
||||||
|
'secure': 'f',
|
||||||
|
'httponly': 'g',
|
||||||
|
'version': 'h',
|
||||||
|
'samesite': 'i',
|
||||||
|
}
|
||||||
|
morsel = http.cookies.Morsel()
|
||||||
|
morsel.set('item_key', 'item_value', 'coded_value')
|
||||||
|
morsel.update(values)
|
||||||
|
values['key'] = 'item_key'
|
||||||
|
values['value'] = 'item_value'
|
||||||
|
|
||||||
|
for key, value in values.items():
|
||||||
|
assert traverse_obj(morsel, key) == value, \
|
||||||
|
'Morsel should provide access to all values'
|
||||||
|
assert traverse_obj(morsel, ...) == list(values.values()), \
|
||||||
|
'`...` should yield all values'
|
||||||
|
assert traverse_obj(morsel, lambda k, v: True) == list(values.values()), \
|
||||||
|
'function key should yield all values'
|
||||||
|
assert traverse_obj(morsel, [(None,), any]) == morsel, \
|
||||||
|
'Morsel should not be implicitly changed to dict on usage'
|
||||||
|
|
||||||
|
|
||||||
|
class TestDictGet:
|
||||||
|
def test_dict_get(self):
|
||||||
|
FALSE_VALUES = {
|
||||||
|
'none': None,
|
||||||
|
'false': False,
|
||||||
|
'zero': 0,
|
||||||
|
'empty_string': '',
|
||||||
|
'empty_list': [],
|
||||||
|
}
|
||||||
|
d = {**FALSE_VALUES, 'a': 42}
|
||||||
|
assert dict_get(d, 'a') == 42
|
||||||
|
assert dict_get(d, 'b') is None
|
||||||
|
assert dict_get(d, 'b', 42) == 42
|
||||||
|
assert dict_get(d, ('a',)) == 42
|
||||||
|
assert dict_get(d, ('b', 'a')) == 42
|
||||||
|
assert dict_get(d, ('b', 'c', 'a', 'd')) == 42
|
||||||
|
assert dict_get(d, ('b', 'c')) is None
|
||||||
|
assert dict_get(d, ('b', 'c'), 42) == 42
|
||||||
|
for key, false_value in FALSE_VALUES.items():
|
||||||
|
assert dict_get(d, ('b', 'c', key)) is None
|
||||||
|
assert dict_get(d, ('b', 'c', key), skip_false_values=False) == false_value
|
|
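# A brief sketch (not part of the new file) of the dict_get behaviour verified above:
# the first key with a truthy value wins, falsy values are skipped unless
# skip_false_values=False, and `default` is returned when nothing matches.
# _META below is a made-up example dict.
_META = {'title': '', 'fulltitle': 'Example Video'}
assert dict_get(_META, ('title', 'fulltitle')) == 'Example Video'
assert dict_get(_META, ('title', 'fulltitle'), skip_false_values=False) == ''
assert dict_get(_META, 'missing', default='n/a') == 'n/a'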
@@ -2,7 +2,6 @@
# Allow direct execution
import os
import re
import sys
import unittest
import warnings
@@ -45,7 +44,6 @@ from yt_dlp.utils import (
determine_ext,
determine_file_encoding,
dfxp2srt,
dict_get,
encode_base_n,
encode_compat_str,
encodeFilename,
@@ -106,13 +104,11 @@ from yt_dlp.utils import (
sanitize_url,
shell_quote,
smuggle_url,
str_or_none,
str_to_int,
strip_jsonp,
strip_or_none,
subtitles_filename,
timeconvert,
traverse_obj,
try_call,
unescapeHTML,
unified_strdate,
@@ -755,28 +751,6 @@ class TestUtil(unittest.TestCase):
self.assertRaises(
ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
def test_dict_get(self):
|
|
||||||
FALSE_VALUES = {
|
|
||||||
'none': None,
|
|
||||||
'false': False,
|
|
||||||
'zero': 0,
|
|
||||||
'empty_string': '',
|
|
||||||
'empty_list': [],
|
|
||||||
}
|
|
||||||
d = FALSE_VALUES.copy()
|
|
||||||
d['a'] = 42
|
|
||||||
self.assertEqual(dict_get(d, 'a'), 42)
|
|
||||||
self.assertEqual(dict_get(d, 'b'), None)
|
|
||||||
self.assertEqual(dict_get(d, 'b', 42), 42)
|
|
||||||
self.assertEqual(dict_get(d, ('a', )), 42)
|
|
||||||
self.assertEqual(dict_get(d, ('b', 'a', )), 42)
|
|
||||||
self.assertEqual(dict_get(d, ('b', 'c', 'a', 'd', )), 42)
|
|
||||||
self.assertEqual(dict_get(d, ('b', 'c', )), None)
|
|
||||||
self.assertEqual(dict_get(d, ('b', 'c', ), 42), 42)
|
|
||||||
for key, false_value in FALSE_VALUES.items():
|
|
||||||
self.assertEqual(dict_get(d, ('b', 'c', key, )), None)
|
|
||||||
self.assertEqual(dict_get(d, ('b', 'c', key, ), skip_false_values=False), false_value)
|
|
||||||
|
|
||||||
def test_merge_dicts(self):
|
def test_merge_dicts(self):
|
||||||
self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2})
|
self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2})
|
||||||
self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1})
|
self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1})
|
||||||
|
@ -2039,359 +2013,6 @@ Line 1
|
||||||
warnings.simplefilter('ignore')
|
warnings.simplefilter('ignore')
|
||||||
self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam')
|
self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam')
|
||||||
|
|
||||||
def test_traverse_obj(self):
|
|
||||||
_TEST_DATA = {
|
|
||||||
100: 100,
|
|
||||||
1.2: 1.2,
|
|
||||||
'str': 'str',
|
|
||||||
'None': None,
|
|
||||||
'...': ...,
|
|
||||||
'urls': [
|
|
||||||
{'index': 0, 'url': 'https://www.example.com/0'},
|
|
||||||
{'index': 1, 'url': 'https://www.example.com/1'},
|
|
||||||
],
|
|
||||||
'data': (
|
|
||||||
{'index': 2},
|
|
||||||
{'index': 3},
|
|
||||||
),
|
|
||||||
'dict': {},
|
|
||||||
}
|
|
||||||
|
|
||||||
# Test base functionality
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('str',)), 'str',
|
|
||||||
msg='allow tuple path')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ['str']), 'str',
|
|
||||||
msg='allow list path')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, (value for value in ("str",))), 'str',
|
|
||||||
msg='allow iterable path')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, 'str'), 'str',
|
|
||||||
msg='single items should be treated as a path')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, None), _TEST_DATA)
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, 100), 100)
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, 1.2), 1.2)
|
|
||||||
|
|
||||||
# Test Ellipsis behavior
|
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, ...),
|
|
||||||
(item for item in _TEST_DATA.values() if item not in (None, {})),
|
|
||||||
msg='`...` should give all non discarded values')
|
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', 0, ...)), _TEST_DATA['urls'][0].values(),
|
|
||||||
msg='`...` selection for dicts should select all values')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, (..., ..., 'url')),
|
|
||||||
['https://www.example.com/0', 'https://www.example.com/1'],
|
|
||||||
msg='nested `...` queries should work')
|
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, (..., ..., 'index')), range(4),
|
|
||||||
msg='`...` query result should be flattened')
|
|
||||||
self.assertEqual(traverse_obj(iter(range(4)), ...), list(range(4)),
|
|
||||||
msg='`...` should accept iterables')
|
|
||||||
|
|
||||||
# Test function as key
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
|
|
||||||
[_TEST_DATA['urls']],
|
|
||||||
msg='function as query key should perform a filter based on (key, value)')
|
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), {'str'},
|
|
||||||
msg='exceptions in the query function should be catched')
|
|
||||||
self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
|
|
||||||
msg='function key should accept iterables')
|
|
||||||
if __debug__:
|
|
||||||
with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
|
|
||||||
traverse_obj(_TEST_DATA, lambda a: ...)
|
|
||||||
with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
|
|
||||||
traverse_obj(_TEST_DATA, lambda a, b, c: ...)
|
|
||||||
|
|
||||||
# Test set as key (transformation/type, like `expected_type`)
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, (..., {str.upper}, )), ['STR'],
|
|
||||||
msg='Function in set should be a transformation')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, (..., {str})), ['str'],
|
|
||||||
msg='Type in set should be a type filter')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {dict}), _TEST_DATA,
|
|
||||||
msg='A single set should be wrapped into a path')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, (..., {str.upper})), ['STR'],
|
|
||||||
msg='Transformation function should not raise')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, (..., {str_or_none})),
|
|
||||||
[item for item in map(str_or_none, _TEST_DATA.values()) if item is not None],
|
|
||||||
msg='Function in set should be a transformation')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('fail', {lambda _: 'const'})), 'const',
|
|
||||||
msg='Function in set should always be called')
|
|
||||||
if __debug__:
|
|
||||||
with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
|
|
||||||
traverse_obj(_TEST_DATA, set())
|
|
||||||
with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
|
|
||||||
traverse_obj(_TEST_DATA, {str.upper, str})
|
|
||||||
|
|
||||||
# Test `slice` as a key
|
|
||||||
_SLICE_DATA = [0, 1, 2, 3, 4]
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None,
|
|
||||||
msg='slice on a dictionary should not throw')
|
|
||||||
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1],
|
|
||||||
msg='slice key should apply slice to sequence')
|
|
||||||
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2],
|
|
||||||
msg='slice key should apply slice to sequence')
|
|
||||||
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2],
|
|
||||||
msg='slice key should apply slice to sequence')
|
|
||||||
|
|
||||||
# Test alternative paths
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
|
|
||||||
msg='multiple `paths` should be treated as alternative paths')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, 'str', 100), 'str',
|
|
||||||
msg='alternatives should exit early')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'fail'), None,
|
|
||||||
msg='alternatives should return `default` if exhausted')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, (..., 'fail'), 100), 100,
|
|
||||||
msg='alternatives should track their own branching return')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('dict', ...), ('data', ...)), list(_TEST_DATA['data']),
|
|
||||||
msg='alternatives on empty objects should search further')
|
|
||||||
|
|
||||||
# Test branch and path nesting
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')), ['https://www.example.com/0'],
|
|
||||||
msg='tuple as key should be treated as branches')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')), ['https://www.example.com/0'],
|
|
||||||
msg='list as key should be treated as branches')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))), ['https://www.example.com/0'],
|
|
||||||
msg='double nesting in path should be treated as paths')
|
|
||||||
self.assertEqual(traverse_obj(['0', [1, 2]], [(0, 1), 0]), [1],
|
|
||||||
msg='do not fail early on branching')
|
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, ('urls', ((1, ('fail', 'url')), (0, 'url')))),
|
|
||||||
['https://www.example.com/0', 'https://www.example.com/1'],
|
|
||||||
msg='tripple nesting in path should be treated as branches')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, ('urls', ('fail', (..., 'url')))),
|
|
||||||
['https://www.example.com/0', 'https://www.example.com/1'],
|
|
||||||
msg='ellipsis as branch path start gets flattened')
|
|
||||||
|
|
||||||
# Test dictionary as key
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}), {0: 100, 1: 1.2},
|
|
||||||
msg='dict key should result in a dict with the same keys')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}),
|
|
||||||
{0: 'https://www.example.com/0'},
|
|
||||||
msg='dict key should allow paths')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}),
|
|
||||||
{0: ['https://www.example.com/0']},
|
|
||||||
msg='tuple in dict path should be treated as branches')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}),
|
|
||||||
{0: ['https://www.example.com/0']},
|
|
||||||
msg='double nesting in dict path should be treated as paths')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}),
|
|
||||||
{0: ['https://www.example.com/1', 'https://www.example.com/0']},
|
|
||||||
msg='tripple nesting in dict path should be treated as branches')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}), {},
|
|
||||||
msg='remove `None` values when top level dict key fails')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'fail'}, default=...), {0: ...},
|
|
||||||
msg='use `default` if key fails and `default`')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}), {},
|
|
||||||
msg='remove empty values when dict key')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: 'dict'}, default=...), {0: ...},
|
|
||||||
msg='use `default` when dict key and `default`')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}), {},
|
|
||||||
msg='remove empty values when nested dict key fails')
|
|
||||||
self.assertEqual(traverse_obj(None, {0: 'fail'}), {},
|
|
||||||
msg='default to dict if pruned')
|
|
||||||
self.assertEqual(traverse_obj(None, {0: 'fail'}, default=...), {0: ...},
|
|
||||||
msg='default to dict if pruned and default is given')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=...), {0: {0: ...}},
|
|
||||||
msg='use nested `default` when nested dict key fails and `default`')
|
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, {0: ('dict', ...)}), {},
|
|
||||||
msg='remove key if branch in dict key not successful')
|
|
||||||
|
|
||||||
# Testing default parameter behavior
|
|
||||||
_DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail'), None,
|
|
||||||
msg='default value should be `None`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=...), ...,
|
|
||||||
msg='chained fails should result in default')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', 'int'), 0,
|
|
||||||
msg='should not short cirquit on `None`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'fail', default=1), 1,
|
|
||||||
msg='invalid dict key should result in `default`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, 'None', default=1), 1,
|
|
||||||
msg='`None` is a deliberate sentinel and should become `default`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', 10)), None,
|
|
||||||
msg='`IndexError` should result in `default`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=1), 1,
|
|
||||||
msg='if branched but not successful return `default` if defined, not `[]`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=None), None,
|
|
||||||
msg='if branched but not successful return `default` even if `default` is `None`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, (..., 'fail')), [],
|
|
||||||
msg='if branched but not successful return `[]`, not `default`')
|
|
||||||
self.assertEqual(traverse_obj(_DEFAULT_DATA, ('list', ...)), [],
|
|
||||||
msg='if branched but object is empty return `[]`, not `default`')
|
|
||||||
self.assertEqual(traverse_obj(None, ...), [],
|
|
||||||
msg='if branched but object is `None` return `[]`, not `default`')
|
|
||||||
self.assertEqual(traverse_obj({0: None}, (0, ...)), [],
|
|
||||||
msg='if branched but state is `None` return `[]`, not `default`')
|
|
||||||
|
|
||||||
branching_paths = [
|
|
||||||
('fail', ...),
|
|
||||||
(..., 'fail'),
|
|
||||||
100 * ('fail',) + (...,),
|
|
||||||
(...,) + 100 * ('fail',),
|
|
||||||
]
|
|
||||||
for branching_path in branching_paths:
|
|
||||||
self.assertEqual(traverse_obj({}, branching_path), [],
|
|
||||||
msg='if branched but state is `None`, return `[]` (not `default`)')
|
|
||||||
self.assertEqual(traverse_obj({}, 'fail', branching_path), [],
|
|
||||||
msg='if branching in last alternative and previous did not match, return `[]` (not `default`)')
|
|
||||||
self.assertEqual(traverse_obj({0: 'x'}, 0, branching_path), 'x',
|
|
||||||
msg='if branching in last alternative and previous did match, return single value')
|
|
||||||
self.assertEqual(traverse_obj({0: 'x'}, branching_path, 0), 'x',
|
|
||||||
msg='if branching in first alternative and non-branching path does match, return single value')
|
|
||||||
self.assertEqual(traverse_obj({}, branching_path, 'fail'), None,
|
|
||||||
msg='if branching in first alternative and non-branching path does not match, return `default`')
|
|
||||||
|
|
||||||
        # Testing expected_type behavior
        _EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0}
        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str),
                         'str', msg='accept matching `expected_type` type')
        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int),
                         None, msg='reject non matching `expected_type` type')
        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)),
                         '0', msg='transform type using type function')
        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0),
                         None, msg='wrap expected_type function in try_call')
        self.assertEqual(traverse_obj(_EXPECTED_TYPE_DATA, ..., expected_type=str),
                         ['str'], msg='eliminate items that expected_type fails on')
        self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int),
                         {0: 100}, msg='type as expected_type should filter dict values')
        self.assertEqual(traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none),
                         {0: '100', 1: '1.2'}, msg='function as expected_type should transform dict values')
        self.assertEqual(traverse_obj(_TEST_DATA, ({0: 1.2}, 0, {int_or_none}), expected_type=int),
                         1, msg='expected_type should not filter non final dict values')
        self.assertEqual(traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int),
                         {0: {0: 100}}, msg='expected_type should transform deep dict values')
        self.assertEqual(traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(...)),
                         [{0: ...}, {0: ...}], msg='expected_type should transform branched dict values')
        self.assertEqual(traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int),
                         [4], msg='expected_type regression for type matching in tuple branching')
        self.assertEqual(traverse_obj(_TEST_DATA, ['data', ...], expected_type=int),
                         [], msg='expected_type regression for type matching in dict result')

        # Test get_all behavior
        _GET_ALL_DATA = {'key': [0, 1, 2]}
        self.assertEqual(traverse_obj(_GET_ALL_DATA, ('key', ...), get_all=False), 0,
                         msg='if not `get_all`, return only first matching value')
        self.assertEqual(traverse_obj(_GET_ALL_DATA, ..., get_all=False), [0, 1, 2],
                         msg='do not overflatten if not `get_all`')

        # Test casesense behavior
        _CASESENSE_DATA = {
            'KeY': 'value0',
            0: {
                'KeY': 'value1',
                0: {'KeY': 'value2'},
            },
        }
        self.assertEqual(traverse_obj(_CASESENSE_DATA, 'key'), None,
                         msg='dict keys should be case sensitive unless `casesense`')
        self.assertEqual(traverse_obj(_CASESENSE_DATA, 'keY',
                                      casesense=False), 'value0',
                         msg='allow non matching key case if `casesense`')
        self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ('keY',)),
                                      casesense=False), ['value1'],
                         msg='allow non matching key case in branch if `casesense`')
        self.assertEqual(traverse_obj(_CASESENSE_DATA, (0, ((0, 'keY'),)),
                                      casesense=False), ['value2'],
                         msg='allow non matching key case in branch path if `casesense`')

        # Test traverse_string behavior
        _TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}
        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)), None,
                         msg='do not traverse into string if not `traverse_string`')
        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0),
                                      traverse_string=True), 's',
                         msg='traverse into string if `traverse_string`')
        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1),
                                      traverse_string=True), '.',
                         msg='traverse into converted data if `traverse_string`')
        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', ...),
                                      traverse_string=True), 'str',
                         msg='`...` should result in string (same value) if `traverse_string`')
        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)),
                                      traverse_string=True), 'sr',
                         msg='`slice` should result in string if `traverse_string`')
        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == "s"),
                                      traverse_string=True), 'str',
                         msg='function should result in string if `traverse_string`')
        self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
                                      traverse_string=True), ['s', 'r'],
                         msg='branching should result in list if `traverse_string`')
        self.assertEqual(traverse_obj({}, (0, ...), traverse_string=True), [],
                         msg='branching should result in list if `traverse_string`')
        self.assertEqual(traverse_obj({}, (0, lambda x, y: True), traverse_string=True), [],
                         msg='branching should result in list if `traverse_string`')
        self.assertEqual(traverse_obj({}, (0, slice(1)), traverse_string=True), [],
                         msg='branching should result in list if `traverse_string`')

        # Test re.Match as input obj
        mobj = re.fullmatch(r'0(12)(?P<group>3)(4)?', '0123')
        self.assertEqual(traverse_obj(mobj, ...), [x for x in mobj.groups() if x is not None],
                         msg='`...` on a `re.Match` should give its `groups()`')
        self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 2)), ['0123', '3'],
                         msg='function on a `re.Match` should give groupno, value starting at 0')
        self.assertEqual(traverse_obj(mobj, 'group'), '3',
                         msg='str key on a `re.Match` should give group with that name')
        self.assertEqual(traverse_obj(mobj, 2), '3',
                         msg='int key on a `re.Match` should give group with that name')
        self.assertEqual(traverse_obj(mobj, 'gRoUp', casesense=False), '3',
                         msg='str key on a `re.Match` should respect casesense')
        self.assertEqual(traverse_obj(mobj, 'fail'), None,
                         msg='failing str key on a `re.Match` should return `default`')
        self.assertEqual(traverse_obj(mobj, 'gRoUpS', casesense=False), None,
                         msg='failing str key on a `re.Match` should return `default`')
        self.assertEqual(traverse_obj(mobj, 8), None,
                         msg='failing int key on a `re.Match` should return `default`')
        self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
                         msg='function on a `re.Match` should give group name as well')

        # Test xml.etree.ElementTree.Element as input obj
        etree = xml.etree.ElementTree.fromstring('''<?xml version="1.0"?>
        <data>
            <country name="Liechtenstein">
                <rank>1</rank>
                <year>2008</year>
                <gdppc>141100</gdppc>
                <neighbor name="Austria" direction="E"/>
                <neighbor name="Switzerland" direction="W"/>
            </country>
            <country name="Singapore">
                <rank>4</rank>
                <year>2011</year>
                <gdppc>59900</gdppc>
                <neighbor name="Malaysia" direction="N"/>
            </country>
            <country name="Panama">
                <rank>68</rank>
                <year>2011</year>
                <gdppc>13600</gdppc>
                <neighbor name="Costa Rica" direction="W"/>
                <neighbor name="Colombia" direction="E"/>
            </country>
        </data>''')
        self.assertEqual(traverse_obj(etree, ''), etree,
                         msg='empty str key should return the element itself')
        self.assertEqual(traverse_obj(etree, 'country'), list(etree),
                         msg='str key should lead all children with that tag name')
        self.assertEqual(traverse_obj(etree, ...), list(etree),
                         msg='`...` as key should return all children')
        self.assertEqual(traverse_obj(etree, lambda _, x: x[0].text == '4'), [etree[1]],
                         msg='function as key should get element as value')
        self.assertEqual(traverse_obj(etree, lambda i, _: i == 1), [etree[1]],
                         msg='function as key should get index as key')
        self.assertEqual(traverse_obj(etree, 0), etree[0],
                         msg='int key should return the nth child')
        self.assertEqual(traverse_obj(etree, './/neighbor/@name'),
                         ['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia'],
                         msg='`@<attribute>` at end of path should give that attribute')
        self.assertEqual(traverse_obj(etree, '//neighbor/@fail'), [None, None, None, None, None],
                         msg='`@<nonexistant>` at end of path should give `None`')
        self.assertEqual(traverse_obj(etree, ('//neighbor/@', 2)), {'name': 'Malaysia', 'direction': 'N'},
                         msg='`@` should give the full attribute dict')
        self.assertEqual(traverse_obj(etree, '//year/text()'), ['2008', '2011', '2011'],
                         msg='`text()` at end of path should give the inner text')
        self.assertEqual(traverse_obj(etree, '//*[@direction]/@direction'), ['E', 'W', 'N', 'W', 'E'],
                         msg='full Python xpath features should be supported')
        self.assertEqual(traverse_obj(etree, (0, '@name')), 'Liechtenstein',
                         msg='special transformations should act on current element')
        self.assertEqual(traverse_obj(etree, ('country', 0, ..., 'text()', {int_or_none})), [1, 2008, 141100],
                         msg='special transformations should act on current element')

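As a quick reference for the behaviour these assertions pin down, here is a minimal standalone sketch (assuming a recent yt_dlp is importable; the values simply mirror the assertions above and are not additional guarantees):

    from yt_dlp.utils import traverse_obj

    data = {'dict': {'key': 'value'}, 'list': [1, 2, 3]}

    # A failed non-branching path falls back to `default` (None unless overridden)
    assert traverse_obj(data, ('dict', 'missing')) is None
    assert traverse_obj(data, ('dict', 'missing'), default=0) == 0

    # `...` branches over all values; a branch that matches nothing yields [] rather than `default`
    assert traverse_obj(data, ('list', ...)) == [1, 2, 3]
    assert traverse_obj(data, (..., 'missing')) == []

    # `expected_type` filters when given a type and transforms when given a callable
    assert traverse_obj(data, ('dict', 'key'), expected_type=str) == 'value'
    assert traverse_obj(data, ('list', 0), expected_type=lambda x: x + 1) == 2
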
    def test_http_header_dict(self):
        headers = HTTPHeaderDict()
        headers['ytdl-test'] = b'0'

@@ -2438,7 +2059,22 @@ Line 1
        assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')

    @unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows')
-    def test_Popen_windows_escaping(self):
+    def test_windows_escaping(self):
+        tests = [
+            'test"&',
+            '%CMDCMDLINE:~-1%&',
+            'a\nb',
+            '"',
+            '\\',
+            '!',
+            '^!',
+            'a \\ b',
+            'a \\" b',
+            'a \\ b\\',
+            # We replace \r with \n
+            ('a\r\ra', 'a\n\na'),
+        ]
+
        def run_shell(args):
            stdout, stderr, error = Popen.run(
                args, text=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -2446,11 +2082,15 @@ Line 1
            assert not error
            return stdout

-        # Test escaping
-        assert run_shell(['echo', 'test"&']) == '"test""&"\n'
-        # Test if delayed expansion is disabled
-        assert run_shell(['echo', '^!']) == '"^!"\n'
-        assert run_shell('echo "^!"') == '"^!"\n'
+        for argument in tests:
+            if isinstance(argument, str):
+                expected = argument
+            else:
+                argument, expected = argument
+
+            args = [sys.executable, '-c', 'import sys; print(end=sys.argv[1])', argument, 'end']
+            assert run_shell(args) == expected
+            assert run_shell(shell_quote(args, shell=True)) == expected


if __name__ == '__main__':

@ -3,10 +3,12 @@
|
||||||
# Allow direct execution
|
# Allow direct execution
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from test.helper import verify_address_availability
|
from test.helper import verify_address_availability
|
||||||
|
from yt_dlp.networking.common import Features, DEFAULT_TIMEOUT
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
@ -18,7 +20,7 @@ import random
|
||||||
import ssl
|
import ssl
|
||||||
import threading
|
import threading
|
||||||
|
|
||||||
from yt_dlp import socks
|
from yt_dlp import socks, traverse_obj
|
||||||
from yt_dlp.cookies import YoutubeDLCookieJar
|
from yt_dlp.cookies import YoutubeDLCookieJar
|
||||||
from yt_dlp.dependencies import websockets
|
from yt_dlp.dependencies import websockets
|
||||||
from yt_dlp.networking import Request
|
from yt_dlp.networking import Request
|
||||||
|
@ -114,6 +116,7 @@ def ws_validate_and_send(rh, req):
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(not websockets, reason='websockets must be installed to test websocket request handlers')
|
@pytest.mark.skipif(not websockets, reason='websockets must be installed to test websocket request handlers')
|
||||||
|
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
||||||
class TestWebsSocketRequestHandlerConformance:
|
class TestWebsSocketRequestHandlerConformance:
|
||||||
@classmethod
|
@classmethod
|
||||||
def setup_class(cls):
|
def setup_class(cls):
|
||||||
|
@ -129,7 +132,6 @@ class TestWebsSocketRequestHandlerConformance:
|
||||||
cls.mtls_wss_thread, cls.mtls_wss_port = create_mtls_wss_websocket_server()
|
cls.mtls_wss_thread, cls.mtls_wss_port = create_mtls_wss_websocket_server()
|
||||||
cls.mtls_wss_base_url = f'wss://127.0.0.1:{cls.mtls_wss_port}'
|
cls.mtls_wss_base_url = f'wss://127.0.0.1:{cls.mtls_wss_port}'
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
def test_basic_websockets(self, handler):
|
def test_basic_websockets(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
ws = ws_validate_and_send(rh, Request(self.ws_base_url))
|
ws = ws_validate_and_send(rh, Request(self.ws_base_url))
|
||||||
|
@ -141,7 +143,6 @@ class TestWebsSocketRequestHandlerConformance:
|
||||||
|
|
||||||
# https://www.rfc-editor.org/rfc/rfc6455.html#section-5.6
|
# https://www.rfc-editor.org/rfc/rfc6455.html#section-5.6
|
||||||
@pytest.mark.parametrize('msg,opcode', [('str', 1), (b'bytes', 2)])
|
@pytest.mark.parametrize('msg,opcode', [('str', 1), (b'bytes', 2)])
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
def test_send_types(self, handler, msg, opcode):
|
def test_send_types(self, handler, msg, opcode):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
ws = ws_validate_and_send(rh, Request(self.ws_base_url))
|
ws = ws_validate_and_send(rh, Request(self.ws_base_url))
|
||||||
|
@ -149,7 +150,6 @@ class TestWebsSocketRequestHandlerConformance:
|
||||||
assert int(ws.recv()) == opcode
|
assert int(ws.recv()) == opcode
|
||||||
ws.close()
|
ws.close()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
def test_verify_cert(self, handler):
|
def test_verify_cert(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
with pytest.raises(CertificateVerifyError):
|
with pytest.raises(CertificateVerifyError):
|
||||||
|
@ -160,14 +160,12 @@ class TestWebsSocketRequestHandlerConformance:
|
||||||
assert ws.status == 101
|
assert ws.status == 101
|
||||||
ws.close()
|
ws.close()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
def test_ssl_error(self, handler):
|
def test_ssl_error(self, handler):
|
||||||
with handler(verify=False) as rh:
|
with handler(verify=False) as rh:
|
||||||
with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
|
with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info:
|
||||||
ws_validate_and_send(rh, Request(self.bad_wss_host))
|
ws_validate_and_send(rh, Request(self.bad_wss_host))
|
||||||
assert not issubclass(exc_info.type, CertificateVerifyError)
|
assert not issubclass(exc_info.type, CertificateVerifyError)
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
@pytest.mark.parametrize('path,expected', [
|
@pytest.mark.parametrize('path,expected', [
|
||||||
# Unicode characters should be encoded with uppercase percent-encoding
|
# Unicode characters should be encoded with uppercase percent-encoding
|
||||||
('/中文', '/%E4%B8%AD%E6%96%87'),
|
('/中文', '/%E4%B8%AD%E6%96%87'),
|
||||||
|
@ -182,7 +180,6 @@ class TestWebsSocketRequestHandlerConformance:
|
||||||
assert ws.status == 101
|
assert ws.status == 101
|
||||||
ws.close()
|
ws.close()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
def test_remove_dot_segments(self, handler):
|
def test_remove_dot_segments(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
# This isn't a comprehensive test,
|
# This isn't a comprehensive test,
|
||||||
|
@ -195,7 +192,6 @@ class TestWebsSocketRequestHandlerConformance:
|
||||||
|
|
||||||
# We are restricted to known HTTP status codes in http.HTTPStatus
|
# We are restricted to known HTTP status codes in http.HTTPStatus
|
||||||
# Redirects are not supported for websockets
|
# Redirects are not supported for websockets
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
@pytest.mark.parametrize('status', (200, 204, 301, 302, 303, 400, 500, 511))
|
@pytest.mark.parametrize('status', (200, 204, 301, 302, 303, 400, 500, 511))
|
||||||
def test_raise_http_error(self, handler, status):
|
def test_raise_http_error(self, handler, status):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
|
@ -203,17 +199,30 @@ class TestWebsSocketRequestHandlerConformance:
|
||||||
ws_validate_and_send(rh, Request(f'{self.ws_base_url}/gen_{status}'))
|
ws_validate_and_send(rh, Request(f'{self.ws_base_url}/gen_{status}'))
|
||||||
assert exc_info.value.status == status
|
assert exc_info.value.status == status
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
@pytest.mark.parametrize('params,extensions', [
|
@pytest.mark.parametrize('params,extensions', [
|
||||||
({'timeout': sys.float_info.min}, {}),
|
({'timeout': sys.float_info.min}, {}),
|
||||||
({}, {'timeout': sys.float_info.min}),
|
({}, {'timeout': sys.float_info.min}),
|
||||||
])
|
])
|
||||||
def test_timeout(self, handler, params, extensions):
|
def test_read_timeout(self, handler, params, extensions):
|
||||||
with handler(**params) as rh:
|
with handler(**params) as rh:
|
||||||
with pytest.raises(TransportError):
|
with pytest.raises(TransportError):
|
||||||
ws_validate_and_send(rh, Request(self.ws_base_url, extensions=extensions))
|
ws_validate_and_send(rh, Request(self.ws_base_url, extensions=extensions))
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
def test_connect_timeout(self, handler):
|
||||||
|
# nothing should be listening on this port
|
||||||
|
connect_timeout_url = 'ws://10.255.255.255'
|
||||||
|
with handler(timeout=0.01) as rh, pytest.raises(TransportError):
|
||||||
|
now = time.time()
|
||||||
|
ws_validate_and_send(rh, Request(connect_timeout_url))
|
||||||
|
assert time.time() - now < DEFAULT_TIMEOUT
|
||||||
|
|
||||||
|
# Per request timeout, should override handler timeout
|
||||||
|
request = Request(connect_timeout_url, extensions={'timeout': 0.01})
|
||||||
|
with handler() as rh, pytest.raises(TransportError):
|
||||||
|
now = time.time()
|
||||||
|
ws_validate_and_send(rh, request)
|
||||||
|
assert time.time() - now < DEFAULT_TIMEOUT
|
||||||
|
|
||||||
def test_cookies(self, handler):
|
def test_cookies(self, handler):
|
||||||
cookiejar = YoutubeDLCookieJar()
|
cookiejar = YoutubeDLCookieJar()
|
||||||
cookiejar.set_cookie(http.cookiejar.Cookie(
|
cookiejar.set_cookie(http.cookiejar.Cookie(
|
||||||
|
@ -239,7 +248,6 @@ class TestWebsSocketRequestHandlerConformance:
|
||||||
assert json.loads(ws.recv())['cookie'] == 'test=ytdlp'
|
assert json.loads(ws.recv())['cookie'] == 'test=ytdlp'
|
||||||
ws.close()
|
ws.close()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
def test_source_address(self, handler):
|
def test_source_address(self, handler):
|
||||||
source_address = f'127.0.0.{random.randint(5, 255)}'
|
source_address = f'127.0.0.{random.randint(5, 255)}'
|
||||||
verify_address_availability(source_address)
|
verify_address_availability(source_address)
|
||||||
|
@ -249,7 +257,6 @@ class TestWebsSocketRequestHandlerConformance:
|
||||||
assert source_address == ws.recv()
|
assert source_address == ws.recv()
|
||||||
ws.close()
|
ws.close()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
def test_response_url(self, handler):
|
def test_response_url(self, handler):
|
||||||
with handler() as rh:
|
with handler() as rh:
|
||||||
url = f'{self.ws_base_url}/something'
|
url = f'{self.ws_base_url}/something'
|
||||||
|
@ -257,7 +264,6 @@ class TestWebsSocketRequestHandlerConformance:
|
||||||
assert ws.url == url
|
assert ws.url == url
|
||||||
ws.close()
|
ws.close()
|
||||||
|
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
def test_request_headers(self, handler):
|
def test_request_headers(self, handler):
|
||||||
with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
|
with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh:
|
||||||
# Global Headers
|
# Global Headers
|
||||||
|
@ -293,7 +299,6 @@ class TestWebsSocketRequestHandlerConformance:
|
||||||
'client_certificate_password': 'foobar',
|
'client_certificate_password': 'foobar',
|
||||||
}
|
}
|
||||||
))
|
))
|
||||||
@pytest.mark.parametrize('handler', ['Websockets'], indirect=True)
|
|
||||||
def test_mtls(self, handler, client_cert):
|
def test_mtls(self, handler, client_cert):
|
||||||
with handler(
|
with handler(
|
||||||
# Disable client-side validation of unacceptable self-signed testcert.pem
|
# Disable client-side validation of unacceptable self-signed testcert.pem
|
||||||
|
@ -303,6 +308,44 @@ class TestWebsSocketRequestHandlerConformance:
|
||||||
) as rh:
|
) as rh:
|
||||||
ws_validate_and_send(rh, Request(self.mtls_wss_base_url)).close()
|
ws_validate_and_send(rh, Request(self.mtls_wss_base_url)).close()
|
||||||
|
|
||||||
|
def test_request_disable_proxy(self, handler):
|
||||||
|
for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['ws']:
|
||||||
|
# Given handler is configured with a proxy
|
||||||
|
with handler(proxies={'ws': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
|
||||||
|
# When a proxy is explicitly set to None for the request
|
||||||
|
ws = ws_validate_and_send(rh, Request(self.ws_base_url, proxies={'http': None}))
|
||||||
|
# Then no proxy should be used
|
||||||
|
assert ws.status == 101
|
||||||
|
ws.close()
|
||||||
|
|
||||||
|
@pytest.mark.skip_handlers_if(
|
||||||
|
lambda _, handler: Features.NO_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support NO_PROXY')
|
||||||
|
def test_noproxy(self, handler):
|
||||||
|
for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['ws']:
|
||||||
|
# Given the handler is configured with a proxy
|
||||||
|
with handler(proxies={'ws': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh:
|
||||||
|
for no_proxy in (f'127.0.0.1:{self.ws_port}', '127.0.0.1', 'localhost'):
|
||||||
|
# When request no proxy includes the request url host
|
||||||
|
ws = ws_validate_and_send(rh, Request(self.ws_base_url, proxies={'no': no_proxy}))
|
||||||
|
# Then the proxy should not be used
|
||||||
|
assert ws.status == 101
|
||||||
|
ws.close()
|
||||||
|
|
||||||
|
@pytest.mark.skip_handlers_if(
|
||||||
|
lambda _, handler: Features.ALL_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support ALL_PROXY')
|
||||||
|
def test_allproxy(self, handler):
|
||||||
|
supported_proto = traverse_obj(handler._SUPPORTED_PROXY_SCHEMES, 0, default='ws')
|
||||||
|
# This is a bit of a hacky test, but it should be enough to check whether the handler is using the proxy.
|
||||||
|
# 0.1s might not be enough of a timeout if proxy is not used in all cases, but should still get failures.
|
||||||
|
with handler(proxies={'all': f'{supported_proto}://10.255.255.255'}, timeout=0.1) as rh:
|
||||||
|
with pytest.raises(TransportError):
|
||||||
|
ws_validate_and_send(rh, Request(self.ws_base_url)).close()
|
||||||
|
|
||||||
|
with handler(timeout=0.1) as rh:
|
||||||
|
with pytest.raises(TransportError):
|
||||||
|
ws_validate_and_send(
|
||||||
|
rh, Request(self.ws_base_url, proxies={'all': f'{supported_proto}://10.255.255.255'})).close()
|
||||||
|
|
||||||
|
|
||||||
def create_fake_ws_connection(raised):
|
def create_fake_ws_connection(raised):
|
||||||
import websockets.sync.client
|
import websockets.sync.client
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import collections
|
import collections
|
||||||
import contextlib
|
import contextlib
|
||||||
import copy
|
import copy
|
||||||
import datetime
|
import datetime as dt
|
||||||
import errno
|
import errno
|
||||||
import fileinput
|
import fileinput
|
||||||
import http.cookiejar
|
import http.cookiejar
|
||||||
|
@ -25,7 +25,7 @@ import unicodedata
|
||||||
|
|
||||||
from .cache import Cache
|
from .cache import Cache
|
||||||
from .compat import functools, urllib # isort: split
|
from .compat import functools, urllib # isort: split
|
||||||
from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
|
from .compat import compat_os_name, urllib_req_to_req
|
||||||
from .cookies import LenientSimpleCookie, load_cookies
|
from .cookies import LenientSimpleCookie, load_cookies
|
||||||
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
|
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
|
||||||
from .downloader.rtmp import rtmpdump_version
|
from .downloader.rtmp import rtmpdump_version
|
||||||
|
@ -102,7 +102,6 @@ from .utils import (
|
||||||
UserNotLive,
|
UserNotLive,
|
||||||
YoutubeDLError,
|
YoutubeDLError,
|
||||||
age_restricted,
|
age_restricted,
|
||||||
args_to_str,
|
|
||||||
bug_reports_message,
|
bug_reports_message,
|
||||||
date_from_str,
|
date_from_str,
|
||||||
deprecation_warning,
|
deprecation_warning,
|
||||||
|
@ -141,11 +140,13 @@ from .utils import (
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
sanitize_path,
|
sanitize_path,
|
||||||
sanitize_url,
|
sanitize_url,
|
||||||
|
shell_quote,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
strftime_or_none,
|
strftime_or_none,
|
||||||
subtitles_filename,
|
subtitles_filename,
|
||||||
supports_terminal_sequences,
|
supports_terminal_sequences,
|
||||||
system_identifier,
|
system_identifier,
|
||||||
|
filesize_from_tbr,
|
||||||
timetuple_from_msec,
|
timetuple_from_msec,
|
||||||
to_high_limit_path,
|
to_high_limit_path,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
|
@ -480,7 +481,7 @@ class YoutubeDL:
|
||||||
nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
|
nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
|
||||||
max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
|
max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
|
||||||
continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
|
continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
|
||||||
external_downloader_args, concurrent_fragment_downloads.
|
external_downloader_args, concurrent_fragment_downloads, progress_delta.
|
||||||
|
|
||||||
The following options are used by the post processors:
|
The following options are used by the post processors:
|
||||||
ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
|
ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
|
||||||
|
@ -822,7 +823,7 @@ class YoutubeDL:
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
'Long argument string detected. '
|
'Long argument string detected. '
|
||||||
'Use -- to separate parameters and URLs, like this:\n%s' %
|
'Use -- to separate parameters and URLs, like this:\n%s' %
|
||||||
args_to_str(correct_argv))
|
shell_quote(correct_argv))
|
||||||
|
|
||||||
def add_info_extractor(self, ie):
|
def add_info_extractor(self, ie):
|
||||||
"""Add an InfoExtractor object to the end of the list."""
|
"""Add an InfoExtractor object to the end of the list."""
|
||||||
|
@ -1354,7 +1355,7 @@ class YoutubeDL:
|
||||||
value, fmt = escapeHTML(str(value)), str_fmt
|
value, fmt = escapeHTML(str(value)), str_fmt
|
||||||
elif fmt[-1] == 'q': # quoted
|
elif fmt[-1] == 'q': # quoted
|
||||||
value = map(str, variadic(value) if '#' in flags else [value])
|
value = map(str, variadic(value) if '#' in flags else [value])
|
||||||
value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
|
value, fmt = shell_quote(value, shell=True), str_fmt
|
||||||
elif fmt[-1] == 'B': # bytes
|
elif fmt[-1] == 'B': # bytes
|
||||||
value = f'%{str_fmt}'.encode() % str(value).encode()
|
value = f'%{str_fmt}'.encode() % str(value).encode()
|
||||||
value, fmt = value.decode('utf-8', 'ignore'), 's'
|
value, fmt = value.decode('utf-8', 'ignore'), 's'
|
||||||
|
@ -2135,6 +2136,11 @@ class YoutubeDL:
|
||||||
|
|
||||||
def _check_formats(self, formats):
|
def _check_formats(self, formats):
|
||||||
for f in formats:
|
for f in formats:
|
||||||
|
working = f.get('__working')
|
||||||
|
if working is not None:
|
||||||
|
if working:
|
||||||
|
yield f
|
||||||
|
continue
|
||||||
self.to_screen('[info] Testing format %s' % f['format_id'])
|
self.to_screen('[info] Testing format %s' % f['format_id'])
|
||||||
path = self.get_output_path('temp')
|
path = self.get_output_path('temp')
|
||||||
if not self._ensure_dir_exists(f'{path}/'):
|
if not self._ensure_dir_exists(f'{path}/'):
|
||||||
|
@ -2151,33 +2157,44 @@ class YoutubeDL:
|
||||||
os.remove(temp_file.name)
|
os.remove(temp_file.name)
|
||||||
except OSError:
|
except OSError:
|
||||||
self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
|
self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
|
||||||
|
f['__working'] = success
|
||||||
if success:
|
if success:
|
||||||
yield f
|
yield f
|
||||||
else:
|
else:
|
||||||
self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
|
self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
|
||||||
|
|
||||||
|
def _select_formats(self, formats, selector):
|
||||||
|
return list(selector({
|
||||||
|
'formats': formats,
|
||||||
|
'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
|
||||||
|
'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
|
||||||
|
or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
|
||||||
|
}))
|
||||||
|
|
||||||
def _default_format_spec(self, info_dict, download=True):
|
def _default_format_spec(self, info_dict, download=True):
|
||||||
|
download = download and not self.params.get('simulate')
|
||||||
|
prefer_best = download and (
|
||||||
|
self.params['outtmpl']['default'] == '-'
|
||||||
|
or info_dict.get('is_live') and not self.params.get('live_from_start'))
|
||||||
|
|
||||||
def can_merge():
|
def can_merge():
|
||||||
merger = FFmpegMergerPP(self)
|
merger = FFmpegMergerPP(self)
|
||||||
return merger.available and merger.can_merge()
|
return merger.available and merger.can_merge()
|
||||||
|
|
||||||
prefer_best = (
|
if not prefer_best and download and not can_merge():
|
||||||
not self.params.get('simulate')
|
prefer_best = True
|
||||||
and download
|
formats = self._get_formats(info_dict)
|
||||||
and (
|
evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
|
||||||
not can_merge()
|
if evaluate_formats('b/bv+ba') != evaluate_formats('bv*+ba/b'):
|
||||||
or info_dict.get('is_live') and not self.params.get('live_from_start')
|
self.report_warning('ffmpeg not found. The downloaded format may not be the best available. '
|
||||||
or self.params['outtmpl']['default'] == '-'))
|
'Installing ffmpeg is strongly recommended: https://github.com/yt-dlp/yt-dlp#dependencies')
|
||||||
compat = (
|
|
||||||
prefer_best
|
compat = (self.params.get('allow_multiple_audio_streams')
|
||||||
or self.params.get('allow_multiple_audio_streams', False)
|
|
||||||
or 'format-spec' in self.params['compat_opts'])
|
or 'format-spec' in self.params['compat_opts'])
|
||||||
|
|
||||||
return (
|
return ('best/bestvideo+bestaudio' if prefer_best
|
||||||
'best/bestvideo+bestaudio' if prefer_best
|
else 'bestvideo+bestaudio/best' if compat
|
||||||
else 'bestvideo*+bestaudio/best' if not compat
|
else 'bestvideo*+bestaudio/best')
|
||||||
else 'bestvideo+bestaudio/best')
|
|
||||||
|
|
||||||
def build_format_selector(self, format_spec):
|
def build_format_selector(self, format_spec):
|
||||||
def syntax_error(note, start):
|
def syntax_error(note, start):
|
||||||
|
@ -2628,7 +2645,7 @@ class YoutubeDL:
|
||||||
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
||||||
# see http://bugs.python.org/issue1646728)
|
# see http://bugs.python.org/issue1646728)
|
||||||
with contextlib.suppress(ValueError, OverflowError, OSError):
|
with contextlib.suppress(ValueError, OverflowError, OSError):
|
||||||
upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
|
upload_date = dt.datetime.fromtimestamp(info_dict[ts_key], dt.timezone.utc)
|
||||||
info_dict[date_key] = upload_date.strftime('%Y%m%d')
|
info_dict[date_key] = upload_date.strftime('%Y%m%d')
|
||||||
|
|
||||||
if not info_dict.get('release_year'):
|
if not info_dict.get('release_year'):
|
||||||
|
@ -2782,7 +2799,7 @@ class YoutubeDL:
|
||||||
|
|
||||||
get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
|
get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
|
||||||
if not get_from_start:
|
if not get_from_start:
|
||||||
info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
|
info_dict['title'] += ' ' + dt.datetime.now().strftime('%Y-%m-%d %H:%M')
|
||||||
if info_dict.get('is_live') and formats:
|
if info_dict.get('is_live') and formats:
|
||||||
formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
|
formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
|
||||||
if get_from_start and not formats:
|
if get_from_start and not formats:
|
||||||
|
@ -2813,6 +2830,9 @@ class YoutubeDL:
|
||||||
format['url'] = sanitize_url(format['url'])
|
format['url'] = sanitize_url(format['url'])
|
||||||
if format.get('ext') is None:
|
if format.get('ext') is None:
|
||||||
format['ext'] = determine_ext(format['url']).lower()
|
format['ext'] = determine_ext(format['url']).lower()
|
||||||
|
if format['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
|
||||||
|
if format.get('acodec') is None:
|
||||||
|
format['acodec'] = format['ext']
|
||||||
if format.get('protocol') is None:
|
if format.get('protocol') is None:
|
||||||
format['protocol'] = determine_protocol(format)
|
format['protocol'] = determine_protocol(format)
|
||||||
if format.get('resolution') is None:
|
if format.get('resolution') is None:
|
||||||
|
@ -2823,9 +2843,8 @@ class YoutubeDL:
|
||||||
format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
|
format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
|
||||||
# For fragmented formats, "tbr" is often max bitrate and not average
|
# For fragmented formats, "tbr" is often max bitrate and not average
|
||||||
if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url'))
|
if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url'))
|
||||||
and info_dict.get('duration') and format.get('tbr')
|
|
||||||
and not format.get('filesize') and not format.get('filesize_approx')):
|
and not format.get('filesize') and not format.get('filesize_approx')):
|
||||||
format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
|
format['filesize_approx'] = filesize_from_tbr(format.get('tbr'), info_dict.get('duration'))
|
||||||
format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)
|
format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)
|
||||||
|
|
||||||
# Safeguard against old/insecure infojson when using --load-info-json
|
# Safeguard against old/insecure infojson when using --load-info-json
|
||||||
|
@ -2925,12 +2944,7 @@ class YoutubeDL:
|
||||||
self.write_debug(f'Default format spec: {req_format}')
|
self.write_debug(f'Default format spec: {req_format}')
|
||||||
format_selector = self.build_format_selector(req_format)
|
format_selector = self.build_format_selector(req_format)
|
||||||
|
|
||||||
formats_to_download = list(format_selector({
|
formats_to_download = self._select_formats(formats, format_selector)
|
||||||
'formats': formats,
|
|
||||||
'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
|
|
||||||
'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
|
|
||||||
or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
|
|
||||||
}))
|
|
||||||
if interactive_format_selection and not formats_to_download:
|
if interactive_format_selection and not formats_to_download:
|
||||||
self.report_error('Requested format is not available', tb=False, is_error=False)
|
self.report_error('Requested format is not available', tb=False, is_error=False)
|
||||||
continue
|
continue
|
||||||
|
@ -3057,7 +3071,7 @@ class YoutubeDL:
|
||||||
f = formats[-1]
|
f = formats[-1]
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
'No subtitle format found matching "%s" for language %s, '
|
'No subtitle format found matching "%s" for language %s, '
|
||||||
'using %s' % (formats_query, lang, f['ext']))
|
'using %s. Use --list-subs for a list of available subtitles' % (formats_query, lang, f['ext']))
|
||||||
subs[lang] = f
|
subs[lang] = f
|
||||||
return subs
|
return subs
|
||||||
|
|
||||||
|
@ -3875,8 +3889,8 @@ class YoutubeDL:
|
||||||
delim, (
|
delim, (
|
||||||
format_field(f, 'filesize', ' \t%s', func=format_bytes)
|
format_field(f, 'filesize', ' \t%s', func=format_bytes)
|
||||||
or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
|
or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
|
||||||
or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
|
or format_field(filesize_from_tbr(f.get('tbr'), info_dict.get('duration')), None,
|
||||||
None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
|
self._format_out('~\t%s', self.Styles.SUPPRESS), func=format_bytes)),
|
||||||
format_field(f, 'tbr', '\t%dk', func=round),
|
format_field(f, 'tbr', '\t%dk', func=round),
|
||||||
shorten_protocol_name(f.get('protocol', '')),
|
shorten_protocol_name(f.get('protocol', '')),
|
||||||
delim,
|
delim,
|
||||||
|
|
|
@ -836,6 +836,7 @@ def parse_options(argv=None):
|
||||||
'noprogress': opts.quiet if opts.noprogress is None else opts.noprogress,
|
'noprogress': opts.quiet if opts.noprogress is None else opts.noprogress,
|
||||||
'progress_with_newline': opts.progress_with_newline,
|
'progress_with_newline': opts.progress_with_newline,
|
||||||
'progress_template': opts.progress_template,
|
'progress_template': opts.progress_template,
|
||||||
|
'progress_delta': opts.progress_delta,
|
||||||
'playliststart': opts.playliststart,
|
'playliststart': opts.playliststart,
|
||||||
'playlistend': opts.playlistend,
|
'playlistend': opts.playlistend,
|
||||||
'playlistreverse': opts.playlist_reverse,
|
'playlistreverse': opts.playlist_reverse,
|
||||||
|
|
|
@@ -27,12 +27,9 @@ def compat_etree_fromstring(text):
 compat_os_name = os._name if os.name == 'java' else os.name


-if compat_os_name == 'nt':
-    def compat_shlex_quote(s):
-        import re
-        return s if re.match(r'^[-_\w./]+$', s) else s.replace('"', '""').join('""')
-else:
-    from shlex import quote as compat_shlex_quote  # noqa: F401
+def compat_shlex_quote(s):
+    from ..utils import shell_quote
+    return shell_quote(s)


 def compat_ord(c):

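For context on the hunk above: compat_shlex_quote now simply forwards to utils.shell_quote, the same helper used for the %q output-template field and the Windows escaping test earlier in this diff. A minimal usage sketch, assuming a yt-dlp build that already contains this change:

    from yt_dlp.utils import shell_quote

    args = ['ffmpeg', '-i', 'in put.mp4', 'out"file.mkv']
    # One quoted command-line string; with shell=True the quoting follows the
    # rules of the platform shell (cmd.exe on Windows), as exercised by
    # test_windows_escaping above
    print(shell_quote(args))
    print(shell_quote(args, shell=True))
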
@ -1,6 +1,7 @@
|
||||||
import base64
|
import base64
|
||||||
import collections
|
import collections
|
||||||
import contextlib
|
import contextlib
|
||||||
|
import datetime as dt
|
||||||
import glob
|
import glob
|
||||||
import http.cookiejar
|
import http.cookiejar
|
||||||
import http.cookies
|
import http.cookies
|
||||||
|
@ -15,7 +16,6 @@ import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
import urllib.request
|
import urllib.request
|
||||||
from datetime import datetime, timedelta, timezone
|
|
||||||
from enum import Enum, auto
|
from enum import Enum, auto
|
||||||
from hashlib import pbkdf2_hmac
|
from hashlib import pbkdf2_hmac
|
||||||
|
|
||||||
|
@ -46,7 +46,7 @@ from .utils import (
|
||||||
from .utils._utils import _YDLLogger
|
from .utils._utils import _YDLLogger
|
||||||
from .utils.networking import normalize_url
|
from .utils.networking import normalize_url
|
||||||
|
|
||||||
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
|
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi', 'whale'}
|
||||||
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
|
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
|
||||||
|
|
||||||
|
|
||||||
|
@ -194,7 +194,11 @@ def _firefox_browser_dirs():
|
||||||
yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
|
yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
|
||||||
|
|
||||||
else:
|
else:
|
||||||
yield from map(os.path.expanduser, ('~/.mozilla/firefox', '~/snap/firefox/common/.mozilla/firefox'))
|
yield from map(os.path.expanduser, (
|
||||||
|
'~/.mozilla/firefox',
|
||||||
|
'~/snap/firefox/common/.mozilla/firefox',
|
||||||
|
'~/.var/app/org.mozilla.firefox/.mozilla/firefox',
|
||||||
|
))
|
||||||
|
|
||||||
|
|
||||||
def _firefox_cookie_dbs(roots):
|
def _firefox_cookie_dbs(roots):
|
||||||
|
@ -215,6 +219,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||||
'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
|
'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'),
|
||||||
'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
|
'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'),
|
||||||
'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
|
'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'),
|
||||||
|
'whale': os.path.join(appdata_local, R'Naver\Naver Whale\User Data'),
|
||||||
}[browser_name]
|
}[browser_name]
|
||||||
|
|
||||||
elif sys.platform == 'darwin':
|
elif sys.platform == 'darwin':
|
||||||
|
@ -226,6 +231,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||||
'edge': os.path.join(appdata, 'Microsoft Edge'),
|
'edge': os.path.join(appdata, 'Microsoft Edge'),
|
||||||
'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
|
'opera': os.path.join(appdata, 'com.operasoftware.Opera'),
|
||||||
'vivaldi': os.path.join(appdata, 'Vivaldi'),
|
'vivaldi': os.path.join(appdata, 'Vivaldi'),
|
||||||
|
'whale': os.path.join(appdata, 'Naver/Whale'),
|
||||||
}[browser_name]
|
}[browser_name]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
@ -237,6 +243,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||||
'edge': os.path.join(config, 'microsoft-edge'),
|
'edge': os.path.join(config, 'microsoft-edge'),
|
||||||
'opera': os.path.join(config, 'opera'),
|
'opera': os.path.join(config, 'opera'),
|
||||||
'vivaldi': os.path.join(config, 'vivaldi'),
|
'vivaldi': os.path.join(config, 'vivaldi'),
|
||||||
|
'whale': os.path.join(config, 'naver-whale'),
|
||||||
}[browser_name]
|
}[browser_name]
|
||||||
|
|
||||||
# Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
|
# Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
|
||||||
|
@ -248,6 +255,7 @@ def _get_chromium_based_browser_settings(browser_name):
|
||||||
'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
|
'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium',
|
||||||
'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
|
'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium',
|
||||||
'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
|
'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome',
|
||||||
|
'whale': 'Whale',
|
||||||
}[browser_name]
|
}[browser_name]
|
||||||
|
|
||||||
browsers_without_profiles = {'opera'}
|
browsers_without_profiles = {'opera'}
|
||||||
|
@ -343,6 +351,11 @@ def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, pa
|
||||||
if value is None:
|
if value is None:
|
||||||
return is_encrypted, None
|
return is_encrypted, None
|
||||||
|
|
||||||
|
# In chrome, session cookies have expires_utc set to 0
|
||||||
|
# In our cookie-store, cookies that do not expire should have expires set to None
|
||||||
|
if not expires_utc:
|
||||||
|
expires_utc = None
|
||||||
|
|
||||||
return is_encrypted, http.cookiejar.Cookie(
|
return is_encrypted, http.cookiejar.Cookie(
|
||||||
version=0, name=name, value=value, port=None, port_specified=False,
|
version=0, name=name, value=value, port=None, port_specified=False,
|
||||||
domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
|
domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
|
||||||
|
@ -594,7 +607,7 @@ class DataParser:
|
||||||
|
|
||||||
|
|
||||||
def _mac_absolute_time_to_posix(timestamp):
|
def _mac_absolute_time_to_posix(timestamp):
|
||||||
return int((datetime(2001, 1, 1, 0, 0, tzinfo=timezone.utc) + timedelta(seconds=timestamp)).timestamp())
|
return int((dt.datetime(2001, 1, 1, 0, 0, tzinfo=dt.timezone.utc) + dt.timedelta(seconds=timestamp)).timestamp())
|
||||||
|
|
||||||
|
|
||||||
def _parse_safari_cookies_header(data, logger):
|
def _parse_safari_cookies_header(data, logger):
|
||||||
|
|
|
@ -4,6 +4,7 @@ import functools
|
||||||
import os
|
import os
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
|
import threading
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from ..minicurses import (
|
from ..minicurses import (
|
||||||
|
@ -63,6 +64,7 @@ class FileDownloader:
|
||||||
min_filesize: Skip files smaller than this size
|
min_filesize: Skip files smaller than this size
|
||||||
max_filesize: Skip files larger than this size
|
max_filesize: Skip files larger than this size
|
||||||
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
|
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
|
||||||
|
progress_delta: The minimum time between progress output, in seconds
|
||||||
external_downloader_args: A dictionary of downloader keys (in lower case)
|
external_downloader_args: A dictionary of downloader keys (in lower case)
|
||||||
and a list of additional command-line arguments for the
|
and a list of additional command-line arguments for the
|
||||||
executable. Use 'default' as the name for arguments to be
|
executable. Use 'default' as the name for arguments to be
|
||||||
|
@ -88,6 +90,9 @@ class FileDownloader:
|
||||||
self.params = params
|
self.params = params
|
||||||
self._prepare_multiline_status()
|
self._prepare_multiline_status()
|
||||||
self.add_progress_hook(self.report_progress)
|
self.add_progress_hook(self.report_progress)
|
||||||
|
if self.params.get('progress_delta'):
|
||||||
|
self._progress_delta_lock = threading.Lock()
|
||||||
|
self._progress_delta_time = time.monotonic()
|
||||||
|
|
||||||
def _set_ydl(self, ydl):
|
def _set_ydl(self, ydl):
|
||||||
self.ydl = ydl
|
self.ydl = ydl
|
||||||
|
@ -366,6 +371,12 @@ class FileDownloader:
|
||||||
if s['status'] != 'downloading':
|
if s['status'] != 'downloading':
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if update_delta := self.params.get('progress_delta'):
|
||||||
|
with self._progress_delta_lock:
|
||||||
|
if time.monotonic() < self._progress_delta_time:
|
||||||
|
return
|
||||||
|
self._progress_delta_time += update_delta
|
||||||
|
|
||||||
s.update({
|
s.update({
|
||||||
'_eta_str': self.format_eta(s.get('eta')).strip(),
|
'_eta_str': self.format_eta(s.get('eta')).strip(),
|
||||||
'_speed_str': self.format_speed(s.get('speed')),
|
'_speed_str': self.format_speed(s.get('speed')),
|
||||||
|
|
|
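The new progress_delta option documented above throttles how often progress lines are emitted. A minimal embedding sketch (the option key is taken from the docstring and the parse_options hunk; the URL is a placeholder):

    from yt_dlp import YoutubeDL

    # Emit progress output at most once every 5 seconds
    with YoutubeDL({'progress_delta': 5.0}) as ydl:
        ydl.download(['https://example.com/some-video'])
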
@ -491,7 +491,7 @@ class FFmpegFD(ExternalFD):
|
||||||
if not self.params.get('verbose'):
|
if not self.params.get('verbose'):
|
||||||
args += ['-hide_banner']
|
args += ['-hide_banner']
|
||||||
|
|
||||||
args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args'), default=[])
|
args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args', ...))
|
||||||
|
|
||||||
# These exists only for compatibility. Extractors should use
|
# These exists only for compatibility. Extractors should use
|
||||||
# info_dict['downloader_options']['ffmpeg_args'] instead
|
# info_dict['downloader_options']['ffmpeg_args'] instead
|
||||||
|
@ -615,6 +615,8 @@ class FFmpegFD(ExternalFD):
|
||||||
else:
|
else:
|
||||||
args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
|
args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
|
||||||
|
|
||||||
|
args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args_out', ...))
|
||||||
|
|
||||||
args += self._configuration_args(('_o1', '_o', ''))
|
args += self._configuration_args(('_o1', '_o', ''))
|
||||||
|
|
||||||
args = [encodeArgument(opt) for opt in args]
|
args = [encodeArgument(opt) for opt in args]
|
||||||
|
|
|
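The FFmpegFD hunk above adds a second downloader-options key: ffmpeg_args is inserted on the input side of the FFmpeg command line, while the new ffmpeg_args_out is inserted on the output side, just before the output file. A hedged sketch of how an extractor might populate them (the option values here are purely illustrative, not taken from the patch):

    info_dict = {
        'id': 'example',
        'url': 'https://example.com/stream.m3u8',
        'protocol': 'm3u8_native',
        'downloader_options': {
            'ffmpeg_args': ['-seekable', '0'],               # applied before the input URL(s)
            'ffmpeg_args_out': ['-bsf:a', 'aac_adtstoasc'],  # applied before the output file
        },
    }
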
@ -150,6 +150,7 @@ from .arte import (
|
||||||
)
|
)
|
||||||
from .arnes import ArnesIE
|
from .arnes import ArnesIE
|
||||||
from .asobichannel import AsobiChannelIE, AsobiChannelTagURLIE
|
from .asobichannel import AsobiChannelIE, AsobiChannelTagURLIE
|
||||||
|
from .asobistage import AsobiStageIE
|
||||||
from .atresplayer import AtresPlayerIE
|
from .atresplayer import AtresPlayerIE
|
||||||
from .atscaleconf import AtScaleConfEventIE
|
from .atscaleconf import AtScaleConfEventIE
|
||||||
from .atvat import ATVAtIE
|
from .atvat import ATVAtIE
|
||||||
|
@ -287,7 +288,6 @@ from .bundestag import BundestagIE
|
||||||
from .buzzfeed import BuzzFeedIE
|
from .buzzfeed import BuzzFeedIE
|
||||||
from .byutv import BYUtvIE
|
from .byutv import BYUtvIE
|
||||||
from .c56 import C56IE
|
from .c56 import C56IE
|
||||||
from .cableav import CableAVIE
|
|
||||||
from .callin import CallinIE
|
from .callin import CallinIE
|
||||||
from .caltrans import CaltransIE
|
from .caltrans import CaltransIE
|
||||||
from .cam4 import CAM4IE
|
from .cam4 import CAM4IE
|
||||||
|
@ -386,7 +386,11 @@ from .comedycentral import (
|
||||||
ComedyCentralIE,
|
ComedyCentralIE,
|
||||||
ComedyCentralTVIE,
|
ComedyCentralTVIE,
|
||||||
)
|
)
|
||||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
from .commonmistakes import (
|
||||||
|
BlobIE,
|
||||||
|
CommonMistakesIE,
|
||||||
|
UnicodeBOMIE,
|
||||||
|
)
|
||||||
from .commonprotocols import (
|
from .commonprotocols import (
|
||||||
MmsIE,
|
MmsIE,
|
||||||
RtmpIE,
|
RtmpIE,
|
||||||
|
@ -543,7 +547,6 @@ from .egghead import (
|
||||||
EggheadLessonIE,
|
EggheadLessonIE,
|
||||||
)
|
)
|
||||||
from .eighttracks import EightTracksIE
|
from .eighttracks import EightTracksIE
|
||||||
from .einthusan import EinthusanIE
|
|
||||||
from .eitb import EitbIE
|
from .eitb import EitbIE
|
||||||
from .elementorembed import ElementorEmbedIE
|
from .elementorembed import ElementorEmbedIE
|
||||||
from .elonet import ElonetIE
|
from .elonet import ElonetIE
|
||||||
|
@ -590,6 +593,7 @@ from .facebook import (
|
||||||
FacebookReelIE,
|
FacebookReelIE,
|
||||||
FacebookAdsIE,
|
FacebookAdsIE,
|
||||||
)
|
)
|
||||||
|
from .fathom import FathomIE
|
||||||
from .fancode import (
|
from .fancode import (
|
||||||
FancodeVodIE,
|
FancodeVodIE,
|
||||||
FancodeLiveIE
|
FancodeLiveIE
|
||||||
|
@ -855,10 +859,6 @@ from .iwara import (
|
||||||
)
|
)
|
||||||
from .ixigua import IxiguaIE
|
from .ixigua import IxiguaIE
|
||||||
from .izlesene import IzleseneIE
|
from .izlesene import IzleseneIE
|
||||||
from .jable import (
|
|
||||||
JableIE,
|
|
||||||
JablePlaylistIE,
|
|
||||||
)
|
|
||||||
from .jamendo import (
|
from .jamendo import (
|
||||||
JamendoIE,
|
JamendoIE,
|
||||||
JamendoAlbumIE,
|
JamendoAlbumIE,
|
||||||
|
@ -874,6 +874,7 @@ from .jeuxvideo import JeuxVideoIE
|
||||||
from .jiosaavn import (
|
from .jiosaavn import (
|
||||||
JioSaavnSongIE,
|
JioSaavnSongIE,
|
||||||
JioSaavnAlbumIE,
|
JioSaavnAlbumIE,
|
||||||
|
JioSaavnPlaylistIE,
|
||||||
)
|
)
|
||||||
from .jove import JoveIE
|
from .jove import JoveIE
|
||||||
from .joj import JojIE
|
from .joj import JojIE
|
||||||
|
@ -989,6 +990,10 @@ from .lnkgo import (
|
||||||
LnkGoIE,
|
LnkGoIE,
|
||||||
LnkIE,
|
LnkIE,
|
||||||
)
|
)
|
||||||
|
from .loom import (
|
||||||
|
LoomIE,
|
||||||
|
LoomFolderIE,
|
||||||
|
)
|
||||||
from .lovehomeporn import LoveHomePornIE
|
from .lovehomeporn import LoveHomePornIE
|
||||||
from .lrt import (
|
from .lrt import (
|
||||||
LRTVODIE,
|
LRTVODIE,
|
||||||
|
@ -1488,7 +1493,6 @@ from .polskieradio import (
|
||||||
)
|
)
|
||||||
from .popcorntimes import PopcorntimesIE
|
from .popcorntimes import PopcorntimesIE
|
||||||
from .popcorntv import PopcornTVIE
|
from .popcorntv import PopcornTVIE
|
||||||
from .porn91 import Porn91IE
|
|
||||||
from .pornbox import PornboxIE
|
from .pornbox import PornboxIE
|
||||||
from .pornflip import PornFlipIE
|
from .pornflip import PornFlipIE
|
||||||
from .pornhub import (
|
from .pornhub import (
|
||||||
|
@ -1750,6 +1754,7 @@ from .shahid import (
|
||||||
ShahidIE,
|
ShahidIE,
|
||||||
ShahidShowIE,
|
ShahidShowIE,
|
||||||
)
|
)
|
||||||
|
from .sharepoint import SharePointIE
|
||||||
from .sharevideos import ShareVideosEmbedIE
|
from .sharevideos import ShareVideosEmbedIE
|
||||||
from .sibnet import SibnetEmbedIE
|
from .sibnet import SibnetEmbedIE
|
||||||
from .shemaroome import ShemarooMeIE
|
from .shemaroome import ShemarooMeIE
|
||||||
|
@ -2283,6 +2288,7 @@ from .vrt import (
|
||||||
VrtNUIE,
|
VrtNUIE,
|
||||||
KetnetIE,
|
KetnetIE,
|
||||||
DagelijkseKostIE,
|
DagelijkseKostIE,
|
||||||
|
Radio1BeIE,
|
||||||
)
|
)
|
||||||
from .vtm import VTMIE
|
from .vtm import VTMIE
|
||||||
from .medialaan import MedialaanIE
|
from .medialaan import MedialaanIE
|
||||||
|
@ -2364,7 +2370,6 @@ from .wykop import (
|
||||||
)
|
)
|
||||||
from .xanimu import XanimuIE
|
from .xanimu import XanimuIE
|
||||||
from .xboxclips import XboxClipsIE
|
from .xboxclips import XboxClipsIE
|
||||||
from .xfileshare import XFileShareIE
|
|
||||||
from .xhamster import (
|
from .xhamster import (
|
||||||
XHamsterIE,
|
XHamsterIE,
|
||||||
XHamsterEmbedIE,
|
XHamsterEmbedIE,
|
||||||
|
@ -2419,8 +2424,6 @@ from .younow import (
|
||||||
YouNowMomentIE,
|
YouNowMomentIE,
|
||||||
)
|
)
|
||||||
from .youporn import YouPornIE
|
from .youporn import YouPornIE
|
||||||
from .yourporn import YourPornIE
|
|
||||||
from .yourupload import YourUploadIE
|
|
||||||
from .zaiko import (
|
from .zaiko import (
|
||||||
ZaikoIE,
|
ZaikoIE,
|
||||||
ZaikoETicketIE,
|
ZaikoETicketIE,
|
||||||
|
|
|
@ -1,25 +1,65 @@
 import functools
-import re
 
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
     OnDemandPagedList,
-    date_from_str,
+    UserNotLive,
     determine_ext,
+    filter_dict,
     int_or_none,
-    qualities,
-    traverse_obj,
-    unified_strdate,
+    orderedSet,
     unified_timestamp,
-    update_url_query,
     url_or_none,
     urlencode_postdata,
-    xpath_text,
+    urljoin,
 )
+from ..utils.traversal import traverse_obj
 
 
-class AfreecaTVIE(InfoExtractor):
+class AfreecaTVBaseIE(InfoExtractor):
+    _NETRC_MACHINE = 'afreecatv'
+
+    def _perform_login(self, username, password):
+        login_form = {
+            'szWork': 'login',
+            'szType': 'json',
+            'szUid': username,
+            'szPassword': password,
+            'isSaveId': 'false',
+            'szScriptVar': 'oLoginRet',
+            'szAction': '',
+        }
+
+        response = self._download_json(
+            'https://login.afreecatv.com/app/LoginAction.php', None,
+            'Logging in', data=urlencode_postdata(login_form))
+
+        _ERRORS = {
+            -4: 'Your account has been suspended due to a violation of our terms and policies.',
+            -5: 'https://member.afreecatv.com/app/user_delete_progress.php',
+            -6: 'https://login.afreecatv.com/membership/changeMember.php',
+            -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
+            -9: 'https://member.afreecatv.com/app/pop_login_block.php',
+            -11: 'https://login.afreecatv.com/afreeca/second_login.php',
+            -12: 'https://member.afreecatv.com/app/user_security.php',
+            0: 'The username does not exist or you have entered the wrong password.',
+            -1: 'The username does not exist or you have entered the wrong password.',
+            -3: 'You have entered your username/password incorrectly.',
+            -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
+            -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
+            -32008: 'You have failed to log in. Please contact our Help Center.',
+        }
+
+        result = int_or_none(response.get('RESULT'))
+        if result != 1:
+            error = _ERRORS.get(result, 'You have failed to log in.')
+            raise ExtractorError(
+                'Unable to login: %s said: %s' % (self.IE_NAME, error),
+                expected=True)
+
+
+class AfreecaTVIE(AfreecaTVBaseIE):
     IE_NAME = 'afreecatv'
     IE_DESC = 'afreecatv.com'
     _VALID_URL = r'''(?x)
@ -34,7 +74,6 @@ class AfreecaTVIE(InfoExtractor):
                         )
                         (?P<id>\d+)
                     '''
-    _NETRC_MACHINE = 'afreecatv'
     _TESTS = [{
         'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=',
         'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
@ -87,6 +126,7 @@ class AfreecaTVIE(InfoExtractor):
             'uploader': '♥이슬이',
             'uploader_id': 'dasl8121',
             'upload_date': '20170411',
+            'timestamp': 1491929865,
             'duration': 213,
         },
         'params': {
@ -120,219 +160,102 @@ class AfreecaTVIE(InfoExtractor):
             'uploader_id': 'rlantnghks',
             'uploader': '페이즈으',
             'duration': 10840,
-            'thumbnail': 'http://videoimg.afreecatv.com/php/SnapshotLoad.php?rowKey=20230108_9FF5BEE1_244432674_1_r',
+            'thumbnail': r're:https?://videoimg\.afreecatv\.com/.+',
             'upload_date': '20230108',
+            'timestamp': 1673218805,
             'title': '젠지 페이즈',
         },
         'params': {
             'skip_download': True,
         },
+    }, {
+        # adult content
+        'url': 'https://vod.afreecatv.com/player/70395877',
+        'only_matching': True,
+    }, {
+        # subscribers only
+        'url': 'https://vod.afreecatv.com/player/104647403',
+        'only_matching': True,
+    }, {
+        # private
+        'url': 'https://vod.afreecatv.com/player/81669846',
+        'only_matching': True,
     }]
 
-    @staticmethod
-    def parse_video_key(key):
-        video_key = {}
-        m = re.match(r'^(?P<upload_date>\d{8})_\w+_(?P<part>\d+)$', key)
-        if m:
-            video_key['upload_date'] = m.group('upload_date')
-            video_key['part'] = int(m.group('part'))
-        return video_key
-
-    def _perform_login(self, username, password):
-        login_form = {
-            'szWork': 'login',
-            'szType': 'json',
-            'szUid': username,
-            'szPassword': password,
-            'isSaveId': 'false',
-            'szScriptVar': 'oLoginRet',
-            'szAction': '',
-        }
-
-        response = self._download_json(
-            'https://login.afreecatv.com/app/LoginAction.php', None,
-            'Logging in', data=urlencode_postdata(login_form))
-
-        _ERRORS = {
-            -4: 'Your account has been suspended due to a violation of our terms and policies.',
-            -5: 'https://member.afreecatv.com/app/user_delete_progress.php',
-            -6: 'https://login.afreecatv.com/membership/changeMember.php',
-            -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
-            -9: 'https://member.afreecatv.com/app/pop_login_block.php',
-            -11: 'https://login.afreecatv.com/afreeca/second_login.php',
-            -12: 'https://member.afreecatv.com/app/user_security.php',
-            0: 'The username does not exist or you have entered the wrong password.',
-            -1: 'The username does not exist or you have entered the wrong password.',
-            -3: 'You have entered your username/password incorrectly.',
-            -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.',
-            -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
-            -32008: 'You have failed to log in. Please contact our Help Center.',
-        }
-
-        result = int_or_none(response.get('RESULT'))
-        if result != 1:
-            error = _ERRORS.get(result, 'You have failed to log in.')
-            raise ExtractorError(
-                'Unable to login: %s said: %s' % (self.IE_NAME, error),
-                expected=True)
-
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
-        partial_view = False
-        adult_view = False
-        for _ in range(2):
-            data = self._download_json(
-                'https://api.m.afreecatv.com/station/video/a/view',
-                video_id, headers={'Referer': url}, data=urlencode_postdata({
-                    'nTitleNo': video_id,
-                    'nApiLevel': 10,
-                }))['data']
-            if traverse_obj(data, ('code', {int})) == -6221:
-                raise ExtractorError('The VOD does not exist', expected=True)
-            query = {
-                'nTitleNo': video_id,
-                'nStationNo': data['station_no'],
-                'nBbsNo': data['bbs_no'],
-            }
-            if partial_view:
-                query['partialView'] = 'SKIP_ADULT'
-            if adult_view:
-                query['adultView'] = 'ADULT_VIEW'
-            video_xml = self._download_xml(
-                'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
-                video_id, 'Downloading video info XML%s'
-                % (' (skipping adult)' if partial_view else ''),
-                video_id, headers={
-                    'Referer': url,
-                }, query=query)
-
-            flag = xpath_text(video_xml, './track/flag', 'flag', default=None)
-            if flag and flag == 'SUCCEED':
-                break
-            if flag == 'PARTIAL_ADULT':
-                self.report_warning(
-                    'In accordance with local laws and regulations, underage users are restricted from watching adult content. '
-                    'Only content suitable for all ages will be downloaded. '
-                    'Provide account credentials if you wish to download restricted content.')
-                partial_view = True
-                continue
-            elif flag == 'ADULT':
-                if not adult_view:
-                    adult_view = True
-                    continue
-                error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
-            else:
-                error = flag
-            raise ExtractorError(
-                '%s said: %s' % (self.IE_NAME, error), expected=True)
-        else:
-            raise ExtractorError('Unable to download video info')
-
-        video_element = video_xml.findall('./track/video')[-1]
-        if video_element is None or video_element.text is None:
-            raise ExtractorError(
-                'Video %s does not exist' % video_id, expected=True)
-
-        video_url = video_element.text.strip()
-
-        title = xpath_text(video_xml, './track/title', 'title', fatal=True)
-
-        uploader = xpath_text(video_xml, './track/nickname', 'uploader')
-        uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id')
-        duration = int_or_none(xpath_text(
-            video_xml, './track/duration', 'duration'))
-        thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail')
-
-        common_entry = {
-            'uploader': uploader,
-            'uploader_id': uploader_id,
-            'thumbnail': thumbnail,
-        }
-
-        info = common_entry.copy()
-        info.update({
-            'id': video_id,
-            'title': title,
-            'duration': duration,
-        })
-
-        if not video_url:
-            entries = []
-            file_elements = video_element.findall('./file')
-            one = len(file_elements) == 1
-            for file_num, file_element in enumerate(file_elements, start=1):
-                file_url = url_or_none(file_element.text)
-                if not file_url:
-                    continue
-                key = file_element.get('key', '')
-                upload_date = unified_strdate(self._search_regex(
-                    r'^(\d{8})_', key, 'upload date', default=None))
-                if upload_date is not None:
-                    # sometimes the upload date isn't included in the file name
-                    # instead, another random ID is, which may parse as a valid
-                    # date but be wildly out of a reasonable range
-                    parsed_date = date_from_str(upload_date)
-                    if parsed_date.year < 2000 or parsed_date.year >= 2100:
-                        upload_date = None
-                file_duration = int_or_none(file_element.get('duration'))
-                format_id = key if key else '%s_%s' % (video_id, file_num)
-                if determine_ext(file_url) == 'm3u8':
-                    formats = self._extract_m3u8_formats(
-                        file_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                        m3u8_id='hls',
-                        note='Downloading part %d m3u8 information' % file_num)
-                else:
-                    formats = [{
-                        'url': file_url,
-                        'format_id': 'http',
-                    }]
-                if not formats and not self.get_param('ignore_no_formats'):
-                    continue
-                file_info = common_entry.copy()
-                file_info.update({
-                    'id': format_id,
-                    'title': title if one else '%s (part %d)' % (title, file_num),
-                    'upload_date': upload_date,
-                    'duration': file_duration,
-                    'formats': formats,
-                })
-                entries.append(file_info)
-            entries_info = info.copy()
-            entries_info.update({
-                '_type': 'multi_video',
-                'entries': entries,
-            })
-            return entries_info
-
-        info = {
-            'id': video_id,
-            'title': title,
-            'uploader': uploader,
-            'uploader_id': uploader_id,
-            'duration': duration,
-            'thumbnail': thumbnail,
-        }
-
-        if determine_ext(video_url) == 'm3u8':
-            info['formats'] = self._extract_m3u8_formats(
-                video_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                m3u8_id='hls')
-        else:
-            app, playpath = video_url.split('mp4:')
-            info.update({
-                'url': app,
-                'ext': 'flv',
-                'play_path': 'mp4:' + playpath,
-                'rtmp_live': True,  # downloading won't end without this
-            })
-
-        return info
+        data = self._download_json(
+            'https://api.m.afreecatv.com/station/video/a/view', video_id,
+            headers={'Referer': url}, data=urlencode_postdata({
+                'nTitleNo': video_id,
+                'nApiLevel': 10,
+            }))['data']
+
+        error_code = traverse_obj(data, ('code', {int}))
+        if error_code == -6221:
+            raise ExtractorError('The VOD does not exist', expected=True)
+        elif error_code == -6205:
+            raise ExtractorError('This VOD is private', expected=True)
+
+        common_info = traverse_obj(data, {
+            'title': ('title', {str}),
+            'uploader': ('writer_nick', {str}),
+            'uploader_id': ('bj_id', {str}),
+            'duration': ('total_file_duration', {functools.partial(int_or_none, scale=1000)}),
+            'thumbnail': ('thumb', {url_or_none}),
+        })
+
+        entries = []
+        for file_num, file_element in enumerate(
+                traverse_obj(data, ('files', lambda _, v: url_or_none(v['file']))), start=1):
+            file_url = file_element['file']
+            if determine_ext(file_url) == 'm3u8':
+                formats = self._extract_m3u8_formats(
+                    file_url, video_id, 'mp4', m3u8_id='hls',
+                    note=f'Downloading part {file_num} m3u8 information')
+            else:
+                formats = [{
+                    'url': file_url,
+                    'format_id': 'http',
+                }]
+
+            entries.append({
+                **common_info,
+                'id': file_element.get('file_info_key') or f'{video_id}_{file_num}',
+                'title': f'{common_info.get("title") or "Untitled"} (part {file_num})',
+                'formats': formats,
+                **traverse_obj(file_element, {
+                    'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
+                    'timestamp': ('file_start', {unified_timestamp}),
+                }),
+            })
+
+        if traverse_obj(data, ('adult_status', {str})) == 'notLogin':
+            if not entries:
+                self.raise_login_required(
+                    'Only users older than 19 are able to watch this video', method='password')
+            self.report_warning(
+                'In accordance with local laws and regulations, underage users are '
+                'restricted from watching adult content. Only content suitable for all '
+                f'ages will be downloaded. {self._login_hint("password")}')
+
+        if not entries and traverse_obj(data, ('sub_upload_type', {str})):
+            self.raise_login_required('This VOD is for subscribers only', method='password')
+
+        if len(entries) == 1:
+            return {
+                **entries[0],
+                'title': common_info.get('title'),
+            }
+
+        common_info['timestamp'] = traverse_obj(entries, (..., 'timestamp'), get_all=False)
+
+        return self.playlist_result(entries, video_id, multi_video=True, **common_info)
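Note: the VOD rewrite above replaces the old XML parsing with a single traverse_obj mapping over the JSON API response. A minimal standalone sketch of how that mapping behaves, assuming yt-dlp is importable; the sample dict below is made up and only its field names mirror the diff:

    import functools

    from yt_dlp.utils import int_or_none, traverse_obj, url_or_none

    # Hypothetical API payload shaped like the 'data' dict used above
    data = {
        'title': '젠지 페이즈',
        'writer_nick': '페이즈으',
        'bj_id': 'rlantnghks',
        'total_file_duration': 10840000,  # milliseconds
        'thumb': 'https://videoimg.afreecatv.com/example.jpg',
    }

    common_info = traverse_obj(data, {
        'title': ('title', {str}),
        'uploader': ('writer_nick', {str}),
        'uploader_id': ('bj_id', {str}),
        # scale=1000 converts the millisecond field to seconds
        'duration': ('total_file_duration', {functools.partial(int_or_none, scale=1000)}),
        'thumbnail': ('thumb', {url_or_none}),
    })
    print(common_info['duration'])  # 10840

Fields that are missing or fail a conversion are simply dropped from the result, which is why the extractor can spread **common_info without extra None checks.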
 
 
-class AfreecaTVLiveIE(AfreecaTVIE):  # XXX: Do not subclass from concrete IE
+class AfreecaTVLiveIE(AfreecaTVBaseIE):
     IE_NAME = 'afreecatv:live'
+    IE_DESC = 'afreecatv.com livestreams'
     _VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?'
     _TESTS = [{
         'url': 'https://play.afreecatv.com/pyh3646/237852185',
@ -347,77 +270,97 @@ class AfreecaTVLiveIE(AfreecaTVIE):  # XXX: Do not subclass from concrete IE
         },
         'skip': 'Livestream has ended',
     }, {
-        'url': 'http://play.afreeca.com/pyh3646/237852185',
+        'url': 'https://play.afreecatv.com/pyh3646/237852185',
         'only_matching': True,
     }, {
-        'url': 'http://play.afreeca.com/pyh3646',
+        'url': 'https://play.afreecatv.com/pyh3646',
         'only_matching': True,
     }]
 
     _LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php'
+    _WORKING_CDNS = [
+        'gcp_cdn',  # live-global-cdn-v02.afreecatv.com
+        'gs_cdn_pc_app',  # pc-app.stream.afreecatv.com
+        'gs_cdn_mobile_web',  # mobile-web.stream.afreecatv.com
+        'gs_cdn_pc_web',  # pc-web.stream.afreecatv.com
+    ]
+    _BAD_CDNS = [
+        'gs_cdn',  # chromecast.afreeca.gscdn.com (cannot resolve)
+        'gs_cdn_chromecast',  # chromecast.stream.afreecatv.com (HTTP Error 400)
+        'azure_cdn',  # live-global-cdn-v01.afreecatv.com (cannot resolve)
+        'aws_cf',  # live-global-cdn-v03.afreecatv.com (cannot resolve)
+        'kt_cdn',  # kt.stream.afreecatv.com (HTTP Error 400)
+    ]
 
-    _QUALITIES = ('sd', 'hd', 'hd2k', 'original')
+    def _extract_formats(self, channel_info, broadcast_no, aid):
+        stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
+
+        # If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs
+        default_cdn_ids = orderedSet([
+            *traverse_obj(channel_info, ('CDN', {str}, all, lambda _, v: v not in self._BAD_CDNS)),
+            *self._WORKING_CDNS,
+        ])
+        cdn_ids = self._configuration_arg('cdn', default_cdn_ids)
+
+        for attempt, cdn_id in enumerate(cdn_ids, start=1):
+            m3u8_url = traverse_obj(self._download_json(
+                urljoin(stream_base_url, 'broad_stream_assign.html'), broadcast_no,
+                f'Downloading {cdn_id} stream info', f'Unable to download {cdn_id} stream info',
+                fatal=False, query={
+                    'return_type': cdn_id,
+                    'broad_key': f'{broadcast_no}-common-master-hls',
+                }), ('view_url', {url_or_none}))
+            try:
+                return self._extract_m3u8_formats(
+                    m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid},
+                    headers={'Referer': 'https://play.afreecatv.com/'})
+            except ExtractorError as e:
+                if attempt == len(cdn_ids):
+                    raise
+                self.report_warning(
+                    f'{e.cause or e.msg}. Retrying... (attempt {attempt} of {len(cdn_ids)})')
 
     def _real_extract(self, url):
         broadcaster_id, broadcast_no = self._match_valid_url(url).group('id', 'bno')
-        password = self.get_param('videopassword')
-
-        info = self._download_json(self._LIVE_API_URL, broadcaster_id, fatal=False,
-                                   data=urlencode_postdata({'bid': broadcaster_id})) or {}
-        channel_info = info.get('CHANNEL') or {}
+        channel_info = traverse_obj(self._download_json(
+            self._LIVE_API_URL, broadcaster_id, data=urlencode_postdata({'bid': broadcaster_id})),
+            ('CHANNEL', {dict})) or {}
+
         broadcaster_id = channel_info.get('BJID') or broadcaster_id
         broadcast_no = channel_info.get('BNO') or broadcast_no
-        password_protected = channel_info.get('BPWD')
         if not broadcast_no:
-            raise ExtractorError(f'Unable to extract broadcast number ({broadcaster_id} may not be live)', expected=True)
-        if password_protected == 'Y' and password is None:
+            raise UserNotLive(video_id=broadcaster_id)
+
+        password = self.get_param('videopassword')
+        if channel_info.get('BPWD') == 'Y' and password is None:
             raise ExtractorError(
                 'This livestream is protected by a password, use the --video-password option',
                 expected=True)
 
-        formats = []
-        quality_key = qualities(self._QUALITIES)
-        for quality_str in self._QUALITIES:
-            params = {
-                'bno': broadcast_no,
-                'stream_type': 'common',
-                'type': 'aid',
-                'quality': quality_str,
-            }
-            if password is not None:
-                params['pwd'] = password
-            aid_response = self._download_json(
-                self._LIVE_API_URL, broadcast_no, fatal=False,
-                data=urlencode_postdata(params),
-                note=f'Downloading access token for {quality_str} stream',
-                errnote=f'Unable to download access token for {quality_str} stream')
-            aid = traverse_obj(aid_response, ('CHANNEL', 'AID'))
-            if not aid:
-                continue
-
-            stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
-            stream_info = self._download_json(
-                f'{stream_base_url}/broad_stream_assign.html', broadcast_no, fatal=False,
-                query={
-                    'return_type': channel_info.get('CDN', 'gcp_cdn'),
-                    'broad_key': f'{broadcast_no}-common-{quality_str}-hls',
-                },
-                note=f'Downloading metadata for {quality_str} stream',
-                errnote=f'Unable to download metadata for {quality_str} stream') or {}
-
-            if stream_info.get('view_url'):
-                formats.append({
-                    'format_id': quality_str,
-                    'url': update_url_query(stream_info['view_url'], {'aid': aid}),
-                    'ext': 'mp4',
-                    'protocol': 'm3u8',
-                    'quality': quality_key(quality_str),
-                })
-
-        station_info = self._download_json(
+        token_info = traverse_obj(self._download_json(
+            self._LIVE_API_URL, broadcast_no, 'Downloading access token for stream',
+            'Unable to download access token for stream', data=urlencode_postdata(filter_dict({
+                'bno': broadcast_no,
+                'stream_type': 'common',
+                'type': 'aid',
+                'quality': 'master',
+                'pwd': password,
+            }))), ('CHANNEL', {dict})) or {}
+        aid = token_info.get('AID')
+        if not aid:
+            result = token_info.get('RESULT')
+            if result == 0:
+                raise ExtractorError('This livestream has ended', expected=True)
+            elif result == -6:
+                self.raise_login_required('This livestream is for subscribers only', method='password')
+            raise ExtractorError('Unable to extract access token')
+
+        formats = self._extract_formats(channel_info, broadcast_no, aid)
+
+        station_info = traverse_obj(self._download_json(
             'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,
-            query={'szBjId': broadcaster_id}, fatal=False,
-            note='Downloading channel metadata', errnote='Unable to download channel metadata') or {}
+            'Downloading channel metadata', 'Unable to download channel metadata',
+            query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {}
 
         return {
             'id': broadcast_no,
@ -427,6 +370,7 @@ class AfreecaTVLiveIE(AfreecaTVIE):  # XXX: Do not subclass from concrete IE
             'timestamp': unified_timestamp(station_info.get('broad_start')),
             'formats': formats,
            'is_live': True,
+            'http_headers': {'Referer': url},
         }
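Note: the new _extract_formats helper walks a list of CDN endpoints in order, preferring the CDN the API reports (unless it is a known-bad one) and then falling back to the known-working CDNs, warning and retrying until the list is exhausted. A rough standalone sketch of just the ordering step, assuming yt-dlp is importable; the sample channel_info dict is made up:

    from yt_dlp.utils import orderedSet, traverse_obj

    _WORKING_CDNS = ['gcp_cdn', 'gs_cdn_pc_app', 'gs_cdn_mobile_web', 'gs_cdn_pc_web']
    _BAD_CDNS = ['gs_cdn', 'gs_cdn_chromecast', 'azure_cdn', 'aws_cf', 'kt_cdn']

    def default_cdn_ids(channel_info):
        # API-provided CDN first (if usable), then the working CDNs, de-duplicated in order
        return orderedSet([
            *traverse_obj(channel_info, ('CDN', {str}, all, lambda _, v: v not in _BAD_CDNS)),
            *_WORKING_CDNS,
        ])

    print(default_cdn_ids({'CDN': 'gs_cdn_pc_web'}))
    # expected: ['gs_cdn_pc_web', 'gcp_cdn', 'gs_cdn_pc_app', 'gs_cdn_mobile_web']

The self._configuration_arg('cdn', default_cdn_ids) call in the diff simply lets a user-supplied extractor-arg replace this default list.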

@ -39,7 +39,7 @@ class AluraIE(InfoExtractor):
 
     def _real_extract(self, url):
 
-        course, video_id = self._match_valid_url(url)
+        course, video_id = self._match_valid_url(url).group('course_name', 'id')
         video_url = self._VIDEO_URL % (course, video_id)
 
         video_dict = self._download_json(video_url, video_id, 'Searching for videos')
@ -52,7 +52,7 @@ class AluraIE(InfoExtractor):
 
         formats = []
         for video_obj in video_dict:
-            video_url_m3u8 = video_obj.get('link')
+            video_url_m3u8 = video_obj.get('mp4')
             video_format = self._extract_m3u8_formats(
                 video_url_m3u8, None, 'mp4', entry_protocol='m3u8_native',
                 m3u8_id='hls', fatal=False)
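Note: the AluraIE change above unpacks both named groups from _match_valid_url in a single call. A tiny illustration with plain re; the pattern and URL below are hypothetical stand-ins, not the extractor's real _VALID_URL:

    import re

    # Hypothetical pattern with the same two named groups as above
    pattern = r'https?://cursos\.example\.com/course/(?P<course_name>[^/]+)/task/(?P<id>\d+)'
    m = re.match(pattern, 'https://cursos.example.com/course/clean-code/task/12345')
    course, video_id = m.group('course_name', 'id')  # ('clean-code', '12345')
    print(course, video_id)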
@ -1,5 +1,5 @@
+import functools
 import re
-from functools import partial
 
 from .common import InfoExtractor
 from ..utils import (
@ -349,7 +349,7 @@ class ARDBetaMediathekIE(InfoExtractor):
             r'(?P<title>.*)',
         ]
 
-        return traverse_obj(patterns, (..., {partial(re.match, string=title)}, {
+        return traverse_obj(patterns, (..., {functools.partial(re.match, string=title)}, {
             'season_number': ('season_number', {int_or_none}),
             'episode_number': ('episode_number', {int_or_none}),
             'episode': ((
yt_dlp/extractor/asobistage.py (new file, 154 lines)
@ -0,0 +1,154 @@
+import functools
+
+from .common import InfoExtractor
+from ..utils import str_or_none, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class AsobiStageIE(InfoExtractor):
+    IE_DESC = 'ASOBISTAGE (アソビステージ)'
+    _VALID_URL = r'https?://asobistage\.asobistore\.jp/event/(?P<id>(?P<event>\w+)/(?P<type>archive|player)/(?P<slug>\w+))(?:[?#]|$)'
+    _TESTS = [{
+        'url': 'https://asobistage.asobistore.jp/event/315passionhour_2022summer/archive/frame',
+        'info_dict': {
+            'id': '315passionhour_2022summer/archive/frame',
+            'title': '315プロダクションプレゼンツ 315パッションアワー!!!',
+            'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
+        },
+        'playlist_count': 1,
+        'playlist': [{
+            'info_dict': {
+                'id': 'edff52f2',
+                'ext': 'mp4',
+                'title': '315passion_FRAME_only',
+                'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
+            },
+        }],
+    }, {
+        'url': 'https://asobistage.asobistore.jp/event/idolmaster_idolworld2023_goods/archive/live',
+        'info_dict': {
+            'id': 'idolmaster_idolworld2023_goods/archive/live',
+            'title': 'md5:378510b6e830129d505885908bd6c576',
+            'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
+        },
+        'playlist_count': 1,
+        'playlist': [{
+            'info_dict': {
+                'id': '3aef7110',
+                'ext': 'mp4',
+                'title': 'asobistore_station_1020_serverREC',
+                'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
+            },
+        }],
+    }, {
+        'url': 'https://asobistage.asobistore.jp/event/sidem_fclive_bpct/archive/premium_hc',
+        'playlist_count': 4,
+        'info_dict': {
+            'id': 'sidem_fclive_bpct/archive/premium_hc',
+            'title': '315 Production presents F@NTASTIC COMBINATION LIVE ~BRAINPOWER!!~/~CONNECTIME!!!!~',
+            'thumbnail': r're:^https?://[\w.-]+/\w+/\w+',
+        },
+    }, {
+        'url': 'https://asobistage.asobistore.jp/event/ijigenfes_utagassen/player/day1',
+        'only_matching': True,
+    }]
+
+    _API_HOST = 'https://asobistage-api.asobistore.jp'
+    _HEADERS = {}
+    _is_logged_in = False
+
+    @functools.cached_property
+    def _owned_tickets(self):
+        owned_tickets = set()
+        if not self._is_logged_in:
+            return owned_tickets
+
+        for path, name in [
+            ('api/v1/purchase_history/list', 'ticket purchase history'),
+            ('api/v1/serialcode/list', 'redemption history'),
+        ]:
+            response = self._download_json(
+                f'{self._API_HOST}/{path}', None, f'Downloading {name}',
+                f'Unable to download {name}', expected_status=400)
+            if traverse_obj(response, ('payload', 'error_message'), 'error') == 'notlogin':
+                self._is_logged_in = False
+                break
+            owned_tickets.update(
+                traverse_obj(response, ('payload', 'value', ..., 'digital_product_id', {str_or_none})))
+
+        return owned_tickets
+
+    def _get_available_channel_id(self, channel):
+        channel_id = traverse_obj(channel, ('chennel_vspf_id', {str}))
+        if not channel_id:
+            return None
+        # if rights_type_id == 6, then 'No conditions (no login required - non-members are OK)'
+        if traverse_obj(channel, ('viewrights', lambda _, v: v['rights_type_id'] == 6)):
+            return channel_id
+        available_tickets = traverse_obj(channel, (
+            'viewrights', ..., ('tickets', 'serialcodes'), ..., 'digital_product_id', {str_or_none}))
+        if not self._owned_tickets.intersection(available_tickets):
+            self.report_warning(
+                f'You are not a ticketholder for "{channel.get("channel_name") or channel_id}"')
+            return None
+        return channel_id
+
+    def _real_initialize(self):
+        if self._get_cookies(self._API_HOST):
+            self._is_logged_in = True
+        token = self._download_json(
+            f'{self._API_HOST}/api/v1/vspf/token', None, 'Getting token', 'Unable to get token')
+        self._HEADERS['Authorization'] = f'Bearer {token}'
+
+    def _real_extract(self, url):
+        video_id, event, type_, slug = self._match_valid_url(url).group('id', 'event', 'type', 'slug')
+        video_type = {'archive': 'archives', 'player': 'broadcasts'}[type_]
+        webpage = self._download_webpage(url, video_id)
+        event_data = traverse_obj(
+            self._search_nextjs_data(webpage, video_id, default={}),
+            ('props', 'pageProps', 'eventCMSData', {
+                'title': ('event_name', {str}),
+                'thumbnail': ('event_thumbnail_image', {url_or_none}),
+            }))
+
+        available_channels = traverse_obj(self._download_json(
+            f'https://asobistage.asobistore.jp/cdn/v101/events/{event}/{video_type}.json',
+            video_id, 'Getting channel list', 'Unable to get channel list'), (
+            video_type, lambda _, v: v['broadcast_slug'] == slug,
+            'channels', lambda _, v: v['chennel_vspf_id'] != '00000'))
+
+        entries = []
+        for channel_id in traverse_obj(available_channels, (..., {self._get_available_channel_id})):
+            if video_type == 'archives':
+                channel_json = self._download_json(
+                    f'https://survapi.channel.or.jp/proxy/v1/contents/{channel_id}/get_by_cuid', channel_id,
+                    'Getting archive channel info', 'Unable to get archive channel info', fatal=False,
+                    headers=self._HEADERS)
+                channel_data = traverse_obj(channel_json, ('ex_content', {
+                    'm3u8_url': 'streaming_url',
+                    'title': 'title',
+                    'thumbnail': ('thumbnail', 'url'),
+                }))
+            else:  # video_type == 'broadcasts'
+                channel_json = self._download_json(
+                    f'https://survapi.channel.or.jp/ex/events/{channel_id}', channel_id,
+                    'Getting live channel info', 'Unable to get live channel info', fatal=False,
+                    headers=self._HEADERS, query={'embed': 'channel'})
+                channel_data = traverse_obj(channel_json, ('data', {
+                    'm3u8_url': ('Channel', 'Custom_live_url'),
+                    'title': 'Name',
+                    'thumbnail': 'Poster_url',
+                }))
+
+            entries.append({
+                'id': channel_id,
+                'title': channel_data.get('title'),
+                'formats': self._extract_m3u8_formats(channel_data.get('m3u8_url'), channel_id, fatal=False),
+                'is_live': video_type == 'broadcasts',
+                'thumbnail': url_or_none(channel_data.get('thumbnail')),
+            })
+
+        if not self._is_logged_in and not entries:
+            self.raise_login_required()
+
+        return self.playlist_result(entries, video_id, **event_data)
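Note: in the new AsobiStageIE above, _get_available_channel_id decides whether a channel is playable by intersecting the viewer's owned ticket IDs with the ticket/serial-code IDs the channel accepts. A minimal sketch of that check with made-up IDs:

    # Hypothetical IDs; the real values come from the purchase-history and serial-code APIs above
    owned_tickets = {'1001', '2002'}

    def is_ticketholder(available_tickets):
        # playable if the user owns at least one accepted ticket or serial code
        return bool(owned_tickets.intersection(available_tickets))

    print(is_ticketholder(['3003', '2002']))  # True
    print(is_ticketholder(['3003']))          # False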
@ -1,4 +1,4 @@
-import datetime
+import datetime as dt
 
 from .common import InfoExtractor
 from ..utils import (
@ -71,9 +71,9 @@ class ATVAtIE(InfoExtractor):
         content_ids = [{'id': id, 'subclip_start': content['start'], 'subclip_end': content['end']}
                        for id, content in enumerate(contentResource)]
 
-        time_of_request = datetime.datetime.now()
-        not_before = time_of_request - datetime.timedelta(minutes=5)
-        expire = time_of_request + datetime.timedelta(minutes=5)
+        time_of_request = dt.datetime.now()
+        not_before = time_of_request - dt.timedelta(minutes=5)
+        expire = time_of_request + dt.timedelta(minutes=5)
         payload = {
             'content_ids': {
                 content_id: content_ids,

@ -1,4 +1,4 @@
-import datetime
+import datetime as dt
 import hashlib
 import hmac
 
@ -12,7 +12,7 @@ class AWSIE(InfoExtractor):  # XXX: Conventionally, base classes should end with
 
     def _aws_execute_api(self, aws_dict, video_id, query=None):
         query = query or {}
-        amz_date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%dT%H%M%SZ')
+        amz_date = dt.datetime.now(dt.timezone.utc).strftime('%Y%m%dT%H%M%SZ')
         date = amz_date[:8]
         headers = {
             'Accept': 'application/json',
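Note: the two changes above only alias the standard-library import (import datetime as dt); behaviour is unchanged. For reference, the aliased expressions look like this outside an extractor (values are illustrative):

    import datetime as dt

    now = dt.datetime.now(dt.timezone.utc)
    amz_date = now.strftime('%Y%m%dT%H%M%SZ')   # e.g. '20240101T120000Z', as used for AWS request signing above
    not_before = now - dt.timedelta(minutes=5)  # the ±5 minute window used in the ATVAt payload above
    expire = now + dt.timedelta(minutes=5)
    print(amz_date[:8])  # date portion, e.g. '20240101'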
@ -602,7 +602,7 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
         'url': 'http://www.bbc.com/news/world-europe-32668511',
         'info_dict': {
             'id': 'world-europe-32668511',
-            'title': 'Russia stages massive WW2 parade',
+            'title': 'Russia stages massive WW2 parade despite Western boycott',
             'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
         },
         'playlist_count': 2,
@ -623,6 +623,7 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
         'info_dict': {
             'id': '3662a707-0af9-3149-963f-47bea720b460',
             'title': 'BUGGER',
+            'description': r're:BUGGER The recent revelations by the whistleblower Edward Snowden were fascinating. .{211}\.{3}$',
         },
         'playlist_count': 18,
     }, {
@ -631,14 +632,14 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
         'info_dict': {
             'id': 'p02mprgb',
             'ext': 'mp4',
-            'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
-            'description': 'md5:2868290467291b37feda7863f7a83f54',
+            'title': 'Germanwings crash site aerial video',
+            'description': r're:(?s)Aerial video showed the site where the Germanwings flight 4U 9525, .{156} BFM TV\.$',
             'duration': 47,
             'timestamp': 1427219242,
             'upload_date': '20150324',
+            'thumbnail': 'https://ichef.bbci.co.uk/news/1024/media/images/81879000/jpg/_81879090_81879089.jpg',
         },
         'params': {
-            # rtmp download
             'skip_download': True,
         }
     }, {
@ -656,21 +657,24 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
         },
         'params': {
             'skip_download': True,
-        }
+        },
+        'skip': 'now SIMORGH_DATA with no video',
     }, {
         # single video embedded with data-playable containing XML playlists (regional section)
         'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
         'info_dict': {
-            'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
+            'id': '39275083',
+            'display_id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
             'ext': 'mp4',
             'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
-            'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
+            'description': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
             'timestamp': 1434713142,
             'upload_date': '20150619',
+            'thumbnail': 'https://a.files.bbci.co.uk/worldservice/live/assets/images/2015/06/19/150619132146_honduras_hsopitales_militares_640x360_aptn_nocredit.jpg',
         },
         'params': {
             'skip_download': True,
-        }
+        },
     }, {
         # single video from video playlist embedded with vxp-playlist-data JSON
         'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
@ -683,22 +687,21 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
         },
         'params': {
             'skip_download': True,
-        }
+        },
+        'skip': '404 Not Found',
     }, {
-        # single video story with digitalData
+        # single video story with __PWA_PRELOADED_STATE__
         'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
         'info_dict': {
             'id': 'p02q6gc4',
-            'ext': 'flv',
-            'title': 'Sri Lanka’s spicy secret',
-            'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
-            'timestamp': 1437674293,
-            'upload_date': '20150723',
+            'ext': 'mp4',
+            'title': 'Tasting the spice of life in Jaffna',
+            'description': r're:(?s)BBC Travel Show’s Henry Golding explores the city of Jaffna .{151} aftertaste\.$',
+            'timestamp': 1646058397,
+            'upload_date': '20220228',
+            'duration': 255,
+            'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1920xn/p02vxvkn.jpg',
         },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        }
     }, {
         # single video story without digitalData
         'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
@ -710,12 +713,10 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
             'timestamp': 1415867444,
             'upload_date': '20141113',
         },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        }
+        'skip': 'redirects to TopGear home page',
     }, {
         # single video embedded with Morph
+        # TODO: replacement test page
         'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
         'info_dict': {
             'id': 'p041vhd0',
@ -726,27 +727,22 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
             'uploader': 'BBC Sport',
             'uploader_id': 'bbc_sport',
         },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-        'skip': 'Georestricted to UK',
+        'skip': 'Video no longer in page',
     }, {
-        # single video with playlist.sxml URL in playlist param
+        # single video in __INITIAL_DATA__
         'url': 'http://www.bbc.com/sport/0/football/33653409',
         'info_dict': {
             'id': 'p02xycnp',
             'ext': 'mp4',
-            'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
-            'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
+            'title': 'Ronaldo to Man Utd, Arsenal to spend?',
+            'description': r're:(?s)BBC Sport\'s David Ornstein rounds up the latest transfer reports, .{359} here\.$',
+            'timestamp': 1437750175,
+            'upload_date': '20150724',
+            'thumbnail': r're:https?://.+/.+media/images/69320000/png/_69320754_mmgossipcolumnextraaugust18.png',
             'duration': 140,
         },
-        'params': {
-            # rtmp download
-            'skip_download': True,
-        }
     }, {
-        # article with multiple videos embedded with playlist.sxml in playlist param
+        # article with multiple videos embedded with Morph.setPayload
         'url': 'http://www.bbc.com/sport/0/football/34475836',
         'info_dict': {
             'id': '34475836',
@ -754,6 +750,21 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
             'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
         },
         'playlist_count': 3,
+    }, {
+        # Testing noplaylist
+        'url': 'http://www.bbc.com/sport/0/football/34475836',
+        'info_dict': {
+            'id': 'p034ppnv',
+            'ext': 'mp4',
+            'title': 'All you need to know about Jurgen Klopp',
+            'timestamp': 1444335081,
+            'upload_date': '20151008',
+            'duration': 122.0,
+            'thumbnail': 'https://ichef.bbci.co.uk/onesport/cps/976/cpsprodpb/7542/production/_85981003_klopp.jpg',
+        },
+        'params': {
+            'noplaylist': True,
+        },
     }, {
         # school report article with single video
         'url': 'http://www.bbc.co.uk/schoolreport/35744779',
@ -762,6 +773,7 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
             'title': 'School which breaks down barriers in Jerusalem',
         },
         'playlist_count': 1,
+        'skip': 'redirects to Young Reporter home page https://www.bbc.co.uk/news/topics/cg41ylwv43pt',
     }, {
         # single video with playlist URL from weather section
         'url': 'http://www.bbc.com/weather/features/33601775',
@ -778,18 +790,33 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
             'thumbnail': r're:https?://.+/.+\.jpg',
             'timestamp': 1437785037,
             'upload_date': '20150725',
+            'duration': 105,
         },
     }, {
         # video with window.__INITIAL_DATA__ and value as JSON string
         'url': 'https://www.bbc.com/news/av/world-europe-59468682',
         'info_dict': {
-            'id': 'p0b71qth',
+            'id': 'p0b779gc',
             'ext': 'mp4',
             'title': 'Why France is making this woman a national hero',
-            'description': 'md5:7affdfab80e9c3a1f976230a1ff4d5e4',
+            'description': r're:(?s)France is honouring the US-born 20th Century singer and activist Josephine .{208} Second World War.',
             'thumbnail': r're:https?://.+/.+\.jpg',
-            'timestamp': 1638230731,
-            'upload_date': '20211130',
+            'timestamp': 1638215626,
+            'upload_date': '20211129',
+            'duration': 125,
+        },
+    }, {
+        # video with script id __NEXT_DATA__ and value as JSON string
+        'url': 'https://www.bbc.com/news/uk-68546268',
+        'info_dict': {
+            'id': 'p0hj0lq7',
+            'ext': 'mp4',
+            'title': 'Nasser Hospital doctor describes his treatment by IDF',
+            'description': r're:(?s)Doctor Abu Sabha said he was detained by Israeli forces after .{276} hostages\."$',
+            'thumbnail': r're:https?://.+/.+\.jpg',
+            'timestamp': 1710188248,
+            'upload_date': '20240311',
+            'duration': 104,
         },
     }, {
         # single video article embedded with data-media-vpid
@ -817,6 +844,7 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
             'uploader': 'Radio 3',
             'uploader_id': 'bbc_radio_three',
         },
+        'skip': '404 Not Found',
     }, {
         'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
         'info_dict': {
@ -824,6 +852,7 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
             'ext': 'mp4',
             'title': 'md5:2fabf12a726603193a2879a055f72514',
             'description': 'Learn English words and phrases from this story',
+            'thumbnail': 'https://ichef.bbci.co.uk/images/ic/1200x675/p06pq9gk.jpg',
         },
         'add_ie': [BBCCoUkIE.ie_key()],
     }, {
@ -832,28 +861,30 @@ class BBCIE(BBCCoUkIE):  # XXX: Do not subclass from concrete IE
         'info_dict': {
             'id': 'p07c6sb9',
             'ext': 'mp4',
-            'title': 'How positive thinking is harming your happiness',
-            'alt_title': 'The downsides of positive thinking',
-            'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
+            'title': 'The downsides of positive thinking',
+            'description': 'The downsides of positive thinking',
             'duration': 235,
-            'thumbnail': r're:https?://.+/p07c9dsr.jpg',
-            'upload_date': '20190604',
-            'categories': ['Psychology'],
+            'thumbnail': r're:https?://.+/p07c9dsr\.(?:jpg|webp|png)',
+            'upload_date': '20220223',
+            'timestamp': 1645632746,
         },
     }, {
         # BBC Sounds
-        'url': 'https://www.bbc.co.uk/sounds/play/m001q78b',
+        'url': 'https://www.bbc.co.uk/sounds/play/w3ct5rgx',
         'info_dict': {
-            'id': 'm001q789',
+            'id': 'p0hrw4nr',
             'ext': 'mp4',
-            'title': 'The Night Tracks Mix - Music for the darkling hour',
-            'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0c00hym.jpg',
-            'chapters': 'count:8',
-            'description': 'md5:815fb51cbdaa270040aab8145b3f1d67',
-            'uploader': 'Radio 3',
-            'duration': 1800,
-            'uploader_id': 'bbc_radio_three',
-        },
+            'title': 'Are our coastlines being washed away?',
+            'description': r're:(?s)Around the world, coastlines are constantly changing .{2000,} Images\)$',
+            'timestamp': 1713556800,
+            'upload_date': '20240419',
+            'duration': 1588,
+            'thumbnail': 'https://ichef.bbci.co.uk/images/ic/raw/p0hrnxbl.jpg',
+            'uploader': 'World Service',
+            'uploader_id': 'bbc_world_service',
+            'series': 'CrowdScience',
+            'chapters': [],
+        }
     }, {  # onion routes
         'url': 'https://www.bbcnewsd73hkzno2ini43t4gblxvycyac5aw4gnv7t2rccijh7745uqd.onion/news/av/world-europe-63208576',
         'only_matching': True,
@ -1008,8 +1039,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
webpage, 'group id', default=None)
|
webpage, 'group id', default=None)
|
||||||
if group_id:
|
if group_id:
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
f'https://www.bbc.co.uk/programmes/{group_id}', BBCCoUkIE)
|
||||||
ie=BBCCoUkIE.ie_key())
|
|
||||||
|
|
||||||
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
||||||
programme_id = self._search_regex(
|
programme_id = self._search_regex(
|
||||||
|
@ -1069,84 +1099,134 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
}
|
}
|
||||||
|
|
||||||
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
|
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
|
||||||
# There are several setPayload calls may be present but the video
|
# Several setPayload calls may be present but the video(s)
|
||||||
# seems to be always related to the first one
|
# should be in one that mentions leadMedia or videoData
|
||||||
morph_payload = self._parse_json(
|
morph_payload = self._search_json(
|
||||||
self._search_regex(
|
r'\bMorph\s*\.\s*setPayload\s*\([^,]+,', webpage, 'morph payload', playlist_id,
|
||||||
r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
|
contains_pattern=r'{(?s:(?:(?!</script>).)+(?:"leadMedia"|\\"videoData\\")\s*:.+)}',
|
||||||
webpage, 'morph payload', default='{}'),
|
default={})
|
||||||
playlist_id, fatal=False)
|
|
||||||
if morph_payload:
|
if morph_payload:
|
||||||
components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
|
for lead_media in traverse_obj(morph_payload, (
|
||||||
for component in components:
|
'body', 'components', ..., 'props', 'leadMedia', {dict})):
|
||||||
if not isinstance(component, dict):
|
programme_id = traverse_obj(lead_media, ('identifiers', ('vpid', 'playablePid'), {str}, any))
|
||||||
continue
|
|
||||||
lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
|
|
||||||
if not lead_media:
|
|
||||||
continue
|
|
||||||
identifiers = lead_media.get('identifiers')
|
|
||||||
if not identifiers or not isinstance(identifiers, dict):
|
|
||||||
continue
|
|
||||||
programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
|
|
||||||
if not programme_id:
|
if not programme_id:
|
||||||
continue
|
continue
|
||||||
title = lead_media.get('title') or self._og_search_title(webpage)
|
|
||||||
formats, subtitles = self._download_media_selector(programme_id)
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
description = lead_media.get('summary')
|
|
||||||
uploader = lead_media.get('masterBrand')
|
|
||||||
uploader_id = lead_media.get('mid')
|
|
||||||
duration = None
|
|
||||||
duration_d = lead_media.get('duration')
|
|
||||||
if isinstance(duration_d, dict):
|
|
||||||
duration = parse_duration(dict_get(
|
|
||||||
duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
|
|
||||||
return {
|
return {
|
||||||
'id': programme_id,
|
'id': programme_id,
|
||||||
'title': title,
|
'title': lead_media.get('title') or self._og_search_title(webpage),
|
||||||
'description': description,
|
**traverse_obj(lead_media, {
|
||||||
'duration': duration,
|
'description': ('summary', {str}),
|
||||||
'uploader': uploader,
|
'duration': ('duration', ('rawDuration', 'formattedDuration', 'spokenDuration'), {parse_duration}),
|
||||||
'uploader_id': uploader_id,
|
'uploader': ('masterBrand', {str}),
|
||||||
|
'uploader_id': ('mid', {str}),
|
||||||
|
}),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
body = self._parse_json(traverse_obj(morph_payload, (
|
||||||
|
'body', 'content', 'article', 'body')), playlist_id, fatal=False)
|
||||||
|
for video_data in traverse_obj(body, (lambda _, v: v['videoData']['pid'], 'videoData')):
|
||||||
|
if video_data.get('vpid'):
|
||||||
|
video_id = video_data['vpid']
|
||||||
|
formats, subtitles = self._download_media_selector(video_id)
|
||||||
|
entry = {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
video_id = video_data['pid']
|
||||||
|
entry = self.url_result(
|
||||||
|
f'https://www.bbc.co.uk/programmes/{video_id}', BBCCoUkIE,
|
||||||
|
video_id, url_transparent=True)
|
||||||
|
entry.update({
|
||||||
|
'timestamp': traverse_obj(morph_payload, (
|
||||||
|
'body', 'content', 'article', 'dateTimeInfo', 'dateTime', {parse_iso8601})
|
||||||
|
),
|
||||||
|
**traverse_obj(video_data, {
|
||||||
|
'thumbnail': (('iChefImage', 'image'), {url_or_none}, any),
|
||||||
|
'title': (('title', 'caption'), {str}, any),
|
||||||
|
'duration': ('duration', {parse_duration}),
|
||||||
|
}),
|
||||||
|
})
|
||||||
|
if video_data.get('isLead') and not self._yes_playlist(playlist_id, video_id):
|
||||||
|
return entry
|
||||||
|
entries.append(entry)
|
||||||
|
if entries:
|
||||||
|
playlist_title = traverse_obj(morph_payload, (
|
||||||
|
'body', 'content', 'article', 'headline', {str})) or playlist_title
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
preload_state = self._parse_json(self._search_regex(
|
# various PRELOADED_STATE JSON
|
||||||
r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
preload_state = self._search_json(
|
||||||
'preload state', default='{}'), playlist_id, fatal=False)
|
r'window\.__(?:PWA_)?PRELOADED_STATE__\s*=', webpage,
|
||||||
if preload_state:
|
'preload state', playlist_id, transform_source=js_to_json, default={})
|
||||||
current_programme = preload_state.get('programmes', {}).get('current') or {}
|
# PRELOADED_STATE with current programmme
|
||||||
programme_id = current_programme.get('id')
|
current_programme = traverse_obj(preload_state, ('programmes', 'current', {dict}))
|
||||||
if current_programme and programme_id and current_programme.get('type') == 'playable_item':
|
programme_id = traverse_obj(current_programme, ('id', {str}))
|
||||||
title = current_programme.get('titles', {}).get('tertiary') or playlist_title
|
if programme_id and current_programme.get('type') == 'playable_item':
|
||||||
|
title = traverse_obj(current_programme, ('titles', ('tertiary', 'secondary'), {str}, any)) or playlist_title
|
||||||
formats, subtitles = self._download_media_selector(programme_id)
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
synopses = current_programme.get('synopses') or {}
|
|
||||||
network = current_programme.get('network') or {}
|
|
||||||
duration = int_or_none(
|
|
||||||
current_programme.get('duration', {}).get('value'))
|
|
||||||
thumbnail = None
|
|
||||||
image_url = current_programme.get('image_url')
|
|
||||||
if image_url:
|
|
||||||
thumbnail = image_url.replace('{recipe}', 'raw')
|
|
||||||
return {
|
return {
|
||||||
'id': programme_id,
|
'id': programme_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': dict_get(synopses, ('long', 'medium', 'short')),
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'duration': duration,
|
|
||||||
'uploader': network.get('short_title'),
|
|
||||||
'uploader_id': network.get('id'),
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
**traverse_obj(current_programme, {
|
||||||
|
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||||
|
'thumbnail': ('image_url', {lambda u: url_or_none(u.replace('{recipe}', 'raw'))}),
|
||||||
|
'duration': ('duration', 'value', {int_or_none}),
|
||||||
|
'uploader': ('network', 'short_title', {str}),
|
||||||
|
'uploader_id': ('network', 'id', {str}),
|
||||||
|
'timestamp': ((('availability', 'from'), ('release', 'date')), {parse_iso8601}, any),
|
||||||
|
'series': ('titles', 'primary', {str}),
|
||||||
|
}),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'chapters': traverse_obj(preload_state, (
|
'chapters': traverse_obj(preload_state, (
|
||||||
'tracklist', 'tracks', lambda _, v: float_or_none(v['offset']['start']), {
|
'tracklist', 'tracks', lambda _, v: float(v['offset']['start']), {
|
||||||
'title': ('titles', {lambda x: join_nonempty(
|
'title': ('titles', {lambda x: join_nonempty(
|
||||||
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
|
'primary', 'secondary', 'tertiary', delim=' - ', from_dict=x)}),
|
||||||
'start_time': ('offset', 'start', {float_or_none}),
|
'start_time': ('offset', 'start', {float_or_none}),
|
||||||
'end_time': ('offset', 'end', {float_or_none}),
|
'end_time': ('offset', 'end', {float_or_none}),
|
||||||
})) or None,
|
})
|
||||||
|
),
|
||||||
}
|
}
|
||||||
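
The chapters path above shows the other side of `traverse_obj`: a `lambda _, v:` key branches over every track that passes the predicate, and each surviving track is then shaped by a nested dict template. A self-contained sketch with invented tracklist data:

    # Self-contained sketch of the chapter traversal above, using invented data.
    from yt_dlp.utils import float_or_none, join_nonempty, traverse_obj

    preload_state = {'tracklist': {'tracks': [
        {'titles': {'primary': 'Artist', 'secondary': 'Song'},
         'offset': {'start': 12.0, 'end': 150.0}},
        {'titles': {'primary': 'Interview'}, 'offset': {}},  # no start, filtered out
    ]}}

    chapters = traverse_obj(preload_state, (
        'tracklist', 'tracks', lambda _, v: v['offset'].get('start') is not None, {
            'title': ('titles', {lambda x: join_nonempty(
                'primary', 'secondary', delim=' - ', from_dict=x)}),
            'start_time': ('offset', 'start', {float_or_none}),
            'end_time': ('offset', 'end', {float_or_none}),
        }))
    print(chapters)  # [{'title': 'Artist - Song', 'start_time': 12.0, 'end_time': 150.0}]
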
|
|
||||||
|
# PWA_PRELOADED_STATE with article video asset
|
||||||
|
asset_id = traverse_obj(preload_state, (
|
||||||
|
'entities', 'articles', lambda k, _: k.rsplit('/', 1)[-1] == playlist_id,
|
||||||
|
'assetVideo', 0, {str}, any))
|
||||||
|
if asset_id:
|
||||||
|
video_id = traverse_obj(preload_state, ('entities', 'videos', asset_id, 'vpid', {str}))
|
||||||
|
if video_id:
|
||||||
|
article = traverse_obj(preload_state, (
|
||||||
|
'entities', 'articles', lambda _, v: v['assetVideo'][0] == asset_id, any))
|
||||||
|
|
||||||
|
def image_url(image_id):
|
||||||
|
return traverse_obj(preload_state, (
|
||||||
|
'entities', 'images', image_id, 'url',
|
||||||
|
{lambda u: url_or_none(u.replace('$recipe', 'raw'))}))
|
||||||
|
|
||||||
|
formats, subtitles = self._download_media_selector(video_id)
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
**traverse_obj(preload_state, ('entities', 'videos', asset_id, {
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'description': (('synopsisLong', 'synopsisMedium', 'synopsisShort'), {str}, any),
|
||||||
|
'thumbnail': (0, {image_url}),
|
||||||
|
'duration': ('duration', {int_or_none}),
|
||||||
|
})),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'timestamp': traverse_obj(article, ('displayDate', {parse_iso8601})),
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return self.url_result(
|
||||||
|
f'https://www.bbc.co.uk/programmes/{asset_id}', BBCCoUkIE,
|
||||||
|
asset_id, playlist_title, display_id=playlist_id,
|
||||||
|
description=playlist_description)
|
||||||
|
|
||||||
bbc3_config = self._parse_json(
|
bbc3_config = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
|
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
|
||||||
|
@ -1191,6 +1271,28 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, playlist_id, playlist_title, playlist_description)
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
|
def parse_model(model):
|
||||||
|
"""Extract single video from model structure"""
|
||||||
|
item_id = traverse_obj(model, ('versions', 0, 'versionId', {str}))
|
||||||
|
if not item_id:
|
||||||
|
return
|
||||||
|
formats, subtitles = self._download_media_selector(item_id)
|
||||||
|
return {
|
||||||
|
'id': item_id,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
**traverse_obj(model, {
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||||
|
'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),
|
||||||
|
'duration': ('versions', 0, 'duration', {int}),
|
||||||
|
'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
def is_type(*types):
|
||||||
|
return lambda _, v: v['type'] in types
|
||||||
|
|
||||||
initial_data = self._search_regex(
|
initial_data = self._search_regex(
|
||||||
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
|
r'window\.__INITIAL_DATA__\s*=\s*("{.+?}")\s*;', webpage,
|
||||||
'quoted preload state', default=None)
|
'quoted preload state', default=None)
|
||||||
|
@ -1202,6 +1304,19 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
|
initial_data = self._parse_json(initial_data or '"{}"', playlist_id, fatal=False)
|
||||||
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
|
initial_data = self._parse_json(initial_data, playlist_id, fatal=False)
|
||||||
if initial_data:
|
if initial_data:
|
||||||
|
for video_data in traverse_obj(initial_data, (
|
||||||
|
'stores', 'article', 'articleBodyContent', is_type('video'))):
|
||||||
|
model = traverse_obj(video_data, (
|
||||||
|
'model', 'blocks', is_type('aresMedia'),
|
||||||
|
'model', 'blocks', is_type('aresMediaMetadata'),
|
||||||
|
'model', {dict}, any))
|
||||||
|
entry = parse_model(model)
|
||||||
|
if entry:
|
||||||
|
entries.append(entry)
|
||||||
|
if entries:
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
def parse_media(media):
|
def parse_media(media):
|
||||||
if not media:
|
if not media:
|
||||||
return
|
return
|
||||||
|
@ -1234,19 +1349,64 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'timestamp': item_time,
|
'timestamp': item_time,
|
||||||
'description': strip_or_none(item_desc),
|
'description': strip_or_none(item_desc),
|
||||||
|
'duration': int_or_none(item.get('duration')),
|
||||||
})
|
})
|
||||||
for resp in (initial_data.get('data') or {}).values():
|
|
||||||
name = resp.get('name')
|
for resp in traverse_obj(initial_data, ('data', lambda _, v: v['name'])):
|
||||||
|
name = resp['name']
|
||||||
if name == 'media-experience':
|
if name == 'media-experience':
|
||||||
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
||||||
elif name == 'article':
|
elif name == 'article':
|
||||||
for block in (try_get(resp,
|
for block in traverse_obj(resp, (
|
||||||
(lambda x: x['data']['blocks'],
|
'data', (None, ('content', 'model')), 'blocks',
|
||||||
lambda x: x['data']['content']['model']['blocks'],),
|
is_type('media', 'video'), 'model', {dict})):
|
||||||
list) or []):
|
parse_media(block)
|
||||||
if block.get('type') not in ['media', 'video']:
|
return self.playlist_result(
|
||||||
continue
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
parse_media(block.get('model'))
|
|
||||||
|
# extract from SIMORGH_DATA hydration JSON
|
||||||
|
simorgh_data = self._search_json(
|
||||||
|
r'window\s*\.\s*SIMORGH_DATA\s*=', webpage,
|
||||||
|
'simorgh data', playlist_id, default={})
|
||||||
|
if simorgh_data:
|
||||||
|
done = False
|
||||||
|
for video_data in traverse_obj(simorgh_data, (
|
||||||
|
'pageData', 'content', 'model', 'blocks', is_type('video', 'legacyMedia'))):
|
||||||
|
model = traverse_obj(video_data, (
|
||||||
|
'model', 'blocks', is_type('aresMedia'),
|
||||||
|
'model', 'blocks', is_type('aresMediaMetadata'),
|
||||||
|
'model', {dict}, any))
|
||||||
|
if video_data['type'] == 'video':
|
||||||
|
entry = parse_model(model)
|
||||||
|
else: # legacyMedia: no duration, subtitles
|
||||||
|
block_id, entry = traverse_obj(model, ('blockId', {str})), None
|
||||||
|
media_data = traverse_obj(simorgh_data, (
|
||||||
|
'pageData', 'promo', 'media',
|
||||||
|
{lambda x: x if x['id'] == block_id else None}))
|
||||||
|
formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
|
||||||
|
'url': ('url', {url_or_none}),
|
||||||
|
'ext': ('format', {str}),
|
||||||
|
'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
|
||||||
|
}))
|
||||||
|
if formats:
|
||||||
|
entry = {
|
||||||
|
'id': block_id,
|
||||||
|
'display_id': playlist_id,
|
||||||
|
'formats': formats,
|
||||||
|
'description': traverse_obj(simorgh_data, ('pageData', 'promo', 'summary', {str})),
|
||||||
|
**traverse_obj(model, {
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
|
||||||
|
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
|
||||||
|
'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
done = True
|
||||||
|
if entry:
|
||||||
|
entries.append(entry)
|
||||||
|
if done:
|
||||||
|
break
|
||||||
|
if entries:
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, playlist_id, playlist_title, playlist_description)
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
|
@ -1255,6 +1415,24 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
|
||||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||||
re.findall(pattern, webpage))))
|
re.findall(pattern, webpage))))
|
||||||
|
|
||||||
|
# US accessed article with single embedded video (e.g.
|
||||||
|
# https://www.bbc.com/news/uk-68546268)
|
||||||
|
next_data = traverse_obj(self._search_nextjs_data(webpage, playlist_id, default={}),
|
||||||
|
('props', 'pageProps', 'page'))
|
||||||
|
model = traverse_obj(next_data, (
|
||||||
|
..., 'contents', is_type('video'),
|
||||||
|
'model', 'blocks', is_type('media'),
|
||||||
|
'model', 'blocks', is_type('mediaMetadata'),
|
||||||
|
'model', {dict}, any))
|
||||||
|
if model and (entry := parse_model(model)):
|
||||||
|
if not entry.get('timestamp'):
|
||||||
|
entry['timestamp'] = traverse_obj(next_data, (
|
||||||
|
..., 'contents', is_type('timestamp'), 'model',
|
||||||
|
'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
|
||||||
|
entries.append(entry)
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
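
The `_search_nextjs_data()` call above reads the Next.js hydration state that newer bbc.com article pages embed. Roughly, and ignoring the robustness of the real helper, that boils down to pulling the JSON out of the `__NEXT_DATA__` script tag; the page snippet below is synthetic:

    # Rough, standalone approximation of _search_nextjs_data(); the page snippet
    # is synthetic and the real helper is considerably more tolerant.
    import json
    import re

    webpage = '''
    <script id="__NEXT_DATA__" type="application/json">
    {"props": {"pageProps": {"page": {"home": {"contents": [{"type": "video"}]}}}}}
    </script>
    '''

    match = re.search(
        r'<script[^>]+id=(["\'])__NEXT_DATA__\1[^>]*>\s*(?P<json>{.+?})\s*</script>',
        webpage, re.DOTALL)
    next_data = json.loads(match.group('json')) if match else {}
    print(next_data['props']['pageProps']['page'])
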
|
|
||||||
# Multiple video article (e.g.
|
# Multiple video article (e.g.
|
||||||
# http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
|
# http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
|
||||||
EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
|
EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from functools import partial
|
import functools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
@ -50,7 +50,7 @@ class BibelTVBaseIE(InfoExtractor):
|
||||||
**traverse_obj(data, {
|
**traverse_obj(data, {
|
||||||
'title': 'title',
|
'title': 'title',
|
||||||
'description': 'description',
|
'description': 'description',
|
||||||
'duration': ('duration', {partial(int_or_none, scale=1000)}),
|
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
|
||||||
'timestamp': ('schedulingStart', {parse_iso8601}),
|
'timestamp': ('schedulingStart', {parse_iso8601}),
|
||||||
'season_number': 'seasonNumber',
|
'season_number': 'seasonNumber',
|
||||||
'episode_number': 'episodeNumber',
|
'episode_number': 'episodeNumber',
|
||||||
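
Several of the rewritten traversals (the BibelTV duration above, the BBC `availableFrom`/`firstPublished` timestamps earlier) wrap `functools.partial(int_or_none, scale=1000)` to turn millisecond API values into seconds. A quick illustration with made-up values:

    # Quick illustration, with made-up values, of the scale=1000 transform used above:
    # millisecond fields become seconds and non-numeric input degrades to None.
    import functools

    from yt_dlp.utils import int_or_none

    ms_to_s = functools.partial(int_or_none, scale=1000)

    print(ms_to_s(137000))           # 137   (duration in ms)
    print(ms_to_s('1711681200000'))  # 1711681200   (epoch in ms)
    print(ms_to_s(None))             # None  (missing field stays None)
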
|
|
|
@ -93,11 +93,11 @@ class BilibiliBaseIE(InfoExtractor):
|
||||||
|
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _download_playinfo(self, video_id, cid):
|
def _download_playinfo(self, video_id, cid, headers=None):
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
'https://api.bilibili.com/x/player/playurl', video_id,
|
'https://api.bilibili.com/x/player/playurl', video_id,
|
||||||
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
|
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
|
||||||
note=f'Downloading video formats for cid {cid}')['data']
|
note=f'Downloading video formats for cid {cid}', headers=headers)['data']
|
||||||
|
|
||||||
def json2srt(self, json_data):
|
def json2srt(self, json_data):
|
||||||
srt_data = ''
|
srt_data = ''
|
||||||
|
@ -493,7 +493,8 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
headers = self.geo_verification_headers()
|
||||||
|
webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
|
||||||
if not self._match_valid_url(urlh.url):
|
if not self._match_valid_url(urlh.url):
|
||||||
return self.url_result(urlh.url)
|
return self.url_result(urlh.url)
|
||||||
|
|
||||||
|
@ -531,7 +532,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
self._download_json(
|
self._download_json(
|
||||||
'https://api.bilibili.com/x/player/pagelist', video_id,
|
'https://api.bilibili.com/x/player/pagelist', video_id,
|
||||||
fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
|
fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
|
||||||
note='Extracting videos in anthology'),
|
note='Extracting videos in anthology', headers=headers),
|
||||||
'data', expected_type=list) or []
|
'data', expected_type=list) or []
|
||||||
is_anthology = len(page_list_json) > 1
|
is_anthology = len(page_list_json) > 1
|
||||||
|
|
||||||
|
@ -552,7 +553,7 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
|
|
||||||
festival_info = {}
|
festival_info = {}
|
||||||
if is_festival:
|
if is_festival:
|
||||||
play_info = self._download_playinfo(video_id, cid)
|
play_info = self._download_playinfo(video_id, cid, headers=headers)
|
||||||
|
|
||||||
festival_info = traverse_obj(initial_state, {
|
festival_info = traverse_obj(initial_state, {
|
||||||
'uploader': ('videoInfo', 'upName'),
|
'uploader': ('videoInfo', 'upName'),
|
||||||
|
@ -666,14 +667,15 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
episode_id = self._match_id(url)
|
episode_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, episode_id)
|
headers = self.geo_verification_headers()
|
||||||
|
webpage = self._download_webpage(url, episode_id, headers=headers)
|
||||||
|
|
||||||
if '您所在的地区无法观看本片' in webpage:
|
if '您所在的地区无法观看本片' in webpage:
|
||||||
raise GeoRestrictedError('This video is restricted')
|
raise GeoRestrictedError('This video is restricted')
|
||||||
elif '正在观看预览,大会员免费看全片' in webpage:
|
elif '正在观看预览,大会员免费看全片' in webpage:
|
||||||
self.raise_login_required('This video is for premium members only')
|
self.raise_login_required('This video is for premium members only')
|
||||||
|
|
||||||
headers = {'Referer': url, **self.geo_verification_headers()}
|
headers['Referer'] = url
|
||||||
play_info = self._download_json(
|
play_info = self._download_json(
|
||||||
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
|
'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
|
||||||
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
|
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
|
||||||
|
@ -724,7 +726,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||||
'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
|
'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
|
||||||
'__post_extractor': self.extract_comments(aid),
|
'__post_extractor': self.extract_comments(aid),
|
||||||
'http_headers': headers,
|
'http_headers': {'Referer': url},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1043,15 +1045,17 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
|
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
|
||||||
playlist_id, note=f'Downloading page {page_idx}', query=query)
|
playlist_id, note=f'Downloading page {page_idx}', query=query,
|
||||||
|
headers={'referer': url})
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
|
if isinstance(e.cause, HTTPError) and e.cause.status == 412:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
|
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
|
||||||
raise
|
raise
|
||||||
if response['code'] == -401:
|
if response['code'] in (-352, -401):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
|
f'Request is blocked by server ({-response["code"]}), '
|
||||||
|
'please add cookies, wait and try later.', expected=True)
|
||||||
return response['data']
|
return response['data']
|
||||||
|
|
||||||
def get_metadata(page_data):
|
def get_metadata(page_data):
|
||||||
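
The widened check above treats both -352 and -401 as "blocked by server" responses and reuses the code in the message; negating the negative API code is what produces the positive number shown to the user. Illustrative sample responses only:

    # Illustration of the widened error check above, with sample responses only.
    for response in ({'code': -352}, {'code': -401}, {'code': 0, 'data': {}}):
        if response['code'] in (-352, -401):
            # raised as ExtractorError(..., expected=True) in the extractor
            print(f'Request is blocked by server ({-response["code"]}), '
                  'please add cookies, wait and try later.')
        else:
            print('OK, using response["data"]')
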
|
|
|
@ -1,7 +1,11 @@
|
||||||
|
import json
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .youtube import YoutubeIE
|
from .youtube import YoutubeIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
bug_reports_message,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
qualities,
|
qualities,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
@ -162,9 +166,19 @@ class BoostyIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
user, post_id = self._match_valid_url(url).group('user', 'post_id')
|
user, post_id = self._match_valid_url(url).group('user', 'post_id')
|
||||||
|
|
||||||
|
auth_headers = {}
|
||||||
|
auth_cookie = self._get_cookies('https://boosty.to/').get('auth')
|
||||||
|
if auth_cookie is not None:
|
||||||
|
try:
|
||||||
|
auth_data = json.loads(urllib.parse.unquote(auth_cookie.value))
|
||||||
|
auth_headers['Authorization'] = f'Bearer {auth_data["accessToken"]}'
|
||||||
|
except (json.JSONDecodeError, KeyError):
|
||||||
|
self.report_warning(f'Failed to extract token from auth cookie{bug_reports_message()}')
|
||||||
|
|
||||||
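
The new block above reads Boosty's `auth` cookie, which stores a URL-encoded JSON object, and promotes its `accessToken` to a Bearer header for the post API call. A standalone sketch of that decode step (the cookie value is invented):

    # Standalone sketch of the auth-cookie handling above; the cookie value is invented.
    import json
    import urllib.parse

    cookie_value = urllib.parse.quote('{"accessToken": "abc123", "refreshToken": "def456"}')

    auth_headers = {}
    try:
        auth_data = json.loads(urllib.parse.unquote(cookie_value))
        auth_headers['Authorization'] = f'Bearer {auth_data["accessToken"]}'
    except (json.JSONDecodeError, KeyError):
        print('Failed to extract token from auth cookie')

    print(auth_headers)  # {'Authorization': 'Bearer abc123'}
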
post = self._download_json(
|
post = self._download_json(
|
||||||
f'https://api.boosty.to/v1/blog/{user}/post/{post_id}', post_id,
|
f'https://api.boosty.to/v1/blog/{user}/post/{post_id}', post_id,
|
||||||
note='Downloading post data', errnote='Unable to download post data')
|
note='Downloading post data', errnote='Unable to download post data', headers=auth_headers)
|
||||||
|
|
||||||
post_title = post.get('title')
|
post_title = post.get('title')
|
||||||
if not post_title:
|
if not post_title:
|
||||||
|
@ -202,7 +216,9 @@ class BoostyIE(InfoExtractor):
|
||||||
'thumbnail': (('previewUrl', 'defaultPreview'), {url_or_none}),
|
'thumbnail': (('previewUrl', 'defaultPreview'), {url_or_none}),
|
||||||
}, get_all=False)})
|
}, get_all=False)})
|
||||||
|
|
||||||
if not entries:
|
if not entries and not post.get('hasAccess'):
|
||||||
|
self.raise_login_required('This post requires a subscription', metadata_available=True)
|
||||||
|
elif not entries:
|
||||||
raise ExtractorError('No videos found', expected=True)
|
raise ExtractorError('No videos found', expected=True)
|
||||||
if len(entries) == 1:
|
if len(entries) == 1:
|
||||||
return entries[0]
|
return entries[0]
|
||||||
|
|
|
@ -3,6 +3,7 @@ import urllib.parse
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
@ -11,8 +12,8 @@ from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class BoxIE(InfoExtractor):
|
class BoxIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/?#]+)/file/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/?#]+)(?:/file/(?P<id>\d+))?'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
|
'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
|
||||||
'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
|
'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -25,14 +26,36 @@ class BoxIE(InfoExtractor):
|
||||||
'uploader_id': '235196876',
|
'uploader_id': '235196876',
|
||||||
},
|
},
|
||||||
'params': {'skip_download': 'dash fragment too small'},
|
'params': {'skip_download': 'dash fragment too small'},
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://utexas.app.box.com/s/2x6vanv85fdl8j2eqlcxmv0gp1wvps6e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '787379022466',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Webinar recording: Take the Leap!.mp4',
|
||||||
|
'uploader': 'Patricia Mosele',
|
||||||
|
'timestamp': 1615824864,
|
||||||
|
'upload_date': '20210315',
|
||||||
|
'uploader_id': '239068974',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'dash fragment too small'},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
shared_name, file_id = self._match_valid_url(url).groups()
|
shared_name, file_id = self._match_valid_url(url).groups()
|
||||||
webpage = self._download_webpage(url, file_id)
|
webpage = self._download_webpage(url, file_id or shared_name)
|
||||||
request_token = self._parse_json(self._search_regex(
|
|
||||||
r'Box\.config\s*=\s*({.+?});', webpage,
|
if not file_id:
|
||||||
'Box config'), file_id)['requestToken']
|
post_stream_data = self._search_json(
|
||||||
|
r'Box\.postStreamData\s*=', webpage, 'Box post-stream data', shared_name)
|
||||||
|
shared_item = traverse_obj(
|
||||||
|
post_stream_data, ('/app-api/enduserapp/shared-item', {dict})) or {}
|
||||||
|
if shared_item.get('itemType') != 'file':
|
||||||
|
raise ExtractorError('The requested resource is not a file', expected=True)
|
||||||
|
|
||||||
|
file_id = str(shared_item['itemID'])
|
||||||
|
|
||||||
|
request_token = self._search_json(
|
||||||
|
r'Box\.config\s*=', webpage, 'Box config', file_id)['requestToken']
|
||||||
access_token = self._download_json(
|
access_token = self._download_json(
|
||||||
'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
|
'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
|
||||||
'Downloading token JSON metadata',
|
'Downloading token JSON metadata',
|
||||||
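
For share links without a `/file/<id>` segment, the code above recovers the numeric file ID from the `Box.postStreamData` object embedded in the page and rejects non-file items. A rough standalone sketch against a synthetic page snippet (the real `_search_json()` helper is more robust):

    # Rough standalone sketch of the shared-link handling above; the page snippet is
    # synthetic and the real _search_json() helper is more robust than this regex.
    import json
    import re

    webpage = '''<script>
    Box.postStreamData = {"/app-api/enduserapp/shared-item":
        {"itemType": "file", "itemID": 787379022466}};
    </script>'''

    match = re.search(
        r'Box\.postStreamData\s*=\s*(?P<json>{.+?})\s*;?\s*</script>', webpage, re.DOTALL)
    post_stream_data = json.loads(match.group('json')) if match else {}

    shared_item = post_stream_data.get('/app-api/enduserapp/shared-item') or {}
    if shared_item.get('itemType') != 'file':
        raise ValueError('The requested resource is not a file')
    print(str(shared_item['itemID']))  # 787379022466
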
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
import functools
|
||||||
import re
|
import re
|
||||||
from functools import partial
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..networking.exceptions import HTTPError
|
from ..networking.exceptions import HTTPError
|
||||||
|
@ -115,9 +115,9 @@ class BundestagIE(InfoExtractor):
|
||||||
note='Downloading metadata overlay', fatal=False,
|
note='Downloading metadata overlay', fatal=False,
|
||||||
), {
|
), {
|
||||||
'title': (
|
'title': (
|
||||||
{partial(get_element_text_and_html_by_tag, 'h3')}, 0,
|
{functools.partial(get_element_text_and_html_by_tag, 'h3')}, 0,
|
||||||
{partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
|
{functools.partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
|
||||||
'description': ({partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
|
'description': ({functools.partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
|
||||||
}))
|
}))
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
|
@ -1,32 +0,0 @@
|
||||||
from .common import InfoExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class CableAVIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://cableav\.tv/(?P<id>[a-zA-Z0-9]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://cableav.tv/lS4iR9lWjN8/',
|
|
||||||
'md5': '7e3fe5e49d61c4233b7f5b0f69b15e18',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'lS4iR9lWjN8',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '國產麻豆AV 叮叮映畫 DDF001 情欲小說家 - CableAV',
|
|
||||||
'description': '國產AV 480p, 720p 国产麻豆AV 叮叮映画 DDF001 情欲小说家',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
}
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
video_url = self._og_search_video_url(webpage, secure=False)
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(video_url, video_id, 'mp4')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': self._og_search_title(webpage),
|
|
||||||
'description': self._og_search_description(webpage),
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
|
@ -40,7 +40,7 @@ class CanalAlphaIE(InfoExtractor):
|
||||||
'id': '24484',
|
'id': '24484',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Ces innovations qui veulent rendre l’agriculture plus durable',
|
'title': 'Ces innovations qui veulent rendre l’agriculture plus durable',
|
||||||
'description': 'md5:3de3f151180684621e85be7c10e4e613',
|
'description': 'md5:85d594a3b5dc6ccfc4a85aba6e73b129',
|
||||||
'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_10236.jpg',
|
'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_10236.jpg',
|
||||||
'upload_date': '20211026',
|
'upload_date': '20211026',
|
||||||
'duration': 360,
|
'duration': 360,
|
||||||
|
@ -58,14 +58,25 @@ class CanalAlphaIE(InfoExtractor):
|
||||||
'duration': 360,
|
'duration': 360,
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True}
|
'params': {'skip_download': True}
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.canalalpha.ch/play/le-journal/topic/33500/encore-des-mesures-deconomie-dans-le-jura',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '33500',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Encore des mesures d\'économie dans le Jura',
|
||||||
|
'description': 'md5:938b5b556592f2d1b9ab150268082a80',
|
||||||
|
'thumbnail': 'https://static.canalalpha.ch/poster/news/news_46665.jpg',
|
||||||
|
'upload_date': '20240411',
|
||||||
|
'duration': 105,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
data_json = self._parse_json(self._search_regex(
|
data_json = self._parse_json(self._search_regex(
|
||||||
r'window\.__SERVER_STATE__\s?=\s?({(?:(?!};)[^"]|"([^"]|\\")*")+})\s?;',
|
r'window\.__SERVER_STATE__\s?=\s?({(?:(?!};)[^"]|"([^"]|\\")*")+})\s?;',
|
||||||
webpage, 'data_json'), id)['1']['data']['data']
|
webpage, 'data_json'), video_id)['1']['data']['data']
|
||||||
manifests = try_get(data_json, lambda x: x['video']['manifests'], expected_type=dict) or {}
|
manifests = try_get(data_json, lambda x: x['video']['manifests'], expected_type=dict) or {}
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
formats = [{
|
formats = [{
|
||||||
|
@ -75,15 +86,17 @@ class CanalAlphaIE(InfoExtractor):
|
||||||
'height': try_get(video, lambda x: x['res']['height'], expected_type=int),
|
'height': try_get(video, lambda x: x['res']['height'], expected_type=int),
|
||||||
} for video in try_get(data_json, lambda x: x['video']['mp4'], expected_type=list) or [] if video.get('$url')]
|
} for video in try_get(data_json, lambda x: x['video']['mp4'], expected_type=list) or [] if video.get('$url')]
|
||||||
if manifests.get('hls'):
|
if manifests.get('hls'):
|
||||||
m3u8_frmts, m3u8_subs = self._parse_m3u8_formats_and_subtitles(manifests['hls'], video_id=id)
|
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
formats.extend(m3u8_frmts)
|
manifests['hls'], video_id, m3u8_id='hls', fatal=False)
|
||||||
subtitles = self._merge_subtitles(subtitles, m3u8_subs)
|
formats.extend(fmts)
|
||||||
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
if manifests.get('dash'):
|
if manifests.get('dash'):
|
||||||
dash_frmts, dash_subs = self._parse_mpd_formats_and_subtitles(manifests['dash'])
|
fmts, subs = self._extract_mpd_formats_and_subtitles(
|
||||||
formats.extend(dash_frmts)
|
manifests['dash'], video_id, mpd_id='dash', fatal=False)
|
||||||
subtitles = self._merge_subtitles(subtitles, dash_subs)
|
formats.extend(fmts)
|
||||||
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
return {
|
return {
|
||||||
'id': id,
|
'id': video_id,
|
||||||
'title': data_json.get('title').strip(),
|
'title': data_json.get('title').strip(),
|
||||||
'description': clean_html(dict_get(data_json, ('longDesc', 'shortDesc'))),
|
'description': clean_html(dict_get(data_json, ('longDesc', 'shortDesc'))),
|
||||||
'thumbnail': data_json.get('poster'),
|
'thumbnail': data_json.get('poster'),
|
||||||
|
|
|
@ -151,7 +151,7 @@ class CBCIE(InfoExtractor):
|
||||||
|
|
||||||
class CBCPlayerIE(InfoExtractor):
|
class CBCPlayerIE(InfoExtractor):
|
||||||
IE_NAME = 'cbc.ca:player'
|
IE_NAME = 'cbc.ca:player'
|
||||||
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
|
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.cbc.ca/player/play/2683190193',
|
'url': 'http://www.cbc.ca/player/play/2683190193',
|
||||||
'md5': '64d25f841ddf4ddb28a235338af32e2c',
|
'md5': '64d25f841ddf4ddb28a235338af32e2c',
|
||||||
|
@ -165,9 +165,52 @@ class CBCPlayerIE(InfoExtractor):
|
||||||
'uploader': 'CBCC-NEW',
|
'uploader': 'CBCC-NEW',
|
||||||
},
|
},
|
||||||
'skip': 'Geo-restricted to Canada and no longer available',
|
'skip': 'Geo-restricted to Canada and no longer available',
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.cbc.ca/i/caffeine/syndicate/?mediaId=2657631896',
|
||||||
|
'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2657631896',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'CBC Montreal is organizing its first ever community hackathon!',
|
||||||
|
'description': 'md5:dd3b692f0a139b0369943150bd1c46a9',
|
||||||
|
'timestamp': 1425704400,
|
||||||
|
'upload_date': '20150307',
|
||||||
|
'uploader': 'CBCC-NEW',
|
||||||
|
'thumbnail': 'http://thumbnails.cbc.ca/maven_legacy/thumbnails/sonali-karnick-220.jpg',
|
||||||
|
'chapters': [],
|
||||||
|
'duration': 494.811,
|
||||||
|
'categories': ['AudioMobile/All in a Weekend Montreal'],
|
||||||
|
'tags': 'count:8',
|
||||||
|
'location': 'Quebec',
|
||||||
|
'series': 'All in a Weekend Montreal',
|
||||||
|
'season': 'Season 2015',
|
||||||
|
'season_number': 2015,
|
||||||
|
'media_type': 'Excerpt',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.cbc.ca/i/caffeine/syndicate/?mediaId=2164402062',
|
||||||
|
'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2164402062',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Cancer survivor four times over',
|
||||||
|
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
|
||||||
|
'timestamp': 1320410746,
|
||||||
|
'upload_date': '20111104',
|
||||||
|
'uploader': 'CBCC-NEW',
|
||||||
|
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/277/67/cancer_852x480_2164412612.jpg',
|
||||||
|
'chapters': [],
|
||||||
|
'duration': 186.867,
|
||||||
|
'series': 'CBC News: Windsor at 6:00',
|
||||||
|
'categories': ['News/Canada/Windsor'],
|
||||||
|
'location': 'Windsor',
|
||||||
|
'tags': ['cancer'],
|
||||||
|
'creators': ['Allison Johnson'],
|
||||||
|
'media_type': 'Excerpt',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
|
||||||
'url': 'http://www.cbc.ca/player/play/2657631896',
|
'url': 'https://www.cbc.ca/player/play/1.2985700',
|
||||||
'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
|
'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2657631896',
|
'id': '2657631896',
|
||||||
|
@ -189,7 +232,7 @@ class CBCPlayerIE(InfoExtractor):
|
||||||
'media_type': 'Excerpt',
|
'media_type': 'Excerpt',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.cbc.ca/player/play/2164402062',
|
'url': 'https://www.cbc.ca/player/play/1.1711287',
|
||||||
'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
|
'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2164402062',
|
'id': '2164402062',
|
||||||
|
@ -206,38 +249,75 @@ class CBCPlayerIE(InfoExtractor):
|
||||||
'categories': ['News/Canada/Windsor'],
|
'categories': ['News/Canada/Windsor'],
|
||||||
'location': 'Windsor',
|
'location': 'Windsor',
|
||||||
'tags': ['cancer'],
|
'tags': ['cancer'],
|
||||||
'creator': 'Allison Johnson',
|
'creators': ['Allison Johnson'],
|
||||||
'media_type': 'Excerpt',
|
'media_type': 'Excerpt',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# Has subtitles
|
# Has subtitles
|
||||||
# These broadcasts expire after ~1 month, can find new test URL here:
|
# These broadcasts expire after ~1 month, can find new test URL here:
|
||||||
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
|
# https://www.cbc.ca/player/news/TV%20Shows/The%20National/Latest%20Broadcast
|
||||||
'url': 'http://www.cbc.ca/player/play/2284799043667',
|
'url': 'https://www.cbc.ca/player/play/1.7159484',
|
||||||
'md5': '9b49f0839e88b6ec0b01d840cf3d42b5',
|
'md5': '6ed6cd0fc2ef568d2297ba68a763d455',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2284799043667',
|
'id': '2324213316001',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The National | Hockey coach charged, Green grants, Safer drugs',
|
'title': 'The National | School boards sue social media giants',
|
||||||
'description': 'md5:84ef46321c94bcf7d0159bb565d26bfa',
|
'description': 'md5:4b4db69322fa32186c3ce426da07402c',
|
||||||
'timestamp': 1700272800,
|
'timestamp': 1711681200,
|
||||||
'duration': 2718.833,
|
'duration': 2743.400,
|
||||||
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
||||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/907/171/thumbnail.jpeg',
|
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/607/559/thumbnail.jpeg',
|
||||||
'uploader': 'CBCC-NEW',
|
'uploader': 'CBCC-NEW',
|
||||||
'chapters': 'count:5',
|
'chapters': 'count:5',
|
||||||
'upload_date': '20231118',
|
'upload_date': '20240329',
|
||||||
'categories': 'count:4',
|
'categories': 'count:4',
|
||||||
'series': 'The National - Full Show',
|
'series': 'The National - Full Show',
|
||||||
'tags': 'count:1',
|
'tags': 'count:1',
|
||||||
'creator': 'News',
|
'creators': ['News'],
|
||||||
'location': 'Canada',
|
'location': 'Canada',
|
||||||
'media_type': 'Full Program',
|
'media_type': 'Full Program',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cbc.ca/player/play/video/1.7194274',
|
||||||
|
'md5': '188b96cf6bdcb2540e178a6caa957128',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2334524995812',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '#TheMoment a rare white spirit moose was spotted in Alberta',
|
||||||
|
'description': 'md5:18ae269a2d0265c5b0bbe4b2e1ac61a3',
|
||||||
|
'timestamp': 1714788791,
|
||||||
|
'duration': 77.678,
|
||||||
|
'subtitles': {'eng': [{'ext': 'vtt', 'protocol': 'm3u8_native'}]},
|
||||||
|
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/201/543/THE_MOMENT.jpg',
|
||||||
|
'uploader': 'CBCC-NEW',
|
||||||
|
'chapters': 'count:0',
|
||||||
|
'upload_date': '20240504',
|
||||||
|
'categories': 'count:3',
|
||||||
|
'series': 'The National',
|
||||||
|
'tags': 'count:15',
|
||||||
|
'creators': ['encoder'],
|
||||||
|
'location': 'Canada',
|
||||||
|
'media_type': 'Excerpt',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'cbcplayer:1.7159484',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'cbcplayer:2164402062',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.cbc.ca/player/play/2657631896',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
if '.' in video_id:
|
||||||
|
webpage = self._download_webpage(f'https://www.cbc.ca/player/play/{video_id}', video_id)
|
||||||
|
video_id = self._search_json(
|
||||||
|
r'window\.__INITIAL_STATE__\s*=', webpage,
|
||||||
|
'initial state', video_id)['video']['currentClip']['mediaId']
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'ie_key': 'ThePlatform',
|
'ie_key': 'ThePlatform',
|
||||||
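
The widened `_VALID_URL` above accepts both the legacy numeric clip IDs and the newer dotted `1.xxxxxxx` IDs, with an optional `/video/` path segment; dotted IDs are then resolved to a numeric `mediaId` via `window.__INITIAL_STATE__`. A small check of the ID pattern with illustrative URLs:

    # Small check of the widened ID pattern above, with illustrative URLs.
    import re

    _VALID_URL = (r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/(?:video/)?'
                  r'|i/caffeine/syndicate/\?mediaId=))(?P<id>(?:\d\.)?\d+)')

    for test_url in (
        'http://www.cbc.ca/player/play/2683190193',
        'https://www.cbc.ca/player/play/video/1.7194274',
        'cbcplayer:1.7159484',
    ):
        print(re.match(_VALID_URL, test_url).group('id'))
    # 2683190193, 1.7194274, 1.7159484
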
|
|
|
@ -1,6 +1,6 @@
|
||||||
import base64
|
import base64
|
||||||
import codecs
|
import codecs
|
||||||
import datetime
|
import datetime as dt
|
||||||
import hashlib
|
import hashlib
|
||||||
import hmac
|
import hmac
|
||||||
import json
|
import json
|
||||||
|
@ -16,7 +16,6 @@ from ..utils import (
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
multipart_encode,
|
multipart_encode,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
random_birthday,
|
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
try_call,
|
try_call,
|
||||||
try_get,
|
try_get,
|
||||||
|
@ -63,38 +62,57 @@ class CDAIE(InfoExtractor):
|
||||||
'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
|
'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'uploader': 'crash404',
|
'uploader': 'crash404',
|
||||||
'view_count': int,
|
|
||||||
'average_rating': float,
|
'average_rating': float,
|
||||||
'duration': 137,
|
'duration': 137,
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'upload_date': '20160220',
|
||||||
|
'timestamp': 1455968218,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# Age-restricted
|
# Age-restricted with vfilm redirection
|
||||||
'url': 'http://www.cda.pl/video/1273454c4',
|
'url': 'https://www.cda.pl/video/8753244c4',
|
||||||
|
'md5': 'd8eeb83d63611289507010d3df3bb8b3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1273454c4',
|
'id': '8753244c4',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Bronson (2008) napisy HD 1080p',
|
'title': '[18+] Bez Filtra: Rezerwowe Psy czyli... najwulgarniejsza polska gra?',
|
||||||
'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
|
'description': 'md5:ae80bac31bd6a9f077a6cce03c7c077e',
|
||||||
'height': 1080,
|
'height': 1080,
|
||||||
'uploader': 'boniek61',
|
'uploader': 'arhn eu',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'duration': 5554,
|
'duration': 991,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'view_count': int,
|
|
||||||
'average_rating': float,
|
'average_rating': float,
|
||||||
},
|
'timestamp': 1633888264,
|
||||||
|
'upload_date': '20211010',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# Age-restricted without vfilm redirection
|
||||||
|
'url': 'https://www.cda.pl/video/17028157b8',
|
||||||
|
'md5': 'c1fe5ff4582bace95d4f0ce0fbd0f992',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '17028157b8',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'STENDUPY MICHAŁ OGIŃSKI',
|
||||||
|
'description': 'md5:5851f3272bfc31f762d616040a1d609a',
|
||||||
|
'height': 480,
|
||||||
|
'uploader': 'oginski',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 18855,
|
||||||
|
'age_limit': 18,
|
||||||
|
'average_rating': float,
|
||||||
|
'timestamp': 1699705901,
|
||||||
|
'upload_date': '20231111',
|
||||||
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://ebd.cda.pl/0x0/5749950c',
|
'url': 'http://ebd.cda.pl/0x0/5749950c',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
|
def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
|
||||||
form_data = random_birthday('rok', 'miesiac', 'dzien')
|
data, content_type = multipart_encode({'age_confirm': ''})
|
||||||
form_data.update({'return': url, 'module': 'video', 'module_id': video_id})
|
|
||||||
data, content_type = multipart_encode(form_data)
|
|
||||||
return self._download_webpage(
|
return self._download_webpage(
|
||||||
urljoin(url, '/a/validatebirth'), video_id, *args,
|
url, video_id, *args,
|
||||||
data=data, headers={
|
data=data, headers={
|
||||||
'Referer': url,
|
'Referer': url,
|
||||||
'Content-Type': content_type,
|
'Content-Type': content_type,
|
||||||
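
The age gate is now passed by POSTing a single empty `age_confirm` field back to the page that was just fetched, instead of the old birth-date form. A short sketch of what `multipart_encode()` returns for that payload (output shown is approximate):

    # Short sketch of the simplified age-confirmation payload above; the printed
    # output is approximate since the multipart boundary is generated at runtime.
    from yt_dlp.utils import multipart_encode

    data, content_type = multipart_encode({'age_confirm': ''})

    print(content_type)        # e.g. multipart/form-data; boundary=---------------...
    print(data.decode()[:80])  # the body opens with the boundary and the age_confirm part
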
|
@ -134,7 +152,7 @@ class CDAIE(InfoExtractor):
|
||||||
self._API_HEADERS['User-Agent'] = f'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})'
|
self._API_HEADERS['User-Agent'] = f'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})'
|
||||||
|
|
||||||
cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {}
|
cached_bearer = self.cache.load(self._BEARER_CACHE, username) or {}
|
||||||
if cached_bearer.get('valid_until', 0) > datetime.datetime.now().timestamp() + 5:
|
if cached_bearer.get('valid_until', 0) > dt.datetime.now().timestamp() + 5:
|
||||||
self._API_HEADERS['Authorization'] = f'Bearer {cached_bearer["token"]}'
|
self._API_HEADERS['Authorization'] = f'Bearer {cached_bearer["token"]}'
|
||||||
return
|
return
|
||||||
|
|
||||||
|
@ -154,7 +172,7 @@ class CDAIE(InfoExtractor):
|
||||||
})
|
})
|
||||||
self.cache.store(self._BEARER_CACHE, username, {
|
self.cache.store(self._BEARER_CACHE, username, {
|
||||||
'token': token_res['access_token'],
|
'token': token_res['access_token'],
|
||||||
'valid_until': token_res['expires_in'] + datetime.datetime.now().timestamp(),
|
'valid_until': token_res['expires_in'] + dt.datetime.now().timestamp(),
|
||||||
})
|
})
|
||||||
self._API_HEADERS['Authorization'] = f'Bearer {token_res["access_token"]}'
|
self._API_HEADERS['Authorization'] = f'Bearer {token_res["access_token"]}'
|
||||||
|
|
||||||
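
The bearer cache above stores an absolute `valid_until` epoch and only reuses a token that is still valid at least five seconds from now, using the `datetime as dt` alias the module switched to. A small illustration with made-up values:

    # Small illustration, with made-up values, of the cache-expiry check above.
    import datetime as dt

    cached_bearer = {
        'token': 'example-token',
        'valid_until': dt.datetime.now().timestamp() + 3600,  # expires in an hour
    }

    if cached_bearer.get('valid_until', 0) > dt.datetime.now().timestamp() + 5:
        print({'Authorization': f'Bearer {cached_bearer["token"]}'})
    else:
        print('cached token expired, requesting a new one')
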
|
@ -164,7 +182,7 @@ class CDAIE(InfoExtractor):
|
||||||
if 'Authorization' in self._API_HEADERS:
|
if 'Authorization' in self._API_HEADERS:
|
||||||
return self._api_extract(video_id)
|
return self._api_extract(video_id)
|
||||||
else:
|
else:
|
||||||
return self._web_extract(video_id, url)
|
return self._web_extract(video_id)
|
||||||
|
|
||||||
def _api_extract(self, video_id):
|
def _api_extract(self, video_id):
|
||||||
meta = self._download_json(
|
meta = self._download_json(
|
||||||
|
@ -197,9 +215,9 @@ class CDAIE(InfoExtractor):
|
||||||
'view_count': meta.get('views'),
|
'view_count': meta.get('views'),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _web_extract(self, video_id, url):
|
def _web_extract(self, video_id):
|
||||||
self._set_cookie('cda.pl', 'cda.player', 'html5')
|
self._set_cookie('cda.pl', 'cda.player', 'html5')
|
||||||
webpage = self._download_webpage(
|
webpage, urlh = self._download_webpage_handle(
|
||||||
f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)
|
f'{self._BASE_URL}/video/{video_id}/vfilm', video_id)
|
||||||
|
|
||||||
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
|
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
|
||||||
|
@ -209,10 +227,10 @@ class CDAIE(InfoExtractor):
|
||||||
self.raise_geo_restricted()
|
self.raise_geo_restricted()
|
||||||
|
|
||||||
need_confirm_age = False
|
need_confirm_age = False
|
||||||
if self._html_search_regex(r'(<form[^>]+action="[^"]*/a/validatebirth[^"]*")',
|
if self._html_search_regex(r'(<button[^>]+name="[^"]*age_confirm[^"]*")',
|
||||||
webpage, 'birthday validate form', default=None):
|
webpage, 'birthday validate form', default=None):
|
||||||
webpage = self._download_age_confirm_page(
|
webpage = self._download_age_confirm_page(
|
||||||
url, video_id, note='Confirming age')
|
urlh.url, video_id, note='Confirming age')
|
||||||
need_confirm_age = True
|
need_confirm_age = True
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
@ -222,9 +240,6 @@ class CDAIE(InfoExtractor):
|
||||||
(?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
|
(?:<\1[^>]*>[^<]*</\1>|(?!</\1>)(?:.|\n))*?
|
||||||
<(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
|
<(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
|
||||||
''', webpage, 'uploader', default=None, group='uploader')
|
''', webpage, 'uploader', default=None, group='uploader')
|
||||||
view_count = self._search_regex(
|
|
||||||
r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage,
|
|
||||||
'view_count', default=None)
|
|
||||||
average_rating = self._search_regex(
|
average_rating = self._search_regex(
|
||||||
(r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
|
(r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
|
||||||
r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
|
r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
|
||||||
|
@ -235,7 +250,6 @@ class CDAIE(InfoExtractor):
|
||||||
'title': self._og_search_title(webpage),
|
'title': self._og_search_title(webpage),
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'view_count': int_or_none(view_count),
|
|
||||||
'average_rating': float_or_none(average_rating),
|
'average_rating': float_or_none(average_rating),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
|
|
@ -37,6 +37,7 @@ from ..networking.exceptions import (
|
||||||
IncompleteRead,
|
IncompleteRead,
|
||||||
network_exceptions,
|
network_exceptions,
|
||||||
)
|
)
|
||||||
|
from ..networking.impersonate import ImpersonateTarget
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
IDENTITY,
|
IDENTITY,
|
||||||
JSON_LD_RE,
|
JSON_LD_RE,
|
||||||
|
@ -170,12 +171,12 @@ class InfoExtractor:
|
||||||
Automatically calculated from width and height
|
Automatically calculated from width and height
|
||||||
* dynamic_range The dynamic range of the video. One of:
|
* dynamic_range The dynamic range of the video. One of:
|
||||||
"SDR" (None), "HDR10", "HDR10+, "HDR12", "HLG, "DV"
|
"SDR" (None), "HDR10", "HDR10+, "HDR12", "HLG, "DV"
|
||||||
* tbr Average bitrate of audio and video in KBit/s
|
* tbr Average bitrate of audio and video in kbps (1000 bits/sec)
|
||||||
* abr Average audio bitrate in KBit/s
|
* abr Average audio bitrate in kbps (1000 bits/sec)
|
||||||
* acodec Name of the audio codec in use
|
* acodec Name of the audio codec in use
|
||||||
* asr Audio sampling rate in Hertz
|
* asr Audio sampling rate in Hertz
|
||||||
* audio_channels Number of audio channels
|
* audio_channels Number of audio channels
|
||||||
* vbr Average video bitrate in KBit/s
|
* vbr Average video bitrate in kbps (1000 bits/sec)
|
||||||
* fps Frame rate
|
* fps Frame rate
|
||||||
* vcodec Name of the video codec in use
|
* vcodec Name of the video codec in use
|
||||||
* container Name of the container format
|
* container Name of the container format
|
||||||
|
@ -246,7 +247,8 @@ class InfoExtractor:
|
||||||
* downloader_options A dictionary of downloader options
|
* downloader_options A dictionary of downloader options
|
||||||
(For internal use only)
|
(For internal use only)
|
||||||
* http_chunk_size Chunk size for HTTP downloads
|
* http_chunk_size Chunk size for HTTP downloads
|
||||||
* ffmpeg_args Extra arguments for ffmpeg downloader
|
* ffmpeg_args Extra arguments for ffmpeg downloader (input)
|
||||||
|
* ffmpeg_args_out Extra arguments for ffmpeg downloader (output)
|
||||||
* is_dash_periods Whether the format is a result of merging
|
* is_dash_periods Whether the format is a result of merging
|
||||||
multiple DASH periods.
|
multiple DASH periods.
|
||||||
RTMP formats can also have the additional fields: page_url,
|
RTMP formats can also have the additional fields: page_url,
|
||||||
|
@ -817,7 +819,7 @@ class InfoExtractor:
|
||||||
else:
|
else:
|
||||||
return err.status in variadic(expected_status)
|
return err.status in variadic(expected_status)
|
||||||
|
|
||||||
def _create_request(self, url_or_request, data=None, headers=None, query=None):
|
def _create_request(self, url_or_request, data=None, headers=None, query=None, extensions=None):
|
||||||
if isinstance(url_or_request, urllib.request.Request):
|
if isinstance(url_or_request, urllib.request.Request):
|
||||||
self._downloader.deprecation_warning(
|
self._downloader.deprecation_warning(
|
||||||
'Passing a urllib.request.Request to _create_request() is deprecated. '
|
'Passing a urllib.request.Request to _create_request() is deprecated. '
|
||||||
|
@ -826,10 +828,11 @@ class InfoExtractor:
|
||||||
elif not isinstance(url_or_request, Request):
|
elif not isinstance(url_or_request, Request):
|
||||||
url_or_request = Request(url_or_request)
|
url_or_request = Request(url_or_request)
|
||||||
|
|
||||||
url_or_request.update(data=data, headers=headers, query=query)
|
url_or_request.update(data=data, headers=headers, query=query, extensions=extensions)
|
||||||
return url_or_request
|
return url_or_request
|
||||||
|
|
||||||
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
|
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None,
|
||||||
|
headers=None, query=None, expected_status=None, impersonate=None, require_impersonation=False):
|
||||||
"""
|
"""
|
||||||
Return the response handle.
|
Return the response handle.
|
||||||
|
|
||||||
|
@ -860,8 +863,31 @@ class InfoExtractor:
|
||||||
headers = (headers or {}).copy()
|
headers = (headers or {}).copy()
|
||||||
headers.setdefault('X-Forwarded-For', self._x_forwarded_for_ip)
|
headers.setdefault('X-Forwarded-For', self._x_forwarded_for_ip)
|
||||||
|
|
||||||
|
extensions = {}
|
||||||
|
|
||||||
|
if impersonate in (True, ''):
|
||||||
|
impersonate = ImpersonateTarget()
|
||||||
|
requested_targets = [
|
||||||
|
t if isinstance(t, ImpersonateTarget) else ImpersonateTarget.from_str(t)
|
||||||
|
for t in variadic(impersonate)
|
||||||
|
] if impersonate else []
|
||||||
|
|
||||||
|
available_target = next(filter(self._downloader._impersonate_target_available, requested_targets), None)
|
||||||
|
if available_target:
|
||||||
|
extensions['impersonate'] = available_target
|
||||||
|
elif requested_targets:
|
||||||
|
message = 'The extractor is attempting impersonation, but '
|
||||||
|
message += (
|
||||||
|
'no impersonate target is available' if not str(impersonate)
|
||||||
|
else f'none of these impersonate targets are available: "{", ".join(map(str, requested_targets))}"')
|
||||||
|
info_msg = ('see https://github.com/yt-dlp/yt-dlp#impersonation '
|
||||||
|
'for information on installing the required dependencies')
|
||||||
|
if require_impersonation:
|
||||||
|
raise ExtractorError(f'{message}; {info_msg}', expected=True)
|
||||||
|
self.report_warning(f'{message}; if you encounter errors, then {info_msg}', only_once=True)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query))
|
return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query, extensions))
|
||||||
except network_exceptions as err:
|
except network_exceptions as err:
|
||||||
if isinstance(err, HTTPError):
|
if isinstance(err, HTTPError):
|
||||||
if self.__can_accept_status_code(err, expected_status):
|
if self.__can_accept_status_code(err, expected_status):
|
||||||
|
@@ -880,13 +906,14 @@ class InfoExtractor:
                 return False

     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True,
-                                 encoding=None, data=None, headers={}, query={}, expected_status=None):
+                                 encoding=None, data=None, headers={}, query={}, expected_status=None,
+                                 impersonate=None, require_impersonation=False):
         """
         Return a tuple (page content as string, URL handle).

         Arguments:
         url_or_request -- plain text URL as a string or
-            a urllib.request.Request object
+            a yt_dlp.networking.Request object
         video_id -- Video/playlist/item identifier (string)

         Keyword arguments:
@@ -911,17 +938,27 @@ class InfoExtractor:
                  returning True if it should be accepted
                  Note that this argument does not affect success status codes (2xx)
                  which are always accepted.
+        impersonate -- the impersonate target. Can be any of the following entities:
+                - an instance of yt_dlp.networking.impersonate.ImpersonateTarget
+                - a string in the format of CLIENT[:OS]
+                - a list or a tuple of CLIENT[:OS] strings or ImpersonateTarget instances
+                - a boolean value; True means any impersonate target is sufficient
+        require_impersonation -- flag to toggle whether the request should raise an error
+                                 if impersonation is not possible (bool, default: False)
         """

         # Strip hashes from the URL (#1038)
         if isinstance(url_or_request, str):
             url_or_request = url_or_request.partition('#')[0]

-        urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
+        urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data,
+                                     headers=headers, query=query, expected_status=expected_status,
+                                     impersonate=impersonate, require_impersonation=require_impersonation)
         if urlh is False:
             assert not fatal
             return False
-        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
+        content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal,
+                                             encoding=encoding, data=data)
         return (content, urlh)

     @staticmethod
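Illustrative only (not part of the patch): a minimal sketch of how an extractor might pass the new keyword arguments; the extractor class and URL below are hypothetical.

from yt_dlp.networking.impersonate import ImpersonateTarget

class ExampleIE(InfoExtractor):  # hypothetical extractor, for illustration
    def _real_extract(self, url):
        video_id = self._match_id(url)
        # impersonate accepts an ImpersonateTarget, a 'CLIENT[:OS]' string,
        # a list/tuple of those, or True (any available target)
        webpage = self._download_webpage(
            url, video_id,
            impersonate=ImpersonateTarget('chrome'),
            require_impersonation=False)  # warn instead of raising if no target is available
        return {'id': video_id, 'title': self._og_search_title(webpage)}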
@@ -969,8 +1006,10 @@ class InfoExtractor:
                 'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
                 expected=True)

-    def _request_dump_filename(self, url, video_id):
-        basen = f'{video_id}_{url}'
+    def _request_dump_filename(self, url, video_id, data=None):
+        if data is not None:
+            data = hashlib.md5(data).hexdigest()
+        basen = join_nonempty(video_id, data, url, delim='_')
         trim_length = self.get_param('trim_file_name') or 240
         if len(basen) > trim_length:
             h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
@@ -992,7 +1031,8 @@ class InfoExtractor:
         except LookupError:
             return webpage_bytes.decode('utf-8', 'replace')

-    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
+    def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True,
+                              prefix=None, encoding=None, data=None):
         webpage_bytes = urlh.read()
         if prefix is not None:
             webpage_bytes = prefix + webpage_bytes
@@ -1001,7 +1041,9 @@ class InfoExtractor:
             dump = base64.b64encode(webpage_bytes).decode('ascii')
             self._downloader.to_screen(dump)
         if self.get_param('write_pages'):
-            filename = self._request_dump_filename(urlh.url, video_id)
+            if isinstance(url_or_request, Request):
+                data = self._create_request(url_or_request, data).data
+            filename = self._request_dump_filename(urlh.url, video_id, data)
             self.to_screen(f'Saving request to {filename}')
             with open(filename, 'wb') as outf:
                 outf.write(webpage_bytes)
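Illustrative only: a simplified stand-alone sketch of the dump-filename idea above, where an md5 of the request body keeps dumps of different POST payloads to the same URL apart (the real helper also trims long names).

import hashlib

def example_dump_basename(video_id, url, data=None):
    # join the non-empty parts with '_', inserting the md5 of the body when present
    parts = [video_id, hashlib.md5(data).hexdigest() if data is not None else None, url]
    return '_'.join(p for p in parts if p)

# example_dump_basename('abc123', 'https://example.com/api', b'{"page": 2}')
# -> 'abc123_<md5-of-body>_https://example.com/api'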
@@ -1046,20 +1088,23 @@ class InfoExtractor:
             return getattr(ie, parser)(content, *args, **kwargs)

         def download_handle(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
-                            fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+                            fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None,
+                            impersonate=None, require_impersonation=False):
             res = self._download_webpage_handle(
                 url_or_request, video_id, note=note, errnote=errnote, fatal=fatal, encoding=encoding,
-                data=data, headers=headers, query=query, expected_status=expected_status)
+                data=data, headers=headers, query=query, expected_status=expected_status,
+                impersonate=impersonate, require_impersonation=require_impersonation)
             if res is False:
                 return res
             content, urlh = res
             return parse(self, content, video_id, transform_source=transform_source, fatal=fatal, errnote=errnote), urlh

         def download_content(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None,
-                             fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+                             fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None,
+                             impersonate=None, require_impersonation=False):
             if self.get_param('load_pages'):
                 url_or_request = self._create_request(url_or_request, data, headers, query)
-                filename = self._request_dump_filename(url_or_request.url, video_id)
+                filename = self._request_dump_filename(url_or_request.url, video_id, url_or_request.data)
                 self.to_screen(f'Loading request from {filename}')
                 try:
                     with open(filename, 'rb') as dumpf:
@@ -1079,6 +1124,8 @@ class InfoExtractor:
                 'headers': headers,
                 'query': query,
                 'expected_status': expected_status,
+                'impersonate': impersonate,
+                'require_impersonation': require_impersonation,
             }
             if parser is None:
                 kwargs.pop('transform_source')
@@ -1697,12 +1744,16 @@ class InfoExtractor:
             traverse_json_ld(json_ld)
         return filter_dict(info)

-    def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal=True, **kw):
-        return self._parse_json(
-            self._search_regex(
-                r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
-                webpage, 'next.js data', fatal=fatal, **kw),
-            video_id, transform_source=transform_source, fatal=fatal)
+    def _search_nextjs_data(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT, **kw):
+        if default == '{}':
+            self._downloader.deprecation_warning('using `default=\'{}\'` is deprecated, use `default={}` instead')
+            default = {}
+        if default is not NO_DEFAULT:
+            fatal = False
+
+        return self._search_json(
+            r'<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>', webpage, 'next.js data',
+            video_id, end_pattern='</script>', fatal=fatal, default=default, **kw)

     def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__', *, fatal=True, traverse=('data', 0)):
         """Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function"""
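Illustrative only: a hypothetical call site showing the changed signature, where a real default replaces the old default='{}' string and implies fatal=False.

next_data = self._search_nextjs_data(webpage, video_id, default={})
# the traversal path below is made up for illustration
title = traverse_obj(next_data, ('props', 'pageProps', 'title'))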
@@ -40,3 +40,19 @@ class UnicodeBOMIE(InfoExtractor):
             'Your URL starts with a Byte Order Mark (BOM). '
             'Removing the BOM and looking for "%s" ...' % real_url)
         return self.url_result(real_url)
+
+
+class BlobIE(InfoExtractor):
+    IE_DESC = False
+    _VALID_URL = r'blob:'
+
+    _TESTS = [{
+        'url': 'blob:https://www.youtube.com/4eb3d090-a761-46e6-8083-c32016a36e3b',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        raise ExtractorError(
+            'You\'ve asked yt-dlp to download a blob URL. '
+            'A blob URL exists only locally in your browser. '
+            'It is not possible for yt-dlp to access it.', expected=True)
@@ -1,4 +1,5 @@
 import base64
+import uuid

 from .common import InfoExtractor
 from ..networking.exceptions import HTTPError
@@ -7,12 +8,11 @@ from ..utils import (
     float_or_none,
     format_field,
     int_or_none,
-    join_nonempty,
+    jwt_decode_hs256,
     parse_age_limit,
     parse_count,
     parse_iso8601,
     qualities,
-    remove_start,
     time_seconds,
     traverse_obj,
     url_or_none,
@@ -24,10 +24,15 @@ class CrunchyrollBaseIE(InfoExtractor):
     _BASE_URL = 'https://www.crunchyroll.com'
     _API_BASE = 'https://api.crunchyroll.com'
     _NETRC_MACHINE = 'crunchyroll'
+    _REFRESH_TOKEN = None
     _AUTH_HEADERS = None
+    _AUTH_EXPIRY = None
     _API_ENDPOINT = None
-    _BASIC_AUTH = None
-    _CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
+    _BASIC_AUTH = 'Basic ' + base64.b64encode(':'.join((
+        't-kdgp2h8c3jub8fn0fq',
+        'yfLDfMfrYvKXh4JXS1LEI2cCqu1v5Wan',
+    )).encode()).decode()
+    _IS_PREMIUM = None
     _LOCALE_LOOKUP = {
         'ar': 'ar-SA',
         'de': 'de-DE',
@@ -42,63 +47,78 @@ class CrunchyrollBaseIE(InfoExtractor):
         'hi': 'hi-IN',
     }

-    @property
-    def is_logged_in(self):
-        return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))
-
-    def _perform_login(self, username, password):
-        if self.is_logged_in:
-            return
-
-        upsell_response = self._download_json(
-            f'{self._API_BASE}/get_upsell_data.0.json', None, 'Getting session id',
-            query={
-                'sess_id': 1,
-                'device_id': 'whatvalueshouldbeforweb',
-                'device_type': 'com.crunchyroll.static',
-                'access_token': 'giKq5eY27ny3cqz',
-                'referer': f'{self._BASE_URL}/welcome/login'
-            })
-        if upsell_response['code'] != 'ok':
-            raise ExtractorError('Could not get session id')
-        session_id = upsell_response['data']['session_id']
-
-        login_response = self._download_json(
-            f'{self._API_BASE}/login.1.json', None, 'Logging in',
-            data=urlencode_postdata({
-                'account': username,
-                'password': password,
-                'session_id': session_id
-            }))
-        if login_response['code'] != 'ok':
-            raise ExtractorError('Login failed. Server message: %s' % login_response['message'], expected=True)
-        if not self.is_logged_in:
-            raise ExtractorError('Login succeeded but did not set etp_rt cookie')
-
-    def _update_auth(self):
-        if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
-            return
-
-        if not CrunchyrollBaseIE._BASIC_AUTH:
-            cx_api_param = self._CLIENT_ID[self.is_logged_in]
-            self.write_debug(f'Using cxApiParam={cx_api_param}')
-            CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
-
-        grant_type = 'etp_rt_cookie' if self.is_logged_in else 'client_id'
-        try:
-            auth_response = self._download_json(
-                f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
-                headers={'Authorization': CrunchyrollBaseIE._BASIC_AUTH}, data=f'grant_type={grant_type}'.encode())
-        except ExtractorError as error:
-            if isinstance(error.cause, HTTPError) and error.cause.status == 403:
-                raise ExtractorError(
-                    'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
-                    'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
-                    'and your browser\'s User-Agent (with --user-agent)', expected=True)
-            raise
-
-        CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
-        CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)
+    def _set_auth_info(self, response):
+        CrunchyrollBaseIE._IS_PREMIUM = 'cr_premium' in traverse_obj(response, ('access_token', {jwt_decode_hs256}, 'benefits', ...))
+        CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': response['token_type'] + ' ' + response['access_token']}
+        CrunchyrollBaseIE._AUTH_EXPIRY = time_seconds(seconds=traverse_obj(response, ('expires_in', {float_or_none}), default=300) - 10)
+
+    def _request_token(self, headers, data, note='Requesting token', errnote='Failed to request token'):
+        try:
+            return self._download_json(
+                f'{self._BASE_URL}/auth/v1/token', None, note=note, errnote=errnote,
+                headers=headers, data=urlencode_postdata(data), impersonate=True)
+        except ExtractorError as error:
+            if not isinstance(error.cause, HTTPError) or error.cause.status != 403:
+                raise
+            if target := error.cause.response.extensions.get('impersonate'):
+                raise ExtractorError(f'Got HTTP Error 403 when using impersonate target "{target}"')
+            raise ExtractorError(
+                'Request blocked by Cloudflare. '
+                'Install the required impersonation dependency if possible, '
+                'or else navigate to Crunchyroll in your browser, '
+                'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
+                'and your browser\'s User-Agent (with --user-agent)', expected=True)
+
+    def _perform_login(self, username, password):
+        if not CrunchyrollBaseIE._REFRESH_TOKEN:
+            CrunchyrollBaseIE._REFRESH_TOKEN = self.cache.load(self._NETRC_MACHINE, username)
+        if CrunchyrollBaseIE._REFRESH_TOKEN:
+            return
+
+        try:
+            login_response = self._request_token(
+                headers={'Authorization': self._BASIC_AUTH}, data={
+                    'username': username,
+                    'password': password,
+                    'grant_type': 'password',
+                    'scope': 'offline_access',
+                }, note='Logging in', errnote='Failed to log in')
+        except ExtractorError as error:
+            if isinstance(error.cause, HTTPError) and error.cause.status == 401:
+                raise ExtractorError('Invalid username and/or password', expected=True)
+            raise
+
+        CrunchyrollBaseIE._REFRESH_TOKEN = login_response['refresh_token']
+        self.cache.store(self._NETRC_MACHINE, username, CrunchyrollBaseIE._REFRESH_TOKEN)
+        self._set_auth_info(login_response)
+
+    def _update_auth(self):
+        if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_EXPIRY > time_seconds():
+            return
+
+        auth_headers = {'Authorization': self._BASIC_AUTH}
+        if CrunchyrollBaseIE._REFRESH_TOKEN:
+            data = {
+                'refresh_token': CrunchyrollBaseIE._REFRESH_TOKEN,
+                'grant_type': 'refresh_token',
+                'scope': 'offline_access',
+            }
+        else:
+            data = {'grant_type': 'client_id'}
+            auth_headers['ETP-Anonymous-ID'] = uuid.uuid4()
+        try:
+            auth_response = self._request_token(auth_headers, data)
+        except ExtractorError as error:
+            username, password = self._get_login_info()
+            if not username or not isinstance(error.cause, HTTPError) or error.cause.status != 400:
+                raise
+            self.to_screen('Refresh token has expired. Re-logging in')
+            CrunchyrollBaseIE._REFRESH_TOKEN = None
+            self.cache.store(self._NETRC_MACHINE, username, None)
+            self._perform_login(username, password)
+            return
+
+        self._set_auth_info(auth_response)

     def _locale_from_language(self, language):
         config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
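Illustrative only: the hard-coded _BASIC_AUTH value above is an ordinary HTTP Basic credential built from a client id/secret pair, i.e. 'Basic ' + base64('<id>:<secret>'); a minimal sketch with placeholder arguments:

import base64

def basic_auth_header(client_id, client_secret):
    # same construction as the _BASIC_AUTH class attribute above
    return 'Basic ' + base64.b64encode(f'{client_id}:{client_secret}'.encode()).decode()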
@@ -135,62 +155,73 @@ class CrunchyrollBaseIE(InfoExtractor):
             raise ExtractorError(f'Unexpected response when downloading {note} JSON')
         return result

-    def _extract_formats(self, stream_response, display_id=None):
-        requested_formats = self._configuration_arg('format') or ['vo_adaptive_hls']
-        available_formats = {}
-        for stream_type, streams in traverse_obj(
-                stream_response, (('streams', ('data', 0)), {dict.items}, ...)):
-            if stream_type not in requested_formats:
-                continue
-            for stream in traverse_obj(streams, lambda _, v: v['url']):
-                hardsub_lang = stream.get('hardsub_locale') or ''
-                format_id = join_nonempty(stream_type, format_field(stream, 'hardsub_locale', 'hardsub-%s'))
-                available_formats[hardsub_lang] = (stream_type, format_id, hardsub_lang, stream['url'])
+    def _extract_chapters(self, internal_id):
+        # if no skip events are available, a 403 xml error is returned
+        skip_events = self._download_json(
+            f'https://static.crunchyroll.com/skip-events/production/{internal_id}.json',
+            internal_id, note='Downloading chapter info', fatal=False, errnote=False)
+        if not skip_events:
+            return None
+
+        chapters = []
+        for event in ('recap', 'intro', 'credits', 'preview'):
+            start = traverse_obj(skip_events, (event, 'start', {float_or_none}))
+            end = traverse_obj(skip_events, (event, 'end', {float_or_none}))
+            # some chapters have no start and/or ending time, they will just be ignored
+            if start is None or end is None:
+                continue
+            chapters.append({'title': event.capitalize(), 'start_time': start, 'end_time': end})
+
+        return chapters
+
+    def _extract_stream(self, identifier, display_id=None):
+        if not display_id:
+            display_id = identifier
+
+        self._update_auth()
+        stream_response = self._download_json(
+            f'https://cr-play-service.prd.crunchyrollsvc.com/v1/{identifier}/console/switch/play',
+            display_id, note='Downloading stream info', errnote='Failed to download stream info',
+            headers=CrunchyrollBaseIE._AUTH_HEADERS)
+
+        available_formats = {'': ('', '', stream_response['url'])}
+        for hardsub_lang, stream in traverse_obj(stream_response, ('hardSubs', {dict.items}, lambda _, v: v[1]['url'])):
+            available_formats[hardsub_lang] = (f'hardsub-{hardsub_lang}', hardsub_lang, stream['url'])

         requested_hardsubs = [('' if val == 'none' else val) for val in (self._configuration_arg('hardsub') or ['none'])]
-        if '' in available_formats and 'all' not in requested_hardsubs:
+        hardsub_langs = [lang for lang in available_formats if lang]
+        if hardsub_langs and 'all' not in requested_hardsubs:
             full_format_langs = set(requested_hardsubs)
+            self.to_screen(f'Available hardsub languages: {", ".join(hardsub_langs)}')
             self.to_screen(
-                'To get all formats of a hardsub language, use '
+                'To extract formats of a hardsub language, use '
                 '"--extractor-args crunchyrollbeta:hardsub=<language_code or all>". '
                 'See https://github.com/yt-dlp/yt-dlp#crunchyrollbeta-crunchyroll for more info',
                 only_once=True)
         else:
             full_format_langs = set(map(str.lower, available_formats))

-        audio_locale = traverse_obj(stream_response, ((None, 'meta'), 'audio_locale'), get_all=False)
+        audio_locale = traverse_obj(stream_response, ('audioLocale', {str}))
         hardsub_preference = qualities(requested_hardsubs[::-1])
-        formats = []
-        for stream_type, format_id, hardsub_lang, stream_url in available_formats.values():
-            if stream_type.endswith('hls'):
-                if hardsub_lang.lower() in full_format_langs:
-                    adaptive_formats = self._extract_m3u8_formats(
-                        stream_url, display_id, 'mp4', m3u8_id=format_id,
-                        fatal=False, note=f'Downloading {format_id} HLS manifest')
-                else:
-                    adaptive_formats = (self._m3u8_meta_format(stream_url, ext='mp4', m3u8_id=format_id),)
-            elif stream_type.endswith('dash'):
-                adaptive_formats = self._extract_mpd_formats(
-                    stream_url, display_id, mpd_id=format_id,
-                    fatal=False, note=f'Downloading {format_id} MPD manifest')
-            else:
-                self.report_warning(f'Encountered unknown stream_type: {stream_type!r}', display_id, only_once=True)
-                continue
+        formats, subtitles = [], {}
+        for format_id, hardsub_lang, stream_url in available_formats.values():
+            if hardsub_lang.lower() in full_format_langs:
+                adaptive_formats, dash_subs = self._extract_mpd_formats_and_subtitles(
+                    stream_url, display_id, mpd_id=format_id, headers=CrunchyrollBaseIE._AUTH_HEADERS,
+                    fatal=False, note=f'Downloading {f"{format_id} " if hardsub_lang else ""}MPD manifest')
+                self._merge_subtitles(dash_subs, target=subtitles)
+            else:
+                continue  # XXX: Update this if/when meta mpd formats are working
             for f in adaptive_formats:
                 if f.get('acodec') != 'none':
                     f['language'] = audio_locale
                 f['quality'] = hardsub_preference(hardsub_lang.lower())
             formats.extend(adaptive_formats)

-        return formats
-
-    def _extract_subtitles(self, data):
-        subtitles = {}
-
-        for locale, subtitle in traverse_obj(data, ((None, 'meta'), 'subtitles', {dict.items}, ...)):
-            subtitles[locale] = [traverse_obj(subtitle, {'url': 'url', 'ext': 'format'})]
-
-        return subtitles
+        for locale, subtitle in traverse_obj(stream_response, (('subtitles', 'captions'), {dict.items}, ...)):
+            subtitles.setdefault(locale, []).append(traverse_obj(subtitle, {'url': 'url', 'ext': 'format'}))
+
+        return formats, subtitles


 class CrunchyrollCmsBaseIE(CrunchyrollBaseIE):
@@ -245,7 +276,11 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
             'like_count': int,
             'dislike_count': int,
         },
-        'params': {'skip_download': 'm3u8', 'format': 'all[format_id~=hardsub]'},
+        'params': {
+            'skip_download': 'm3u8',
+            'extractor_args': {'crunchyrollbeta': {'hardsub': ['de-DE']}},
+            'format': 'bv[format_id~=hardsub]',
+        },
     }, {
         # Premium only
         'url': 'https://www.crunchyroll.com/watch/GYE5WKQGR',
@@ -306,6 +341,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
             'thumbnail': r're:^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
         },
         'params': {'skip_download': 'm3u8'},
+        'skip': 'no longer exists',
     }, {
         'url': 'https://www.crunchyroll.com/watch/G62PEZ2E6',
         'info_dict': {
@@ -359,31 +395,16 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
         else:
             raise ExtractorError(f'Unknown object type {object_type}')

-        # There might be multiple audio languages for one object (`<object>_metadata.versions`),
-        # so we need to get the id from `streams_link` instead or we dont know which language to choose
-        streams_link = response.get('streams_link')
-        if not streams_link and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
+        if not self._IS_PREMIUM and traverse_obj(response, (f'{object_type}_metadata', 'is_premium_only')):
             message = f'This {object_type} is for premium members only'
-            if self.is_logged_in:
-                raise ExtractorError(message, expected=True)
-            self.raise_login_required(message)
-
-        # We need go from unsigned to signed api to avoid getting soft banned
-        stream_response = self._call_cms_api_signed(remove_start(
-            streams_link, '/content/v2/cms/'), internal_id, lang, 'stream info')
-        result['formats'] = self._extract_formats(stream_response, internal_id)
-        result['subtitles'] = self._extract_subtitles(stream_response)
-
-        # if no intro chapter is available, a 403 without usable data is returned
-        intro_chapter = self._download_json(
-            f'https://static.crunchyroll.com/datalab-intro-v2/{internal_id}.json',
-            internal_id, note='Downloading chapter info', fatal=False, errnote=False)
-        if isinstance(intro_chapter, dict):
-            result['chapters'] = [{
-                'title': 'Intro',
-                'start_time': float_or_none(intro_chapter.get('startTime')),
-                'end_time': float_or_none(intro_chapter.get('endTime')),
-            }]
+            if CrunchyrollBaseIE._REFRESH_TOKEN:
+                self.raise_no_formats(message, expected=True, video_id=internal_id)
+            else:
+                self.raise_login_required(message, method='password', metadata_available=True)
+        else:
+            result['formats'], result['subtitles'] = self._extract_stream(internal_id)
+
+        result['chapters'] = self._extract_chapters(internal_id)

         def calculate_count(item):
             return parse_count(''.join((item['displayed'], item.get('unit') or '')))
@@ -512,7 +533,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
             'display_id': 'egaono-hana',
             'title': 'Egaono Hana',
             'track': 'Egaono Hana',
-            'artist': 'Goose house',
+            'artists': ['Goose house'],
             'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
             'genres': ['J-Pop'],
         },
@@ -525,11 +546,12 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
             'display_id': 'crossing-field',
             'title': 'Crossing Field',
             'track': 'Crossing Field',
-            'artist': 'LiSA',
+            'artists': ['LiSA'],
             'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
             'genres': ['Anime'],
         },
         'params': {'skip_download': 'm3u8'},
+        'skip': 'no longer exists',
     }, {
         'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135',
         'info_dict': {
@@ -538,7 +560,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
             'display_id': 'live-is-smile-always-364joker-at-yokohama-arena',
             'title': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
             'track': 'LiVE is Smile Always-364+JOKER- at YOKOHAMA ARENA',
-            'artist': 'LiSA',
+            'artists': ['LiSA'],
             'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
             'description': 'md5:747444e7e6300907b7a43f0a0503072e',
             'genres': ['J-Pop'],
@@ -566,16 +588,16 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
         if not response:
             raise ExtractorError(f'No video with id {internal_id} could be found (possibly region locked?)', expected=True)

-        streams_link = response.get('streams_link')
-        if not streams_link and response.get('isPremiumOnly'):
-            message = f'This {response.get("type") or "media"} is for premium members only'
-            if self.is_logged_in:
-                raise ExtractorError(message, expected=True)
-            self.raise_login_required(message)
-
         result = self._transform_music_response(response)
-        stream_response = self._call_api(streams_link, internal_id, lang, 'stream info')
-        result['formats'] = self._extract_formats(stream_response, internal_id)
+
+        if not self._IS_PREMIUM and response.get('isPremiumOnly'):
+            message = f'This {response.get("type") or "media"} is for premium members only'
+            if CrunchyrollBaseIE._REFRESH_TOKEN:
+                self.raise_no_formats(message, expected=True, video_id=internal_id)
+            else:
+                self.raise_login_required(message, method='password', metadata_available=True)
+        else:
+            result['formats'], _ = self._extract_stream(f'music/{internal_id}', internal_id)

         return result

@@ -587,7 +609,7 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
             'display_id': 'slug',
             'title': 'title',
             'track': 'title',
-            'artist': ('artist', 'name'),
+            'artists': ('artist', 'name', all),
             'description': ('description', {str}, {lambda x: x.replace(r'\r\n', '\n') or None}),
             'thumbnails': ('images', ..., ..., {
                 'url': ('source', {url_or_none}),
@@ -611,7 +633,7 @@ class CrunchyrollArtistIE(CrunchyrollBaseIE):
         'info_dict': {
             'id': 'MA179CB50D',
             'title': 'LiSA',
-            'genres': ['J-Pop', 'Anime', 'Rock'],
+            'genres': ['Anime', 'J-Pop', 'Rock'],
             'description': 'md5:16d87de61a55c3f7d6c454b73285938e',
         },
         'playlist_mincount': 83,
@@ -65,12 +65,14 @@ class DropboxIE(InfoExtractor):
         formats, subtitles, has_anonymous_download = [], {}, False
         for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
             decoded = base64.b64decode(encoded).decode('utf-8', 'ignore')
+            if not has_anonymous_download:
+                has_anonymous_download = self._search_regex(
+                    r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)
             transcode_url = self._search_regex(
                 r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', decoded, 'transcode url', default=None)
             if not transcode_url:
                 continue
             formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
-            has_anonymous_download = self._search_regex(r'(anonymous:\tanonymous)', decoded, 'anonymous', default=False)
             break

         # downloads enabled we can get the original file
@@ -1,5 +1,5 @@
 import json
-from socket import timeout
+import socket

 from .common import InfoExtractor
 from ..utils import (
@@ -56,7 +56,7 @@ class DTubeIE(InfoExtractor):
             try:
                 self.to_screen('%s: Checking %s video format URL' % (video_id, format_id))
                 self._downloader._opener.open(video_url, timeout=5).close()
-            except timeout:
+            except socket.timeout:
                 self.to_screen(
                     '%s: %s URL is invalid, skipping' % (video_id, format_id))
                 continue
@@ -1,105 +0,0 @@
-import json
-
-from .common import InfoExtractor
-from ..compat import (
-    compat_b64decode,
-    compat_str,
-    compat_urlparse,
-)
-from ..utils import (
-    extract_attributes,
-    ExtractorError,
-    get_elements_by_class,
-    urlencode_postdata,
-)
-
-
-class EinthusanIE(InfoExtractor):
-    _VALID_URL = r'https?://(?P<host>einthusan\.(?:tv|com|ca))/movie/watch/(?P<id>[^/?#&]+)'
-    _TESTS = [{
-        'url': 'https://einthusan.tv/movie/watch/9097/',
-        'md5': 'ff0f7f2065031b8a2cf13a933731c035',
-        'info_dict': {
-            'id': '9097',
-            'ext': 'mp4',
-            'title': 'Ae Dil Hai Mushkil',
-            'description': 'md5:33ef934c82a671a94652a9b4e54d931b',
-            'thumbnail': r're:^https?://.*\.jpg$',
-        }
-    }, {
-        'url': 'https://einthusan.tv/movie/watch/51MZ/?lang=hindi',
-        'only_matching': True,
-    }, {
-        'url': 'https://einthusan.com/movie/watch/9097/',
-        'only_matching': True,
-    }, {
-        'url': 'https://einthusan.ca/movie/watch/4E9n/?lang=hindi',
-        'only_matching': True,
-    }]
-
-    # reversed from jsoncrypto.prototype.decrypt() in einthusan-PGMovieWatcher.js
-    def _decrypt(self, encrypted_data, video_id):
-        return self._parse_json(compat_b64decode((
-            encrypted_data[:10] + encrypted_data[-1] + encrypted_data[12:-1]
-        )).decode('utf-8'), video_id)
-
-    def _real_extract(self, url):
-        mobj = self._match_valid_url(url)
-        host = mobj.group('host')
-        video_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, video_id)
-
-        title = self._html_search_regex(r'<h3>([^<]+)</h3>', webpage, 'title')
-
-        player_params = extract_attributes(self._search_regex(
-            r'(<section[^>]+id="UIVideoPlayer"[^>]+>)', webpage, 'player parameters'))
-
-        page_id = self._html_search_regex(
-            '<html[^>]+data-pageid="([^"]+)"', webpage, 'page ID')
-        video_data = self._download_json(
-            'https://%s/ajax/movie/watch/%s/' % (host, video_id), video_id,
-            data=urlencode_postdata({
-                'xEvent': 'UIVideoPlayer.PingOutcome',
-                'xJson': json.dumps({
-                    'EJOutcomes': player_params['data-ejpingables'],
-                    'NativeHLS': False
-                }),
-                'arcVersion': 3,
-                'appVersion': 59,
-                'gorilla.csrf.Token': page_id,
-            }))['Data']
-
-        if isinstance(video_data, compat_str) and video_data.startswith('/ratelimited/'):
-            raise ExtractorError(
-                'Download rate reached. Please try again later.', expected=True)
-
-        ej_links = self._decrypt(video_data['EJLinks'], video_id)
-
-        formats = []
-
-        m3u8_url = ej_links.get('HLSLink')
-        if m3u8_url:
-            formats.extend(self._extract_m3u8_formats(
-                m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native'))
-
-        mp4_url = ej_links.get('MP4Link')
-        if mp4_url:
-            formats.append({
-                'url': mp4_url,
-            })
-
-        description = get_elements_by_class('synopsis', webpage)[0]
-        thumbnail = self._html_search_regex(
-            r'''<img[^>]+src=(["'])(?P<url>(?!\1).+?/moviecovers/(?!\1).+?)\1''',
-            webpage, 'thumbnail url', fatal=False, group='url')
-        if thumbnail is not None:
-            thumbnail = compat_urlparse.urljoin(url, thumbnail)
-
-        return {
-            'id': video_id,
-            'title': title,
-            'formats': formats,
-            'thumbnail': thumbnail,
-            'description': description,
-        }
@@ -16,13 +16,31 @@ class EplusIbIE(InfoExtractor):
     _VALID_URL = [r'https?://live\.eplus\.jp/ex/player\?ib=(?P<id>(?:\w|%2B|%2F){86}%3D%3D)',
                   r'https?://live\.eplus\.jp/(?P<id>sample|\d+)']
     _TESTS = [{
-        'url': 'https://live.eplus.jp/ex/player?ib=YEFxb3Vyc2Dombnjg7blkrLlrablnJLjgrnjgq%2Fjg7zjg6vjgqLjgqTjg4njg6vlkIzlpb3kvJpgTGllbGxhIQ%3D%3D',
+        'url': 'https://live.eplus.jp/ex/player?ib=41K6Wzbr3PlcMD%2FOKHFlC%2FcZCe2Eaw7FK%2BpJS1ooUHki8d0vGSy2mYqxillQBe1dSnOxU%2B8%2FzXKls4XPBSb3vw%3D%3D',
         'info_dict': {
-            'id': '354502-0001-002',
-            'title': 'LoveLive!Series Presents COUNTDOWN LoveLive! 2021→2022~LIVE with a smile!~【Streaming+(配信)】',
+            'id': '335699-0001-006',
+            'title': '少女☆歌劇 レヴュースタァライト -The LIVE 青嵐- BLUE GLITTER <定点映像配信>【Streaming+(配信)】',
             'live_status': 'was_live',
-            'release_date': '20211231',
-            'release_timestamp': 1640952000,
+            'release_date': '20201221',
+            'release_timestamp': 1608544800,
+        },
+        'params': {
+            'skip_download': True,
+            'ignore_no_formats_error': True,
+        },
+        'expected_warnings': [
+            'This event may not be accessible',
+            'No video formats found',
+            'Requested format is not available',
+        ],
+    }, {
+        'url': 'https://live.eplus.jp/ex/player?ib=6QSsQdyRAwOFZrEHWlhRm7vocgV%2FO0YzBZ%2BaBEBg1XR%2FmbLn0R%2F048dUoAY038%2F%2F92MJ73BsoAtvUpbV6RLtDQ%3D%3D&show_id=2371511',
+        'info_dict': {
+            'id': '348021-0054-001',
+            'title': 'ラブライブ!スーパースター!! Liella! First LoveLive! Tour ~Starlines~【東京/DAY.1】',
+            'live_status': 'was_live',
+            'release_date': '20220115',
+            'release_timestamp': 1642233600,
             'description': str,
         },
         'params': {
@@ -124,6 +142,10 @@ class EplusIbIE(InfoExtractor):
         if data_json.get('drm_mode') == 'ON':
             self.report_drm(video_id)

+        if data_json.get('is_pass_ticket') == 'YES':
+            raise ExtractorError(
+                'This URL is for a pass ticket instead of a player page', expected=True)
+
         delivery_status = data_json.get('delivery_status')
         archive_mode = data_json.get('archive_mode')
         release_timestamp = try_call(lambda: unified_timestamp(data_json['event_datetime']) - 32400)
@@ -94,13 +94,14 @@ class EuropaIE(InfoExtractor):

 class EuroParlWebstreamIE(InfoExtractor):
     _VALID_URL = r'''(?x)
-        https?://multimedia\.europarl\.europa\.eu/[^/#?]+/
-        (?:(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
+        https?://multimedia\.europarl\.europa\.eu/
+        (?:\w+/)?webstreaming/(?:[\w-]+_)?(?P<id>[\w-]+)
     '''
     _TESTS = [{
         'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
         'info_dict': {
             'id': '62388b15-d85b-4add-99aa-ba12ccf64f0d',
+            'display_id': '20220914-0900-PLENARY',
             'ext': 'mp4',
             'title': 'Plenary session',
             'release_timestamp': 1663139069,
@@ -125,6 +126,7 @@ class EuroParlWebstreamIE(InfoExtractor):
         'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-culture-and-education_20230301-1130-COMMITTEE-CULT',
         'info_dict': {
             'id': '7355662c-8eac-445e-4bb9-08db14b0ddd7',
+            'display_id': '20230301-1130-COMMITTEE-CULT',
             'ext': 'mp4',
             'release_date': '20230301',
             'title': 'Committee on Culture and Education',
@@ -142,6 +144,19 @@ class EuroParlWebstreamIE(InfoExtractor):
             'live_status': 'is_live',
         },
         'skip': 'Not live anymore'
+    }, {
+        'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/20240320-1345-SPECIAL-PRESSER',
+        'info_dict': {
+            'id': 'c1f11567-5b52-470a-f3e1-08dc3c216ace',
+            'display_id': '20240320-1345-SPECIAL-PRESSER',
+            'ext': 'mp4',
+            'release_date': '20240320',
+            'title': 'md5:7c6c814cac55dea5e2d87bf8d3db2234',
+            'release_timestamp': 1710939767,
+        }
+    }, {
+        'url': 'https://multimedia.europarl.europa.eu/webstreaming/briefing-for-media-on-2024-european-elections_20240429-1000-SPECIAL-OTHER',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
@@ -166,6 +181,7 @@ class EuroParlWebstreamIE(InfoExtractor):

         return {
             'id': json_info['id'],
+            'display_id': display_id,
             'title': traverse_obj(webpage_nextjs, (('mediaItem', 'title'), ('title', )), get_all=False),
             'formats': formats,
             'subtitles': subtitles,
@@ -560,7 +560,7 @@ class FacebookIE(InfoExtractor):
             js_data, lambda x: x['jsmods']['instances'], list) or [])

         def extract_dash_manifest(video, formats):
-            dash_manifest = video.get('dash_manifest')
+            dash_manifest = traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str)
             if dash_manifest:
                 formats.extend(self._parse_mpd_formats(
                     compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
yt_dlp/extractor/fathom.py (new file, 54 lines)
@@ -0,0 +1,54 @@
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    extract_attributes,
+    float_or_none,
+    get_element_html_by_id,
+    parse_iso8601,
+)
+from ..utils.traversal import traverse_obj
+
+
+class FathomIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?fathom\.video/share/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://fathom.video/share/G9mkjkspnohVVZ_L5nrsoPycyWcB8y7s',
+        'md5': '0decd5343b8f30ae268625e79a02b60f',
+        'info_dict': {
+            'id': '47200596',
+            'ext': 'mp4',
+            'title': 'eCom Inucbator - Coaching Session',
+            'duration': 8125.380507,
+            'timestamp': 1699048914,
+            'upload_date': '20231103',
+        },
+    }, {
+        'url': 'https://fathom.video/share/mEws3bybftHL2QLymxYEDeE21vtLxGVm',
+        'md5': '4f5cb382126c22d1aba8a939f9c49690',
+        'info_dict': {
+            'id': '46812957',
+            'ext': 'mp4',
+            'title': 'Jon, Lawrence, Neman chat about practice',
+            'duration': 3571.517847,
+            'timestamp': 1698933600,
+            'upload_date': '20231102',
+        },
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        props = traverse_obj(
+            get_element_html_by_id('app', webpage), ({extract_attributes}, 'data-page', {json.loads}, 'props'))
+        video_id = str(props['call']['id'])
+
+        return {
+            'id': video_id,
+            'formats': self._extract_m3u8_formats(props['call']['video_url'], video_id, 'mp4'),
+            **traverse_obj(props, {
+                'title': ('head', 'title', {str}),
+                'duration': ('duration', {float_or_none}),
+                'timestamp': ('call', 'started_at', {parse_iso8601}),
+            }),
+        }
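Illustrative only: the new extractor reads a JSON data-page attribute from the #app element; a small sketch of that extraction pattern, with made-up HTML:

import json
from yt_dlp.utils import extract_attributes, get_element_html_by_id

html = '<div id="app" data-page=\'{"props": {"call": {"id": 1, "video_url": "https://example.com/v.m3u8"}}}\'></div>'
props = json.loads(extract_attributes(get_element_html_by_id('app', html))['data-page'])['props']
# props['call']['id'] -> 1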
@@ -2104,22 +2104,6 @@ class GenericIE(InfoExtractor):
                 'age_limit': 0,
             },
         },
-        {
-            'note': 'JW Player embed with unicode-escape sequences in URL',
-            'url': 'https://www.medici.tv/en/concerts/lahav-shani-mozart-mahler-israel-philharmonic-abu-dhabi-classics',
-            'info_dict': {
-                'id': 'm',
-                'ext': 'mp4',
-                'title': 'Lahav Shani conducts the Israel Philharmonic\'s first-ever concert in Abu Dhabi',
-                'description': 'Mahler\'s ',
-                'uploader': 'www.medici.tv',
-                'age_limit': 0,
-                'thumbnail': r're:^https?://.+\.jpg',
-            },
-            'params': {
-                'skip_download': True,
-            },
-        },
         {
             'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/',
             'md5': 'e2f0a4c329f7986280b7328e24036d60',
@@ -1,9 +1,11 @@
 import re

 from .common import InfoExtractor
+from .youtube import YoutubeIE
 from ..compat import compat_parse_qs
 from ..utils import (
     ExtractorError,
+    bug_reports_message,
     determine_ext,
     extract_attributes,
     get_element_by_class,
@@ -38,6 +40,17 @@ class GoogleDriveIE(InfoExtractor):
             'duration': 45,
             'thumbnail': 'https://drive.google.com/thumbnail?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
         }
+    }, {
+        # has itag 50 which is not in YoutubeIE._formats (royalty Free music from 1922)
+        'url': 'https://drive.google.com/uc?id=1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
+        'md5': '322db8d63dd19788c04050a4bba67073',
+        'info_dict': {
+            'id': '1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
+            'ext': 'mp3',
+            'title': 'My Buddy - Henry Burr - Gus Kahn - Walter Donaldson.mp3',
+            'duration': 184,
+            'thumbnail': 'https://drive.google.com/thumbnail?id=1IP0o8dHcQrIHGgVyp0Ofvx2cGfLzyO1x',
+        },
     }, {
         # video can't be watched anonymously due to view count limit reached,
         # but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
@@ -58,22 +71,8 @@ class GoogleDriveIE(InfoExtractor):
         'only_matching': True,
     }]
     _FORMATS_EXT = {
-        '5': 'flv',
-        '6': 'flv',
-        '13': '3gp',
-        '17': '3gp',
-        '18': 'mp4',
-        '22': 'mp4',
-        '34': 'flv',
-        '35': 'flv',
-        '36': '3gp',
-        '37': 'mp4',
-        '38': 'mp4',
-        '43': 'webm',
-        '44': 'webm',
-        '45': 'webm',
-        '46': 'webm',
-        '59': 'mp4',
+        **{k: v['ext'] for k, v in YoutubeIE._formats.items() if v.get('ext')},
+        '50': 'm4a',
     }
     _BASE_URL_CAPTIONS = 'https://drive.google.com/timedtext'
     _CAPTIONS_ENTRY_TAG = {
@@ -194,10 +193,13 @@ class GoogleDriveIE(InfoExtractor):
             if len(fmt_stream_split) < 2:
                 continue
             format_id, format_url = fmt_stream_split[:2]
+            ext = self._FORMATS_EXT.get(format_id)
+            if not ext:
+                self.report_warning(f'Unknown format {format_id}{bug_reports_message()}')
             f = {
                 'url': lowercase_escape(format_url),
                 'format_id': format_id,
-                'ext': self._FORMATS_EXT[format_id],
+                'ext': ext,
             }
             resolution = resolutions.get(format_id)
             if resolution:
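Illustrative only: the _FORMATS_EXT change above reuses a shared itag table and pins one local override; the merge pattern, with a made-up base mapping standing in for YoutubeIE._formats:

base_formats = {'18': {'ext': 'mp4'}, '50': {}}  # stand-in for the shared itag table
formats_ext = {
    **{k: v['ext'] for k, v in base_formats.items() if v.get('ext')},
    '50': 'm4a',  # itag 50 carries no ext in the base table, so it is set explicitly
}
# formats_ext == {'18': 'mp4', '50': 'm4a'}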
@@ -1,6 +1,6 @@
 import base64
 import binascii
-import datetime
+import datetime as dt
 import hashlib
 import hmac
 import json
@@ -422,7 +422,7 @@ class AwsIdp:
         months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
         days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

-        time_now = datetime.datetime.now(datetime.timezone.utc)
+        time_now = dt.datetime.now(dt.timezone.utc)
         format_string = "{} {} {} %H:%M:%S UTC %Y".format(days[time_now.weekday()], months[time_now.month], time_now.day)
         time_string = time_now.strftime(format_string)
         return time_string
@@ -1,7 +1,8 @@
 import re

+from .cloudflarestream import CloudflareStreamIE
 from .common import InfoExtractor
-from ..utils import traverse_obj
+from ..utils.traversal import traverse_obj


 class HytaleIE(InfoExtractor):
@@ -49,7 +50,7 @@ class HytaleIE(InfoExtractor):
         entries = [
             self.url_result(
                 f'https://cloudflarestream.com/{video_hash}/manifest/video.mpd?parentOrigin=https%3A%2F%2Fhytale.com',
-                title=self._titles.get(video_hash), url_transparent=True)
+                CloudflareStreamIE, title=self._titles.get(video_hash), url_transparent=True)
             for video_hash in re.findall(
                 r'<stream\s+class\s*=\s*"ql-video\s+cf-stream"\s+src\s*=\s*"([a-f0-9]{32})"',
                 webpage)
@@ -1,103 +0,0 @@
-import re
-
-from .common import InfoExtractor
-from ..utils import (
-    InAdvancePagedList,
-    int_or_none,
-    orderedSet,
-    unified_strdate,
-)
-
-
-class JableIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?jable\.tv/videos/(?P<id>[\w-]+)'
-    _TESTS = [{
-        'url': 'https://jable.tv/videos/pppd-812/',
-        'md5': 'f1537283a9bc073c31ff86ca35d9b2a6',
-        'info_dict': {
-            'id': 'pppd-812',
-            'ext': 'mp4',
-            'title': 'PPPD-812 只要表現好巨乳女教師吉根柚莉愛就獎勵學生們在白虎穴內射出精液',
-            'description': 'md5:5b6d4199a854f62c5e56e26ccad19967',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'age_limit': 18,
-            'like_count': int,
-            'view_count': int,
-        },
-    }, {
-        'url': 'https://jable.tv/videos/apak-220/',
-        'md5': '71f9239d69ced58ab74a816908847cc1',
-        'info_dict': {
-            'id': 'apak-220',
-            'ext': 'mp4',
-            'title': 'md5:5c3861b7cf80112a6e2b70bccf170824',
-            'description': '',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'age_limit': 18,
-            'like_count': int,
-            'view_count': int,
-            'upload_date': '20220319',
-        },
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        formats = self._extract_m3u8_formats(
-            self._search_regex(r'var\s+hlsUrl\s*=\s*\'([^\']+)', webpage, 'hls_url'), video_id, 'mp4', m3u8_id='hls')
-
-        return {
-            'id': video_id,
-            'title': self._og_search_title(webpage),
-            'description': self._og_search_description(webpage, default=''),
-            'thumbnail': self._og_search_thumbnail(webpage, default=None),
-            'formats': formats,
-            'age_limit': 18,
-            'upload_date': unified_strdate(self._search_regex(
-                r'class="inactive-color">\D+\s+(\d{4}-\d+-\d+)', webpage, 'upload_date', default=None)),
-            'view_count': int_or_none(self._search_regex(
-                r'#icon-eye"></use></svg>\n*<span class="mr-3">([\d ]+)',
-                webpage, 'view_count', default='').replace(' ', '')),
-            'like_count': int_or_none(self._search_regex(
-                r'#icon-heart"></use></svg><span class="count">(\d+)', webpage, 'link_count', default=None)),
-        }
-
-
-class JablePlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?jable\.tv/(?:categories|models|tags)/(?P<id>[\w-]+)'
-    _TESTS = [{
-        'url': 'https://jable.tv/models/kaede-karen/',
-        'info_dict': {
-            'id': 'kaede-karen',
-            'title': '楓カレン',
-        },
-        'playlist_count': 34,
-    }, {
-        'url': 'https://jable.tv/categories/roleplay/',
-        'only_matching': True,
-    }, {
-        'url': 'https://jable.tv/tags/girl/',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
-        webpage = self._download_webpage(url, playlist_id)
-
-        def page_func(page_num):
-            return [
-                self.url_result(player_url, JableIE)
-                for player_url in orderedSet(re.findall(
-                    r'href="(https://jable.tv/videos/[\w-]+/?)"',
-                    self._download_webpage(url, playlist_id, query={
-                        'mode': 'async',
-                        'from': page_num + 1,
-                        'function': 'get_block',
-                        'block_id': 'list_videos_common_videos_list',
-                    }, note=f'Downloading page {page_num + 1}')))]
-
-        return self.playlist_result(
-            InAdvancePagedList(page_func, int_or_none(self._search_regex(
-                r'from:(\d+)">[^<]+\s*»', webpage, 'last page number', default=1)), 24),
-            playlist_id, self._search_regex(
-                r'<h2 class="h3-md mb-1">([^<]+)', webpage, 'playlist title', default=None))
@@ -1,89 +1,143 @@
+import functools
+import math
+import re
+
 from .common import InfoExtractor
 from ..utils import (
+    InAdvancePagedList,
+    clean_html,
     int_or_none,
-    js_to_json,
+    make_archive_id,
+    smuggle_url,
+    unsmuggle_url,
+    url_basename,
     url_or_none,
     urlencode_postdata,
-    urljoin,
 )
 from ..utils.traversal import traverse_obj


 class JioSaavnBaseIE(InfoExtractor):
-    def _extract_initial_data(self, url, audio_id):
-        webpage = self._download_webpage(url, audio_id)
-        return self._search_json(
-            r'window\.__INITIAL_DATA__\s*=', webpage,
-            'init json', audio_id, transform_source=js_to_json)
+    _API_URL = 'https://www.jiosaavn.com/api.php'
+    _VALID_BITRATES = {'16', '32', '64', '128', '320'}

+    @functools.cached_property
+    def requested_bitrates(self):
+        requested_bitrates = self._configuration_arg('bitrate', ['128', '320'], ie_key='JioSaavn')
+        if invalid_bitrates := set(requested_bitrates) - self._VALID_BITRATES:
-class JioSaavnSongIE(JioSaavnBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?(?:jiosaavn\.com/song/[^/?#]+/|saavn\.com/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
-    _TESTS = [{
-        'url': 'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk',
-        'md5': '3b84396d15ed9e083c3106f1fa589c04',
-        'info_dict': {
-            'id': 'OQsEfQFVUXk',
-            'ext': 'mp4',
-            'title': 'Leja Re',
-            'album': 'Leja Re',
-            'thumbnail': 'https://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
-            'duration': 205,
-            'view_count': int,
-            'release_year': 2018,
-        },
-    }, {
-        'url': 'https://www.saavn.com/s/song/hindi/Saathiya/O-Humdum-Suniyo-Re/KAMiazoCblU',
-        'only_matching': True,
-    }]
-
-    _VALID_BITRATES = ('16', '32', '64', '128', '320')
-
-    def _real_extract(self, url):
-        audio_id = self._match_id(url)
-        extract_bitrates = self._configuration_arg('bitrate', ['128', '320'], ie_key='JioSaavn')
-        if invalid_bitrates := [br for br in extract_bitrates if br not in self._VALID_BITRATES]:
             raise ValueError(
                 f'Invalid bitrate(s): {", ".join(invalid_bitrates)}. '
-                + f'Valid bitrates are: {", ".join(self._VALID_BITRATES)}')
+                + f'Valid bitrates are: {", ".join(sorted(self._VALID_BITRATES, key=int))}')
+        return requested_bitrates

-        song_data = self._extract_initial_data(url, audio_id)['song']['song']
-        formats = []
-        for bitrate in extract_bitrates:
+    def _extract_formats(self, song_data):
+        for bitrate in self.requested_bitrates:
             media_data = self._download_json(
-                'https://www.jiosaavn.com/api.php', audio_id, f'Downloading format info for {bitrate}',
+                self._API_URL, song_data['id'],
+                f'Downloading format info for {bitrate}',
                 fatal=False, data=urlencode_postdata({
                     '__call': 'song.generateAuthToken',
                     '_format': 'json',
                     'bitrate': bitrate,
                     'url': song_data['encrypted_media_url'],
                 }))
-            if not media_data.get('auth_url'):
+            if not traverse_obj(media_data, ('auth_url', {url_or_none})):
                 self.report_warning(f'Unable to extract format info for {bitrate}')
                 continue
-            formats.append({
+            ext = media_data.get('type')
+            yield {
                 'url': media_data['auth_url'],
-                'ext': media_data.get('type'),
+                'ext': 'm4a' if ext == 'mp4' else ext,
                 'format_id': bitrate,
                 'abr': int(bitrate),
                 'vcodec': 'none',
-            })
+            }

-        return {
-            'id': audio_id,
-            'formats': formats,
-            **traverse_obj(song_data, {
-                'title': ('title', 'text'),
-                'album': ('album', 'text'),
-                'thumbnail': ('image', 0, {url_or_none}),
+    def _extract_song(self, song_data, url=None):
+        info = traverse_obj(song_data, {
+            'id': ('id', {str}),
+            'title': ('song', {clean_html}),
+            'album': ('album', {clean_html}),
+            'thumbnail': ('image', {url_or_none}, {lambda x: re.sub(r'-\d+x\d+\.', '-500x500.', x)}),
             'duration': ('duration', {int_or_none}),
             'view_count': ('play_count', {int_or_none}),
             'release_year': ('year', {int_or_none}),
-            }),
-        }
+            'artists': ('primary_artists', {lambda x: x.split(', ') if x else None}),
+            'webpage_url': ('perma_url', {url_or_none}),
+        })
+        if webpage_url := info.get('webpage_url') or url:
+            info['display_id'] = url_basename(webpage_url)
+            info['_old_archive_ids'] = [make_archive_id(JioSaavnSongIE, info['display_id'])]
+
+        return info
+
+    def _call_api(self, type_, token, note='API', params={}):
+        return self._download_json(
+            self._API_URL, token, f'Downloading {note} JSON', f'Unable to download {note} JSON',
+            query={
+                '__call': 'webapi.get',
+                '_format': 'json',
+                '_marker': '0',
+                'ctx': 'web6dot0',
+                'token': token,
+                'type': type_,
+                **params,
+            })
+
+    def _yield_songs(self, playlist_data):
+        for song_data in traverse_obj(playlist_data, ('songs', lambda _, v: v['id'] and v['perma_url'])):
+            song_info = self._extract_song(song_data)
+            url = smuggle_url(song_info['webpage_url'], {
+                'id': song_data['id'],
+                'encrypted_media_url': song_data['encrypted_media_url'],
+            })
+            yield self.url_result(url, JioSaavnSongIE, url_transparent=True, **song_info)
+
+
+class JioSaavnSongIE(JioSaavnBaseIE):
+    IE_NAME = 'jiosaavn:song'
+    _VALID_URL = r'https?://(?:www\.)?(?:jiosaavn\.com/song/[^/?#]+/|saavn\.com/s/song/(?:[^/?#]+/){3})(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk',
+        'md5': '3b84396d15ed9e083c3106f1fa589c04',
+        'info_dict': {
+            'id': 'IcoLuefJ',
+            'display_id': 'OQsEfQFVUXk',
+            'ext': 'm4a',
+            'title': 'Leja Re',
+            'album': 'Leja Re',
+            'thumbnail': r're:https?://c.saavncdn.com/258/Leja-Re-Hindi-2018-20181124024539-500x500.jpg',
+            'duration': 205,
+            'view_count': int,
+            'release_year': 2018,
+            'artists': ['Sandesh Shandilya', 'Dhvani Bhanushali', 'Tanishk Bagchi'],
+            '_old_archive_ids': ['jiosaavnsong OQsEfQFVUXk'],
+        },
+    }, {
+        'url': 'https://www.saavn.com/s/song/hindi/Saathiya/O-Humdum-Suniyo-Re/KAMiazoCblU',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url)
+        song_data = traverse_obj(smuggled_data, ({
+            'id': ('id', {str}),
+            'encrypted_media_url': ('encrypted_media_url', {str}),
+        }))
+
+        if 'id' in song_data and 'encrypted_media_url' in song_data:
+            result = {'id': song_data['id']}
+        else:
+            # only extract metadata if this is not a url_transparent result
+            song_data = self._call_api('song', self._match_id(url))['songs'][0]
+            result = self._extract_song(song_data, url)
+
+        result['formats'] = list(self._extract_formats(song_data))
+        return result


 class JioSaavnAlbumIE(JioSaavnBaseIE):
+    IE_NAME = 'jiosaavn:album'
     _VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/album/[^/?#]+/(?P<id>[^/?#]+)'
     _TESTS = [{
         'url': 'https://www.jiosaavn.com/album/96/buIOjYZDrNA_',
@@ -95,11 +149,46 @@ class JioSaavnAlbumIE(JioSaavnBaseIE):
     }]

     def _real_extract(self, url):
-        album_id = self._match_id(url)
-        album_view = self._extract_initial_data(url, album_id)['albumView']
+        display_id = self._match_id(url)
+        album_data = self._call_api('album', display_id)

-        return self.playlist_from_matches(
-            traverse_obj(album_view, (
-                'modules', lambda _, x: x['key'] == 'list', 'data', ..., 'title', 'action', {str})),
-            album_id, traverse_obj(album_view, ('album', 'title', 'text', {str})), ie=JioSaavnSongIE,
-            getter=lambda x: urljoin('https://www.jiosaavn.com/', x))
+        return self.playlist_result(
+            self._yield_songs(album_data), display_id, traverse_obj(album_data, ('title', {str})))
+
+
+class JioSaavnPlaylistIE(JioSaavnBaseIE):
+    IE_NAME = 'jiosaavn:playlist'
+    _VALID_URL = r'https?://(?:www\.)?(?:jio)?saavn\.com/s/playlist/(?:[^/?#]+/){2}(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-english/LlJ8ZWT1ibN5084vKHRj2Q__',
+        'info_dict': {
+            'id': 'LlJ8ZWT1ibN5084vKHRj2Q__',
+            'title': 'Mood English',
+        },
+        'playlist_mincount': 301,
+    }, {
+        'url': 'https://www.jiosaavn.com/s/playlist/2279fbe391defa793ad7076929a2f5c9/mood-hindi/DVR,pFUOwyXqIp77B1JF,A__',
+        'info_dict': {
+            'id': 'DVR,pFUOwyXqIp77B1JF,A__',
+            'title': 'Mood Hindi',
+        },
+        'playlist_mincount': 801,
+    }]
+    _PAGE_SIZE = 50
+
+    def _fetch_page(self, token, page):
+        return self._call_api(
+            'playlist', token, f'playlist page {page}', {'p': page, 'n': self._PAGE_SIZE})
+
+    def _entries(self, token, first_page_data, page):
+        page_data = first_page_data if not page else self._fetch_page(token, page + 1)
+        yield from self._yield_songs(page_data)
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        playlist_data = self._fetch_page(display_id, 1)
+        total_pages = math.ceil(int(playlist_data['list_count']) / self._PAGE_SIZE)
+
+        return self.playlist_result(InAdvancePagedList(
+            functools.partial(self._entries, display_id, playlist_data),
+            total_pages, self._PAGE_SIZE), display_id, traverse_obj(playlist_data, ('listname', {str})))

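
Note: the rewritten JioSaavn base class reads its bitrate list through `self._configuration_arg('bitrate', ..., ie_key='JioSaavn')`, so the set of requested formats can be narrowed or widened with extractor arguments. A minimal sketch of driving this from the Python API follows; the lowercase `jiosaavn` key mirrors the `ie_key` above and the URL is the song test case from this diff, but this snippet is an illustration rather than part of the change itself.

# Sketch: request only the 16 and 320 kbps audio formats from the rewritten extractor.
import yt_dlp

opts = {'extractor_args': {'jiosaavn': {'bitrate': ['16', '320']}}}
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://www.jiosaavn.com/song/leja-re/OQsEfQFVUXk'])
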
@@ -1,4 +1,4 @@
-import datetime
+import datetime as dt
 import urllib.parse

 from .common import InfoExtractor
@@ -50,8 +50,8 @@ class JoqrAgIE(InfoExtractor):

     def _extract_start_timestamp(self, video_id, is_live):
         def extract_start_time_from(date_str):
-            dt = datetime_from_str(date_str) + datetime.timedelta(hours=9)
-            date = dt.strftime('%Y%m%d')
+            dt_ = datetime_from_str(date_str) + dt.timedelta(hours=9)
+            date = dt_.strftime('%Y%m%d')
             start_time = self._search_regex(
                 r'<h3[^>]+\bclass="dailyProgram-itemHeaderTime"[^>]*>[\s\d:]+–\s*(\d{1,2}:\d{1,2})',
                 self._download_webpage(
@@ -60,7 +60,7 @@ class JoqrAgIE(InfoExtractor):
                     errnote=f'Failed to download program list of {date}') or '',
                 'start time', default=None)
             if start_time:
-                return unified_timestamp(f'{dt.strftime("%Y/%m/%d")} {start_time} +09:00')
+                return unified_timestamp(f'{dt_.strftime("%Y/%m/%d")} {start_time} +09:00')
             return None

         start_timestamp = extract_start_time_from('today')
@@ -80,14 +80,14 @@ class JoqrAgIE(InfoExtractor):
             note='Downloading metadata', errnote='Failed to download metadata')
         title = self._extract_metadata('Program_name', metadata)

-        if title == '放送休止':
+        if not title or title == '放送休止':
             formats = []
             live_status = 'is_upcoming'
             release_timestamp = self._extract_start_timestamp(video_id, False)
             msg = 'This stream is not currently live'
             if release_timestamp:
                 msg += (' and will start at '
-                        + datetime.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S'))
+                        + dt.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S'))
             self.raise_no_formats(msg, expected=True)
         else:
             m3u8_path = self._search_regex(

@@ -13,7 +13,8 @@ from ..utils import (

 class KickBaseIE(InfoExtractor):
     def _real_initialize(self):
-        self._request_webpage(HEADRequest('https://kick.com/'), None, 'Setting up session', fatal=False)
+        self._request_webpage(
+            HEADRequest('https://kick.com/'), None, 'Setting up session', fatal=False, impersonate=True)
         xsrf_token = self._get_cookies('https://kick.com/').get('XSRF-TOKEN')
         if not xsrf_token:
             self.write_debug('kick.com did not set XSRF-TOKEN cookie')
@@ -25,7 +26,7 @@ class KickBaseIE(InfoExtractor):
     def _call_api(self, path, display_id, note='Downloading API JSON', headers={}, **kwargs):
         return self._download_json(
             f'https://kick.com/api/v1/{path}', display_id, note=note,
-            headers=merge_dicts(headers, self._API_HEADERS), **kwargs)
+            headers=merge_dicts(headers, self._API_HEADERS), impersonate=True, **kwargs)


 class KickIE(KickBaseIE):
@@ -82,26 +83,27 @@ class KickIE(KickBaseIE):
 class KickVODIE(KickBaseIE):
     _VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
     _TESTS = [{
-        'url': 'https://kick.com/video/54244b5e-050a-4df4-a013-b2433dafbe35',
-        'md5': '73691206a6a49db25c5aa1588e6538fc',
+        'url': 'https://kick.com/video/58bac65b-e641-4476-a7ba-3707a35e60e3',
+        'md5': '3870f94153e40e7121a6e46c068b70cb',
         'info_dict': {
-            'id': '54244b5e-050a-4df4-a013-b2433dafbe35',
+            'id': '58bac65b-e641-4476-a7ba-3707a35e60e3',
             'ext': 'mp4',
-            'title': 'Making 710-carBoosting. Kinda No Pixel inspired. !guilded - !links',
-            'description': 'md5:a0d3546bf7955d0a8252ffe0fd6f518f',
-            'channel': 'kmack710',
-            'channel_id': '16278',
-            'uploader': 'Kmack710',
-            'uploader_id': '16412',
-            'upload_date': '20221206',
-            'timestamp': 1670318289,
-            'duration': 40104.0,
+            'title': '🤠REBIRTH IS BACK!!!!🤠!stake CODE JAREDFPS 🤠',
+            'description': 'md5:02b0c46f9b4197fb545ab09dddb85b1d',
+            'channel': 'jaredfps',
+            'channel_id': '26608',
+            'uploader': 'JaredFPS',
+            'uploader_id': '26799',
+            'upload_date': '20240402',
+            'timestamp': 1712097108,
+            'duration': 33859.0,
             'thumbnail': r're:^https?://.*\.jpg',
-            'categories': ['Grand Theft Auto V'],
+            'categories': ['Call of Duty: Warzone'],
         },
         'params': {
             'skip_download': 'm3u8',
         },
+        'expected_warnings': [r'impersonation'],
     }]

     def _real_extract(self, url):

@@ -1,4 +1,4 @@
-import datetime
+import datetime as dt
 import hashlib
 import re
 import time
@@ -185,7 +185,7 @@ class LeIE(InfoExtractor):

         publish_time = parse_iso8601(self._html_search_regex(
             r'发布时间 ([^<>]+) ', page, 'publish time', default=None),
-            delimiter=' ', timezone=datetime.timedelta(hours=8))
+            delimiter=' ', timezone=dt.timedelta(hours=8))
         description = self._html_search_meta('description', page, fatal=False)

         return {

@@ -1,4 +1,4 @@
-from itertools import zip_longest
+import itertools
 import re

 from .common import InfoExtractor
@@ -156,7 +156,7 @@ class LinkedInLearningIE(LinkedInLearningBaseIE):

     def json2srt(self, transcript_lines, duration=None):
         srt_data = ''
-        for line, (line_dict, next_dict) in enumerate(zip_longest(transcript_lines, transcript_lines[1:])):
+        for line, (line_dict, next_dict) in enumerate(itertools.zip_longest(transcript_lines, transcript_lines[1:])):
             start_time, caption = line_dict['transcriptStartAt'] / 1000, line_dict['caption']
             end_time = next_dict['transcriptStartAt'] / 1000 if next_dict else duration or start_time + 1
             srt_data += '%d\n%s --> %s\n%s\n\n' % (line + 1, srt_subtitles_timecode(start_time),

yt_dlp/extractor/loom.py (new file, 461 lines)
@@ -0,0 +1,461 @@
+import json
+import textwrap
+import urllib.parse
+import uuid
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    determine_ext,
+    filter_dict,
+    get_first,
+    int_or_none,
+    parse_iso8601,
+    update_url,
+    url_or_none,
+    variadic,
+)
+from ..utils.traversal import traverse_obj
+
+
+class LoomIE(InfoExtractor):
+    IE_NAME = 'loom'
+    _VALID_URL = r'https?://(?:www\.)?loom\.com/(?:share|embed)/(?P<id>[\da-f]{32})'
+    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=["\'](?P<url>{_VALID_URL})']
+    _TESTS = [{
+        # m3u8 raw-url, mp4 transcoded-url, cdn url == raw-url, json subs only
+        'url': 'https://www.loom.com/share/43d05f362f734614a2e81b4694a3a523',
+        'md5': 'bfc2d7e9c2e0eb4813212230794b6f42',
+        'info_dict': {
+            'id': '43d05f362f734614a2e81b4694a3a523',
+            'ext': 'mp4',
+            'title': 'A Ruler for Windows - 28 March 2022',
+            'uploader': 'wILLIAM PIP',
+            'upload_date': '20220328',
+            'timestamp': 1648454238,
+            'duration': 27,
+        },
+    }, {
+        # webm raw-url, mp4 transcoded-url, cdn url == transcoded-url, no subs
+        'url': 'https://www.loom.com/share/c43a642f815f4378b6f80a889bb73d8d',
+        'md5': '70f529317be8cf880fcc2c649a531900',
+        'info_dict': {
+            'id': 'c43a642f815f4378b6f80a889bb73d8d',
+            'ext': 'webm',
+            'title': 'Lilah Nielsen Intro Video',
+            'uploader': 'Lilah Nielsen',
+            'upload_date': '20200826',
+            'timestamp': 1598480716,
+            'duration': 20,
+        },
+    }, {
+        # m3u8 raw-url, mp4 transcoded-url, cdn url == raw-url, vtt sub and json subs
+        'url': 'https://www.loom.com/share/9458bcbf79784162aa62ffb8dd66201b',
+        'md5': '51737ec002969dd28344db4d60b9cbbb',
+        'info_dict': {
+            'id': '9458bcbf79784162aa62ffb8dd66201b',
+            'ext': 'mp4',
+            'title': 'Sharing screen with gpt-4',
+            'description': 'Sharing screen with GPT 4 vision model and asking questions to guide through blender.',
+            'uploader': 'Suneel Matham',
+            'chapters': 'count:3',
+            'upload_date': '20231109',
+            'timestamp': 1699518978,
+            'duration': 93,
+        },
+    }, {
+        # mpd raw-url, mp4 transcoded-url, cdn url == raw-url, no subs
+        'url': 'https://www.loom.com/share/24351eb8b317420289b158e4b7e96ff2',
+        'info_dict': {
+            'id': '24351eb8b317420289b158e4b7e96ff2',
+            'ext': 'webm',
+            'title': 'OMFG clown',
+            'description': 'md5:285c5ee9d62aa087b7e3271b08796815',
+            'uploader': 'MrPumkin B',
+            'upload_date': '20210924',
+            'timestamp': 1632519618,
+            'duration': 210,
+        },
+        'params': {'skip_download': 'dash'},
+    }, {
+        # password-protected
+        'url': 'https://www.loom.com/share/50e26e8aeb7940189dff5630f95ce1f4',
+        'md5': '5cc7655e7d55d281d203f8ffd14771f7',
+        'info_dict': {
+            'id': '50e26e8aeb7940189dff5630f95ce1f4',
+            'ext': 'mp4',
+            'title': 'iOS Mobile Upload',
+            'uploader': 'Simon Curran',
+            'upload_date': '20200520',
+            'timestamp': 1590000123,
+            'duration': 35,
+        },
+        'params': {'videopassword': 'seniorinfants2'},
+    }, {
+        # embed, transcoded-url endpoint sends empty JSON response
+        'url': 'https://www.loom.com/embed/ddcf1c1ad21f451ea7468b1e33917e4e',
+        'md5': '8488817242a0db1cb2ad0ea522553cf6',
+        'info_dict': {
+            'id': 'ddcf1c1ad21f451ea7468b1e33917e4e',
+            'ext': 'mp4',
+            'title': 'CF Reset User\'s Password',
+            'uploader': 'Aimee Heintz',
+            'upload_date': '20220707',
+            'timestamp': 1657216459,
+            'duration': 181,
+        },
+        'expected_warnings': ['Failed to parse JSON'],
+    }]
+    _WEBPAGE_TESTS = [{
+        'url': 'https://www.loom.com/community/e1229802a8694a09909e8ba0fbb6d073-pg',
+        'md5': 'ec838cd01b576cf0386f32e1ae424609',
+        'info_dict': {
+            'id': 'e1229802a8694a09909e8ba0fbb6d073',
+            'ext': 'mp4',
+            'title': 'Rexie Jane Cimafranca - Founder\'s Presentation',
+            'uploader': 'Rexie Cimafranca',
+            'upload_date': '20230213',
+            'duration': 247,
+            'timestamp': 1676274030,
+        },
+    }]
+
+    _GRAPHQL_VARIABLES = {
+        'GetVideoSource': {
+            'acceptableMimes': ['DASH', 'M3U8', 'MP4'],
+        },
+    }
+    _GRAPHQL_QUERIES = {
+        'GetVideoSSR': textwrap.dedent('''\
+            query GetVideoSSR($videoId: ID!, $password: String) {
+              getVideo(id: $videoId, password: $password) {
+                __typename
+                ... on PrivateVideo {
+                  id
+                  status
+                  message
+                  __typename
+                }
+                ... on VideoPasswordMissingOrIncorrect {
+                  id
+                  message
+                  __typename
+                }
+                ... on RegularUserVideo {
+                  id
+                  __typename
+                  createdAt
+                  description
+                  download_enabled
+                  folder_id
+                  is_protected
+                  needs_password
+                  owner {
+                    display_name
+                    __typename
+                  }
+                  privacy
+                  s3_id
+                  name
+                  video_properties {
+                    avgBitRate
+                    client
+                    camera_enabled
+                    client_version
+                    duration
+                    durationMs
+                    format
+                    height
+                    microphone_enabled
+                    os
+                    os_version
+                    recordingClient
+                    recording_type
+                    recording_version
+                    screen_type
+                    tab_audio
+                    trim_duration
+                    width
+                    __typename
+                  }
+                  playable_duration
+                  source_duration
+                  visibility
+                }
+              }
+            }\n'''),
+        'GetVideoSource': textwrap.dedent('''\
+            query GetVideoSource($videoId: ID!, $password: String, $acceptableMimes: [CloudfrontVideoAcceptableMime]) {
+              getVideo(id: $videoId, password: $password) {
+                ... on RegularUserVideo {
+                  id
+                  nullableRawCdnUrl(acceptableMimes: $acceptableMimes, password: $password) {
+                    url
+                    __typename
+                  }
+                  __typename
+                }
+                __typename
+              }
+            }\n'''),
+        'FetchVideoTranscript': textwrap.dedent('''\
+            query FetchVideoTranscript($videoId: ID!, $password: String) {
+              fetchVideoTranscript(videoId: $videoId, password: $password) {
+                ... on VideoTranscriptDetails {
+                  id
+                  video_id
+                  source_url
+                  captions_source_url
+                  __typename
+                }
+                ... on GenericError {
+                  message
+                  __typename
+                }
+                __typename
+              }
+            }\n'''),
+        'FetchChapters': textwrap.dedent('''\
+            query FetchChapters($videoId: ID!, $password: String) {
+              fetchVideoChapters(videoId: $videoId, password: $password) {
+                ... on VideoChapters {
+                  video_id
+                  content
+                  __typename
+                }
+                ... on EmptyChaptersPayload {
+                  content
+                  __typename
+                }
+                ... on InvalidRequestWarning {
+                  message
+                  __typename
+                }
+                ... on Error {
+                  message
+                  __typename
+                }
+                __typename
+              }
+            }\n'''),
+    }
+    _APOLLO_GRAPHQL_VERSION = '0a1856c'
+
+    def _call_graphql_api(self, operations, video_id, note=None, errnote=None):
+        password = self.get_param('videopassword')
+        return self._download_json(
+            'https://www.loom.com/graphql', video_id, note or 'Downloading GraphQL JSON',
+            errnote or 'Failed to download GraphQL JSON', headers={
+                'Accept': 'application/json',
+                'Content-Type': 'application/json',
+                'x-loom-request-source': f'loom_web_{self._APOLLO_GRAPHQL_VERSION}',
+                'apollographql-client-name': 'web',
+                'apollographql-client-version': self._APOLLO_GRAPHQL_VERSION,
+            }, data=json.dumps([{
+                'operationName': operation_name,
+                'variables': {
+                    'videoId': video_id,
+                    'password': password,
+                    **self._GRAPHQL_VARIABLES.get(operation_name, {}),
+                },
+                'query': self._GRAPHQL_QUERIES[operation_name],
+            } for operation_name in variadic(operations)], separators=(',', ':')).encode())
+
+    def _call_url_api(self, endpoint, video_id):
+        response = self._download_json(
+            f'https://www.loom.com/api/campaigns/sessions/{video_id}/{endpoint}', video_id,
+            f'Downloading {endpoint} JSON', f'Failed to download {endpoint} JSON', fatal=False,
+            headers={'Accept': 'application/json', 'Content-Type': 'application/json'},
+            data=json.dumps({
+                'anonID': str(uuid.uuid4()),
+                'deviceID': None,
+                'force_original': False,  # HTTP error 401 if True
+                'password': self.get_param('videopassword'),
+            }, separators=(',', ':')).encode())
+        return traverse_obj(response, ('url', {url_or_none}))
+
+    def _extract_formats(self, video_id, metadata, gql_data):
+        formats = []
+        video_properties = traverse_obj(metadata, ('video_properties', {
+            'width': ('width', {int_or_none}),
+            'height': ('height', {int_or_none}),
+            'acodec': ('microphone_enabled', {lambda x: 'none' if x is False else None}),
+        }))
+
+        def get_formats(format_url, format_id, quality):
+            if not format_url:
+                return
+            ext = determine_ext(format_url)
+            query = urllib.parse.urlparse(format_url).query
+
+            if ext == 'm3u8':
+                # Extract pre-merged HLS formats to avoid buggy parsing of metadata in split playlists
+                format_url = format_url.replace('-split.m3u8', '.m3u8')
+                m3u8_formats = self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', m3u8_id=f'hls-{format_id}', fatal=False, quality=quality)
+                for fmt in m3u8_formats:
+                    yield {
+                        **fmt,
+                        'url': update_url(fmt['url'], query=query),
+                        'extra_param_to_segment_url': query,
+                    }
+
+            elif ext == 'mpd':
+                dash_formats = self._extract_mpd_formats(
+                    format_url, video_id, mpd_id=f'dash-{format_id}', fatal=False)
+                for fmt in dash_formats:
+                    yield {
+                        **fmt,
+                        'extra_param_to_segment_url': query,
+                        'quality': quality,
+                    }
+
+            else:
+                yield {
+                    'url': format_url,
+                    'ext': ext,
+                    'format_id': f'http-{format_id}',
+                    'quality': quality,
+                    **video_properties,
+                }
+
+        raw_url = self._call_url_api('raw-url', video_id)
+        formats.extend(get_formats(raw_url, 'raw', quality=1))  # original quality
+
+        transcoded_url = self._call_url_api('transcoded-url', video_id)
+        formats.extend(get_formats(transcoded_url, 'transcoded', quality=-1))  # transcoded quality
+
+        cdn_url = get_first(gql_data, ('data', 'getVideo', 'nullableRawCdnUrl', 'url', {url_or_none}))
+        # cdn_url is usually a dupe, but the raw-url/transcoded-url endpoints could return errors
+        valid_urls = [update_url(url, query=None) for url in (raw_url, transcoded_url) if url]
+        if cdn_url and update_url(cdn_url, query=None) not in valid_urls:
+            formats.extend(get_formats(cdn_url, 'cdn', quality=0))  # could be original or transcoded
+
+        return formats
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        metadata = get_first(
+            self._call_graphql_api('GetVideoSSR', video_id, 'Downloading GraphQL metadata JSON'),
+            ('data', 'getVideo', {dict})) or {}
+
+        if metadata.get('__typename') == 'VideoPasswordMissingOrIncorrect':
+            if not self.get_param('videopassword'):
+                raise ExtractorError(
+                    'This video is password-protected, use the --video-password option', expected=True)
+            raise ExtractorError('Invalid video password', expected=True)
+
+        gql_data = self._call_graphql_api(['FetchChapters', 'FetchVideoTranscript', 'GetVideoSource'], video_id)
+        duration = traverse_obj(metadata, ('video_properties', 'duration', {int_or_none}))
+
+        return {
+            'id': video_id,
+            'duration': duration,
+            'chapters': self._extract_chapters_from_description(
+                get_first(gql_data, ('data', 'fetchVideoChapters', 'content', {str})), duration) or None,
+            'formats': self._extract_formats(video_id, metadata, gql_data),
+            'subtitles': filter_dict({
+                'en': traverse_obj(gql_data, (
+                    ..., 'data', 'fetchVideoTranscript',
+                    ('source_url', 'captions_source_url'), {
+                        'url': {url_or_none},
+                    })) or None,
+            }),
+            **traverse_obj(metadata, {
+                'title': ('name', {str}),
+                'description': ('description', {str}),
+                'uploader': ('owner', 'display_name', {str}),
+                'timestamp': ('createdAt', {parse_iso8601}),
+            }),
+        }
+
+
+class LoomFolderIE(InfoExtractor):
+    IE_NAME = 'loom:folder'
+    _VALID_URL = r'https?://(?:www\.)?loom\.com/share/folder/(?P<id>[\da-f]{32})'
+    _TESTS = [{
+        # 2 subfolders, no videos in root
+        'url': 'https://www.loom.com/share/folder/997db4db046f43e5912f10dc5f817b5c',
+        'playlist_mincount': 16,
+        'info_dict': {
+            'id': '997db4db046f43e5912f10dc5f817b5c',
+            'title': 'Blending Lessons',
+        },
+    }, {
+        # only videos, no subfolders
+        'url': 'https://www.loom.com/share/folder/9a8a87f6b6f546d9a400c8e7575ff7f2',
+        'playlist_mincount': 12,
+        'info_dict': {
+            'id': '9a8a87f6b6f546d9a400c8e7575ff7f2',
+            'title': 'List A- a, i, o',
+        },
+    }, {
+        # videos in root and empty subfolder
+        'url': 'https://www.loom.com/share/folder/886e534218c24fd292e97e9563078cc4',
+        'playlist_mincount': 21,
+        'info_dict': {
+            'id': '886e534218c24fd292e97e9563078cc4',
+            'title': 'Medicare Agent Training videos',
+        },
+    }, {
+        # videos in root and videos in subfolders
+        'url': 'https://www.loom.com/share/folder/b72c4ecdf04745da9403926d80a40c38',
+        'playlist_mincount': 21,
+        'info_dict': {
+            'id': 'b72c4ecdf04745da9403926d80a40c38',
+            'title': 'Quick Altos Q & A Tutorials',
+        },
+    }, {
+        # recursive folder extraction
+        'url': 'https://www.loom.com/share/folder/8b458a94e0e4449b8df9ea7a68fafc4e',
+        'playlist_count': 23,
+        'info_dict': {
+            'id': '8b458a94e0e4449b8df9ea7a68fafc4e',
+            'title': 'Sezer Texting Guide',
+        },
+    }, {
+        # more than 50 videos in 1 folder
+        'url': 'https://www.loom.com/share/folder/e056a91d290d47ca9b00c9d1df56c463',
+        'playlist_mincount': 61,
+        'info_dict': {
+            'id': 'e056a91d290d47ca9b00c9d1df56c463',
+            'title': 'User Videos',
+        },
+    }, {
+        # many subfolders
+        'url': 'https://www.loom.com/share/folder/c2dde8cc67454f0e99031677279d8954',
+        'playlist_mincount': 75,
+        'info_dict': {
+            'id': 'c2dde8cc67454f0e99031677279d8954',
+            'title': 'Honors 1',
+        },
+    }, {
+        'url': 'https://www.loom.com/share/folder/bae17109a68146c7803454f2893c8cf8/Edpuzzle',
+        'only_matching': True,
+    }]
+
+    def _extract_folder_data(self, folder_id):
+        return self._download_json(
+            f'https://www.loom.com/v1/folders/{folder_id}', folder_id,
+            'Downloading folder info JSON', query={'limit': '10000'})
+
+    def _extract_folder_entries(self, folder_id, initial_folder_data=None):
+        folder_data = initial_folder_data or self._extract_folder_data(folder_id)
+
+        for video in traverse_obj(folder_data, ('videos', lambda _, v: v['id'])):
+            video_id = video['id']
+            yield self.url_result(
+                f'https://www.loom.com/share/{video_id}', LoomIE, video_id, video.get('name'))
+
+        # Recurse into subfolders
+        for subfolder_id in traverse_obj(folder_data, (
+                'folders', lambda _, v: v['id'] != folder_id, 'id', {str})):
+            yield from self._extract_folder_entries(subfolder_id)
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        playlist_data = self._extract_folder_data(playlist_id)
+
+        return self.playlist_result(
+            self._extract_folder_entries(playlist_id, playlist_data), playlist_id,
+            traverse_obj(playlist_data, ('folder', 'name', {str.strip})))

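
Note: the new Loom extractor reads the video password via `self.get_param('videopassword')`, which is what the `--video-password` option sets. A short sketch of exercising the password-protected code path from the Python API follows; the URL is the password-protected test case above and the password string here is a placeholder, not a real credential.

# Sketch: download a password-protected Loom share with the new extractor.
import yt_dlp

opts = {'videopassword': 'example-password'}  # placeholder value
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://www.loom.com/share/50e26e8aeb7940189dff5630f95ce1f4'])
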
@@ -1,4 +1,3 @@
-from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
     traverse_obj,

@@ -1,67 +1,153 @@
+import urllib.parse
+
 from .common import InfoExtractor
 from ..utils import (
-    unified_strdate,
-    update_url_query,
-    urlencode_postdata,
+    filter_dict,
+    parse_iso8601,
+    traverse_obj,
+    try_call,
+    url_or_none,
 )


 class MediciIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?medici\.tv/#!/(?P<id>[^?#&]+)'
-    _TEST = {
-        'url': 'http://www.medici.tv/#!/daniel-harding-frans-helmerson-verbier-festival-music-camp',
-        'md5': '004c21bb0a57248085b6ff3fec72719d',
+    _VALID_URL = r'https?://(?:(?P<sub>www|edu)\.)?medici\.tv/[a-z]{2}/[\w.-]+/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://www.medici.tv/en/operas/thomas-ades-the-exterminating-angel-calixto-bieito-opera-bastille-paris',
+        'md5': 'd483f74e7a7a9eac0dbe152ab189050d',
         'info_dict': {
-            'id': '3059',
-            'ext': 'flv',
-            'title': 'Daniel Harding conducts the Verbier Festival Music Camp \u2013 With Frans Helmerson',
-            'description': 'md5:322a1e952bafb725174fd8c1a8212f58',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'upload_date': '20170408',
+            'id': '8032',
+            'ext': 'mp4',
+            'title': 'Thomas Adès\'s The Exterminating Angel',
+            'description': 'md5:708ae6350dadc604225b4a6e32482bab',
+            'thumbnail': r're:https://.+/.+\.jpg',
+            'upload_date': '20240304',
+            'timestamp': 1709561766,
+            'display_id': 'thomas-ades-the-exterminating-angel-calixto-bieito-opera-bastille-paris',
         },
-    }
+        'expected_warnings': [r'preview'],
+    }, {
+        'url': 'https://edu.medici.tv/en/operas/wagner-lohengrin-paris-opera-kirill-serebrennikov-piotr-beczala-kwangchul-youn-johanni-van-oostrum',
+        'md5': '4ef3f4079a6e1c617584463a9eb84f99',
+        'info_dict': {
+            'id': '7900',
+            'ext': 'mp4',
+            'title': 'Wagner\'s Lohengrin',
+            'description': 'md5:a384a62937866101f86902f21752cd89',
+            'thumbnail': r're:https://.+/.+\.jpg',
+            'upload_date': '20231017',
+            'timestamp': 1697554771,
+            'display_id': 'wagner-lohengrin-paris-opera-kirill-serebrennikov-piotr-beczala-kwangchul-youn-johanni-van-oostrum',
+        },
+        'expected_warnings': [r'preview'],
+    }, {
+        'url': 'https://www.medici.tv/en/concerts/sergey-smbatyan-conducts-mansurian-chouchane-siranossian-mario-brunello',
+        'md5': '9dd757e53b22b2511e85ea9ea60e4815',
+        'info_dict': {
+            'id': '5712',
+            'ext': 'mp4',
+            'title': 'Sergey Smbatyan conducts Tigran Mansurian — With Chouchane Siranossian and Mario Brunello',
+            'thumbnail': r're:https://.+/.+\.jpg',
+            'description': 'md5:9411fe44c874bb10e9af288c65816e41',
+            'upload_date': '20200323',
+            'timestamp': 1584975600,
+            'display_id': 'sergey-smbatyan-conducts-mansurian-chouchane-siranossian-mario-brunello',
+        },
+        'expected_warnings': [r'preview'],
+    }, {
+        'url': 'https://www.medici.tv/en/ballets/carmen-ballet-choregraphie-de-jiri-bubenicek-teatro-dellopera-di-roma',
+        'md5': '40f5e76cb701a97a6d7ba23b62c49990',
+        'info_dict': {
+            'id': '7857',
+            'ext': 'mp4',
+            'title': 'Carmen by Jiří Bubeníček after Roland Petit, music by Bizet, de Falla, Castelnuovo-Tedesco, and Bonolis',
+            'thumbnail': r're:https://.+/.+\.jpg',
+            'description': 'md5:0f15a15611ed748020c769873e10a8bb',
+            'upload_date': '20240223',
+            'timestamp': 1708707600,
+            'display_id': 'carmen-ballet-choregraphie-de-jiri-bubenicek-teatro-dellopera-di-roma',
+        },
+        'expected_warnings': [r'preview'],
+    }, {
+        'url': 'https://www.medici.tv/en/documentaries/la-sonnambula-liege-2023-documentaire',
+        'md5': '87ff198018ce79a34757ab0dd6f21080',
+        'info_dict': {
+            'id': '7513',
+            'ext': 'mp4',
+            'title': 'La Sonnambula',
+            'thumbnail': r're:https://.+/.+\.jpg',
+            'description': 'md5:0caf9109a860fd50cd018df062a67f34',
+            'upload_date': '20231103',
+            'timestamp': 1699010830,
+            'display_id': 'la-sonnambula-liege-2023-documentaire',
+        },
+        'expected_warnings': [r'preview'],
+    }, {
+        'url': 'https://edu.medici.tv/en/masterclasses/yvonne-loriod-olivier-messiaen',
+        'md5': 'fb5dcec46d76ad20fbdbaabb01da191d',
+        'info_dict': {
+            'id': '3024',
+            'ext': 'mp4',
+            'title': 'Olivier Messiaen and Yvonne Loriod, pianists and teachers',
+            'thumbnail': r're:https://.+/.+\.jpg',
+            'description': 'md5:aab948e2f7690214b5c28896c83f1fc1',
+            'upload_date': '20150223',
+            'timestamp': 1424706608,
+            'display_id': 'yvonne-loriod-olivier-messiaen',
+        },
+        'skip': 'Requires authentication; preview starts in the middle',
+    }, {
+        'url': 'https://www.medici.tv/en/jazz/makaya-mccraven-la-rochelle',
+        'md5': '4cc279a8b06609782747c8f50beea2b3',
+        'info_dict': {
+            'id': '7922',
+            'ext': 'mp4',
+            'title': 'NEW: Makaya McCraven in La Rochelle',
+            'thumbnail': r're:https://.+/.+\.jpg',
+            'description': 'md5:b5a8aaeb6993d8ccb18bde8abb8aa8d2',
+            'upload_date': '20231228',
+            'timestamp': 1703754863,
+            'display_id': 'makaya-mccraven-la-rochelle',
+        },
+        'expected_warnings': [r'preview'],
+    }]

     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        display_id, subdomain = self._match_valid_url(url).group('id', 'sub')
+        self._request_webpage(url, display_id, 'Requesting CSRF token cookie')

-        # Sets csrftoken cookie
-        self._download_webpage(url, video_id)
+        subdomain = 'edu-' if subdomain == 'edu' else ''
+        origin = f'https://{urllib.parse.urlparse(url).hostname}'

-        MEDICI_URL = 'http://www.medici.tv/'
-
         data = self._download_json(
-            MEDICI_URL, video_id,
-            data=urlencode_postdata({
-                'json': 'true',
-                'page': '/%s' % video_id,
-                'timezone_offset': -420,
-            }), headers={
-                'X-CSRFToken': self._get_cookies(url)['csrftoken'].value,
-                'X-Requested-With': 'XMLHttpRequest',
-                'Referer': MEDICI_URL,
-                'Content-Type': 'application/x-www-form-urlencoded',
-            })
+            f'https://api.medici.tv/{subdomain}satie/edito/movie-file/{display_id}/', display_id,
+            headers=filter_dict({
+                'Authorization': try_call(
+                    lambda: urllib.parse.unquote(self._get_cookies(url)['auth._token.mAuth'].value)),
+                'Device-Type': 'web',
+                'Origin': origin,
+                'Referer': f'{origin}/',
+                'Accept': 'application/json, text/plain, */*',
+            }))

-        video = data['video']['videos']['video1']
+        if not traverse_obj(data, ('video', 'is_full_video')) and traverse_obj(
+                data, ('video', 'is_limited_by_user_access')):
+            self.report_warning(
+                'The full video is for subscribers only. Only previews will be downloaded. If you '
+                'have used the --cookies-from-browser option, try using the --cookies option instead')

-        title = video.get('nom') or data['title']
-        video_id = video.get('id') or video_id
-        formats = self._extract_f4m_formats(
-            update_url_query(video['url_akamai'], {
-                'hdcore': '3.1.0',
-                'plugin=aasp': '3.1.0.43.124',
-            }), video_id, f4m_id='hds')
-
-        description = data.get('meta_description')
-        thumbnail = video.get('url_thumbnail') or data.get('main_image')
-        upload_date = unified_strdate(data['video'].get('date'))
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            data['video']['video_url'], display_id, 'mp4')

         return {
-            'id': video_id,
-            'title': title,
-            'description': description,
-            'thumbnail': thumbnail,
-            'upload_date': upload_date,
+            'id': str(data['id']),
+            'display_id': display_id,
             'formats': formats,
+            'subtitles': subtitles,
+            **traverse_obj(data, {
+                'title': ('title', {str}),
+                'description': ('subtitle', {str}),
+                'thumbnail': ('picture', {url_or_none}),
+                'timestamp': ('date_publish', {parse_iso8601}),
+            }),
         }

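
Note: the rewritten medici.tv extractor builds its Authorization header from the `auth._token.mAuth` cookie and explicitly warns that `--cookies-from-browser` may not carry it over. A hedged sketch of passing an exported cookie file through the Python API follows; `cookiefile` corresponds to the `--cookies` option, the file name is hypothetical, and the URL is the first test case above.

# Sketch: supply a Netscape-format cookies file from a logged-in session so
# the extractor can read the auth._token.mAuth cookie for full-video access.
import yt_dlp

opts = {'cookiefile': 'medici_cookies.txt'}  # hypothetical path
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://www.medici.tv/en/operas/thomas-ades-the-exterminating-angel-calixto-bieito-opera-bastille-paris'])
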
@@ -1,4 +1,4 @@
-from base64 import b64decode
+import base64

 from .common import InfoExtractor
 from ..utils import (
@@ -81,7 +81,7 @@ class MicrosoftStreamIE(InfoExtractor):
                 'url': thumbnail_url,
             }
             thumb_name = url_basename(thumbnail_url)
-            thumb_name = str(b64decode(thumb_name + '=' * (-len(thumb_name) % 4)))
+            thumb_name = str(base64.b64decode(thumb_name + '=' * (-len(thumb_name) % 4)))
             thumb.update(parse_resolution(thumb_name))
             thumbnails.append(thumb)

@@ -1,5 +1,13 @@
 from .common import InfoExtractor
-from ..utils import UserNotLive, traverse_obj
+from ..networking.exceptions import HTTPError
+from ..utils import (
+    ExtractorError,
+    UserNotLive,
+    int_or_none,
+    str_or_none,
+    url_or_none,
+)
+from ..utils.traversal import traverse_obj


 class MixchIE(InfoExtractor):
@@ -7,17 +15,20 @@ class MixchIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P<id>\d+)'

     _TESTS = [{
-        'url': 'https://mixch.tv/u/16236849/live',
+        'url': 'https://mixch.tv/u/16943797/live',
         'skip': 'don\'t know if this live persists',
         'info_dict': {
-            'id': '16236849',
-            'title': '24配信シェア⭕️投票🙏💦',
-            'comment_count': 13145,
-            'view_count': 28348,
-            'timestamp': 1636189377,
-            'uploader': '🦥伊咲👶🏻#フレアワ',
-            'uploader_id': '16236849',
-        }
+            'id': '16943797',
+            'ext': 'mp4',
+            'title': '#EntView #カリナ #セブチ 2024-05-05 06:58',
+            'comment_count': int,
+            'view_count': int,
+            'timestamp': 1714726805,
+            'uploader': 'Ent.View K-news🎶💕',
+            'uploader_id': '16943797',
+            'live_status': 'is_live',
+            'upload_date': '20240503',
+        },
     }, {
         'url': 'https://mixch.tv/u/16137876/live',
         'only_matching': True,
@@ -25,31 +36,41 @@ class MixchIE(InfoExtractor):

     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(f'https://mixch.tv/u/{video_id}/live', video_id)
-
-        initial_js_state = self._parse_json(self._search_regex(
-            r'(?m)^\s*window\.__INITIAL_JS_STATE__\s*=\s*(\{.+?\});\s*$', webpage, 'initial JS state'), video_id)
-        if not initial_js_state.get('liveInfo'):
+        data = self._download_json(f'https://mixch.tv/api-web/users/{video_id}/live', video_id)
+        if not traverse_obj(data, ('liveInfo', {dict})):
             raise UserNotLive(video_id=video_id)

         return {
             'id': video_id,
-            'title': traverse_obj(initial_js_state, ('liveInfo', 'title')),
-            'comment_count': traverse_obj(initial_js_state, ('liveInfo', 'comments')),
-            'view_count': traverse_obj(initial_js_state, ('liveInfo', 'visitor')),
-            'timestamp': traverse_obj(initial_js_state, ('liveInfo', 'created')),
-            'uploader': traverse_obj(initial_js_state, ('broadcasterInfo', 'name')),
             'uploader_id': video_id,
+            **traverse_obj(data, {
+                'title': ('liveInfo', 'title', {str}),
+                'comment_count': ('liveInfo', 'comments', {int_or_none}),
+                'view_count': ('liveInfo', 'visitor', {int_or_none}),
+                'timestamp': ('liveInfo', 'created', {int_or_none}),
+                'uploader': ('broadcasterInfo', 'name', {str}),
+            }),
             'formats': [{
                 'format_id': 'hls',
-                'url': (traverse_obj(initial_js_state, ('liveInfo', 'hls'))
-                        or f'https://d1hd0ww6piyb43.cloudfront.net/hls/torte_{video_id}.m3u8'),
+                'url': data['liveInfo']['hls'],
                 'ext': 'mp4',
                 'protocol': 'm3u8',
             }],
             'is_live': True,
+            '__post_extractor': self.extract_comments(video_id),
         }

+    def _get_comments(self, video_id):
+        yield from traverse_obj(self._download_json(
+            f'https://mixch.tv/api-web/lives/{video_id}/messages', video_id,
+            note='Downloading comments', errnote='Failed to download comments'), (..., {
+                'author': ('name', {str}),
+                'author_id': ('user_id', {str_or_none}),
+                'id': ('message_id', {str}, {lambda x: x or None}),
+                'text': ('body', {str}),
+                'timestamp': ('created', {int}),
+            }))
+

 class MixchArchiveIE(InfoExtractor):
     IE_NAME = 'mixch:archive'
@@ -60,22 +81,38 @@ class MixchArchiveIE(InfoExtractor):
         'skip': 'paid video, no DRM. expires at Jan 23',
         'info_dict': {
             'id': '421',
+            'ext': 'mp4',
             'title': '96NEKO SHOW TIME',
         }
+    }, {
+        'url': 'https://mixch.tv/archive/1213',
+        'skip': 'paid video, no DRM. expires at Dec 31, 2023',
+        'info_dict': {
+            'id': '1213',
+            'ext': 'mp4',
+            'title': '【特別トーク番組アーカイブス】Merm4id×燐舞曲 2nd LIVE「VERSUS」',
+            'release_date': '20231201',
+            'thumbnail': str,
+        }
+    }, {
+        'url': 'https://mixch.tv/archive/1214',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)

-        html5_videos = self._parse_html5_media_entries(
-            url, webpage.replace('video-js', 'video'), video_id, 'hls')
-        if not html5_videos:
-            self.raise_login_required(method='cookies')
-        infodict = html5_videos[0]
-        infodict.update({
+        try:
+            info_json = self._download_json(
+                f'https://mixch.tv/api-web/archive/{video_id}', video_id)['archive']
+        except ExtractorError as e:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
+                self.raise_login_required()
+            raise
+
+        return {
             'id': video_id,
-            'title': self._html_search_regex(r'class="archive-title">(.+?)</', webpage, 'title')
-        })
-        return infodict
+            'title': traverse_obj(info_json, ('title', {str})),
+            'formats': self._extract_m3u8_formats(info_json['archiveURL'], video_id),
+            'thumbnail': traverse_obj(info_json, ('thumbnailURL', {url_or_none})),
+        }

@@ -1,4 +1,4 @@
-import datetime
+import datetime as dt
 import re
 import urllib.parse

@@ -151,7 +151,7 @@ class MotherlessIE(InfoExtractor):
             'd': 'days',
         }
         kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
-        upload_date = (datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
+        upload_date = (dt.datetime.now(dt.timezone.utc) - dt.timedelta(**kwargs)).strftime('%Y%m%d')

         comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
         uploader_id = self._html_search_regex(

@@ -4,8 +4,8 @@ import hmac
 import itertools
 import json
 import re
+import urllib.parse
 import time
-from urllib.parse import parse_qs, urlparse

 from .common import InfoExtractor
 from ..utils import (
@@ -388,7 +388,7 @@ class NaverNowIE(NaverBaseIE):

     def _real_extract(self, url):
         show_id = self._match_id(url)
-        qs = parse_qs(urlparse(url).query)
+        qs = urllib.parse.parse_qs(urllib.parse.urlparse(url).query)

         if not self._yes_playlist(show_id, qs.get('shareHightlight')):
             return self._extract_highlight(show_id, qs['shareHightlight'][0])
@@ -1,9 +1,9 @@
+import hashlib
 import itertools
 import json
+import random
 import re
 import time
-from hashlib import md5
-from random import randint

 from .common import InfoExtractor
 from ..aes import aes_ecb_encrypt, pkcs7_padding
@@ -34,7 +34,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
         request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':'))

         message = f'nobody{api_path}use{request_text}md5forencrypt'.encode('latin1')
-        msg_digest = md5(message).hexdigest()
+        msg_digest = hashlib.md5(message).hexdigest()

         data = pkcs7_padding(list(str.encode(
             f'{api_path}-36cd479b6b5-{request_text}-36cd479b6b5-{msg_digest}')))
@@ -53,7 +53,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
             '__csrf': '',
             'os': 'pc',
             'channel': 'undefined',
-            'requestId': f'{int(time.time() * 1000)}_{randint(0, 1000):04}',
+            'requestId': f'{int(time.time() * 1000)}_{random.randint(0, 1000):04}',
             **traverse_obj(self._get_cookies(self._API_BASE), {
                 'MUSIC_U': ('MUSIC_U', {lambda i: i.value}),
             })
@@ -561,7 +561,8 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
             'timestamp': ('createTime', {self.kilo_or_none}),
         })

-        if not self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']):
+        if not self._yes_playlist(
+                info['songs'] and program_id, info['mainSong']['id'], playlist_label='program', video_label='song'):
             formats = self.extract_formats(info['mainSong'])

             return {
@@ -5,7 +5,6 @@ from ..utils import (
     merge_dicts,
     parse_count,
     url_or_none,
-    urljoin,
 )
 from ..utils.traversal import traverse_obj

@@ -16,8 +15,7 @@ class NFBBaseIE(InfoExtractor):

     def _extract_ep_data(self, webpage, video_id, fatal=False):
         return self._search_json(
-            r'const\s+episodesData\s*=', webpage, 'episode data', video_id,
-            contains_pattern=r'\[\s*{(?s:.+)}\s*\]', fatal=fatal) or []
+            r'episodesData\s*:', webpage, 'episode data', video_id, fatal=fatal) or {}

     def _extract_ep_info(self, data, video_id, slug=None):
         info = traverse_obj(data, (lambda _, v: video_id in v['embed_url'], {
@@ -224,18 +222,14 @@ class NFBIE(NFBBaseIE):
         # type_ can change from film to serie(s) after redirect; new slug may have episode number
         type_, slug = self._match_valid_url(urlh.url).group('type', 'id')

-        embed_url = urljoin(f'https://www.{site}.ca', self._html_search_regex(
-            r'<[^>]+\bid=["\']player-iframe["\'][^>]*\bsrc=["\']([^"\']+)', webpage, 'embed url'))
-        video_id = self._match_id(embed_url)  # embed url has unique slug
-        player = self._download_webpage(embed_url, video_id, 'Downloading player page')
-        if 'MESSAGE_GEOBLOCKED' in player:
-            self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+        player_data = self._search_json(
+            r'window\.PLAYER_OPTIONS\[[^\]]+\]\s*=', webpage, 'player data', slug)
+        video_id = self._match_id(player_data['overlay']['url'])  # overlay url always has unique slug

         formats, subtitles = self._extract_m3u8_formats_and_subtitles(
-            self._html_search_regex(r'source:\s*\'([^\']+)', player, 'm3u8 url'),
-            video_id, 'mp4', m3u8_id='hls')
+            player_data['source'], video_id, 'mp4', m3u8_id='hls')

-        if dv_source := self._html_search_regex(r'dvSource:\s*\'([^\']+)', player, 'dv', default=None):
+        if dv_source := url_or_none(player_data.get('dvSource')):
             fmts, subs = self._extract_m3u8_formats_and_subtitles(
                 dv_source, video_id, 'mp4', m3u8_id='dv', preference=-2, fatal=False)
             for fmt in fmts:
@@ -246,17 +240,16 @@ class NFBIE(NFBBaseIE):
         info = {
             'id': video_id,
             'title': self._html_search_regex(
-                r'<[^>]+\bid=["\']titleHeader["\'][^>]*>\s*<h1[^>]*>\s*([^<]+?)\s*</h1>',
+                r'["\']nfb_version_title["\']\s*:\s*["\']([^"\']+)',
                 webpage, 'title', default=None),
             'description': self._html_search_regex(
                 r'<[^>]+\bid=["\']tabSynopsis["\'][^>]*>\s*<p[^>]*>\s*([^<]+)',
                 webpage, 'description', default=None),
-            'thumbnail': self._html_search_regex(
-                r'poster:\s*\'([^\']+)', player, 'thumbnail', default=None),
+            'thumbnail': url_or_none(player_data.get('poster')),
             'uploader': self._html_search_regex(
-                r'<[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
+                r'<[^>]+\bitemprop=["\']director["\'][^>]*>([^<]+)', webpage, 'uploader', default=None),
             'release_year': int_or_none(self._html_search_regex(
-                r'<[^>]+\bitemprop=["\']datePublished["\'][^>]*>([^<]+)',
+                r'["\']nfb_version_year["\']\s*:\s*["\']([^"\']+)',
                 webpage, 'release_year', default=None)),
         } if type_ == 'film' else self._extract_ep_info(self._extract_ep_data(webpage, video_id, slug), video_id)

@@ -8,6 +8,7 @@ from ..utils import (
     int_or_none,
     join_nonempty,
     parse_duration,
+    remove_end,
     traverse_obj,
     try_call,
     unescapeHTML,
@@ -19,8 +20,7 @@ from ..utils import (

 class NhkBaseIE(InfoExtractor):
     _API_URL_TEMPLATE = 'https://nwapi.nhk.jp/nhkworld/%sod%slist/v7b/%s/%s/%s/all%s.json'
-    _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand'
-    _TYPE_REGEX = r'/(?P<type>video|audio)/'
+    _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/'

     def _call_api(self, m_id, lang, is_video, is_episode, is_clip):
         return self._download_json(
@@ -83,7 +83,7 @@ class NhkBaseIE(InfoExtractor):
     def _extract_episode_info(self, url, episode=None):
         fetch_episode = episode is None
         lang, m_type, episode_id = NhkVodIE._match_valid_url(url).group('lang', 'type', 'id')
-        is_video = m_type == 'video'
+        is_video = m_type != 'audio'

         if is_video:
             episode_id = episode_id[:4] + '-' + episode_id[4:]
@@ -138,9 +138,10 @@
         else:
             if fetch_episode:
-                audio_path = episode['audio']['audio']
+                # From https://www3.nhk.or.jp/nhkworld/common/player/radio/inline/rod.html
+                audio_path = remove_end(episode['audio']['audio'], '.m4a')
                 info['formats'] = self._extract_m3u8_formats(
-                    'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
+                    f'{urljoin("https://vod-stream.nhk.jp", audio_path)}/index.m3u8',
                     episode_id, 'm4a', entry_protocol='m3u8_native',
                     m3u8_id='hls', fatal=False)
                 for f in info['formats']:
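Note (reviewer aside, not part of the patch): the new audio manifest URL is assembled from the episode's stripped audio path. A minimal sketch of that assembly, using a hypothetical path value and assuming the public yt_dlp.utils re-exports:

    from yt_dlp.utils import remove_end, urljoin

    audio_path = remove_end('/en/radio/programs/livinginjapan/episode123.m4a', '.m4a')
    print(f'{urljoin("https://vod-stream.nhk.jp", audio_path)}/index.m3u8')
    # -> https://vod-stream.nhk.jp/en/radio/programs/livinginjapan/episode123/index.m3u8
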
@@ -155,9 +156,11 @@

 class NhkVodIE(NhkBaseIE):
-    # the 7-character IDs can have alphabetic chars too: assume [a-z] rather than just [a-f], eg
-    _VALID_URL = [rf'{NhkBaseIE._BASE_URL_REGEX}/(?P<type>video)/(?P<id>[0-9a-z]+)',
-                  rf'{NhkBaseIE._BASE_URL_REGEX}/(?P<type>audio)/(?P<id>[^/?#]+?-\d{{8}}-[0-9a-z]+)']
+    _VALID_URL = [
+        rf'{NhkBaseIE._BASE_URL_REGEX}shows/(?:(?P<type>video)/)?(?P<id>\d{{4}}[\da-z]\d+)/?(?:$|[?#])',
+        rf'{NhkBaseIE._BASE_URL_REGEX}(?:ondemand|shows)/(?P<type>audio)/(?P<id>[^/?#]+?-\d{{8}}-[\da-z]+)',
+        rf'{NhkBaseIE._BASE_URL_REGEX}ondemand/(?P<type>video)/(?P<id>\d{{4}}[\da-z]\d+)',  # deprecated
+    ]
     # Content available only for a limited period of time. Visit
     # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
     _TESTS = [{
@@ -167,17 +170,16 @@ class NhkVodIE(NhkBaseIE):
             'ext': 'mp4',
             'title': 'Japan Railway Journal - The Tohoku Shinkansen: Full Speed Ahead',
             'description': 'md5:49f7c5b206e03868a2fdf0d0814b92f6',
-            'thumbnail': 'md5:51bcef4a21936e7fea1ff4e06353f463',
+            'thumbnail': r're:https://.+/.+\.jpg',
             'episode': 'The Tohoku Shinkansen: Full Speed Ahead',
             'series': 'Japan Railway Journal',
-            'modified_timestamp': 1694243656,
+            'modified_timestamp': 1707217907,
             'timestamp': 1681428600,
             'release_timestamp': 1693883728,
             'duration': 1679,
             'upload_date': '20230413',
-            'modified_date': '20230909',
+            'modified_date': '20240206',
             'release_date': '20230905',

         },
     }, {
         # video clip
@@ -188,15 +190,15 @@
             'ext': 'mp4',
             'title': 'Dining with the Chef - Chef Saito\'s Family recipe: MENCHI-KATSU',
             'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
-            'thumbnail': 'md5:d6a4d9b6e9be90aaadda0bcce89631ed',
+            'thumbnail': r're:https://.+/.+\.jpg',
             'series': 'Dining with the Chef',
             'episode': 'Chef Saito\'s Family recipe: MENCHI-KATSU',
             'duration': 148,
             'upload_date': '20190816',
             'release_date': '20230902',
             'release_timestamp': 1693619292,
-            'modified_timestamp': 1694168033,
-            'modified_date': '20230908',
+            'modified_timestamp': 1707217907,
+            'modified_date': '20240206',
             'timestamp': 1565997540,
         },
     }, {
@@ -208,7 +210,7 @@
             'title': 'Living in Japan - Tips for Travelers to Japan / Ramen Vending Machines',
             'series': 'Living in Japan',
             'description': 'md5:0a0e2077d8f07a03071e990a6f51bfab',
-            'thumbnail': 'md5:960622fb6e06054a4a1a0c97ea752545',
+            'thumbnail': r're:https://.+/.+\.jpg',
             'episode': 'Tips for Travelers to Japan / Ramen Vending Machines'
         },
     }, {
@@ -245,7 +247,7 @@
             'title': 'おはよう日本(7時台) - 10月8日放送',
             'series': 'おはよう日本(7時台)',
             'episode': '10月8日放送',
-            'thumbnail': 'md5:d733b1c8e965ab68fb02b2d347d0e9b4',
+            'thumbnail': r're:https://.+/.+\.jpg',
             'description': 'md5:9c1d6cbeadb827b955b20e99ab920ff0',
         },
         'skip': 'expires 2023-10-15',
@@ -255,17 +257,100 @@
         'info_dict': {
             'id': 'nw_vod_v_en_3004_952_20230723091000_01_1690074552',
             'ext': 'mp4',
-            'title': 'Barakan Discovers AMAMI OSHIMA: Isson\'s Treasure Island',
+            'title': 'Barakan Discovers - AMAMI OSHIMA: Isson\'s Treasure Isla',
             'description': 'md5:5db620c46a0698451cc59add8816b797',
-            'thumbnail': 'md5:67d9ff28009ba379bfa85ad1aaa0e2bd',
+            'thumbnail': r're:https://.+/.+\.jpg',
             'release_date': '20230905',
             'timestamp': 1690103400,
             'duration': 2939,
             'release_timestamp': 1693898699,
-            'modified_timestamp': 1698057495,
-            'modified_date': '20231023',
             'upload_date': '20230723',
+            'modified_timestamp': 1707217907,
+            'modified_date': '20240206',
+            'episode': 'AMAMI OSHIMA: Isson\'s Treasure Isla',
+            'series': 'Barakan Discovers',
         },
+    }, {
+        # /ondemand/video/ url with alphabetical character in 5th position of id
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999a07/',
+        'info_dict': {
+            'id': 'nw_c_en_9999-a07',
+            'ext': 'mp4',
+            'episode': 'Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]',
+            'series': 'Mini-Dramas on SDGs',
+            'modified_date': '20240206',
+            'title': 'Mini-Dramas on SDGs - Mini-Dramas on SDGs: Ep 1 Close the Gender Gap [Director\'s Cut]',
+            'description': 'md5:3f9dcb4db22fceb675d90448a040d3f6',
+            'timestamp': 1621962360,
+            'duration': 189,
+            'release_date': '20230903',
+            'modified_timestamp': 1707217907,
+            'upload_date': '20210525',
+            'thumbnail': r're:https://.+/.+\.jpg',
+            'release_timestamp': 1693713487,
+        },
+    }, {
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999d17/',
+        'info_dict': {
+            'id': 'nw_c_en_9999-d17',
+            'ext': 'mp4',
+            'title': 'Flowers of snow blossom - The 72 Pentads of Yamato',
+            'description': 'Today’s focus: Snow',
+            'release_timestamp': 1693792402,
+            'release_date': '20230904',
+            'upload_date': '20220128',
+            'timestamp': 1643370960,
+            'thumbnail': r're:https://.+/.+\.jpg',
+            'duration': 136,
+            'series': '',
+            'modified_date': '20240206',
+            'modified_timestamp': 1707217907,
+        },
+    }, {
+        # new /shows/ url format
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/2032307/',
+        'info_dict': {
+            'id': 'nw_vod_v_en_2032_307_20240321113000_01_1710990282',
+            'ext': 'mp4',
+            'title': 'Japanology Plus - 20th Anniversary Special Part 1',
+            'description': 'md5:817d41fc8e54339ad2a916161ea24faf',
+            'episode': '20th Anniversary Special Part 1',
+            'series': 'Japanology Plus',
+            'thumbnail': r're:https://.+/.+\.jpg',
+            'duration': 1680,
+            'timestamp': 1711020600,
+            'upload_date': '20240321',
+            'release_timestamp': 1711022683,
+            'release_date': '20240321',
+            'modified_timestamp': 1711031012,
+            'modified_date': '20240321',
+        },
+    }, {
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/3020025/',
+        'info_dict': {
+            'id': 'nw_vod_v_en_3020_025_20230325144000_01_1679723944',
+            'ext': 'mp4',
+            'title': '100 Ideas to Save the World - Working Styles Evolve',
+            'description': 'md5:9e6c7778eaaf4f7b4af83569649f84d9',
+            'episode': 'Working Styles Evolve',
+            'series': '100 Ideas to Save the World',
+            'thumbnail': r're:https://.+/.+\.jpg',
+            'duration': 899,
+            'upload_date': '20230325',
+            'timestamp': 1679755200,
+            'release_date': '20230905',
+            'release_timestamp': 1693880540,
+            'modified_date': '20240206',
+            'modified_timestamp': 1707217907,
+        },
+    }, {
+        # new /shows/audio/ url format
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/audio/livinginjapan-20231001-1/',
+        'only_matching': True,
+    }, {
+        # valid url even if can't be found in wild; support needed for clip entries extraction
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/9999o80/',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
@@ -273,18 +358,21 @@ class NhkVodIE(NhkBaseIE):


 class NhkVodProgramIE(NhkBaseIE):
-    _VALID_URL = rf'{NhkBaseIE._BASE_URL_REGEX}/program{NhkBaseIE._TYPE_REGEX}(?P<id>\w+)(?:.+?\btype=(?P<episode_type>clip|(?:radio|tv)Episode))?'
+    _VALID_URL = rf'''(?x)
+        {NhkBaseIE._BASE_URL_REGEX}(?:shows|tv)/
+        (?:(?P<type>audio)/programs/)?(?P<id>\w+)/?
+        (?:\?(?:[^#]+&)?type=(?P<episode_type>clip|(?:radio|tv)Episode))?'''
     _TESTS = [{
         # video program episodes
-        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/sumo',
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/sumo/',
         'info_dict': {
             'id': 'sumo',
             'title': 'GRAND SUMO Highlights',
             'description': 'md5:fc20d02dc6ce85e4b72e0273aa52fdbf',
         },
-        'playlist_mincount': 0,
+        'playlist_mincount': 1,
     }, {
-        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway',
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/japanrailway/',
         'info_dict': {
             'id': 'japanrailway',
             'title': 'Japan Railway Journal',
@@ -293,40 +381,68 @@ class NhkVodProgramIE(NhkBaseIE):
         'playlist_mincount': 12,
     }, {
         # video program clips
-        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway/?type=clip',
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/japanrailway/?type=clip',
         'info_dict': {
             'id': 'japanrailway',
             'title': 'Japan Railway Journal',
             'description': 'md5:ea39d93af7d05835baadf10d1aae0e3f',
         },
-        'playlist_mincount': 5,
-    }, {
-        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/10yearshayaomiyazaki/',
-        'only_matching': True,
+        'playlist_mincount': 12,
     }, {
         # audio program
-        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/audio/listener/',
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/audio/programs/livinginjapan/',
+        'info_dict': {
+            'id': 'livinginjapan',
+            'title': 'Living in Japan',
+            'description': 'md5:665bb36ec2a12c5a7f598ee713fc2b54',
+        },
+        'playlist_mincount': 12,
+    }, {
+        # /tv/ program url
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/tv/designtalksplus/',
+        'info_dict': {
+            'id': 'designtalksplus',
+            'title': 'DESIGN TALKS plus',
+            'description': 'md5:47b3b3a9f10d4ac7b33b53b70a7d2837',
+        },
+        'playlist_mincount': 20,
+    }, {
+        'url': 'https://www3.nhk.or.jp/nhkworld/en/shows/10yearshayaomiyazaki/',
         'only_matching': True,
     }]

+    @classmethod
+    def suitable(cls, url):
+        return False if NhkVodIE.suitable(url) else super().suitable(url)
+
+    def _extract_meta_from_class_elements(self, class_values, html):
+        for class_value in class_values:
+            if value := clean_html(get_element_by_class(class_value, html)):
+                return value
+
     def _real_extract(self, url):
         lang, m_type, program_id, episode_type = self._match_valid_url(url).group('lang', 'type', 'id', 'episode_type')
         episodes = self._call_api(
-            program_id, lang, m_type == 'video', False, episode_type == 'clip')
+            program_id, lang, m_type != 'audio', False, episode_type == 'clip')

-        entries = []
-        for episode in episodes:
-            episode_path = episode.get('url')
-            if not episode_path:
-                continue
-            entries.append(self._extract_episode_info(
-                urljoin(url, episode_path), episode))
+        def entries():
+            for episode in episodes:
+                if episode_path := episode.get('url'):
+                    yield self._extract_episode_info(urljoin(url, episode_path), episode)

         html = self._download_webpage(url, program_id)
-        program_title = clean_html(get_element_by_class('p-programDetail__title', html))
-        program_description = clean_html(get_element_by_class('p-programDetail__text', html))
+        program_title = self._extract_meta_from_class_elements([
+            'p-programDetail__title',  # /ondemand/program/
+            'pProgramHero__logoText',  # /shows/
+            'tAudioProgramMain__title',  # /shows/audio/programs/
+            'p-program-name'], html)  # /tv/
+        program_description = self._extract_meta_from_class_elements([
+            'p-programDetail__text',  # /ondemand/program/
+            'pProgramHero__description',  # /shows/
+            'tAudioProgramMain__info',  # /shows/audio/programs/
+            'p-program-description'], html)  # /tv/

-        return self.playlist_result(entries, program_id, program_title, program_description)
+        return self.playlist_result(entries(), program_id, program_title, program_description)


 class NhkForSchoolBangumiIE(InfoExtractor):

@@ -1,11 +1,10 @@
-import datetime
+import datetime as dt
 import functools
 import itertools
 import json
 import re
 import time
+import urllib.parse
-from urllib.parse import urlparse

 from .common import InfoExtractor, SearchInfoExtractor
 from ..networking import Request
@@ -820,12 +819,12 @@ class NicovideoSearchDateIE(NicovideoSearchBaseIE, SearchInfoExtractor):
         'playlist_mincount': 1610,
     }]

-    _START_DATE = datetime.date(2007, 1, 1)
+    _START_DATE = dt.date(2007, 1, 1)
     _RESULTS_PER_PAGE = 32
     _MAX_PAGES = 50

     def _entries(self, url, item_id, start_date=None, end_date=None):
-        start_date, end_date = start_date or self._START_DATE, end_date or datetime.datetime.now().date()
+        start_date, end_date = start_date or self._START_DATE, end_date or dt.datetime.now().date()

         # If the last page has a full page of videos, we need to break down the query interval further
         last_page_len = len(list(self._get_entries_for_date(
@@ -957,7 +956,7 @@ class NiconicoLiveIE(InfoExtractor):
             'frontend_id': traverse_obj(embedded_data, ('site', 'frontendId')) or '9',
         })

-        hostname = remove_start(urlparse(urlh.url).hostname, 'sp.')
+        hostname = remove_start(urllib.parse.urlparse(urlh.url).hostname, 'sp.')
         latency = try_get(self._configuration_arg('latency'), lambda x: x[0])
         if latency not in self._KNOWN_LATENCY:
             latency = 'high'
@@ -1,8 +1,8 @@
 import calendar
-import json
+import datetime as dt
 import functools
-from datetime import datetime, timezone
-from random import random
+import json
+import random

 from .common import InfoExtractor
 from ..compat import (
@@ -243,7 +243,7 @@ class PanoptoIE(PanoptoBaseIE):
         invocation_id = delivery_info.get('InvocationId')
         stream_id = traverse_obj(delivery_info, ('Delivery', 'Streams', ..., 'PublicID'), get_all=False, expected_type=str)
         if invocation_id and stream_id and duration:
-            timestamp_str = f'/Date({calendar.timegm(datetime.now(timezone.utc).timetuple())}000)/'
+            timestamp_str = f'/Date({calendar.timegm(dt.datetime.now(dt.timezone.utc).timetuple())}000)/'
             data = {
                 'streamRequests': [
                     {
@@ -415,7 +415,7 @@ class PanoptoIE(PanoptoBaseIE):
             'cast': traverse_obj(delivery, ('Contributors', ..., 'DisplayName'), expected_type=lambda x: x or None),
             'timestamp': session_start_time - 11640000000 if session_start_time else None,
             'duration': delivery.get('Duration'),
-            'thumbnail': base_url + f'/Services/FrameGrabber.svc/FrameRedirect?objectId={video_id}&mode=Delivery&random={random()}',
+            'thumbnail': base_url + f'/Services/FrameGrabber.svc/FrameRedirect?objectId={video_id}&mode=Delivery&random={random.random()}',
             'average_rating': delivery.get('AverageRating'),
             'chapters': self._extract_chapters(timestamps),
             'uploader': delivery.get('OwnerDisplayName') or None,
@@ -1,8 +1,8 @@
 import itertools
+import urllib.parse

 from .common import InfoExtractor
 from .vimeo import VimeoIE
-from ..compat import compat_urllib_parse_unquote
 from ..networking.exceptions import HTTPError
 from ..utils import (
     KNOWN_EXTENSIONS,
@@ -14,7 +14,6 @@ from ..utils import (
     parse_iso8601,
     str_or_none,
     traverse_obj,
-    try_get,
     url_or_none,
     urljoin,
 )
@@ -92,7 +91,7 @@ class PatreonIE(PatreonBaseIE):
             'thumbnail': 're:^https?://.*$',
             'upload_date': '20150211',
             'description': 'md5:8af6425f50bd46fbf29f3db0fc3a8364',
-            'uploader_id': 'TraciJHines',
+            'uploader_id': '@TraciHinesMusic',
             'categories': ['Entertainment'],
             'duration': 282,
             'view_count': int,
@@ -106,8 +105,10 @@ class PatreonIE(PatreonBaseIE):
             'availability': 'public',
             'channel_follower_count': int,
             'playable_in_embed': True,
-            'uploader_url': 'http://www.youtube.com/user/TraciJHines',
+            'uploader_url': 'https://www.youtube.com/@TraciHinesMusic',
             'comment_count': int,
+            'channel_is_verified': True,
+            'chapters': 'count:4',
         },
         'params': {
             'noplaylist': True,
@@ -176,7 +177,71 @@ class PatreonIE(PatreonBaseIE):
             'uploader_url': 'https://www.patreon.com/thenormies',
         },
         'skip': 'Patron-only content',
+    }, {
+        # dead vimeo and embed URLs, need to extract post_file
+        'url': 'https://www.patreon.com/posts/hunter-x-hunter-34007913',
+        'info_dict': {
+            'id': '34007913',
+            'ext': 'mp4',
+            'title': 'Hunter x Hunter | Kurapika DESTROYS Uvogin!!!',
+            'like_count': int,
+            'uploader': 'YaBoyRoshi',
+            'timestamp': 1581636833,
+            'channel_url': 'https://www.patreon.com/yaboyroshi',
+            'thumbnail': r're:^https?://.*$',
+            'tags': ['Hunter x Hunter'],
+            'uploader_id': '14264111',
+            'comment_count': int,
+            'channel_follower_count': int,
+            'description': 'Kurapika is a walking cheat code!',
+            'upload_date': '20200213',
+            'channel_id': '2147162',
+            'uploader_url': 'https://www.patreon.com/yaboyroshi',
+        },
+    }, {
+        # NSFW vimeo embed URL
+        'url': 'https://www.patreon.com/posts/4k-spiderman-4k-96414599',
+        'info_dict': {
+            'id': '902250943',
+            'ext': 'mp4',
+            'title': '❤️(4K) Spiderman Girl Yeonhwa’s Gift ❤️(4K) 스파이더맨걸 연화의 선물',
+            'description': '❤️(4K) Spiderman Girl Yeonhwa’s Gift \n❤️(4K) 스파이더맨걸 연화의 선물',
+            'uploader': 'Npickyeonhwa',
+            'uploader_id': '90574422',
+            'uploader_url': 'https://www.patreon.com/Yeonhwa726',
+            'channel_id': '10237902',
+            'channel_url': 'https://www.patreon.com/Yeonhwa726',
+            'duration': 70,
+            'timestamp': 1705150153,
+            'upload_date': '20240113',
+            'comment_count': int,
+            'like_count': int,
+            'thumbnail': r're:^https?://.+',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        # multiple attachments/embeds
+        'url': 'https://www.patreon.com/posts/holy-wars-solos-100601977',
+        'playlist_count': 3,
+        'info_dict': {
+            'id': '100601977',
+            'title': '"Holy Wars" (Megadeth) Solos Transcription & Lesson/Analysis',
+            'description': 'md5:d099ab976edfce6de2a65c2b169a88d3',
+            'uploader': 'Bradley Hall',
+            'uploader_id': '24401883',
+            'uploader_url': 'https://www.patreon.com/bradleyhallguitar',
+            'channel_id': '3193932',
+            'channel_url': 'https://www.patreon.com/bradleyhallguitar',
+            'channel_follower_count': int,
+            'timestamp': 1710777855,
+            'upload_date': '20240318',
+            'like_count': int,
+            'comment_count': int,
+            'thumbnail': r're:^https?://.+',
+        },
+        'skip': 'Patron-only content',
     }]
+    _RETURN_TYPE = 'video'

     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -191,102 +256,108 @@ class PatreonIE(PatreonBaseIE):
             'include': 'audio,user,user_defined_tags,campaign,attachments_media',
         })
         attributes = post['data']['attributes']
-        title = attributes['title'].strip()
-        image = attributes.get('image') or {}
-        info = {
-            'id': video_id,
-            'title': title,
-            'description': clean_html(attributes.get('content')),
-            'thumbnail': image.get('large_url') or image.get('url'),
-            'timestamp': parse_iso8601(attributes.get('published_at')),
-            'like_count': int_or_none(attributes.get('like_count')),
-            'comment_count': int_or_none(attributes.get('comment_count')),
-        }
-        can_view_post = traverse_obj(attributes, 'current_user_can_view')
-        if can_view_post and info['comment_count']:
-            info['__post_extractor'] = self.extract_comments(video_id)
+        info = traverse_obj(attributes, {
+            'title': ('title', {str.strip}),
+            'description': ('content', {clean_html}),
+            'thumbnail': ('image', ('large_url', 'url'), {url_or_none}, any),
+            'timestamp': ('published_at', {parse_iso8601}),
+            'like_count': ('like_count', {int_or_none}),
+            'comment_count': ('comment_count', {int_or_none}),
+        })

-        for i in post.get('included', []):
-            i_type = i.get('type')
-            if i_type == 'media':
-                media_attributes = i.get('attributes') or {}
-                download_url = media_attributes.get('download_url')
+        entries = []
+        idx = 0
+        for include in traverse_obj(post, ('included', lambda _, v: v['type'])):
+            include_type = include['type']
+            if include_type == 'media':
+                media_attributes = traverse_obj(include, ('attributes', {dict})) or {}
+                download_url = url_or_none(media_attributes.get('download_url'))
                 ext = mimetype2ext(media_attributes.get('mimetype'))

                 # if size_bytes is None, this media file is likely unavailable
                 # See: https://github.com/yt-dlp/yt-dlp/issues/4608
                 size_bytes = int_or_none(media_attributes.get('size_bytes'))
                 if download_url and ext in KNOWN_EXTENSIONS and size_bytes is not None:
-                    # XXX: what happens if there are multiple attachments?
-                    return {
-                        **info,
+                    idx += 1
+                    entries.append({
+                        'id': f'{video_id}-{idx}',
                         'ext': ext,
                         'filesize': size_bytes,
                         'url': download_url,
-                    }
-            elif i_type == 'user':
-                user_attributes = i.get('attributes')
-                if user_attributes:
-                    info.update({
-                        'uploader': user_attributes.get('full_name'),
-                        'uploader_id': str_or_none(i.get('id')),
-                        'uploader_url': user_attributes.get('url'),
                     })

-            elif i_type == 'post_tag':
-                info.setdefault('tags', []).append(traverse_obj(i, ('attributes', 'value')))
+            elif include_type == 'user':
+                info.update(traverse_obj(include, {
+                    'uploader': ('attributes', 'full_name', {str}),
+                    'uploader_id': ('id', {str_or_none}),
+                    'uploader_url': ('attributes', 'url', {url_or_none}),
+                }))

-            elif i_type == 'campaign':
-                info.update({
-                    'channel': traverse_obj(i, ('attributes', 'title')),
-                    'channel_id': str_or_none(i.get('id')),
-                    'channel_url': traverse_obj(i, ('attributes', 'url')),
-                    'channel_follower_count': int_or_none(traverse_obj(i, ('attributes', 'patron_count'))),
-                })
+            elif include_type == 'post_tag':
+                if post_tag := traverse_obj(include, ('attributes', 'value', {str})):
+                    info.setdefault('tags', []).append(post_tag)
+
+            elif include_type == 'campaign':
+                info.update(traverse_obj(include, {
+                    'channel': ('attributes', 'title', {str}),
+                    'channel_id': ('id', {str_or_none}),
+                    'channel_url': ('attributes', 'url', {url_or_none}),
+                    'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
+                }))

         # handle Vimeo embeds
-        if try_get(attributes, lambda x: x['embed']['provider']) == 'Vimeo':
-            embed_html = try_get(attributes, lambda x: x['embed']['html'])
-            v_url = url_or_none(compat_urllib_parse_unquote(
-                self._search_regex(r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', embed_html, 'vimeo url', fatal=False)))
-            if v_url:
-                return {
-                    **info,
-                    '_type': 'url_transparent',
-                    'url': VimeoIE._smuggle_referrer(v_url, 'https://patreon.com'),
-                    'ie_key': 'Vimeo',
-                }
+        if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
+            v_url = urllib.parse.unquote(self._html_search_regex(
+                r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)',
+                traverse_obj(attributes, ('embed', 'html', {str})), 'vimeo url', fatal=False) or '')
+            if url_or_none(v_url) and self._request_webpage(
+                    v_url, video_id, 'Checking Vimeo embed URL',
+                    headers={'Referer': 'https://patreon.com/'},
+                    fatal=False, errnote=False):
+                entries.append(self.url_result(
+                    VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
+                    VimeoIE, url_transparent=True))

-        embed_url = try_get(attributes, lambda x: x['embed']['url'])
-        if embed_url:
-            return {
-                **info,
-                '_type': 'url',
-                'url': embed_url,
-            }
+        embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
+        if embed_url and self._request_webpage(embed_url, video_id, 'Checking embed URL', fatal=False, errnote=False):
+            entries.append(self.url_result(embed_url))

-        post_file = traverse_obj(attributes, 'post_file')
+        post_file = traverse_obj(attributes, ('post_file', {dict}))
         if post_file:
             name = post_file.get('name')
             ext = determine_ext(name)
             if ext in KNOWN_EXTENSIONS:
-                return {
-                    **info,
+                entries.append({
+                    'id': video_id,
                     'ext': ext,
                     'url': post_file['url'],
-                }
+                })
             elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
                 formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
-                return {
-                    **info,
+                entries.append({
+                    'id': video_id,
                     'formats': formats,
                     'subtitles': subtitles,
-                }
+                })

-        if can_view_post is False:
+        can_view_post = traverse_obj(attributes, 'current_user_can_view')
+        comments = None
+        if can_view_post and info.get('comment_count'):
+            comments = self.extract_comments(video_id)
+
+        if not entries and can_view_post is False:
             self.raise_no_formats('You do not have access to this post', video_id=video_id, expected=True)
-        else:
+        elif not entries:
             self.raise_no_formats('No supported media found in this post', video_id=video_id, expected=True)
+        elif len(entries) == 1:
+            info.update(entries[0])
+        else:
+            for entry in entries:
+                entry.update(info)
+            return self.playlist_result(entries, video_id, **info, __post_extractor=comments)
+
+        info['id'] = video_id
+        info['__post_extractor'] = comments
         return info

     def _get_comments(self, post_id):

@@ -1,5 +1,5 @@
-from uuid import uuid4
 import json
+import uuid

 from .common import InfoExtractor
 from ..utils import (
@@ -51,7 +51,7 @@ class PolsatGoIE(InfoExtractor):
         }

     def _call_api(self, endpoint, media_id, method, params):
-        rand_uuid = str(uuid4())
+        rand_uuid = str(uuid.uuid4())
         res = self._download_json(
             f'https://b2c-mobile.redefine.pl/rpc/{endpoint}/', media_id,
             note=f'Downloading {method} JSON metadata',
@@ -1,95 +0,0 @@
-import urllib.parse
-
-from .common import InfoExtractor
-from ..utils import (
-    determine_ext,
-    int_or_none,
-    parse_duration,
-    remove_end,
-    unified_strdate,
-    ExtractorError,
-)
-
-
-class Porn91IE(InfoExtractor):
-    IE_NAME = '91porn'
-    _VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/view_video.php\?([^#]+&)?viewkey=(?P<id>\w+)'
-
-    _TESTS = [{
-        'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134',
-        'md5': 'd869db281402e0ef4ddef3c38b866f86',
-        'info_dict': {
-            'id': '7e42283b4f5ab36da134',
-            'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!',
-            'description': 'md5:1ff241f579b07ae936a54e810ad2e891',
-            'ext': 'mp4',
-            'duration': 431,
-            'upload_date': '20150520',
-            'comment_count': int,
-            'view_count': int,
-            'age_limit': 18,
-        }
-    }, {
-        'url': 'https://91porn.com/view_video.php?viewkey=7ef0cf3d362c699ab91c',
-        'md5': 'f8fd50540468a6d795378cd778b40226',
-        'info_dict': {
-            'id': '7ef0cf3d362c699ab91c',
-            'title': '真实空乘,冲上云霄第二部',
-            'description': 'md5:618bf9652cafcc66cd277bd96789baea',
-            'ext': 'mp4',
-            'duration': 248,
-            'upload_date': '20221119',
-            'comment_count': int,
-            'view_count': int,
-            'age_limit': 18,
-        }
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        self._set_cookie('91porn.com', 'language', 'cn_CN')
-
-        webpage = self._download_webpage(
-            'http://91porn.com/view_video.php?viewkey=%s' % video_id, video_id)
-
-        if '视频不存在,可能已经被删除或者被举报为不良内容!' in webpage:
-            raise ExtractorError('91 Porn says: Video does not exist', expected=True)
-
-        daily_limit = self._search_regex(
-            r'作为游客,你每天只可观看([\d]+)个视频', webpage, 'exceeded daily limit', default=None, fatal=False)
-        if daily_limit:
-            raise ExtractorError(f'91 Porn says: Daily limit {daily_limit} videos exceeded', expected=True)
-
-        video_link_url = self._search_regex(
-            r'document\.write\(\s*strencode2\s*\(\s*((?:"[^"]+")|(?:\'[^\']+\'))', webpage, 'video link')
-        video_link_url = self._search_regex(
-            r'src=["\']([^"\']+)["\']', urllib.parse.unquote(video_link_url), 'unquoted video link')
-
-        formats, subtitles = self._get_formats_and_subtitle(video_link_url, video_id)
-
-        return {
-            'id': video_id,
-            'title': remove_end(self._html_extract_title(webpage).replace('\n', ''), 'Chinese homemade video').strip(),
-            'formats': formats,
-            'subtitles': subtitles,
-            'upload_date': unified_strdate(self._search_regex(
-                r'<span\s+class=["\']title-yakov["\']>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload_date', fatal=False)),
-            'description': self._html_search_regex(
-                r'<span\s+class=["\']more title["\']>\s*([^<]+)', webpage, 'description', fatal=False),
-            'duration': parse_duration(self._search_regex(
-                r'时长:\s*<span[^>]*>\s*(\d+(?::\d+){1,2})', webpage, 'duration', fatal=False)),
-            'comment_count': int_or_none(self._search_regex(
-                r'留言:\s*<span[^>]*>\s*(\d+)\s*</span>', webpage, 'comment count', fatal=False)),
-            'view_count': int_or_none(self._search_regex(
-                r'热度:\s*<span[^>]*>\s*(\d+)\s*</span>', webpage, 'view count', fatal=False)),
-            'age_limit': 18,
-        }
-
-    def _get_formats_and_subtitle(self, video_link_url, video_id):
-        ext = determine_ext(video_link_url)
-        if ext == 'm3u8':
-            formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_link_url, video_id, ext='mp4')
-        else:
-            formats = [{'url': video_link_url, 'ext': ext}]
-            subtitles = {}
-
-        return formats, subtitles
@@ -97,7 +97,7 @@ class PornHubBaseIE(InfoExtractor):
             login_form = self._hidden_inputs(login_page)

             login_form.update({
-                'username': username,
+                'email': username,
                 'password': password,
             })

@@ -1,5 +1,6 @@
+import datetime as dt
 import json
-from urllib.parse import unquote
+import urllib.parse

 from .common import InfoExtractor
 from ..compat import functools
@@ -114,7 +115,7 @@ class Pr0grammIE(InfoExtractor):
         cookies = self._get_cookies(self.BASE_URL)
         if 'me' not in cookies:
             self._download_webpage(self.BASE_URL, None, 'Refreshing verification information')
-        if traverse_obj(cookies, ('me', {lambda x: x.value}, {unquote}, {json.loads}, 'verified')):
+        if traverse_obj(cookies, ('me', {lambda x: x.value}, {urllib.parse.unquote}, {json.loads}, 'verified')):
            flags |= 0b00110

         return flags
@@ -196,6 +197,7 @@ class Pr0grammIE(InfoExtractor):
             'like_count': ('up', {int}),
             'dislike_count': ('down', {int}),
             'timestamp': ('created', {int}),
+            'upload_date': ('created', {int}, {dt.date.fromtimestamp}, {lambda x: x.strftime('%Y%m%d')}),
             'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)})
         }),
     }
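Note (reviewer aside, not part of the patch): the added upload_date rule derives a YYYYMMDD string from the Unix timestamp in 'created'. A minimal sketch of the same conversion chain (the timestamp value is hypothetical, and dt.date.fromtimestamp uses the local timezone):

    import datetime as dt

    created = 1700000000
    print(dt.date.fromtimestamp(created).strftime('%Y%m%d'))  # e.g. '20231114', depending on timezone
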
@@ -1,6 +1,6 @@
+import hashlib
 import re

-from hashlib import sha1
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
@@ -42,7 +42,7 @@ class ProSiebenSat1BaseIE(InfoExtractor):
             'Downloading protocols JSON',
             headers=self.geo_verification_headers(), query={
                 'access_id': self._ACCESS_ID,
-                'client_token': sha1((raw_ct).encode()).hexdigest(),
+                'client_token': hashlib.sha1((raw_ct).encode()).hexdigest(),
                 'video_id': clip_id,
             }, fatal=False, expected_status=(403,)) or {}
         error = protocols.get('error') or {}
@@ -53,7 +53,7 @@ class ProSiebenSat1BaseIE(InfoExtractor):
         urls = (self._download_json(
             self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={
                 'access_id': self._ACCESS_ID,
-                'client_token': sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(),
+                'client_token': hashlib.sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(),
                 'protocols': self._SUPPORTED_PROTOCOLS,
                 'server_token': server_token,
                 'video_id': clip_id,
@@ -77,7 +77,7 @@ class ProSiebenSat1BaseIE(InfoExtractor):
         if not formats:
             source_ids = [compat_str(source['id']) for source in video['sources']]

-            client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
+            client_id = self._SALT[:2] + hashlib.sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()

             sources = self._download_json(
                 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
@@ -96,7 +96,7 @@ class ProSiebenSat1BaseIE(InfoExtractor):
                 return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate

             for source_id in source_ids:
-                client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
+                client_id = self._SALT[:2] + hashlib.sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
                 urls = self._download_json(
                     'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
                     clip_id, 'Downloading urls JSON', fatal=False, query={
@@ -1,18 +1,14 @@
-from .common import InfoExtractor
-from ..utils import (
-    clean_html,
-    traverse_obj,
-    unescapeHTML,
-)
-
 import itertools
-from urllib.parse import urlencode
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import clean_html, traverse_obj, unescapeHTML


 class RadioKapitalBaseIE(InfoExtractor):
     def _call_api(self, resource, video_id, note='Downloading JSON metadata', qs={}):
         return self._download_json(
-            f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{urlencode(qs)}',
+            f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{urllib.parse.urlencode(qs)}',
             video_id, note=note)

     def _parse_episode(self, data):
|
|
@@ -1,8 +1,8 @@
+import datetime as dt
 import itertools
 import json
 import re
 import urllib.parse
-from datetime import datetime

 from .common import InfoExtractor, SearchInfoExtractor
 from ..utils import (
@@ -156,7 +156,7 @@ class RokfinIE(InfoExtractor):
             self.raise_login_required('This video is only available to premium users', True, method='cookies')
         elif scheduled:
             self.raise_no_formats(
-                f'Stream is offline; scheduled for {datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}',
+                f'Stream is offline; scheduled for {dt.datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}',
                 video_id=video_id, expected=True)

         uploader = traverse_obj(metadata, ('createdBy', 'username'), ('creator', 'username'))
||||||
|
|
|
@@ -1,4 +1,4 @@
-import datetime
+import datetime as dt

 from .common import InfoExtractor
 from .redge import RedCDNLivxIE
@@ -13,16 +13,16 @@ from ..utils.traversal import traverse_obj


 def is_dst(date):
-    last_march = datetime.datetime(date.year, 3, 31)
-    last_october = datetime.datetime(date.year, 10, 31)
-    last_sunday_march = last_march - datetime.timedelta(days=last_march.isoweekday() % 7)
-    last_sunday_october = last_october - datetime.timedelta(days=last_october.isoweekday() % 7)
+    last_march = dt.datetime(date.year, 3, 31)
+    last_october = dt.datetime(date.year, 10, 31)
+    last_sunday_march = last_march - dt.timedelta(days=last_march.isoweekday() % 7)
+    last_sunday_october = last_october - dt.timedelta(days=last_october.isoweekday() % 7)
     return last_sunday_march.replace(hour=2) <= date <= last_sunday_october.replace(hour=3)


 def rfc3339_to_atende(date):
-    date = datetime.datetime.fromisoformat(date)
-    date = date + datetime.timedelta(hours=1 if is_dst(date) else 0)
+    date = dt.datetime.fromisoformat(date)
+    date = date + dt.timedelta(hours=1 if is_dst(date) else 0)
     return int((date.timestamp() - 978307200) * 1000)
|
|
||||||
|
|
||||||
|
|
112
yt_dlp/extractor/sharepoint.py
Normal file
|
@@ -0,0 +1,112 @@
+import json
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import determine_ext, int_or_none, url_or_none
+from ..utils.traversal import traverse_obj
+
+
+class SharePointIE(InfoExtractor):
+    _BASE_URL_RE = r'https?://[\w-]+\.sharepoint\.com/'
+    _VALID_URL = [
+        rf'{_BASE_URL_RE}:v:/[a-z]/(?:[^/?#]+/)*(?P<id>[^/?#]{{46}})/?(?:$|[?#])',
+        rf'{_BASE_URL_RE}(?!:v:)(?:[^/?#]+/)*stream\.aspx\?(?:[^#]+&)?id=(?P<id>[^&#]+)',
+    ]
+    _TESTS = [{
+        'url': 'https://lut-my.sharepoint.com/:v:/g/personal/juha_eerola_student_lab_fi/EUrAmrktb4ZMhUcY9J2PqMEBD_9x_l0DyYWVgAvp-TTOMw?e=ZpQOOw',
+        'md5': '2950821d0d4937a0a76373782093b435',
+        'info_dict': {
+            'id': '01EQRS7EKKYCNLSLLPQZGIKRYY6SOY7KGB',
+            'display_id': 'EUrAmrktb4ZMhUcY9J2PqMEBD_9x_l0DyYWVgAvp-TTOMw',
+            'ext': 'mp4',
+            'title': 'CmvpJST',
+            'duration': 54.567,
+            'thumbnail': r're:https://.+/thumbnail',
+            'uploader_id': '8dcec565-a956-4b91-95e5-bacfb8bc015f',
+        },
+    }, {
+        'url': 'https://greaternyace.sharepoint.com/:v:/s/acementornydrive/ETski5eAfNVEoPRZUAyy1wEBpLgVFYWso5bjbZjfBLlPUg?e=PQUfVb',
+        'md5': 'c496a01644223273bff12e93e501afd1',
+        'info_dict': {
+            'id': '01QI4AVTZ3ESFZPAD42VCKB5CZKAGLFVYB',
+            'display_id': 'ETski5eAfNVEoPRZUAyy1wEBpLgVFYWso5bjbZjfBLlPUg',
+            'ext': 'mp4',
+            'title': '930103681233985536',
+            'duration': 3797.326,
+            'thumbnail': r're:https://.+/thumbnail',
+        },
+    }, {
+        'url': 'https://lut-my.sharepoint.com/personal/juha_eerola_student_lab_fi/_layouts/15/stream.aspx?id=%2Fpersonal%2Fjuha_eerola_student_lab_fi%2FDocuments%2FM-DL%2FCmvpJST.mp4&ga=1&referrer=StreamWebApp.Web&referrerScenario=AddressBarCopied.view',
+        'info_dict': {
+            'id': '01EQRS7EKKYCNLSLLPQZGIKRYY6SOY7KGB',
+            'display_id': '/personal/juha_eerola_student_lab_fi/Documents/M-DL/CmvpJST.mp4',
+            'ext': 'mp4',
+            'title': 'CmvpJST',
+            'duration': 54.567,
+            'thumbnail': r're:https://.+/thumbnail',
+            'uploader_id': '8dcec565-a956-4b91-95e5-bacfb8bc015f',
+        },
+        'skip': 'Session cookies needed',
+    }, {
+        'url': 'https://izoobasisschool.sharepoint.com/:v:/g/Eaqleq8COVBIvIPvod0U27oBypC6aWOkk8ptuDpmJ6arHw',
+        'only_matching': True,
+    }, {
+        'url': 'https://uskudaredutr-my.sharepoint.com/:v:/g/personal/songul_turkaydin_uskudar_edu_tr/EbTf-VRUIbtGuIN73tx1MuwBCHBOmNcWNqSLw61Fd2_o0g?e=n5Vkof',
+        'only_matching': True,
+    }, {
+        'url': 'https://epam-my.sharepoint.com/:v:/p/dzmitry_tamashevich/Ec4ZOs-rATZHjFYZWVxjczEB649FCoYFKDV_x3RxZiWAGA?e=4hswgA',
+        'only_matching': True,
+    }, {
+        'url': 'https://microsoft.sharepoint.com/:v:/t/MicrosoftSPARKRecordings-MSFTInternal/EWCyeqByVWBAt8wDvNZdV-UB0BvU5YVbKm0UHgdrUlI6dg?e=QbPck6',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = urllib.parse.unquote(self._match_id(url))
+        webpage, urlh = self._download_webpage_handle(url, display_id)
+        if urllib.parse.urlparse(urlh.url).hostname == 'login.microsoftonline.com':
+            self.raise_login_required(
+                'Session cookies are required for this URL and can be passed '
+                'with the --cookies option. The --cookies-from-browser option will not work', method=None)
+
+        video_data = self._search_json(r'g_fileInfo\s*=', webpage, 'player config', display_id)
+        video_id = video_data['VroomItemId']
+
+        parsed_url = urllib.parse.urlparse(video_data['.transformUrl'])
+        base_media_url = urllib.parse.urlunparse(parsed_url._replace(
+            path=urllib.parse.urljoin(f'{parsed_url.path}/', '../videomanifest'),
+            query=urllib.parse.urlencode({
+                **urllib.parse.parse_qs(parsed_url.query),
+                'cTag': video_data['.ctag'],
+                'action': 'Access',
+                'part': 'index',
+            }, doseq=True)))
+
+        # Web player adds more params to the format URLs but we still get all formats without them
+        formats = self._extract_mpd_formats(
+            base_media_url, video_id, mpd_id='dash', query={'format': 'dash'}, fatal=False)
+        for hls_type in ('hls', 'hls-vnext'):
+            formats.extend(self._extract_m3u8_formats(
+                base_media_url, video_id, 'mp4', m3u8_id=hls_type,
+                query={'format': hls_type}, fatal=False, quality=-2))
+
+        if video_url := traverse_obj(video_data, ('downloadUrl', {url_or_none})):
+            formats.append({
+                'url': video_url,
+                'ext': determine_ext(video_data.get('extension') or video_data.get('name')),
+                'quality': 1,
+                'format_id': 'source',
+                'filesize': int_or_none(video_data.get('size')),
+                'vcodec': 'none' if video_data.get('isAudio') is True else None,
+            })
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': video_data.get('title') or video_data.get('displayName'),
+            'display_id': display_id,
+            'uploader_id': video_data.get('authorId'),
+            'duration': traverse_obj(video_data, (
+                'MediaServiceFastMetadata', {json.loads}, 'media', 'duration', {lambda x: x / 10000000})),
+            'thumbnail': url_or_none(video_data.get('thumbnailUrl')),
+        }
|
|
@@ -1,4 +1,4 @@
-import datetime
+import datetime as dt
 import itertools
 import json
 import math
@@ -94,7 +94,7 @@ class SonyLIVIE(InfoExtractor):
                 'mobileNumber': username,
                 'channelPartnerID': 'MSMIND',
                 'country': 'IN',
-                'timestamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
+                'timestamp': dt.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
                 'otpSize': 6,
                 'loginType': 'REGISTERORSIGNIN',
                 'isMobileMandatory': True,
@@ -111,7 +111,7 @@ class SonyLIVIE(InfoExtractor):
                 'otp': self._get_tfa_info('OTP'),
                 'dmaId': 'IN',
                 'ageConfirmation': True,
-                'timestamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
+                'timestamp': dt.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
                 'isMobileMandatory': True,
             }).encode())
         if otp_verify_json['resultCode'] == 'KO':
|
|
|
@ -1,30 +1,27 @@
|
||||||
import itertools
|
import itertools
|
||||||
import re
|
|
||||||
import json
|
import json
|
||||||
# import random
|
import re
|
||||||
|
|
||||||
from .common import (
|
from .common import InfoExtractor, SearchInfoExtractor
|
||||||
InfoExtractor,
|
|
||||||
SearchInfoExtractor
|
|
||||||
)
|
|
||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
from ..networking import HEADRequest, Request
|
from ..networking import HEADRequest
|
||||||
from ..networking.exceptions import HTTPError
|
from ..networking.exceptions import HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
error_to_compat_str,
|
KNOWN_EXTENSIONS,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
error_to_compat_str,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
KNOWN_EXTENSIONS,
|
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_qs,
|
parse_qs,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
try_get,
|
try_call,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urlhandle_detect_ext,
|
urlhandle_detect_ext,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class SoundcloudEmbedIE(InfoExtractor):
|
class SoundcloudEmbedIE(InfoExtractor):
|
||||||
|
@ -54,7 +51,6 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||||
_API_AUTH_QUERY_TEMPLATE = '?client_id=%s'
|
_API_AUTH_QUERY_TEMPLATE = '?client_id=%s'
|
||||||
_API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s'
|
_API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s'
|
||||||
_API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s'
|
_API_VERIFY_AUTH_TOKEN = 'https://api-auth.soundcloud.com/connect/session%s'
|
||||||
_access_token = None
|
|
||||||
_HEADERS = {}
|
_HEADERS = {}
|
||||||
|
|
||||||
_IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
|
_IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
|
||||||
|
@ -112,21 +108,31 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||||
def _initialize_pre_login(self):
|
def _initialize_pre_login(self):
|
||||||
self._CLIENT_ID = self.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'
|
self._CLIENT_ID = self.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'
|
||||||
|
|
||||||
def _perform_login(self, username, password):
|
def _verify_oauth_token(self, token):
|
||||||
if username != 'oauth':
|
if self._request_webpage(
|
||||||
self.report_warning(
|
self._API_VERIFY_AUTH_TOKEN % (self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID),
|
||||||
'Login using username and password is not currently supported. '
|
None, note='Verifying login token...', fatal=False,
|
||||||
'Use "--username oauth --password <oauth_token>" to login using an oauth token')
|
data=json.dumps({'session': {'access_token': token}}).encode()):
|
||||||
self._access_token = password
|
self._HEADERS['Authorization'] = f'OAuth {token}'
|
||||||
query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
|
|
||||||
payload = {'session': {'access_token': self._access_token}}
|
|
||||||
token_verification = Request(self._API_VERIFY_AUTH_TOKEN % query, json.dumps(payload).encode('utf-8'))
|
|
||||||
response = self._download_json(token_verification, None, note='Verifying login token...', fatal=False)
|
|
||||||
if response is not False:
|
|
||||||
self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
|
|
||||||
self.report_login()
|
self.report_login()
|
||||||
else:
|
else:
|
||||||
self.report_warning('Provided authorization token seems to be invalid. Continue as guest')
|
self.report_warning('Provided authorization token is invalid. Continuing as guest')
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
if self._HEADERS:
|
||||||
|
return
|
||||||
|
if token := try_call(lambda: self._get_cookies(self._BASE_URL)['oauth_token'].value):
|
||||||
|
self._verify_oauth_token(token)
|
||||||
|
|
||||||
|
def _perform_login(self, username, password):
|
||||||
|
if username != 'oauth':
|
||||||
|
raise ExtractorError(
|
||||||
|
'Login using username and password is not currently supported. '
|
||||||
|
'Use "--username oauth --password <oauth_token>" to login using an oauth token, '
|
||||||
|
f'or else {self._login_hint(method="cookies")}', expected=True)
|
||||||
|
if self._HEADERS:
|
||||||
|
return
|
||||||
|
self._verify_oauth_token(password)
|
||||||
|
|
||||||
r'''
|
r'''
|
||||||
def genDevId():
|
def genDevId():
|
||||||
|
@ -147,14 +153,17 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||||
'user_agent': self._USER_AGENT
|
'user_agent': self._USER_AGENT
|
||||||
}
|
}
|
||||||
|
|
||||||
query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
|
response = self._download_json(
|
||||||
login = sanitized_Request(self._API_AUTH_URL_PW % query, json.dumps(payload).encode('utf-8'))
|
self._API_AUTH_URL_PW % (self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID),
|
||||||
response = self._download_json(login, None)
|
None, note='Verifying login token...', fatal=False,
|
||||||
self._access_token = response.get('session').get('access_token')
|
data=json.dumps(payload).encode())
|
||||||
if not self._access_token:
|
|
||||||
self.report_warning('Unable to get access token, login may has failed')
|
if token := traverse_obj(response, ('session', 'access_token', {str})):
|
||||||
else:
|
self._HEADERS['Authorization'] = f'OAuth {token}'
|
||||||
self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
|
self.report_login()
|
||||||
|
return
|
||||||
|
|
||||||
|
raise ExtractorError('Unable to get access token, login may have failed', expected=True)
|
||||||
'''
|
'''
|
||||||
|
|
||||||
# signature generation
|
# signature generation
|
||||||
|
@ -217,6 +226,7 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||||
'filesize': int_or_none(urlh.headers.get('Content-Length')),
|
'filesize': int_or_none(urlh.headers.get('Content-Length')),
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
'quality': 10,
|
'quality': 10,
|
||||||
|
'format_note': 'Original',
|
||||||
})
|
})
|
||||||
|
|
||||||
def invalid_url(url):
|
def invalid_url(url):
|
||||||
|
@ -233,9 +243,13 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||||
format_id_list.append(protocol)
|
format_id_list.append(protocol)
|
||||||
ext = f.get('ext')
|
ext = f.get('ext')
|
||||||
if ext == 'aac':
|
if ext == 'aac':
|
||||||
f['abr'] = '256'
|
f.update({
|
||||||
|
'abr': 256,
|
||||||
|
'quality': 5,
|
||||||
|
'format_note': 'Premium',
|
||||||
|
})
|
||||||
for k in ('ext', 'abr'):
|
for k in ('ext', 'abr'):
|
||||||
v = f.get(k)
|
v = str_or_none(f.get(k))
|
||||||
if v:
|
if v:
|
||||||
format_id_list.append(v)
|
format_id_list.append(v)
|
||||||
preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url'])
|
preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url'])
|
||||||
|
@ -256,16 +270,25 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
# New API
|
# New API
|
||||||
transcodings = try_get(
|
for t in traverse_obj(info, ('media', 'transcodings', lambda _, v: url_or_none(v['url']))):
|
||||||
info, lambda x: x['media']['transcodings'], list) or []
|
if extract_flat:
|
||||||
for t in transcodings:
|
break
|
||||||
if not isinstance(t, dict):
|
format_url = t['url']
|
||||||
continue
|
stream = None
|
||||||
format_url = url_or_none(t.get('url'))
|
|
||||||
if not format_url:
|
for retry in self.RetryManager(fatal=False):
|
||||||
continue
|
try:
|
||||||
stream = None if extract_flat else self._download_json(
|
stream = self._download_json(format_url, track_id, query=query, headers=self._HEADERS)
|
||||||
format_url, track_id, query=query, fatal=False, headers=self._HEADERS)
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, HTTPError) and e.cause.status == 429:
|
||||||
|
self.report_warning(
|
||||||
|
'You have reached the API rate limit, which is ~600 requests per '
|
||||||
|
'10 minutes. Use the --extractor-retries and --retry-sleep options '
|
||||||
|
'to configure an appropriate retry count and wait time', only_once=True)
|
||||||
|
retry.error = e.cause
|
||||||
|
else:
|
||||||
|
self.report_warning(e.msg)
|
||||||
|
|
||||||
if not isinstance(stream, dict):
|
if not isinstance(stream, dict):
|
||||||
continue
|
continue
|
||||||
stream_url = url_or_none(stream.get('url'))
|
stream_url = url_or_none(stream.get('url'))
|
||||||
|
@ -338,7 +361,7 @@ class SoundcloudBaseIE(InfoExtractor):
|
||||||
'like_count': extract_count('favoritings') or extract_count('likes'),
|
'like_count': extract_count('favoritings') or extract_count('likes'),
|
||||||
'comment_count': extract_count('comment'),
|
'comment_count': extract_count('comment'),
|
||||||
'repost_count': extract_count('reposts'),
|
'repost_count': extract_count('reposts'),
|
||||||
'genre': info.get('genre'),
|
'genres': traverse_obj(info, ('genre', {str}, {lambda x: x or None}, all)),
|
||||||
'formats': formats if not extract_flat else None
|
'formats': formats if not extract_flat else None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -372,10 +395,10 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
|
'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
|
||||||
'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
|
'md5': 'de9bac153e7427a7333b4b0c1b6a18d2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '62986583',
|
'id': '62986583',
|
||||||
'ext': 'mp3',
|
'ext': 'opus',
|
||||||
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
|
'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
|
||||||
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
|
'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
|
||||||
'uploader': 'E.T. ExTerrestrial Music',
|
'uploader': 'E.T. ExTerrestrial Music',
|
||||||
|
@ -388,6 +411,9 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
|
'thumbnail': 'https://i1.sndcdn.com/artworks-000031955188-rwb18x-original.jpg',
|
||||||
|
'uploader_url': 'https://soundcloud.com/ethmusic',
|
||||||
|
'genres': [],
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# geo-restricted
|
# geo-restricted
|
||||||
|
@ -395,7 +421,7 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||||
'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
|
'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '47127627',
|
'id': '47127627',
|
||||||
'ext': 'mp3',
|
'ext': 'opus',
|
||||||
'title': 'Goldrushed',
|
'title': 'Goldrushed',
|
||||||
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
|
'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
|
||||||
'uploader': 'The Royal Concept',
|
'uploader': 'The Royal Concept',
|
||||||
|
@ -408,6 +434,9 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
|
'uploader_url': 'https://soundcloud.com/the-concept-band',
|
||||||
|
'thumbnail': 'https://i1.sndcdn.com/artworks-v8bFHhXm7Au6-0-original.jpg',
|
||||||
|
'genres': ['Alternative'],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# private link
|
# private link
|
||||||
|
@ -429,6 +458,9 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
|
'uploader_url': 'https://soundcloud.com/jaimemf',
|
||||||
|
'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
|
||||||
|
'genres': ['youtubedl'],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# private link (alt format)
|
# private link (alt format)
|
||||||
|
@ -450,6 +482,9 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
|
'uploader_url': 'https://soundcloud.com/jaimemf',
|
||||||
|
'thumbnail': 'https://a1.sndcdn.com/images/default_avatar_large.png',
|
||||||
|
'genres': ['youtubedl'],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# downloadable song
|
# downloadable song
|
||||||
|
@ -459,6 +494,21 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '343609555',
|
'id': '343609555',
|
||||||
'ext': 'wav',
|
'ext': 'wav',
|
||||||
|
'title': 'The Following',
|
||||||
|
'description': '',
|
||||||
|
'uploader': '80M',
|
||||||
|
'uploader_id': '312384765',
|
||||||
|
'uploader_url': 'https://soundcloud.com/the80m',
|
||||||
|
'upload_date': '20170922',
|
||||||
|
'timestamp': 1506120436,
|
||||||
|
'duration': 397.228,
|
||||||
|
'thumbnail': 'https://i1.sndcdn.com/artworks-000243916348-ktoo7d-original.jpg',
|
||||||
|
'license': 'all-rights-reserved',
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'view_count': int,
|
||||||
|
'genres': ['Dance & EDM'],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# private link, downloadable format
|
# private link, downloadable format
|
||||||
|
@ -480,6 +530,9 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
|
'thumbnail': 'https://i1.sndcdn.com/artworks-000240712245-kedn4p-original.jpg',
|
||||||
|
'uploader_url': 'https://soundcloud.com/oriuplift',
|
||||||
|
'genres': ['Trance'],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# no album art, use avatar pic for thumbnail
|
# no album art, use avatar pic for thumbnail
|
||||||
|
@ -502,6 +555,8 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
|
'uploader_url': 'https://soundcloud.com/garyvee',
|
||||||
|
'genres': [],
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -509,13 +564,13 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
|
'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
|
||||||
'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
|
'md5': '8227c3473a4264df6b02ad7e5b7527ac',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '583011102',
|
'id': '583011102',
|
||||||
'ext': 'mp3',
|
'ext': 'opus',
|
||||||
'title': 'Mezzo Valzer',
|
'title': 'Mezzo Valzer',
|
||||||
'description': 'md5:4138d582f81866a530317bae316e8b61',
|
'description': 'md5:f4d5f39d52e0ccc2b4f665326428901a',
|
||||||
'uploader': 'Micronie',
|
'uploader': 'Giovanni Sarani',
|
||||||
'uploader_id': '3352531',
|
'uploader_id': '3352531',
|
||||||
'timestamp': 1551394171,
|
'timestamp': 1551394171,
|
||||||
'upload_date': '20190228',
|
'upload_date': '20190228',
|
||||||
|
@ -526,6 +581,8 @@ class SoundcloudIE(SoundcloudBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
|
'genres': ['Piano'],
|
||||||
|
'uploader_url': 'https://soundcloud.com/giovannisarani',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@@ -174,7 +174,7 @@ class TheaterComplexTownBaseIE(StacommuBaseIE):


 class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?videos/episodes/(?P<id>\w+)'
+    _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?videos/episodes/(?P<id>\w+)'
     IE_NAME = 'theatercomplextown:vod'
     _TESTS = [{
         'url': 'https://www.theater-complex.town/videos/episodes/hoxqidYNoAn7bP92DN6p78',
@@ -195,6 +195,9 @@ class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):
     }, {
         'url': 'https://www.theater-complex.town/en/videos/episodes/6QT7XYwM9dJz5Gf9VB6K5y',
         'only_matching': True,
+    }, {
+        'url': 'https://www.theater-complex.town/ja/videos/episodes/hoxqidYNoAn7bP92DN6p78',
+        'only_matching': True,
     }]

     _API_PATH = 'videoEpisodes'
@@ -204,7 +207,7 @@ class TheaterComplexTownVODIE(TheaterComplexTownBaseIE):


 class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
-    _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:en/)?ppv/(?P<id>\w+)'
+    _VALID_URL = r'https?://(?:www\.)?theater-complex\.town/(?:(?:en|ja)/)?ppv/(?P<id>\w+)'
     IE_NAME = 'theatercomplextown:ppv'
     _TESTS = [{
         'url': 'https://www.theater-complex.town/ppv/wytW3X7khrjJBUpKuV3jen',
@@ -223,6 +226,9 @@ class TheaterComplexTownPPVIE(TheaterComplexTownBaseIE):
     }, {
         'url': 'https://www.theater-complex.town/en/ppv/wytW3X7khrjJBUpKuV3jen',
         'only_matching': True,
+    }, {
+        'url': 'https://www.theater-complex.town/ja/ppv/qwUVmLmGEiZ3ZW6it9uGys',
+        'only_matching': True,
     }]

     _API_PATH = 'events'
||||||
|
|
|
@@ -41,7 +41,7 @@ class STVPlayerIE(InfoExtractor):
         ptype, video_id = self._match_valid_url(url).groups()

         webpage = self._download_webpage(url, video_id, fatal=False) or ''
-        props = self._search_nextjs_data(webpage, video_id, default='{}').get('props') or {}
+        props = self._search_nextjs_data(webpage, video_id, default={}).get('props') or {}
         player_api_cache = try_get(
             props, lambda x: x['initialReduxState']['playerApiCache']) or {}
|
|
||||||
|
|
|
@@ -1,8 +1,7 @@
 from __future__ import annotations

+import functools
 import json
-from functools import partial
-from textwrap import dedent
+import textwrap

 from .common import InfoExtractor
 from ..utils import ExtractorError, format_field, int_or_none, parse_iso8601
@@ -10,7 +9,7 @@ from ..utils.traversal import traverse_obj


 def _fmt_url(url):
-    return partial(format_field, template=url, default=None)
+    return functools.partial(format_field, template=url, default=None)


 class TelewebionIE(InfoExtractor):
@@ -88,7 +87,7 @@ class TelewebionIE(InfoExtractor):
         if not video_id.startswith('0x'):
             video_id = hex(int(video_id))

-        episode_data = self._call_graphql_api('getEpisodeDetail', video_id, dedent('''
+        episode_data = self._call_graphql_api('getEpisodeDetail', video_id, textwrap.dedent('''
             queryEpisode(filter: {EpisodeID: $EpisodeId}, first: 1) {
               title
               program {
@@ -127,7 +126,7 @@ class TelewebionIE(InfoExtractor):
             'formats': (
                 'channel', 'descriptor', {str},
                 {_fmt_url(f'https://cdna.telewebion.com/%s/episode/{video_id}/playlist.m3u8')},
-                {partial(self._extract_m3u8_formats, video_id=video_id, ext='mp4', m3u8_id='hls')}),
+                {functools.partial(self._extract_m3u8_formats, video_id=video_id, ext='mp4', m3u8_id='hls')}),
         }))
         info_dict['id'] = video_id
         return info_dict
||||||
|
|
|
@@ -1,7 +1,7 @@
 import base64
+import datetime as dt
 import functools
 import itertools
-from datetime import datetime

 from .common import InfoExtractor
 from ..networking import HEADRequest
@@ -70,7 +70,7 @@ class TenPlayIE(InfoExtractor):
         username, password = self._get_login_info()
         if username is None or password is None:
             self.raise_login_required('Your 10play account\'s details must be provided with --username and --password.')
-        _timestamp = datetime.now().strftime('%Y%m%d000000')
+        _timestamp = dt.datetime.now().strftime('%Y%m%d000000')
         _auth_header = base64.b64encode(_timestamp.encode('ascii')).decode('ascii')
        data = self._download_json('https://10play.com.au/api/user/auth', video_id, 'Getting bearer token', headers={
             'X-Network-Ten-Auth': _auth_header,
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
from .brightcove import BrightcoveNewIE
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .zype import ZypeIE
|
from .zype import ZypeIE
|
||||||
from ..networking import HEADRequest
|
from ..networking import HEADRequest
|
||||||
|
@ -8,6 +9,7 @@ from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
filter_dict,
|
filter_dict,
|
||||||
parse_qs,
|
parse_qs,
|
||||||
|
smuggle_url,
|
||||||
try_call,
|
try_call,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
@ -17,23 +19,43 @@ class ThisOldHouseIE(InfoExtractor):
|
||||||
_NETRC_MACHINE = 'thisoldhouse'
|
_NETRC_MACHINE = 'thisoldhouse'
|
||||||
_VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/?#]+/)?\d+)/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/?#]+/)?\d+)/(?P<id>[^/?#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# Unresolved Brightcove URL embed (formerly Zype), free
|
||||||
'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
|
'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5dcdddf673c3f956ef5db202',
|
'id': '6325298523112',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'How to Build a Storage Bench',
|
'title': 'How to Build a Storage Bench',
|
||||||
'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
|
'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
|
||||||
'timestamp': 1442548800,
|
'timestamp': 1681793639,
|
||||||
'upload_date': '20150918',
|
'upload_date': '20230418',
|
||||||
'duration': 674,
|
'duration': 674.54,
|
||||||
'view_count': int,
|
'tags': 'count:11',
|
||||||
'average_rating': 0,
|
'uploader_id': '6314471934001',
|
||||||
'thumbnail': r're:^https?://.*\.jpg\?\d+$',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'display_id': 'how-to-build-a-storage-bench',
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# Brightcove embed, authwalled
|
||||||
|
'url': 'https://www.thisoldhouse.com/glen-ridge-generational/99537/s45-e17-multi-generational',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6349675446112',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'E17 | Glen Ridge Generational | Multi-Generational',
|
||||||
|
'description': 'md5:53c6bc2e8031f3033d693d9a3563222c',
|
||||||
|
'timestamp': 1711382202,
|
||||||
|
'upload_date': '20240325',
|
||||||
|
'duration': 1422.229,
|
||||||
|
'tags': 'count:13',
|
||||||
|
'uploader_id': '6314471934001',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Login with password is not supported for this website'],
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'Requires subscription',
|
||||||
}, {
|
}, {
|
||||||
# Page no longer has video
|
# Page no longer has video
|
||||||
'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
|
'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
|
||||||
|
@ -98,7 +120,15 @@ class ThisOldHouseIE(InfoExtractor):
|
||||||
|
|
||||||
video_url, video_id = self._search_regex(
|
video_url, video_id = self._search_regex(
|
||||||
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})[^\'"]*)[\'"]',
|
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})[^\'"]*)[\'"]',
|
||||||
webpage, 'video url', group=(1, 2))
|
webpage, 'zype url', group=(1, 2), default=(None, None))
|
||||||
|
if video_url:
|
||||||
video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Zype URL').url
|
video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Zype URL').url
|
||||||
|
|
||||||
return self.url_result(video_url, ZypeIE, video_id)
|
return self.url_result(video_url, ZypeIE, video_id)
|
||||||
|
|
||||||
|
video_url, video_id = self._search_regex([
|
||||||
|
r'<iframe[^>]+src=[\'"]((?:https?:)?//players\.brightcove\.net/\d+/\w+/index\.html\?videoId=(\d+))',
|
||||||
|
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)thisoldhouse\.com/videos/brightcove/(\d+))'],
|
||||||
|
webpage, 'iframe url', group=(1, 2))
|
||||||
|
if not parse_qs(video_url).get('videoId'):
|
||||||
|
video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Brightcove URL').url
|
||||||
|
return self.url_result(smuggle_url(video_url, {'referrer': url}), BrightcoveNewIE, video_id)
|
||||||
|
|
|
@ -4,6 +4,7 @@ import random
|
||||||
import re
|
import re
|
||||||
import string
|
import string
|
||||||
import time
|
import time
|
||||||
|
import uuid
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urllib_parse_urlparse
|
from ..compat import compat_urllib_parse_urlparse
|
||||||
|
@ -30,18 +31,60 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class TikTokBaseIE(InfoExtractor):
|
class TikTokBaseIE(InfoExtractor):
|
||||||
_APP_VERSIONS = [('26.1.3', '260103'), ('26.1.2', '260102'), ('26.1.1', '260101'), ('25.6.2', '250602')]
|
|
||||||
_WORKING_APP_VERSION = None
|
|
||||||
_APP_NAME = 'trill'
|
|
||||||
_AID = 1180
|
|
||||||
_UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s'
|
_UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s'
|
||||||
_WEBPAGE_HOST = 'https://www.tiktok.com/'
|
_WEBPAGE_HOST = 'https://www.tiktok.com/'
|
||||||
QUALITIES = ('360p', '540p', '720p', '1080p')
|
QUALITIES = ('360p', '540p', '720p', '1080p')
|
||||||
|
|
||||||
|
_APP_INFO_DEFAULTS = {
|
||||||
|
# unique "install id"
|
||||||
|
'iid': None,
|
||||||
|
# TikTok (KR/PH/TW/TH/VN) = trill, TikTok (rest of world) = musical_ly, Douyin = aweme
|
||||||
|
'app_name': 'musical_ly',
|
||||||
|
'app_version': '34.1.2',
|
||||||
|
'manifest_app_version': '2023401020',
|
||||||
|
# "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0
|
||||||
|
'aid': '0',
|
||||||
|
}
|
||||||
|
_APP_INFO_POOL = None
|
||||||
|
_APP_INFO = None
|
||||||
|
_APP_USER_AGENT = None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def _KNOWN_APP_INFO(self):
|
||||||
|
return self._configuration_arg('app_info', ie_key=TikTokIE)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def _API_HOSTNAME(self):
|
def _API_HOSTNAME(self):
|
||||||
return self._configuration_arg(
|
return self._configuration_arg(
|
||||||
'api_hostname', ['api22-normal-c-useast2a.tiktokv.com'], ie_key=TikTokIE)[0]
|
'api_hostname', ['api16-normal-c-useast1a.tiktokv.com'], ie_key=TikTokIE)[0]
|
||||||
|
|
||||||
|
def _get_next_app_info(self):
|
||||||
|
if self._APP_INFO_POOL is None:
|
||||||
|
defaults = {
|
||||||
|
key: self._configuration_arg(key, [default], ie_key=TikTokIE)[0]
|
||||||
|
for key, default in self._APP_INFO_DEFAULTS.items()
|
||||||
|
if key != 'iid'
|
||||||
|
}
|
||||||
|
self._APP_INFO_POOL = [
|
||||||
|
{**defaults, **dict(
|
||||||
|
(k, v) for k, v in zip(self._APP_INFO_DEFAULTS, app_info.split('/')) if v
|
||||||
|
)} for app_info in self._KNOWN_APP_INFO
|
||||||
|
]
|
||||||
|
|
||||||
|
if not self._APP_INFO_POOL:
|
||||||
|
return False
|
||||||
|
|
||||||
|
self._APP_INFO = self._APP_INFO_POOL.pop(0)
|
||||||
|
|
||||||
|
app_name = self._APP_INFO['app_name']
|
||||||
|
version = self._APP_INFO['manifest_app_version']
|
||||||
|
if app_name == 'musical_ly':
|
||||||
|
package = f'com.zhiliaoapp.musically/{version}'
|
||||||
|
else: # trill, aweme
|
||||||
|
package = f'com.ss.android.ugc.{app_name}/{version}'
|
||||||
|
self._APP_USER_AGENT = f'{package} (Linux; U; Android 13; en_US; Pixel 7; Build/TD1A.220804.031; Cronet/58.0.2991.0)'
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _create_url(user_id, video_id):
|
def _create_url(user_id, video_id):
|
||||||
|
@ -58,7 +101,7 @@ class TikTokBaseIE(InfoExtractor):
|
||||||
'universal data', display_id, end_pattern=r'</script>', default={}),
|
'universal data', display_id, end_pattern=r'</script>', default={}),
|
||||||
('__DEFAULT_SCOPE__', {dict})) or {}
|
('__DEFAULT_SCOPE__', {dict})) or {}
|
||||||
|
|
||||||
def _call_api_impl(self, ep, query, manifest_app_version, video_id, fatal=True,
|
def _call_api_impl(self, ep, query, video_id, fatal=True,
|
||||||
note='Downloading API JSON', errnote='Unable to download API page'):
|
note='Downloading API JSON', errnote='Unable to download API page'):
|
||||||
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', k=160)))
|
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', k=160)))
|
||||||
webpage_cookies = self._get_cookies(self._WEBPAGE_HOST)
|
webpage_cookies = self._get_cookies(self._WEBPAGE_HOST)
|
||||||
|
@ -67,80 +110,85 @@ class TikTokBaseIE(InfoExtractor):
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id,
|
'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id,
|
||||||
fatal=fatal, note=note, errnote=errnote, headers={
|
fatal=fatal, note=note, errnote=errnote, headers={
|
||||||
'User-Agent': f'com.ss.android.ugc.{self._APP_NAME}/{manifest_app_version} (Linux; U; Android 13; en_US; Pixel 7; Build/TD1A.220804.031; Cronet/58.0.2991.0)',
|
'User-Agent': self._APP_USER_AGENT,
|
||||||
'Accept': 'application/json',
|
'Accept': 'application/json',
|
||||||
}, query=query)
|
}, query=query)
|
||||||
|
|
||||||
def _build_api_query(self, query, app_version, manifest_app_version):
|
def _build_api_query(self, query):
|
||||||
return {
|
return {
|
||||||
**query,
|
**query,
|
||||||
'version_name': app_version,
|
|
||||||
'version_code': manifest_app_version,
|
|
||||||
'build_number': app_version,
|
|
||||||
'manifest_version_code': manifest_app_version,
|
|
||||||
'update_version_code': manifest_app_version,
|
|
||||||
'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
|
|
||||||
'uuid': ''.join(random.choices(string.digits, k=16)),
|
|
||||||
'_rticket': int(time.time() * 1000),
|
|
||||||
'ts': int(time.time()),
|
|
||||||
'device_brand': 'Google',
|
|
||||||
'device_type': 'Pixel 7',
|
|
||||||
'device_platform': 'android',
|
'device_platform': 'android',
|
||||||
|
'os': 'android',
|
||||||
|
'ssmix': 'a',
|
||||||
|
'_rticket': int(time.time() * 1000),
|
||||||
|
'cdid': str(uuid.uuid4()),
|
||||||
|
'channel': 'googleplay',
|
||||||
|
'aid': self._APP_INFO['aid'],
|
||||||
|
'app_name': self._APP_INFO['app_name'],
|
||||||
|
'version_code': ''.join((f'{int(v):02d}' for v in self._APP_INFO['app_version'].split('.'))),
|
||||||
|
'version_name': self._APP_INFO['app_version'],
|
||||||
|
'manifest_version_code': self._APP_INFO['manifest_app_version'],
|
||||||
|
'update_version_code': self._APP_INFO['manifest_app_version'],
|
||||||
|
'ab_version': self._APP_INFO['app_version'],
|
||||||
'resolution': '1080*2400',
|
'resolution': '1080*2400',
|
||||||
'dpi': 420,
|
'dpi': 420,
|
||||||
'os_version': '13',
|
'device_type': 'Pixel 7',
|
||||||
'os_api': '29',
|
'device_brand': 'Google',
|
||||||
'carrier_region': 'US',
|
|
||||||
'sys_region': 'US',
|
|
||||||
'region': 'US',
|
|
||||||
'app_name': self._APP_NAME,
|
|
||||||
'app_language': 'en',
|
|
||||||
'language': 'en',
|
'language': 'en',
|
||||||
'timezone_name': 'America/New_York',
|
'os_api': '29',
|
||||||
'timezone_offset': '-14400',
|
'os_version': '13',
|
||||||
'channel': 'googleplay',
|
|
||||||
'ac': 'wifi',
|
'ac': 'wifi',
|
||||||
'mcc_mnc': '310260',
|
'is_pad': '0',
|
||||||
'is_my_cn': 0,
|
'current_region': 'US',
|
||||||
'aid': self._AID,
|
'app_type': 'normal',
|
||||||
'ssmix': 'a',
|
'sys_region': 'US',
|
||||||
'as': 'a1qwert123',
|
'last_install_time': int(time.time()) - random.randint(86400, 1123200),
|
||||||
'cp': 'cbfhckdckkde1',
|
'timezone_name': 'America/New_York',
|
||||||
|
'residence': 'US',
|
||||||
|
'app_language': 'en',
|
||||||
|
'timezone_offset': '-14400',
|
||||||
|
'host_abi': 'armeabi-v7a',
|
||||||
|
'locale': 'en',
|
||||||
|
'ac2': 'wifi5g',
|
||||||
|
'uoo': '1',
|
||||||
|
'carrier_region': 'US',
|
||||||
|
'op_region': 'US',
|
||||||
|
'build_number': self._APP_INFO['app_version'],
|
||||||
|
'region': 'US',
|
||||||
|
'ts': int(time.time()),
|
||||||
|
'iid': self._APP_INFO['iid'],
|
||||||
|
'device_id': random.randint(7250000000000000000, 7351147085025500000),
|
||||||
|
'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _call_api(self, ep, query, video_id, fatal=True,
|
def _call_api(self, ep, query, video_id, fatal=True,
|
||||||
note='Downloading API JSON', errnote='Unable to download API page'):
|
note='Downloading API JSON', errnote='Unable to download API page'):
|
||||||
if not self._WORKING_APP_VERSION:
|
if not self._APP_INFO and not self._get_next_app_info():
|
||||||
app_version = self._configuration_arg('app_version', [''], ie_key=TikTokIE.ie_key())[0]
|
message = 'No working app info is available'
|
||||||
manifest_app_version = self._configuration_arg('manifest_app_version', [''], ie_key=TikTokIE.ie_key())[0]
|
if fatal:
|
||||||
if app_version and manifest_app_version:
|
raise ExtractorError(message, expected=True)
|
||||||
self._WORKING_APP_VERSION = (app_version, manifest_app_version)
|
else:
|
||||||
self.write_debug('Imported app version combo from extractor arguments')
|
self.report_warning(message)
|
||||||
elif app_version or manifest_app_version:
|
return
|
||||||
self.report_warning('Only one of the two required version params are passed as extractor arguments', only_once=True)
|
|
||||||
|
|
||||||
if self._WORKING_APP_VERSION:
|
max_tries = len(self._APP_INFO_POOL) + 1 # _APP_INFO_POOL + _APP_INFO
|
||||||
app_version, manifest_app_version = self._WORKING_APP_VERSION
|
for count in itertools.count(1):
|
||||||
real_query = self._build_api_query(query, app_version, manifest_app_version)
|
self.write_debug(str(self._APP_INFO))
|
||||||
return self._call_api_impl(ep, real_query, manifest_app_version, video_id, fatal, note, errnote)
|
real_query = self._build_api_query(query)
|
||||||
|
|
||||||
for count, (app_version, manifest_app_version) in enumerate(self._APP_VERSIONS, start=1):
|
|
||||||
real_query = self._build_api_query(query, app_version, manifest_app_version)
|
|
||||||
try:
|
try:
|
||||||
res = self._call_api_impl(ep, real_query, manifest_app_version, video_id, fatal, note, errnote)
|
return self._call_api_impl(ep, real_query, video_id, fatal, note, errnote)
|
||||||
self._WORKING_APP_VERSION = (app_version, manifest_app_version)
|
|
||||||
return res
|
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
|
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
|
||||||
if count == len(self._APP_VERSIONS):
|
message = str(e.cause or e.msg)
|
||||||
|
if not self._get_next_app_info():
|
||||||
if fatal:
|
if fatal:
|
||||||
raise e
|
raise
|
||||||
else:
|
else:
|
||||||
self.report_warning(str(e.cause or e.msg))
|
self.report_warning(message)
|
||||||
return
|
return
|
||||||
self.report_warning('%s. Retrying... (attempt %s of %s)' % (str(e.cause or e.msg), count, len(self._APP_VERSIONS)))
|
self.report_warning(f'{message}. Retrying... (attempt {count} of {max_tries})')
|
||||||
continue
|
continue
|
||||||
raise e
|
raise
|
||||||
|
|
||||||
def _extract_aweme_app(self, aweme_id):
|
def _extract_aweme_app(self, aweme_id):
|
||||||
feed_list = self._call_api(
|
feed_list = self._call_api(
|
||||||
|
@ -223,6 +271,7 @@ class TikTokBaseIE(InfoExtractor):
|
||||||
|
|
||||||
def extract_addr(addr, add_meta={}):
|
def extract_addr(addr, add_meta={}):
|
||||||
parsed_meta, res = parse_url_key(addr.get('url_key', ''))
|
parsed_meta, res = parse_url_key(addr.get('url_key', ''))
|
||||||
|
is_bytevc2 = parsed_meta.get('vcodec') == 'bytevc2'
|
||||||
if res:
|
if res:
|
||||||
known_resolutions.setdefault(res, {}).setdefault('height', int_or_none(addr.get('height')))
|
known_resolutions.setdefault(res, {}).setdefault('height', int_or_none(addr.get('height')))
|
||||||
known_resolutions[res].setdefault('width', int_or_none(addr.get('width')))
|
known_resolutions[res].setdefault('width', int_or_none(addr.get('width')))
|
||||||
|
@ -235,8 +284,11 @@ class TikTokBaseIE(InfoExtractor):
|
||||||
'acodec': 'aac',
|
'acodec': 'aac',
|
||||||
'source_preference': -2 if 'aweme/v1' in url else -1, # Downloads from API might get blocked
|
'source_preference': -2 if 'aweme/v1' in url else -1, # Downloads from API might get blocked
|
||||||
**add_meta, **parsed_meta,
|
**add_meta, **parsed_meta,
|
||||||
|
# bytevc2 is bytedance's proprietary (unplayable) video codec
|
||||||
|
'preference': -100 if is_bytevc2 else -1,
|
||||||
'format_note': join_nonempty(
|
'format_note': join_nonempty(
|
||||||
add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None, delim=' '),
|
add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None,
|
||||||
|
'(UNPLAYABLE)' if is_bytevc2 else None, delim=' '),
|
||||||
**audio_meta(url),
|
**audio_meta(url),
|
||||||
} for url in addr.get('url_list') or []]
|
} for url in addr.get('url_list') or []]
|
||||||
|
|
||||||
|
@ -701,6 +753,8 @@ class TikTokIE(TikTokBaseIE):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id, user_id = self._match_valid_url(url).group('id', 'user_id')
|
video_id, user_id = self._match_valid_url(url).group('id', 'user_id')
|
||||||
|
|
||||||
|
if self._KNOWN_APP_INFO:
|
||||||
try:
|
try:
|
||||||
return self._extract_aweme_app(video_id)
|
return self._extract_aweme_app(video_id)
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
|
@ -720,7 +774,7 @@ class TikTokIE(TikTokBaseIE):
|
||||||
status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0
|
status = traverse_obj(sigi_data, ('VideoPage', 'statusCode', {int})) or 0
|
||||||
video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict}))
|
video_data = traverse_obj(sigi_data, ('ItemModule', video_id, {dict}))
|
||||||
|
|
||||||
elif next_data := self._search_nextjs_data(webpage, video_id, default='{}'):
|
elif next_data := self._search_nextjs_data(webpage, video_id, default={}):
|
||||||
self.write_debug('Found next.js data')
|
self.write_debug('Found next.js data')
|
||||||
status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0
|
status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode', {int})) or 0
|
||||||
video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict}))
|
video_data = traverse_obj(next_data, ('props', 'pageProps', 'itemInfo', 'itemStruct', {dict}))
|
||||||
|
|