merge 'master'

This commit is contained in:
Mozi 2024-11-22 16:08:04 +00:00
commit 2aeebc40ea
189 changed files with 5258 additions and 3017 deletions

View file

@ -63,14 +63,15 @@ body:
placeholder: | placeholder: |
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023)
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Request Handlers: urllib, requests [debug] Request Handlers: urllib, requests, websockets, curl_cffi
[debug] Loaded 1893 extractors [debug] Loaded 1838 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>

View file

@ -75,14 +75,15 @@ body:
placeholder: | placeholder: |
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023)
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Request Handlers: urllib, requests [debug] Request Handlers: urllib, requests, websockets, curl_cffi
[debug] Loaded 1893 extractors [debug] Loaded 1838 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>

View file

@ -71,14 +71,15 @@ body:
placeholder: | placeholder: |
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023)
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Request Handlers: urllib, requests [debug] Request Handlers: urllib, requests, websockets, curl_cffi
[debug] Loaded 1893 extractors [debug] Loaded 1838 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>

View file

@ -56,14 +56,15 @@ body:
placeholder: | placeholder: |
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023)
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Request Handlers: urllib, requests [debug] Request Handlers: urllib, requests, websockets, curl_cffi
[debug] Loaded 1893 extractors [debug] Loaded 1838 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>

View file

@ -52,14 +52,15 @@ body:
placeholder: | placeholder: |
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023)
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Request Handlers: urllib, requests [debug] Request Handlers: urllib, requests, websockets, curl_cffi
[debug] Loaded 1893 extractors [debug] Loaded 1838 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>

View file

@ -58,14 +58,15 @@ body:
placeholder: | placeholder: |
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023)
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Request Handlers: urllib, requests [debug] Request Handlers: urllib, requests, websockets, curl_cffi
[debug] Loaded 1893 extractors [debug] Loaded 1838 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>

View file

@ -72,7 +72,7 @@ on:
default: true default: true
type: boolean type: boolean
windows: windows:
description: yt-dlp.exe, yt-dlp_min.exe, yt-dlp_win.zip description: yt-dlp.exe, yt-dlp_win.zip
default: true default: true
type: boolean type: boolean
windows32: windows32:
@ -199,22 +199,24 @@ jobs:
GITHUB_WORKFLOW: build GITHUB_WORKFLOW: build
githubToken: ${{ github.token }} # To cache image githubToken: ${{ github.token }} # To cache image
arch: ${{ matrix.architecture }} arch: ${{ matrix.architecture }}
distro: ubuntu18.04 # Standalone executable should be built on minimum supported OS distro: ubuntu20.04 # Standalone executable should be built on minimum supported OS
dockerRunArgs: --volume "${PWD}/repo:/repo" dockerRunArgs: --volume "${PWD}/repo:/repo"
install: | # Installing Python 3.10 from the Deadsnakes repo raises errors install: | # Installing Python 3.10 from the Deadsnakes repo raises errors
apt update apt update
apt -y install zlib1g-dev libffi-dev python3.8 python3.8-dev python3.8-distutils python3-pip apt -y install zlib1g-dev libffi-dev python3.9 python3.9-dev python3.9-distutils python3-pip \
python3.8 -m pip install -U pip setuptools wheel python3-secretstorage # Cannot build cryptography wheel in virtual armv7 environment
# Cannot access any files from the repo directory at this stage python3.9 -m pip install -U pip wheel 'setuptools>=71.0.2'
python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi secretstorage cffi # XXX: Keep this in sync with pyproject.toml (it can't be accessed at this stage) and exclude secretstorage
python3.9 -m pip install -U Pyinstaller mutagen pycryptodomex brotli certifi cffi \
'requests>=2.32.2,<3' 'urllib3>=1.26.17,<3' 'websockets>=13.0'
run: | run: |
cd repo cd repo
python3.8 devscripts/install_deps.py -o --include build python3.9 devscripts/install_deps.py -o --include build
python3.8 devscripts/install_deps.py --include pyinstaller --include secretstorage # Cached version may be out of date python3.9 devscripts/install_deps.py --include pyinstaller # Cached versions may be out of date
python3.8 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}" python3.9 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
python3.8 devscripts/make_lazy_extractors.py python3.9 devscripts/make_lazy_extractors.py
python3.8 -m bundle.pyinstaller python3.9 -m bundle.pyinstaller
if ${{ vars.UPDATE_TO_VERIFICATION && 'true' || 'false' }}; then if ${{ vars.UPDATE_TO_VERIFICATION && 'true' || 'false' }}; then
arch="${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}" arch="${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}"
@ -403,13 +405,13 @@ jobs:
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-python@v5 - uses: actions/setup-python@v5
with: # 3.8 is used for Win7 support with:
python-version: "3.8" python-version: "3.10"
- name: Install Requirements - name: Install Requirements
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
python devscripts/install_deps.py -o --include build python devscripts/install_deps.py -o --include build
python devscripts/install_deps.py --include curl-cffi python devscripts/install_deps.py --include curl-cffi
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.10.0-py3-none-any.whl" python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.11.1-py3-none-any.whl"
- name: Prepare - name: Prepare
run: | run: |
@ -419,22 +421,12 @@ jobs:
run: | run: |
python -m bundle.pyinstaller python -m bundle.pyinstaller
python -m bundle.pyinstaller --onedir python -m bundle.pyinstaller --onedir
Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_real.exe
Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip
- name: Install Requirements (py2exe)
run: |
python devscripts/install_deps.py --include py2exe
- name: Build (py2exe)
run: |
python -m bundle.py2exe
Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe
Move-Item ./dist/yt-dlp_real.exe ./dist/yt-dlp.exe
- name: Verify --update-to - name: Verify --update-to
if: vars.UPDATE_TO_VERIFICATION if: vars.UPDATE_TO_VERIFICATION
run: | run: |
foreach ($name in @("yt-dlp","yt-dlp_min")) { foreach ($name in @("yt-dlp")) {
Copy-Item "./dist/${name}.exe" "./dist/${name}_downgraded.exe" Copy-Item "./dist/${name}.exe" "./dist/${name}_downgraded.exe"
$version = & "./dist/${name}.exe" --version $version = & "./dist/${name}.exe" --version
& "./dist/${name}_downgraded.exe" -v --update-to yt-dlp/yt-dlp@2023.03.04 & "./dist/${name}_downgraded.exe" -v --update-to yt-dlp/yt-dlp@2023.03.04
@ -450,7 +442,6 @@ jobs:
name: build-bin-${{ github.job }} name: build-bin-${{ github.job }}
path: | path: |
dist/yt-dlp.exe dist/yt-dlp.exe
dist/yt-dlp_min.exe
dist/yt-dlp_win.zip dist/yt-dlp_win.zip
compression-level: 0 compression-level: 0
@ -463,13 +454,13 @@ jobs:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-python@v5 - uses: actions/setup-python@v5
with: with:
python-version: "3.8" python-version: "3.10"
architecture: "x86" architecture: "x86"
- name: Install Requirements - name: Install Requirements
run: | run: |
python devscripts/install_deps.py -o --include build python devscripts/install_deps.py -o --include build
python devscripts/install_deps.py python devscripts/install_deps.py
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.10.0-py3-none-any.whl" python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.11.1-py3-none-any.whl"
- name: Prepare - name: Prepare
run: | run: |
@ -513,7 +504,8 @@ jobs:
- windows32 - windows32
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/download-artifact@v4 - name: Download artifacts
uses: actions/download-artifact@v4
with: with:
path: artifact path: artifact
pattern: build-bin-* pattern: build-bin-*
@ -537,13 +529,29 @@ jobs:
lock 2022.08.18.36 .+ Python 3\.6 lock 2022.08.18.36 .+ Python 3\.6
lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7
lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server)
lock 2024.10.22 py2exe .+
lock 2024.10.22 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b
lock 2024.10.22 (?!\w+_exe).+ Python 3\.8
lock 2024.10.22 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2)
lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6 lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6
lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7
lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server)
lockV2 yt-dlp/yt-dlp 2024.10.22 py2exe .+
lockV2 yt-dlp/yt-dlp 2024.10.22 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b
lockV2 yt-dlp/yt-dlp 2024.10.22 (?!\w+_exe).+ Python 3\.8
lockV2 yt-dlp/yt-dlp 2024.10.22 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2)
lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7
lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server)
lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 py2exe .+
lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b
lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 (?!\w+_exe).+ Python 3\.8
lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2)
lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7
lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server)
lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.045052 py2exe .+
lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b
lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 (?!\w+_exe).+ Python 3\.8
lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2)
EOF EOF
- name: Sign checksum files - name: Sign checksum files

View file

@ -36,16 +36,20 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
os: [ubuntu-latest] os: [ubuntu-latest]
# CPython 3.8 is in quick-test # CPython 3.9 is in quick-test
python-version: ['3.9', '3.10', '3.11', '3.12', pypy-3.8, pypy-3.10] python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10]
include: include:
# at least one of each CPython/PyPy tests must be in windows # at least one of each CPython/PyPy tests must be in windows
- os: windows-latest - os: windows-latest
python-version: '3.8' python-version: '3.9'
- os: windows-latest
python-version: '3.10'
- os: windows-latest - os: windows-latest
python-version: '3.12' python-version: '3.12'
- os: windows-latest - os: windows-latest
python-version: pypy-3.9 python-version: '3.13'
- os: windows-latest
python-version: pypy-3.10
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }} - name: Set up Python ${{ matrix.python-version }}

View file

@ -28,13 +28,13 @@ jobs:
fail-fast: true fail-fast: true
matrix: matrix:
os: [ubuntu-latest] os: [ubuntu-latest]
python-version: ['3.10', '3.11', '3.12', pypy-3.8, pypy-3.10] python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10]
include: include:
# at least one of each CPython/PyPy tests must be in windows # at least one of each CPython/PyPy tests must be in windows
- os: windows-latest - os: windows-latest
python-version: '3.8' python-version: '3.9'
- os: windows-latest - os: windows-latest
python-version: pypy-3.9 python-version: pypy-3.10
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }} - name: Set up Python ${{ matrix.python-version }}

View file

@ -10,10 +10,10 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Set up Python 3.8 - name: Set up Python 3.9
uses: actions/setup-python@v5 uses: actions/setup-python@v5
with: with:
python-version: '3.8' python-version: '3.9'
- name: Install test requirements - name: Install test requirements
run: python3 ./devscripts/install_deps.py -o --include test run: python3 ./devscripts/install_deps.py -o --include test
- name: Run tests - name: Run tests
@ -29,7 +29,7 @@ jobs:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: actions/setup-python@v5 - uses: actions/setup-python@v5
with: with:
python-version: '3.8' python-version: '3.9'
- name: Install dev dependencies - name: Install dev dependencies
run: python3 ./devscripts/install_deps.py -o --include static-analysis run: python3 ./devscripts/install_deps.py -o --include static-analysis
- name: Make lazy extractors - name: Make lazy extractors

View file

@ -28,3 +28,20 @@ jobs:
actions: write # For cleaning up cache actions: write # For cleaning up cache
id-token: write # mandatory for trusted publishing id-token: write # mandatory for trusted publishing
secrets: inherit secrets: inherit
publish_pypi:
needs: [release]
if: vars.MASTER_PYPI_PROJECT != ''
runs-on: ubuntu-latest
permissions:
id-token: write # mandatory for trusted publishing
steps:
- name: Download artifacts
uses: actions/download-artifact@v4
with:
path: dist
name: build-pypi
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
verbose: true

View file

@ -41,3 +41,20 @@ jobs:
actions: write # For cleaning up cache actions: write # For cleaning up cache
id-token: write # mandatory for trusted publishing id-token: write # mandatory for trusted publishing
secrets: inherit secrets: inherit
publish_pypi:
needs: [release]
if: vars.NIGHTLY_PYPI_PROJECT != ''
runs-on: ubuntu-latest
permissions:
id-token: write # mandatory for trusted publishing
steps:
- name: Download artifacts
uses: actions/download-artifact@v4
with:
path: dist
name: build-pypi
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
verbose: true

View file

@ -2,10 +2,6 @@ name: Release
on: on:
workflow_call: workflow_call:
inputs: inputs:
prerelease:
required: false
default: true
type: boolean
source: source:
required: false required: false
default: '' default: ''
@ -18,6 +14,10 @@ on:
required: false required: false
default: '' default: ''
type: string type: string
prerelease:
required: false
default: true
type: boolean
workflow_dispatch: workflow_dispatch:
inputs: inputs:
source: source:
@ -278,7 +278,17 @@ jobs:
make clean-cache make clean-cache
python -m build --no-isolation . python -m build --no-isolation .
- name: Upload artifacts
if: github.event_name != 'workflow_dispatch'
uses: actions/upload-artifact@v4
with:
name: build-pypi
path: |
dist/*
compression-level: 0
- name: Publish to PyPI - name: Publish to PyPI
if: github.event_name == 'workflow_dispatch'
uses: pypa/gh-action-pypi-publish@release/v1 uses: pypa/gh-action-pypi-publish@release/v1
with: with:
verbose: true verbose: true

View file

@ -37,14 +37,18 @@ Bugs and suggestions should be reported at: [yt-dlp/yt-dlp/issues](https://githu
**Please include the full output of yt-dlp when run with `-vU`**, i.e. **add** `-vU` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this: **Please include the full output of yt-dlp when run with `-vU`**, i.e. **add** `-vU` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
``` ```
$ yt-dlp -vU <your command line> $ yt-dlp -vU <your command line>
[debug] Command-line config: ['-v', 'demo.com'] [debug] Command-line config: ['-vU', 'https://www.example.com/']
[debug] Encodings: locale UTF-8, fs utf-8, out utf-8, pref UTF-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2021.09.25 (zip) [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe)
[debug] Python version 3.8.10 (CPython 64bit) - Linux-5.4.0-74-generic-x86_64-with-glibc2.29 [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023)
[debug] exe versions: ffmpeg 4.2.4, ffprobe 4.2.4 [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2
[debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1
[debug] Proxy map: {} [debug] Proxy map: {}
Current Build Hash 25cc412d1d3c0725a1f2f5b7e4682f6fb40e6d15f7024e96f7afd572e9919535 [debug] Request Handlers: urllib, requests, websockets, curl_cffi
yt-dlp is up to date (2021.09.25) [debug] Loaded 1838 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
... ...
``` ```
**Do not post screenshots of verbose logs; only plain text is acceptable.** **Do not post screenshots of verbose logs; only plain text is acceptable.**
@ -233,7 +237,7 @@ After you have ensured this site is distributing its content legally, you can fo
# * MD5 checksum; start the string with 'md5:', e.g. # * MD5 checksum; start the string with 'md5:', e.g.
# 'description': 'md5:098f6bcd4621d373cade4e832627b4f6', # 'description': 'md5:098f6bcd4621d373cade4e832627b4f6',
# * A regular expression; start the string with 're:', e.g. # * A regular expression; start the string with 're:', e.g.
# 'thumbnail': r're:^https?://.*\.jpg$', # 'thumbnail': r're:https?://.*\.jpg$',
# * A count of elements in a list; start the string with 'count:', e.g. # * A count of elements in a list; start the string with 'count:', e.g.
# 'tags': 'count:10', # 'tags': 'count:10',
# * Any Python type, e.g. # * Any Python type, e.g.
@ -268,7 +272,7 @@ After you have ensured this site is distributing its content legally, you can fo
You can use `hatch fmt` to automatically fix problems. Rules that the linter/formatter enforces should not be disabled with `# noqa` unless a maintainer requests it. The only exception allowed is for old/printf-style string formatting in GraphQL query templates (use `# noqa: UP031`). You can use `hatch fmt` to automatically fix problems. Rules that the linter/formatter enforces should not be disabled with `# noqa` unless a maintainer requests it. The only exception allowed is for old/printf-style string formatting in GraphQL query templates (use `# noqa: UP031`).
1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython and PyPy for Python 3.8 and above. Backward compatibility is not required for even older versions of Python. 1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.9 and PyPy >=3.10. Backward compatibility is not required for even older versions of Python.
1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: 1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
```shell ```shell
@ -302,10 +306,9 @@ Extractors are very fragile by nature since they depend on the layout of the sou
For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L119-L440) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp: For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L119-L440) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp:
- `id` (media identifier) - `id` (media identifier)
- `title` (media title)
- `url` (media download URL) or `formats` - `url` (media download URL) or `formats`
The aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken. While all extractors must return a `title`, they must also allow it's extraction to be non-fatal. The aforementioned metadata fields are the critical data without which extraction does not make any sense. If any of them fail to be extracted, then the extractor is considered broken. All other metadata extraction should be completely non-fatal.
For pornographic sites, appropriate `age_limit` must also be returned. For pornographic sites, appropriate `age_limit` must also be returned.

View file

@ -678,3 +678,32 @@ coreywright
eric321 eric321
poyhen poyhen
tetra-fox tetra-fox
444995
63427083
allendema
DarkZeros
DTrombett
imranh2
KarboniteKream
mikkovedru
pktiuk
rubyevadestaxes
avagordon01
CounterPillow
JoseAngelB
KBelmin
kesor
MellowKyler
Wesley107772
a13ssandr0
ChocoLZS
doe1080
hugovdev
jshumphrey
julionc
manavchaudhary1
powergold1
Sakura286
SamDecrock
stratus-ss
subrat-lima

View file

@ -4,6 +4,170 @@
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
--> -->
### 2024.11.18
#### Important changes
- **Login with OAuth is no longer supported for YouTube**
Due to a change made by the site, yt-dlp is no longer able to support OAuth login for YouTube. [Read more](https://github.com/yt-dlp/yt-dlp/issues/11462#issuecomment-2471703090)
#### Core changes
- [Catch broken Cryptodome installations](https://github.com/yt-dlp/yt-dlp/commit/b83ca24eb72e1e558b0185bd73975586c0bc0546) ([#11486](https://github.com/yt-dlp/yt-dlp/issues/11486)) by [seproDev](https://github.com/seproDev)
- **utils**
- [Fix `join_nonempty`, add `**kwargs` to `unpack`](https://github.com/yt-dlp/yt-dlp/commit/39d79c9b9cf23411d935910685c40aa1a2fdb409) ([#11559](https://github.com/yt-dlp/yt-dlp/issues/11559)) by [Grub4K](https://github.com/Grub4K)
- `subs_list_to_dict`: [Add `lang` default parameter](https://github.com/yt-dlp/yt-dlp/commit/c014fbcddcb4c8f79d914ac5bb526758b540ea33) ([#11508](https://github.com/yt-dlp/yt-dlp/issues/11508)) by [Grub4K](https://github.com/Grub4K)
#### Extractor changes
- [Allow `ext` override for thumbnails](https://github.com/yt-dlp/yt-dlp/commit/eb64ae7d5def6df2aba74fb703e7f168fb299865) ([#11545](https://github.com/yt-dlp/yt-dlp/issues/11545)) by [bashonly](https://github.com/bashonly)
- **adobepass**: [Fix provider requests](https://github.com/yt-dlp/yt-dlp/commit/85fdc66b6e01d19a94b4f39b58e3c0cf23600902) ([#11472](https://github.com/yt-dlp/yt-dlp/issues/11472)) by [bashonly](https://github.com/bashonly)
- **archive.org**: [Fix comments extraction](https://github.com/yt-dlp/yt-dlp/commit/f2a4983df7a64c4e93b56f79dbd16a781bd90206) ([#11527](https://github.com/yt-dlp/yt-dlp/issues/11527)) by [jshumphrey](https://github.com/jshumphrey)
- **bandlab**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/6365e92589e4bc17b8fffb0125a716d144ad2137) ([#11535](https://github.com/yt-dlp/yt-dlp/issues/11535)) by [seproDev](https://github.com/seproDev)
- **chaturbate**
- [Extract from API and support impersonation](https://github.com/yt-dlp/yt-dlp/commit/720b3dc453c342bc2e8df7dbc0acaab4479de46c) ([#11555](https://github.com/yt-dlp/yt-dlp/issues/11555)) by [powergold1](https://github.com/powergold1) (With fixes in [7cecd29](https://github.com/yt-dlp/yt-dlp/commit/7cecd299e4a5ef1f0f044b2fedc26f17e41f15e3) by [seproDev](https://github.com/seproDev))
- [Support alternate domains](https://github.com/yt-dlp/yt-dlp/commit/a9f85670d03ab993dc589f21a9ffffcad61392d5) ([#10595](https://github.com/yt-dlp/yt-dlp/issues/10595)) by [manavchaudhary1](https://github.com/manavchaudhary1)
- **cloudflarestream**: [Avoid extraction via videodelivery.net](https://github.com/yt-dlp/yt-dlp/commit/2db8c2e7d57a1784b06057c48e3e91023720d195) ([#11478](https://github.com/yt-dlp/yt-dlp/issues/11478)) by [hugovdev](https://github.com/hugovdev)
- **ctvnews**
- [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f351440f1dc5b3dfbfc5737b037a869d946056fe) ([#11534](https://github.com/yt-dlp/yt-dlp/issues/11534)) by [bashonly](https://github.com/bashonly), [jshumphrey](https://github.com/jshumphrey)
- [Fix playlist ID extraction](https://github.com/yt-dlp/yt-dlp/commit/f9d98509a898737c12977b2e2117277bada2c196) ([#8892](https://github.com/yt-dlp/yt-dlp/issues/8892)) by [qbnu](https://github.com/qbnu)
- **digitalconcerthall**: [Support login with access/refresh tokens](https://github.com/yt-dlp/yt-dlp/commit/f7257588bdff5f0b0452635a66b253a783c97357) ([#11571](https://github.com/yt-dlp/yt-dlp/issues/11571)) by [bashonly](https://github.com/bashonly)
- **facebook**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/bacc31b05a04181b63100c481565256b14813a5e) ([#11513](https://github.com/yt-dlp/yt-dlp/issues/11513)) by [bashonly](https://github.com/bashonly)
- **gamedevtv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/be3579aaf0c3b71a0a3195e1955415d5e4d6b3d8) ([#11368](https://github.com/yt-dlp/yt-dlp/issues/11368)) by [bashonly](https://github.com/bashonly), [stratus-ss](https://github.com/stratus-ss)
- **goplay**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6b43a8d84b881d769b480ba6e20ec691e9d1b92d) ([#11466](https://github.com/yt-dlp/yt-dlp/issues/11466)) by [bashonly](https://github.com/bashonly), [SamDecrock](https://github.com/SamDecrock)
- **kenh14**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/eb15fd5a32d8b35ef515f7a3d1158c03025648ff) ([#3996](https://github.com/yt-dlp/yt-dlp/issues/3996)) by [krichbanana](https://github.com/krichbanana), [pzhlkj6612](https://github.com/pzhlkj6612)
- **litv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/e079ffbda66de150c0a9ebef05e89f61bb4d5f76) ([#11071](https://github.com/yt-dlp/yt-dlp/issues/11071)) by [jiru](https://github.com/jiru)
- **mixchmovie**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/0ec9bfed4d4a52bfb4f8733da1acf0aeeae21e6b) ([#10897](https://github.com/yt-dlp/yt-dlp/issues/10897)) by [Sakura286](https://github.com/Sakura286)
- **patreon**: [Fix comments extraction](https://github.com/yt-dlp/yt-dlp/commit/1d253b0a27110d174c40faf8fb1c999d099e0cde) ([#11530](https://github.com/yt-dlp/yt-dlp/issues/11530)) by [bashonly](https://github.com/bashonly), [jshumphrey](https://github.com/jshumphrey)
- **pialive**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/d867f99622ef7fba690b08da56c39d739b822bb7) ([#10811](https://github.com/yt-dlp/yt-dlp/issues/10811)) by [ChocoLZS](https://github.com/ChocoLZS)
- **radioradicale**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/70c55cb08f780eab687e881ef42bb5c6007d290b) ([#5607](https://github.com/yt-dlp/yt-dlp/issues/5607)) by [a13ssandr0](https://github.com/a13ssandr0), [pzhlkj6612](https://github.com/pzhlkj6612)
- **reddit**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/7ea2787920cccc6b8ea30791993d114fbd564434) ([#11573](https://github.com/yt-dlp/yt-dlp/issues/11573)) by [bashonly](https://github.com/bashonly)
- **redgifsuser**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/d215fba7edb69d4fa665f43663756fd260b1489f) ([#11531](https://github.com/yt-dlp/yt-dlp/issues/11531)) by [jshumphrey](https://github.com/jshumphrey)
- **rutube**: [Rework extractors](https://github.com/yt-dlp/yt-dlp/commit/e398217aae19bb25f91797bfbe8a3243698d7f45) ([#11480](https://github.com/yt-dlp/yt-dlp/issues/11480)) by [seproDev](https://github.com/seproDev)
- **sonylivseries**: [Add `sort_order` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/2009cb27e17014787bf63eaa2ada51293d54f22a) ([#11569](https://github.com/yt-dlp/yt-dlp/issues/11569)) by [bashonly](https://github.com/bashonly)
- **soop**: [Fix thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/c699bafc5038b59c9afe8c2e69175fb66424c832) ([#11545](https://github.com/yt-dlp/yt-dlp/issues/11545)) by [bashonly](https://github.com/bashonly)
- **spankbang**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/8388ec256f7753b02488788e3cfa771f6e1db247) ([#11542](https://github.com/yt-dlp/yt-dlp/issues/11542)) by [jshumphrey](https://github.com/jshumphrey)
- **spreaker**
- [Support episode pages and access keys](https://github.com/yt-dlp/yt-dlp/commit/c39016f66df76d14284c705736ca73db8055d8de) ([#11489](https://github.com/yt-dlp/yt-dlp/issues/11489)) by [julionc](https://github.com/julionc)
- [Support podcast and feed pages](https://github.com/yt-dlp/yt-dlp/commit/c6737310619022248f5d0fd13872073cac168453) ([#10968](https://github.com/yt-dlp/yt-dlp/issues/10968)) by [subrat-lima](https://github.com/subrat-lima)
- **youtube**
- [Player client maintenance](https://github.com/yt-dlp/yt-dlp/commit/637d62a3a9fc723d68632c1af25c30acdadeeb85) ([#11528](https://github.com/yt-dlp/yt-dlp/issues/11528)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
- [Remove broken OAuth support](https://github.com/yt-dlp/yt-dlp/commit/52c0ffe40ad6e8404d93296f575007b05b04c686) ([#11558](https://github.com/yt-dlp/yt-dlp/issues/11558)) by [bashonly](https://github.com/bashonly)
- tab: [Fix podcasts tab extraction](https://github.com/yt-dlp/yt-dlp/commit/37cd7660eaff397c551ee18d80507702342b0c2b) ([#11567](https://github.com/yt-dlp/yt-dlp/issues/11567)) by [seproDev](https://github.com/seproDev)
#### Misc. changes
- **build**
- [Bump PyInstaller version pin to `>=6.11.1`](https://github.com/yt-dlp/yt-dlp/commit/f9c8deb4e5887ff5150e911ac0452e645f988044) ([#11507](https://github.com/yt-dlp/yt-dlp/issues/11507)) by [bashonly](https://github.com/bashonly)
- [Enable attestations for trusted publishing](https://github.com/yt-dlp/yt-dlp/commit/f13df591d4d7ca8e2f31b35c9c91e69ba9e9b013) ([#11420](https://github.com/yt-dlp/yt-dlp/issues/11420)) by [bashonly](https://github.com/bashonly)
- [Pin `websockets` version to >=13.0,<14](https://github.com/yt-dlp/yt-dlp/commit/240a7d43c8a67ffb86d44dc276805aa43c358dcc) ([#11488](https://github.com/yt-dlp/yt-dlp/issues/11488)) by [bashonly](https://github.com/bashonly)
- **cleanup**
- [Deprecate more compat functions](https://github.com/yt-dlp/yt-dlp/commit/f95a92b3d0169a784ee15a138fbe09d82b2754a1) ([#11439](https://github.com/yt-dlp/yt-dlp/issues/11439)) by [seproDev](https://github.com/seproDev)
- [Remove dead extractors](https://github.com/yt-dlp/yt-dlp/commit/10fc719bc7f1eef469389c5219102266ef411f29) ([#11566](https://github.com/yt-dlp/yt-dlp/issues/11566)) by [doe1080](https://github.com/doe1080)
- Miscellaneous: [da252d9](https://github.com/yt-dlp/yt-dlp/commit/da252d9d322af3e2178ac5eae324809502a0a862) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev)
### 2024.11.04
#### Important changes
- **Beginning with this release, yt-dlp's Python dependencies *must* be installed using the `default` group**
If you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's optional dependencies (which were previously included by default). [Read more](https://github.com/yt-dlp/yt-dlp/pull/11255)
- **The minimum *required* Python version has been raised to 3.9**
Python 3.8 reached its end-of-life on 2024.10.07, and yt-dlp has now removed support for it. As an unfortunate side effect, the official `yt-dlp.exe` and `yt-dlp_x86.exe` binaries are no longer supported on Windows 7. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)
#### Core changes
- [Allow thumbnails with `.jpe` extension](https://github.com/yt-dlp/yt-dlp/commit/5bc5fb2835ea59bdf326bd12176d74d2c7348a95) ([#11408](https://github.com/yt-dlp/yt-dlp/issues/11408)) by [bashonly](https://github.com/bashonly)
- [Expand paths in `--plugin-dirs`](https://github.com/yt-dlp/yt-dlp/commit/914af9a0cf51c9a3f74aa88d952bee8334c67511) ([#11334](https://github.com/yt-dlp/yt-dlp/issues/11334)) by [bashonly](https://github.com/bashonly)
- [Fix `--netrc` empty string parsing for Python <=3.10](https://github.com/yt-dlp/yt-dlp/commit/88402b714ec124633933737bc156b172a3dec3d6) ([#11414](https://github.com/yt-dlp/yt-dlp/issues/11414)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
- [Populate format sorting fields before dependent fields](https://github.com/yt-dlp/yt-dlp/commit/5c880ef42e9c2b2fc412f6d69dad37d34fb75a62) ([#11353](https://github.com/yt-dlp/yt-dlp/issues/11353)) by [Grub4K](https://github.com/Grub4K)
- [Prioritize AV1](https://github.com/yt-dlp/yt-dlp/commit/3945677a75e94a1fecc085432d791e1c21220cd3) ([#11153](https://github.com/yt-dlp/yt-dlp/issues/11153)) by [seproDev](https://github.com/seproDev)
- [Remove Python 3.8 support](https://github.com/yt-dlp/yt-dlp/commit/d784464399b600ba9516bbcec6286f11d68974dd) ([#11321](https://github.com/yt-dlp/yt-dlp/issues/11321)) by [bashonly](https://github.com/bashonly)
- **aes**: [Fix GCM pad length calculation](https://github.com/yt-dlp/yt-dlp/commit/beae2db127d3b5017cbcf685da9de7a9ef496541) ([#11438](https://github.com/yt-dlp/yt-dlp/issues/11438)) by [seproDev](https://github.com/seproDev)
- **cookies**: [Support chrome table version 24](https://github.com/yt-dlp/yt-dlp/commit/4613096f2e6eab9dcbac0e98b6cec760bbc99375) ([#11425](https://github.com/yt-dlp/yt-dlp/issues/11425)) by [kesor](https://github.com/kesor), [seproDev](https://github.com/seproDev)
- **utils**
- [Allow partial application for more functions](https://github.com/yt-dlp/yt-dlp/commit/b6dc2c49e8793c6dfa21275e61caf49ec1148b81) ([#11391](https://github.com/yt-dlp/yt-dlp/issues/11391)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) (With fixes in [422195e](https://github.com/yt-dlp/yt-dlp/commit/422195ec70a00b0d2002b238cacbae7790c57fdf) by [Grub4K](https://github.com/Grub4K))
- [Fix `find_element` by class](https://github.com/yt-dlp/yt-dlp/commit/f93c16395cea1fe9ffc3c594d3e019c3b214544c) ([#11402](https://github.com/yt-dlp/yt-dlp/issues/11402)) by [bashonly](https://github.com/bashonly)
- [Fix and improve `find_element` and `find_elements`](https://github.com/yt-dlp/yt-dlp/commit/b103aca24d35b72b405c340357dc01a0ed534281) ([#11443](https://github.com/yt-dlp/yt-dlp/issues/11443)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
#### Extractor changes
- [Resolve `language` to ISO639-2 for ISM formats](https://github.com/yt-dlp/yt-dlp/commit/21cdcf03a237a0c4979c941d5a5385cae44c7906) ([#11359](https://github.com/yt-dlp/yt-dlp/issues/11359)) by [bashonly](https://github.com/bashonly)
- **ardmediathek**: [Extract chapters](https://github.com/yt-dlp/yt-dlp/commit/59f8dd8239c31f00b708da53b39b1e2e9409b6e6) ([#11442](https://github.com/yt-dlp/yt-dlp/issues/11442)) by [iw0nderhow](https://github.com/iw0nderhow)
- **bfmtv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/754940e9a558565d6bd3c0c529802569b1d0ae4e) ([#11444](https://github.com/yt-dlp/yt-dlp/issues/11444)) by [seproDev](https://github.com/seproDev)
- **bluesky**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/5c7a5aaab27e9c3cb367b663a6136ca58866e547) ([#11055](https://github.com/yt-dlp/yt-dlp/issues/11055)) by [MellowKyler](https://github.com/MellowKyler), [seproDev](https://github.com/seproDev)
- **ccma**: [Support new 3cat.cat domain](https://github.com/yt-dlp/yt-dlp/commit/330335386d4f7603d92d6796798375336005275e) ([#11222](https://github.com/yt-dlp/yt-dlp/issues/11222)) by [JoseAngelB](https://github.com/JoseAngelB)
- **chzzk**: video: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/9c6534da81e485b2325b3489ee4128943e6d3e4b) ([#11228](https://github.com/yt-dlp/yt-dlp/issues/11228)) by [hui1601](https://github.com/hui1601)
- **cnn**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/9acf79c91a8c6c55ca972747c6858e784e2da351) ([#10185](https://github.com/yt-dlp/yt-dlp/issues/10185)) by [kylegustavo](https://github.com/kylegustavo), [seproDev](https://github.com/seproDev)
- **dailymotion**
- [Improve embed extraction](https://github.com/yt-dlp/yt-dlp/commit/a403dcf9be20b49cbb3017328f4aaa352fb6d685) ([#10843](https://github.com/yt-dlp/yt-dlp/issues/10843)) by [bashonly](https://github.com/bashonly), [pzhlkj6612](https://github.com/pzhlkj6612)
- [Support shortened URLs](https://github.com/yt-dlp/yt-dlp/commit/d1358231371f20fa23020fa9176be3b56119873e) ([#11374](https://github.com/yt-dlp/yt-dlp/issues/11374)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
- **facebook**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/ec9b25043f399de6a591d8370d32bf0e66c117f2) ([#11343](https://github.com/yt-dlp/yt-dlp/issues/11343)) by [kclauhk](https://github.com/kclauhk)
- **generic**: [Do not impersonate by default](https://github.com/yt-dlp/yt-dlp/commit/c29f5a7fae93a08f3cfbb6127b2faa75145b06a0) ([#11336](https://github.com/yt-dlp/yt-dlp/issues/11336)) by [bashonly](https://github.com/bashonly)
- **nfl**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/838f4385de8300a4dd4e7ffbbf0e5b7b85fb52c2) ([#11409](https://github.com/yt-dlp/yt-dlp/issues/11409)) by [bashonly](https://github.com/bashonly)
- **niconicouser**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6abef74232c0fc695cd803c18ae446cacb129389) ([#11324](https://github.com/yt-dlp/yt-dlp/issues/11324)) by [Wesley107772](https://github.com/Wesley107772)
- **soundcloud**: [Extract artists](https://github.com/yt-dlp/yt-dlp/commit/f101e5d34c97c608156ad5396714c2a2edca966a) ([#11377](https://github.com/yt-dlp/yt-dlp/issues/11377)) by [seproDev](https://github.com/seproDev)
- **tumblr**: [Support more URLs](https://github.com/yt-dlp/yt-dlp/commit/b03267bf0675eeb8df5baf1daac7cf67840c91a5) ([#6057](https://github.com/yt-dlp/yt-dlp/issues/6057)) by [selfisekai](https://github.com/selfisekai), [seproDev](https://github.com/seproDev)
- **twitter**: [Remove cookies migration workaround](https://github.com/yt-dlp/yt-dlp/commit/76802f461332d444e596437c42374fa237fa5174) ([#11392](https://github.com/yt-dlp/yt-dlp/issues/11392)) by [bashonly](https://github.com/bashonly)
- **vimeo**: [Fix API retries](https://github.com/yt-dlp/yt-dlp/commit/57212a5f97ce367590aaa5c3e9a135eead8f81f7) ([#11351](https://github.com/yt-dlp/yt-dlp/issues/11351)) by [bashonly](https://github.com/bashonly)
- **yle_areena**: [Support live events](https://github.com/yt-dlp/yt-dlp/commit/a6783a3b9905e547f6c1d4df9d7c7999feda8afa) ([#11358](https://github.com/yt-dlp/yt-dlp/issues/11358)) by [bashonly](https://github.com/bashonly), [CounterPillow](https://github.com/CounterPillow)
- **youtube**: [Adjust OAuth refresh token handling](https://github.com/yt-dlp/yt-dlp/commit/d569a8845254d90ce13ad74ae76695e8d6441068) ([#11414](https://github.com/yt-dlp/yt-dlp/issues/11414)) by [bashonly](https://github.com/bashonly)
#### Misc. changes
- **build**
- [Disable attestations for trusted publishing](https://github.com/yt-dlp/yt-dlp/commit/428ffb75aa3534b275cf54de42693a4d261519da) ([#11418](https://github.com/yt-dlp/yt-dlp/issues/11418)) by [bashonly](https://github.com/bashonly)
- [Move optional dependencies to the `default` group](https://github.com/yt-dlp/yt-dlp/commit/87884f15580910e4e0fe0e1db73508debc657471) ([#11255](https://github.com/yt-dlp/yt-dlp/issues/11255)) by [bashonly](https://github.com/bashonly)
- [Use Ubuntu 20.04 and Python 3.9 for Linux ARM builds](https://github.com/yt-dlp/yt-dlp/commit/dd2e24446954246a2ec4d4a7e95531f52a14b351) ([#8638](https://github.com/yt-dlp/yt-dlp/issues/8638)) by [bashonly](https://github.com/bashonly)
- **cleanup**
- Miscellaneous
- [ea9e35d](https://github.com/yt-dlp/yt-dlp/commit/ea9e35d85fba5eab341cdcaf1eaed69b57f7e465) by [bashonly](https://github.com/bashonly)
- [c998238](https://github.com/yt-dlp/yt-dlp/commit/c998238c2e76c62d1d29962c6e8ebe916cc7913b) by [bashonly](https://github.com/bashonly), [KBelmin](https://github.com/KBelmin)
- [197d0b0](https://github.com/yt-dlp/yt-dlp/commit/197d0b03b6a3c8fe4fa5ace630eeffec629bf72c) by [avagordon01](https://github.com/avagordon01), [bashonly](https://github.com/bashonly), [grqz](https://github.com/grqz), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev)
- **devscripts**: `make_changelog`: [Parse full commit message for fixes](https://github.com/yt-dlp/yt-dlp/commit/0a3991edae0e10f2ea41ece9fdea5e48f789f1de) ([#11366](https://github.com/yt-dlp/yt-dlp/issues/11366)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
### 2024.10.22
#### Important changes
- **Following this release, yt-dlp's Python dependencies *must* be installed using the `default` group**
If you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's optional dependencies (which were previously included by default). [Read more](https://github.com/yt-dlp/yt-dlp/pull/11255)
- **py2exe is no longer supported**
This release's `yt-dlp_min.exe` will be the last, and it's actually a PyInstaller-bundled executable so that yt-dlp users updating their py2exe build with `-U` will be automatically migrated. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10087)
#### Core changes
- [Add extractor helpers](https://github.com/yt-dlp/yt-dlp/commit/d710a6ca7c622705c0c8c8a3615916f531137d5d) ([#10653](https://github.com/yt-dlp/yt-dlp/issues/10653)) by [Grub4K](https://github.com/Grub4K)
- [Add option `--plugin-dirs`](https://github.com/yt-dlp/yt-dlp/commit/0f593dca9fa995d88eb763170a932da61c8f24dc) ([#11277](https://github.com/yt-dlp/yt-dlp/issues/11277)) by [coletdjnz](https://github.com/coletdjnz), [imranh2](https://github.com/imranh2)
- **cookies**: [Fix compatibility for Python <=3.9 in traceback](https://github.com/yt-dlp/yt-dlp/commit/c5f0f58efd8c3930de8202c15a5c53b1b635bd51) by [Grub4K](https://github.com/Grub4K)
- **utils**
- `Popen`: [Reset PyInstaller environment](https://github.com/yt-dlp/yt-dlp/commit/fbc66e3ab35743cc847a21223c67d88bb463cd9c) ([#11258](https://github.com/yt-dlp/yt-dlp/issues/11258)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
- `sanitize_path`: [Reimplement function](https://github.com/yt-dlp/yt-dlp/commit/85b87c991af25dcb35630fa94580fd418e78ee33) ([#11198](https://github.com/yt-dlp/yt-dlp/issues/11198)) by [Grub4K](https://github.com/Grub4K)
#### Extractor changes
- **adobepass**: [Use newer user-agent for provider redirect request](https://github.com/yt-dlp/yt-dlp/commit/dcfeea4dd5e5686821350baa6c7767a011944867) ([#11250](https://github.com/yt-dlp/yt-dlp/issues/11250)) by [bashonly](https://github.com/bashonly)
- **afreecatv**: [Adapt extractors to new sooplive.co.kr domain](https://github.com/yt-dlp/yt-dlp/commit/46fe60ff19395698a87113b2944453779e04ab9d) ([#11266](https://github.com/yt-dlp/yt-dlp/issues/11266)) by [63427083](https://github.com/63427083), [bashonly](https://github.com/bashonly)
- **cda**: [Support folders](https://github.com/yt-dlp/yt-dlp/commit/c4d95f67ddc522297bb1fea875255cf94b34d595) ([#10786](https://github.com/yt-dlp/yt-dlp/issues/10786)) by [pktiuk](https://github.com/pktiuk)
- **cwtv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/9d43dcb2c5c38f443f84dfc126cd32720e1a1ad6) ([#11230](https://github.com/yt-dlp/yt-dlp/issues/11230)) by [bashonly](https://github.com/bashonly)
- **drtv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f4338714241b11d9d43768ae71a25f5e952f677d) ([#11141](https://github.com/yt-dlp/yt-dlp/issues/11141)) by [444995](https://github.com/444995)
- **funk**: [Extend `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/8de431ec97a4b62b73df8f686b6e21e462775336) ([#11269](https://github.com/yt-dlp/yt-dlp/issues/11269)) by [seproDev](https://github.com/seproDev)
- **gem.cbc.ca**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/40054cb4a7ebbea30d335d444e6f58b298a3baa0) ([#11196](https://github.com/yt-dlp/yt-dlp/issues/11196)) by [DavidSkrundz](https://github.com/DavidSkrundz)
- **generic**: [Impersonate browser by default](https://github.com/yt-dlp/yt-dlp/commit/edfd095b1917701c5046bd51f9542897c17d41a7) ([#11206](https://github.com/yt-dlp/yt-dlp/issues/11206)) by [Grub4K](https://github.com/Grub4K)
- **imgur**
- [Fix thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/87408ccfd772ddf31a8323d8151c24f9577cbc9f) ([#11298](https://github.com/yt-dlp/yt-dlp/issues/11298)) by [seproDev](https://github.com/seproDev)
- [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/5af774d7a36c00bea618c7047c9326532cd3f616) ([#11075](https://github.com/yt-dlp/yt-dlp/issues/11075)) by [Deer-Spangle](https://github.com/Deer-Spangle)
- **patreon**: campaign: [Stricter URL matching](https://github.com/yt-dlp/yt-dlp/commit/babb70960595e2146f06f81affc29c7e713e34e2) ([#11235](https://github.com/yt-dlp/yt-dlp/issues/11235)) by [bashonly](https://github.com/bashonly)
- **reddit**: [Detect and raise when login is required](https://github.com/yt-dlp/yt-dlp/commit/cba7868502f04175fecf9ab3e363296aee7ebec2) ([#11202](https://github.com/yt-dlp/yt-dlp/issues/11202)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **substack**: [Resolve podcast file extensions](https://github.com/yt-dlp/yt-dlp/commit/3148c1822f66533998278f0a1cf842b9bea1526a) ([#11275](https://github.com/yt-dlp/yt-dlp/issues/11275)) by [bashonly](https://github.com/bashonly)
- **telecinco**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/0b7ec08816fb196cd41d392f8331b4eb8366c4f8) ([#11142](https://github.com/yt-dlp/yt-dlp/issues/11142)) by [bashonly](https://github.com/bashonly), [DarkZeros](https://github.com/DarkZeros)
- **tubitv**: [Strip extra whitespace from titles](https://github.com/yt-dlp/yt-dlp/commit/e68b4c19af122876561a41f2dd8093fae7b417c7) ([#10795](https://github.com/yt-dlp/yt-dlp/issues/10795)) by [allendema](https://github.com/allendema)
- **tver**: [Support series URLs](https://github.com/yt-dlp/yt-dlp/commit/ceaea731b6e314dbbdfb2e358d7677785ed0b4fc) ([#9507](https://github.com/yt-dlp/yt-dlp/issues/9507)) by [pzhlkj6612](https://github.com/pzhlkj6612), [vvto33](https://github.com/vvto33)
- **twitter**: spaces: [Allow extraction when not logged in](https://github.com/yt-dlp/yt-dlp/commit/679c68240a26481ea7c07cc0c014745631ea8481) ([#11289](https://github.com/yt-dlp/yt-dlp/issues/11289)) by [rubyevadestaxes](https://github.com/rubyevadestaxes)
- **weverse**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5310fa87f6cb7f66bf42e2520878952fbf6b1652) ([#11215](https://github.com/yt-dlp/yt-dlp/issues/11215)) by [bashonly](https://github.com/bashonly)
- **youtube**
- [Fix `comment_count` extraction](https://github.com/yt-dlp/yt-dlp/commit/7af1ddaaf2a6a0a750373a9ab53c7770af4f9fe4) ([#11274](https://github.com/yt-dlp/yt-dlp/issues/11274)) by [bashonly](https://github.com/bashonly)
- [Remove broken `android_producer` client](https://github.com/yt-dlp/yt-dlp/commit/fed53d70bdb7d3e37ef63dd7fcf0ef74356167fd) ([#11297](https://github.com/yt-dlp/yt-dlp/issues/11297)) by [bashonly](https://github.com/bashonly)
- [Remove broken age-restriction workaround](https://github.com/yt-dlp/yt-dlp/commit/ec2f4bf0823a13043f98f5bd0bf6677837bf09dc) ([#11297](https://github.com/yt-dlp/yt-dlp/issues/11297)) by [bashonly](https://github.com/bashonly)
- [Support logging in with OAuth](https://github.com/yt-dlp/yt-dlp/commit/b8635c1d4779da195e71aa281f73aaad702c935e) ([#11001](https://github.com/yt-dlp/yt-dlp/issues/11001)) by [coletdjnz](https://github.com/coletdjnz)
#### Misc. changes
- **build**
- [Migrate `py2exe` builds to `win_exe`](https://github.com/yt-dlp/yt-dlp/commit/a886cf3e900f4a2ec00af705f883539269545609) ([#11256](https://github.com/yt-dlp/yt-dlp/issues/11256)) by [bashonly](https://github.com/bashonly)
- [Use `macos-13` image for macOS builds](https://github.com/yt-dlp/yt-dlp/commit/64d84d75ca8c19ec06558cc7c511f5f4f7a822bc) ([#11236](https://github.com/yt-dlp/yt-dlp/issues/11236)) by [bashonly](https://github.com/bashonly)
- `make_lazy_extractors`: [Force running without plugins](https://github.com/yt-dlp/yt-dlp/commit/1a830394a21a81a3e9918f9e175abc9fbb21f089) ([#11205](https://github.com/yt-dlp/yt-dlp/issues/11205)) by [Grub4K](https://github.com/Grub4K)
- **cleanup**: Miscellaneous: [67adeb7](https://github.com/yt-dlp/yt-dlp/commit/67adeb7bab00662ba55d473e405b301abb42fe61) by [bashonly](https://github.com/bashonly), [DTrombett](https://github.com/DTrombett), [grqz](https://github.com/grqz), [Grub4K](https://github.com/Grub4K), [KarboniteKream](https://github.com/KarboniteKream), [mikkovedru](https://github.com/mikkovedru), [seproDev](https://github.com/seproDev)
- **test**: [Allow running tests explicitly](https://github.com/yt-dlp/yt-dlp/commit/16eb28026a2ddf5608d0a628ef15949b8d3805a9) ([#11203](https://github.com/yt-dlp/yt-dlp/issues/11203)) by [Grub4K](https://github.com/Grub4K)
### 2024.10.07 ### 2024.10.07
#### Core changes #### Core changes

162
README.md
View file

@ -4,7 +4,7 @@
[![YT-DLP](https://raw.githubusercontent.com/yt-dlp/yt-dlp/master/.github/banner.svg)](#readme) [![YT-DLP](https://raw.githubusercontent.com/yt-dlp/yt-dlp/master/.github/banner.svg)](#readme)
[![Release version](https://img.shields.io/github/v/release/yt-dlp/yt-dlp?color=brightgreen&label=Download&style=for-the-badge)](#installation "Installation") [![Release version](https://img.shields.io/github/v/release/yt-dlp/yt-dlp?color=brightgreen&label=Download&style=for-the-badge)](#installation "Installation")
[![PyPi](https://img.shields.io/badge/-PyPi-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp "PyPi") [![PyPI](https://img.shields.io/badge/-PyPI-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp "PyPI")
[![Donate](https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)](Collaborators.md#collaborators "Donate") [![Donate](https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)](Collaborators.md#collaborators "Donate")
[![Matrix](https://img.shields.io/matrix/yt-dlp:matrix.org?color=brightgreen&labelColor=555555&label=&logo=element&style=for-the-badge)](https://matrix.to/#/#yt-dlp:matrix.org "Matrix") [![Matrix](https://img.shields.io/matrix/yt-dlp:matrix.org?color=brightgreen&labelColor=555555&label=&logo=element&style=for-the-badge)](https://matrix.to/#/#yt-dlp:matrix.org "Matrix")
[![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)](https://discord.gg/H5MNcFW63r "Discord") [![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)](https://discord.gg/H5MNcFW63r "Discord")
@ -81,7 +81,7 @@ yt-dlp is a feature-rich command-line audio/video downloader with support for [t
[![Windows](https://img.shields.io/badge/-Windows_x64-blue.svg?style=for-the-badge&logo=windows)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe) [![Windows](https://img.shields.io/badge/-Windows_x64-blue.svg?style=for-the-badge&logo=windows)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)
[![Unix](https://img.shields.io/badge/-Linux/BSD-red.svg?style=for-the-badge&logo=linux)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp) [![Unix](https://img.shields.io/badge/-Linux/BSD-red.svg?style=for-the-badge&logo=linux)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)
[![MacOS](https://img.shields.io/badge/-MacOS-lightblue.svg?style=for-the-badge&logo=apple)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos) [![MacOS](https://img.shields.io/badge/-MacOS-lightblue.svg?style=for-the-badge&logo=apple)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)
[![PyPi](https://img.shields.io/badge/-PyPi-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp) [![PyPI](https://img.shields.io/badge/-PyPI-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp)
[![Source Tarball](https://img.shields.io/badge/-Source_tar-green.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) [![Source Tarball](https://img.shields.io/badge/-Source_tar-green.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)
[![Other variants](https://img.shields.io/badge/-Other-grey.svg?style=for-the-badge)](#release-files) [![Other variants](https://img.shields.io/badge/-Other-grey.svg?style=for-the-badge)](#release-files)
[![All versions](https://img.shields.io/badge/-All_Versions-lightgrey.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases) [![All versions](https://img.shields.io/badge/-All_Versions-lightgrey.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases)
@ -98,15 +98,14 @@ You can install yt-dlp using [the binaries](#release-files), [pip](https://pypi.
File|Description File|Description
:---|:--- :---|:---
[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform-independent [zipimport](https://docs.python.org/3/library/zipimport.html) binary. Needs Python (recommended for **Linux/BSD**) [yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform-independent [zipimport](https://docs.python.org/3/library/zipimport.html) binary. Needs Python (recommended for **Linux/BSD**)
[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win7 SP1+) standalone x64 binary (recommended for **Windows**) [yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win8+) standalone x64 binary (recommended for **Windows**)
[yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|Universal MacOS (10.15+) standalone executable (recommended for **MacOS**) [yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|Universal MacOS (10.15+) standalone executable (recommended for **MacOS**)
#### Alternatives #### Alternatives
File|Description File|Description
:---|:--- :---|:---
[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win7 SP1+) standalone x86 (32-bit) binary [yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win8+) standalone x86 (32-bit) binary
[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`<br/> ([Not recommended](#standalone-py2exe-builds-windows))
[yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary [yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary
[yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary [yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary
[yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary [yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary
@ -173,11 +172,11 @@ python3 -m pip install -U --pre "yt-dlp[default]"
``` ```
## DEPENDENCIES ## DEPENDENCIES
Python versions 3.8+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly. Python versions 3.9+ (CPython) and 3.10+ (PyPy) are supported. Other versions and implementations may or may not work correctly.
<!-- Python 3.5+ uses VC++14 and it is already embedded in the binary created <!-- Python 3.5+ uses VC++14 and it is already embedded in the binary created
<!x-- https://www.microsoft.com/en-us/download/details.aspx?id=26999 --x> <!x-- https://www.microsoft.com/en-us/download/details.aspx?id=26999 --x>
On windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https://download.microsoft.com/download/1/6/5/165255E7-1014-4D0A-B094-B6A430A6BFFC/vcredist_x86.exe) is also necessary to run yt-dlp. You probably already have this, but if the executable throws an error due to missing `MSVCR100.dll` you need to install it manually. On Windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https://download.microsoft.com/download/1/6/5/165255E7-1014-4D0A-B094-B6A430A6BFFC/vcredist_x86.exe) is also necessary to run yt-dlp. You probably already have this, but if the executable throws an error due to missing `MSVCR100.dll` you need to install it manually.
--> -->
While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly recommended While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly recommended
@ -254,24 +253,12 @@ On some systems, you may need to use `py` or `python` instead of `python3`.
**Important**: Running `pyinstaller` directly **instead of** using `python -m bundle.pyinstaller` is **not** officially supported. This may or may not work correctly. **Important**: Running `pyinstaller` directly **instead of** using `python -m bundle.pyinstaller` is **not** officially supported. This may or may not work correctly.
### Platform-independent Binary (UNIX) ### Platform-independent Binary (UNIX)
You will need the build tools `python` (3.8+), `zip`, `make` (GNU), `pandoc`\* and `pytest`\*. You will need the build tools `python` (3.9+), `zip`, `make` (GNU), `pandoc`\* and `pytest`\*.
After installing these, simply run `make`. After installing these, simply run `make`.
You can also run `make yt-dlp` instead to compile only the binary without updating any of the additional files. (The build tools marked with **\*** are not needed for this) You can also run `make yt-dlp` instead to compile only the binary without updating any of the additional files. (The build tools marked with **\*** are not needed for this)
### Standalone Py2Exe Builds (Windows)
While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi`/`requests` and need VC++14** on the target computer to run.
If you wish to build it anyway, install Python (if it is not already installed) and you can run the following commands:
```
py devscripts/install_deps.py --include py2exe
py devscripts/make_lazy_extractors.py
py -m bundle.py2exe
```
### Related scripts ### Related scripts
* **`devscripts/install_deps.py`** - Install dependencies for yt-dlp. * **`devscripts/install_deps.py`** - Install dependencies for yt-dlp.
@ -348,8 +335,16 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
containing directory ("-" for stdin). Can be containing directory ("-" for stdin). Can be
used multiple times and inside other used multiple times and inside other
configuration files configuration files
--flat-playlist Do not extract the videos of a playlist, --plugin-dirs PATH Path to an additional directory to search
only list them for plugins. This option can be used
multiple times to add multiple directories.
Note that this currently only works for
extractor plugins; postprocessor plugins can
only be loaded from the default plugin
directories
--flat-playlist Do not extract a playlist's URL result
entries; some entry metadata may be missing
and downloading may be bypassed
--no-flat-playlist Fully extract the videos of a playlist --no-flat-playlist Fully extract the videos of a playlist
(default) (default)
--live-from-start Download livestreams from the start. --live-from-start Download livestreams from the start.
@ -444,10 +439,10 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
E.g. "--date today-2weeks" downloads only E.g. "--date today-2weeks" downloads only
videos uploaded on the same day two weeks ago videos uploaded on the same day two weeks ago
--datebefore DATE Download only videos uploaded on or before --datebefore DATE Download only videos uploaded on or before
this date. The date formats accepted is the this date. The date formats accepted are the
same as --date same as --date
--dateafter DATE Download only videos uploaded on or after --dateafter DATE Download only videos uploaded on or after
this date. The date formats accepted is the this date. The date formats accepted are the
same as --date same as --date
--match-filters FILTER Generic video filter. Any "OUTPUT TEMPLATE" --match-filters FILTER Generic video filter. Any "OUTPUT TEMPLATE"
field can be compared with a number or a field can be compared with a number or a
@ -485,7 +480,8 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
--no-download-archive Do not use archive file (default) --no-download-archive Do not use archive file (default)
--max-downloads NUMBER Abort after downloading NUMBER files --max-downloads NUMBER Abort after downloading NUMBER files
--break-on-existing Stop the download process when encountering --break-on-existing Stop the download process when encountering
a file that is in the archive a file that is in the archive supplied with
the --download-archive option
--no-break-on-existing Do not stop the download process when --no-break-on-existing Do not stop the download process when
encountering a file that is in the archive encountering a file that is in the archive
(default) (default)
@ -732,16 +728,16 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
used. This option can be used multiple times used. This option can be used multiple times
--print-to-file [WHEN:]TEMPLATE FILE --print-to-file [WHEN:]TEMPLATE FILE
Append given template to the file. The Append given template to the file. The
values of WHEN and TEMPLATE are same as that values of WHEN and TEMPLATE are the same as
of --print. FILE uses the same syntax as the that of --print. FILE uses the same syntax
output template. This option can be used as the output template. This option can be
multiple times used multiple times
-j, --dump-json Quiet, but print JSON information for each -j, --dump-json Quiet, but print JSON information for each
video. Simulate unless --no-simulate is video. Simulate unless --no-simulate is
used. See "OUTPUT TEMPLATE" for a used. See "OUTPUT TEMPLATE" for a
description of available keys description of available keys
-J, --dump-single-json Quiet, but print JSON information for each -J, --dump-single-json Quiet, but print JSON information for each
url or infojson passed. Simulate unless URL or infojson passed. Simulate unless
--no-simulate is used. If the URL refers to --no-simulate is used. If the URL refers to
a playlist, the whole playlist information a playlist, the whole playlist information
is dumped in a single line is dumped in a single line
@ -816,9 +812,9 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
--no-audio-multistreams Only one audio stream is downloaded for each --no-audio-multistreams Only one audio stream is downloaded for each
output file (default) output file (default)
--prefer-free-formats Prefer video formats with free containers --prefer-free-formats Prefer video formats with free containers
over non-free ones of same quality. Use with over non-free ones of the same quality. Use
"-S ext" to strictly prefer free containers with "-S ext" to strictly prefer free
irrespective of quality containers irrespective of quality
--no-prefer-free-formats Don't give any special preference to free --no-prefer-free-formats Don't give any special preference to free
containers (default) containers (default)
--check-formats Make sure formats are selected only from --check-formats Make sure formats are selected only from
@ -843,15 +839,17 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
(default) (Alias: --no-write-automatic-subs) (default) (Alias: --no-write-automatic-subs)
--list-subs List available subtitles of each video. --list-subs List available subtitles of each video.
Simulate unless --no-simulate is used Simulate unless --no-simulate is used
--sub-format FORMAT Subtitle format; accepts formats preference, --sub-format FORMAT Subtitle format; accepts formats preference
e.g. "srt" or "ass/srt/best" separated by "/", e.g. "srt" or "ass/srt/best"
--sub-langs LANGS Languages of the subtitles to download (can --sub-langs LANGS Languages of the subtitles to download (can
be regex) or "all" separated by commas, e.g. be regex) or "all" separated by commas, e.g.
--sub-langs "en.*,ja". You can prefix the --sub-langs "en.*,ja" (where "en.*" is a
language code with a "-" to exclude it from regex pattern that matches "en" followed by
the requested languages, e.g. --sub-langs 0 or more of any character). You can prefix
all,-live_chat. Use --list-subs for a list the language code with a "-" to exclude it
of available language tags from the requested languages, e.g.
--sub-langs all,-live_chat. Use --list-subs
for a list of available language tags
## Authentication Options: ## Authentication Options:
-u, --username USERNAME Login with this account ID -u, --username USERNAME Login with this account ID
@ -899,9 +897,9 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
necessary (currently supported: avi, flv, necessary (currently supported: avi, flv,
gif, mkv, mov, mp4, webm, aac, aiff, alac, gif, mkv, mov, mp4, webm, aac, aiff, alac,
flac, m4a, mka, mp3, ogg, opus, vorbis, flac, m4a, mka, mp3, ogg, opus, vorbis,
wav). If target container does not support wav). If the target container does not
the video/audio codec, remuxing will fail. support the video/audio codec, remuxing will
You can specify multiple rules; e.g. fail. You can specify multiple rules; e.g.
"aac>m4a/mov>mp4/mkv" will remux aac to m4a, "aac>m4a/mov>mp4/mkv" will remux aac to m4a,
mov to mp4 and anything else to mkv mov to mp4 and anything else to mkv
--recode-video FORMAT Re-encode the video into another format if --recode-video FORMAT Re-encode the video into another format if
@ -969,29 +967,29 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
are the same as that of --use-postprocessor are the same as that of --use-postprocessor
(default: pre_process) (default: pre_process)
--xattrs Write metadata to the video file's xattrs --xattrs Write metadata to the video file's xattrs
(using dublin core and xdg standards) (using Dublin Core and XDG standards)
--concat-playlist POLICY Concatenate videos in a playlist. One of --concat-playlist POLICY Concatenate videos in a playlist. One of
"never", "always", or "multi_video" "never", "always", or "multi_video"
(default; only when the videos form a single (default; only when the videos form a single
show). All the video files must have same show). All the video files must have the
codecs and number of streams to be same codecs and number of streams to be
concatable. The "pl_video:" prefix can be concatenable. The "pl_video:" prefix can be
used with "--paths" and "--output" to set used with "--paths" and "--output" to set
the output filename for the concatenated the output filename for the concatenated
files. See "OUTPUT TEMPLATE" for details files. See "OUTPUT TEMPLATE" for details
--fixup POLICY Automatically correct known faults of the --fixup POLICY Automatically correct known faults of the
file. One of never (do nothing), warn (only file. One of never (do nothing), warn (only
emit a warning), detect_or_warn (the emit a warning), detect_or_warn (the
default; fix file if we can, warn default; fix the file if we can, warn
otherwise), force (try fixing even if file otherwise), force (try fixing even if the
already exists) file already exists)
--ffmpeg-location PATH Location of the ffmpeg binary; either the --ffmpeg-location PATH Location of the ffmpeg binary; either the
path to the binary or its containing directory path to the binary or its containing directory
--exec [WHEN:]CMD Execute a command, optionally prefixed with --exec [WHEN:]CMD Execute a command, optionally prefixed with
when to execute it, separated by a ":". when to execute it, separated by a ":".
Supported values of "WHEN" are the same as Supported values of "WHEN" are the same as
that of --use-postprocessor (default: that of --use-postprocessor (default:
after_move). Same syntax as the output after_move). The same syntax as the output
template can be used to pass any field as template can be used to pass any field as
arguments to the command. If no fields are arguments to the command. If no fields are
passed, %(filepath,_filename|)q is appended passed, %(filepath,_filename|)q is appended
@ -1029,7 +1027,7 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
--no-force-keyframes-at-cuts Do not force keyframes around the chapters --no-force-keyframes-at-cuts Do not force keyframes around the chapters
when cutting/splitting (default) when cutting/splitting (default)
--use-postprocessor NAME[:ARGS] --use-postprocessor NAME[:ARGS]
The (case sensitive) name of plugin The (case-sensitive) name of plugin
postprocessors to be enabled, and postprocessors to be enabled, and
(optionally) arguments to be passed to it, (optionally) arguments to be passed to it,
separated by a colon ":". ARGS are a separated by a colon ":". ARGS are a
@ -1042,8 +1040,8 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
--print/--output), "before_dl" (before each --print/--output), "before_dl" (before each
video download), "post_process" (after each video download), "post_process" (after each
video download; default), "after_move" video download; default), "after_move"
(after moving video file to its final (after moving the video file to its final
locations), "after_video" (after downloading location), "after_video" (after downloading
and processing all formats of a video), or and processing all formats of a video), or
"playlist" (at end of playlist). This option "playlist" (at end of playlist). This option
can be used multiple times to add different can be used multiple times to add different
@ -1061,7 +1059,7 @@ Make chapter entries for, or remove various segments (sponsor,
music_offtopic, poi_highlight, chapter, all music_offtopic, poi_highlight, chapter, all
and default (=all). You can prefix the and default (=all). You can prefix the
category with a "-" to exclude it. See [1] category with a "-" to exclude it. See [1]
for description of the categories. E.g. for descriptions of the categories. E.g.
--sponsorblock-mark all,-preview --sponsorblock-mark all,-preview
[1] https://wiki.sponsor.ajay.app/w/Segment_Categories [1] https://wiki.sponsor.ajay.app/w/Segment_Categories
--sponsorblock-remove CATS SponsorBlock categories to be removed from --sponsorblock-remove CATS SponsorBlock categories to be removed from
@ -1093,7 +1091,7 @@ Make chapter entries for, or remove various segments (sponsor,
(Alias: --no-allow-dynamic-mpd) (Alias: --no-allow-dynamic-mpd)
--hls-split-discontinuity Split HLS playlists to different formats at --hls-split-discontinuity Split HLS playlists to different formats at
discontinuities such as ad breaks discontinuities such as ad breaks
--no-hls-split-discontinuity Do not split HLS playlists to different --no-hls-split-discontinuity Do not split HLS playlists into different
formats at discontinuities such as ad breaks formats at discontinuities such as ad breaks
(default) (default)
--extractor-args IE_KEY:ARGS Pass ARGS arguments to the IE_KEY extractor. --extractor-args IE_KEY:ARGS Pass ARGS arguments to the IE_KEY extractor.
@ -1103,7 +1101,7 @@ Make chapter entries for, or remove various segments (sponsor,
# CONFIGURATION # CONFIGURATION
You can configure yt-dlp by placing any supported command line option to a configuration file. The configuration is loaded from the following locations: You can configure yt-dlp by placing any supported command line option in a configuration file. The configuration is loaded from the following locations:
1. **Main Configuration**: 1. **Main Configuration**:
* The file given to `--config-location` * The file given to `--config-location`
@ -1148,7 +1146,7 @@ E.g. with the following configuration file, yt-dlp will always extract the audio
-o ~/YouTube/%(title)s.%(ext)s -o ~/YouTube/%(title)s.%(ext)s
``` ```
**Note**: Options in configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary, as if it were a UNIX shell. **Note**: Options in a configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary, as if it were a UNIX shell.
You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded. You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded.
@ -1182,13 +1180,13 @@ As an alternative to using the `.netrc` file, which has the disadvantage of keep
E.g. To use an encrypted `.netrc` file stored as `.authinfo.gpg` E.g. To use an encrypted `.netrc` file stored as `.authinfo.gpg`
``` ```
yt-dlp --netrc-cmd 'gpg --decrypt ~/.authinfo.gpg' https://www.youtube.com/watch?v=BaW_jenozKc yt-dlp --netrc-cmd 'gpg --decrypt ~/.authinfo.gpg' 'https://www.youtube.com/watch?v=BaW_jenozKc'
``` ```
### Notes about environment variables ### Notes about environment variables
* Environment variables are normally specified as `${VARIABLE}`/`$VARIABLE` on UNIX and `%VARIABLE%` on Windows; but are always shown as `${VARIABLE}` in this documentation * Environment variables are normally specified as `${VARIABLE}`/`$VARIABLE` on UNIX and `%VARIABLE%` on Windows; but are always shown as `${VARIABLE}` in this documentation
* yt-dlp also allow using UNIX-style variables on Windows for path-like options; e.g. `--output`, `--config-location` * yt-dlp also allows using UNIX-style variables on Windows for path-like options; e.g. `--output`, `--config-location`
* If unset, `${XDG_CONFIG_HOME}` defaults to `~/.config` and `${XDG_CACHE_HOME}` to `~/.cache` * If unset, `${XDG_CONFIG_HOME}` defaults to `~/.config` and `${XDG_CACHE_HOME}` to `~/.cache`
* On Windows, `~` points to `${HOME}` if present; or, `${USERPROFILE}` or `${HOMEDRIVE}${HOMEPATH}` otherwise * On Windows, `~` points to `${HOME}` if present; or, `${USERPROFILE}` or `${HOMEDRIVE}${HOMEPATH}` otherwise
* On Windows, `${USERPROFILE}` generally points to `C:\Users\<user name>` and `${APPDATA}` to `${USERPROFILE}\AppData\Roaming` * On Windows, `${USERPROFILE}` generally points to `C:\Users\<user name>` and `${APPDATA}` to `${USERPROFILE}\AppData\Roaming`
@ -1269,7 +1267,7 @@ The available fields are:
- `like_count` (numeric): Number of positive ratings of the video - `like_count` (numeric): Number of positive ratings of the video
- `dislike_count` (numeric): Number of negative ratings of the video - `dislike_count` (numeric): Number of negative ratings of the video
- `repost_count` (numeric): Number of reposts of the video - `repost_count` (numeric): Number of reposts of the video
- `average_rating` (numeric): Average rating give by users, the scale used depends on the webpage - `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage
- `comment_count` (numeric): Number of comments on the video (For some extractors, comments are only downloaded at the end, and so this field cannot be used) - `comment_count` (numeric): Number of comments on the video (For some extractors, comments are only downloaded at the end, and so this field cannot be used)
- `age_limit` (numeric): Age restriction for the video (years) - `age_limit` (numeric): Age restriction for the video (years)
- `live_status` (string): One of "not_live", "is_live", "is_upcoming", "was_live", "post_live" (was live, but VOD is not yet processed) - `live_status` (string): One of "not_live", "is_live", "is_upcoming", "was_live", "post_live" (was live, but VOD is not yet processed)
@ -1299,7 +1297,7 @@ The available fields are:
- `webpage_url` (string): A URL to the video webpage which, if given to yt-dlp, should yield the same result again - `webpage_url` (string): A URL to the video webpage which, if given to yt-dlp, should yield the same result again
- `webpage_url_basename` (string): The basename of the webpage URL - `webpage_url_basename` (string): The basename of the webpage URL
- `webpage_url_domain` (string): The domain of the webpage URL - `webpage_url_domain` (string): The domain of the webpage URL
- `original_url` (string): The URL given by the user (or same as `webpage_url` for playlist entries) - `original_url` (string): The URL given by the user (or the same as `webpage_url` for playlist entries)
- `categories` (list): List of categories the video belongs to - `categories` (list): List of categories the video belongs to
- `tags` (list): List of tags assigned to the video - `tags` (list): List of tags assigned to the video
- `cast` (list): List of cast members - `cast` (list): List of cast members
@ -1376,7 +1374,7 @@ Each aforementioned sequence when referenced in an output template will be repla
**Tip**: Look at the `-j` output to identify which fields are available for the particular URL **Tip**: Look at the `-j` output to identify which fields are available for the particular URL
For numeric sequences you can use [numeric related formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting); e.g. `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`. For numeric sequences, you can use [numeric related formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting); e.g. `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`.
Output templates can also contain an arbitrary hierarchical path, e.g. `-o "%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s"` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you. Output templates can also contain an arbitrary hierarchical path, e.g. `-o "%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s"` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
@ -1418,7 +1416,7 @@ $ yt-dlp -P "C:/MyVideos" -o "%(series)s/%(season_number)s - %(season)s/%(episod
# Download video as "C:\MyVideos\uploader\title.ext", subtitles as "C:\MyVideos\subs\uploader\title.ext" # Download video as "C:\MyVideos\uploader\title.ext", subtitles as "C:\MyVideos\subs\uploader\title.ext"
# and put all temporary files in "C:\MyVideos\tmp" # and put all temporary files in "C:\MyVideos\tmp"
$ yt-dlp -P "C:/MyVideos" -P "temp:tmp" -P "subtitle:subs" -o "%(uploader)s/%(title)s.%(ext)s" BaW_jenoz --write-subs $ yt-dlp -P "C:/MyVideos" -P "temp:tmp" -P "subtitle:subs" -o "%(uploader)s/%(title)s.%(ext)s" BaW_jenozKc --write-subs
# Download video as "C:\MyVideos\uploader\title.ext" and subtitles as "C:\MyVideos\uploader\subs\title.ext" # Download video as "C:\MyVideos\uploader\title.ext" and subtitles as "C:\MyVideos\uploader\subs\title.ext"
$ yt-dlp -P "C:/MyVideos" -o "%(uploader)s/%(title)s.%(ext)s" -o "subtitle:%(uploader)s/subs/%(title)s.%(ext)s" BaW_jenozKc --write-subs $ yt-dlp -P "C:/MyVideos" -o "%(uploader)s/%(title)s.%(ext)s" -o "subtitle:%(uploader)s/subs/%(title)s.%(ext)s" BaW_jenozKc --write-subs
@ -1557,9 +1555,9 @@ The available fields are:
All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. E.g. `+res` prefers the format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. E.g. `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. E.g. `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the values nearest to the provided one by using `~` as the delimiter. E.g. `filesize~1G` prefers the format with filesize closest to 1 GiB. All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. E.g. `+res` prefers the format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. E.g. `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. E.g. `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the values nearest to the provided one by using `~` as the delimiter. E.g. `filesize~1G` prefers the format with filesize closest to 1 GiB.
The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behavior can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,vcodec:vp9.2,channels,acodec,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order. The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behavior can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,vcodec,channels,acodec,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order.
Note that the default has `vcodec:vp9.2`; i.e. `av1` is not preferred. Similarly, the default for hdr is `hdr:12`; i.e. Dolby Vision is not preferred. These choices are made since DV and AV1 formats are not yet fully compatible with most devices. This may be changed in the future as more devices become capable of smoothly playing back these formats. Note that the default for hdr is `hdr:12`; i.e. Dolby Vision is not preferred. This choice was made since DV formats are not yet fully compatible with most devices. This may be changed in the future.
If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`. If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`.
@ -1636,11 +1634,11 @@ $ yt-dlp -S "res:480"
# or the worst video (that also has audio) if there is no video under 50 MB # or the worst video (that also has audio) if there is no video under 50 MB
$ yt-dlp -f "b[filesize<50M] / w" $ yt-dlp -f "b[filesize<50M] / w"
# Download largest video (that also has audio) but no bigger than 50 MB, # Download the largest video (that also has audio) but no bigger than 50 MB,
# or the smallest video (that also has audio) if there is no video under 50 MB # or the smallest video (that also has audio) if there is no video under 50 MB
$ yt-dlp -f "b" -S "filesize:50M" $ yt-dlp -f "b" -S "filesize:50M"
# Download best video (that also has audio) that is closest in size to 50 MB # Download the best video (that also has audio) that is closest in size to 50 MB
$ yt-dlp -f "b" -S "filesize~50M" $ yt-dlp -f "b" -S "filesize~50M"
@ -1696,7 +1694,7 @@ The metadata obtained by the extractors can be modified by using `--parse-metada
The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [Python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups, a single field name, or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields. The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [Python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups, a single field name, or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields.
Note that these options preserve their relative order, allowing replacements to be made in parsed fields and viceversa. Also, any field thus created can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--embed-metadata`. Note that these options preserve their relative order, allowing replacements to be made in parsed fields and vice versa. Also, any field thus created can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--embed-metadata`.
This option also has a few special uses: This option also has a few special uses:
@ -1771,7 +1769,7 @@ The following extractors use this feature:
#### youtube #### youtube
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mediaconnect`, `mweb`, `android_producer`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, and `tv_embedded`, `web_creator` and `mediaconnect` are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. Most `android` clients will be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web` * `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mweb`, `mediaconnect`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, and `web_creator` is added as needed for age-gated videos when account age verification is required. Similarly, the `_music` variants are added for `music.youtube.com` URLs. Some clients, such as `web` and `android`, require a `po_token` for their formats to be downloadable. Some clients, such as the `_creator` variants, will only work with authentication. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web`
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
@ -1795,7 +1793,7 @@ The following extractors use this feature:
* `key_query`: Passthrough the master m3u8 URL query to its HLS AES-128 decryption key URI if no value is provided, or else apply the query string given as `key_query=VALUE`. Note that this will have no effect if the key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg * `key_query`: Passthrough the master m3u8 URL query to its HLS AES-128 decryption key URI if no value is provided, or else apply the query string given as `key_query=VALUE`. Note that this will have no effect if the key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg
* `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist * `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist
* `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live` * `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live`
* `impersonate`: Target(s) to try and impersonate with the initial webpage request; e.g. `safari,chrome-110`. By default any available target will be used. Use `false` to disable impersonation * `impersonate`: Target(s) to try and impersonate with the initial webpage request; e.g. `generic:impersonate=safari,chrome-110`. Use `generic:impersonate` to impersonate any available target, and use `generic:impersonate=false` to disable impersonation (default)
#### funimation #### funimation
* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese` * `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
@ -1869,8 +1867,8 @@ The following extractors use this feature:
#### bilibili #### bilibili
* `prefer_multi_flv`: Prefer extracting flv formats over mp4 for older videos that still provide legacy formats * `prefer_multi_flv`: Prefer extracting flv formats over mp4 for older videos that still provide legacy formats
#### digitalconcerthall #### sonylivseries
* `prefer_combined_hls`: Prefer extracting combined/pre-merged video and audio HLS formats. This will exclude 4K/HEVC video and lossless/FLAC audio formats, which are only available as split video/audio HLS formats * `sort_order`: Episode sort order for series extraction - one of `asc` (ascending, oldest first) or `desc` (descending, newest first). Default is `asc`
**Note**: These options may be changed/removed in the future without concern for backward compatibility **Note**: These options may be changed/removed in the future without concern for backward compatibility
@ -1926,7 +1924,7 @@ Plugins can be installed using various methods and locations.
* Plugin packages can be installed and managed using `pip`. See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example. * Plugin packages can be installed and managed using `pip`. See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example.
* Note: plugin files between plugin packages installed with pip must have unique filenames. * Note: plugin files between plugin packages installed with pip must have unique filenames.
* Any path in `PYTHONPATH` is searched for the `yt_dlp_plugins` namespace folder. * Any path in `PYTHONPATH` is searched for the `yt_dlp_plugins` namespace folder.
* Note: This does not apply for Pyinstaller/py2exe builds. * Note: This does not apply for Pyinstaller builds.
`.zip`, `.egg` and `.whl` archives containing a `yt_dlp_plugins` namespace folder in their root are also supported as plugin packages. `.zip`, `.egg` and `.whl` archives containing a `yt_dlp_plugins` namespace folder in their root are also supported as plugin packages.
@ -2160,9 +2158,9 @@ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
* **YouTube improvements**: * **YouTube improvements**:
* Supports Clips, Stories (`ytstories:<channel UCID>`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`) * Supports Clips, Stories (`ytstories:<channel UCID>`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`)
* Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) **\*** * Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) **\***
* Supports some (but not all) age-gated content without cookies
* Download livestreams from the start using `--live-from-start` (*experimental*) * Download livestreams from the start using `--live-from-start` (*experimental*)
* Channel URLs download all uploads of the channel, including shorts and live * Channel URLs download all uploads of the channel, including shorts and live
* Support for [logging in with OAuth](https://github.com/yt-dlp/yt-dlp/wiki/Extractors#logging-in-with-oauth)
* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]` * **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]`
@ -2204,12 +2202,12 @@ Features marked with a **\*** have been back-ported to youtube-dl
Some of yt-dlp's default options are different from that of youtube-dl and youtube-dlc: Some of yt-dlp's default options are different from that of youtube-dl and youtube-dlc:
* yt-dlp supports only [Python 3.8+](## "Windows 7"), and *may* remove support for more versions as they [become EOL](https://devguide.python.org/versions/#python-release-cycle); while [youtube-dl still supports Python 2.6+ and 3.2+](https://github.com/ytdl-org/youtube-dl/issues/30568#issue-1118238743) * yt-dlp supports only [Python 3.9+](## "Windows 8"), and will remove support for more versions as they [become EOL](https://devguide.python.org/versions/#python-release-cycle); while [youtube-dl still supports Python 2.6+ and 3.2+](https://github.com/ytdl-org/youtube-dl/issues/30568#issue-1118238743)
* The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`), no longer work. See [removed options](#Removed) for details * The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`), no longer work. See [removed options](#Removed) for details
* `avconv` is not supported as an alternative to `ffmpeg` * `avconv` is not supported as an alternative to `ffmpeg`
* yt-dlp stores config files in slightly different locations to youtube-dl. See [CONFIGURATION](#configuration) for a list of correct locations * yt-dlp stores config files in slightly different locations to youtube-dl. See [CONFIGURATION](#configuration) for a list of correct locations
* The default [output template](#output-template) is `%(title)s [%(id)s].%(ext)s`. There is no real reason for this change. This was changed before yt-dlp was ever made public and now there are no plans to change it back to `%(title)s-%(id)s.%(ext)s`. Instead, you may use `--compat-options filename` * The default [output template](#output-template) is `%(title)s [%(id)s].%(ext)s`. There is no real reason for this change. This was changed before yt-dlp was ever made public and now there are no plans to change it back to `%(title)s-%(id)s.%(ext)s`. Instead, you may use `--compat-options filename`
* The default [format sorting](#sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order * The default [format sorting](#sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order. Older versions of yt-dlp preferred VP9 due to its broader compatibility; you can use `--compat-options prefer-vp9-sort` to revert to that format sorting preference. These two compat options cannot be used together
* The default format selector is `bv*+ba/b`. This means that if a combined video + audio format that is better than the best video-only format is found, the former will be preferred. Use `-f bv+ba/b` or `--compat-options format-spec` to revert this * The default format selector is `bv*+ba/b`. This means that if a combined video + audio format that is better than the best video-only format is found, the former will be preferred. Use `-f bv+ba/b` or `--compat-options format-spec` to revert this
* Unlike youtube-dlc, yt-dlp does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. You can also use `--compat-options multistreams` to enable both * Unlike youtube-dlc, yt-dlp does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. You can also use `--compat-options multistreams` to enable both
* `--no-abort-on-error` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead * `--no-abort-on-error` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead
@ -2238,11 +2236,11 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu
For ease of use, a few more compat options are available: For ease of use, a few more compat options are available:
* `--compat-options all`: Use all compat options (**Do NOT use this!**) * `--compat-options all`: Use all compat options (**Do NOT use this!**)
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx,-allow-unsafe-ext` * `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx,-allow-unsafe-ext,-prefer-vp9-sort`
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx,-allow-unsafe-ext` * `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx,-allow-unsafe-ext,-prefer-vp9-sort`
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date` * `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx` * `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
* `--compat-options 2023`: Currently does nothing. Use this to enable all future compat options * `--compat-options 2023`: Same as `--compat-options prefer-vp9-sort`. Use this to enable all future compat options
The following compat options restore vulnerable behavior from before security patches: The following compat options restore vulnerable behavior from before security patches:
@ -2280,8 +2278,8 @@ While these options are redundant, they are still expected to be used due to the
--min-views COUNT --match-filters "view_count >=? COUNT" --min-views COUNT --match-filters "view_count >=? COUNT"
--max-views COUNT --match-filters "view_count <=? COUNT" --max-views COUNT --match-filters "view_count <=? COUNT"
--break-on-reject Use --break-match-filters --break-on-reject Use --break-match-filters
--user-agent UA --add-header "User-Agent:UA" --user-agent UA --add-headers "User-Agent:UA"
--referer URL --add-header "Referer:URL" --referer URL --add-headers "Referer:URL"
--playlist-start NUMBER -I NUMBER: --playlist-start NUMBER -I NUMBER:
--playlist-end NUMBER -I :NUMBER --playlist-end NUMBER -I :NUMBER
--playlist-reverse -I ::-1 --playlist-reverse -I ::-1

View file

@ -1,59 +0,0 @@
#!/usr/bin/env python3
# Allow execution from anywhere
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import warnings
from py2exe import freeze
from devscripts.utils import read_version
VERSION = read_version()
def main():
    """Build the ``yt-dlp`` console executable with py2exe.

    A warning is emitted first, pointing users at the pyinstaller-based
    ``pyinst.py`` build, and then the build description assembled below is
    handed to ``freeze``. The output directory is ``./dist``.
    """
    warnings.warn(
        'py2exe builds do not support pycryptodomex and needs VC++14 to run. '
        'It is recommended to run "pyinst.py" to build using pyinstaller instead')

    # The single console target: entry script, output base name and icon
    console_targets = [{
        'script': './yt_dlp/__main__.py',
        'dest_base': 'yt-dlp',
        'icon_resources': [(1, 'devscripts/logo.ico')],
    }]

    # Version resource fields; both version entries come from VERSION
    version_resource = {
        'version': VERSION,
        'description': 'A feature-rich command-line audio/video downloader',
        'comments': 'Official repository: <https://github.com/yt-dlp/yt-dlp>',
        'product_name': 'yt-dlp',
        'product_version': VERSION,
    }

    # Crypto cannot be imported by py2exe
    unimportable_crypto = ['Crypto', 'Cryptodome']
    # Since requests 2.32.0 the certifi dependency breaks py2exe builds
    broken_http_stack = ['requests', 'urllib3']
    # These modules are only ever imported dynamically, so list them explicitly
    dynamic_only_modules = [
        'yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated',
        'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated',
    ]

    build_options = {
        'bundle_files': 0,
        'compressed': 1,
        'optimize': 2,
        'dist_dir': './dist',
        'excludes': unimportable_crypto + broken_http_stack,
        'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
        'includes': dynamic_only_modules,
    }

    freeze(
        console=console_targets,
        version_info=version_resource,
        options=build_options,
        zipfile=None,
    )


if __name__ == '__main__':
    main()

View file

@ -196,5 +196,48 @@
"when": "b31b81d85f00601710d4fac590c3e4efb4133283", "when": "b31b81d85f00601710d4fac590c3e4efb4133283",
"short": "[ci] Rerun failed tests (#11143)", "short": "[ci] Rerun failed tests (#11143)",
"authors": ["Grub4K"] "authors": ["Grub4K"]
},
{
"action": "add",
"when": "a886cf3e900f4a2ec00af705f883539269545609",
"short": "[priority] **py2exe is no longer supported**\nThis release's `yt-dlp_min.exe` will be the last, and it's actually a PyInstaller-bundled executable so that yt-dlp users updating their py2exe build with `-U` will be automatically migrated. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10087)"
},
{
"action": "add",
"when": "a886cf3e900f4a2ec00af705f883539269545609",
"short": "[priority] **Following this release, yt-dlp's Python dependencies *must* be installed using the `default` group**\nIf you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's optional dependencies (which were previously included by default). [Read more](https://github.com/yt-dlp/yt-dlp/pull/11255)"
},
{
"action": "add",
"when": "87884f15580910e4e0fe0e1db73508debc657471",
"short": "[priority] **Beginning with this release, yt-dlp's Python dependencies *must* be installed using the `default` group**\nIf you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's optional dependencies (which were previously included by default). [Read more](https://github.com/yt-dlp/yt-dlp/pull/11255)"
},
{
"action": "add",
"when": "d784464399b600ba9516bbcec6286f11d68974dd",
"short": "[priority] **The minimum *required* Python version has been raised to 3.9**\nPython 3.8 reached its end-of-life on 2024.10.07, and yt-dlp has now removed support for it. As an unfortunate side effect, the official `yt-dlp.exe` and `yt-dlp_x86.exe` binaries are no longer supported on Windows 7. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)"
},
{
"action": "change",
"when": "914af9a0cf51c9a3f74aa88d952bee8334c67511",
"short": "Expand paths in `--plugin-dirs` (#11334)",
"authors": ["bashonly"]
},
{
"action": "change",
"when": "c29f5a7fae93a08f3cfbb6127b2faa75145b06a0",
"short": "[ie/generic] Do not impersonate by default (#11336)",
"authors": ["bashonly"]
},
{
"action": "change",
"when": "57212a5f97ce367590aaa5c3e9a135eead8f81f7",
"short": "[ie/vimeo] Fix API retries (#11351)",
"authors": ["bashonly"]
},
{
"action": "add",
"when": "52c0ffe40ad6e8404d93296f575007b05b04c686",
"short": "[priority] **Login with OAuth is no longer supported for YouTube**\nDue to a change made by the site, yt-dlp is no longer able to support OAuth login for YouTube. [Read more](https://github.com/yt-dlp/yt-dlp/issues/11462#issuecomment-2471703090)"
} }
] ]

View file

@ -11,13 +11,12 @@ import codecs
import subprocess import subprocess
from yt_dlp.aes import aes_encrypt, key_expansion from yt_dlp.aes import aes_encrypt, key_expansion
from yt_dlp.utils import intlist_to_bytes
secret_msg = b'Secret message goes here' secret_msg = b'Secret message goes here'
def hex_str(int_list): def hex_str(int_list):
return codecs.encode(intlist_to_bytes(int_list), 'hex') return codecs.encode(bytes(int_list), 'hex')
def openssl_encode(algo, key, iv): def openssl_encode(algo, key, iv):

View file

@ -71,14 +71,13 @@ class CommitGroup(enum.Enum):
def get(cls, value: str) -> tuple[CommitGroup | None, str | None]: def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
group, _, subgroup = (group.strip().lower() for group in value.partition('/')) group, _, subgroup = (group.strip().lower() for group in value.partition('/'))
result = cls.group_lookup().get(group) if result := cls.group_lookup().get(group):
if not result: return result, subgroup or None
if subgroup:
return None, value
subgroup = group
result = cls.subgroup_lookup().get(subgroup)
return result, subgroup or None if subgroup:
return None, value
return cls.subgroup_lookup().get(group), group or None
@dataclass @dataclass
@ -136,8 +135,7 @@ class Changelog:
first = False first = False
yield '\n<details><summary><h3>Changelog</h3></summary>\n' yield '\n<details><summary><h3>Changelog</h3></summary>\n'
group = groups[item] if group := groups[item]:
if group:
yield self.format_module(item.value, group) yield self.format_module(item.value, group)
if self._collapsible: if self._collapsible:
@ -253,7 +251,7 @@ class CommitRange:
''', re.VERBOSE | re.DOTALL) ''', re.VERBOSE | re.DOTALL)
EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE) EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})') REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert|Improve)\s+([\da-f]{40})') FIXES_RE = re.compile(r'(?i:(?:bug\s*)?fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Improve)\s+([\da-f]{40})')
UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)') UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')
def __init__(self, start, end, default_author=None): def __init__(self, start, end, default_author=None):
@ -287,11 +285,16 @@ class CommitRange:
short = next(lines) short = next(lines)
skip = short.startswith('Release ') or short == '[version] update' skip = short.startswith('Release ') or short == '[version] update'
fix_commitish = None
if match := self.FIXES_RE.search(short):
fix_commitish = match.group(1)
authors = [default_author] if default_author else [] authors = [default_author] if default_author else []
for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR): for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
match = self.AUTHOR_INDICATOR_RE.match(line) if match := self.AUTHOR_INDICATOR_RE.match(line):
if match:
authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold) authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)
if not fix_commitish and (match := self.FIXES_RE.fullmatch(line)):
fix_commitish = match.group(1)
commit = Commit(commit_hash, short, authors) commit = Commit(commit_hash, short, authors)
if skip and (self._start or not i): if skip and (self._start or not i):
@ -301,21 +304,17 @@ class CommitRange:
logger.debug(f'Reached Release commit, breaking: {commit}') logger.debug(f'Reached Release commit, breaking: {commit}')
break break
revert_match = self.REVERT_RE.fullmatch(commit.short) if match := self.REVERT_RE.fullmatch(commit.short):
if revert_match: reverts[match.group(1)] = commit
reverts[revert_match.group(1)] = commit
continue continue
fix_match = self.FIXES_RE.search(commit.short) if fix_commitish:
if fix_match: fixes[fix_commitish].append(commit)
commitish = fix_match.group(1)
fixes[commitish].append(commit)
commits[commit.hash] = commit commits[commit.hash] = commit
for commitish, revert_commit in reverts.items(): for commitish, revert_commit in reverts.items():
reverted = commits.pop(commitish, None) if reverted := commits.pop(commitish, None):
if reverted:
logger.debug(f'{commitish} fully reverted {reverted}') logger.debug(f'{commitish} fully reverted {reverted}')
else: else:
commits[revert_commit.hash] = revert_commit commits[revert_commit.hash] = revert_commit
@ -461,8 +460,7 @@ def create_changelog(args):
logger.info(f'Loaded {len(commits)} commits') logger.info(f'Loaded {len(commits)} commits')
new_contributors = get_new_contributors(args.contributors_path, commits) if new_contributors := get_new_contributors(args.contributors_path, commits):
if new_contributors:
if args.contributors: if args.contributors:
write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a') write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a')
logger.info(f'New contributors: {", ".join(new_contributors)}') logger.info(f'New contributors: {", ".join(new_contributors)}')

View file

@ -32,14 +32,15 @@ VERBOSE_TMPL = '''
placeholder: | placeholder: |
[debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023)
[debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Request Handlers: urllib, requests [debug] Request Handlers: urllib, requests, websockets, curl_cffi
[debug] Loaded 1893 extractors [debug] Loaded 1838 extractors
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds
yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
[youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
<more lines> <more lines>

View file

@ -13,7 +13,7 @@ maintainers = [
] ]
description = "A feature-rich command-line audio/video downloader" description = "A feature-rich command-line audio/video downloader"
readme = "README.md" readme = "README.md"
requires-python = ">=3.8" requires-python = ">=3.9"
keywords = [ keywords = [
"youtube-dl", "youtube-dl",
"video-downloader", "video-downloader",
@ -29,11 +29,11 @@ classifiers = [
"Environment :: Console", "Environment :: Console",
"Programming Language :: Python", "Programming Language :: Python",
"Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: Implementation", "Programming Language :: Python :: Implementation",
"Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy", "Programming Language :: Python :: Implementation :: PyPy",
@ -41,7 +41,10 @@ classifiers = [
"Operating System :: OS Independent", "Operating System :: OS Independent",
] ]
dynamic = ["version"] dynamic = ["version"]
dependencies = [ dependencies = []
[project.optional-dependencies]
default = [
"brotli; implementation_name=='cpython'", "brotli; implementation_name=='cpython'",
"brotlicffi; implementation_name!='cpython'", "brotlicffi; implementation_name!='cpython'",
"certifi", "certifi",
@ -49,11 +52,8 @@ dependencies = [
"pycryptodomex", "pycryptodomex",
"requests>=2.32.2,<3", "requests>=2.32.2,<3",
"urllib3>=1.26.17,<3", "urllib3>=1.26.17,<3",
"websockets>=13.0", "websockets>=13.0,<14",
] ]
[project.optional-dependencies]
default = []
curl-cffi = [ curl-cffi = [
"curl-cffi==0.5.10; os_name=='nt' and implementation_name=='cpython'", "curl-cffi==0.5.10; os_name=='nt' and implementation_name=='cpython'",
"curl-cffi>=0.5.10,!=0.6.*,<0.7.2; os_name!='nt' and implementation_name=='cpython'", "curl-cffi>=0.5.10,!=0.6.*,<0.7.2; os_name!='nt' and implementation_name=='cpython'",
@ -76,17 +76,14 @@ dev = [
] ]
static-analysis = [ static-analysis = [
"autopep8~=2.0", "autopep8~=2.0",
"ruff~=0.6.0", "ruff~=0.7.0",
] ]
test = [ test = [
"pytest~=8.1", "pytest~=8.1",
"pytest-rerunfailures~=14.0", "pytest-rerunfailures~=14.0",
] ]
pyinstaller = [ pyinstaller = [
"pyinstaller>=6.10.0", # Windows temp cleanup fixed in 6.10.0 "pyinstaller>=6.11.1", # Windows temp cleanup fixed in 6.11.1
]
py2exe = [
"py2exe>=0.12",
] ]
[project.urls] [project.urls]
@ -172,13 +169,11 @@ run-cov = "echo Code coverage not implemented && exit 1"
[[tool.hatch.envs.hatch-test.matrix]] [[tool.hatch.envs.hatch-test.matrix]]
python = [ python = [
"3.8",
"3.9", "3.9",
"3.10", "3.10",
"3.11", "3.11",
"3.12", "3.12",
"pypy3.8", "3.13",
"pypy3.9",
"pypy3.10", "pypy3.10",
] ]
@ -318,6 +313,16 @@ banned-from = [
"yt_dlp.compat.compat_urllib_parse_urlparse".msg = "Use `urllib.parse.urlparse` instead." "yt_dlp.compat.compat_urllib_parse_urlparse".msg = "Use `urllib.parse.urlparse` instead."
"yt_dlp.compat.compat_shlex_quote".msg = "Use `yt_dlp.utils.shell_quote` instead." "yt_dlp.compat.compat_shlex_quote".msg = "Use `yt_dlp.utils.shell_quote` instead."
"yt_dlp.utils.error_to_compat_str".msg = "Use `str` instead." "yt_dlp.utils.error_to_compat_str".msg = "Use `str` instead."
"yt_dlp.utils.bytes_to_intlist".msg = "Use `list` instead."
"yt_dlp.utils.intlist_to_bytes".msg = "Use `bytes` instead."
"yt_dlp.utils.decodeArgument".msg = "Do not use"
"yt_dlp.utils.decodeFilename".msg = "Do not use"
"yt_dlp.utils.encodeFilename".msg = "Do not use"
"yt_dlp.compat.compat_os_name".msg = "Use `os.name` instead."
"yt_dlp.compat.compat_realpath".msg = "Use `os.path.realpath` instead."
"yt_dlp.compat.functools".msg = "Use `functools` instead."
"yt_dlp.utils.decodeOption".msg = "Do not use"
"yt_dlp.utils.compiled_regex_type".msg = "Use `re.Pattern` instead."
[tool.autopep8] [tool.autopep8]
max_line_length = 120 max_line_length = 120

View file

@ -16,7 +16,7 @@ remove-unused-variables = true
[tox:tox] [tox:tox]
skipsdist = true skipsdist = true
envlist = py{38,39,310,311,312},pypy{38,39,310} envlist = py{39,310,311,312,313},pypy310
skip_missing_interpreters = true skip_missing_interpreters = true
[testenv] # tox [testenv] # tox
@ -29,7 +29,7 @@ setenv =
[isort] [isort]
py_version = 38 py_version = 39
multi_line_output = VERTICAL_HANGING_INDENT multi_line_output = VERTICAL_HANGING_INDENT
line_length = 80 line_length = 80
reverse_relative = true reverse_relative = true

View file

@ -45,10 +45,6 @@
- **aenetworks:collection** - **aenetworks:collection**
- **aenetworks:show** - **aenetworks:show**
- **AeonCo** - **AeonCo**
- **afreecatv**: [*afreecatv*](## "netrc machine") afreecatv.com
- **afreecatv:catchstory**: [*afreecatv*](## "netrc machine") afreecatv.com catch story
- **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com livestreams
- **afreecatv:user**
- **AirTV** - **AirTV**
- **AitubeKZVideo** - **AitubeKZVideo**
- **AliExpressLive** - **AliExpressLive**
@ -133,6 +129,8 @@
- **Bandcamp:album** - **Bandcamp:album**
- **Bandcamp:user** - **Bandcamp:user**
- **Bandcamp:weekly** - **Bandcamp:weekly**
- **Bandlab**
- **BandlabPlaylist**
- **BannedVideo** - **BannedVideo**
- **bbc**: [*bbc*](## "netrc machine") BBC - **bbc**: [*bbc*](## "netrc machine") BBC
- **bbc.co.uk**: [*bbc*](## "netrc machine") BBC iPlayer - **bbc.co.uk**: [*bbc*](## "netrc machine") BBC iPlayer
@ -194,6 +192,7 @@
- **blerp** - **blerp**
- **blogger.com** - **blogger.com**
- **Bloomberg** - **Bloomberg**
- **Bluesky**
- **BokeCC** - **BokeCC**
- **BongaCams** - **BongaCams**
- **Boosty** - **Boosty**
@ -251,9 +250,10 @@
- **cbsnews:livevideo**: CBS News Live Videos - **cbsnews:livevideo**: CBS News Live Videos
- **cbssports**: (**Currently broken**) - **cbssports**: (**Currently broken**)
- **cbssports:embed**: (**Currently broken**) - **cbssports:embed**: (**Currently broken**)
- **CCMA** - **CCMA**: 3Cat, TV3 and Catalunya Ràdio
- **CCTV**: 央视网 - **CCTV**: 央视网
- **CDA**: [*cdapl*](## "netrc machine") - **CDA**: [*cdapl*](## "netrc machine")
- **CDAFolder**
- **Cellebrite** - **Cellebrite**
- **CeskaTelevize** - **CeskaTelevize**
- **CGTN** - **CGTN**
@ -283,8 +283,6 @@
- **cmt.com**: (**Currently broken**) - **cmt.com**: (**Currently broken**)
- **CNBCVideo** - **CNBCVideo**
- **CNN** - **CNN**
- **CNNArticle**
- **CNNBlogs**
- **CNNIndonesia** - **CNNIndonesia**
- **ComedyCentral** - **ComedyCentral**
- **ComedyCentralTV** - **ComedyCentralTV**
@ -488,6 +486,7 @@
- **Gab** - **Gab**
- **GabTV** - **GabTV**
- **Gaia**: [*gaia*](## "netrc machine") - **Gaia**: [*gaia*](## "netrc machine")
- **GameDevTVDashboard**: [*gamedevtv*](## "netrc machine")
- **GameJolt** - **GameJolt**
- **GameJoltCommunity** - **GameJoltCommunity**
- **GameJoltGame** - **GameJoltGame**
@ -655,6 +654,8 @@
- **Karaoketv** - **Karaoketv**
- **Katsomo**: (**Currently broken**) - **Katsomo**: (**Currently broken**)
- **KelbyOne**: (**Currently broken**) - **KelbyOne**: (**Currently broken**)
- **Kenh14Playlist**
- **Kenh14Video**
- **Ketnet** - **Ketnet**
- **khanacademy** - **khanacademy**
- **khanacademy:unit** - **khanacademy:unit**
@ -688,9 +689,9 @@
- **LastFMPlaylist** - **LastFMPlaylist**
- **LastFMUser** - **LastFMUser**
- **LaXarxaMes**: [*laxarxames*](## "netrc machine") - **LaXarxaMes**: [*laxarxames*](## "netrc machine")
- **lbry** - **lbry**: odysee.com
- **lbry:channel** - **lbry:channel**: odysee.com channels
- **lbry:playlist** - **lbry:playlist**: odysee.com playlists
- **LCI** - **LCI**
- **Lcp** - **Lcp**
- **LcpPlay** - **LcpPlay**
@ -788,10 +789,6 @@
- **MicrosoftLearnSession** - **MicrosoftLearnSession**
- **MicrosoftMedius** - **MicrosoftMedius**
- **microsoftstream**: Microsoft Stream - **microsoftstream**: Microsoft Stream
- **mildom**: Record ongoing live by specific user in Mildom
- **mildom:clip**: Clip in Mildom
- **mildom:user:vod**: Download all VODs from specific user in Mildom
- **mildom:vod**: VOD in Mildom
- **minds** - **minds**
- **minds:channel** - **minds:channel**
- **minds:group** - **minds:group**
@ -802,6 +799,7 @@
- **MiTele**: mitele.es - **MiTele**: mitele.es
- **mixch** - **mixch**
- **mixch:archive** - **mixch:archive**
- **mixch:movie**
- **mixcloud** - **mixcloud**
- **mixcloud:playlist** - **mixcloud:playlist**
- **mixcloud:user** - **mixcloud:user**
@ -1046,8 +1044,8 @@
- **Parler**: Posts on parler.com - **Parler**: Posts on parler.com
- **parliamentlive.tv**: UK parliament videos - **parliamentlive.tv**: UK parliament videos
- **Parlview**: (**Currently broken**) - **Parlview**: (**Currently broken**)
- **Patreon** - **patreon**
- **PatreonCampaign** - **patreon:campaign**
- **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) - **pbs**: Public 
Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
- **PBSKids** - **PBSKids**
- **PearVideo** - **PearVideo**
@ -1064,8 +1062,8 @@
- **PhilharmonieDeParis**: Philharmonie de Paris - **PhilharmonieDeParis**: Philharmonie de Paris
- **phoenix.de** - **phoenix.de**
- **Photobucket** - **Photobucket**
- **PiaLive**
- **Piapro**: [*piapro*](## "netrc machine") - **Piapro**: [*piapro*](## "netrc machine")
- **PIAULIZAPortal**: ulizaportal.jp - PIA LIVE STREAM
- **Picarto** - **Picarto**
- **PicartoVod** - **PicartoVod**
- **Piksel** - **Piksel**
@ -1092,8 +1090,6 @@
- **PodbayFMChannel** - **PodbayFMChannel**
- **Podchaser** - **Podchaser**
- **podomatic**: (**Currently broken**) - **podomatic**: (**Currently broken**)
- **Pokemon**
- **PokemonWatch**
- **PokerGo**: [*pokergo*](## "netrc machine") - **PokerGo**: [*pokergo*](## "netrc machine")
- **PokerGoCollection**: [*pokergo*](## "netrc machine") - **PokerGoCollection**: [*pokergo*](## "netrc machine")
- **PolsatGo** - **PolsatGo**
@ -1164,6 +1160,7 @@
- **RadioJavan**: (**Currently broken**) - **RadioJavan**: (**Currently broken**)
- **radiokapital** - **radiokapital**
- **radiokapital:show** - **radiokapital:show**
- **RadioRadicale**
- **RadioZetPodcast** - **RadioZetPodcast**
- **radlive** - **radlive**
- **radlive:channel** - **radlive:channel**
@ -1339,6 +1336,10 @@
- **SohuV** - **SohuV**
- **SonyLIV**: [*sonyliv*](## "netrc machine") - **SonyLIV**: [*sonyliv*](## "netrc machine")
- **SonyLIVSeries** - **SonyLIVSeries**
- **soop**: [*afreecatv*](## "netrc machine") sooplive.co.kr
- **soop:catchstory**: [*afreecatv*](## "netrc machine") sooplive.co.kr catch story
- **soop:live**: [*afreecatv*](## "netrc machine") sooplive.co.kr livestreams
- **soop:user**: [*afreecatv*](## "netrc machine")
- **soundcloud**: [*soundcloud*](## "netrc machine") - **soundcloud**: [*soundcloud*](## "netrc machine")
- **soundcloud:playlist**: [*soundcloud*](## "netrc machine") - **soundcloud:playlist**: [*soundcloud*](## "netrc machine")
- **soundcloud:related**: [*soundcloud*](## "netrc machine") - **soundcloud:related**: [*soundcloud*](## "netrc machine")
@ -1367,9 +1368,7 @@
- **spotify**: Spotify episodes (**Currently broken**) - **spotify**: Spotify episodes (**Currently broken**)
- **spotify:show**: Spotify shows (**Currently broken**) - **spotify:show**: Spotify shows (**Currently broken**)
- **Spreaker** - **Spreaker**
- **SpreakerPage**
- **SpreakerShow** - **SpreakerShow**
- **SpreakerShowPage**
- **SpringboardPlatform** - **SpringboardPlatform**
- **Sprout** - **Sprout**
- **SproutVideo** - **SproutVideo**
@ -1445,7 +1444,7 @@
- **TeleQuebecSquat** - **TeleQuebecSquat**
- **TeleQuebecVideo** - **TeleQuebecVideo**
- **TeleTask**: (**Currently broken**) - **TeleTask**: (**Currently broken**)
- **Telewebion** - **Telewebion**: (**Currently broken**)
- **Tempo** - **Tempo**
- **TennisTV**: [*tennistv*](## "netrc machine") - **TennisTV**: [*tennistv*](## "netrc machine")
- **TenPlay**: [*10play*](## "netrc machine") - **TenPlay**: [*10play*](## "netrc machine")
@ -1570,6 +1569,8 @@
- **UFCTV**: [*ufctv*](## "netrc machine") - **UFCTV**: [*ufctv*](## "netrc machine")
- **ukcolumn**: (**Currently broken**) - **ukcolumn**: (**Currently broken**)
- **UKTVPlay** - **UKTVPlay**
- **UlizaPlayer**
- **UlizaPortal**: ulizaportal.jp
- **umg:de**: Universal Music Deutschland (**Currently broken**) - **umg:de**: Universal Music Deutschland (**Currently broken**)
- **Unistra** - **Unistra**
- **Unity**: (**Currently broken**) - **Unity**: (**Currently broken**)
@ -1587,8 +1588,6 @@
- **Varzesh3**: (**Currently broken**) - **Varzesh3**: (**Currently broken**)
- **Vbox7** - **Vbox7**
- **Veo** - **Veo**
- **Veoh**
- **veoh:user**
- **Vesti**: Вести.Ru (**Currently broken**) - **Vesti**: Вести.Ru (**Currently broken**)
- **Vevo** - **Vevo**
- **VevoPlaylist** - **VevoPlaylist**
@ -1778,24 +1777,24 @@
- **YouPornStar**: YouPorn Pornstar, with description, sorting and pagination - **YouPornStar**: YouPorn Pornstar, with description, sorting and pagination
- **YouPornTag**: YouPorn tag (porntags), with sorting, filtering and pagination - **YouPornTag**: YouPorn tag (porntags), with sorting, filtering and pagination
- **YouPornVideos**: YouPorn video (browse) playlists, with sorting, filtering and pagination - **YouPornVideos**: YouPorn video (browse) playlists, with sorting, filtering and pagination
- **youtube**: YouTube - **youtube**: [*youtube*](## "netrc machine") YouTube
- **youtube:clip** - **youtube:clip**: [*youtube*](## "netrc machine")
- **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies) - **youtube:favorites**: [*youtube*](## "netrc machine") YouTube liked videos; ":ytfav" keyword (requires cookies)
- **youtube:history**: Youtube watch history; ":ythis" keyword (requires cookies) - **youtube:history**: [*youtube*](## "netrc machine") Youtube watch history; ":ythis" keyword (requires cookies)
- **youtube:music:search_url**: YouTube music search URLs with selectable sections, e.g. #songs - **youtube:music:search_url**: [*youtube*](## "netrc machine") YouTube music search URLs with selectable sections, e.g. #songs
- **youtube:notif**: YouTube notifications; ":ytnotif" keyword (requires cookies) - **youtube:notif**: [*youtube*](## "netrc machine") YouTube notifications; ":ytnotif" keyword (requires cookies)
- **youtube:playlist**: YouTube playlists - **youtube:playlist**: [*youtube*](## "netrc machine") YouTube playlists
- **youtube:recommended**: YouTube recommended videos; ":ytrec" keyword - **youtube:recommended**: [*youtube*](## "netrc machine") YouTube recommended videos; ":ytrec" keyword
- **youtube:search**: YouTube search; "ytsearch:" prefix - **youtube:search**: [*youtube*](## "netrc machine") YouTube search; "ytsearch:" prefix
- **youtube:search:date**: YouTube search, newest videos first; "ytsearchdate:" prefix - **youtube:search:date**: [*youtube*](## "netrc machine") YouTube search, newest videos first; "ytsearchdate:" prefix
- **youtube:search_url**: YouTube search URLs with sorting and filter support - **youtube:search_url**: [*youtube*](## "netrc machine") YouTube search URLs with sorting and filter support
- **youtube:shorts:pivot:audio**: YouTube Shorts audio pivot (Shorts using audio of a given video) - **youtube:shorts:pivot:audio**: [*youtube*](## "netrc machine") YouTube Shorts audio pivot (Shorts using audio of a given video)
- **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies) - **youtube:subscriptions**: [*youtube*](## "netrc machine") YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)
- **youtube:tab**: YouTube Tabs - **youtube:tab**: [*youtube*](## "netrc machine") YouTube Tabs
- **youtube:user**: YouTube user videos; "ytuser:" prefix - **youtube:user**: [*youtube*](## "netrc machine") YouTube user videos; "ytuser:" prefix
- **youtube:watchlater**: Youtube watch later list; ":ytwatchlater" keyword (requires cookies) - **youtube:watchlater**: [*youtube*](## "netrc machine") Youtube watch later list; ":ytwatchlater" keyword (requires cookies)
- **YoutubeLivestreamEmbed**: YouTube livestream embeds - **YoutubeLivestreamEmbed**: [*youtube*](## "netrc machine") YouTube livestream embeds
- **YoutubeYtBe**: youtu.be - **YoutubeYtBe**: [*youtube*](## "netrc machine") youtu.be
- **Zaiko** - **Zaiko**
- **ZaikoETicket** - **ZaikoETicket**
- **Zapiks** - **Zapiks**

View file

@ -9,7 +9,6 @@ import types
import yt_dlp.extractor import yt_dlp.extractor
from yt_dlp import YoutubeDL from yt_dlp import YoutubeDL
from yt_dlp.compat import compat_os_name
from yt_dlp.utils import preferredencoding, try_call, write_string, find_available_port from yt_dlp.utils import preferredencoding, try_call, write_string, find_available_port
if 'pytest' in sys.modules: if 'pytest' in sys.modules:
@ -49,7 +48,7 @@ def report_warning(message, *args, **kwargs):
Print the message to stderr, it will be prefixed with 'WARNING:' Print the message to stderr, it will be prefixed with 'WARNING:'
If stderr is a tty file the 'WARNING:' will be colored If stderr is a tty file the 'WARNING:' will be colored
""" """
if sys.stderr.isatty() and compat_os_name != 'nt': if sys.stderr.isatty() and os.name != 'nt':
_msg_header = '\033[0;33mWARNING:\033[0m' _msg_header = '\033[0;33mWARNING:\033[0m'
else: else:
_msg_header = 'WARNING:' _msg_header = 'WARNING:'

View file

@ -53,6 +53,18 @@ class TestInfoExtractor(unittest.TestCase):
def test_ie_key(self): def test_ie_key(self):
self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE) self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
def test_get_netrc_login_info(self):
    """Check netrc credential lookup through both a netrc file and a netrc command."""
    # (machine name, expected (username, password)) pairs, checked in this order
    expected_logins = (
        ('normal_use', ('user', 'pass')),
        ('empty_user', ('', 'pass')),
        ('empty_pass', ('user', '')),
        ('both_empty', ('', '')),
        ('nonexistent', (None, None)),
    )
    # Credentials read directly from a netrc file on disk
    netrc_file_params = {'usenetrc': True, 'netrc_location': './test/testdata/netrc/netrc'}
    # Credentials obtained by running a helper script with the current interpreter
    netrc_cmd_params = {'netrc_cmd': f'{sys.executable} ./test/testdata/netrc/print_netrc.py'}

    for params in (netrc_file_params, netrc_cmd_params):
        ie = DummyIE(FakeYDL(params))
        for machine, credentials in expected_logins:
            self.assertEqual(ie._get_netrc_login_info(netrc_machine=machine), credentials)
def test_html_search_regex(self): def test_html_search_regex(self):
html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>' html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'
search = lambda re, *args: self.ie._html_search_regex(re, html, *args) search = lambda re, *args: self.ie._html_search_regex(re, html, *args)

View file

@ -15,7 +15,6 @@ import json
from test.helper import FakeYDL, assertRegexpMatches, try_rm from test.helper import FakeYDL, assertRegexpMatches, try_rm
from yt_dlp import YoutubeDL from yt_dlp import YoutubeDL
from yt_dlp.compat import compat_os_name
from yt_dlp.extractor import YoutubeIE from yt_dlp.extractor import YoutubeIE
from yt_dlp.extractor.common import InfoExtractor from yt_dlp.extractor.common import InfoExtractor
from yt_dlp.postprocessor.common import PostProcessor from yt_dlp.postprocessor.common import PostProcessor
@ -839,8 +838,8 @@ class TestYoutubeDL(unittest.TestCase):
test('%(filesize)#D', '1Ki') test('%(filesize)#D', '1Ki')
test('%(height)5.2D', ' 1.08k') test('%(height)5.2D', ' 1.08k')
test('%(title4)#S', 'foo_bar_test') test('%(title4)#S', 'foo_bar_test')
test('%(title4).10S', ('foo bar ', 'foo bar' + ('#' if compat_os_name == 'nt' else ' '))) test('%(title4).10S', ('foo bar ', 'foo bar' + ('#' if os.name == 'nt' else ' ')))
if compat_os_name == 'nt': if os.name == 'nt':
test('%(title4)q', ('"foo ""bar"" test"', None)) test('%(title4)q', ('"foo ""bar"" test"', None))
test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', None)) test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', None))
test('%(formats.0.id)#q', ('"id 1"', None)) test('%(formats.0.id)#q', ('"id 1"', None))
@ -903,9 +902,9 @@ class TestYoutubeDL(unittest.TestCase):
# Environment variable expansion for prepare_filename # Environment variable expansion for prepare_filename
os.environ['__yt_dlp_var'] = 'expanded' os.environ['__yt_dlp_var'] = 'expanded'
envvar = '%__yt_dlp_var%' if compat_os_name == 'nt' else '$__yt_dlp_var' envvar = '%__yt_dlp_var%' if os.name == 'nt' else '$__yt_dlp_var'
test(envvar, (envvar, 'expanded')) test(envvar, (envvar, 'expanded'))
if compat_os_name == 'nt': if os.name == 'nt':
test('%s%', ('%s%', '%s%')) test('%s%', ('%s%', '%s%'))
os.environ['s'] = 'expanded' os.environ['s'] = 'expanded'
test('%s%', ('%s%', 'expanded')) # %s% should be expanded before escaping %s test('%s%', ('%s%', 'expanded')) # %s% should be expanded before escaping %s

View file

@ -27,7 +27,6 @@ from yt_dlp.aes import (
pad_block, pad_block,
) )
from yt_dlp.dependencies import Cryptodome from yt_dlp.dependencies import Cryptodome
from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes
# the encrypted data can be generated with 'devscripts/generate_aes_testdata.py'
@ -40,33 +39,33 @@ class TestAES(unittest.TestCase):
def test_encrypt(self): def test_encrypt(self):
msg = b'message' msg = b'message'
key = list(range(16)) key = list(range(16))
encrypted = aes_encrypt(bytes_to_intlist(msg), key) encrypted = aes_encrypt(list(msg), key)
decrypted = intlist_to_bytes(aes_decrypt(encrypted, key)) decrypted = bytes(aes_decrypt(encrypted, key))
self.assertEqual(decrypted, msg) self.assertEqual(decrypted, msg)
def test_cbc_decrypt(self): def test_cbc_decrypt(self):
data = b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\x27\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd' data = b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\x27\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd'
decrypted = intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(data), self.key, self.iv)) decrypted = bytes(aes_cbc_decrypt(list(data), self.key, self.iv))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
if Cryptodome.AES: if Cryptodome.AES:
decrypted = aes_cbc_decrypt_bytes(data, intlist_to_bytes(self.key), intlist_to_bytes(self.iv)) decrypted = aes_cbc_decrypt_bytes(data, bytes(self.key), bytes(self.iv))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
def test_cbc_encrypt(self): def test_cbc_encrypt(self):
data = bytes_to_intlist(self.secret_msg) data = list(self.secret_msg)
encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv)) encrypted = bytes(aes_cbc_encrypt(data, self.key, self.iv))
self.assertEqual( self.assertEqual(
encrypted, encrypted,
b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd') b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd')
def test_ctr_decrypt(self): def test_ctr_decrypt(self):
data = bytes_to_intlist(b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08') data = list(b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08')
decrypted = intlist_to_bytes(aes_ctr_decrypt(data, self.key, self.iv)) decrypted = bytes(aes_ctr_decrypt(data, self.key, self.iv))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
def test_ctr_encrypt(self): def test_ctr_encrypt(self):
data = bytes_to_intlist(self.secret_msg) data = list(self.secret_msg)
encrypted = intlist_to_bytes(aes_ctr_encrypt(data, self.key, self.iv)) encrypted = bytes(aes_ctr_encrypt(data, self.key, self.iv))
self.assertEqual( self.assertEqual(
encrypted, encrypted,
b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08') b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08')
@ -75,47 +74,59 @@ class TestAES(unittest.TestCase):
data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f.\x08\xb4T\xe4/\x17\xbd' data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f.\x08\xb4T\xe4/\x17\xbd'
authentication_tag = b'\xe8&I\x80rI\x07\x9d}YWuU@:e' authentication_tag = b'\xe8&I\x80rI\x07\x9d}YWuU@:e'
decrypted = intlist_to_bytes(aes_gcm_decrypt_and_verify( decrypted = bytes(aes_gcm_decrypt_and_verify(
bytes_to_intlist(data), self.key, bytes_to_intlist(authentication_tag), self.iv[:12])) list(data), self.key, list(authentication_tag), self.iv[:12]))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
if Cryptodome.AES: if Cryptodome.AES:
decrypted = aes_gcm_decrypt_and_verify_bytes( decrypted = aes_gcm_decrypt_and_verify_bytes(
data, intlist_to_bytes(self.key), authentication_tag, intlist_to_bytes(self.iv[:12])) data, bytes(self.key), authentication_tag, bytes(self.iv[:12]))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
def test_gcm_aligned_decrypt(self):
    """Decrypt and verify a GCM ciphertext that is exactly 16 bytes long."""
    ciphertext = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f'
    tag = b'\x08\xb1\x9d!&\x98\xd0\xeaRq\x90\xe6;\xb5]\xd8'
    nonce = self.iv[:12]
    expected = self.secret_msg[:16]

    # Int-list implementation: inputs and output are lists of byte values
    plaintext = bytes(aes_gcm_decrypt_and_verify(list(ciphertext), self.key, list(tag), nonce))
    self.assertEqual(plaintext.rstrip(b'\x08'), expected)

    # The bytes-based variant is only exercised when Cryptodome's AES is available
    if not Cryptodome.AES:
        return
    plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, bytes(self.key), tag, bytes(nonce))
    self.assertEqual(plaintext.rstrip(b'\x08'), expected)
def test_decrypt_text(self): def test_decrypt_text(self):
password = intlist_to_bytes(self.key).decode() password = bytes(self.key).decode()
encrypted = base64.b64encode( encrypted = base64.b64encode(
intlist_to_bytes(self.iv[:8]) bytes(self.iv[:8])
+ b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae', + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae',
).decode() ).decode()
decrypted = (aes_decrypt_text(encrypted, password, 16)) decrypted = (aes_decrypt_text(encrypted, password, 16))
self.assertEqual(decrypted, self.secret_msg) self.assertEqual(decrypted, self.secret_msg)
password = intlist_to_bytes(self.key).decode() password = bytes(self.key).decode()
encrypted = base64.b64encode( encrypted = base64.b64encode(
intlist_to_bytes(self.iv[:8]) bytes(self.iv[:8])
+ b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83', + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83',
).decode() ).decode()
decrypted = (aes_decrypt_text(encrypted, password, 32)) decrypted = (aes_decrypt_text(encrypted, password, 32))
self.assertEqual(decrypted, self.secret_msg) self.assertEqual(decrypted, self.secret_msg)
def test_ecb_encrypt(self): def test_ecb_encrypt(self):
data = bytes_to_intlist(self.secret_msg) data = list(self.secret_msg)
encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key)) encrypted = bytes(aes_ecb_encrypt(data, self.key))
self.assertEqual( self.assertEqual(
encrypted, encrypted,
b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')
def test_ecb_decrypt(self): def test_ecb_decrypt(self):
data = bytes_to_intlist(b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') data = list(b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')
decrypted = intlist_to_bytes(aes_ecb_decrypt(data, self.key, self.iv)) decrypted = bytes(aes_ecb_decrypt(data, self.key, self.iv))
self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
def test_key_expansion(self): def test_key_expansion(self):
key = '4f6bdaa39e2f8cb07f5e722d9edef314' key = '4f6bdaa39e2f8cb07f5e722d9edef314'
self.assertEqual(key_expansion(bytes_to_intlist(bytearray.fromhex(key))), [ self.assertEqual(key_expansion(list(bytearray.fromhex(key))), [
0x4F, 0x6B, 0xDA, 0xA3, 0x9E, 0x2F, 0x8C, 0xB0, 0x7F, 0x5E, 0x72, 0x2D, 0x9E, 0xDE, 0xF3, 0x14, 0x4F, 0x6B, 0xDA, 0xA3, 0x9E, 0x2F, 0x8C, 0xB0, 0x7F, 0x5E, 0x72, 0x2D, 0x9E, 0xDE, 0xF3, 0x14,
0x53, 0x66, 0x20, 0xA8, 0xCD, 0x49, 0xAC, 0x18, 0xB2, 0x17, 0xDE, 0x35, 0x2C, 0xC9, 0x2D, 0x21, 0x53, 0x66, 0x20, 0xA8, 0xCD, 0x49, 0xAC, 0x18, 0xB2, 0x17, 0xDE, 0x35, 0x2C, 0xC9, 0x2D, 0x21,
0x8C, 0xBE, 0xDD, 0xD9, 0x41, 0xF7, 0x71, 0xC1, 0xF3, 0xE0, 0xAF, 0xF4, 0xDF, 0x29, 0x82, 0xD5, 0x8C, 0xBE, 0xDD, 0xD9, 0x41, 0xF7, 0x71, 0xC1, 0xF3, 0xE0, 0xAF, 0xF4, 0xDF, 0x29, 0x82, 0xD5,

View file

@ -12,12 +12,7 @@ import struct
from yt_dlp import compat from yt_dlp import compat
from yt_dlp.compat import urllib # isort: split from yt_dlp.compat import urllib # isort: split
from yt_dlp.compat import ( from yt_dlp.compat import compat_etree_fromstring, compat_expanduser
compat_etree_fromstring,
compat_expanduser,
compat_urllib_parse_unquote, # noqa: TID251
compat_urllib_parse_urlencode, # noqa: TID251
)
from yt_dlp.compat.urllib.request import getproxies from yt_dlp.compat.urllib.request import getproxies
@ -43,39 +38,6 @@ class TestCompat(unittest.TestCase):
finally: finally:
os.environ['HOME'] = old_home or '' os.environ['HOME'] = old_home or ''
def test_compat_urllib_parse_unquote(self):
self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def')
self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def')
self.assertEqual(compat_urllib_parse_unquote(''), '')
self.assertEqual(compat_urllib_parse_unquote('%'), '%')
self.assertEqual(compat_urllib_parse_unquote('%%'), '%%')
self.assertEqual(compat_urllib_parse_unquote('%%%'), '%%%')
self.assertEqual(compat_urllib_parse_unquote('%2F'), '/')
self.assertEqual(compat_urllib_parse_unquote('%2f'), '/')
self.assertEqual(compat_urllib_parse_unquote('%E6%B4%A5%E6%B3%A2'), '津波')
self.assertEqual(
compat_urllib_parse_unquote('''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%25%E2%96%85%E2%96%86%E2%96%87%E2%96%88" />
%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a'''),
'''<meta property="og:description" content="▁▂▃▄%▅▆▇█" />
%<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''')
self.assertEqual(
compat_urllib_parse_unquote('''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%'''),
'''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%Things%''')
def test_compat_urllib_parse_unquote_plus(self):
self.assertEqual(urllib.parse.unquote_plus('abc%20def'), 'abc def')
self.assertEqual(urllib.parse.unquote_plus('%7e/abc+def'), '~/abc def')
def test_compat_urllib_parse_urlencode(self):
self.assertEqual(compat_urllib_parse_urlencode({'abc': 'def'}), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode({'abc': b'def'}), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode({b'abc': 'def'}), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode({b'abc': b'def'}), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode([('abc', 'def')]), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode([('abc', b'def')]), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode([(b'abc', 'def')]), 'abc=def')
self.assertEqual(compat_urllib_parse_urlencode([(b'abc', b'def')]), 'abc=def')
def test_compat_etree_fromstring(self): def test_compat_etree_fromstring(self):
xml = ''' xml = '''
<root foo="bar" spam="中文"> <root foo="bar" spam="中文">

View file

@ -105,6 +105,13 @@ class TestCookies(unittest.TestCase):
decryptor = LinuxChromeCookieDecryptor('Chrome', Logger()) decryptor = LinuxChromeCookieDecryptor('Chrome', Logger())
self.assertEqual(decryptor.decrypt(encrypted_value), value) self.assertEqual(decryptor.decrypt(encrypted_value), value)
def test_chrome_cookie_decryptor_linux_v10_meta24(self):
with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b''}):
encrypted_value = b'v10\x1f\xe4\x0e[\x83\x0c\xcc*kPi \xce\x8d\x1d\xbb\x80\r\x11\t\xbb\x9e^Hy\x94\xf4\x963\x9f\x82\xba\xfe\xa1\xed\xb9\xf1)\x00710\x92\xc8/<\x96B'
value = 'DE'
decryptor = LinuxChromeCookieDecryptor('Chrome', Logger(), meta_version=24)
self.assertEqual(decryptor.decrypt(encrypted_value), value)
def test_chrome_cookie_decryptor_windows_v10(self): def test_chrome_cookie_decryptor_windows_v10(self):
with MonkeyPatch(cookies, { with MonkeyPatch(cookies, {
'_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2<z\x16]\n\xbb\xb8\xcb\xd7\x9bA\xc3\x14e\x99{\xd6\xf4&', '_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2<z\x16]\n\xbb\xb8\xcb\xd7\x9bA\xc3\x14e\x99{\xd6\xf4&',
@ -114,6 +121,15 @@ class TestCookies(unittest.TestCase):
decryptor = WindowsChromeCookieDecryptor('', Logger()) decryptor = WindowsChromeCookieDecryptor('', Logger())
self.assertEqual(decryptor.decrypt(encrypted_value), value) self.assertEqual(decryptor.decrypt(encrypted_value), value)
def test_chrome_cookie_decryptor_windows_v10_meta24(self):
with MonkeyPatch(cookies, {
'_get_windows_v10_key': lambda *args, **kwargs: b'\xea\x8b\x02\xc3\xc6\xc5\x99\xc3\xa3[ j\xfa\xf6\xfcU\xac\x13u\xdc\x0c\x0e\xf1\x03\x90\xb6\xdf\xbb\x8fL\xb1\xb2',
}):
encrypted_value = b'v10dN\xe1\xacy\x84^\xe1I\xact\x03r\xfb\xe2\xce{^\x0e<(\xb0y\xeb\x01\xfb@"\x9e\x8c\xa53~\xdb*\x8f\xac\x8b\xe3\xfd3\x06\xe5\x93\x19OyOG\xb2\xfb\x1d$\xc0\xda\x13j\x9e\xfe\xc5\xa3\xa8\xfe\xd9'
value = '1234'
decryptor = WindowsChromeCookieDecryptor('', Logger(), meta_version=24)
self.assertEqual(decryptor.decrypt(encrypted_value), value)
def test_chrome_cookie_decryptor_mac_v10(self): def test_chrome_cookie_decryptor_mac_v10(self):
with MonkeyPatch(cookies, {'_get_mac_keyring_password': lambda *args, **kwargs: b'6eIDUdtKAacvlHwBVwvg/Q=='}): with MonkeyPatch(cookies, {'_get_mac_keyring_password': lambda *args, **kwargs: b'6eIDUdtKAacvlHwBVwvg/Q=='}):
encrypted_value = b'v10\xb3\xbe\xad\xa1[\x9fC\xa1\x98\xe0\x9a\x01\xd9\xcf\xbfc' encrypted_value = b'v10\xb3\xbe\xad\xa1[\x9fC\xa1\x98\xe0\x9a\x01\xd9\xcf\xbfc'

View file

@ -15,7 +15,6 @@ import threading
from test.helper import http_server_port, try_rm from test.helper import http_server_port, try_rm
from yt_dlp import YoutubeDL from yt_dlp import YoutubeDL
from yt_dlp.downloader.http import HttpFD from yt_dlp.downloader.http import HttpFD
from yt_dlp.utils import encodeFilename
from yt_dlp.utils._utils import _YDLLogger as FakeLogger from yt_dlp.utils._utils import _YDLLogger as FakeLogger
TEST_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DIR = os.path.dirname(os.path.abspath(__file__))
@ -82,12 +81,12 @@ class TestHttpFD(unittest.TestCase):
ydl = YoutubeDL(params) ydl = YoutubeDL(params)
downloader = HttpFD(ydl, params) downloader = HttpFD(ydl, params)
filename = 'testfile.mp4' filename = 'testfile.mp4'
try_rm(encodeFilename(filename)) try_rm(filename)
self.assertTrue(downloader.real_download(filename, { self.assertTrue(downloader.real_download(filename, {
'url': f'http://127.0.0.1:{self.port}/{ep}', 'url': f'http://127.0.0.1:{self.port}/{ep}',
}), ep) }), ep)
self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE, ep) self.assertEqual(os.path.getsize(filename), TEST_SIZE, ep)
try_rm(encodeFilename(filename)) try_rm(filename)
def download_all(self, params): def download_all(self, params):
for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'): for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):

View file

@ -10,6 +10,7 @@ TEST_DATA_DIR = Path(os.path.dirname(os.path.abspath(__file__)), 'testdata')
sys.path.append(str(TEST_DATA_DIR)) sys.path.append(str(TEST_DATA_DIR))
importlib.invalidate_caches() importlib.invalidate_caches()
from yt_dlp.utils import Config
from yt_dlp.plugins import PACKAGE_NAME, directories, load_plugins from yt_dlp.plugins import PACKAGE_NAME, directories, load_plugins
@ -68,6 +69,24 @@ class TestPlugins(unittest.TestCase):
os.remove(zip_path) os.remove(zip_path)
importlib.invalidate_caches() # reset the import caches importlib.invalidate_caches() # reset the import caches
def test_plugin_dirs(self):
# Internal plugin dirs hack for CLI --plugin-dirs
# To be replaced with proper system later
custom_plugin_dir = TEST_DATA_DIR / 'plugin_packages'
Config._plugin_dirs = [str(custom_plugin_dir)]
importlib.invalidate_caches() # reset the import caches
try:
package = importlib.import_module(f'{PACKAGE_NAME}.extractor')
self.assertIn(custom_plugin_dir / 'testpackage' / PACKAGE_NAME / 'extractor', map(Path, package.__path__))
plugins_ie = load_plugins('extractor', 'IE')
self.assertIn('PackagePluginIE', plugins_ie.keys())
finally:
Config._plugin_dirs = []
importlib.invalidate_caches() # reset the import caches
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -9,12 +9,17 @@ from yt_dlp.utils import (
determine_ext, determine_ext,
dict_get, dict_get,
int_or_none, int_or_none,
join_nonempty,
str_or_none, str_or_none,
) )
from yt_dlp.utils.traversal import ( from yt_dlp.utils.traversal import (
traverse_obj, find_element,
find_elements,
require, require,
subs_list_to_dict, subs_list_to_dict,
traverse_obj,
trim_str,
unpack,
) )
_TEST_DATA = { _TEST_DATA = {
@ -34,6 +39,14 @@ _TEST_DATA = {
'dict': {}, 'dict': {},
} }
_TEST_HTML = '''<html><body>
<div class="a">1</div>
<div class="a" id="x" custom="z">2</div>
<div class="b" data-id="y" custom="z">3</div>
<p class="a">4</p>
<p id="d" custom="e">5</p>
</body></html>'''
class TestTraversal: class TestTraversal:
def test_traversal_base(self): def test_traversal_base(self):
@ -468,7 +481,7 @@ class TestTraversalHelpers:
'id': 'name', 'id': 'name',
'data': 'content', 'data': 'content',
'url': 'url', 'url': 'url',
}, all, {subs_list_to_dict}]) == { }, all, {subs_list_to_dict(lang=None)}]) == {
'de': [{'url': 'https://example.com/subs/de.ass'}], 'de': [{'url': 'https://example.com/subs/de.ass'}],
'en': [{'data': 'content'}], 'en': [{'data': 'content'}],
}, 'subs with mandatory items missing should be filtered' }, 'subs with mandatory items missing should be filtered'
@ -477,7 +490,7 @@ class TestTraversalHelpers:
{'url': 'https://example.com/subs/en', 'name': 'en'}, {'url': 'https://example.com/subs/en', 'name': 'en'},
], [..., { ], [..., {
'id': 'name', 'id': 'name',
'ext': ['url', {lambda x: determine_ext(x, default_ext=None)}], 'ext': ['url', {determine_ext(default_ext=None)}],
'url': 'url', 'url': 'url',
}, all, {subs_list_to_dict(ext='ext')}]) == { }, all, {subs_list_to_dict(ext='ext')}]) == {
'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}], 'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}],
@ -494,6 +507,121 @@ class TestTraversalHelpers:
{'url': 'https://example.com/subs/en1', 'ext': 'ext'}, {'url': 'https://example.com/subs/en1', 'ext': 'ext'},
{'url': 'https://example.com/subs/en2', 'ext': 'ext'}, {'url': 'https://example.com/subs/en2', 'ext': 'ext'},
]}, '`quality` key should sort subtitle list accordingly' ]}, '`quality` key should sort subtitle list accordingly'
assert traverse_obj([
{'name': 'de', 'url': 'https://example.com/subs/de.ass'},
{'name': 'de'},
{'name': 'en', 'content': 'content'},
{'url': 'https://example.com/subs/en'},
], [..., {
'id': 'name',
'url': 'url',
'data': 'content',
}, all, {subs_list_to_dict(lang='en')}]) == {
'de': [{'url': 'https://example.com/subs/de.ass'}],
'en': [
{'data': 'content'},
{'url': 'https://example.com/subs/en'},
],
}, 'optionally provided lang should be used if no id available'
assert traverse_obj([
{'name': 1, 'url': 'https://example.com/subs/de1'},
{'name': {}, 'url': 'https://example.com/subs/de2'},
{'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
{'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
], [..., {
'id': 'name',
'url': 'url',
'ext': 'ext',
}, all, {subs_list_to_dict(lang=None)}]) == {
'de': [
{'url': 'https://example.com/subs/de3'},
{'url': 'https://example.com/subs/de4'},
],
}, 'non str types should be ignored for id and ext'
assert traverse_obj([
{'name': 1, 'url': 'https://example.com/subs/de1'},
{'name': {}, 'url': 'https://example.com/subs/de2'},
{'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
{'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
], [..., {
'id': 'name',
'url': 'url',
'ext': 'ext',
}, all, {subs_list_to_dict(lang='de')}]) == {
'de': [
{'url': 'https://example.com/subs/de1'},
{'url': 'https://example.com/subs/de2'},
{'url': 'https://example.com/subs/de3'},
{'url': 'https://example.com/subs/de4'},
],
}, 'non str types should be replaced by default id'
def test_trim_str(self):
with pytest.raises(TypeError):
trim_str('positional')
assert callable(trim_str(start='a'))
assert trim_str(start='ab')('abc') == 'c'
assert trim_str(end='bc')('abc') == 'a'
assert trim_str(start='a', end='c')('abc') == 'b'
assert trim_str(start='ab', end='c')('abc') == ''
assert trim_str(start='a', end='bc')('abc') == ''
assert trim_str(start='ab', end='bc')('abc') == ''
assert trim_str(start='abc', end='abc')('abc') == ''
assert trim_str(start='', end='')('abc') == 'abc'
def test_unpack(self):
assert unpack(lambda *x: ''.join(map(str, x)))([1, 2, 3]) == '123'
assert unpack(join_nonempty)([1, 2, 3]) == '1-2-3'
assert unpack(join_nonempty, delim=' ')([1, 2, 3]) == '1 2 3'
with pytest.raises(TypeError):
unpack(join_nonempty)()
with pytest.raises(TypeError):
unpack()
def test_find_element(self):
for improper_kwargs in [
dict(attr='data-id'),
dict(value='y'),
dict(attr='data-id', value='y', cls='a'),
dict(attr='data-id', value='y', id='x'),
dict(cls='a', id='x'),
dict(cls='a', tag='p'),
dict(cls='[ab]', regex=True),
]:
with pytest.raises(AssertionError):
find_element(**improper_kwargs)(_TEST_HTML)
assert find_element(cls='a')(_TEST_HTML) == '1'
assert find_element(cls='a', html=True)(_TEST_HTML) == '<div class="a">1</div>'
assert find_element(id='x')(_TEST_HTML) == '2'
assert find_element(id='[ex]')(_TEST_HTML) is None
assert find_element(id='[ex]', regex=True)(_TEST_HTML) == '2'
assert find_element(id='x', html=True)(_TEST_HTML) == '<div class="a" id="x" custom="z">2</div>'
assert find_element(attr='data-id', value='y')(_TEST_HTML) == '3'
assert find_element(attr='data-id', value='y(?:es)?')(_TEST_HTML) is None
assert find_element(attr='data-id', value='y(?:es)?', regex=True)(_TEST_HTML) == '3'
assert find_element(
attr='data-id', value='y', html=True)(_TEST_HTML) == '<div class="b" data-id="y" custom="z">3</div>'
def test_find_elements(self):
for improper_kwargs in [
dict(tag='p'),
dict(attr='data-id'),
dict(value='y'),
dict(attr='data-id', value='y', cls='a'),
dict(cls='a', tag='div'),
dict(cls='[ab]', regex=True),
]:
with pytest.raises(AssertionError):
find_elements(**improper_kwargs)(_TEST_HTML)
assert find_elements(cls='a')(_TEST_HTML) == ['1', '2', '4']
assert find_elements(cls='a', html=True)(_TEST_HTML) == [
'<div class="a">1</div>', '<div class="a" id="x" custom="z">2</div>', '<p class="a">4</p>']
assert find_elements(attr='custom', value='z')(_TEST_HTML) == ['2', '3']
assert find_elements(attr='custom', value='[ez]')(_TEST_HTML) == []
assert find_elements(attr='custom', value='[ez]', regex=True)(_TEST_HTML) == ['2', '3', '5']
class TestDictGet: class TestDictGet:

View file

@ -82,16 +82,32 @@ TEST_LOCKFILE_V1 = rf'''{TEST_LOCKFILE_COMMENT}
lock 2022.08.18.36 .+ Python 3\.6 lock 2022.08.18.36 .+ Python 3\.6
lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7
lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server)
lock 2024.10.22 py2exe .+
lock 2024.10.22 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b
lock 2024.10.22 (?!\w+_exe).+ Python 3\.8
lock 2024.10.22 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2)
''' '''
TEST_LOCKFILE_V2_TMPL = r'''%s TEST_LOCKFILE_V2_TMPL = r'''%s
lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6 lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6
lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7
lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server)
lockV2 yt-dlp/yt-dlp 2024.10.22 py2exe .+
lockV2 yt-dlp/yt-dlp 2024.10.22 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b
lockV2 yt-dlp/yt-dlp 2024.10.22 (?!\w+_exe).+ Python 3\.8
lockV2 yt-dlp/yt-dlp 2024.10.22 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2)
lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7
lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server)
lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 py2exe .+
lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b
lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 (?!\w+_exe).+ Python 3\.8
lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2)
lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7
lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server) lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server)
lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.045052 py2exe .+
lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b
lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 (?!\w+_exe).+ Python 3\.8
lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2)
''' '''
TEST_LOCKFILE_V2 = TEST_LOCKFILE_V2_TMPL % TEST_LOCKFILE_COMMENT TEST_LOCKFILE_V2 = TEST_LOCKFILE_V2_TMPL % TEST_LOCKFILE_COMMENT
@ -145,43 +161,76 @@ class TestUpdate(unittest.TestCase):
for lockfile in (TEST_LOCKFILE_V1, TEST_LOCKFILE_V2, TEST_LOCKFILE_ACTUAL, TEST_LOCKFILE_FORK): for lockfile in (TEST_LOCKFILE_V1, TEST_LOCKFILE_V2, TEST_LOCKFILE_ACTUAL, TEST_LOCKFILE_FORK):
# Normal operation # Normal operation
test(lockfile, 'zip Python 3.12.0', '2023.12.31', '2023.12.31') test(lockfile, 'zip Python 3.12.0', '2023.12.31', '2023.12.31')
test(lockfile, 'zip stable Python 3.12.0', '2023.12.31', '2023.12.31', exact=True) test(lockfile, 'zip Python 3.12.0', '2023.12.31', '2023.12.31', exact=True)
# Python 3.6 --update should update only to its lock # py2exe should never update beyond 2024.10.22
test(lockfile, 'py2exe Python 3.8', '2025.01.01', '2024.10.22')
test(lockfile, 'py2exe Python 3.8', '2025.01.01', None, exact=True)
# Python 3.6 --update should update only to the py3.6 lock
test(lockfile, 'zip Python 3.6.0', '2023.11.16', '2022.08.18.36') test(lockfile, 'zip Python 3.6.0', '2023.11.16', '2022.08.18.36')
# --update-to an exact version later than the lock should return None # Python 3.6 --update-to an exact version later than the py3.6 lock should return None
test(lockfile, 'zip stable Python 3.6.0', '2023.11.16', None, exact=True) test(lockfile, 'zip Python 3.6.0', '2023.11.16', None, exact=True)
# Python 3.7 should be able to update to its lock # Python 3.7 should be able to update to the py3.7 lock
test(lockfile, 'zip Python 3.7.0', '2023.11.16', '2023.11.16') test(lockfile, 'zip Python 3.7.0', '2023.11.16', '2023.11.16')
test(lockfile, 'zip stable Python 3.7.1', '2023.11.16', '2023.11.16', exact=True) test(lockfile, 'zip Python 3.7.1', '2023.11.16', '2023.11.16', exact=True)
# Non-win_x86_exe builds on py3.7 must be locked # Non-win_x86_exe builds on py3.7 must be locked at py3.7 lock
test(lockfile, 'zip Python 3.7.1', '2023.12.31', '2023.11.16') test(lockfile, 'zip Python 3.7.1', '2023.12.31', '2023.11.16')
test(lockfile, 'zip stable Python 3.7.1', '2023.12.31', None, exact=True) test(lockfile, 'zip Python 3.7.1', '2023.12.31', None, exact=True)
test( # Windows Vista w/ win_x86_exe must be locked # Python 3.8 should only update to the py3.8 lock
lockfile, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-Vista-6.0.6003-SP2', test(lockfile, 'zip Python 3.8.10', '2025.01.01', '2024.10.22')
test(lockfile, 'zip Python 3.8.110', '2025.01.01', None, exact=True)
test( # Windows Vista w/ win_x86_exe must be locked at Vista lock
lockfile, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-Vista-6.0.6003-SP2',
'2023.12.31', '2023.11.16') '2023.12.31', '2023.11.16')
test( # Windows 2008Server w/ win_x86_exe must be locked test( # Windows 2008Server w/ win_x86_exe must be locked at Vista lock
lockfile, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-2008Server', lockfile, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-2008Server',
'2023.12.31', None, exact=True) '2023.12.31', None, exact=True)
test( # Windows 7 w/ win_x86_exe py3.7 build should be able to update beyond lock test( # Windows 7 w/ win_x86_exe py3.7 build should be able to update beyond py3.7 lock
lockfile, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-7-6.1.7601-SP1', lockfile, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-7-6.1.7601-SP1',
'2023.12.31', '2023.12.31') '2023.12.31', '2023.12.31', exact=True)
test( # Windows 8.1 w/ '2008Server' in platform string should be able to update beyond lock test( # Windows 7 win_x86_exe should only update to Win7 lock
lockfile, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-7-6.1.7601-SP1',
'2025.01.01', '2024.10.22')
test( # Windows 2008ServerR2 win_exe should only update to Win7 lock
lockfile, 'win_exe Python 3.8.10 (CPython x86 32bit) - Windows-2008ServerR2',
'2025.12.31', '2024.10.22')
test( # Windows 8.1 w/ '2008Server' in platform string should be able to update beyond py3.7 lock
lockfile, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-post2008Server-6.2.9200', lockfile, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-post2008Server-6.2.9200',
'2023.12.31', '2023.12.31', exact=True) '2023.12.31', '2023.12.31', exact=True)
test( # win_exe built w/Python 3.8 on Windows>=8 should be able to update beyond py3.8 lock
lockfile, 'win_exe Python 3.8.10 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0',
'2025.01.01', '2025.01.01', exact=True)
test( # linux_armv7l_exe w/glibc2.7 should only update to glibc<2.31 lock
lockfile, 'linux_armv7l_exe Python 3.8.0 (CPython armv7l 32bit) - Linux-6.5.0-1025-azure-armv7l-with-glibc2.7',
'2025.01.01', '2024.10.22')
test( # linux_armv7l_exe w/Python 3.8 and glibc>=2.31 should be able to update beyond py3.8 and glibc<2.31 locks
lockfile, 'linux_armv7l_exe Python 3.8.0 (CPython armv7l 32bit) - Linux-6.5.0-1025-azure-armv7l-with-glibc2.31',
'2025.01.01', '2025.01.01')
test( # linux_armv7l_exe w/glibc2.30 should only update to glibc<2.31 lock
lockfile, 'linux_armv7l_exe Python 3.8.0 (CPython armv7l 64bit) - Linux-6.5.0-1025-azure-aarch64-with-glibc2.30 (OpenSSL',
'2025.01.01', '2024.10.22')
test( # linux_aarch64_exe w/glibc2.17 should only update to glibc<2.31 lock
lockfile, 'linux_aarch64_exe Python 3.8.0 (CPython aarch64 64bit) - Linux-6.5.0-1025-azure-aarch64-with-glibc2.17',
'2025.01.01', '2024.10.22')
test( # linux_aarch64_exe w/glibc2.40 and glibc>=2.31 should be able to update beyond py3.8 and glibc<2.31 locks
lockfile, 'linux_aarch64_exe Python 3.8.0 (CPython aarch64 64bit) - Linux-6.5.0-1025-azure-aarch64-with-glibc2.40',
'2025.01.01', '2025.01.01')
test( # linux_aarch64_exe w/glibc2.3 should only update to glibc<2.31 lock
lockfile, 'linux_aarch64_exe Python 3.8.0 (CPython aarch64 64bit) - Linux-6.5.0-1025-azure-aarch64-with-glibc2.3 (OpenSSL',
'2025.01.01', '2024.10.22')
# Forks can block updates to non-numeric tags rather than lock # Forks can block updates to non-numeric tags rather than lock
test(TEST_LOCKFILE_FORK, 'zip Python 3.6.3', 'pr0000', None, repo='fork/yt-dlp') test(TEST_LOCKFILE_FORK, 'zip Python 3.6.3', 'pr0000', None, repo='fork/yt-dlp')
test(TEST_LOCKFILE_FORK, 'zip stable Python 3.7.4', 'pr0000', 'pr0000', repo='fork/yt-dlp') test(TEST_LOCKFILE_FORK, 'zip Python 3.7.4', 'pr0000', 'pr0000', repo='fork/yt-dlp')
test(TEST_LOCKFILE_FORK, 'zip stable Python 3.7.4', 'pr1234', None, repo='fork/yt-dlp') test(TEST_LOCKFILE_FORK, 'zip Python 3.7.4', 'pr1234', None, repo='fork/yt-dlp')
test(TEST_LOCKFILE_FORK, 'zip Python 3.8.1', 'pr1234', 'pr1234', repo='fork/yt-dlp', exact=True) test(TEST_LOCKFILE_FORK, 'zip Python 3.8.1', 'pr1234', 'pr1234', repo='fork/yt-dlp', exact=True)
test( test(
TEST_LOCKFILE_FORK, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-Vista-6.0.6003-SP2', TEST_LOCKFILE_FORK, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-Vista-6.0.6003-SP2',
'pr1234', None, repo='fork/yt-dlp') 'pr1234', None, repo='fork/yt-dlp')
test( test(
TEST_LOCKFILE_FORK, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-7-6.1.7601-SP1', TEST_LOCKFILE_FORK, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-7-6.1.7601-SP1',
'2023.12.31', '2023.12.31', repo='fork/yt-dlp') '2023.12.31', '2023.12.31', repo='fork/yt-dlp')
test(TEST_LOCKFILE_FORK, 'zip Python 3.11.2', 'pr9999', None, repo='fork/yt-dlp', exact=True) test(TEST_LOCKFILE_FORK, 'zip Python 3.11.2', 'pr9999', None, repo='fork/yt-dlp', exact=True)
test(TEST_LOCKFILE_FORK, 'zip stable Python 3.12.0', 'pr9999', 'pr9999', repo='fork/yt-dlp') test(TEST_LOCKFILE_FORK, 'zip Python 3.12.0', 'pr9999', 'pr9999', repo='fork/yt-dlp')
def test_query_update(self): def test_query_update(self):
ydl = FakeYDL() ydl = FakeYDL()

View file

@ -4,6 +4,7 @@
import os import os
import sys import sys
import unittest import unittest
import unittest.mock
import warnings import warnings
import datetime as dt import datetime as dt
@ -20,7 +21,6 @@ import xml.etree.ElementTree
from yt_dlp.compat import ( from yt_dlp.compat import (
compat_etree_fromstring, compat_etree_fromstring,
compat_HTMLParseError, compat_HTMLParseError,
compat_os_name,
) )
from yt_dlp.utils import ( from yt_dlp.utils import (
Config, Config,
@ -48,7 +48,6 @@ from yt_dlp.utils import (
dfxp2srt, dfxp2srt,
encode_base_n, encode_base_n,
encode_compat_str, encode_compat_str,
encodeFilename,
expand_path, expand_path,
extract_attributes, extract_attributes,
extract_basic_auth, extract_basic_auth,
@ -68,7 +67,6 @@ from yt_dlp.utils import (
get_elements_html_by_class, get_elements_html_by_class,
get_elements_text_and_html_by_attribute, get_elements_text_and_html_by_attribute,
int_or_none, int_or_none,
intlist_to_bytes,
iri_to_uri, iri_to_uri,
is_html, is_html,
js_to_json, js_to_json,
@ -343,11 +341,13 @@ class TestUtil(unittest.TestCase):
self.assertEqual(remove_start(None, 'A - '), None) self.assertEqual(remove_start(None, 'A - '), None)
self.assertEqual(remove_start('A - B', 'A - '), 'B') self.assertEqual(remove_start('A - B', 'A - '), 'B')
self.assertEqual(remove_start('B - A', 'A - '), 'B - A') self.assertEqual(remove_start('B - A', 'A - '), 'B - A')
self.assertEqual(remove_start('non-empty', ''), 'non-empty')
def test_remove_end(self): def test_remove_end(self):
self.assertEqual(remove_end(None, ' - B'), None) self.assertEqual(remove_end(None, ' - B'), None)
self.assertEqual(remove_end('A - B', ' - B'), 'A') self.assertEqual(remove_end('A - B', ' - B'), 'A')
self.assertEqual(remove_end('B - A', ' - B'), 'B - A') self.assertEqual(remove_end('B - A', ' - B'), 'B - A')
self.assertEqual(remove_end('non-empty', ''), 'non-empty')
def test_remove_quotes(self): def test_remove_quotes(self):
self.assertEqual(remove_quotes(None), None) self.assertEqual(remove_quotes(None), None)
@ -563,10 +563,10 @@ class TestUtil(unittest.TestCase):
self.assertEqual(res_data, {'a': 'b', 'c': 'd'}) self.assertEqual(res_data, {'a': 'b', 'c': 'd'})
def test_shell_quote(self): def test_shell_quote(self):
args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')] args = ['ffmpeg', '-i', 'ñ€ß\'.mp4']
self.assertEqual( self.assertEqual(
shell_quote(args), shell_quote(args),
"""ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''') """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if os.name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')
def test_float_or_none(self): def test_float_or_none(self):
self.assertEqual(float_or_none('42.42'), 42.42) self.assertEqual(float_or_none('42.42'), 42.42)
@ -1306,15 +1306,10 @@ class TestUtil(unittest.TestCase):
self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb') self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb')
def test_intlist_to_bytes(self):
self.assertEqual(
intlist_to_bytes([0, 1, 127, 128, 255]),
b'\x00\x01\x7f\x80\xff')
def test_args_to_str(self): def test_args_to_str(self):
self.assertEqual( self.assertEqual(
args_to_str(['foo', 'ba/r', '-baz', '2 be', '']), args_to_str(['foo', 'ba/r', '-baz', '2 be', '']),
'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""', 'foo ba/r -baz \'2 be\' \'\'' if os.name != 'nt' else 'foo ba/r -baz "2 be" ""',
) )
def test_parse_filesize(self): def test_parse_filesize(self):
@ -2114,7 +2109,7 @@ Line 1
assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=') assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=')
assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz') assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')
@unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows') @unittest.skipUnless(os.name == 'nt', 'Only relevant on Windows')
def test_windows_escaping(self): def test_windows_escaping(self):
tests = [ tests = [
'test"&', 'test"&',
@ -2148,6 +2143,12 @@ Line 1
assert run_shell(args) == expected assert run_shell(args) == expected
assert run_shell(shell_quote(args, shell=True)) == expected assert run_shell(shell_quote(args, shell=True)) == expected
def test_partial_application(self):
assert callable(int_or_none(scale=10)), 'missing positional parameter should apply partially'
assert int_or_none(10, scale=0.1) == 100, 'positionally passed argument should call function'
assert int_or_none(v=10) == 10, 'keyword passed positional should call function'
assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function'
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

4
test/testdata/netrc/netrc vendored Normal file
View file

@ -0,0 +1,4 @@
machine normal_use login user password pass
machine empty_user login "" password pass
machine empty_pass login user password ""
machine both_empty login "" password ""

2
test/testdata/netrc/print_netrc.py vendored Normal file
View file

@ -0,0 +1,2 @@
with open('./test/testdata/netrc/netrc', encoding='utf-8') as fp:
print(fp.read())

View file

@ -0,0 +1,5 @@
from yt_dlp.extractor.common import InfoExtractor
class PackagePluginIE(InfoExtractor):
pass

View file

@ -26,7 +26,7 @@ import unicodedata
from .cache import Cache from .cache import Cache
from .compat import urllib # isort: split from .compat import urllib # isort: split
from .compat import compat_os_name, urllib_req_to_req from .compat import urllib_req_to_req
from .cookies import CookieLoadError, LenientSimpleCookie, load_cookies from .cookies import CookieLoadError, LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version from .downloader.rtmp import rtmpdump_version
@ -109,7 +109,6 @@ from .utils import (
determine_ext, determine_ext,
determine_protocol, determine_protocol,
encode_compat_str, encode_compat_str,
encodeFilename,
escapeHTML, escapeHTML,
expand_path, expand_path,
extract_basic_auth, extract_basic_auth,
@ -154,7 +153,6 @@ from .utils import (
try_get, try_get,
url_basename, url_basename,
variadic, variadic,
version_tuple,
windows_enable_vt_mode, windows_enable_vt_mode,
write_json_file, write_json_file,
write_string, write_string,
@ -168,7 +166,7 @@ from .utils.networking import (
) )
from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__ from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__
if compat_os_name == 'nt': if os.name == 'nt':
import ctypes import ctypes
@ -251,7 +249,7 @@ class YoutubeDL:
format_sort_force: Force the given format_sort. see "Sorting Formats" format_sort_force: Force the given format_sort. see "Sorting Formats"
for more details. for more details.
prefer_free_formats: Whether to prefer video formats with free containers prefer_free_formats: Whether to prefer video formats with free containers
over non-free ones of same quality. over non-free ones of the same quality.
allow_multiple_video_streams: Allow multiple video streams to be merged allow_multiple_video_streams: Allow multiple video streams to be merged
into a single file into a single file
allow_multiple_audio_streams: Allow multiple audio streams to be merged allow_multiple_audio_streams: Allow multiple audio streams to be merged
@ -285,7 +283,7 @@ class YoutubeDL:
rejecttitle: Reject downloads for matching titles. rejecttitle: Reject downloads for matching titles.
logger: Log messages to a logging.Logger instance. logger: Log messages to a logging.Logger instance.
logtostderr: Print everything to stderr instead of stdout. logtostderr: Print everything to stderr instead of stdout.
consoletitle: Display progress in console window's titlebar. consoletitle: Display progress in the console window's titlebar.
writedescription: Write the video description to a .description file writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file writeinfojson: Write the video description to a .info.json file
clean_infojson: Remove internal metadata from the infojson clean_infojson: Remove internal metadata from the infojson
@ -471,7 +469,7 @@ class YoutubeDL:
The following options do not work when used through the API: The following options do not work when used through the API:
filename, abort-on-error, multistreams, no-live-chat, filename, abort-on-error, multistreams, no-live-chat,
format-sort, no-clean-infojson, no-playlist-metafiles, format-sort, no-clean-infojson, no-playlist-metafiles,
no-keep-subs, no-attach-info-json, allow-unsafe-ext. no-keep-subs, no-attach-info-json, allow-unsafe-ext, prefer-vp9-sort.
Refer __init__.py for their implementation Refer __init__.py for their implementation
progress_template: Dictionary of templates for progress outputs. progress_template: Dictionary of templates for progress outputs.
Allowed keys are 'download', 'postprocess', Allowed keys are 'download', 'postprocess',
@ -513,7 +511,7 @@ class YoutubeDL:
The following options are used by the extractors: The following options are used by the extractors:
extractor_retries: Number of times to retry for known errors (default: 3) extractor_retries: Number of times to retry for known errors (default: 3)
dynamic_mpd: Whether to process dynamic DASH manifests (default: True) dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
hls_split_discontinuity: Split HLS playlists to different formats at hls_split_discontinuity: Split HLS playlists into different formats at
discontinuities such as ad breaks (default: False) discontinuities such as ad breaks (default: False)
extractor_args: A dictionary of arguments to be passed to the extractors. extractor_args: A dictionary of arguments to be passed to the extractors.
See "EXTRACTOR ARGUMENTS" for details. See "EXTRACTOR ARGUMENTS" for details.
@ -553,7 +551,7 @@ class YoutubeDL:
include_ads: - Doesn't work include_ads: - Doesn't work
Download ads as well Download ads as well
call_home: - Not implemented call_home: - Not implemented
Boolean, true iff we are allowed to contact the Boolean, true if we are allowed to contact the
yt-dlp servers for debugging. yt-dlp servers for debugging.
post_hooks: - Register a custom postprocessor post_hooks: - Register a custom postprocessor
A list of functions that get called as the final step A list of functions that get called as the final step
@ -644,7 +642,7 @@ class YoutubeDL:
out=stdout, out=stdout,
error=sys.stderr, error=sys.stderr,
screen=sys.stderr if self.params.get('quiet') else stdout, screen=sys.stderr if self.params.get('quiet') else stdout,
console=None if compat_os_name == 'nt' else next( console=None if os.name == 'nt' else next(
filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None), filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None),
) )
@ -953,7 +951,7 @@ class YoutubeDL:
self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once) self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
def _send_console_code(self, code): def _send_console_code(self, code):
if compat_os_name == 'nt' or not self._out_files.console: if os.name == 'nt' or not self._out_files.console:
return return
self._write_string(code, self._out_files.console) self._write_string(code, self._out_files.console)
@ -961,7 +959,7 @@ class YoutubeDL:
if not self.params.get('consoletitle', False): if not self.params.get('consoletitle', False):
return return
message = remove_terminal_sequences(message) message = remove_terminal_sequences(message)
if compat_os_name == 'nt': if os.name == 'nt':
if ctypes.windll.kernel32.GetConsoleWindow(): if ctypes.windll.kernel32.GetConsoleWindow():
# c_wchar_p() might not be necessary if `message` is # c_wchar_p() might not be necessary if `message` is
# already of type unicode() # already of type unicode()
@ -2850,13 +2848,10 @@ class YoutubeDL:
sanitize_string_field(fmt, 'format_id') sanitize_string_field(fmt, 'format_id')
sanitize_numeric_fields(fmt) sanitize_numeric_fields(fmt)
fmt['url'] = sanitize_url(fmt['url']) fmt['url'] = sanitize_url(fmt['url'])
if fmt.get('ext') is None: FormatSorter._fill_sorting_fields(fmt)
fmt['ext'] = determine_ext(fmt['url']).lower()
if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'): if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
if fmt.get('acodec') is None: if fmt.get('acodec') is None:
fmt['acodec'] = fmt['ext'] fmt['acodec'] = fmt['ext']
if fmt.get('protocol') is None:
fmt['protocol'] = determine_protocol(fmt)
if fmt.get('resolution') is None: if fmt.get('resolution') is None:
fmt['resolution'] = self.format_resolution(fmt, default=None) fmt['resolution'] = self.format_resolution(fmt, default=None)
if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none': if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none':
@ -3259,9 +3254,9 @@ class YoutubeDL:
if full_filename is None: if full_filename is None:
return return
if not self._ensure_dir_exists(encodeFilename(full_filename)): if not self._ensure_dir_exists(full_filename):
return return
if not self._ensure_dir_exists(encodeFilename(temp_filename)): if not self._ensure_dir_exists(temp_filename):
return return
if self._write_description('video', info_dict, if self._write_description('video', info_dict,
@ -3293,16 +3288,16 @@ class YoutubeDL:
if self.params.get('writeannotations', False): if self.params.get('writeannotations', False):
annofn = self.prepare_filename(info_dict, 'annotation') annofn = self.prepare_filename(info_dict, 'annotation')
if annofn: if annofn:
if not self._ensure_dir_exists(encodeFilename(annofn)): if not self._ensure_dir_exists(annofn):
return return
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)): if not self.params.get('overwrites', True) and os.path.exists(annofn):
self.to_screen('[info] Video annotations are already present') self.to_screen('[info] Video annotations are already present')
elif not info_dict.get('annotations'): elif not info_dict.get('annotations'):
self.report_warning('There are no annotations to write.') self.report_warning('There are no annotations to write.')
else: else:
try: try:
self.to_screen('[info] Writing video annotations to: ' + annofn) self.to_screen('[info] Writing video annotations to: ' + annofn)
with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: with open(annofn, 'w', encoding='utf-8') as annofile:
annofile.write(info_dict['annotations']) annofile.write(info_dict['annotations'])
except (KeyError, TypeError): except (KeyError, TypeError):
self.report_warning('There are no annotations to write.') self.report_warning('There are no annotations to write.')
@ -3318,14 +3313,14 @@ class YoutubeDL:
f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown') f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
return True return True
linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext')) linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
if not self._ensure_dir_exists(encodeFilename(linkfn)): if not self._ensure_dir_exists(linkfn):
return False return False
if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)): if self.params.get('overwrites', True) and os.path.exists(linkfn):
self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present') self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
return True return True
try: try:
self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}') self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', with open(to_high_limit_path(linkfn), 'w', encoding='utf-8',
newline='\r\n' if link_type == 'url' else '\n') as linkfile: newline='\r\n' if link_type == 'url' else '\n') as linkfile:
template_vars = {'url': url} template_vars = {'url': url}
if link_type == 'desktop': if link_type == 'desktop':
@ -3356,7 +3351,7 @@ class YoutubeDL:
if self.params.get('skip_download'): if self.params.get('skip_download'):
info_dict['filepath'] = temp_filename info_dict['filepath'] = temp_filename
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) info_dict['__finaldir'] = os.path.dirname(os.path.abspath(full_filename))
info_dict['__files_to_move'] = files_to_move info_dict['__files_to_move'] = files_to_move
replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)) replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
info_dict['__write_download_archive'] = self.params.get('force_write_download_archive') info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
@ -3486,7 +3481,7 @@ class YoutubeDL:
self.report_file_already_downloaded(dl_filename) self.report_file_already_downloaded(dl_filename)
dl_filename = dl_filename or temp_filename dl_filename = dl_filename or temp_filename
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) info_dict['__finaldir'] = os.path.dirname(os.path.abspath(full_filename))
except network_exceptions as err: except network_exceptions as err:
self.report_error(f'unable to download video data: {err}') self.report_error(f'unable to download video data: {err}')
@ -4089,17 +4084,6 @@ class YoutubeDL:
if plugin_dirs: if plugin_dirs:
write_debug(f'Plugin directories: {plugin_dirs}') write_debug(f'Plugin directories: {plugin_dirs}')
# Not implemented
if False and self.params.get('call_home'):
ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
write_debug(f'Public IP address: {ipaddr}')
latest_version = self.urlopen(
'https://yt-dl.org/latest/version').read().decode()
if version_tuple(latest_version) > version_tuple(__version__):
self.report_warning(
f'You are using an outdated version (newest version: {latest_version})! '
'See https://yt-dl.org/update if you need help updating.')
@functools.cached_property @functools.cached_property
def proxies(self): def proxies(self):
"""Global proxy configuration""" """Global proxy configuration"""
@ -4312,7 +4296,7 @@ class YoutubeDL:
else: else:
try: try:
self.to_screen(f'[info] Writing {label} description to: {descfn}') self.to_screen(f'[info] Writing {label} description to: {descfn}')
with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: with open(descfn, 'w', encoding='utf-8') as descfile:
descfile.write(ie_result['description']) descfile.write(ie_result['description'])
except OSError: except OSError:
self.report_error(f'Cannot write {label} description file {descfn}') self.report_error(f'Cannot write {label} description file {descfn}')
@ -4396,7 +4380,9 @@ class YoutubeDL:
return None return None
for idx, t in list(enumerate(thumbnails))[::-1]: for idx, t in list(enumerate(thumbnails))[::-1]:
thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg') thumb_ext = t.get('ext') or determine_ext(t['url'], 'jpg')
if multiple:
thumb_ext = f'{t["id"]}.{thumb_ext}'
thumb_display_id = f'{label} thumbnail {t["id"]}' thumb_display_id = f'{label} thumbnail {t["id"]}'
thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext')) thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext')) thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
@ -4412,7 +4398,7 @@ class YoutubeDL:
try: try:
uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {}))) uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}') self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
with open(encodeFilename(thumb_filename), 'wb') as thumbf: with open(thumb_filename, 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf) shutil.copyfileobj(uf, thumbf)
ret.append((thumb_filename, thumb_filename_final)) ret.append((thumb_filename, thumb_filename_final))
t['filepath'] = thumb_filename t['filepath'] = thumb_filename

View file

@ -1,8 +1,8 @@
import sys import sys
if sys.version_info < (3, 8): if sys.version_info < (3, 9):
raise ImportError( raise ImportError(
f'You are using an unsupported version of Python. Only Python versions 3.8 and above are supported by yt-dlp') # noqa: F541 f'You are using an unsupported version of Python. Only Python versions 3.9 and above are supported by yt-dlp') # noqa: F541
__license__ = 'The Unlicense' __license__ = 'The Unlicense'
@ -14,7 +14,6 @@ import os
import re import re
import traceback import traceback
from .compat import compat_os_name
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError
from .downloader.external import get_external_downloader from .downloader.external import get_external_downloader
from .extractor import list_extractor_classes from .extractor import list_extractor_classes
@ -34,6 +33,7 @@ from .postprocessor import (
) )
from .update import Updater from .update import Updater
from .utils import ( from .utils import (
Config,
NO_DEFAULT, NO_DEFAULT,
POSTPROCESS_WHEN, POSTPROCESS_WHEN,
DateRange, DateRange,
@ -43,7 +43,6 @@ from .utils import (
GeoUtils, GeoUtils,
PlaylistEntries, PlaylistEntries,
SameFileError, SameFileError,
decodeOption,
download_range_func, download_range_func,
expand_path, expand_path,
float_or_none, float_or_none,
@ -158,6 +157,9 @@ def set_compat_opts(opts):
opts.embed_infojson = False opts.embed_infojson = False
if 'format-sort' in opts.compat_opts: if 'format-sort' in opts.compat_opts:
opts.format_sort.extend(FormatSorter.ytdl_default) opts.format_sort.extend(FormatSorter.ytdl_default)
elif 'prefer-vp9-sort' in opts.compat_opts:
opts.format_sort.extend(FormatSorter._prefer_vp9_sort)
_video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False) _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False)
_audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False) _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False)
if _video_multistreams_set is False and _audio_multistreams_set is False: if _video_multistreams_set is False and _audio_multistreams_set is False:
@ -879,8 +881,8 @@ def parse_options(argv=None):
'listsubtitles': opts.listsubtitles, 'listsubtitles': opts.listsubtitles,
'subtitlesformat': opts.subtitlesformat, 'subtitlesformat': opts.subtitlesformat,
'subtitleslangs': opts.subtitleslangs, 'subtitleslangs': opts.subtitleslangs,
'matchtitle': decodeOption(opts.matchtitle), 'matchtitle': opts.matchtitle,
'rejecttitle': decodeOption(opts.rejecttitle), 'rejecttitle': opts.rejecttitle,
'max_downloads': opts.max_downloads, 'max_downloads': opts.max_downloads,
'prefer_free_formats': opts.prefer_free_formats, 'prefer_free_formats': opts.prefer_free_formats,
'trim_file_name': opts.trim_file_name, 'trim_file_name': opts.trim_file_name,
@ -967,6 +969,11 @@ def _real_main(argv=None):
parser, opts, all_urls, ydl_opts = parse_options(argv) parser, opts, all_urls, ydl_opts = parse_options(argv)
# HACK: Set the plugin dirs early on
# TODO(coletdjnz): remove when plugin globals system is implemented
if opts.plugin_dirs is not None:
Config._plugin_dirs = list(map(expand_path, opts.plugin_dirs))
# Dump user agent # Dump user agent
if opts.dump_user_agent: if opts.dump_user_agent:
ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent']) ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent'])
@ -1044,7 +1051,7 @@ def _real_main(argv=None):
ydl.warn_if_short_id(args) ydl.warn_if_short_id(args)
# Show a useful error message and wait for keypress if not launched from shell on Windows # Show a useful error message and wait for keypress if not launched from shell on Windows
if not args and compat_os_name == 'nt' and getattr(sys, 'frozen', False): if not args and os.name == 'nt' and getattr(sys, 'frozen', False):
import ctypes.wintypes import ctypes.wintypes
import msvcrt import msvcrt

View file

@ -3,7 +3,6 @@ from math import ceil
from .compat import compat_ord from .compat import compat_ord
from .dependencies import Cryptodome from .dependencies import Cryptodome
from .utils import bytes_to_intlist, intlist_to_bytes
if Cryptodome.AES: if Cryptodome.AES:
def aes_cbc_decrypt_bytes(data, key, iv): def aes_cbc_decrypt_bytes(data, key, iv):
@ -17,15 +16,15 @@ if Cryptodome.AES:
else: else:
def aes_cbc_decrypt_bytes(data, key, iv): def aes_cbc_decrypt_bytes(data, key, iv):
""" Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """ """ Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """
return intlist_to_bytes(aes_cbc_decrypt(*map(bytes_to_intlist, (data, key, iv)))) return bytes(aes_cbc_decrypt(*map(list, (data, key, iv))))
def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce):
""" Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """ """ Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """
return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce)))) return bytes(aes_gcm_decrypt_and_verify(*map(list, (data, key, tag, nonce))))
def aes_cbc_encrypt_bytes(data, key, iv, **kwargs): def aes_cbc_encrypt_bytes(data, key, iv, **kwargs):
return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs)) return bytes(aes_cbc_encrypt(*map(list, (data, key, iv)), **kwargs))
BLOCK_SIZE_BYTES = 16 BLOCK_SIZE_BYTES = 16
@ -221,7 +220,7 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
j0 = [*nonce, 0, 0, 0, 1] j0 = [*nonce, 0, 0, 0, 1]
else: else:
fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8 fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8
ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big')) ghash_in = nonce + [0] * fill + list((8 * len(nonce)).to_bytes(8, 'big'))
j0 = ghash(hash_subkey, ghash_in) j0 = ghash(hash_subkey, ghash_in)
# TODO: add nonce support to aes_ctr_decrypt # TODO: add nonce support to aes_ctr_decrypt
@ -230,13 +229,13 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce):
iv_ctr = inc(j0) iv_ctr = inc(j0)
decrypted_data = aes_ctr_decrypt(data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr))) decrypted_data = aes_ctr_decrypt(data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr)))
pad_len = len(data) // 16 * 16 pad_len = (BLOCK_SIZE_BYTES - (len(data) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES
s_tag = ghash( s_tag = ghash(
hash_subkey, hash_subkey,
data data
+ [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) # pad + [0] * pad_len # pad
+ bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data + list((0 * 8).to_bytes(8, 'big') # length of associated data
+ ((len(data) * 8).to_bytes(8, 'big'))), # length of data + ((len(data) * 8).to_bytes(8, 'big'))), # length of data
) )
if tag != aes_ctr_encrypt(s_tag, key, j0): if tag != aes_ctr_encrypt(s_tag, key, j0):
@ -300,8 +299,8 @@ def aes_decrypt_text(data, password, key_size_bytes):
""" """
NONCE_LENGTH_BYTES = 8 NONCE_LENGTH_BYTES = 8
data = bytes_to_intlist(base64.b64decode(data)) data = list(base64.b64decode(data))
password = bytes_to_intlist(password.encode()) password = list(password.encode())
key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password)) key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES) key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)
@ -310,7 +309,7 @@ def aes_decrypt_text(data, password, key_size_bytes):
cipher = data[NONCE_LENGTH_BYTES:] cipher = data[NONCE_LENGTH_BYTES:]
decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)) decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES))
return intlist_to_bytes(decrypted_data) return bytes(decrypted_data)
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)

View file

@ -1,5 +1,4 @@
import os import os
import sys
import xml.etree.ElementTree as etree import xml.etree.ElementTree as etree
from .compat_utils import passthrough_module from .compat_utils import passthrough_module
@ -24,33 +23,14 @@ def compat_etree_fromstring(text):
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
compat_os_name = os._name if os.name == 'java' else os.name
def compat_shlex_quote(s):
from ..utils import shell_quote
return shell_quote(s)
def compat_ord(c): def compat_ord(c):
return c if isinstance(c, int) else ord(c) return c if isinstance(c, int) else ord(c)
if compat_os_name == 'nt' and sys.version_info < (3, 8):
# os.path.realpath on Windows does not follow symbolic links
# prior to Python 3.8 (see https://bugs.python.org/issue9949)
def compat_realpath(path):
while os.path.islink(path):
path = os.path.abspath(os.readlink(path))
return os.path.realpath(path)
else:
compat_realpath = os.path.realpath
# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl # Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl
# See https://github.com/yt-dlp/yt-dlp/issues/792 # See https://github.com/yt-dlp/yt-dlp/issues/792
# https://docs.python.org/3/library/os.path.html#os.path.expanduser # https://docs.python.org/3/library/os.path.html#os.path.expanduser
if compat_os_name in ('nt', 'ce'): if os.name in ('nt', 'ce'):
def compat_expanduser(path): def compat_expanduser(path):
HOME = os.environ.get('HOME') HOME = os.environ.get('HOME')
if not HOME: if not HOME:

View file

@ -8,16 +8,14 @@ passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn(
DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6)) DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6))
del passthrough_module del passthrough_module
import base64 import functools # noqa: F401
import urllib.error import os
import urllib.parse
compat_str = str
compat_b64decode = base64.b64decode compat_os_name = os.name
compat_realpath = os.path.realpath
compat_urlparse = urllib.parse
compat_parse_qs = urllib.parse.parse_qs def compat_shlex_quote(s):
compat_urllib_parse_unquote = urllib.parse.unquote from ..utils import shell_quote
compat_urllib_parse_urlencode = urllib.parse.urlencode return shell_quote(s)
compat_urllib_parse_urlparse = urllib.parse.urlparse

View file

@ -30,7 +30,7 @@ from asyncio import run as compat_asyncio_run # noqa: F401
from re import Pattern as compat_Pattern # noqa: F401 from re import Pattern as compat_Pattern # noqa: F401
from re import match as compat_Match # noqa: F401 from re import match as compat_Match # noqa: F401
from . import compat_expanduser, compat_HTMLParseError, compat_realpath from . import compat_expanduser, compat_HTMLParseError
from .compat_utils import passthrough_module from .compat_utils import passthrough_module
from ..dependencies import brotli as compat_brotli # noqa: F401 from ..dependencies import brotli as compat_brotli # noqa: F401
from ..dependencies import websockets as compat_websockets # noqa: F401 from ..dependencies import websockets as compat_websockets # noqa: F401
@ -78,7 +78,7 @@ compat_kwargs = lambda kwargs: kwargs
compat_map = map compat_map = map
compat_numeric_types = (int, float, complex) compat_numeric_types = (int, float, complex)
compat_os_path_expanduser = compat_expanduser compat_os_path_expanduser = compat_expanduser
compat_os_path_realpath = compat_realpath compat_os_path_realpath = os.path.realpath
compat_print = print compat_print = print
compat_shlex_split = shlex.split compat_shlex_split = shlex.split
compat_socket_create_connection = socket.create_connection compat_socket_create_connection = socket.create_connection
@ -104,5 +104,12 @@ compat_xml_parse_error = compat_xml_etree_ElementTree_ParseError = etree.ParseEr
compat_xpath = lambda xpath: xpath compat_xpath = lambda xpath: xpath
compat_zip = zip compat_zip = zip
workaround_optparse_bug9161 = lambda: None workaround_optparse_bug9161 = lambda: None
compat_str = str
compat_b64decode = base64.b64decode
compat_urlparse = urllib.parse
compat_parse_qs = urllib.parse.parse_qs
compat_urllib_parse_unquote = urllib.parse.unquote
compat_urllib_parse_urlencode = urllib.parse.urlencode
compat_urllib_parse_urlparse = urllib.parse.urlparse
legacy = [] legacy = []

View file

@ -57,7 +57,7 @@ def passthrough_module(parent, child, allowed_attributes=(..., ), *, callback=la
callback(attr) callback(attr)
return ret return ret
@functools.lru_cache(maxsize=None) @functools.cache
def from_child(attr): def from_child(attr):
nonlocal child nonlocal child
if attr not in allowed_attributes: if attr not in allowed_attributes:

View file

@ -1,12 +0,0 @@
# flake8: noqa: F405
from functools import * # noqa: F403
from .compat_utils import passthrough_module
passthrough_module(__name__, 'functools')
del passthrough_module
try:
_ = cache # >= 3.9
except NameError:
cache = lru_cache(maxsize=None)

View file

@ -7,9 +7,9 @@ passthrough_module(__name__, 'urllib.request')
del passthrough_module del passthrough_module
from .. import compat_os_name import os
if compat_os_name == 'nt': if os.name == 'nt':
# On older Python versions, proxies are extracted from Windows registry erroneously. [1] # On older Python versions, proxies are extracted from Windows registry erroneously. [1]
# If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2] # If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2]
# It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade # It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade
@ -37,4 +37,4 @@ if compat_os_name == 'nt':
def getproxies(): def getproxies():
return getproxies_environment() or getproxies_registry_patched() return getproxies_environment() or getproxies_registry_patched()
del compat_os_name del os

View file

@ -25,7 +25,6 @@ from .aes import (
aes_gcm_decrypt_and_verify_bytes, aes_gcm_decrypt_and_verify_bytes,
unpad_pkcs7, unpad_pkcs7,
) )
from .compat import compat_os_name
from .dependencies import ( from .dependencies import (
_SECRETSTORAGE_UNAVAILABLE_REASON, _SECRETSTORAGE_UNAVAILABLE_REASON,
secretstorage, secretstorage,
@ -302,12 +301,18 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"') raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
logger.debug(f'Extracting cookies from: "{cookie_database_path}"') logger.debug(f'Extracting cookies from: "{cookie_database_path}"')
decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring)
with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
cursor = None cursor = None
try: try:
cursor = _open_database_copy(cookie_database_path, tmpdir) cursor = _open_database_copy(cookie_database_path, tmpdir)
# meta_version is necessary to determine if we need to trim the hash prefix from the cookies
# Ref: https://chromium.googlesource.com/chromium/src/+/b02dcebd7cafab92770734dc2bc317bd07f1d891/net/extras/sqlite/sqlite_persistent_cookie_store.cc#223
meta_version = int(cursor.execute('SELECT value FROM meta WHERE key = "version"').fetchone()[0])
decryptor = get_cookie_decryptor(
config['browser_dir'], config['keyring_name'], logger,
keyring=keyring, meta_version=meta_version)
cursor.connection.text_factory = bytes cursor.connection.text_factory = bytes
column_names = _get_column_names(cursor, 'cookies') column_names = _get_column_names(cursor, 'cookies')
secure_column = 'is_secure' if 'is_secure' in column_names else 'secure' secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
@ -337,7 +342,7 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger):
logger.debug(f'cookie version breakdown: {counts}') logger.debug(f'cookie version breakdown: {counts}')
return jar return jar
except PermissionError as error: except PermissionError as error:
if compat_os_name == 'nt' and error.errno == 13: if os.name == 'nt' and error.errno == 13:
message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info' message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
logger.error(message) logger.error(message)
raise DownloadError(message) # force exit raise DownloadError(message) # force exit
@ -405,22 +410,23 @@ class ChromeCookieDecryptor:
raise NotImplementedError('Must be implemented by sub classes') raise NotImplementedError('Must be implemented by sub classes')
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None): def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None, meta_version=None):
if sys.platform == 'darwin': if sys.platform == 'darwin':
return MacChromeCookieDecryptor(browser_keyring_name, logger) return MacChromeCookieDecryptor(browser_keyring_name, logger, meta_version=meta_version)
elif sys.platform in ('win32', 'cygwin'): elif sys.platform in ('win32', 'cygwin'):
return WindowsChromeCookieDecryptor(browser_root, logger) return WindowsChromeCookieDecryptor(browser_root, logger, meta_version=meta_version)
return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring) return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring, meta_version=meta_version)
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
def __init__(self, browser_keyring_name, logger, *, keyring=None): def __init__(self, browser_keyring_name, logger, *, keyring=None, meta_version=None):
self._logger = logger self._logger = logger
self._v10_key = self.derive_key(b'peanuts') self._v10_key = self.derive_key(b'peanuts')
self._empty_key = self.derive_key(b'') self._empty_key = self.derive_key(b'')
self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0} self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
self._browser_keyring_name = browser_keyring_name self._browser_keyring_name = browser_keyring_name
self._keyring = keyring self._keyring = keyring
self._meta_version = meta_version or 0
@functools.cached_property @functools.cached_property
def _v11_key(self): def _v11_key(self):
@ -449,14 +455,18 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
if version == b'v10': if version == b'v10':
self._cookie_counts['v10'] += 1 self._cookie_counts['v10'] += 1
return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger) return _decrypt_aes_cbc_multi(
ciphertext, (self._v10_key, self._empty_key), self._logger,
hash_prefix=self._meta_version >= 24)
elif version == b'v11': elif version == b'v11':
self._cookie_counts['v11'] += 1 self._cookie_counts['v11'] += 1
if self._v11_key is None: if self._v11_key is None:
self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True) self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
return None return None
return _decrypt_aes_cbc_multi(ciphertext, (self._v11_key, self._empty_key), self._logger) return _decrypt_aes_cbc_multi(
ciphertext, (self._v11_key, self._empty_key), self._logger,
hash_prefix=self._meta_version >= 24)
else: else:
self._logger.warning(f'unknown cookie version: "{version}"', only_once=True) self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
@ -465,11 +475,12 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
class MacChromeCookieDecryptor(ChromeCookieDecryptor): class MacChromeCookieDecryptor(ChromeCookieDecryptor):
def __init__(self, browser_keyring_name, logger): def __init__(self, browser_keyring_name, logger, meta_version=None):
self._logger = logger self._logger = logger
password = _get_mac_keyring_password(browser_keyring_name, logger) password = _get_mac_keyring_password(browser_keyring_name, logger)
self._v10_key = None if password is None else self.derive_key(password) self._v10_key = None if password is None else self.derive_key(password)
self._cookie_counts = {'v10': 0, 'other': 0} self._cookie_counts = {'v10': 0, 'other': 0}
self._meta_version = meta_version or 0
@staticmethod @staticmethod
def derive_key(password): def derive_key(password):
@ -487,7 +498,8 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True) self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
return None return None
return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger) return _decrypt_aes_cbc_multi(
ciphertext, (self._v10_key,), self._logger, hash_prefix=self._meta_version >= 24)
else: else:
self._cookie_counts['other'] += 1 self._cookie_counts['other'] += 1
@ -497,10 +509,11 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor):
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor): class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
def __init__(self, browser_root, logger): def __init__(self, browser_root, logger, meta_version=None):
self._logger = logger self._logger = logger
self._v10_key = _get_windows_v10_key(browser_root, logger) self._v10_key = _get_windows_v10_key(browser_root, logger)
self._cookie_counts = {'v10': 0, 'other': 0} self._cookie_counts = {'v10': 0, 'other': 0}
self._meta_version = meta_version or 0
def decrypt(self, encrypted_value): def decrypt(self, encrypted_value):
version = encrypted_value[:3] version = encrypted_value[:3]
@ -524,7 +537,9 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length] ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
authentication_tag = raw_ciphertext[-authentication_tag_length:] authentication_tag = raw_ciphertext[-authentication_tag_length:]
return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger) return _decrypt_aes_gcm(
ciphertext, self._v10_key, nonce, authentication_tag, self._logger,
hash_prefix=self._meta_version >= 24)
else: else:
self._cookie_counts['other'] += 1 self._cookie_counts['other'] += 1
@ -1010,10 +1025,12 @@ def pbkdf2_sha1(password, salt, iterations, key_length):
return hashlib.pbkdf2_hmac('sha1', password, salt, iterations, key_length) return hashlib.pbkdf2_hmac('sha1', password, salt, iterations, key_length)
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16): def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16, hash_prefix=False):
for key in keys: for key in keys:
plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)) plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
try: try:
if hash_prefix:
return plaintext[32:].decode()
return plaintext.decode() return plaintext.decode()
except UnicodeDecodeError: except UnicodeDecodeError:
pass pass
@ -1021,7 +1038,7 @@ def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' '
return None return None
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger): def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger, hash_prefix=False):
try: try:
plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce) plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
except ValueError: except ValueError:
@ -1029,6 +1046,8 @@ def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
return None return None
try: try:
if hash_prefix:
return plaintext[32:].decode()
return plaintext.decode() return plaintext.decode()
except UnicodeDecodeError: except UnicodeDecodeError:
logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)

View file

@ -24,7 +24,7 @@ try:
from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401 from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401
from Crypto.Hash import CMAC, SHA1 # noqa: F401 from Crypto.Hash import CMAC, SHA1 # noqa: F401
from Crypto.PublicKey import RSA # noqa: F401 from Crypto.PublicKey import RSA # noqa: F401
except ImportError: except (ImportError, OSError):
__version__ = f'broken {__version__}'.strip() __version__ = f'broken {__version__}'.strip()

View file

@ -20,9 +20,7 @@ from ..utils import (
Namespace, Namespace,
RetryManager, RetryManager,
classproperty, classproperty,
decodeArgument,
deprecation_warning, deprecation_warning,
encodeFilename,
format_bytes, format_bytes,
join_nonempty, join_nonempty,
parse_bytes, parse_bytes,
@ -219,7 +217,7 @@ class FileDownloader:
def temp_name(self, filename): def temp_name(self, filename):
"""Returns a temporary filename for the given filename.""" """Returns a temporary filename for the given filename."""
if self.params.get('nopart', False) or filename == '-' or \ if self.params.get('nopart', False) or filename == '-' or \
(os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))): (os.path.exists(filename) and not os.path.isfile(filename)):
return filename return filename
return filename + '.part' return filename + '.part'
@ -273,7 +271,7 @@ class FileDownloader:
"""Try to set the last-modified time of the given file.""" """Try to set the last-modified time of the given file."""
if last_modified_hdr is None: if last_modified_hdr is None:
return return
if not os.path.isfile(encodeFilename(filename)): if not os.path.isfile(filename):
return return
timestr = last_modified_hdr timestr = last_modified_hdr
if timestr is None: if timestr is None:
@ -432,13 +430,13 @@ class FileDownloader:
""" """
nooverwrites_and_exists = ( nooverwrites_and_exists = (
not self.params.get('overwrites', True) not self.params.get('overwrites', True)
and os.path.exists(encodeFilename(filename)) and os.path.exists(filename)
) )
if not hasattr(filename, 'write'): if not hasattr(filename, 'write'):
continuedl_and_exists = ( continuedl_and_exists = (
self.params.get('continuedl', True) self.params.get('continuedl', True)
and os.path.isfile(encodeFilename(filename)) and os.path.isfile(filename)
and not self.params.get('nopart', False) and not self.params.get('nopart', False)
) )
@ -448,7 +446,7 @@ class FileDownloader:
self._hook_progress({ self._hook_progress({
'filename': filename, 'filename': filename,
'status': 'finished', 'status': 'finished',
'total_bytes': os.path.getsize(encodeFilename(filename)), 'total_bytes': os.path.getsize(filename),
}, info_dict) }, info_dict)
self._finish_multiline_status() self._finish_multiline_status()
return True, False return True, False
@ -489,9 +487,7 @@ class FileDownloader:
if not self.params.get('verbose', False): if not self.params.get('verbose', False):
return return
str_args = [decodeArgument(a) for a in args]
if exe is None: if exe is None:
exe = os.path.basename(str_args[0]) exe = os.path.basename(args[0])
self.write_debug(f'{exe} command line: {shell_quote(str_args)}') self.write_debug(f'{exe} command line: {shell_quote(args)}')

View file

@ -23,7 +23,6 @@ from ..utils import (
cli_valueless_option, cli_valueless_option,
determine_ext, determine_ext,
encodeArgument, encodeArgument,
encodeFilename,
find_available_port, find_available_port,
remove_end, remove_end,
traverse_obj, traverse_obj,
@ -67,7 +66,7 @@ class ExternalFD(FragmentFD):
'elapsed': time.time() - started, 'elapsed': time.time() - started,
} }
if filename != '-': if filename != '-':
fsize = os.path.getsize(encodeFilename(tmpfilename)) fsize = os.path.getsize(tmpfilename)
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
status.update({ status.update({
'downloaded_bytes': fsize, 'downloaded_bytes': fsize,
@ -184,9 +183,9 @@ class ExternalFD(FragmentFD):
dest.write(decrypt_fragment(fragment, src.read())) dest.write(decrypt_fragment(fragment, src.read()))
src.close() src.close()
if not self.params.get('keep_fragments', False): if not self.params.get('keep_fragments', False):
self.try_remove(encodeFilename(fragment_filename)) self.try_remove(fragment_filename)
dest.close() dest.close()
self.try_remove(encodeFilename(f'{tmpfilename}.frag.urls')) self.try_remove(f'{tmpfilename}.frag.urls')
return 0 return 0
def _call_process(self, cmd, info_dict): def _call_process(self, cmd, info_dict):
@ -620,7 +619,7 @@ class FFmpegFD(ExternalFD):
args += self._configuration_args(('_o1', '_o', '')) args += self._configuration_args(('_o1', '_o', ''))
args = [encodeArgument(opt) for opt in args] args = [encodeArgument(opt) for opt in args]
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) args.append(ffpp._ffmpeg_filename_argument(tmpfilename))
self._debug_cmd(args) self._debug_cmd(args)
piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats) piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats)

View file

@ -9,10 +9,9 @@ import time
from .common import FileDownloader from .common import FileDownloader
from .http import HttpFD from .http import HttpFD
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import compat_os_name
from ..networking import Request from ..networking import Request
from ..networking.exceptions import HTTPError, IncompleteRead from ..networking.exceptions import HTTPError, IncompleteRead
from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj from ..utils import DownloadError, RetryManager, traverse_obj
from ..utils.networking import HTTPHeaderDict from ..utils.networking import HTTPHeaderDict
from ..utils.progress import ProgressCalculator from ..utils.progress import ProgressCalculator
@ -152,7 +151,7 @@ class FragmentFD(FileDownloader):
if self.__do_ytdl_file(ctx): if self.__do_ytdl_file(ctx):
self._write_ytdl_file(ctx) self._write_ytdl_file(ctx)
if not self.params.get('keep_fragments', False): if not self.params.get('keep_fragments', False):
self.try_remove(encodeFilename(ctx['fragment_filename_sanitized'])) self.try_remove(ctx['fragment_filename_sanitized'])
del ctx['fragment_filename_sanitized'] del ctx['fragment_filename_sanitized']
def _prepare_frag_download(self, ctx): def _prepare_frag_download(self, ctx):
@ -188,7 +187,7 @@ class FragmentFD(FileDownloader):
}) })
if self.__do_ytdl_file(ctx): if self.__do_ytdl_file(ctx):
ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))) ytdl_file_exists = os.path.isfile(self.ytdl_filename(ctx['filename']))
continuedl = self.params.get('continuedl', True) continuedl = self.params.get('continuedl', True)
if continuedl and ytdl_file_exists: if continuedl and ytdl_file_exists:
self._read_ytdl_file(ctx) self._read_ytdl_file(ctx)
@ -390,7 +389,7 @@ class FragmentFD(FileDownloader):
def __exit__(self, exc_type, exc_val, exc_tb): def __exit__(self, exc_type, exc_val, exc_tb):
pass pass
if compat_os_name == 'nt': if os.name == 'nt':
def future_result(future): def future_result(future):
while True: while True:
try: try:

View file

@ -15,7 +15,6 @@ from ..utils import (
ThrottledDownload, ThrottledDownload,
XAttrMetadataError, XAttrMetadataError,
XAttrUnavailableError, XAttrUnavailableError,
encodeFilename,
int_or_none, int_or_none,
parse_http_range, parse_http_range,
try_call, try_call,
@ -58,9 +57,8 @@ class HttpFD(FileDownloader):
if self.params.get('continuedl', True): if self.params.get('continuedl', True):
# Establish possible resume length # Establish possible resume length
if os.path.isfile(encodeFilename(ctx.tmpfilename)): if os.path.isfile(ctx.tmpfilename):
ctx.resume_len = os.path.getsize( ctx.resume_len = os.path.getsize(ctx.tmpfilename)
encodeFilename(ctx.tmpfilename))
ctx.is_resume = ctx.resume_len > 0 ctx.is_resume = ctx.resume_len > 0
@ -241,7 +239,7 @@ class HttpFD(FileDownloader):
ctx.resume_len = byte_counter ctx.resume_len = byte_counter
else: else:
try: try:
ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename)) ctx.resume_len = os.path.getsize(ctx.tmpfilename)
except FileNotFoundError: except FileNotFoundError:
ctx.resume_len = 0 ctx.resume_len = 0
raise RetryDownload(e) raise RetryDownload(e)

View file

@ -8,7 +8,6 @@ from ..utils import (
Popen, Popen,
check_executable, check_executable,
encodeArgument, encodeArgument,
encodeFilename,
get_exe_version, get_exe_version,
) )
@ -179,7 +178,7 @@ class RtmpFD(FileDownloader):
return False return False
while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live: while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
prevsize = os.path.getsize(encodeFilename(tmpfilename)) prevsize = os.path.getsize(tmpfilename)
self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes') self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes')
time.sleep(5.0) # This seems to be needed time.sleep(5.0) # This seems to be needed
args = [*basic_args, '--resume'] args = [*basic_args, '--resume']
@ -187,7 +186,7 @@ class RtmpFD(FileDownloader):
args += ['--skip', '1'] args += ['--skip', '1']
args = [encodeArgument(a) for a in args] args = [encodeArgument(a) for a in args]
retval = run_rtmpdump(args) retval = run_rtmpdump(args)
cursize = os.path.getsize(encodeFilename(tmpfilename)) cursize = os.path.getsize(tmpfilename)
if prevsize == cursize and retval == RD_FAILED: if prevsize == cursize and retval == RD_FAILED:
break break
# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
@ -196,7 +195,7 @@ class RtmpFD(FileDownloader):
retval = RD_SUCCESS retval = RD_SUCCESS
break break
if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE): if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
fsize = os.path.getsize(encodeFilename(tmpfilename)) fsize = os.path.getsize(tmpfilename)
self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes') self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes')
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
self._hook_progress({ self._hook_progress({

View file

@ -2,7 +2,7 @@ import os
import subprocess import subprocess
from .common import FileDownloader from .common import FileDownloader
from ..utils import check_executable, encodeFilename from ..utils import check_executable
class RtspFD(FileDownloader): class RtspFD(FileDownloader):
@ -26,7 +26,7 @@ class RtspFD(FileDownloader):
retval = subprocess.call(args) retval = subprocess.call(args)
if retval == 0: if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename)) fsize = os.path.getsize(tmpfilename)
self.to_screen(f'\r[{args[0]}] {fsize} bytes') self.to_screen(f'\r[{args[0]}] {fsize} bytes')
self.try_rename(tmpfilename, filename) self.try_rename(tmpfilename, filename)
self._hook_progress({ self._hook_progress({

View file

@ -208,6 +208,10 @@ from .bandcamp import (
BandcampUserIE, BandcampUserIE,
BandcampWeeklyIE, BandcampWeeklyIE,
) )
from .bandlab import (
BandlabIE,
BandlabPlaylistIE,
)
from .bannedvideo import BannedVideoIE from .bannedvideo import BannedVideoIE
from .bbc import ( from .bbc import (
BBCIE, BBCIE,
@ -278,6 +282,7 @@ from .bleacherreport import (
from .blerp import BlerpIE from .blerp import BlerpIE
from .blogger import BloggerIE from .blogger import BloggerIE
from .bloomberg import BloombergIE from .bloomberg import BloombergIE
from .bluesky import BlueskyIE
from .bokecc import BokeCCIE from .bokecc import BokeCCIE
from .bongacams import BongaCamsIE from .bongacams import BongaCamsIE
from .boosty import BoostyIE from .boosty import BoostyIE
@ -363,7 +368,10 @@ from .ccc import (
) )
from .ccma import CCMAIE from .ccma import CCMAIE
from .cctv import CCTVIE from .cctv import CCTVIE
from .cda import CDAIE from .cda import (
CDAIE,
CDAFolderIE,
)
from .cellebrite import CellebriteIE from .cellebrite import CellebriteIE
from .ceskatelevize import CeskaTelevizeIE from .ceskatelevize import CeskaTelevizeIE
from .cgtn import CGTNIE from .cgtn import CGTNIE
@ -398,8 +406,6 @@ from .cmt import CMTIE
from .cnbc import CNBCVideoIE from .cnbc import CNBCVideoIE
from .cnn import ( from .cnn import (
CNNIE, CNNIE,
CNNArticleIE,
CNNBlogsIE,
CNNIndonesiaIE, CNNIndonesiaIE,
) )
from .comedycentral import ( from .comedycentral import (
@ -707,6 +713,7 @@ from .gab import (
GabTVIE, GabTVIE,
) )
from .gaia import GaiaIE from .gaia import GaiaIE
from .gamedevtv import GameDevTVDashboardIE
from .gamejolt import ( from .gamejolt import (
GameJoltCommunityIE, GameJoltCommunityIE,
GameJoltGameIE, GameJoltGameIE,
@ -940,6 +947,10 @@ from .kaltura import KalturaIE
from .kankanews import KankaNewsIE from .kankanews import KankaNewsIE
from .karaoketv import KaraoketvIE from .karaoketv import KaraoketvIE
from .kelbyone import KelbyOneIE from .kelbyone import KelbyOneIE
from .kenh14 import (
Kenh14PlaylistIE,
Kenh14VideoIE,
)
from .khanacademy import ( from .khanacademy import (
KhanAcademyIE, KhanAcademyIE,
KhanAcademyUnitIE, KhanAcademyUnitIE,
@ -1129,12 +1140,6 @@ from .microsoftembed import (
MicrosoftMediusIE, MicrosoftMediusIE,
) )
from .microsoftstream import MicrosoftStreamIE from .microsoftstream import MicrosoftStreamIE
from .mildom import (
MildomClipIE,
MildomIE,
MildomUserVodIE,
MildomVodIE,
)
from .minds import ( from .minds import (
MindsChannelIE, MindsChannelIE,
MindsGroupIE, MindsGroupIE,
@ -1154,6 +1159,7 @@ from .mitele import MiTeleIE
from .mixch import ( from .mixch import (
MixchArchiveIE, MixchArchiveIE,
MixchIE, MixchIE,
MixchMovieIE,
) )
from .mixcloud import ( from .mixcloud import (
MixcloudIE, MixcloudIE,
@ -1515,8 +1521,8 @@ from .pgatour import PGATourIE
from .philharmoniedeparis import PhilharmonieDeParisIE from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE from .photobucket import PhotobucketIE
from .pialive import PiaLiveIE
from .piapro import PiaproIE from .piapro import PiaproIE
from .piaulizaportal import PIAULIZAPortalIE
from .picarto import ( from .picarto import (
PicartoIE, PicartoIE,
PicartoVodIE, PicartoVodIE,
@ -1552,10 +1558,6 @@ from .podbayfm import (
) )
from .podchaser import PodchaserIE from .podchaser import PodchaserIE
from .podomatic import PodomaticIE from .podomatic import PodomaticIE
from .pokemon import (
PokemonIE,
PokemonWatchIE,
)
from .pokergo import ( from .pokergo import (
PokerGoCollectionIE, PokerGoCollectionIE,
PokerGoIE, PokerGoIE,
@ -1646,6 +1648,7 @@ from .radiokapital import (
RadioKapitalIE, RadioKapitalIE,
RadioKapitalShowIE, RadioKapitalShowIE,
) )
from .radioradicale import RadioRadicaleIE
from .radiozet import RadioZetPodcastIE from .radiozet import RadioZetPodcastIE
from .radlive import ( from .radlive import (
RadLiveChannelIE, RadLiveChannelIE,
@ -1937,9 +1940,7 @@ from .spotify import (
) )
from .spreaker import ( from .spreaker import (
SpreakerIE, SpreakerIE,
SpreakerPageIE,
SpreakerShowIE, SpreakerShowIE,
SpreakerShowPageIE,
) )
from .springboardplatform import SpringboardPlatformIE from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE from .sprout import SproutIE
@ -2250,6 +2251,10 @@ from .ufctv import (
) )
from .ukcolumn import UkColumnIE from .ukcolumn import UkColumnIE
from .uktvplay import UKTVPlayIE from .uktvplay import UKTVPlayIE
from .uliza import (
UlizaPlayerIE,
UlizaPortalIE,
)
from .umg import UMGDeIE from .umg import UMGDeIE
from .unistra import UnistraIE from .unistra import UnistraIE
from .unity import UnityIE from .unity import UnityIE
@ -2278,10 +2283,6 @@ from .utreon import UtreonIE
from .varzesh3 import Varzesh3IE from .varzesh3 import Varzesh3IE
from .vbox7 import Vbox7IE from .vbox7 import Vbox7IE
from .veo import VeoIE from .veo import VeoIE
from .veoh import (
VeohIE,
VeohUserIE,
)
from .vesti import VestiIE from .vesti import VestiIE
from .vevo import ( from .vevo import (
VevoIE, VevoIE,

View file

@ -6,7 +6,6 @@ import hmac
import io import io
import json import json
import re import re
import struct
import time import time
import urllib.parse import urllib.parse
import uuid import uuid
@ -18,10 +17,8 @@ from ..networking.exceptions import TransportError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
OnDemandPagedList, OnDemandPagedList,
bytes_to_intlist,
decode_base_n, decode_base_n,
int_or_none, int_or_none,
intlist_to_bytes,
time_seconds, time_seconds,
traverse_obj, traverse_obj,
update_url_query, update_url_query,
@ -72,15 +69,15 @@ class AbemaLicenseRH(RequestHandler):
}) })
res = decode_base_n(license_response['k'], table=self._STRTABLE) res = decode_base_n(license_response['k'], table=self._STRTABLE)
encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff)) encvideokey = list(res.to_bytes(16, 'big'))
h = hmac.new( h = hmac.new(
binascii.unhexlify(self._HKEY), binascii.unhexlify(self._HKEY),
(license_response['cid'] + self.ie._DEVICE_ID).encode(), (license_response['cid'] + self.ie._DEVICE_ID).encode(),
digestmod=hashlib.sha256) digestmod=hashlib.sha256)
enckey = bytes_to_intlist(h.digest()) enckey = list(h.digest())
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey)) return bytes(aes_ecb_decrypt(encvideokey, enckey))
class AbemaTVBaseIE(InfoExtractor): class AbemaTVBaseIE(InfoExtractor):

View file

@ -11,11 +11,9 @@ from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
ass_subtitles_timecode, ass_subtitles_timecode,
bytes_to_intlist,
bytes_to_long, bytes_to_long,
float_or_none, float_or_none,
int_or_none, int_or_none,
intlist_to_bytes,
join_nonempty, join_nonempty,
long_to_bytes, long_to_bytes,
parse_iso8601, parse_iso8601,
@ -198,16 +196,16 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link') links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
self._K = ''.join(random.choices('0123456789abcdef', k=16)) self._K = ''.join(random.choices('0123456789abcdef', k=16))
message = bytes_to_intlist(json.dumps({ message = list(json.dumps({
'k': self._K, 'k': self._K,
't': token, 't': token,
})) }).encode())
# Sometimes authentication fails for no good reason, retry with # Sometimes authentication fails for no good reason, retry with
# a different random padding # a different random padding
links_data = None links_data = None
for _ in range(3): for _ in range(3):
padded_message = intlist_to_bytes(pkcs1pad(message, 128)) padded_message = bytes(pkcs1pad(message, 128))
n, e = self._RSA_KEY n, e = self._RSA_KEY
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n)) encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
authorization = base64.b64encode(encrypted_message).decode() authorization = base64.b64encode(encrypted_message).decode()

View file

@ -1362,7 +1362,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
def _download_webpage_handle(self, *args, **kwargs): def _download_webpage_handle(self, *args, **kwargs):
headers = self.geo_verification_headers() headers = self.geo_verification_headers()
headers.update(kwargs.get('headers', {})) headers.update(kwargs.get('headers') or {})
kwargs['headers'] = headers kwargs['headers'] = headers
return super()._download_webpage_handle( return super()._download_webpage_handle(
*args, **kwargs) *args, **kwargs)

View file

@ -33,21 +33,21 @@ class AfreecaTVBaseIE(InfoExtractor):
} }
response = self._download_json( response = self._download_json(
'https://login.afreecatv.com/app/LoginAction.php', None, 'https://login.sooplive.co.kr/app/LoginAction.php', None,
'Logging in', data=urlencode_postdata(login_form)) 'Logging in', data=urlencode_postdata(login_form))
_ERRORS = { _ERRORS = {
-4: 'Your account has been suspended due to a violation of our terms and policies.', -4: 'Your account has been suspended due to a violation of our terms and policies.',
-5: 'https://member.afreecatv.com/app/user_delete_progress.php', -5: 'https://member.sooplive.co.kr/app/user_delete_progress.php',
-6: 'https://login.afreecatv.com/membership/changeMember.php', -6: 'https://login.sooplive.co.kr/membership/changeMember.php',
-8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.", -8: "Hello! Soop here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.",
-9: 'https://member.afreecatv.com/app/pop_login_block.php', -9: 'https://member.sooplive.co.kr/app/pop_login_block.php',
-11: 'https://login.afreecatv.com/afreeca/second_login.php', -11: 'https://login.sooplive.co.kr/afreeca/second_login.php',
-12: 'https://member.afreecatv.com/app/user_security.php', -12: 'https://member.sooplive.co.kr/app/user_security.php',
0: 'The username does not exist or you have entered the wrong password.', 0: 'The username does not exist or you have entered the wrong password.',
-1: 'The username does not exist or you have entered the wrong password.', -1: 'The username does not exist or you have entered the wrong password.',
-3: 'You have entered your username/password incorrectly.', -3: 'You have entered your username/password incorrectly.',
-7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.', -7: 'You cannot use your Global Soop account to access Korean Soop.',
-10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.', -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.',
-32008: 'You have failed to log in. Please contact our Help Center.', -32008: 'You have failed to log in. Please contact our Help Center.',
} }
@ -61,76 +61,48 @@ class AfreecaTVBaseIE(InfoExtractor):
def _call_api(self, endpoint, display_id, data=None, headers=None, query=None): def _call_api(self, endpoint, display_id, data=None, headers=None, query=None):
return self._download_json(Request( return self._download_json(Request(
f'https://api.m.afreecatv.com/{endpoint}', f'https://api.m.sooplive.co.kr/{endpoint}',
data=data, headers=headers, query=query, data=data, headers=headers, query=query,
extensions={'legacy_ssl': True}), display_id, extensions={'legacy_ssl': True}), display_id,
'Downloading API JSON', 'Unable to download API JSON') 'Downloading API JSON', 'Unable to download API JSON')
@staticmethod
def _fixup_thumb(thumb_url):
if not url_or_none(thumb_url):
return None
# Core would determine_ext as 'php' from the url, so we need to provide the real ext
# See: https://github.com/yt-dlp/yt-dlp/issues/11537
return [{'url': thumb_url, 'ext': 'jpg'}]
class AfreecaTVIE(AfreecaTVBaseIE): class AfreecaTVIE(AfreecaTVBaseIE):
IE_NAME = 'afreecatv' IE_NAME = 'soop'
IE_DESC = 'afreecatv.com' IE_DESC = 'sooplive.co.kr'
_VALID_URL = r'''(?x) _VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/(?:PLAYER/STATION|player)/(?P<id>\d+)/?(?:$|[?#&])'
https?://
(?:
(?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)?
(?:
/app/(?:index|read_ucc_bbs)\.cgi|
/player/[Pp]layer\.(?:swf|html)
)\?.*?\bnTitleNo=|
vod\.afreecatv\.com/(PLAYER/STATION|player)/
)
(?P<id>\d+)/?(?:$|[?#&])
'''
_TESTS = [{ _TESTS = [{
'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=', 'url': 'https://vod.sooplive.co.kr/player/96753363',
'md5': 'f72c89fe7ecc14c1b5ce506c4996046e',
'info_dict': { 'info_dict': {
'id': '36164052', 'id': '20230108_9FF5BEE1_244432674_1',
'ext': 'mp4', 'ext': 'mp4',
'title': '데일리 에이프릴 요정들의 시상식!', 'uploader_id': 'rlantnghks',
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', 'uploader': '페이즈으',
'uploader': 'dailyapril', 'duration': 10840,
'uploader_id': 'dailyapril', 'thumbnail': r're:https?://videoimg\.sooplive\.co\.kr/.+',
'upload_date': '20160503', 'upload_date': '20230108',
'timestamp': 1673218805,
'title': '젠지 페이즈',
}, },
'skip': 'Video is gone', 'params': {
}, { 'skip_download': True,
'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867',
'info_dict': {
'id': '36153164',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$',
'uploader': 'dailyapril',
'uploader_id': 'dailyapril',
}, },
'playlist_count': 2,
'playlist': [{
'md5': 'd8b7c174568da61d774ef0203159bf97',
'info_dict': {
'id': '36153164_1',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
'upload_date': '20160502',
},
}, {
'md5': '58f2ce7f6044e34439ab2d50612ab02b',
'info_dict': {
'id': '36153164_2',
'ext': 'mp4',
'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'",
'upload_date': '20160502',
},
}],
'skip': 'Video is gone',
}, { }, {
# non standard key # non standard key
'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605', 'url': 'http://vod.sooplive.co.kr/PLAYER/STATION/20515605',
'info_dict': { 'info_dict': {
'id': '20170411_BE689A0E_190960999_1_2_h', 'id': '20170411_BE689A0E_190960999_1_2_h',
'ext': 'mp4', 'ext': 'mp4',
'title': '혼자사는여자집', 'title': '혼자사는여자집',
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', 'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+',
'uploader': '♥이슬이', 'uploader': '♥이슬이',
'uploader_id': 'dasl8121', 'uploader_id': 'dasl8121',
'upload_date': '20170411', 'upload_date': '20170411',
@ -142,12 +114,12 @@ class AfreecaTVIE(AfreecaTVBaseIE):
}, },
}, { }, {
# adult content # adult content
'url': 'https://vod.afreecatv.com/player/97267690', 'url': 'https://vod.sooplive.co.kr/player/97267690',
'info_dict': { 'info_dict': {
'id': '20180327_27901457_202289533_1', 'id': '20180327_27901457_202289533_1',
'ext': 'mp4', 'ext': 'mp4',
'title': '[생]빨개요♥ (part 1)', 'title': '[생]빨개요♥ (part 1)',
'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', 'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+',
'uploader': '[SA]서아', 'uploader': '[SA]서아',
'uploader_id': 'bjdyrksu', 'uploader_id': 'bjdyrksu',
'upload_date': '20180327', 'upload_date': '20180327',
@ -157,36 +129,17 @@ class AfreecaTVIE(AfreecaTVBaseIE):
'skip_download': True, 'skip_download': True,
}, },
'skip': 'The VOD does not exist', 'skip': 'The VOD does not exist',
}, {
'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652',
'only_matching': True,
}, {
'url': 'https://vod.afreecatv.com/player/96753363',
'info_dict': {
'id': '20230108_9FF5BEE1_244432674_1',
'ext': 'mp4',
'uploader_id': 'rlantnghks',
'uploader': '페이즈으',
'duration': 10840,
'thumbnail': r're:https?://videoimg\.afreecatv\.com/.+',
'upload_date': '20230108',
'timestamp': 1673218805,
'title': '젠지 페이즈',
},
'params': {
'skip_download': True,
},
}, { }, {
# adult content # adult content
'url': 'https://vod.afreecatv.com/player/70395877', 'url': 'https://vod.sooplive.co.kr/player/70395877',
'only_matching': True, 'only_matching': True,
}, { }, {
# subscribers only # subscribers only
'url': 'https://vod.afreecatv.com/player/104647403', 'url': 'https://vod.sooplive.co.kr/player/104647403',
'only_matching': True, 'only_matching': True,
}, { }, {
# private # private
'url': 'https://vod.afreecatv.com/player/81669846', 'url': 'https://vod.sooplive.co.kr/player/81669846',
'only_matching': True, 'only_matching': True,
}] }]
@ -209,8 +162,8 @@ class AfreecaTVIE(AfreecaTVBaseIE):
'title': ('title', {str}), 'title': ('title', {str}),
'uploader': ('writer_nick', {str}), 'uploader': ('writer_nick', {str}),
'uploader_id': ('bj_id', {str}), 'uploader_id': ('bj_id', {str}),
'duration': ('total_file_duration', {functools.partial(int_or_none, scale=1000)}), 'duration': ('total_file_duration', {int_or_none(scale=1000)}),
'thumbnail': ('thumb', {url_or_none}), 'thumbnails': ('thumb', {self._fixup_thumb}),
}) })
entries = [] entries = []
@ -233,7 +186,7 @@ class AfreecaTVIE(AfreecaTVBaseIE):
'title': f'{common_info.get("title") or "Untitled"} (part {file_num})', 'title': f'{common_info.get("title") or "Untitled"} (part {file_num})',
'formats': formats, 'formats': formats,
**traverse_obj(file_element, { **traverse_obj(file_element, {
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}), 'duration': ('duration', {int_or_none(scale=1000)}),
'timestamp': ('file_start', {unified_timestamp}), 'timestamp': ('file_start', {unified_timestamp}),
}), }),
}) })
@ -262,11 +215,11 @@ class AfreecaTVIE(AfreecaTVBaseIE):
class AfreecaTVCatchStoryIE(AfreecaTVBaseIE): class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
IE_NAME = 'afreecatv:catchstory' IE_NAME = 'soop:catchstory'
IE_DESC = 'afreecatv.com catch story' IE_DESC = 'sooplive.co.kr catch story'
_VALID_URL = r'https?://vod\.afreecatv\.com/player/(?P<id>\d+)/catchstory' _VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/player/(?P<id>\d+)/catchstory'
_TESTS = [{ _TESTS = [{
'url': 'https://vod.afreecatv.com/player/103247/catchstory', 'url': 'https://vod.sooplive.co.kr/player/103247/catchstory',
'info_dict': { 'info_dict': {
'id': '103247', 'id': '103247',
}, },
@ -281,29 +234,28 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE):
return self.playlist_result(self._entries(data), video_id) return self.playlist_result(self._entries(data), video_id)
@staticmethod def _entries(self, data):
def _entries(data):
# 'files' is always a list with 1 element # 'files' is always a list with 1 element
yield from traverse_obj(data, ( yield from traverse_obj(data, (
'data', lambda _, v: v['story_type'] == 'catch', 'data', lambda _, v: v['story_type'] == 'catch',
'catch_list', lambda _, v: v['files'][0]['file'], { 'catch_list', lambda _, v: v['files'][0]['file'], {
'id': ('files', 0, 'file_info_key', {str}), 'id': ('files', 0, 'file_info_key', {str}),
'url': ('files', 0, 'file', {url_or_none}), 'url': ('files', 0, 'file', {url_or_none}),
'duration': ('files', 0, 'duration', {functools.partial(int_or_none, scale=1000)}), 'duration': ('files', 0, 'duration', {int_or_none(scale=1000)}),
'title': ('title', {str}), 'title': ('title', {str}),
'uploader': ('writer_nick', {str}), 'uploader': ('writer_nick', {str}),
'uploader_id': ('writer_id', {str}), 'uploader_id': ('writer_id', {str}),
'thumbnail': ('thumb', {url_or_none}), 'thumbnails': ('thumb', {self._fixup_thumb}),
'timestamp': ('write_timestamp', {int_or_none}), 'timestamp': ('write_timestamp', {int_or_none}),
})) }))
class AfreecaTVLiveIE(AfreecaTVBaseIE): class AfreecaTVLiveIE(AfreecaTVBaseIE):
IE_NAME = 'afreecatv:live' IE_NAME = 'soop:live'
IE_DESC = 'afreecatv.com livestreams' IE_DESC = 'sooplive.co.kr livestreams'
_VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P<id>[^/]+)(?:/(?P<bno>\d+))?' _VALID_URL = r'https?://play\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)(?:/(?P<bno>\d+))?'
_TESTS = [{ _TESTS = [{
'url': 'https://play.afreecatv.com/pyh3646/237852185', 'url': 'https://play.sooplive.co.kr/pyh3646/237852185',
'info_dict': { 'info_dict': {
'id': '237852185', 'id': '237852185',
'ext': 'mp4', 'ext': 'mp4',
@ -315,30 +267,30 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
}, },
'skip': 'Livestream has ended', 'skip': 'Livestream has ended',
}, { }, {
'url': 'https://play.afreecatv.com/pyh3646/237852185', 'url': 'https://play.sooplive.co.kr/pyh3646/237852185',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://play.afreecatv.com/pyh3646', 'url': 'https://play.sooplive.co.kr/pyh3646',
'only_matching': True, 'only_matching': True,
}] }]
_LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php' _LIVE_API_URL = 'https://live.sooplive.co.kr/afreeca/player_live_api.php'
_WORKING_CDNS = [ _WORKING_CDNS = [
'gcp_cdn', # live-global-cdn-v02.afreecatv.com 'gcp_cdn', # live-global-cdn-v02.sooplive.co.kr
'gs_cdn_pc_app', # pc-app.stream.afreecatv.com 'gs_cdn_pc_app', # pc-app.stream.sooplive.co.kr
'gs_cdn_mobile_web', # mobile-web.stream.afreecatv.com 'gs_cdn_mobile_web', # mobile-web.stream.sooplive.co.kr
'gs_cdn_pc_web', # pc-web.stream.afreecatv.com 'gs_cdn_pc_web', # pc-web.stream.sooplive.co.kr
] ]
_BAD_CDNS = [ _BAD_CDNS = [
'gs_cdn', # chromecast.afreeca.gscdn.com (cannot resolve) 'gs_cdn', # chromecast.afreeca.gscdn.com (cannot resolve)
'gs_cdn_chromecast', # chromecast.stream.afreecatv.com (HTTP Error 400) 'gs_cdn_chromecast', # chromecast.stream.sooplive.co.kr (HTTP Error 400)
'azure_cdn', # live-global-cdn-v01.afreecatv.com (cannot resolve) 'azure_cdn', # live-global-cdn-v01.sooplive.co.kr (cannot resolve)
'aws_cf', # live-global-cdn-v03.afreecatv.com (cannot resolve) 'aws_cf', # live-global-cdn-v03.sooplive.co.kr (cannot resolve)
'kt_cdn', # kt.stream.afreecatv.com (HTTP Error 400) 'kt_cdn', # kt.stream.sooplive.co.kr (HTTP Error 400)
] ]
def _extract_formats(self, channel_info, broadcast_no, aid): def _extract_formats(self, channel_info, broadcast_no, aid):
stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com' stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.sooplive.co.kr'
# If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs # If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs
default_cdn_ids = orderedSet([ default_cdn_ids = orderedSet([
@ -358,7 +310,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
try: try:
return self._extract_m3u8_formats( return self._extract_m3u8_formats(
m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid}, m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid},
headers={'Referer': 'https://play.afreecatv.com/'}) headers={'Referer': 'https://play.sooplive.co.kr/'})
except ExtractorError as e: except ExtractorError as e:
if attempt == len(cdn_ids): if attempt == len(cdn_ids):
raise raise
@ -374,7 +326,13 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
broadcaster_id = channel_info.get('BJID') or broadcaster_id broadcaster_id = channel_info.get('BJID') or broadcaster_id
broadcast_no = channel_info.get('BNO') or broadcast_no broadcast_no = channel_info.get('BNO') or broadcast_no
if not broadcast_no: if not broadcast_no:
raise UserNotLive(video_id=broadcaster_id) result = channel_info.get('RESULT')
if result == 0:
raise UserNotLive(video_id=broadcaster_id)
elif result == -6:
self.raise_login_required(
'This channel is streaming for subscribers only', method='password')
raise ExtractorError('Unable to extract broadcast number')
password = self.get_param('videopassword') password = self.get_param('videopassword')
if channel_info.get('BPWD') == 'Y' and password is None: if channel_info.get('BPWD') == 'Y' and password is None:
@ -403,7 +361,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
formats = self._extract_formats(channel_info, broadcast_no, aid) formats = self._extract_formats(channel_info, broadcast_no, aid)
station_info = traverse_obj(self._download_json( station_info = traverse_obj(self._download_json(
'https://st.afreecatv.com/api/get_station_status.php', broadcast_no, 'https://st.sooplive.co.kr/api/get_station_status.php', broadcast_no,
'Downloading channel metadata', 'Unable to download channel metadata', 'Downloading channel metadata', 'Unable to download channel metadata',
query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {} query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {}
@ -419,11 +377,11 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
} }
class AfreecaTVUserIE(InfoExtractor): class AfreecaTVUserIE(AfreecaTVBaseIE):
IE_NAME = 'afreecatv:user' IE_NAME = 'soop:user'
_VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?' _VALID_URL = r'https?://ch\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P<id>[^/?#]+)/vods/?(?P<slug_type>[^/?#]+)?'
_TESTS = [{ _TESTS = [{
'url': 'https://bj.afreecatv.com/ryuryu24/vods/review', 'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/review',
'info_dict': { 'info_dict': {
'_type': 'playlist', '_type': 'playlist',
'id': 'ryuryu24', 'id': 'ryuryu24',
@ -431,7 +389,7 @@ class AfreecaTVUserIE(InfoExtractor):
}, },
'playlist_count': 218, 'playlist_count': 218,
}, { }, {
'url': 'https://bj.afreecatv.com/parang1995/vods/highlight', 'url': 'https://ch.sooplive.co.kr/parang1995/vods/highlight',
'info_dict': { 'info_dict': {
'_type': 'playlist', '_type': 'playlist',
'id': 'parang1995', 'id': 'parang1995',
@ -439,7 +397,7 @@ class AfreecaTVUserIE(InfoExtractor):
}, },
'playlist_count': 997, 'playlist_count': 997,
}, { }, {
'url': 'https://bj.afreecatv.com/ryuryu24/vods', 'url': 'https://ch.sooplive.co.kr/ryuryu24/vods',
'info_dict': { 'info_dict': {
'_type': 'playlist', '_type': 'playlist',
'id': 'ryuryu24', 'id': 'ryuryu24',
@ -447,7 +405,7 @@ class AfreecaTVUserIE(InfoExtractor):
}, },
'playlist_count': 221, 'playlist_count': 221,
}, { }, {
'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip', 'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/balloonclip',
'info_dict': { 'info_dict': {
'_type': 'playlist', '_type': 'playlist',
'id': 'ryuryu24', 'id': 'ryuryu24',
@ -459,12 +417,12 @@ class AfreecaTVUserIE(InfoExtractor):
def _fetch_page(self, user_id, user_type, page): def _fetch_page(self, user_id, user_type, page):
page += 1 page += 1
info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id, info = self._download_json(f'https://chapi.sooplive.co.kr/api/{user_id}/vods/{user_type}', user_id,
query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'}, query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'},
note=f'Downloading {user_type} video page {page}') note=f'Downloading {user_type} video page {page}')
for item in info['data']: for item in info['data']:
yield self.url_result( yield self.url_result(
f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no']) f'https://vod.sooplive.co.kr/player/{item["title_no"]}/', AfreecaTVIE, item['title_no'])
def _real_extract(self, url): def _real_extract(self, url):
user_id, user_type = self._match_valid_url(url).group('id', 'slug_type') user_id, user_type = self._match_valid_url(url).group('id', 'slug_type')

View file

@ -71,7 +71,7 @@ class AllstarBaseIE(InfoExtractor):
'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {media_url_or_none}}), 'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {media_url_or_none}}),
'duration': ('clipLength', {int_or_none}), 'duration': ('clipLength', {int_or_none}),
'filesize': ('clipSizeBytes', {int_or_none}), 'filesize': ('clipSizeBytes', {int_or_none}),
'timestamp': ('createdDate', {functools.partial(int_or_none, scale=1000)}), 'timestamp': ('createdDate', {int_or_none(scale=1000)}),
'uploader': ('username', {str}), 'uploader': ('username', {str}),
'uploader_id': ('user', '_id', {str}), 'uploader_id': ('user', '_id', {str}),
'view_count': ('views', {int_or_none}), 'view_count': ('views', {int_or_none}),

View file

@ -8,10 +8,8 @@ import time
from .common import InfoExtractor from .common import InfoExtractor
from ..aes import aes_encrypt from ..aes import aes_encrypt
from ..utils import ( from ..utils import (
bytes_to_intlist,
determine_ext, determine_ext,
int_or_none, int_or_none,
intlist_to_bytes,
join_nonempty, join_nonempty,
smuggle_url, smuggle_url,
strip_jsonp, strip_jsonp,
@ -33,24 +31,6 @@ class AnvatoIE(InfoExtractor):
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js
_TESTS = [{ _TESTS = [{
# from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14
'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441',
'md5': '921919dab3cd0b849ff3d624831ae3e2',
'info_dict': {
'id': '899441',
'ext': 'mp4',
'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14',
'description': 'md5:85e05a3cc163f8c344340f220521136d',
'upload_date': '20201215',
'timestamp': 1608009755,
'thumbnail': r're:^https?://.*\.jpg',
'uploader': 'NFL',
'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights',
'Player Highlights', 'Cleveland Browns', 'league'],
'duration': 157,
'categories': ['Entertainment', 'Game', 'Highlights'],
},
}, {
# from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/ # from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/
'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455', 'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455',
'md5': '837718bcfb3a7778d022f857f7a9b19e', 'md5': '837718bcfb3a7778d022f857f7a9b19e',
@ -241,31 +221,6 @@ class AnvatoIE(InfoExtractor):
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582', 'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582',
} }
def _generate_nfl_token(self, anvack, mcp_id):
reroute = self._download_json(
'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials',
headers={'X-Domain-Id': 100}, note='Fetching token info')
token_type = reroute.get('token_type') or 'Bearer'
auth_token = f'{token_type} {reroute["access_token"]}'
response = self._download_json(
'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({
'query': '''{
viewer {
mediaToken(anvack: "%s", id: %s) {
token
}
}
}''' % (anvack, mcp_id), # noqa: UP031
}).encode(), headers={
'Authorization': auth_token,
'Content-Type': 'application/json',
}, note='Fetching NFL API token')
return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token'))
_TOKEN_GENERATORS = {
'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token,
}
def _server_time(self, access_key, video_id): def _server_time(self, access_key, video_id):
return int_or_none(traverse_obj(self._download_json( return int_or_none(traverse_obj(self._download_json(
f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key}, f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key},
@ -277,8 +232,8 @@ class AnvatoIE(InfoExtractor):
server_time = self._server_time(access_key, video_id) server_time = self._server_time(access_key, video_id)
input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}' input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}'
auth_secret = intlist_to_bytes(aes_encrypt( auth_secret = bytes(aes_encrypt(
bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY))) list(input_data[:64].encode()), list(self._AUTH_KEY)))
query = { query = {
'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'), 'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'),
'rtyp': 'fp', 'rtyp': 'fp',
@ -290,8 +245,6 @@ class AnvatoIE(InfoExtractor):
} }
if extracted_token is not None: if extracted_token is not None:
api['anvstk2'] = extracted_token api['anvstk2'] = extracted_token
elif self._TOKEN_GENERATORS.get(access_key) is not None:
api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id)
elif self._ANVACK_TABLE.get(access_key) is not None: elif self._ANVACK_TABLE.get(access_key) is not None:
api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}') api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}')
else: else:

View file

@ -205,6 +205,26 @@ class ArchiveOrgIE(InfoExtractor):
}, },
}, },
], ],
}, {
# The reviewbody is None for one of the reviews; just need to extract data without crashing
'url': 'https://archive.org/details/gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
'info_dict': {
'id': 'gd95-04-02.sbd.11622.sbeok.shnf/gd95-04-02d1t04.shn',
'ext': 'mp3',
'title': 'Stuck Inside of Mobile with the Memphis Blues Again',
'creators': ['Grateful Dead'],
'duration': 338.31,
'track': 'Stuck Inside of Mobile with the Memphis Blues Again',
'description': 'md5:764348a470b986f1217ffd38d6ac7b72',
'display_id': 'gd95-04-02d1t04.shn',
'location': 'Pyramid Arena',
'uploader': 'jon@archive.org',
'album': '1995-04-02 - Pyramid Arena',
'upload_date': '20040519',
'track_number': 4,
'release_date': '19950402',
'timestamp': 1084927901,
},
}] }]
@staticmethod @staticmethod
@ -335,7 +355,7 @@ class ArchiveOrgIE(InfoExtractor):
info['comments'].append({ info['comments'].append({
'id': review.get('review_id'), 'id': review.get('review_id'),
'author': review.get('reviewer'), 'author': review.get('reviewer'),
'text': str_or_none(review.get('reviewtitle'), '') + '\n\n' + review.get('reviewbody'), 'text': join_nonempty('reviewtitle', 'reviewbody', from_dict=review, delim='\n\n'),
'timestamp': unified_timestamp(review.get('createdate')), 'timestamp': unified_timestamp(review.get('createdate')),
'parent': 'root'}) 'parent': 'root'})

View file

@ -299,7 +299,7 @@ class ARDBetaMediathekIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '94834686', 'id': '94834686',
'ext': 'mp4', 'ext': 'mp4',
'duration': 2700, 'duration': 2670,
'episode': '7 Tage ... unter harten Jungs', 'episode': '7 Tage ... unter harten Jungs',
'description': 'md5:0f215470dcd2b02f59f4bd10c963f072', 'description': 'md5:0f215470dcd2b02f59f4bd10c963f072',
'upload_date': '20231005', 'upload_date': '20231005',
@ -307,10 +307,28 @@ class ARDBetaMediathekIE(InfoExtractor):
'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3', 'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
'series': '7 Tage ...', 'series': '7 Tage ...',
'channel': 'HR', 'channel': 'HR',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f6e6d5ffac41925c?w=960&ch=fa32ba69bc87989a', 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:430c86d233afa42d?w=960&ch=fa32ba69bc87989a',
'title': '7 Tage ... unter harten Jungs', 'title': '7 Tage ... unter harten Jungs',
'_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'], '_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'],
}, },
}, {
'url': 'https://www.ardmediathek.de/video/lokalzeit-aus-duesseldorf/lokalzeit-aus-duesseldorf-oder-31-10-2024/wdr-duesseldorf/Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
'info_dict': {
'id': '13847165',
'chapters': 'count:8',
'ext': 'mp4',
'channel': 'WDR',
'display_id': 'Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz',
'episode': 'Lokalzeit aus Düsseldorf | 31.10.2024',
'series': 'Lokalzeit aus Düsseldorf',
'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f02ec9bd9b7bd5f6?w=960&ch=612491dcd5e09b0c',
'title': 'Lokalzeit aus Düsseldorf | 31.10.2024',
'upload_date': '20241031',
'timestamp': 1730399400,
'description': 'md5:12db30b3b706314efe3778b8df1a7058',
'duration': 1759,
'_old_archive_ids': ['ardbetamediathek Y3JpZDovL3dkci5kZS9CZWl0cmFnLXNvcGhvcmEtOWFkMTc0ZWMtMDA5ZS00ZDEwLWFjYjctMGNmNTdhNzVmNzUz'],
},
}, { }, {
'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', 'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
'only_matching': True, 'only_matching': True,
@ -455,6 +473,12 @@ class ARDBetaMediathekIE(InfoExtractor):
'subtitles': subtitles, 'subtitles': subtitles,
'is_live': is_live, 'is_live': is_live,
'age_limit': age_limit, 'age_limit': age_limit,
**traverse_obj(media_data, {
'chapters': ('pluginData', 'jumpmarks@all', 'chapterArray', lambda _, v: int_or_none(v['chapterTime']), {
'start_time': ('chapterTime', {int_or_none}),
'title': ('chapterTitle', {str}),
}),
}),
**traverse_obj(media_data, ('meta', { **traverse_obj(media_data, ('meta', {
'title': 'title', 'title': 'title',
'description': 'synopsis', 'description': 'synopsis',

View file

@ -1,4 +1,3 @@
import functools
import json import json
import random import random
import re import re
@ -10,7 +9,6 @@ from ..utils import (
ExtractorError, ExtractorError,
extract_attributes, extract_attributes,
float_or_none, float_or_none,
get_element_html_by_id,
int_or_none, int_or_none,
parse_filesize, parse_filesize,
str_or_none, str_or_none,
@ -21,7 +19,7 @@ from ..utils import (
url_or_none, url_or_none,
urljoin, urljoin,
) )
from ..utils.traversal import traverse_obj from ..utils.traversal import find_element, traverse_obj
class BandcampIE(InfoExtractor): class BandcampIE(InfoExtractor):
@ -45,6 +43,8 @@ class BandcampIE(InfoExtractor):
'uploader_url': 'https://youtube-dl.bandcamp.com', 'uploader_url': 'https://youtube-dl.bandcamp.com',
'uploader_id': 'youtube-dl', 'uploader_id': 'youtube-dl',
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg', 'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
'artists': ['youtube-dl "\'/\\ä↭'],
'album_artists': ['youtube-dl "\'/\\ä↭'],
}, },
'skip': 'There is a limit of 200 free downloads / month for the test song', 'skip': 'There is a limit of 200 free downloads / month for the test song',
}, { }, {
@ -271,6 +271,18 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
'timestamp': 1311756226, 'timestamp': 1311756226,
'upload_date': '20110727', 'upload_date': '20110727',
'uploader': 'Blazo', 'uploader': 'Blazo',
'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg',
'album_artists': ['Blazo'],
'uploader_url': 'https://blazo.bandcamp.com',
'release_date': '20110727',
'release_timestamp': 1311724800.0,
'track': 'Intro',
'uploader_id': 'blazo',
'track_number': 1,
'album': 'Jazz Format Mixtape vol.1',
'artists': ['Blazo'],
'duration': 19.335,
'track_id': '1353101989',
}, },
}, },
{ {
@ -282,6 +294,18 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
'timestamp': 1311757238, 'timestamp': 1311757238,
'upload_date': '20110727', 'upload_date': '20110727',
'uploader': 'Blazo', 'uploader': 'Blazo',
'track': 'Kero One - Keep It Alive (Blazo remix)',
'release_date': '20110727',
'track_id': '38097443',
'track_number': 2,
'duration': 181.467,
'uploader_url': 'https://blazo.bandcamp.com',
'album': 'Jazz Format Mixtape vol.1',
'uploader_id': 'blazo',
'album_artists': ['Blazo'],
'artists': ['Blazo'],
'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg',
'release_timestamp': 1311724800.0,
}, },
}, },
], ],
@ -289,6 +313,7 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
'title': 'Jazz Format Mixtape vol.1', 'title': 'Jazz Format Mixtape vol.1',
'id': 'jazz-format-mixtape-vol-1', 'id': 'jazz-format-mixtape-vol-1',
'uploader_id': 'blazo', 'uploader_id': 'blazo',
'description': 'md5:38052a93217f3ffdc033cd5dbbce2989',
}, },
'params': { 'params': {
'playlistend': 2, 'playlistend': 2,
@ -363,10 +388,10 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://bandcamp.com/?show=224', 'url': 'https://bandcamp.com/?show=224',
'md5': 'b00df799c733cf7e0c567ed187dea0fd', 'md5': '61acc9a002bed93986b91168aa3ab433',
'info_dict': { 'info_dict': {
'id': '224', 'id': '224',
'ext': 'opus', 'ext': 'mp3',
'title': 'BC Weekly April 4th 2017 - Magic Moments', 'title': 'BC Weekly April 4th 2017 - Magic Moments',
'description': 'md5:5d48150916e8e02d030623a48512c874', 'description': 'md5:5d48150916e8e02d030623a48512c874',
'duration': 5829.77, 'duration': 5829.77,
@ -376,7 +401,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
'episode_id': '224', 'episode_id': '224',
}, },
'params': { 'params': {
'format': 'opus-lo', 'format': 'mp3-128',
}, },
}, { }, {
'url': 'https://bandcamp.com/?blah/blah@&show=228', 'url': 'https://bandcamp.com/?blah/blah@&show=228',
@ -484,7 +509,7 @@ class BandcampUserIE(InfoExtractor):
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage)) or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
yield from traverse_obj(webpage, ( yield from traverse_obj(webpage, (
{functools.partial(get_element_html_by_id, 'music-grid')}, {extract_attributes}, {find_element(id='music-grid', html=True)}, {extract_attributes},
'data-client-items', {json.loads}, ..., 'page_url', {str})) 'data-client-items', {json.loads}, ..., 'page_url', {str}))
def _real_extract(self, url): def _real_extract(self, url):
@ -493,4 +518,4 @@ class BandcampUserIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
self._yield_items(webpage), uploader, f'Discography of {uploader}', self._yield_items(webpage), uploader, f'Discography of {uploader}',
getter=functools.partial(urljoin, url)) getter=urljoin(url))

437
yt_dlp/extractor/bandlab.py Normal file
View file

@ -0,0 +1,437 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
float_or_none,
format_field,
int_or_none,
parse_iso8601,
parse_qs,
truncate_string,
url_or_none,
)
from ..utils.traversal import traverse_obj, value
class BandlabBaseIE(InfoExtractor):
def _call_api(self, endpoint, asset_id, **kwargs):
    """Download and return JSON from the BandLab v1.3 API for ``endpoint``/``asset_id``.

    A ``headers`` keyword argument (may be None) is merged on top of the
    default request headers, so caller-supplied values win on conflict.
    All remaining keyword arguments are forwarded to ``_download_json``.
    """
    extra_headers = kwargs.pop('headers', None) or {}
    request_headers = {
        'accept': 'application/json',
        'referer': 'https://www.bandlab.com/',
        'x-client-id': 'BandLab-Web',
        'x-client-version': '10.1.124',
    }
    # Applied last so that caller-provided headers override the defaults
    request_headers.update(extra_headers)
    api_url = f'https://www.bandlab.com/api/v1.3/{endpoint}/{asset_id}'
    return self._download_json(api_url, asset_id, headers=request_headers, **kwargs)
def _parse_revision(self, revision_data, url=None):
return {
'vcodec': 'none',
'media_type': 'revision',
'extractor_key': BandlabIE.ie_key(),
'extractor': BandlabIE.IE_NAME,
**traverse_obj(revision_data, {
'webpage_url': (
'id', ({value(url)}, {format_field(template='https://www.bandlab.com/revision/%s')}), filter, any),
'id': (('revisionId', 'id'), {str}, any),
'title': ('song', 'name', {str}),
'track': ('song', 'name', {str}),
'url': ('mixdown', 'file', {url_or_none}),
'thumbnail': ('song', 'picture', 'url', {url_or_none}),
'description': ('description', {str}),
'uploader': ('creator', 'name', {str}),
'uploader_id': ('creator', 'username', {str}),
'timestamp': ('createdOn', {parse_iso8601}),
'duration': ('mixdown', 'duration', {float_or_none}),
'view_count': ('counters', 'plays', {int_or_none}),
'like_count': ('counters', 'likes', {int_or_none}),
'comment_count': ('counters', 'comments', {int_or_none}),
'genres': ('genres', ..., 'name', {str}),
}),
}
def _parse_track(self, track_data, url=None):
return {
'vcodec': 'none',
'media_type': 'track',
'extractor_key': BandlabIE.ie_key(),
'extractor': BandlabIE.IE_NAME,
**traverse_obj(track_data, {
'webpage_url': (
'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
'id': (('revisionId', 'id'), {str}, any),
'url': ('track', 'sample', 'audioUrl', {url_or_none}),
'title': ('track', 'name', {str}),
'track': ('track', 'name', {str}),
'description': ('caption', {str}),
'thumbnail': ('track', 'picture', ('original', 'url'), {url_or_none}, any),
'view_count': ('counters', 'plays', {int_or_none}),
'like_count': ('counters', 'likes', {int_or_none}),
'comment_count': ('counters', 'comments', {int_or_none}),
'duration': ('track', 'sample', 'duration', {float_or_none}),
'uploader': ('creator', 'name', {str}),
'uploader_id': ('creator', 'username', {str}),
'timestamp': ('createdOn', {parse_iso8601}),
}),
}
def _parse_video(self, video_data, url=None):
return {
'media_type': 'video',
'extractor_key': BandlabIE.ie_key(),
'extractor': BandlabIE.IE_NAME,
**traverse_obj(video_data, {
'id': ('id', {str}),
'webpage_url': (
'id', ({value(url)}, {format_field(template='https://www.bandlab.com/post/%s')}), filter, any),
'url': ('video', 'url', {url_or_none}),
'title': ('caption', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
'description': ('caption', {str}),
'thumbnail': ('video', 'picture', 'url', {url_or_none}),
'view_count': ('video', 'counters', 'plays', {int_or_none}),
'like_count': ('video', 'counters', 'likes', {int_or_none}),
'comment_count': ('counters', 'comments', {int_or_none}),
'duration': ('video', 'duration', {float_or_none}),
'uploader': ('creator', 'name', {str}),
'uploader_id': ('creator', 'username', {str}),
}),
}
class BandlabIE(BandlabBaseIE):
    """Extractor for single BandLab tracks, posts, revisions and player embeds."""

    # The dot in the host name is escaped so that only the literal
    # "bandlab.com" matches (an unescaped "." would accept any character)
    _VALID_URL = [
        r'https?://(?:www\.)?bandlab\.com/(?P<url_type>track|post|revision)/(?P<id>[\da-f_-]+)',
        r'https?://(?:www\.)?bandlab\.com/(?P<url_type>embed)/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
    ]
    # Only the embed form of the URL is searched for inside third-party pages
    _EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
    _TESTS = [{
        'url': 'https://www.bandlab.com/track/04b37e88dba24967b9dac8eb8567ff39_07d7f906fc96ee11b75e000d3a428fff',
        'md5': '46f7b43367dd268bbcf0bbe466753b2c',
        'info_dict': {
            'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
            'ext': 'm4a',
            'uploader_id': 'ender_milze',
            'track': 'sweet black',
            'description': 'composed by juanjn3737',
            'timestamp': 1702171963,
            'view_count': int,
            'like_count': int,
            'duration': 54.629999999999995,
            'title': 'sweet black',
            'upload_date': '20231210',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
            'genres': ['Lofi'],
            'uploader': 'ender milze',
            'comment_count': int,
            'media_type': 'revision',
        },
    }, {
        # Same track as above but post URL
        'url': 'https://www.bandlab.com/post/07d7f906-fc96-ee11-b75e-000d3a428fff',
        'md5': '46f7b43367dd268bbcf0bbe466753b2c',
        'info_dict': {
            'id': '02d7f906-fc96-ee11-b75e-000d3a428fff',
            'ext': 'm4a',
            'uploader_id': 'ender_milze',
            'track': 'sweet black',
            'description': 'composed by juanjn3737',
            'timestamp': 1702171973,
            'view_count': int,
            'like_count': int,
            'duration': 54.629999999999995,
            'title': 'sweet black',
            'upload_date': '20231210',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/fa082beb-b856-4730-9170-a57e4e32cc2c/',
            'genres': ['Lofi'],
            'uploader': 'ender milze',
            'comment_count': int,
            'media_type': 'revision',
        },
    }, {
        # SharedKey Example
        'url': 'https://www.bandlab.com/track/048916c2-c6da-ee11-85f9-6045bd2e11f9?sharedKey=0NNWX8qYAEmI38lWAzCNDA',
        'md5': '15174b57c44440e2a2008be9cae00250',
        'info_dict': {
            'id': '038916c2-c6da-ee11-85f9-6045bd2e11f9',
            'ext': 'm4a',
            'comment_count': int,
            'genres': ['Other'],
            'uploader_id': 'user8353034818103753',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/51b18363-da23-4b9b-a29c-2933a3e561ca/',
            'timestamp': 1709625771,
            'track': 'PodcastMaerchen4b',
            'duration': 468.14,
            'view_count': int,
            'description': 'Podcast: Neues aus der Märchenwelt',
            'like_count': int,
            'upload_date': '20240305',
            'uploader': 'Erna Wageneder',
            'title': 'PodcastMaerchen4b',
            'media_type': 'revision',
        },
    }, {
        # Different Revision selected
        'url': 'https://www.bandlab.com/track/130343fc-148b-ea11-96d2-0003ffd1fc09?revId=110343fc-148b-ea11-96d2-0003ffd1fc09',
        'md5': '74e055ef9325d63f37088772fbfe4454',
        'info_dict': {
            'id': '110343fc-148b-ea11-96d2-0003ffd1fc09',
            'ext': 'm4a',
            'timestamp': 1588273294,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/b612e533-e4f7-4542-9f50-3fcfd8dd822c/',
            'description': 'Final Revision.',
            'title': 'Replay ( Instrumental)',
            'uploader': 'David R Sparks',
            'uploader_id': 'davesnothome69',
            'view_count': int,
            'comment_count': int,
            'track': 'Replay ( Instrumental)',
            'genres': ['Rock'],
            'upload_date': '20200430',
            'like_count': int,
            'duration': 279.43,
            'media_type': 'revision',
        },
    }, {
        # Video
        'url': 'https://www.bandlab.com/post/5cdf9036-3857-ef11-991a-6045bd36e0d9',
        'md5': '8caa2ef28e86c1dacf167293cfdbeba9',
        'info_dict': {
            'id': '5cdf9036-3857-ef11-991a-6045bd36e0d9',
            'ext': 'mp4',
            'duration': 44.705,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/videos/67c6cef1-cef6-40d3-831e-a55bc1dcb972/',
            'comment_count': int,
            'title': 'backing vocals',
            'uploader_id': 'marliashya',
            'uploader': 'auraa',
            'like_count': int,
            'description': 'backing vocals',
            'media_type': 'video',
        },
    }, {
        # Embed Example
        'url': 'https://www.bandlab.com/embed/?blur=false&id=014de0a4-7d82-ea11-a94c-0003ffd19c0f',
        'md5': 'a4ad05cb68c54faaed9b0a8453a8cf4a',
        'info_dict': {
            'id': '014de0a4-7d82-ea11-a94c-0003ffd19c0f',
            'ext': 'm4a',
            'comment_count': int,
            'genres': ['Electronic'],
            'uploader': 'Charlie Henson',
            'timestamp': 1587328674,
            'upload_date': '20200419',
            'view_count': int,
            'track': 'Positronic Meltdown',
            'duration': 318.55,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/87165bc3-5439-496e-b1f7-a9f13b541ff2/',
            'description': 'Checkout my tracks at AOMX http://aomxsounds.com/',
            'uploader_id': 'microfreaks',
            'title': 'Positronic Meltdown',
            'like_count': int,
            'media_type': 'revision',
        },
    }, {
        # Track without revisions available
        'url': 'https://www.bandlab.com/track/55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
        'md5': 'f05d68a3769952c2d9257c473e14c15f',
        'info_dict': {
            'id': '55767ac51789ea11a94c0003ffd1fc09_2f007b0a37b94ec7a69bc25ae15108a5',
            'ext': 'm4a',
            'track': 'insame',
            'like_count': int,
            'duration': 84.03,
            'title': 'insame',
            'view_count': int,
            'comment_count': int,
            'uploader': 'Sorakime',
            'uploader_id': 'sorakime',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/users/572a351a-0f3a-4c6a-ac39-1a5defdeeb1c/',
            'timestamp': 1691162128,
            'upload_date': '20230804',
            'media_type': 'track',
        },
    }, {
        'url': 'https://www.bandlab.com/revision/014de0a4-7d82-ea11-a94c-0003ffd19c0f',
        'only_matching': True,
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://phantomluigi.github.io/',
        'info_dict': {
            'id': 'e14223c3-7871-ef11-bdfd-000d3a980db3',
            'ext': 'm4a',
            'view_count': int,
            'upload_date': '20240913',
            'uploader_id': 'phantommusicofficial',
            'timestamp': 1726194897,
            'uploader': 'Phantom',
            'comment_count': int,
            'genres': ['Progresive Rock'],
            'description': 'md5:a38cd668f7a2843295ef284114f18429',
            'duration': 225.23,
            'like_count': int,
            'title': 'Vermilion Pt. 2 (Cover)',
            'track': 'Vermilion Pt. 2 (Cover)',
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/songs/62b10750-7aef-4f42-ad08-1af52f577e97/',
            'media_type': 'revision',
        },
    }]

    def _real_extract(self, url):
        """Resolve a BandLab URL to a revision, track or video info dict.

        Raises ExtractorError when the post carries no revision and its type
        is neither 'Video' nor 'Track'.
        """
        display_id, url_type = self._match_valid_url(url).group('id', 'url_type')

        qs = parse_qs(url)
        # An explicit revision can be selected through the `revId` query
        # parameter, or through `id` (the parameter used by embed URLs)
        revision_id = traverse_obj(qs, (('revId', 'id'), 0, any))
        if url_type == 'revision':
            # For /revision/ URLs the path component itself is the revision id
            revision_id = display_id

        revision_data = None
        if not revision_id:
            # No revision known yet: look the id up as a post
            post_data = self._call_api(
                'posts', display_id, note='Downloading post data',
                query=traverse_obj(qs, {'sharedKey': ('sharedKey', 0)}))

            revision_id = traverse_obj(post_data, (('revisionId', ('revision', 'id')), {str}, any))
            revision_data = traverse_obj(post_data, ('revision', {dict}))

            if not revision_data and not revision_id:
                # Posts without any revision are handled by their declared type
                post_type = post_data.get('type')
                if post_type == 'Video':
                    return self._parse_video(post_data, url=url)
                if post_type == 'Track':
                    return self._parse_track(post_data, url=url)
                raise ExtractorError(f'Could not extract data for post type {post_type!r}')

        if not revision_data:
            # Only the id is known; fetch the revision itself
            revision_data = self._call_api(
                'revisions', revision_id, note='Downloading revision data', query={'edit': 'false'})

        return self._parse_revision(revision_data, url=url)
class BandlabPlaylistIE(BandlabBaseIE):
    """Extractor for BandLab albums, collections and collection embeds."""

    # The dot in the host name is escaped so that only the literal
    # "bandlab.com" matches (an unescaped "." would accept any character)
    _VALID_URL = [
        r'https?://(?:www\.)?bandlab\.com/(?:[\w]+/)?(?P<type>albums|collections)/(?P<id>[\da-f-]+)',
        r'https?://(?:www\.)?bandlab\.com/(?P<type>embed)/collection/\?(?:[^#]*&)?id=(?P<id>[\da-f-]+)',
    ]
    # Only the embed form of the URL is searched for inside third-party pages
    _EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL[1]})[\'"]']
    _TESTS = [{
        'url': 'https://www.bandlab.com/davesnothome69/albums/89b79ea6-de42-ed11-b495-00224845aac7',
        'info_dict': {
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/69507ff3-579a-45be-afca-9e87eddec944/',
            'release_date': '20221003',
            'title': 'Remnants',
            'album': 'Remnants',
            'like_count': int,
            'album_type': 'LP',
            'description': 'A collection of some feel good, rock hits.',
            'comment_count': int,
            'view_count': int,
            'id': '89b79ea6-de42-ed11-b495-00224845aac7',
            'uploader': 'David R Sparks',
            'uploader_id': 'davesnothome69',
        },
        'playlist_count': 10,
    }, {
        'url': 'https://www.bandlab.com/slytheband/collections/955102d4-1040-ef11-86c3-000d3a42581b',
        'info_dict': {
            'id': '955102d4-1040-ef11-86c3-000d3a42581b',
            'timestamp': 1720762659,
            'view_count': int,
            'title': 'My Shit 🖤',
            'uploader_id': 'slytheband',
            'uploader': '𝓢𝓛𝓨',
            'upload_date': '20240712',
            'like_count': int,
            'thumbnail': 'https://bandlabimages.azureedge.net/v1.0/collections/2c64ca12-b180-4b76-8587-7a8da76bddc8/',
        },
        'playlist_count': 15,
    }, {
        # Embeds can contain both albums and collections with the same URL pattern. This is an album
        'url': 'https://www.bandlab.com/embed/collection/?id=12cc6f7f-951b-ee11-907c-00224844f303',
        'info_dict': {
            'id': '12cc6f7f-951b-ee11-907c-00224844f303',
            'release_date': '20230706',
            'description': 'This is a collection of songs I created when I had an Amiga computer.',
            'view_count': int,
            'title': 'Mark Salud The Amiga Collection',
            'uploader_id': 'mssirmooth1962',
            'comment_count': int,
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/d618bd7b-0537-40d5-bdd8-61b066e77d59/',
            'like_count': int,
            'uploader': 'Mark Salud',
            'album': 'Mark Salud The Amiga Collection',
            'album_type': 'LP',
        },
        'playlist_count': 24,
    }, {
        # Tracks without revision id
        'url': 'https://www.bandlab.com/embed/collection/?id=e98aafb5-d932-ee11-b8f0-00224844c719',
        'info_dict': {
            'like_count': int,
            'uploader_id': 'sorakime',
            'comment_count': int,
            'uploader': 'Sorakime',
            'view_count': int,
            'description': 'md5:4ec31c568a5f5a5a2b17572ea64c3825',
            'release_date': '20230812',
            'title': 'Art',
            'album': 'Art',
            'album_type': 'Album',
            'id': 'e98aafb5-d932-ee11-b8f0-00224844c719',
            'thumbnail': 'https://bl-prod-images.azureedge.net/v1.3/albums/20c890de-e94a-4422-828a-2da6377a13c8/',
        },
        'playlist_count': 13,
    }, {
        'url': 'https://www.bandlab.com/albums/89b79ea6-de42-ed11-b495-00224845aac7',
        'only_matching': True,
    }]

    def _entries(self, album_data):
        """Yield one info dict per post in `album_data['posts']` that has a truthy `type`.

        Posts of an unrecognised type are skipped with a warning.
        """
        for post in traverse_obj(album_data, ('posts', lambda _, v: v['type'])):
            post_type = post['type']
            if post_type == 'Revision':
                yield self._parse_revision(post.get('revision'))
            elif post_type == 'Track':
                yield self._parse_track(post)
            elif post_type == 'Video':
                yield self._parse_video(post)
            else:
                self.report_warning(f'Skipping unknown post type: "{post_type}"')

    def _real_extract(self, url):
        """Fetch album/collection data and return it as a playlist result.

        Raises ExtractorError when none of the candidate API endpoints
        returns usable playlist data.
        """
        playlist_id, playlist_type = self._match_valid_url(url).group('id', 'type')

        # Embed URLs do not say whether they point to a collection or an
        # album, so both endpoints are tried in turn
        endpoints = {
            'albums': ['albums'],
            'collections': ['collections'],
            'embed': ['collections', 'albums'],
        }.get(playlist_type)

        playlist_data = None
        error_code = None
        for endpoint in endpoints:
            response = self._call_api(
                endpoint, playlist_id, note=f'Downloading {endpoint[:-1]} data',
                fatal=False, expected_status=404)
            # With fatal=False a failed download/parse may hand back a non-dict
            # value; calling .get() on it would raise AttributeError
            if not isinstance(response, dict):
                continue
            error_code = response.get('errorCode')
            if not error_code:
                playlist_data = response
                # Remember which endpoint answered; decides album metadata below
                playlist_type = endpoint
                break

        if playlist_data is None:
            if error_code:
                raise ExtractorError(f'Could not find playlist data. Error code: "{error_code}"')
            raise ExtractorError('Could not find playlist data')

        return self.playlist_result(
            self._entries(playlist_data), playlist_id,
            **traverse_obj(playlist_data, {
                'title': ('name', {str}),
                'description': ('description', {str}),
                'uploader': ('creator', 'name', {str}),
                'uploader_id': ('creator', 'username', {str}),
                'timestamp': ('createdOn', {parse_iso8601}),
                'release_date': ('releaseDate', {lambda x: x.replace('-', '')}, filter),
                'thumbnail': ('picture', ('original', 'url'), {url_or_none}, any),
                'like_count': ('counters', 'likes', {int_or_none}),
                'comment_count': ('counters', 'comments', {int_or_none}),
                'view_count': ('counters', 'plays', {int_or_none}),
            }),
            # Album-specific fields are only added when the albums endpoint answered
            **(traverse_obj(playlist_data, {
                'album': ('name', {str}),
                'album_type': ('type', {str}),
            }) if playlist_type == 'albums' else {}))

View file

@ -1284,9 +1284,9 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
**traverse_obj(model, { **traverse_obj(model, {
'title': ('title', {str}), 'title': ('title', {str}),
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}), 'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any), 'description': ('synopses', ('long', 'medium', 'short'), {str}, filter, any),
'duration': ('versions', 0, 'duration', {int}), 'duration': ('versions', 0, 'duration', {int}),
'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}), 'timestamp': ('versions', 0, 'availableFrom', {int_or_none(scale=1000)}),
}), }),
} }
@ -1386,7 +1386,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), { formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
'url': ('url', {url_or_none}), 'url': ('url', {url_or_none}),
'ext': ('format', {str}), 'ext': ('format', {str}),
'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}), 'tbr': ('bitrate', {int_or_none(scale=1000)}),
})) }))
if formats: if formats:
entry = { entry = {
@ -1398,7 +1398,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
'title': ('title', {str}), 'title': ('title', {str}),
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}), 'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
'description': ('synopses', ('long', 'medium', 'short'), {str}, any), 'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}), 'timestamp': ('firstPublished', {int_or_none(scale=1000)}),
}), }),
} }
done = True done = True
@ -1428,7 +1428,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
if not entry.get('timestamp'): if not entry.get('timestamp'):
entry['timestamp'] = traverse_obj(next_data, ( entry['timestamp'] = traverse_obj(next_data, (
..., 'contents', is_type('timestamp'), 'model', ..., 'contents', is_type('timestamp'), 'model',
'timestamp', {functools.partial(int_or_none, scale=1000)}, any)) 'timestamp', {int_or_none(scale=1000)}, any))
entries.append(entry) entries.append(entry)
return self.playlist_result( return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description) entries, playlist_id, playlist_title, playlist_description)

View file

@ -1,18 +1,33 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import extract_attributes from ..utils import ExtractorError, extract_attributes
class BFMTVBaseIE(InfoExtractor): class BFMTVBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.|rmc\.)?bfmtv\.com/' _VALID_URL_BASE = r'https?://(?:www\.|rmc\.)?bfmtv\.com/'
_VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html' _VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>)' _VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block[^"]*"[^>]*>.*?</div>)'
_VIDEO_ELEMENT_REGEX = r'(<video-js[^>]+>)'
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
def _brightcove_url_result(self, video_id, video_block): def _extract_video(self, video_block):
account_id = video_block.get('accountid') or '876450612001' video_element = self._search_regex(
player_id = video_block.get('playerid') or 'I2qBTln4u' self._VIDEO_ELEMENT_REGEX, video_block, 'video element', default=None)
if video_element:
video_element_attrs = extract_attributes(video_element)
video_id = video_element_attrs.get('data-video-id')
if not video_id:
return
account_id = video_element_attrs.get('data-account') or '876450610001'
player_id = video_element_attrs.get('adjustplayer') or '19dszYXgm'
else:
video_block_attrs = extract_attributes(video_block)
video_id = video_block_attrs.get('videoid')
if not video_id:
return
account_id = video_block_attrs.get('accountid') or '876630703001'
player_id = video_block_attrs.get('playerid') or 'KbPwEbuHx'
return self.url_result( return self.url_result(
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id), self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
'BrightcoveNew', video_id) 'BrightcoveNew', video_id)
@ -40,23 +55,25 @@ class BFMTVIE(BFMTVBaseIE):
def _real_extract(self, url): def _real_extract(self, url):
bfmtv_id = self._match_id(url) bfmtv_id = self._match_id(url)
webpage = self._download_webpage(url, bfmtv_id) webpage = self._download_webpage(url, bfmtv_id)
video_block = extract_attributes(self._search_regex( video = self._extract_video(self._search_regex(
self._VIDEO_BLOCK_REGEX, webpage, 'video block')) self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
return self._brightcove_url_result(video_block['videoid'], video_block) if not video:
raise ExtractorError('Failed to extract video')
return video
class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE class BFMTVLiveIE(BFMTVBaseIE):
IE_NAME = 'bfmtv:live' IE_NAME = 'bfmtv:live'
_VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)' _VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.bfmtv.com/en-direct/', 'url': 'https://www.bfmtv.com/en-direct/',
'info_dict': { 'info_dict': {
'id': '5615950982001', 'id': '6346069778112',
'ext': 'mp4', 'ext': 'mp4',
'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'title': r're:^Le Live BFM TV \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'uploader_id': '876450610001', 'uploader_id': '876450610001',
'upload_date': '20220926', 'upload_date': '20240202',
'timestamp': 1664207191, 'timestamp': 1706887572,
'live_status': 'is_live', 'live_status': 'is_live',
'thumbnail': r're:https://.+/image\.jpg', 'thumbnail': r're:https://.+/image\.jpg',
'tags': [], 'tags': [],
@ -69,6 +86,15 @@ class BFMTVLiveIE(BFMTVIE): # XXX: Do not subclass from concrete IE
'only_matching': True, 'only_matching': True,
}] }]
def _real_extract(self, url):
bfmtv_id = self._match_id(url)
webpage = self._download_webpage(url, bfmtv_id)
video = self._extract_video(self._search_regex(
self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
if not video:
raise ExtractorError('Failed to extract video')
return video
class BFMTVArticleIE(BFMTVBaseIE): class BFMTVArticleIE(BFMTVBaseIE):
IE_NAME = 'bfmtv:article' IE_NAME = 'bfmtv:article'
@ -102,18 +128,16 @@ class BFMTVArticleIE(BFMTVBaseIE):
}, },
}] }]
def _entries(self, webpage):
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
video = self._extract_video(video_block_el)
if video:
yield video
def _real_extract(self, url): def _real_extract(self, url):
bfmtv_id = self._match_id(url) bfmtv_id = self._match_id(url)
webpage = self._download_webpage(url, bfmtv_id) webpage = self._download_webpage(url, bfmtv_id)
entries = []
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
video_block = extract_attributes(video_block_el)
video_id = video_block.get('videoid')
if not video_id:
continue
entries.append(self._brightcove_url_result(video_id, video_block))
return self.playlist_result( return self.playlist_result(
entries, bfmtv_id, self._og_search_title(webpage, fatal=False), self._entries(webpage), bfmtv_id, self._og_search_title(webpage, fatal=False),
self._html_search_meta(['og:description', 'description'], webpage)) self._html_search_meta(['og:description', 'description'], webpage))

View file

@ -1,4 +1,3 @@
import functools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -50,7 +49,7 @@ class BibelTVBaseIE(InfoExtractor):
**traverse_obj(data, { **traverse_obj(data, {
'title': 'title', 'title': 'title',
'description': 'description', 'description': 'description',
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}), 'duration': ('duration', {int_or_none(scale=1000)}),
'timestamp': ('schedulingStart', {parse_iso8601}), 'timestamp': ('schedulingStart', {parse_iso8601}),
'season_number': 'seasonNumber', 'season_number': 'seasonNumber',
'episode_number': 'episodeNumber', 'episode_number': 'episodeNumber',

View file

@ -109,7 +109,7 @@ class BilibiliBaseIE(InfoExtractor):
fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), { fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
'url': ('url', {url_or_none}), 'url': ('url', {url_or_none}),
'duration': ('length', {functools.partial(float_or_none, scale=1000)}), 'duration': ('length', {float_or_none(scale=1000)}),
'filesize': ('size', {int_or_none}), 'filesize': ('size', {int_or_none}),
})) }))
if fragments: if fragments:
@ -124,7 +124,7 @@ class BilibiliBaseIE(InfoExtractor):
'quality': ('quality', {int_or_none}), 'quality': ('quality', {int_or_none}),
'format_id': ('quality', {str_or_none}), 'format_id': ('quality', {str_or_none}),
'format_note': ('quality', {lambda x: format_names.get(x)}), 'format_note': ('quality', {lambda x: format_names.get(x)}),
'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}), 'duration': ('timelength', {float_or_none(scale=1000)}),
}), }),
**parse_resolution(format_names.get(play_info.get('quality'))), **parse_resolution(format_names.get(play_info.get('quality'))),
}) })
@ -1585,7 +1585,7 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
'title': ('title', {str}), 'title': ('title', {str}),
'uploader': ('upper', 'name', {str}), 'uploader': ('upper', 'name', {str}),
'uploader_id': ('upper', 'mid', {str_or_none}), 'uploader_id': ('upper', 'mid', {str_or_none}),
'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}), 'timestamp': ('ctime', {int_or_none}, filter),
'thumbnail': ('cover', {url_or_none}), 'thumbnail': ('cover', {url_or_none}),
})), })),
} }

388
yt_dlp/extractor/bluesky.py Normal file
View file

@ -0,0 +1,388 @@
from .common import InfoExtractor
from ..utils import (
ExtractorError,
format_field,
int_or_none,
mimetype2ext,
orderedSet,
parse_iso8601,
truncate_string,
update_url_query,
url_basename,
url_or_none,
variadic,
)
from ..utils.traversal import traverse_obj
class BlueskyIE(InfoExtractor):
_VALID_URL = [
r'https?://(?:www\.)?(?:bsky\.app|main\.bsky\.dev)/profile/(?P<handle>[\w.:%-]+)/post/(?P<id>\w+)',
r'at://(?P<handle>[\w.:%-]+)/app\.bsky\.feed\.post/(?P<id>\w+)',
]
_TESTS = [{
'url': 'https://bsky.app/profile/blu3blue.bsky.social/post/3l4omssdl632g',
'md5': '375539c1930ab05d15585ed772ab54fd',
'info_dict': {
'id': '3l4omssdl632g',
'ext': 'mp4',
'uploader': 'Blu3Blu3Lilith',
'uploader_id': 'blu3blue.bsky.social',
'uploader_url': 'https://bsky.app/profile/blu3blue.bsky.social',
'channel_id': 'did:plc:pzdr5ylumf7vmvwasrpr5bf2',
'channel_url': 'https://bsky.app/profile/did:plc:pzdr5ylumf7vmvwasrpr5bf2',
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
'title': 'OMG WE HAVE VIDEOS NOW',
'description': 'OMG WE HAVE VIDEOS NOW',
'upload_date': '20240921',
'timestamp': 1726940605,
'like_count': int,
'repost_count': int,
'comment_count': int,
'tags': [],
},
}, {
'url': 'https://bsky.app/profile/bsky.app/post/3l3vgf77uco2g',
'md5': 'b9e344fdbce9f2852c668a97efefb105',
'info_dict': {
'id': '3l3vgf77uco2g',
'ext': 'mp4',
'uploader': 'Bluesky',
'uploader_id': 'bsky.app',
'uploader_url': 'https://bsky.app/profile/bsky.app',
'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
'title': 'Bluesky now has video! Update your app to versi...',
'alt_title': 'Bluesky video feature announcement',
'description': r're:(?s)Bluesky now has video! .{239}',
'upload_date': '20240911',
'timestamp': 1726074716,
'like_count': int,
'repost_count': int,
'comment_count': int,
'tags': [],
'subtitles': {
'en': 'mincount:1',
},
},
}, {
'url': 'https://main.bsky.dev/profile/souris.moe/post/3l4qhp7bcs52c',
'md5': '5f2df8c200b5633eb7fb2c984d29772f',
'info_dict': {
'id': '3l4qhp7bcs52c',
'ext': 'mp4',
'uploader': 'souris',
'uploader_id': 'souris.moe',
'uploader_url': 'https://bsky.app/profile/souris.moe',
'channel_id': 'did:plc:tj7g244gl5v6ai6cm4f4wlqp',
'channel_url': 'https://bsky.app/profile/did:plc:tj7g244gl5v6ai6cm4f4wlqp',
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
'title': 'Bluesky video #3l4qhp7bcs52c',
'upload_date': '20240922',
'timestamp': 1727003838,
'like_count': int,
'repost_count': int,
'comment_count': int,
'tags': [],
},
}, {
'url': 'https://bsky.app/profile/de1.pds.tentacle.expert/post/3l3w4tnezek2e',
'md5': '1af9c7fda061cf7593bbffca89e43d1c',
'info_dict': {
'id': '3l3w4tnezek2e',
'ext': 'mp4',
'uploader': 'clean',
'uploader_id': 'de1.pds.tentacle.expert',
'uploader_url': 'https://bsky.app/profile/de1.pds.tentacle.expert',
'channel_id': 'did:web:de1.tentacle.expert',
'channel_url': 'https://bsky.app/profile/did:web:de1.tentacle.expert',
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
'title': 'Bluesky video #3l3w4tnezek2e',
'upload_date': '20240911',
'timestamp': 1726098823,
'like_count': int,
'repost_count': int,
'comment_count': int,
'tags': [],
},
}, {
'url': 'https://bsky.app/profile/yunayuispink.bsky.social/post/3l7gqcfes742o',
'info_dict': {
'id': 'XxK3t_5V3ao',
'ext': 'mp4',
'uploader': 'yunayu',
'uploader_id': '@yunayuispink',
'uploader_url': 'https://www.youtube.com/@yunayuispink',
'channel': 'yunayu',
'channel_id': 'UCPLvXnHa7lTyNoR_dGsU14w',
'channel_url': 'https://www.youtube.com/channel/UCPLvXnHa7lTyNoR_dGsU14w',
'thumbnail': 'https://i.ytimg.com/vi_webp/XxK3t_5V3ao/maxresdefault.webp',
'description': r're:Have a good goodx10000day',
'title': '5min vs 5hours drawing',
'availability': 'public',
'live_status': 'not_live',
'playable_in_embed': True,
'upload_date': '20241026',
'timestamp': 1729967784,
'duration': 321,
'age_limit': 0,
'like_count': int,
'view_count': int,
'comment_count': int,
'channel_follower_count': int,
'categories': ['Entertainment'],
'tags': [],
},
'add_ie': ['Youtube'],
}, {
'url': 'https://bsky.app/profile/endshark.bsky.social/post/3jzxjkcemae2m',
'info_dict': {
'id': '222792849',
'ext': 'mp3',
'uploader': 'LASERBAT',
'uploader_id': 'laserbatx',
'uploader_url': 'https://laserbatx.bandcamp.com',
'artists': ['LASERBAT'],
'album_artists': ['LASERBAT'],
'album': 'Hari Nezumi [EP]',
'track': 'Forward to the End',
'title': 'LASERBAT - Forward to the End',
'thumbnail': 'https://f4.bcbits.com/img/a2507705510_5.jpg',
'duration': 228.571,
'track_id': '222792849',
'release_date': '20230423',
'upload_date': '20230423',
'timestamp': 1682276040.0,
'release_timestamp': 1682276040.0,
'track_number': 1,
},
'add_ie': ['Bandcamp'],
}, {
'url': 'https://bsky.app/profile/dannybhoix.bsky.social/post/3l6oe5mtr2c2j',
'md5': 'b9e344fdbce9f2852c668a97efefb105',
'info_dict': {
'id': '3l3vgf77uco2g',
'ext': 'mp4',
'uploader': 'Bluesky',
'uploader_id': 'bsky.app',
'uploader_url': 'https://bsky.app/profile/bsky.app',
'channel_id': 'did:plc:z72i7hdynmk6r22z27h6tvur',
'channel_url': 'https://bsky.app/profile/did:plc:z72i7hdynmk6r22z27h6tvur',
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
'title': 'Bluesky now has video! Update your app to versi...',
'alt_title': 'Bluesky video feature announcement',
'description': r're:(?s)Bluesky now has video! .{239}',
'upload_date': '20240911',
'timestamp': 1726074716,
'like_count': int,
'repost_count': int,
'comment_count': int,
'tags': [],
'subtitles': {
'en': 'mincount:1',
},
},
}, {
'url': 'https://bsky.app/profile/alt.bun.how/post/3l7rdfxhyds2f',
'md5': '8775118b235cf9fa6b5ad30f95cda75c',
'info_dict': {
'id': '3l7rdfxhyds2f',
'ext': 'mp4',
'uploader': 'cinnamon',
'uploader_id': 'alt.bun.how',
'uploader_url': 'https://bsky.app/profile/alt.bun.how',
'channel_id': 'did:plc:7x6rtuenkuvxq3zsvffp2ide',
'channel_url': 'https://bsky.app/profile/did:plc:7x6rtuenkuvxq3zsvffp2ide',
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
'title': 'crazy that i look like this tbh',
'description': 'crazy that i look like this tbh',
'upload_date': '20241030',
'timestamp': 1730332128,
'like_count': int,
'repost_count': int,
'comment_count': int,
'tags': ['sexual'],
'age_limit': 18,
},
}, {
'url': 'at://did:plc:ia76kvnndjutgedggx2ibrem/app.bsky.feed.post/3l6zrz6zyl2dr',
'md5': '71b0eb6d85d03145e6af6642c7fc6d78',
'info_dict': {
'id': '3l6zrz6zyl2dr',
'ext': 'mp4',
'uploader': 'mary🐇',
'uploader_id': 'mary.my.id',
'uploader_url': 'https://bsky.app/profile/mary.my.id',
'channel_id': 'did:plc:ia76kvnndjutgedggx2ibrem',
'channel_url': 'https://bsky.app/profile/did:plc:ia76kvnndjutgedggx2ibrem',
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
'title': 'Bluesky video #3l6zrz6zyl2dr',
'upload_date': '20241021',
'timestamp': 1729523172,
'like_count': int,
'repost_count': int,
'comment_count': int,
'tags': [],
},
}, {
'url': 'https://bsky.app/profile/purpleicetea.bsky.social/post/3l7gv55dc2o2w',
'info_dict': {
'id': '3l7gv55dc2o2w',
},
'playlist': [{
'info_dict': {
'id': '3l7gv55dc2o2w',
'ext': 'mp4',
'upload_date': '20241026',
'description': 'One of my favorite videos',
'comment_count': int,
'uploader_url': 'https://bsky.app/profile/purpleicetea.bsky.social',
'uploader': 'Purple.Ice.Tea',
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
'channel_url': 'https://bsky.app/profile/did:plc:bjh5ffwya5f53dfy47dezuwx',
'like_count': int,
'channel_id': 'did:plc:bjh5ffwya5f53dfy47dezuwx',
'repost_count': int,
'timestamp': 1729973202,
'tags': [],
'uploader_id': 'purpleicetea.bsky.social',
'title': 'One of my favorite videos',
},
}, {
'info_dict': {
'id': '3l77u64l7le2e',
'ext': 'mp4',
'title': 'hearing people on twitter say that bluesky isn\'...',
'like_count': int,
'uploader_id': 'thafnine.net',
'uploader_url': 'https://bsky.app/profile/thafnine.net',
'upload_date': '20241024',
'channel_url': 'https://bsky.app/profile/did:plc:6ttyq36rhiyed7wu3ws7dmqj',
'description': r're:(?s)hearing people on twitter say that bluesky .{93}',
'tags': [],
'alt_title': 'md5:9b1ee1937fb3d1a81e932f9ec14d560e',
'uploader': 'T9',
'channel_id': 'did:plc:6ttyq36rhiyed7wu3ws7dmqj',
'thumbnail': r're:https://video.bsky.app/watch/.*\.jpg$',
'timestamp': 1729731642,
'comment_count': int,
'repost_count': int,
},
}],
}]
_BLOB_URL_TMPL = '{}/xrpc/com.atproto.sync.getBlob'
def _get_service_endpoint(self, did, video_id):
    """Look up the service endpoint URL advertised for the given DID.

    The JSON document is downloaded from the host's ``/.well-known/did.json``
    for ``did:web:`` identifiers and from plc.directory for everything else.
    The download is non-fatal.

    @param did       DID string of the post author
    @param video_id  ID used for download progress/log messages
    @returns         The first valid ``serviceEndpoint`` URL of a service entry
                     typed ``AtprotoPersonalDataServer``, or
                     ``https://bsky.social`` when none could be determined
    """
    web_prefix = 'did:web:'
    if did.startswith(web_prefix):
        # Everything after the method prefix is used as the host name
        did_doc_url = f'https://{did[len(web_prefix):]}/.well-known/did.json'
    else:
        did_doc_url = f'https://plc.directory/{did}'

    did_document = self._download_json(
        did_doc_url, video_id, 'Fetching service endpoint', 'Falling back to bsky.social', fatal=False)

    pds_endpoint = traverse_obj(did_document, (
        'service', lambda _, x: x['type'] == 'AtprotoPersonalDataServer',
        'serviceEndpoint', {url_or_none}, any))
    if pds_endpoint:
        return pds_endpoint
    return 'https://bsky.social'
def _real_extract(self, url):
    """Extract every video attached to a single Bluesky post.

    The post is fetched via the public ``app.bsky.feed.getPostThread``
    endpoint, then three embed locations are probed for videos.

    @returns  A single info dict when exactly one entry was found,
              otherwise a playlist result containing all entries
    @raises   ExtractorError (expected) if the post holds no video
    """
    handle, video_id = self._match_valid_url(url).group('handle', 'id')

    thread_query = {
        'uri': f'at://{handle}/app.bsky.feed.post/{video_id}',
        'depth': 0,
        'parentHeight': 0,
    }
    thread_data = self._download_json(
        'https://public.api.bsky.app/xrpc/app.bsky.feed.getPostThread',
        video_id, query=thread_query)
    post = thread_data['thread']['post']

    entries = [
        # app.bsky.embed.video.view/app.bsky.embed.external.view
        *self._extract_videos(post, video_id),
        # app.bsky.embed.recordWithMedia.view
        *self._extract_videos(
            post, video_id, embed_path=('embed', 'media'), record_subpath=('embed', 'media')),
    ]

    # app.bsky.embed.record.view
    nested_post = traverse_obj(post, ('embed', 'record', ('record', None), {dict}, any))
    if nested_post:
        entries += self._extract_videos(
            nested_post, video_id, embed_path=('embeds', 0), record_path='value')

    if not entries:
        raise ExtractorError('No video could be found in this post', expected=True)
    return entries[0] if len(entries) == 1 else self.playlist_result(entries, video_id)
@staticmethod
def _build_profile_url(path):
    """Format `path` (a handle or DID) into a bsky.app profile URL via `format_field`, with `default=None`."""
    profile_template = 'https://bsky.app/profile/%s'
    return format_field(path, None, profile_template, default=None)
def _extract_videos(self, root, video_id, embed_path='embed', record_path='record', record_subpath='embed'):
    """Collect video entries from a post-like object.

    @param root            Dict to traverse (a post or a nested/quoted post)
    @param video_id        Fallback entry ID, also used for download messages
    @param embed_path      Traversal path (single key or tuple) to the embed view
    @param record_path     Traversal path (single key or tuple) to the record
    @param record_subpath  Traversal path, relative to the record, to its embed
    @returns               List of entries: an optional url_result for an external
                           link, followed by an info dict if an HLS playlist URL
                           was found at the embed path
    """
    path_atom_types = (str, bytes, dict, set)
    embed_path = variadic(embed_path, path_atom_types)
    record_path = variadic(record_path, path_atom_types)
    record_subpath = variadic(record_subpath, path_atom_types)
    record_embed_path = (*record_path, *record_subpath)

    entries = []

    # An external link may sit either in the record's embed or in the embed view
    external_uri = traverse_obj(root, (
        (record_embed_path, embed_path), 'external', 'uri', {url_or_none}, any))
    if external_uri:
        entries.append(self.url_result(external_uri))

    # Without a playlist URL there is no native video to extract
    playlist = traverse_obj(root, (*embed_path, 'playlist', {url_or_none}))
    if not playlist:
        return entries
    formats, subtitles = self._extract_m3u8_formats_and_subtitles(
        playlist, video_id, 'mp4', m3u8_id='hls', fatal=False)

    video_cid = traverse_obj(
        root, (*embed_path, 'cid', {str}),
        (*record_embed_path, 'video', 'ref', '$link', {str}))
    did = traverse_obj(root, ('author', 'did', {str}))

    if did and video_cid:
        endpoint = self._get_service_endpoint(did, video_id)
        blob_base_url = self._BLOB_URL_TMPL.format(endpoint)

        def blob_url(cid):
            # getBlob URL for a blob owned by this post's author
            return update_url_query(blob_base_url, {'did': did, 'cid': cid})

        dimensions = traverse_obj(root, (*embed_path, 'aspectRatio', {
            'width': ('width', {int_or_none}),
            'height': ('height', {int_or_none}),
        }))
        blob_info = traverse_obj(root, (*record_embed_path, 'video', {
            'filesize': ('size', {int_or_none}),
            'ext': ('mimeType', {mimetype2ext}),
        }))
        # The blob uploaded with the record is offered as an extra format
        formats.append({
            'format_id': 'blob',
            'url': blob_url(video_cid),
            **dimensions,
            **blob_info,
        })

        # Caption files are blobs as well; only entries with a blob link are kept
        captions = traverse_obj(root, (
            *record_embed_path, 'captions', lambda _, v: v['file']['ref']['$link']))
        for caption in captions:
            lang = caption.get('lang') or 'und'
            subtitles.setdefault(lang, []).append({
                'url': blob_url(caption['file']['ref']['$link']),
                'ext': traverse_obj(caption, ('file', 'mimeType', {mimetype2ext})),
            })

    metadata = traverse_obj(root, {
        'id': ('uri', {url_basename}),
        'thumbnail': (*embed_path, 'thumbnail', {url_or_none}),
        'alt_title': (*embed_path, 'alt', {str}, filter),
        'uploader': ('author', 'displayName', {str}),
        'uploader_id': ('author', 'handle', {str}),
        'uploader_url': ('author', 'handle', {self._build_profile_url}),
        'channel_id': ('author', 'did', {str}),
        'channel_url': ('author', 'did', {self._build_profile_url}),
        'like_count': ('likeCount', {int_or_none}),
        'repost_count': ('repostCount', {int_or_none}),
        'comment_count': ('replyCount', {int_or_none}),
        'timestamp': ('indexedAt', {parse_iso8601}),
        'tags': ('labels', ..., 'val', {str}, all, {orderedSet}),
        'age_limit': (
            'labels', ..., 'val', {lambda x: 18 if x in ('sexual', 'porn', 'graphic-media') else None}, any),
        'description': (*record_path, 'text', {str}, filter),
        'title': (*record_path, 'text', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
    })
    # `video_id` is only the fallback; an ID derived from the post URI takes precedence
    entries.append({
        'id': video_id,
        'formats': formats,
        'subtitles': subtitles,
        **metadata,
    })
    return entries

View file

@ -1,35 +1,20 @@
import functools
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
extract_attributes, extract_attributes,
get_element_text_and_html_by_tag,
get_elements_by_class,
join_nonempty, join_nonempty,
js_to_json, js_to_json,
mimetype2ext, mimetype2ext,
unified_strdate, unified_strdate,
url_or_none, url_or_none,
urljoin, urljoin,
variadic,
) )
from ..utils.traversal import traverse_obj from ..utils.traversal import (
find_element,
traverse_obj,
def html_get_element(tag=None, cls=None): )
assert tag or cls, 'One of tag or class is required'
if cls:
func = functools.partial(get_elements_by_class, cls, tag=tag)
else:
func = functools.partial(get_element_text_and_html_by_tag, tag)
def html_get_element_wrapper(html):
return variadic(func(html))[0]
return html_get_element_wrapper
class BpbIE(InfoExtractor): class BpbIE(InfoExtractor):
@ -41,12 +26,12 @@ class BpbIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '297', 'id': '297',
'ext': 'mp4', 'ext': 'mp4',
'creator': 'Kooperative Berlin', 'creators': ['Kooperative Berlin'],
'description': 'md5:f4f75885ba009d3e2b156247a8941ce6', 'description': r're:Joachim Gauck, .*\n\nKamera: .*',
'release_date': '20160115', 'release_date': '20150716',
'series': 'Interview auf dem Geschichtsforum 1989 | 2009', 'series': 'Interview auf dem Geschichtsforum 1989 | 2009',
'tags': ['Friedliche Revolution', 'Erinnerungskultur', 'Vergangenheitspolitik', 'DDR 1949 - 1990', 'Freiheitsrecht', 'BStU', 'Deutschland'], 'tags': [],
'thumbnail': 'https://www.bpb.de/cache/images/7/297_teaser_16x9_1240.jpg?8839D', 'thumbnail': r're:https?://www\.bpb\.de/cache/images/7/297_teaser_16x9_1240\.jpg.*',
'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR', 'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
'uploader': 'Bundeszentrale für politische Bildung', 'uploader': 'Bundeszentrale für politische Bildung',
}, },
@ -55,11 +40,12 @@ class BpbIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '522184', 'id': '522184',
'ext': 'mp4', 'ext': 'mp4',
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)', 'creators': ['Institute for Strategic Dialogue Germany gGmbH (ISD)'],
'description': 'md5:f83c795ff8f825a69456a9e51fc15903', 'description': 'md5:f83c795ff8f825a69456a9e51fc15903',
'release_date': '20230621', 'release_date': '20230621',
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'], 'series': 'Narrative über den Krieg Russlands gegen die Ukraine (NUK)',
'thumbnail': 'https://www.bpb.de/cache/images/4/522184_teaser_16x9_1240.png?EABFB', 'tags': [],
'thumbnail': r're:https://www\.bpb\.de/cache/images/4/522184_teaser_16x9_1240\.png.*',
'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c', 'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c',
'uploader': 'Bundeszentrale für politische Bildung', 'uploader': 'Bundeszentrale für politische Bildung',
}, },
@ -68,11 +54,12 @@ class BpbIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '518789', 'id': '518789',
'ext': 'mp4', 'ext': 'mp4',
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)', 'creators': ['Institute for Strategic Dialogue Germany gGmbH (ISD)'],
'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8', 'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8',
'release_date': '20230302', 'release_date': '20230302',
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'], 'series': 'Narrative über den Krieg Russlands gegen die Ukraine (NUK)',
'thumbnail': 'https://www.bpb.de/cache/images/9/518789_teaser_16x9_1240.jpeg?56D0D', 'tags': [],
'thumbnail': r're:https://www\.bpb\.de/cache/images/9/518789_teaser_16x9_1240\.jpeg.*',
'title': 'md5:3e956f264bb501f6383f10495a401da4', 'title': 'md5:3e956f264bb501f6383f10495a401da4',
'uploader': 'Bundeszentrale für politische Bildung', 'uploader': 'Bundeszentrale für politische Bildung',
}, },
@ -84,12 +71,12 @@ class BpbIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '315813', 'id': '315813',
'ext': 'mp3', 'ext': 'mp3',
'creator': 'Axel Schröder', 'creators': ['Axel Schröder'],
'description': 'md5:eda9d1af34e5912efef5baf54fba4427', 'description': 'md5:eda9d1af34e5912efef5baf54fba4427',
'release_date': '20200921', 'release_date': '20200921',
'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager', 'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager',
'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'], 'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'],
'thumbnail': 'https://www.bpb.de/cache/images/3/315813_teaser_16x9_1240.png?92A94', 'thumbnail': r're:https://www\.bpb\.de/cache/images/3/315813_teaser_16x9_1240\.png.*',
'title': 'Folge 1: Eine Einführung', 'title': 'Folge 1: Eine Einführung',
'uploader': 'Bundeszentrale für politische Bildung', 'uploader': 'Bundeszentrale für politische Bildung',
}, },
@ -98,12 +85,12 @@ class BpbIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '517806', 'id': '517806',
'ext': 'mp3', 'ext': 'mp3',
'creator': 'Bundeszentrale für politische Bildung', 'creators': ['Bundeszentrale für politische Bildung'],
'description': 'md5:594689600e919912aade0b2871cc3fed', 'description': 'md5:594689600e919912aade0b2871cc3fed',
'release_date': '20230127', 'release_date': '20230127',
'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"', 'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"',
'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'], 'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'],
'thumbnail': 'https://www.bpb.de/cache/images/6/517806_teaser_16x9_1240.png?7A7A0', 'thumbnail': r're:https://www\.bpb\.de/cache/images/6/517806_teaser_16x9_1240\.png.*',
'title': 'Die Weltanschauung der "Neuen Rechten"', 'title': 'Die Weltanschauung der "Neuen Rechten"',
'uploader': 'Bundeszentrale für politische Bildung', 'uploader': 'Bundeszentrale für politische Bildung',
}, },
@ -147,7 +134,7 @@ class BpbIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
title_result = traverse_obj(webpage, ({html_get_element(cls='opening-header__title')}, {self._TITLE_RE.match})) title_result = traverse_obj(webpage, ({find_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False)) json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False))
return { return {
@ -156,15 +143,15 @@ class BpbIE(InfoExtractor):
# This metadata could be interpreted otherwise, but it fits "series" the most # This metadata could be interpreted otherwise, but it fits "series" the most
'series': traverse_obj(title_result, ('series', {str.strip})) or None, 'series': traverse_obj(title_result, ('series', {str.strip})) or None,
'description': join_nonempty(*traverse_obj(webpage, [( 'description': join_nonempty(*traverse_obj(webpage, [(
{html_get_element(cls='opening-intro')}, {find_element(cls='opening-intro')},
[{html_get_element(tag='bpb-accordion-item')}, {html_get_element(cls='text-content')}], [{find_element(tag='bpb-accordion-item')}, {find_element(cls='text-content')}],
), {clean_html}]), delim='\n\n') or None, ), {clean_html}]), delim='\n\n') or None,
'creator': self._html_search_meta('author', webpage), 'creators': traverse_obj(self._html_search_meta('author', webpage), all),
'uploader': self._html_search_meta('publisher', webpage), 'uploader': self._html_search_meta('publisher', webpage),
'release_date': unified_strdate(self._html_search_meta('date', webpage)), 'release_date': unified_strdate(self._html_search_meta('date', webpage)),
'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)), 'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
**traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), { **traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
'formats': (':sources', ..., {self._process_source}), 'formats': (':sources', ..., {self._process_source}),
'thumbnail': ('poster', {lambda x: urljoin(url, x)}), 'thumbnail': ('poster', {urljoin(url)}),
}), }),
} }

View file

@ -145,10 +145,9 @@ class BravoTVIE(AdobePassIE):
tp_metadata = self._download_json( tp_metadata = self._download_json(
update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False) update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)
seconds_or_none = lambda x: float_or_none(x, 1000)
chapters = traverse_obj(tp_metadata, ('chapters', ..., { chapters = traverse_obj(tp_metadata, ('chapters', ..., {
'start_time': ('startTime', {seconds_or_none}), 'start_time': ('startTime', {float_or_none(scale=1000)}),
'end_time': ('endTime', {seconds_or_none}), 'end_time': ('endTime', {float_or_none(scale=1000)}),
})) }))
# prune pointless single chapters that span the entire duration from short videos # prune pointless single chapters that span the entire duration from short videos
if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')): if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
@ -168,8 +167,8 @@ class BravoTVIE(AdobePassIE):
**merge_dicts(traverse_obj(tp_metadata, { **merge_dicts(traverse_obj(tp_metadata, {
'title': 'title', 'title': 'title',
'description': 'description', 'description': 'description',
'duration': ('duration', {seconds_or_none}), 'duration': ('duration', {float_or_none(scale=1000)}),
'timestamp': ('pubDate', {seconds_or_none}), 'timestamp': ('pubDate', {float_or_none(scale=1000)}),
'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}), 'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}), 'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}), 'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),

View file

@ -8,11 +8,13 @@ from ..utils import (
bug_reports_message, bug_reports_message,
clean_html, clean_html,
format_field, format_field,
get_element_text_and_html_by_tag,
int_or_none, int_or_none,
url_or_none, url_or_none,
) )
from ..utils.traversal import traverse_obj from ..utils.traversal import (
find_element,
traverse_obj,
)
class BundestagIE(InfoExtractor): class BundestagIE(InfoExtractor):
@ -115,9 +117,8 @@ class BundestagIE(InfoExtractor):
note='Downloading metadata overlay', fatal=False, note='Downloading metadata overlay', fatal=False,
), { ), {
'title': ( 'title': (
{functools.partial(get_element_text_and_html_by_tag, 'h3')}, 0, {find_element(tag='h3')}, {functools.partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}),
{functools.partial(re.sub, r'<span[^>]*>[^<]+</span>', '')}, {clean_html}), 'description': ({find_element(tag='p')}, {clean_html}),
'description': ({functools.partial(get_element_text_and_html_by_tag, 'p')}, 0, {clean_html}),
})) }))
return result return result

View file

@ -53,7 +53,7 @@ class CaffeineTVIE(InfoExtractor):
'like_count': ('like_count', {int_or_none}), 'like_count': ('like_count', {int_or_none}),
'view_count': ('view_count', {int_or_none}), 'view_count': ('view_count', {int_or_none}),
'comment_count': ('comment_count', {int_or_none}), 'comment_count': ('comment_count', {int_or_none}),
'tags': ('tags', ..., {str}, {lambda x: x or None}), 'tags': ('tags', ..., {str}, filter),
'uploader': ('user', 'name', {str}), 'uploader': ('user', 'name', {str}),
'uploader_id': (((None, 'user'), 'username'), {str}, any), 'uploader_id': (((None, 'user'), 'username'), {str}, any),
'is_live': ('is_live', {bool}), 'is_live': ('is_live', {bool}),
@ -62,7 +62,7 @@ class CaffeineTVIE(InfoExtractor):
'title': ('broadcast_title', {str}), 'title': ('broadcast_title', {str}),
'duration': ('content_duration', {int_or_none}), 'duration': ('content_duration', {int_or_none}),
'timestamp': ('broadcast_start_time', {parse_iso8601}), 'timestamp': ('broadcast_start_time', {parse_iso8601}),
'thumbnail': ('preview_image_path', {lambda x: urljoin(url, x)}), 'thumbnail': ('preview_image_path', {urljoin(url)}),
}), }),
'age_limit': { 'age_limit': {
# assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system # assume Apple Store ratings: https://en.wikipedia.org/wiki/Mobile_software_content_rating_system

View file

@ -4,7 +4,6 @@ import json
import re import re
import time import time
import urllib.parse import urllib.parse
import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest from ..networking import HEADRequest
@ -12,7 +11,6 @@ from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
int_or_none, int_or_none,
join_nonempty,
js_to_json, js_to_json,
mimetype2ext, mimetype2ext,
orderedSet, orderedSet,
@ -455,8 +453,8 @@ class CBCPlayerIE(InfoExtractor):
chapters = traverse_obj(data, ( chapters = traverse_obj(data, (
'media', 'chapters', lambda _, v: float(v['startTime']) is not None, { 'media', 'chapters', lambda _, v: float(v['startTime']) is not None, {
'start_time': ('startTime', {functools.partial(float_or_none, scale=1000)}), 'start_time': ('startTime', {float_or_none(scale=1000)}),
'end_time': ('endTime', {functools.partial(float_or_none, scale=1000)}), 'end_time': ('endTime', {float_or_none(scale=1000)}),
'title': ('name', {str}), 'title': ('name', {str}),
})) }))
# Filter out pointless single chapters with start_time==0 and no end_time # Filter out pointless single chapters with start_time==0 and no end_time
@ -467,8 +465,8 @@ class CBCPlayerIE(InfoExtractor):
**traverse_obj(data, { **traverse_obj(data, {
'title': ('title', {str}), 'title': ('title', {str}),
'description': ('description', {str.strip}), 'description': ('description', {str.strip}),
'thumbnail': ('image', 'url', {url_or_none}, {functools.partial(update_url, query=None)}), 'thumbnail': ('image', 'url', {url_or_none}, {update_url(query=None)}),
'timestamp': ('publishedAt', {functools.partial(float_or_none, scale=1000)}), 'timestamp': ('publishedAt', {float_or_none(scale=1000)}),
'media_type': ('media', 'clipType', {str}), 'media_type': ('media', 'clipType', {str}),
'series': ('showName', {str}), 'series': ('showName', {str}),
'season_number': ('media', 'season', {int_or_none}), 'season_number': ('media', 'season', {int_or_none}),
@ -524,14 +522,13 @@ class CBCGemIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
# This is a normal, public, TV show video # This is a normal, public, TV show video
'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01', 'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
'md5': '93dbb31c74a8e45b378cf13bd3f6f11e',
'info_dict': { 'info_dict': {
'id': 'schitts-creek/s06e01', 'id': 'schitts-creek/s06e01',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Smoke Signals', 'title': 'Smoke Signals',
'description': 'md5:929868d20021c924020641769eb3e7f1', 'description': 'md5:929868d20021c924020641769eb3e7f1',
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_06e01_thumbnail_v01.jpg?im=Resize=(Size)', 'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_06e01_thumbnail_v01\.jpg',
'duration': 1314, 'duration': 1324,
'categories': ['comedy'], 'categories': ['comedy'],
'series': 'Schitt\'s Creek', 'series': 'Schitt\'s Creek',
'season': 'Season 6', 'season': 'Season 6',
@ -539,19 +536,21 @@ class CBCGemIE(InfoExtractor):
'episode': 'Smoke Signals', 'episode': 'Smoke Signals',
'episode_number': 1, 'episode_number': 1,
'episode_id': 'schitts-creek/s06e01', 'episode_id': 'schitts-creek/s06e01',
'upload_date': '20210618',
'timestamp': 1623988800,
'release_date': '20200107',
'release_timestamp': 1578427200,
}, },
'params': {'format': 'bv'}, 'params': {'format': 'bv'},
'skip': 'Geo-restricted to Canada',
}, { }, {
# This video requires an account in the browser, but works fine in yt-dlp # This video requires an account in the browser, but works fine in yt-dlp
'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01', 'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01',
'md5': '297a9600f554f2258aed01514226a697',
'info_dict': { 'info_dict': {
'id': 'schitts-creek/s01e01', 'id': 'schitts-creek/s01e01',
'ext': 'mp4', 'ext': 'mp4',
'title': 'The Cup Runneth Over', 'title': 'The Cup Runneth Over',
'description': 'md5:9bca14ea49ab808097530eb05a29e797', 'description': 'md5:9bca14ea49ab808097530eb05a29e797',
'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_01e01_thumbnail_v01.jpg?im=Resize=(Size)', 'thumbnail': r're:https://images\.radio-canada\.ca/[^#?]+/cbc_schitts_creek_season_01e01_thumbnail_v01\.jpg',
'series': 'Schitt\'s Creek', 'series': 'Schitt\'s Creek',
'season_number': 1, 'season_number': 1,
'season': 'Season 1', 'season': 'Season 1',
@ -560,9 +559,12 @@ class CBCGemIE(InfoExtractor):
'episode_id': 'schitts-creek/s01e01', 'episode_id': 'schitts-creek/s01e01',
'duration': 1309, 'duration': 1309,
'categories': ['comedy'], 'categories': ['comedy'],
'upload_date': '20210617',
'timestamp': 1623902400,
'release_date': '20151124',
'release_timestamp': 1448323200,
}, },
'params': {'format': 'bv'}, 'params': {'format': 'bv'},
'skip': 'Geo-restricted to Canada',
}, { }, {
'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01', 'url': 'https://gem.cbc.ca/nadiyas-family-favourites/s01e01',
'only_matching': True, 'only_matching': True,
@ -631,38 +633,6 @@ class CBCGemIE(InfoExtractor):
return return
self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token') self._claims_token = self.cache.load(self._NETRC_MACHINE, 'claims_token')
def _find_secret_formats(self, formats, video_id):
""" Find a valid video url and convert it to the secret variant """
base_format = next((f for f in formats if f.get('vcodec') != 'none'), None)
if not base_format:
return
base_url = re.sub(r'(Manifest\(.*?),filter=[\w-]+(.*?\))', r'\1\2', base_format['url'])
url = re.sub(r'(Manifest\(.*?),format=[\w-]+(.*?\))', r'\1\2', base_url)
secret_xml = self._download_xml(url, video_id, note='Downloading secret XML', fatal=False)
if not isinstance(secret_xml, xml.etree.ElementTree.Element):
return
for child in secret_xml:
if child.attrib.get('Type') != 'video':
continue
for video_quality in child:
bitrate = int_or_none(video_quality.attrib.get('Bitrate'))
if not bitrate or 'Index' not in video_quality.attrib:
continue
height = int_or_none(video_quality.attrib.get('MaxHeight'))
yield {
**base_format,
'format_id': join_nonempty('sec', height),
# Note: \g<1> is necessary instead of \1 since bitrate is a number
'url': re.sub(r'(QualityLevels\()\d+(\))', fr'\g<1>{bitrate}\2', base_url),
'width': int_or_none(video_quality.attrib.get('MaxWidth')),
'tbr': bitrate / 1000.0,
'height': height,
}
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
video_info = self._download_json( video_info = self._download_json(
@ -676,7 +646,6 @@ class CBCGemIE(InfoExtractor):
else: else:
headers = {} headers = {}
m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers) m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers)
m3u8_url = m3u8_info.get('url')
if m3u8_info.get('errorCode') == 1: if m3u8_info.get('errorCode') == 1:
self.raise_geo_restricted(countries=['CA']) self.raise_geo_restricted(countries=['CA'])
@ -685,9 +654,9 @@ class CBCGemIE(InfoExtractor):
elif m3u8_info.get('errorCode') != 0: elif m3u8_info.get('errorCode') != 0:
raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}') raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}')
formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls') formats = self._extract_m3u8_formats(
m3u8_info['url'], video_id, 'mp4', m3u8_id='hls', query={'manifestType': ''})
self._remove_duplicate_formats(formats) self._remove_duplicate_formats(formats)
formats.extend(self._find_secret_formats(formats, video_id))
for fmt in formats: for fmt in formats:
if fmt.get('vcodec') == 'none': if fmt.get('vcodec') == 'none':
@ -703,20 +672,21 @@ class CBCGemIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': video_info['title'],
'description': video_info.get('description'),
'thumbnail': video_info.get('image'),
'series': video_info.get('series'),
'season_number': video_info.get('season'),
'season': f'Season {video_info.get("season")}',
'episode_number': video_info.get('episode'),
'episode': video_info.get('title'),
'episode_id': video_id, 'episode_id': video_id,
'duration': video_info.get('duration'),
'categories': [video_info.get('category')],
'formats': formats, 'formats': formats,
'release_timestamp': video_info.get('airDate'), **traverse_obj(video_info, {
'timestamp': video_info.get('availableDate'), 'title': ('title', {str}),
'episode': ('title', {str}),
'description': ('description', {str}),
'thumbnail': ('image', {url_or_none}),
'series': ('series', {str}),
'season_number': ('season', {int_or_none}),
'episode_number': ('episode', {int_or_none}),
'duration': ('duration', {int_or_none}),
'categories': ('category', {str}, all),
'release_timestamp': ('airDate', {int_or_none(scale=1000)}),
'timestamp': ('availableDate', {int_or_none(scale=1000)}),
}),
} }

View file

@ -96,7 +96,7 @@ class CBSNewsBaseIE(InfoExtractor):
**traverse_obj(item, { **traverse_obj(item, {
'title': (None, ('fulltitle', 'title')), 'title': (None, ('fulltitle', 'title')),
'description': 'dek', 'description': 'dek',
'timestamp': ('timestamp', {lambda x: float_or_none(x, 1000)}), 'timestamp': ('timestamp', {float_or_none(scale=1000)}),
'duration': ('duration', {float_or_none}), 'duration': ('duration', {float_or_none}),
'subtitles': ('captions', {get_subtitles}), 'subtitles': ('captions', {get_subtitles}),
'thumbnail': ('images', ('hd', 'sd'), {url_or_none}), 'thumbnail': ('images', ('hd', 'sd'), {url_or_none}),

View file

@ -12,53 +12,86 @@ from ..utils import (
class CCMAIE(InfoExtractor): class CCMAIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)' IE_DESC = '3Cat, TV3 and Catalunya Ràdio'
_VALID_URL = r'https?://(?:www\.)?3cat\.cat/(?:3cat|tv3/sx3)/[^/?#]+/(?P<type>video|audio)/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/', # ccma.cat/tv3/alacarta/ URLs redirect to 3cat.cat/3cat/
'url': 'https://www.3cat.cat/3cat/lespot-de-la-marato-de-tv3/video/5630208/',
'md5': '7296ca43977c8ea4469e719c609b0871', 'md5': '7296ca43977c8ea4469e719c609b0871',
'info_dict': { 'info_dict': {
'id': '5630208', 'id': '5630208',
'ext': 'mp4', 'ext': 'mp4',
'title': 'L\'espot de La Marató de TV3', 'title': 'L\'espot de La Marató 2016: Ictus i les lesions medul·lars i cerebrals traumàtiques',
'description': 'md5:f12987f320e2f6e988e9908e4fe97765', 'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
'timestamp': 1478608140, 'timestamp': 1478608140,
'upload_date': '20161108', 'upload_date': '20161108',
'age_limit': 0, 'age_limit': 0,
'alt_title': 'EsportMarató2016WEB_PerPublicar',
'duration': 79,
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/4/6/1478536106664.jpg',
'series': 'Dedicada a l\'ictus i les lesions medul·lars i cerebrals traumàtiques',
'categories': ['Divulgació'],
}, },
}, { }, {
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/', # ccma.cat/catradio/alacarta/ URLs redirect to 3cat.cat/3cat/
'url': 'https://www.3cat.cat/3cat/el-consell-de-savis-analitza-el-derbi/audio/943685/',
'md5': 'fa3e38f269329a278271276330261425', 'md5': 'fa3e38f269329a278271276330261425',
'info_dict': { 'info_dict': {
'id': '943685', 'id': '943685',
'ext': 'mp3', 'ext': 'mp3',
'title': 'El Consell de Savis analitza el derbi', 'title': 'El Consell de Savis analitza el derbi',
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53', 'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
'upload_date': '20170512', 'upload_date': '20161217',
'timestamp': 1494622500, 'timestamp': 1482011700,
'vcodec': 'none', 'vcodec': 'none',
'categories': ['Esports'], 'categories': ['Esports'],
'series': 'Tot gira',
'duration': 821,
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/8/9/1482002602598.jpg',
}, },
}, { }, {
'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/', 'url': 'https://www.3cat.cat/3cat/crims-josep-tallada-lespereu-me-part-1/video/6031387/',
'md5': 'b43c3d3486f430f3032b5b160d80cbc3', 'md5': '27493513d08a3e5605814aee9bb778d2',
'info_dict': { 'info_dict': {
'id': '6031387', 'id': '6031387',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)', 'title': 'T1xC5 - Josep Talleda, l\'"Espereu-me" (part 1)',
'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60', 'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
'timestamp': 1582577700, 'timestamp': 1582577919,
'upload_date': '20200224', 'upload_date': '20200224',
'subtitles': 'mincount:4', 'subtitles': 'mincount:1',
'age_limit': 16, 'age_limit': 13,
'series': 'Crims', 'series': 'Crims',
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/1/9/1582564376991.jpg',
'duration': 3203,
'categories': ['Divulgació'],
'alt_title': 'Crims - 5 - Josep Talleda, l\'"Espereu-me" (1a part) - Josep Talleda, l\'"Espereu-me" (part 1)',
'episode_number': 5,
'episode': 'Episode 5',
},
}, {
'url': 'https://www.3cat.cat/tv3/sx3/una-mosca-volava-per-la-llum/video/5759227/',
'info_dict': {
'id': '5759227',
'ext': 'mp4',
'title': 'Una mosca volava per la llum',
'alt_title': '17Z004Ç UNA MOSCA VOLAVA PER LA LLUM',
'description': 'md5:9ab64276944b0825336f4147f13f7854',
'series': 'Mic',
'upload_date': '20180411',
'timestamp': 1523440105,
'duration': 160,
'age_limit': 0,
'thumbnail': 'https://img.3cat.cat/multimedia/jpg/6/1/1524071667216.jpg',
'categories': ['Música'],
}, },
}] }]
def _real_extract(self, url): def _real_extract(self, url):
media_type, media_id = self._match_valid_url(url).groups() media_type, media_id = self._match_valid_url(url).group('type', 'id')
media = self._download_json( media = self._download_json(
'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={ 'http://api-media.3cat.cat/pvideo/media.jsp', media_id, query={
'media': media_type, 'media': media_type,
'idint': media_id, 'idint': media_id,
'format': 'dm', 'format': 'dm',

View file

@ -12,6 +12,7 @@ from .common import InfoExtractor
from ..compat import compat_ord from ..compat import compat_ord
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
OnDemandPagedList,
float_or_none, float_or_none,
int_or_none, int_or_none,
merge_dicts, merge_dicts,
@ -351,3 +352,50 @@ class CDAIE(InfoExtractor):
extract_format(webpage, resolution) extract_format(webpage, resolution)
return merge_dicts(info_dict, info) return merge_dicts(info_dict, info)
class CDAFolderIE(InfoExtractor):
    """Playlist extractor for cda.pl folder URLs (``cda.pl/<channel>/folder/<id>``).

    The folder is returned as a lazily paged playlist; every video link found
    on a listing page is delegated to CDAIE.
    """

    # Page size handed to OnDemandPagedList in _real_extract
    _MAX_PAGE_SIZE = 36
    _VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>\w+)/folder/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.cda.pl/domino264/folder/31188385',
        'info_dict': {
            'id': '31188385',
            'title': 'SERIA DRUGA',
        },
        'playlist_mincount': 13,
    }, {
        'url': 'https://www.cda.pl/smiechawaTV/folder/2664592/vfilm',
        'info_dict': {
            'id': '2664592',
            'title': 'VideoDowcipy - wszystkie odcinki',
        },
        'playlist_mincount': 71,
    }, {
        'url': 'https://www.cda.pl/DeliciousBeauty/folder/19129979/vfilm',
        'info_dict': {
            'id': '19129979',
            'title': 'TESTY KOSMETYKÓW',
        },
        'playlist_mincount': 139,
    }]

    def _real_extract(self, url):
        """Build the playlist for the folder addressed by ``url``."""
        folder_id, channel = self._match_valid_url(url).group('id', 'channel')

        # The page at the original URL is only used for the playlist title
        folder_page = self._download_webpage(url, folder_id)

        def extract_page_entries(page):
            # `page` is zero-based here, while the site URLs are one-based
            page_number = page + 1
            listing = self._download_webpage(
                f'https://www.cda.pl/{channel}/folder/{folder_id}/vfilm/{page_number}', folder_id,
                f'Downloading page {page_number}', expected_status=404)
            # NOTE(review): expected_status=404 presumably lets a request past
            # the last page yield no entries instead of failing - confirm
            for link in re.finditer(r'<a[^>]+href="/video/([0-9a-z]+)"', listing):
                video_id = link.group(1)
                yield self.url_result(f'https://www.cda.pl/video/{video_id}', CDAIE, video_id)

        entries = OnDemandPagedList(extract_page_entries, self._MAX_PAGE_SIZE)
        return self.playlist_result(entries, folder_id, self._og_search_title(folder_page))

View file

@ -5,11 +5,12 @@ from ..utils import (
ExtractorError, ExtractorError,
lowercase_escape, lowercase_escape,
url_or_none, url_or_none,
urlencode_postdata,
) )
class ChaturbateIE(InfoExtractor): class ChaturbateIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)' _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.(?P<tld>com|eu|global)/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.chaturbate.com/siswet19/', 'url': 'https://www.chaturbate.com/siswet19/',
'info_dict': { 'info_dict': {
@ -29,16 +30,59 @@ class ChaturbateIE(InfoExtractor):
}, { }, {
'url': 'https://en.chaturbate.com/siswet19/', 'url': 'https://en.chaturbate.com/siswet19/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://chaturbate.eu/siswet19/',
'only_matching': True,
}, {
'url': 'https://chaturbate.eu/fullvideo/?b=caylin',
'only_matching': True,
}, {
'url': 'https://chaturbate.global/siswet19/',
'only_matching': True,
}] }]
_ROOM_OFFLINE = 'Room is currently offline' _ERROR_MAP = {
'offline': 'Room is currently offline',
'private': 'Room is currently in a private show',
'away': 'Performer is currently away',
'password protected': 'Room is password protected',
'hidden': 'Hidden session in progress',
}
def _real_extract(self, url): def _extract_from_api(self, video_id, tld):
video_id = self._match_id(url) response = self._download_json(
f'https://chaturbate.{tld}/get_edge_hls_url_ajax/', video_id,
data=urlencode_postdata({'room_slug': video_id}),
headers={
**self.geo_verification_headers(),
'X-Requested-With': 'XMLHttpRequest',
'Accept': 'application/json',
}, fatal=False, impersonate=True) or {}
status = response.get('room_status')
if status != 'public':
if error := self._ERROR_MAP.get(status):
raise ExtractorError(error, expected=True)
self.report_warning('Falling back to webpage extraction')
return None
m3u8_url = response.get('url')
if not m3u8_url:
self.raise_geo_restricted()
return {
'id': video_id,
'title': video_id,
'thumbnail': f'https://roomimg.stream.highwebmedia.com/ri/{video_id}.jpg',
'is_live': True,
'age_limit': 18,
'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', live=True),
}
def _extract_from_html(self, video_id, tld):
webpage = self._download_webpage( webpage = self._download_webpage(
f'https://chaturbate.com/{video_id}/', video_id, f'https://chaturbate.{tld}/{video_id}/', video_id,
headers=self.geo_verification_headers()) headers=self.geo_verification_headers(), impersonate=True)
found_m3u8_urls = [] found_m3u8_urls = []
@ -76,8 +120,8 @@ class ChaturbateIE(InfoExtractor):
webpage, 'error', group='error', default=None) webpage, 'error', group='error', default=None)
if not error: if not error:
if any(p in webpage for p in ( if any(p in webpage for p in (
self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')): self._ERROR_MAP['offline'], 'offline_tipping', 'tip_offline')):
error = self._ROOM_OFFLINE error = self._ERROR_MAP['offline']
if error: if error:
raise ExtractorError(error, expected=True) raise ExtractorError(error, expected=True)
raise ExtractorError('Unable to find stream URL') raise ExtractorError('Unable to find stream URL')
@ -104,3 +148,7 @@ class ChaturbateIE(InfoExtractor):
'is_live': True, 'is_live': True,
'formats': formats, 'formats': formats,
} }
def _real_extract(self, url):
video_id, tld = self._match_valid_url(url).group('id', 'tld')
return self._extract_from_api(video_id, tld) or self._extract_from_html(video_id, tld)

View file

@ -1,5 +1,3 @@
import functools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
UserNotLive, UserNotLive,
@ -77,7 +75,7 @@ class CHZZKLiveIE(InfoExtractor):
'thumbnails': thumbnails, 'thumbnails': thumbnails,
**traverse_obj(live_detail, { **traverse_obj(live_detail, {
'title': ('liveTitle', {str}), 'title': ('liveTitle', {str}),
'timestamp': ('openDate', {functools.partial(parse_iso8601, delimiter=' ')}), 'timestamp': ('openDate', {parse_iso8601(delimiter=' ')}),
'concurrent_view_count': ('concurrentUserCount', {int_or_none}), 'concurrent_view_count': ('concurrentUserCount', {int_or_none}),
'view_count': ('accumulateCount', {int_or_none}), 'view_count': ('accumulateCount', {int_or_none}),
'channel': ('channel', 'channelName', {str}), 'channel': ('channel', 'channelName', {str}),
@ -146,23 +144,37 @@ class CHZZKVideoIE(InfoExtractor):
video_meta = self._download_json( video_meta = self._download_json(
f'https://api.chzzk.naver.com/service/v3/videos/{video_id}', video_id, f'https://api.chzzk.naver.com/service/v3/videos/{video_id}', video_id,
note='Downloading video info', errnote='Unable to download video info')['content'] note='Downloading video info', errnote='Unable to download video info')['content']
formats, subtitles = self._extract_mpd_formats_and_subtitles(
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}', video_id, live_status = 'was_live' if video_meta.get('liveOpenDate') else 'not_live'
query={ video_status = video_meta.get('vodStatus')
'key': video_meta['inKey'], if video_status == 'UPLOAD':
'env': 'real', playback = self._parse_json(video_meta['liveRewindPlaybackJson'], video_id)
'lc': 'en_US', formats, subtitles = self._extract_m3u8_formats_and_subtitles(
'cpl': 'en_US', playback['media'][0]['path'], video_id, 'mp4', m3u8_id='hls')
}, note='Downloading video playback', errnote='Unable to download video playback') elif video_status == 'ABR_HLS':
formats, subtitles = self._extract_mpd_formats_and_subtitles(
f'https://apis.naver.com/neonplayer/vodplay/v1/playback/{video_meta["videoId"]}',
video_id, query={
'key': video_meta['inKey'],
'env': 'real',
'lc': 'en_US',
'cpl': 'en_US',
})
else:
self.raise_no_formats(
f'Unknown video status detected: "{video_status}"', expected=True, video_id=video_id)
formats, subtitles = [], {}
live_status = 'post_live' if live_status == 'was_live' else None
return { return {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'live_status': live_status,
**traverse_obj(video_meta, { **traverse_obj(video_meta, {
'title': ('videoTitle', {str}), 'title': ('videoTitle', {str}),
'thumbnail': ('thumbnailImageUrl', {url_or_none}), 'thumbnail': ('thumbnailImageUrl', {url_or_none}),
'timestamp': ('publishDateAt', {functools.partial(float_or_none, scale=1000)}), 'timestamp': ('publishDateAt', {float_or_none(scale=1000)}),
'view_count': ('readCount', {int_or_none}), 'view_count': ('readCount', {int_or_none}),
'duration': ('duration', {int_or_none}), 'duration': ('duration', {int_or_none}),
'channel': ('channel', 'channelName', {str}), 'channel': ('channel', 'channelName', {str}),

View file

@ -3,6 +3,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
filter_dict, filter_dict,
float_or_none,
int_or_none, int_or_none,
parse_age_limit, parse_age_limit,
smuggle_url, smuggle_url,
@ -85,7 +86,7 @@ class CineverseIE(CineverseBaseIE):
'title': 'title', 'title': 'title',
'id': ('details', 'item_id'), 'id': ('details', 'item_id'),
'description': ('details', 'description'), 'description': ('details', 'description'),
'duration': ('duration', {lambda x: x / 1000}), 'duration': ('duration', {float_or_none(scale=1000)}),
'cast': ('details', 'cast', {lambda x: x.split(', ')}), 'cast': ('details', 'cast', {lambda x: x.split(', ')}),
'modified_timestamp': ('details', 'updated_by', 0, 'update_time', 'time', {int_or_none}), 'modified_timestamp': ('details', 'updated_by', 0, 'update_time', 'time', {int_or_none}),
'season_number': ('details', 'season', {int_or_none}), 'season_number': ('details', 'season', {int_or_none}),

View file

@ -8,7 +8,7 @@ class CloudflareStreamIE(InfoExtractor):
_DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)' _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
_EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video=' _EMBED_RE = rf'(?:embed\.|{_SUBDOMAIN_RE}){_DOMAIN_RE}/embed/[^/?#]+\.js\?(?:[^#]+&)?video='
_ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+' _ID_RE = r'[\da-f]{32}|eyJ[\w-]+\.[\w-]+\.[\w-]+'
_VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}{_DOMAIN_RE}/|{_EMBED_RE})(?P<id>{_ID_RE})' _VALID_URL = rf'https?://(?:{_SUBDOMAIN_RE}(?P<domain>{_DOMAIN_RE})/|{_EMBED_RE})(?P<id>{_ID_RE})'
_EMBED_REGEX = [ _EMBED_REGEX = [
rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1', rf'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//{_EMBED_RE}(?:{_ID_RE})(?:(?!\1).)*)\1',
rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})', rf'<iframe[^>]+\bsrc=["\'](?P<url>https?://{_SUBDOMAIN_RE}{_DOMAIN_RE}/[\da-f]{{32}})',
@ -19,7 +19,7 @@ class CloudflareStreamIE(InfoExtractor):
'id': '31c9291ab41fac05471db4e73aa11717', 'id': '31c9291ab41fac05471db4e73aa11717',
'ext': 'mp4', 'ext': 'mp4',
'title': '31c9291ab41fac05471db4e73aa11717', 'title': '31c9291ab41fac05471db4e73aa11717',
'thumbnail': 'https://videodelivery.net/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg', 'thumbnail': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/thumbnails/thumbnail.jpg',
}, },
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
@ -30,7 +30,7 @@ class CloudflareStreamIE(InfoExtractor):
'id': '0e8e040aec776862e1d632a699edf59e', 'id': '0e8e040aec776862e1d632a699edf59e',
'ext': 'mp4', 'ext': 'mp4',
'title': '0e8e040aec776862e1d632a699edf59e', 'title': '0e8e040aec776862e1d632a699edf59e',
'thumbnail': 'https://videodelivery.net/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg', 'thumbnail': 'https://cloudflarestream.com/0e8e040aec776862e1d632a699edf59e/thumbnails/thumbnail.jpg',
}, },
}, { }, {
'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1', 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
@ -54,7 +54,7 @@ class CloudflareStreamIE(InfoExtractor):
'id': 'eaef9dea5159cf968be84241b5cedfe7', 'id': 'eaef9dea5159cf968be84241b5cedfe7',
'ext': 'mp4', 'ext': 'mp4',
'title': 'eaef9dea5159cf968be84241b5cedfe7', 'title': 'eaef9dea5159cf968be84241b5cedfe7',
'thumbnail': 'https://videodelivery.net/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg', 'thumbnail': 'https://cloudflarestream.com/eaef9dea5159cf968be84241b5cedfe7/thumbnails/thumbnail.jpg',
}, },
'params': { 'params': {
'skip_download': 'm3u8', 'skip_download': 'm3u8',
@ -62,8 +62,9 @@ class CloudflareStreamIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id, domain = self._match_valid_url(url).group('id', 'domain')
domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net' if domain != 'bytehighway.net':
domain = 'cloudflarestream.com'
base_url = f'https://{domain}/{video_id}/' base_url = f'https://{domain}/{video_id}/'
if '.' in video_id: if '.' in video_id:
video_id = self._parse_json(base64.urlsafe_b64decode( video_id = self._parse_json(base64.urlsafe_b64decode(

View file

@ -1,146 +1,225 @@
import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from .turner import TurnerBaseIE from ..utils import (
from ..utils import merge_dicts, try_call, url_basename clean_html,
extract_attributes,
int_or_none,
merge_dicts,
parse_duration,
parse_iso8601,
parse_resolution,
try_call,
update_url,
url_or_none,
)
from ..utils.traversal import find_elements, traverse_obj
class CNNIE(TurnerBaseIE): class CNNIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/ _VALID_URL = r'https?://(?:(?:edition|www|money|cnnespanol)\.)?cnn\.com/(?!audio/)(?P<display_id>[^?#]+?)(?:[?#]|$|/index\.html)'
(?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))'''
_TESTS = [{ _TESTS = [{
'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', 'url': 'https://www.cnn.com/2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
'md5': '3e6121ea48df7e2259fe73a0628605c4',
'info_dict': { 'info_dict': {
'id': 'sports/2013/06/09/nadal-1-on-1.cnn', 'id': 'med0e97ad0d154f56e29aa96e57192a14226734b6b',
'display_id': '2024/05/31/sport/video/jadon-sancho-borussia-dortmund-champions-league-exclusive-spt-intl',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Nadal wins 8th French Open title', 'upload_date': '20240531',
'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', 'description': 'md5:844bcdb0629e1877a7a466c913f4c19c',
'duration': 135, 'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/gettyimages-2151936122.jpg?c=original',
'upload_date': '20130609', 'duration': 373.0,
'timestamp': 1717148586,
'title': 'Borussia Dortmund star Jadon Sancho seeks Wembley redemption after 2020 Euros hurt',
'modified_date': '20240531',
'modified_timestamp': 1717150140,
}, },
'expected_warnings': ['Failed to download m3u8 information'],
}, { }, {
'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29', 'url': 'https://edition.cnn.com/2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
'md5': 'b5cc60c60a3477d185af8f19a2a26f4e',
'info_dict': { 'info_dict': {
'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology', 'id': 'me522945c4709b299e5cb8657900a7a21ad3b559f9',
'display_id': '2024/06/11/politics/video/inmates-vote-jail-nevada-murray-dnt-ac360-digvid',
'ext': 'mp4', 'ext': 'mp4',
'title': "Student's epic speech stuns new freshmen", 'description': 'md5:e0120fe5da9ad8259fd707c1cbb64a60',
'description': 'A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from "2001: A Space Odyssey."', 'title': 'Heres how some inmates in closely divided state are now able to vote from jail',
'upload_date': '20130821', 'timestamp': 1718158269,
'upload_date': '20240612',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701554-13565-571-still.jpg?c=original',
'duration': 202.0,
'modified_date': '20240612',
'modified_timestamp': 1718158509,
}, },
'expected_warnings': ['Failed to download m3u8 information'],
}, { }, {
'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html', 'url': 'https://edition.cnn.com/2024/06/11/style/king-charles-portrait-vandalized/index.html',
'md5': 'f14d02ebd264df951feb2400e2c25a1b',
'info_dict': { 'info_dict': {
'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln', 'id': 'mef5f52b9e1fe28b1ad192afcbc9206ae984894b68',
'display_id': '2024/06/11/style/king-charles-portrait-vandalized',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Nashville Ep. 1: Hand crafted skateboards', 'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/still-20701257-8846-816-still.jpg?c=original',
'description': 'md5:e7223a503315c9f150acac52e76de086', 'description': 'md5:19f78338ccec533db0fa8a4511012dae',
'upload_date': '20141222', 'title': 'Video shows King Charles\' portrait being vandalized by activists',
'timestamp': 1718113852,
'upload_date': '20240611',
'duration': 51.0,
'modified_timestamp': 1718116193,
'modified_date': '20240611',
}, },
'expected_warnings': ['Failed to download m3u8 information'],
}, { }, {
'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html', 'url': 'https://edition.cnn.com/videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
'md5': '52a515dc1b0f001cd82e4ceda32be9d1',
'info_dict': { 'info_dict': {
'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney', 'id': 'mefba13799201b084ea3b1d0f7ca820ae94d4bb5b2',
'display_id': 'videos/media/2022/12/05/robin-meade-final-sign-off-broadcast-hln-mxp-contd-vpx.hln',
'ext': 'mp4', 'ext': 'mp4',
'title': '5 stunning stats about Netflix', 'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/221205163510-robin-meade-sign-off.jpg?c=original',
'description': 'Did you know that Netflix has more than 80 million members? Here are five facts about the online video distributor that you probably didn\'t know.', 'duration': 158.0,
'upload_date': '20160819', 'title': 'Robin Meade signs off after HLN\'s last broadcast',
'description': 'md5:cff3c62d18d2fbc6c5c75cb029b7353b',
'upload_date': '20221205',
'timestamp': 1670284296,
'modified_timestamp': 1670332404,
'modified_date': '20221206',
}, },
'params': { 'params': {'format': 'direct'},
# m3u8 download }, {
'skip_download': True, 'url': 'https://cnnespanol.cnn.com/video/ataque-misil-israel-beirut-libano-octubre-trax',
'info_dict': {
'id': 'me484a43722642aa00627b812fe928f2e99c6e2997',
'ext': 'mp4',
'display_id': 'video/ataque-misil-israel-beirut-libano-octubre-trax',
'timestamp': 1729501452,
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/ataqeubeirut-1.jpg?c=original',
'description': 'md5:256ee7137d161f776cda429654135e52',
'upload_date': '20241021',
'duration': 31.0,
'title': 'VIDEO | Israel lanza un nuevo ataque sobre Beirut',
'modified_date': '20241021',
'modified_timestamp': 1729501530,
}, },
}, { }, {
'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk', 'url': 'https://edition.cnn.com/2024/10/16/politics/kamala-harris-fox-news-interview/index.html',
'only_matching': True, 'info_dict': {
}, { 'id': '2024/10/16/politics/kamala-harris-fox-news-interview',
'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg', },
'only_matching': True, 'playlist_count': 2,
}, { 'playlist': [{
'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn', 'md5': '073ffab87b8bef97c9913e71cc18ef9e',
'only_matching': True, 'info_dict': {
'id': 'me19d548fdd54df0924087039283128ef473ab397d',
'ext': 'mp4',
'title': '\'I\'m not finished\': Harris interview with Fox News gets heated',
'display_id': 'kamala-harris-fox-news-interview-ebof-digvid',
'description': 'md5:e7dd3d1a04df916062230b60ca419a0a',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/harris-20241016234916617.jpg?c=original',
'duration': 173.0,
'timestamp': 1729122182,
'upload_date': '20241016',
'modified_timestamp': 1729194706,
'modified_date': '20241017',
},
'params': {'format': 'direct'},
}, {
'md5': '11604ab4af83b650826753f1ccb8ecff',
'info_dict': {
'id': 'med04507d8ca3da827001f63d22af321ec29c7d97b',
'ext': 'mp4',
'title': '\'Wise\': Buttigieg on Harris\' handling of interview question about gender transition surgery',
'display_id': 'pete-buttigieg-harris-fox-newssrc-digvid',
'description': 'md5:602a8a7e853ed5e574acd3159428c98e',
'thumbnail': 'https://media.cnn.com/api/v1/images/stellar/prod/buttigieg-20241017040412074.jpg?c=original',
'duration': 145.0,
'timestamp': 1729137765,
'upload_date': '20241017',
'modified_timestamp': 1729138184,
'modified_date': '20241017',
},
'params': {'format': 'direct'},
}],
}] }]
_CONFIG = {
# http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml
'edition': {
'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml',
'media_src': 'http://pmd.cdn.turner.com/cnn/big',
},
# http://money.cnn.com/.element/apps/cvp2/cfg/config.xml
'money': {
'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml',
'media_src': 'http://ht3.cdn.turner.com/money/big',
},
}
def _extract_timestamp(self, video_data):
# TODO: fix timestamp extraction
return None
def _real_extract(self, url): def _real_extract(self, url):
sub_domain, path, page_title = self._match_valid_url(url).groups() display_id = self._match_valid_url(url).group('display_id')
if sub_domain not in ('money', 'edition'): webpage = self._download_webpage(url, display_id)
sub_domain = 'edition' app_id = traverse_obj(
config = self._CONFIG[sub_domain] self._search_json(r'window\.env\s*=', webpage, 'window env', display_id, default={}),
return self._extract_cvp_info( ('TOP_AUTH_SERVICE_APP_ID', {str}))
config['data_src'] % path, page_title, {
'default': { entries = []
'media_src': config['media_src'], for player_data in traverse_obj(webpage, (
}, {find_elements(tag='div', attr='data-component-name', value='video-player', html=True)},
'f4m': { ..., {extract_attributes}, all, lambda _, v: v['data-media-id'])):
'host': 'cnn-vh.akamaihd.net', media_id = player_data['data-media-id']
}, parent_uri = player_data.get('data-video-resource-parent-uri')
formats, subtitles = [], {}
video_data = {}
if parent_uri:
video_data = self._download_json(
'https://fave.api.cnn.io/v1/video', media_id, fatal=False,
query={
'id': media_id,
'stellarUri': parent_uri,
})
for direct_url in traverse_obj(video_data, ('files', ..., 'fileUri', {url_or_none})):
resolution, bitrate = None, None
if mobj := re.search(r'-(?P<res>\d+x\d+)_(?P<tbr>\d+)k\.mp4', direct_url):
resolution, bitrate = mobj.group('res', 'tbr')
formats.append({
'url': direct_url,
'format_id': 'direct',
'quality': 1,
'tbr': int_or_none(bitrate),
**parse_resolution(resolution),
})
for sub_data in traverse_obj(video_data, (
'closedCaptions', 'types', lambda _, v: url_or_none(v['track']['url']), 'track')):
subtitles.setdefault(sub_data.get('lang') or 'en', []).append({
'url': sub_data['url'],
'name': sub_data.get('label'),
})
if app_id:
media_data = self._download_json(
f'https://medium.ngtv.io/v2/media/{media_id}/desktop', media_id, fatal=False,
query={'appId': app_id})
m3u8_url = traverse_obj(media_data, (
'media', 'desktop', 'unprotected', 'unencrypted', 'url', {url_or_none}))
if m3u8_url:
fmts, subs = self._extract_m3u8_formats_and_subtitles(
m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
formats.extend(fmts)
self._merge_subtitles(subs, target=subtitles)
entries.append({
**traverse_obj(player_data, {
'title': ('data-headline', {clean_html}),
'description': ('data-description', {clean_html}),
'duration': ('data-duration', {parse_duration}),
'timestamp': ('data-publish-date', {parse_iso8601}),
'thumbnail': (
'data-poster-image-override', {json.loads}, 'big', 'uri', {url_or_none},
{update_url(query='c=original')}),
'display_id': 'data-video-slug',
}),
**traverse_obj(video_data, {
'timestamp': ('dateCreated', 'uts', {int_or_none(scale=1000)}),
'description': ('description', {clean_html}),
'title': ('headline', {str}),
'modified_timestamp': ('lastModified', 'uts', {int_or_none(scale=1000)}),
'duration': ('trt', {int_or_none}),
}),
'id': media_id,
'formats': formats,
'subtitles': subtitles,
}) })
if len(entries) == 1:
return {
**entries[0],
'display_id': display_id,
}
class CNNBlogsIE(InfoExtractor): return self.playlist_result(entries, display_id)
_VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+'
_TEST = {
'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/',
'md5': '3e56f97b0b6ffb4b79f4ea0749551084',
'info_dict': {
'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn',
'ext': 'mp4',
'title': 'Criminalizing journalism?',
'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.',
'upload_date': '20140209',
},
'expected_warnings': ['Failed to download m3u8 information'],
'add_ie': ['CNN'],
}
def _real_extract(self, url):
webpage = self._download_webpage(url, url_basename(url))
cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url')
return self.url_result(cnn_url, CNNIE.ie_key())
class CNNArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)'
_TEST = {
'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/',
'md5': '689034c2a3d9c6dc4aa72d65a81efd01',
'info_dict': {
'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn',
'ext': 'mp4',
'title': 'Obama: Cyberattack not an act of war',
'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b',
'upload_date': '20141221',
},
'expected_warnings': ['Failed to download m3u8 information'],
'add_ie': ['CNN'],
}
def _real_extract(self, url):
webpage = self._download_webpage(url, url_basename(url))
cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url')
return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key())
class CNNIndonesiaIE(InfoExtractor): class CNNIndonesiaIE(InfoExtractor):

View file

@ -25,7 +25,6 @@ import xml.etree.ElementTree
from ..compat import ( from ..compat import (
compat_etree_fromstring, compat_etree_fromstring,
compat_expanduser, compat_expanduser,
compat_os_name,
urllib_req_to_req, urllib_req_to_req,
) )
from ..cookies import LenientSimpleCookie from ..cookies import LenientSimpleCookie
@ -47,6 +46,7 @@ from ..utils import (
FormatSorter, FormatSorter,
GeoRestrictedError, GeoRestrictedError,
GeoUtils, GeoUtils,
ISO639Utils,
LenientJSONDecoder, LenientJSONDecoder,
Popen, Popen,
RegexNotFoundError, RegexNotFoundError,
@ -278,6 +278,7 @@ class InfoExtractor:
thumbnails: A list of dictionaries, with the following entries: thumbnails: A list of dictionaries, with the following entries:
* "id" (optional, string) - Thumbnail format ID * "id" (optional, string) - Thumbnail format ID
* "url" * "url"
* "ext" (optional, string) - actual image extension if not given in URL
* "preference" (optional, int) - quality of the image * "preference" (optional, int) - quality of the image
* "width" (optional, int) * "width" (optional, int)
* "height" (optional, int) * "height" (optional, int)
@ -333,7 +334,7 @@ class InfoExtractor:
like_count: Number of positive ratings of the video like_count: Number of positive ratings of the video
dislike_count: Number of negative ratings of the video dislike_count: Number of negative ratings of the video
repost_count: Number of reposts of the video repost_count: Number of reposts of the video
average_rating: Average rating give by users, the scale used depends on the webpage average_rating: Average rating given by users, the scale used depends on the webpage
comment_count: Number of comments on the video comment_count: Number of comments on the video
comments: A list of comments, each with one or more of the following comments: A list of comments, each with one or more of the following
properties (all but one of text or html optional): properties (all but one of text or html optional):
@ -520,7 +521,7 @@ class InfoExtractor:
or _extract_from_webpage as necessary. While these are normally classmethods, or _extract_from_webpage as necessary. While these are normally classmethods,
_extract_from_webpage is allowed to be an instance method. _extract_from_webpage is allowed to be an instance method.
_extract_from_webpage may raise self.StopExtraction() to stop further _extract_from_webpage may raise self.StopExtraction to stop further
processing of the webpage and obtain exclusive rights to it. This is useful processing of the webpage and obtain exclusive rights to it. This is useful
when the extractor cannot reliably be matched using just the URL, when the extractor cannot reliably be matched using just the URL,
e.g. invidious/peertube instances e.g. invidious/peertube instances
@ -1027,7 +1028,7 @@ class InfoExtractor:
filename = sanitize_filename(f'{basen}.dump', restricted=True) filename = sanitize_filename(f'{basen}.dump', restricted=True)
# Working around MAX_PATH limitation on Windows (see # Working around MAX_PATH limitation on Windows (see
# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx) # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
if compat_os_name == 'nt': if os.name == 'nt':
absfilepath = os.path.abspath(filename) absfilepath = os.path.abspath(filename)
if len(absfilepath) > 259: if len(absfilepath) > 259:
filename = fR'\\?\{absfilepath}' filename = fR'\\?\{absfilepath}'
@ -1408,6 +1409,13 @@ class InfoExtractor:
return None, None return None, None
self.write_debug(f'Using netrc for {netrc_machine} authentication') self.write_debug(f'Using netrc for {netrc_machine} authentication')
# compat: <=py3.10: netrc cannot parse tokens as empty strings, will return `""` instead
# Ref: https://github.com/yt-dlp/yt-dlp/issues/11413
# https://github.com/python/cpython/commit/15409c720be0503131713e3d3abc1acd0da07378
if sys.version_info < (3, 11):
return tuple(x if x != '""' else '' for x in info[::2])
return info[0], info[2] return info[0], info[2]
def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None): def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
@ -1570,7 +1578,9 @@ class InfoExtractor:
if default is not NO_DEFAULT: if default is not NO_DEFAULT:
fatal = False fatal = False
for mobj in re.finditer(JSON_LD_RE, html): for mobj in re.finditer(JSON_LD_RE, html):
json_ld_item = self._parse_json(mobj.group('json_ld'), video_id, fatal=fatal) json_ld_item = self._parse_json(
mobj.group('json_ld'), video_id, fatal=fatal,
errnote=False if default is not NO_DEFAULT else None)
for json_ld in variadic(json_ld_item): for json_ld in variadic(json_ld_item):
if isinstance(json_ld, dict): if isinstance(json_ld, dict):
yield json_ld yield json_ld
@ -3071,7 +3081,11 @@ class InfoExtractor:
url_pattern = stream.attrib['Url'] url_pattern = stream.attrib['Url']
stream_timescale = int_or_none(stream.get('TimeScale')) or timescale stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
stream_name = stream.get('Name') stream_name = stream.get('Name')
stream_language = stream.get('Language', 'und') # IsmFD expects ISO 639 Set 2 language codes (3-character length)
# See: https://github.com/yt-dlp/yt-dlp/issues/11356
stream_language = stream.get('Language') or 'und'
if len(stream_language) != 3:
stream_language = ISO639Utils.short2long(stream_language) or 'und'
for track in stream.findall('QualityLevel'): for track in stream.findall('QualityLevel'):
KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'} KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'}
fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag')) fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))
@ -3753,7 +3767,7 @@ class InfoExtractor:
""" Merge subtitle dictionaries, language by language. """ """ Merge subtitle dictionaries, language by language. """
if target is None: if target is None:
target = {} target = {}
for d in dicts: for d in filter(None, dicts):
for lang, subs in d.items(): for lang, subs in d.items():
target[lang] = cls._merge_subtitle_items(target.get(lang, []), subs) target[lang] = cls._merge_subtitle_items(target.get(lang, []), subs)
return target return target

View file

@ -12,6 +12,7 @@ from ..utils import (
parse_iso8601, parse_iso8601,
strip_or_none, strip_or_none,
try_get, try_get,
urljoin,
) )
@ -112,8 +113,7 @@ class CondeNastIE(InfoExtractor):
m_paths = re.finditer( m_paths = re.finditer(
r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage) r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage)
paths = orderedSet(m.group(1) for m in m_paths) paths = orderedSet(m.group(1) for m in m_paths)
build_url = lambda path: urllib.parse.urljoin(base_url, path) entries = [self.url_result(urljoin(base_url, path), 'CondeNast') for path in paths]
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
return self.playlist_result(entries, playlist_title=title) return self.playlist_result(entries, playlist_title=title)
def _extract_video_params(self, webpage, display_id): def _extract_video_params(self, webpage, display_id):

View file

@ -456,7 +456,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
}), }),
}), }),
**traverse_obj(metadata, { **traverse_obj(metadata, {
'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}), 'duration': ('duration_ms', {float_or_none(scale=1000)}),
'timestamp': ('upload_date', {parse_iso8601}), 'timestamp': ('upload_date', {parse_iso8601}),
'series': ('series_title', {str}), 'series': ('series_title', {str}),
'series_id': ('series_id', {str}), 'series_id': ('series_id', {str}),
@ -484,7 +484,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
}), }),
}), }),
**traverse_obj(metadata, { **traverse_obj(metadata, {
'duration': ('duration_ms', {lambda x: float_or_none(x, 1000)}), 'duration': ('duration_ms', {float_or_none(scale=1000)}),
'age_limit': ('maturity_ratings', -1, {parse_age_limit}), 'age_limit': ('maturity_ratings', -1, {parse_age_limit}),
}), }),
} }

View file

@ -1,14 +1,27 @@
import json
import re import re
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import orderedSet from .ninecninemedia import NineCNineMediaIE
from ..utils import extract_attributes, orderedSet
from ..utils.traversal import find_element, traverse_obj
class CTVNewsIE(InfoExtractor): class CTVNewsIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)' _BASE_REGEX = r'https?://(?:[^.]+\.)?ctvnews\.ca/'
_VIDEO_ID_RE = r'(?P<id>\d{5,})'
_PLAYLIST_ID_RE = r'(?P<id>\d\.\d{5,})'
_VALID_URL = [
rf'{_BASE_REGEX}video/c{_VIDEO_ID_RE}',
rf'{_BASE_REGEX}video(?:-gallery)?/?\?clipId={_VIDEO_ID_RE}',
rf'{_BASE_REGEX}video/?\?(?:playlist|bin)Id={_PLAYLIST_ID_RE}',
rf'{_BASE_REGEX}(?!video/)[^?#]*?{_PLAYLIST_ID_RE}/?(?:$|[?#])',
rf'{_BASE_REGEX}(?!video/)[^?#]+\?binId={_PLAYLIST_ID_RE}',
]
_TESTS = [{ _TESTS = [{
'url': 'http://www.ctvnews.ca/video?clipId=901995', 'url': 'http://www.ctvnews.ca/video?clipId=901995',
'md5': '9b8624ba66351a23e0b6e1391971f9af', 'md5': 'b608f466c7fa24b9666c6439d766ab7e',
'info_dict': { 'info_dict': {
'id': '901995', 'id': '901995',
'ext': 'flv', 'ext': 'flv',
@ -16,6 +29,33 @@ class CTVNewsIE(InfoExtractor):
'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285', 'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
'timestamp': 1467286284, 'timestamp': 1467286284,
'upload_date': '20160630', 'upload_date': '20160630',
'categories': [],
'season_number': 0,
'season': 'Season 0',
'tags': [],
'series': 'CTV News National | Archive | Stories 2',
'season_id': '57981',
'thumbnail': r're:https?://.*\.jpg$',
'duration': 764.631,
},
}, {
'url': 'https://barrie.ctvnews.ca/video/c3030933-here_s-what_s-making-news-for-nov--15?binId=1272429',
'md5': '8b8c2b33c5c1803e3c26bc74ff8694d5',
'info_dict': {
'id': '3030933',
'ext': 'flv',
'title': 'Heres whats making news for Nov. 15',
'description': 'Here are the top stories were working on for CTV News at 11 for Nov. 15',
'thumbnail': 'http://images2.9c9media.com/image_asset/2021_2_22_a602e68e-1514-410e-a67a-e1f7cccbacab_png_2000x1125.jpg',
'season_id': '58104',
'season_number': 0,
'tags': [],
'season': 'Season 0',
'categories': [],
'series': 'CTV News Barrie',
'upload_date': '20241116',
'duration': 42.943,
'timestamp': 1731722452,
}, },
}, { }, {
'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224', 'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
@ -31,6 +71,72 @@ class CTVNewsIE(InfoExtractor):
'id': '1.2876780', 'id': '1.2876780',
}, },
'playlist_mincount': 100, 'playlist_mincount': 100,
}, {
'url': 'https://www.ctvnews.ca/it-s-been-23-years-since-toronto-called-in-the-army-after-a-major-snowstorm-1.5736957',
'info_dict':
{
'id': '1.5736957',
},
'playlist_mincount': 6,
}, {
'url': 'https://www.ctvnews.ca/business/respondents-to-bank-of-canada-questionnaire-largely-oppose-creating-a-digital-loonie-1.6665797',
'md5': '24bc4b88cdc17d8c3fc01dfc228ab72c',
'info_dict': {
'id': '2695026',
'ext': 'flv',
'season_id': '89852',
'series': 'From CTV News Channel',
'description': 'md5:796a985a23cacc7e1e2fafefd94afd0a',
'season': '2023',
'title': 'Bank of Canada asks public about digital currency',
'categories': [],
'tags': [],
'upload_date': '20230526',
'season_number': 2023,
'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
'timestamp': 1685105157,
'duration': 253.553,
},
}, {
'url': 'https://stox.ctvnews.ca/video-gallery?clipId=582589',
'md5': '135cc592df607d29dddc931f1b756ae2',
'info_dict': {
'id': '582589',
'ext': 'flv',
'categories': [],
'timestamp': 1427906183,
'season_number': 0,
'duration': 125.559,
'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
'series': 'CTV News Stox',
'description': 'CTV original footage of the rise and fall of the Berlin Wall.',
'title': 'Berlin Wall',
'season_id': '63817',
'season': 'Season 0',
'tags': [],
'upload_date': '20150401',
},
}, {
'url': 'https://ottawa.ctvnews.ca/features/regional-contact/regional-contact-archive?binId=1.1164587#3023759',
'md5': 'a14c0603557decc6531260791c23cc5e',
'info_dict': {
'id': '3023759',
'ext': 'flv',
'season_number': 2024,
'timestamp': 1731798000,
'season': '2024',
'episode': 'Episode 125',
'description': 'CTV News Ottawa at Six',
'duration': 2712.076,
'episode_number': 125,
'upload_date': '20241116',
'title': 'CTV News Ottawa at Six for Saturday, November 16, 2024',
'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
'categories': [],
'tags': [],
'series': 'CTV News Ottawa at Six',
'season_id': '92667',
},
}, { }, {
'url': 'http://www.ctvnews.ca/1.810401', 'url': 'http://www.ctvnews.ca/1.810401',
'only_matching': True, 'only_matching': True,
@ -42,29 +148,35 @@ class CTVNewsIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
def _ninecninemedia_url_result(self, clip_id):
return self.url_result(f'9c9media:ctvnews_web:{clip_id}', NineCNineMediaIE, clip_id)
def _real_extract(self, url): def _real_extract(self, url):
page_id = self._match_id(url) page_id = self._match_id(url)
def ninecninemedia_url_result(clip_id): if mobj := re.fullmatch(self._VIDEO_ID_RE, urllib.parse.urlparse(url).fragment):
return { page_id = mobj.group('id')
'_type': 'url_transparent',
'id': clip_id,
'url': f'9c9media:ctvnews_web:{clip_id}',
'ie_key': 'NineCNineMedia',
}
if page_id.isdigit(): if re.fullmatch(self._VIDEO_ID_RE, page_id):
return ninecninemedia_url_result(page_id) return self._ninecninemedia_url_result(page_id)
else:
webpage = self._download_webpage(f'http://www.ctvnews.ca/{page_id}', page_id, query={ webpage = self._download_webpage(f'https://www.ctvnews.ca/{page_id}', page_id, query={
'ot': 'example.AjaxPageLayout.ot', 'ot': 'example.AjaxPageLayout.ot',
'maxItemsPerPage': 1000000, 'maxItemsPerPage': 1000000,
}) })
entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet( entries = [self._ninecninemedia_url_result(clip_id)
re.findall(r'clip\.id\s*=\s*(\d+);', webpage))] for clip_id in orderedSet(re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
if not entries: if not entries:
webpage = self._download_webpage(url, page_id) webpage = self._download_webpage(url, page_id)
if 'getAuthStates("' in webpage: if 'getAuthStates("' in webpage:
entries = [ninecninemedia_url_result(clip_id) for clip_id in entries = [self._ninecninemedia_url_result(clip_id) for clip_id in
self._search_regex(r'getAuthStates\("([\d+,]+)"', webpage, 'clip ids').split(',')] self._search_regex(r'getAuthStates\("([\d+,]+)"', webpage, 'clip ids').split(',')]
return self.playlist_result(entries, page_id) else:
entries = [
self._ninecninemedia_url_result(clip_id) for clip_id in
traverse_obj(webpage, (
{find_element(tag='jasper-player-container', html=True)},
{extract_attributes}, 'axis-ids', {json.loads}, ..., 'axisId', {str}))
]
return self.playlist_result(entries, page_id)

View file

@ -10,11 +10,14 @@ from ..utils import (
OnDemandPagedList, OnDemandPagedList,
age_restricted, age_restricted,
clean_html, clean_html,
extract_attributes,
int_or_none, int_or_none,
traverse_obj, traverse_obj,
try_get, try_get,
unescapeHTML, unescapeHTML,
unsmuggle_url, unsmuggle_url,
update_url,
url_or_none,
urlencode_postdata, urlencode_postdata,
) )
@ -98,12 +101,20 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
class DailymotionIE(DailymotionBaseInfoExtractor): class DailymotionIE(DailymotionBaseInfoExtractor):
_VALID_URL = r'''(?ix) _VALID_URL = r'''(?ix)
https?:// https?://
(?:
dai\.ly/|
(?: (?:
(?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:(?:embed|swf|\#)/)|player(?:/\w+)?\.html\?)?video|swf)| (?:(?:www|touch|geo)\.)?dailymotion\.[a-z]{2,3}|
(?:www\.)?lequipe\.fr/video (?:www\.)?lequipe\.fr
)/
(?:
swf/(?!video)|
(?:(?:crawler|embed|swf)/)?video/|
player(?:/[\da-z]+)?\.html\?(?:video|(?P<is_playlist>playlist))=
) )
[/=](?P<id>[^/?_&]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))? )
''' (?P<id>[^/?_&#]+)(?:[\w-]*\?playlist=(?P<playlist_id>x[0-9a-z]+))?
'''
IE_NAME = 'dailymotion' IE_NAME = 'dailymotion'
_EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1'] _EMBED_REGEX = [r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1']
_TESTS = [{ _TESTS = [{
@ -123,7 +134,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'tags': ['hollywood', 'celeb', 'celebrity', 'movies', 'red carpet'], 'tags': ['hollywood', 'celeb', 'celebrity', 'movies', 'red carpet'],
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1aXqIx58LKWQ/x1080', 'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/K456B1cmt4ZcZ9KiM/x1080',
}, },
}, { }, {
'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true', 'url': 'https://geo.dailymotion.com/player.html?video=x89eyek&mute=true',
@ -142,7 +153,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'view_count': int, 'view_count': int,
'like_count': int, 'like_count': int,
'tags': ['en_quete_d_esprit'], 'tags': ['en_quete_d_esprit'],
'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1YNg_RUl7ueu/x1080', 'thumbnail': r're:https://(?:s[12]\.)dmcdn\.net/v/Tncwi1clTH6StrxMP/x1080',
}, },
}, { }, {
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames', 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
@ -217,6 +228,66 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
}, { }, {
'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video', 'url': 'https://geo.dailymotion.com/player/xakln.html?video=x8mjju4&customConfig%5BcustomParams%5D=%2Ffr-fr%2Ftennis%2Fwimbledon-mens-singles%2Farticles-video',
'only_matching': True, 'only_matching': True,
}, { # playlist-only
'url': 'https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj',
'only_matching': True,
}, {
'url': 'https://geo.dailymotion.com/player/xmyye.html?video=x93blhi',
'only_matching': True,
}, {
'url': 'https://www.dailymotion.com/crawler/video/x8u4owg',
'only_matching': True,
}, {
'url': 'https://www.dailymotion.com/embed/video/x8u4owg',
'only_matching': True,
}, {
'url': 'https://dai.ly/x94cnnk',
'only_matching': True,
}]
_WEBPAGE_TESTS = [{
# https://geo.dailymotion.com/player/xmyye.html?video=x93blhi
'url': 'https://www.financialounge.com/video/2024/08/01/borse-europee-in-rosso-dopo-la-fed-a-milano-volano-mediobanca-e-tim-edizione-del-1-agosto/',
'info_dict': {
'id': 'x93blhi',
'ext': 'mp4',
'title': 'OnAir - 01/08/24',
'description': '',
'duration': 217,
'timestamp': 1722505658,
'upload_date': '20240801',
'uploader': 'Financialounge',
'uploader_id': 'x2vtgmm',
'age_limit': 0,
'tags': [],
'view_count': int,
'like_count': int,
},
}, {
# https://geo.dailymotion.com/player/xf7zn.html?playlist=x7wdsj
'url': 'https://www.cycleworld.com/blogs/ask-kevin/ducati-continues-to-evolve-with-v4/',
'info_dict': {
'id': 'x7wdsj',
},
'playlist_mincount': 50,
}, {
# https://www.dailymotion.com/crawler/video/x8u4owg
'url': 'https://www.leparisien.fr/environnement/video-le-veloto-la-voiture-a-pedales-qui-aimerait-se-faire-une-place-sur-les-routes-09-03-2024-KCYMCPM4WFHJXMSKBUI66UNFPU.php',
'info_dict': {
'id': 'x8u4owg',
'ext': 'mp4',
'like_count': int,
'uploader': 'Le Parisien',
'thumbnail': 'https://www.leparisien.fr/resizer/ho_GwveeYftNkLwg_cEta--5Bv4=/1200x675/cloudfront-eu-central-1.images.arcpublishing.com/leparisien/BFXJNEBN75EUNHGYJLORUC3TX4.jpg',
'upload_date': '20240309',
'view_count': int,
'timestamp': 1709997866,
'age_limit': 0,
'uploader_id': 'x32f7b',
'title': 'VIDÉO. Le «\xa0véloto\xa0», la voiture à pédales qui aimerait se faire une place sur les routes',
'duration': 428.0,
'description': 'À bord du « véloto », lalternative à la voiture pour la campagne',
'tags': ['biclou', 'vélo', 'véloto', 'campagne', 'voiture', 'environnement', 'véhicules intermédiaires'],
},
}] }]
_GEO_BYPASS = False _GEO_BYPASS = False
_COMMON_MEDIA_FIELDS = '''description _COMMON_MEDIA_FIELDS = '''description
@ -232,16 +303,35 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
for mobj in re.finditer( for mobj in re.finditer(
r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage): r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id') yield from 'https://www.dailymotion.com/embed/video/' + mobj.group('id')
for mobj in re.finditer(
r'(?s)<script [^>]*\bsrc=(["\'])(?:https?:)?//[\w-]+\.dailymotion\.com/player/(?:(?!\1).)+\1[^>]*>', webpage):
attrs = extract_attributes(mobj.group(0))
player_url = url_or_none(attrs.get('src'))
if not player_url:
continue
player_url = player_url.replace('.js', '.html')
if player_url.startswith('//'):
player_url = f'https:{player_url}'
if video_id := attrs.get('data-video'):
query_string = f'video={video_id}'
elif playlist_id := attrs.get('data-playlist'):
query_string = f'playlist={playlist_id}'
else:
continue
yield update_url(player_url, query=query_string)
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url) url, smuggled_data = unsmuggle_url(url)
video_id, playlist_id = self._match_valid_url(url).groups() video_id, is_playlist, playlist_id = self._match_valid_url(url).group('id', 'is_playlist', 'playlist_id')
if playlist_id: if is_playlist: # We matched the playlist query param as video_id
if self._yes_playlist(playlist_id, video_id): playlist_id = video_id
return self.url_result( video_id = None
'http://www.dailymotion.com/playlist/' + playlist_id,
'DailymotionPlaylist', playlist_id) if self._yes_playlist(playlist_id, video_id):
return self.url_result(
f'http://www.dailymotion.com/playlist/{playlist_id}',
'DailymotionPlaylist', playlist_id)
password = self.get_param('videopassword') password = self.get_param('videopassword')
media = self._call_api( media = self._call_api(
@ -282,6 +372,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
title = metadata['title'] title = metadata['title']
is_live = media.get('isOnAir') is_live = media.get('isOnAir')
formats = [] formats = []
subtitles = {}
for quality, media_list in metadata['qualities'].items(): for quality, media_list in metadata['qualities'].items():
for m in media_list: for m in media_list:
media_url = m.get('url') media_url = m.get('url')
@ -289,8 +381,10 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
if not media_url or media_type == 'application/vnd.lumberjack.manifest': if not media_url or media_type == 'application/vnd.lumberjack.manifest':
continue continue
if media_type == 'application/x-mpegURL': if media_type == 'application/x-mpegURL':
formats.extend(self._extract_m3u8_formats( fmt, subs = self._extract_m3u8_formats_and_subtitles(
media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False)) media_url, video_id, 'mp4', live=is_live, m3u8_id='hls', fatal=False)
formats.extend(fmt)
self._merge_subtitles(subs, target=subtitles)
else: else:
f = { f = {
'url': media_url, 'url': media_url,
@ -310,20 +404,18 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
if not f.get('fps') and f['format_id'].endswith('@60'): if not f.get('fps') and f['format_id'].endswith('@60'):
f['fps'] = 60 f['fps'] = 60
subtitles = {}
subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {} subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
for subtitle_lang, subtitle in subtitles_data.items(): for subtitle_lang, subtitle in subtitles_data.items():
subtitles[subtitle_lang] = [{ subtitles[subtitle_lang] = [{
'url': subtitle_url, 'url': subtitle_url,
} for subtitle_url in subtitle.get('urls', [])] } for subtitle_url in subtitle.get('urls', [])]
thumbnails = [] thumbnails = traverse_obj(metadata, (
for height, poster_url in metadata.get('posters', {}).items(): ('posters', 'thumbnails'), {dict.items}, lambda _, v: url_or_none(v[1]), {
thumbnails.append({ 'height': (0, {int_or_none}),
'height': int_or_none(height), 'id': (0, {str}),
'id': height, 'url': 1,
'url': poster_url, }))
})
owner = metadata.get('owner') or {} owner = metadata.get('owner') or {}
stats = media.get('stats') or {} stats = media.get('stats') or {}
@ -447,7 +539,7 @@ class DailymotionSearchIE(DailymotionPlaylistBaseIE):
class DailymotionUserIE(DailymotionPlaylistBaseIE): class DailymotionUserIE(DailymotionPlaylistBaseIE):
IE_NAME = 'dailymotion:user' IE_NAME = 'dailymotion:user'
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist|search|crawler)/)(?:(?:old/)?user/)?(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.dailymotion.com/user/nqtv', 'url': 'https://www.dailymotion.com/user/nqtv',
'info_dict': { 'info_dict': {

View file

@ -40,7 +40,7 @@ class DangalPlayBaseIE(InfoExtractor):
'id': ('content_id', {str}), 'id': ('content_id', {str}),
'title': ('display_title', {str}), 'title': ('display_title', {str}),
'episode': ('title', {str}), 'episode': ('title', {str}),
'series': ('show_name', {str}, {lambda x: x or None}), 'series': ('show_name', {str}, filter),
'series_id': ('catalog_id', {str}), 'series_id': ('catalog_id', {str}),
'duration': ('duration', {int_or_none}), 'duration': ('duration', {int_or_none}),
'release_timestamp': ('release_date_uts', {int_or_none}), 'release_timestamp': ('release_date_uts', {int_or_none}),

View file

@ -1,7 +1,10 @@
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
jwt_decode_hs256,
parse_codecs, parse_codecs,
try_get, try_get,
url_or_none, url_or_none,
@ -13,9 +16,6 @@ from ..utils.traversal import traverse_obj
class DigitalConcertHallIE(InfoExtractor): class DigitalConcertHallIE(InfoExtractor):
IE_DESC = 'DigitalConcertHall extractor' IE_DESC = 'DigitalConcertHall extractor'
_VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert|work)/(?P<id>[0-9]+)-?(?P<part>[0-9]+)?' _VALID_URL = r'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert|work)/(?P<id>[0-9]+)-?(?P<part>[0-9]+)?'
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15'
_ACCESS_TOKEN = None
_NETRC_MACHINE = 'digitalconcerthall' _NETRC_MACHINE = 'digitalconcerthall'
_TESTS = [{ _TESTS = [{
'note': 'Playlist with only one video', 'note': 'Playlist with only one video',
@ -69,59 +69,157 @@ class DigitalConcertHallIE(InfoExtractor):
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
'playlist_count': 1, 'playlist_count': 1,
}] }]
_LOGIN_HINT = ('Use --username token --password ACCESS_TOKEN where ACCESS_TOKEN '
'is the "access_token_production" from your browser local storage')
_REFRESH_HINT = 'or else use a "refresh_token" with --username refresh --password REFRESH_TOKEN'
_OAUTH_URL = 'https://api.digitalconcerthall.com/v2/oauth2/token'
_CLIENT_ID = 'dch.webapp'
_CLIENT_SECRET = '2ySLN+2Fwb'
_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15'
_OAUTH_HEADERS = {
'Accept': 'application/json',
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
'Origin': 'https://www.digitalconcerthall.com',
'Referer': 'https://www.digitalconcerthall.com/',
'User-Agent': _USER_AGENT,
}
_access_token = None
_access_token_expiry = 0
_refresh_token = None
def _perform_login(self, username, password): @property
login_token = self._download_json( def _access_token_is_expired(self):
self._OAUTH_URL, return self._access_token_expiry - 30 <= int(time.time())
None, 'Obtaining token', errnote='Unable to obtain token', data=urlencode_postdata({
def _set_access_token(self, value):
self._access_token = value
self._access_token_expiry = traverse_obj(value, ({jwt_decode_hs256}, 'exp', {int})) or 0
def _cache_tokens(self, /):
self.cache.store(self._NETRC_MACHINE, 'tokens', {
'access_token': self._access_token,
'refresh_token': self._refresh_token,
})
def _fetch_new_tokens(self, invalidate=False):
if invalidate:
self.report_warning('Access token has been invalidated')
self._set_access_token(None)
if not self._access_token_is_expired:
return
if not self._refresh_token:
self._set_access_token(None)
self._cache_tokens()
raise ExtractorError(
'Access token has expired or been invalidated. '
'Get a new "access_token_production" value from your browser '
f'and try again, {self._REFRESH_HINT}', expected=True)
# If we only have a refresh token, we need a temporary "initial token" for the refresh flow
bearer_token = self._access_token or self._download_json(
self._OAUTH_URL, None, 'Obtaining initial token', 'Unable to obtain initial token',
data=urlencode_postdata({
'affiliate': 'none', 'affiliate': 'none',
'grant_type': 'device', 'grant_type': 'device',
'device_vendor': 'unknown', 'device_vendor': 'unknown',
# device_model 'Safari' gets split streams of 4K/HEVC video and lossless/FLAC audio # device_model 'Safari' gets split streams of 4K/HEVC video and lossless/FLAC audio,
'device_model': 'unknown' if self._configuration_arg('prefer_combined_hls') else 'Safari', # but this is no longer effective since actual login is not possible anymore
'app_id': 'dch.webapp', 'device_model': 'unknown',
'app_id': self._CLIENT_ID,
'app_distributor': 'berlinphil', 'app_distributor': 'berlinphil',
'app_version': '1.84.0', 'app_version': '1.95.0',
'client_secret': '2ySLN+2Fwb', 'client_secret': self._CLIENT_SECRET,
}), headers={ }), headers=self._OAUTH_HEADERS)['access_token']
'Accept': 'application/json',
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
'User-Agent': self._USER_AGENT,
})['access_token']
try: try:
login_response = self._download_json( response = self._download_json(
self._OAUTH_URL, self._OAUTH_URL, None, 'Refreshing token', 'Unable to refresh token',
None, note='Logging in', errnote='Unable to login', data=urlencode_postdata({ data=urlencode_postdata({
'grant_type': 'password', 'grant_type': 'refresh_token',
'username': username, 'refresh_token': self._refresh_token,
'password': password, 'client_id': self._CLIENT_ID,
'client_secret': self._CLIENT_SECRET,
}), headers={ }), headers={
'Accept': 'application/json', **self._OAUTH_HEADERS,
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8', 'Authorization': f'Bearer {bearer_token}',
'Referer': 'https://www.digitalconcerthall.com',
'Authorization': f'Bearer {login_token}',
'User-Agent': self._USER_AGENT,
}) })
except ExtractorError as error: except ExtractorError as e:
if isinstance(error.cause, HTTPError) and error.cause.status == 401: if isinstance(e.cause, HTTPError) and e.cause.status == 401:
raise ExtractorError('Invalid username or password', expected=True) self._set_access_token(None)
self._refresh_token = None
self._cache_tokens()
raise ExtractorError('Your tokens have been invalidated', expected=True)
raise raise
self._ACCESS_TOKEN = login_response['access_token']
self._set_access_token(response['access_token'])
if refresh_token := traverse_obj(response, ('refresh_token', {str})):
self.write_debug('New refresh token granted')
self._refresh_token = refresh_token
self._cache_tokens()
def _perform_login(self, username, password):
self.report_login()
if username == 'refresh':
self._refresh_token = password
self._fetch_new_tokens()
if username == 'token':
if not traverse_obj(password, {jwt_decode_hs256}):
raise ExtractorError(
f'The access token passed to yt-dlp is not valid. {self._LOGIN_HINT}', expected=True)
self._set_access_token(password)
self._cache_tokens()
if username in ('refresh', 'token'):
if self.get_param('cachedir') is not False:
token_type = 'access' if username == 'token' else 'refresh'
self.to_screen(f'Your {token_type} token has been cached to disk. To use the cached '
'token next time, pass --username cache along with any password')
return
if username != 'cache':
raise ExtractorError(
'Login with username and password is no longer supported '
f'for this site. {self._LOGIN_HINT}, {self._REFRESH_HINT}', expected=True)
# Try cached access_token
cached_tokens = self.cache.load(self._NETRC_MACHINE, 'tokens', default={})
self._set_access_token(cached_tokens.get('access_token'))
self._refresh_token = cached_tokens.get('refresh_token')
if not self._access_token_is_expired:
return
# Try cached refresh_token
self._fetch_new_tokens(invalidate=True)
def _real_initialize(self): def _real_initialize(self):
if not self._ACCESS_TOKEN: if not self._access_token:
self.raise_login_required(method='password') self.raise_login_required(
'All content on this site is only available for registered users. '
f'{self._LOGIN_HINT}, {self._REFRESH_HINT}', method=None)
def _entries(self, items, language, type_, **kwargs): def _entries(self, items, language, type_, **kwargs):
for item in items: for item in items:
video_id = item['id'] video_id = item['id']
stream_info = self._download_json(
self._proto_relative_url(item['_links']['streams']['href']), video_id, headers={ for should_retry in (True, False):
'Accept': 'application/json', self._fetch_new_tokens(invalidate=not should_retry)
'Authorization': f'Bearer {self._ACCESS_TOKEN}', try:
'Accept-Language': language, stream_info = self._download_json(
'User-Agent': self._USER_AGENT, self._proto_relative_url(item['_links']['streams']['href']), video_id, headers={
}) 'Accept': 'application/json',
'Authorization': f'Bearer {self._access_token}',
'Accept-Language': language,
'User-Agent': self._USER_AGENT,
})
break
except ExtractorError as error:
if should_retry and isinstance(error.cause, HTTPError) and error.cause.status == 401:
continue
raise
formats = [] formats = []
for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})): for m3u8_url in traverse_obj(stream_info, ('channel', ..., 'stream', ..., 'url', {url_or_none})):
@ -157,7 +255,6 @@ class DigitalConcertHallIE(InfoExtractor):
'Accept': 'application/json', 'Accept': 'application/json',
'Accept-Language': language, 'Accept-Language': language,
'User-Agent': self._USER_AGENT, 'User-Agent': self._USER_AGENT,
'Authorization': f'Bearer {self._ACCESS_TOKEN}',
}) })
videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...)) videos = [vid_info] if type_ == 'film' else traverse_obj(vid_info, ('_embedded', ..., ...))

View file

@ -207,7 +207,7 @@ class ERRJupiterIE(InfoExtractor):
**traverse_obj(data, { **traverse_obj(data, {
'title': ('heading', {str}), 'title': ('heading', {str}),
'alt_title': ('subHeading', {str}), 'alt_title': ('subHeading', {str}),
'description': (('lead', 'body'), {clean_html}, {lambda x: x or None}), 'description': (('lead', 'body'), {clean_html}, filter),
'timestamp': ('created', {int_or_none}), 'timestamp': ('created', {int_or_none}),
'modified_timestamp': ('updated', {int_or_none}), 'modified_timestamp': ('updated', {int_or_none}),
'release_timestamp': (('scheduleStart', 'publicStart'), {int_or_none}), 'release_timestamp': (('scheduleStart', 'publicStart'), {int_or_none}),

View file

@ -563,12 +563,13 @@ class FacebookIE(InfoExtractor):
return extract_video_data(try_get( return extract_video_data(try_get(
js_data, lambda x: x['jsmods']['instances'], list) or []) js_data, lambda x: x['jsmods']['instances'], list) or [])
def extract_dash_manifest(video, formats): def extract_dash_manifest(vid_data, formats, mpd_url=None):
dash_manifest = traverse_obj(video, 'dash_manifest', 'playlist', expected_type=str) dash_manifest = traverse_obj(
vid_data, 'dash_manifest', 'playlist', 'dash_manifest_xml_string', 'manifest_xml', expected_type=str)
if dash_manifest: if dash_manifest:
formats.extend(self._parse_mpd_formats( formats.extend(self._parse_mpd_formats(
compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)), compat_etree_fromstring(urllib.parse.unquote_plus(dash_manifest)),
mpd_url=video.get('dash_manifest_url'))) mpd_url=url_or_none(vid_data.get('dash_manifest_url')) or mpd_url))
def process_formats(info): def process_formats(info):
# Downloads with browser's User-Agent are rate limited. Working around # Downloads with browser's User-Agent are rate limited. Working around
@ -618,16 +619,20 @@ class FacebookIE(InfoExtractor):
video = video['creation_story'] video = video['creation_story']
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner')) video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
video.update(reel_info) video.update(reel_info)
formats = [] formats = []
q = qualities(['sd', 'hd']) q = qualities(['sd', 'hd'])
# Legacy formats extraction
fmt_data = traverse_obj(video, ('videoDeliveryLegacyFields', {dict})) or video
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'), for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
('playable_url_dash', ''), ('browser_native_hd_url', 'hd'), ('playable_url_dash', ''), ('browser_native_hd_url', 'hd'),
('browser_native_sd_url', 'sd')): ('browser_native_sd_url', 'sd')):
playable_url = video.get(key) playable_url = fmt_data.get(key)
if not playable_url: if not playable_url:
continue continue
if determine_ext(playable_url) == 'mpd': if determine_ext(playable_url) == 'mpd':
formats.extend(self._extract_mpd_formats(playable_url, video_id)) formats.extend(self._extract_mpd_formats(playable_url, video_id, fatal=False))
else: else:
formats.append({ formats.append({
'format_id': format_id, 'format_id': format_id,
@ -635,7 +640,29 @@ class FacebookIE(InfoExtractor):
'quality': q(format_id) - 3, 'quality': q(format_id) - 3,
'url': playable_url, 'url': playable_url,
}) })
extract_dash_manifest(video, formats) extract_dash_manifest(fmt_data, formats)
# New videoDeliveryResponse formats extraction
fmt_data = traverse_obj(video, ('videoDeliveryResponseFragment', 'videoDeliveryResponseResult'))
mpd_urls = traverse_obj(fmt_data, ('dash_manifest_urls', ..., 'manifest_url', {url_or_none}))
dash_manifests = traverse_obj(fmt_data, ('dash_manifests', lambda _, v: v['manifest_xml']))
for idx, dash_manifest in enumerate(dash_manifests):
extract_dash_manifest(dash_manifest, formats, mpd_url=traverse_obj(mpd_urls, idx))
if not dash_manifests:
# Only extract from MPD URLs if the manifests are not already provided
for mpd_url in mpd_urls:
formats.extend(self._extract_mpd_formats(mpd_url, video_id, fatal=False))
for prog_fmt in traverse_obj(fmt_data, ('progressive_urls', lambda _, v: v['progressive_url'])):
format_id = traverse_obj(prog_fmt, ('metadata', 'quality', {str.lower}))
formats.append({
'format_id': format_id,
# sd, hd formats w/o resolution info should be deprioritized below DASH
'quality': q(format_id) - 3,
'url': prog_fmt['progressive_url'],
})
for m3u8_url in traverse_obj(fmt_data, ('hls_playlist_urls', ..., 'hls_playlist_url', {url_or_none})):
formats.extend(self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', fatal=False, m3u8_id='hls'))
if not formats: if not formats:
# Do not append false positive entry w/o any formats # Do not append false positive entry w/o any formats
return return

View file

@ -3,7 +3,7 @@ from .nexx import NexxIE
class FunkIE(InfoExtractor): class FunkIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)' _VALID_URL = r'https?://(?:(?:www|origin|play)\.)?funk\.net/(?:channel|playlist)/[^/?#]+/(?P<display_id>[0-9a-z-]+)-(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821', 'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821',
'md5': '8610449476156f338761a75391b0017d', 'md5': '8610449476156f338761a75391b0017d',
@ -27,6 +27,9 @@ class FunkIE(InfoExtractor):
}, { }, {
'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699', 'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://play.funk.net/playlist/neuesteVideos/george-floyd-wenn-die-polizei-toetet-der-fall-2004391',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View file

@ -0,0 +1,141 @@
import json
from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
ExtractorError,
clean_html,
int_or_none,
join_nonempty,
parse_iso8601,
str_or_none,
url_or_none,
)
from ..utils.traversal import traverse_obj
class GameDevTVDashboardIE(InfoExtractor):
    """Extractor for gamedev.tv dashboard URLs: a whole course, or a single lecture of a course."""

    _VALID_URL = r'https?://(?:www\.)?gamedev\.tv/dashboard/courses/(?P<course_id>\d+)(?:/(?P<lecture_id>\d+))?'
    _NETRC_MACHINE = 'gamedevtv'
    _TESTS = [{
        'url': 'https://www.gamedev.tv/dashboard/courses/25',
        'info_dict': {
            'id': '25',
            'title': 'Complete Blender Creator 3: Learn 3D Modelling for Beginners',
            'tags': ['blender', 'course', 'all', 'box modelling', 'sculpting'],
            'categories': ['Blender', '3D Art'],
            'thumbnail': 'https://gamedev-files.b-cdn.net/courses/qisc9pmu1jdc.jpg',
            'upload_date': '20220516',
            'timestamp': 1652694420,
            'modified_date': '20241027',
            'modified_timestamp': 1730049658,
        },
        'playlist_count': 100,
    }, {
        'url': 'https://www.gamedev.tv/dashboard/courses/63/2279',
        'info_dict': {
            'id': 'df04f4d8-68a4-4756-a71b-9ca9446c3a01',
            'ext': 'mp4',
            'modified_timestamp': 1701695752,
            'upload_date': '20230504',
            'episode': 'MagicaVoxel Community Course Introduction',
            'series_id': '63',
            'title': 'MagicaVoxel Community Course Introduction',
            'timestamp': 1683195397,
            'modified_date': '20231204',
            'categories': ['3D Art', 'MagicaVoxel'],
            'season': 'MagicaVoxel Community Course',
            'tags': ['MagicaVoxel', 'all', 'course'],
            'series': 'MagicaVoxel 3D Art Mini Course',
            'duration': 1405,
            'episode_number': 1,
            'season_number': 1,
            'season_id': '219',
            'description': 'md5:a378738c5bbec1c785d76c067652d650',
            'display_id': '63-219-2279',
            'alt_title': '1_CC_MVX MagicaVoxel Community Course Introduction.mp4',
            'thumbnail': 'https://vz-23691c65-6fa.b-cdn.net/df04f4d8-68a4-4756-a71b-9ca9446c3a01/thumbnail.jpg',
        },
    }]
    # Class-level dict: the Authorization header stored by _perform_login
    # is therefore visible to every instance of this extractor.
    _API_HEADERS = {}

    def _perform_login(self, username, password):
        """Log in against the students API and store the resulting Authorization header.

        An HTTP 401 from the login endpoint is re-raised as an expected
        ExtractorError; any other ExtractorError propagates unchanged.
        """
        credentials = json.dumps({
            'email': username,
            'password': password,
            'cart_items': [],
        }).encode()

        try:
            auth = self._download_json(
                'https://api.gamedev.tv/api/students/login', None, 'Logging in',
                headers={'Content-Type': 'application/json'}, data=credentials)
        except ExtractorError as err:
            rejected = isinstance(err.cause, HTTPError) and err.cause.status == 401
            if not rejected:
                raise
            raise ExtractorError('Invalid username/password', expected=True)

        token_type = auth['token_type']
        access_token = auth['access_token']
        self._API_HEADERS['Authorization'] = f'{token_type} {access_token}'

    def _real_initialize(self):
        """Demand password login unless an Authorization header has already been stored."""
        if self._API_HEADERS.get('Authorization'):
            return
        self.raise_login_required(
            'This content is only available with purchase', method='password')

    def _entries(self, data, course_id, course_info, selected_lecture):
        """Yield an info dict for each lecture of the course that has a valid playlist URL.

        If `selected_lecture` is truthy, lectures whose stringified `id`
        differs from it are skipped.
        """
        # Only lectures for which url_or_none() accepts video.playListUrl are kept
        playable_lectures = ('lectures', lambda _, lec: url_or_none(lec['video']['playListUrl']))

        for section in traverse_obj(data, ('sections', ..., {dict})):
            section_info = traverse_obj(section, {
                'season_id': ('id', {str_or_none}),
                'season': ('title', {str}),
                'season_number': ('order', {int_or_none}),
            })
            season_id = section_info.get('season_id')

            for lecture in traverse_obj(section, playable_lectures):
                lecture_id = lecture.get('id')
                if selected_lecture and str(lecture_id) != selected_lecture:
                    continue

                display_id = join_nonempty(course_id, season_id, lecture_id)
                playlist_url = lecture['video']['playListUrl']
                formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                    playlist_url, display_id, 'mp4', m3u8_id='hls')

                # Layering order matters: course < section < fixed fields < lecture metadata
                entry = dict(course_info)
                entry.update(section_info)
                entry.update({
                    'id': display_id,  # fallback
                    'display_id': display_id,
                    'formats': formats,
                    'subtitles': subtitles,
                    'series': course_info.get('title'),
                    'series_id': course_id,
                })
                entry.update(traverse_obj(lecture, {
                    'id': ('video', 'guid', {str}),
                    'title': ('title', {str}),
                    'alt_title': ('video', 'title', {str}),
                    'description': ('description', {clean_html}),
                    'episode': ('title', {str}),
                    'episode_number': ('order', {int_or_none}),
                    'duration': ('video', 'duration_in_sec', {int_or_none}),
                    'timestamp': ('video', 'created_at', {parse_iso8601}),
                    'modified_timestamp': ('video', 'updated_at', {parse_iso8601}),
                    'thumbnail': ('video', 'thumbnailUrl', {url_or_none}),
                }))
                yield entry

    def _real_extract(self, url):
        """Return a single lecture when the URL names one, otherwise a playlist of the whole course."""
        mobj = self._match_valid_url(url)
        course_id = mobj.group('course_id')
        lecture_id = mobj.group('lecture_id')

        api_url = f'https://api.gamedev.tv/api/courses/my/{course_id}'
        course = self._download_json(api_url, course_id, headers=self._API_HEADERS)['data']

        course_info = traverse_obj(course, {
            'title': ('title', {str}),
            'tags': ('tags', ..., 'name', {str}),
            'categories': ('categories', ..., 'title', {str}),
            'timestamp': ('created_at', {parse_iso8601}),
            'modified_timestamp': ('updated_at', {parse_iso8601}),
            'thumbnail': ('image', {url_or_none}),
        })

        entries = self._entries(course, course_id, course_info, lecture_id)
        if not lecture_id:
            return self.playlist_result(entries, course_id, **course_info)

        # _entries is lazy, so only the first matching lecture is actually extracted
        lecture = next(entries, None)
        if lecture:
            return lecture
        raise ExtractorError('Lecture not found')

View file

@ -8,6 +8,8 @@ from .common import InfoExtractor
from .commonprotocols import RtmpIE from .commonprotocols import RtmpIE
from .youtube import YoutubeIE from .youtube import YoutubeIE
from ..compat import compat_etree_fromstring from ..compat import compat_etree_fromstring
from ..cookies import LenientSimpleCookie
from ..networking.exceptions import HTTPError
from ..networking.impersonate import ImpersonateTarget from ..networking.impersonate import ImpersonateTarget
from ..utils import ( from ..utils import (
KNOWN_EXTENSIONS, KNOWN_EXTENSIONS,
@ -2374,10 +2376,9 @@ class GenericIE(InfoExtractor):
else: else:
video_id = self._generic_id(url) video_id = self._generic_id(url)
# Try to impersonate a web-browser by default if possible # Do not impersonate by default; see https://github.com/yt-dlp/yt-dlp/issues/11335
# Skip impersonation if not available to omit the warning impersonate = self._configuration_arg('impersonate', ['false'])
impersonate = self._configuration_arg('impersonate', ['']) if 'false' in impersonate:
if 'false' in impersonate or not self._downloader._impersonate_target_available(ImpersonateTarget()):
impersonate = None impersonate = None
# Some webservers may serve compressed content of rather big size (e.g. gzipped flac) # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
@ -2388,10 +2389,29 @@ class GenericIE(InfoExtractor):
# to accept raw bytes and being able to download only a chunk. # to accept raw bytes and being able to download only a chunk.
# It may probably better to solve this by checking Content-Type for application/octet-stream # It may probably better to solve this by checking Content-Type for application/octet-stream
# after a HEAD request, but not sure if we can rely on this. # after a HEAD request, but not sure if we can rely on this.
full_response = self._request_webpage(url, video_id, headers=filter_dict({ try:
'Accept-Encoding': 'identity', full_response = self._request_webpage(url, video_id, headers=filter_dict({
'Referer': smuggled_data.get('referer'), 'Accept-Encoding': 'identity',
}), impersonate=impersonate) 'Referer': smuggled_data.get('referer'),
}), impersonate=impersonate)
except ExtractorError as e:
if not (isinstance(e.cause, HTTPError) and e.cause.status == 403
and e.cause.response.get_header('cf-mitigated') == 'challenge'
and e.cause.response.extensions.get('impersonate') is None):
raise
cf_cookie_domain = traverse_obj(
LenientSimpleCookie(e.cause.response.get_header('set-cookie')),
('__cf_bm', 'domain'))
if cf_cookie_domain:
self.write_debug(f'Clearing __cf_bm cookie for {cf_cookie_domain}')
self.cookiejar.clear(domain=cf_cookie_domain, path='/', name='__cf_bm')
msg = 'Got HTTP Error 403 caused by Cloudflare anti-bot challenge; '
if not self._downloader._impersonate_target_available(ImpersonateTarget()):
msg += ('see https://github.com/yt-dlp/yt-dlp#impersonation for '
'how to install the required impersonation dependency, and ')
raise ExtractorError(
f'{msg}try again with --extractor-args "generic:impersonate"', expected=True)
new_url = full_response.url new_url = full_response.url
if new_url != extract_basic_auth(url)[0]: if new_url != extract_basic_auth(url)[0]:
self.report_following_redirect(new_url) self.report_following_redirect(new_url)

View file

@ -5,56 +5,63 @@ import hashlib
import hmac import hmac
import json import json
import os import os
import re
import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none,
js_to_json,
remove_end,
traverse_obj, traverse_obj,
unescapeHTML,
) )
class GoPlayIE(InfoExtractor): class GoPlayIE(InfoExtractor):
_VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/]+/[^/]+/|)(?P<display_id>[^/#]+)' _VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/?#]+/[^/?#]+/|)(?P<id>[^/#]+)'
_NETRC_MACHINE = 'goplay' _NETRC_MACHINE = 'goplay'
_TESTS = [{ _TESTS = [{
'url': 'https://www.goplay.be/video/de-container-cup/de-container-cup-s3/de-container-cup-s3-aflevering-2#autoplay', 'url': 'https://www.goplay.be/video/de-slimste-mens-ter-wereld/de-slimste-mens-ter-wereld-s22/de-slimste-mens-ter-wereld-s22-aflevering-1',
'info_dict': { 'info_dict': {
'id': '9c4214b8-e55d-4e4b-a446-f015f6c6f811', 'id': '2baa4560-87a0-421b-bffc-359914e3c387',
'ext': 'mp4', 'ext': 'mp4',
'title': 'S3 - Aflevering 2', 'title': 'S22 - Aflevering 1',
'series': 'De Container Cup', 'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}',
'season': 'Season 3', 'series': 'De Slimste Mens ter Wereld',
'season_number': 3, 'episode': 'Episode 1',
'episode': 'Episode 2', 'season_number': 22,
'episode_number': 2, 'episode_number': 1,
'season': 'Season 22',
}, },
'params': {'skip_download': True},
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
}, { }, {
'url': 'https://www.goplay.be/video/a-family-for-thr-holidays-s1-aflevering-1#autoplay', 'url': 'https://www.goplay.be/video/1917',
'info_dict': { 'info_dict': {
'id': '74e3ed07-748c-49e4-85a0-393a93337dbf', 'id': '40cac41d-8d29-4ef5-aa11-75047b9f0907',
'ext': 'mp4', 'ext': 'mp4',
'title': 'A Family for the Holidays', 'title': '1917',
'description': r're:Op het hoogtepunt van de Eerste Wereldoorlog krijgen twee jonge .{94}',
}, },
'params': {'skip_download': True},
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
}, { }, {
'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay', 'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay',
'info_dict': { 'info_dict': {
'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656', 'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee',
'ext': 'mp4', 'ext': 'mp4',
'title': 'S11 - Aflevering 1', 'title': 'S11 - Aflevering 1',
'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten .{102}',
'episode': 'Episode 1', 'episode': 'Episode 1',
'series': 'De Mol', 'series': 'De Mol',
'season_number': 11, 'season_number': 11,
'episode_number': 1, 'episode_number': 1,
'season': 'Season 11', 'season': 'Season 11',
}, },
'params': { 'params': {'skip_download': True},
'skip_download': True,
},
'skip': 'This video is only available for registered users', 'skip': 'This video is only available for registered users',
}] }]
@ -69,27 +76,42 @@ class GoPlayIE(InfoExtractor):
if not self._id_token: if not self._id_token:
raise self.raise_login_required(method='password') raise self.raise_login_required(method='password')
def _real_extract(self, url): def _find_json(self, s):
url, display_id = self._match_valid_url(url).group(0, 'display_id') return self._search_json(
webpage = self._download_webpage(url, display_id) r'\w+\s*:\s*', s, 'next js data', None, contains_pattern=r'\[(?s:.+)\]', default=None)
video_data_json = self._html_search_regex(r'<div\s+data-hero="([^"]+)"', webpage, 'video_data')
video_data = self._parse_json(unescapeHTML(video_data_json), display_id).get('data')
movie = video_data.get('movie') def _real_extract(self, url):
if movie: display_id = self._match_id(url)
video_id = movie['videoUuid'] webpage = self._download_webpage(url, display_id)
info_dict = {
'title': movie.get('title'), nextjs_data = traverse_obj(
} re.findall(r'<script[^>]*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*</script>', webpage),
else: (..., {js_to_json}, {json.loads}, ..., {self._find_json}, ...))
episode = traverse_obj(video_data, ('playlists', ..., 'episodes', lambda _, v: v['pageInfo']['url'] == url), get_all=False) meta = traverse_obj(nextjs_data, (
video_id = episode['videoUuid'] ..., lambda _, v: v['meta']['path'] == urllib.parse.urlparse(url).path, 'meta', any))
info_dict = {
'title': episode.get('episodeTitle'), video_id = meta['uuid']
'series': traverse_obj(episode, ('program', 'title')), info_dict = traverse_obj(meta, {
'season_number': episode.get('seasonNumber'), 'title': ('title', {str}),
'episode_number': episode.get('episodeNumber'), 'description': ('description', {str.strip}),
} })
if traverse_obj(meta, ('program', 'subtype')) != 'movie':
for season_data in traverse_obj(nextjs_data, (..., 'children', ..., 'playlists', ...)):
episode_data = traverse_obj(
season_data, ('videos', lambda _, v: v['videoId'] == video_id, any))
if not episode_data:
continue
episode_title = traverse_obj(
episode_data, 'contextualTitle', 'episodeTitle', expected_type=str)
info_dict.update({
'title': episode_title or info_dict.get('title'),
'series': remove_end(info_dict.get('title'), f' - {episode_title}'),
'season_number': traverse_obj(season_data, ('season', {int_or_none})),
'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})),
})
break
api = self._download_json( api = self._download_json(
f'https://api.goplay.be/web/v1/videos/long-form/{video_id}', f'https://api.goplay.be/web/v1/videos/long-form/{video_id}',

View file

@ -1,4 +1,3 @@
import functools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -63,7 +62,7 @@ class IlPostIE(InfoExtractor):
'url': ('podcast_raw_url', {url_or_none}), 'url': ('podcast_raw_url', {url_or_none}),
'thumbnail': ('image', {url_or_none}), 'thumbnail': ('image', {url_or_none}),
'timestamp': ('timestamp', {int_or_none}), 'timestamp': ('timestamp', {int_or_none}),
'duration': ('milliseconds', {functools.partial(float_or_none, scale=1000)}), 'duration': ('milliseconds', {float_or_none(scale=1000)}),
'availability': ('free', {lambda v: 'public' if v else 'subscriber_only'}), 'availability': ('free', {lambda v: 'public' if v else 'subscriber_only'}),
}), }),
} }

View file

@ -37,7 +37,7 @@ class ImgurBaseIE(InfoExtractor):
class ImgurIE(ImgurBaseIE): class ImgurIE(ImgurBaseIE):
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?P<id>[a-zA-Z0-9]+)' _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://imgur.com/A61SaA1', 'url': 'https://imgur.com/A61SaA1',
@ -54,6 +54,22 @@ class ImgurIE(ImgurBaseIE):
'like_count': int, 'like_count': int,
'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg', 'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
}, },
}, {
# Test with URL slug
'url': 'https://imgur.com/mrw-gifv-is-up-running-without-any-bugs-A61SaA1',
'info_dict': {
'id': 'A61SaA1',
'ext': 'mp4',
'title': 'MRW gifv is up and running without any bugs',
'timestamp': 1416446068,
'upload_date': '20141120',
'dislike_count': int,
'comment_count': int,
'release_timestamp': 1416446068,
'release_date': '20141120',
'like_count': int,
'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg',
},
}, { }, {
'url': 'https://i.imgur.com/A61SaA1.gifv', 'url': 'https://i.imgur.com/A61SaA1.gifv',
'only_matching': True, 'only_matching': True,
@ -92,6 +108,7 @@ class ImgurIE(ImgurBaseIE):
'comment_count': int, 'comment_count': int,
'release_timestamp': 1710491255, 'release_timestamp': 1710491255,
'release_date': '20240315', 'release_date': '20240315',
'thumbnail': 'https://i.imgur.com/zV03bd5h.jpg',
}, },
}] }]
@ -208,7 +225,10 @@ class ImgurIE(ImgurBaseIE):
}), get_all=False), }), get_all=False),
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'thumbnail': url_or_none(search('thumbnailUrl')), 'thumbnails': [{
'url': thumbnail_url,
'http_headers': {'Accept': '*/*'},
}] if (thumbnail_url := search(['thumbnailUrl', 'twitter:image', 'og:image'])) else None,
'http_headers': {'Accept': '*/*'}, 'http_headers': {'Accept': '*/*'},
} }
@ -252,17 +272,9 @@ class ImgurGalleryBaseIE(ImgurBaseIE):
class ImgurGalleryIE(ImgurGalleryBaseIE): class ImgurGalleryIE(ImgurGalleryBaseIE):
IE_NAME = 'imgur:gallery' IE_NAME = 'imgur:gallery'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?P<id>[a-zA-Z0-9]+)' _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://imgur.com/gallery/Q95ko',
'info_dict': {
'id': 'Q95ko',
'title': 'Adding faces make every GIF better',
},
'playlist_count': 25,
'skip': 'Zoinks! You\'ve taken a wrong turn.',
}, {
# TODO: static images - replace with animated/video gallery # TODO: static images - replace with animated/video gallery
'url': 'http://imgur.com/topic/Aww/ll5Vk', 'url': 'http://imgur.com/topic/Aww/ll5Vk',
'only_matching': True, 'only_matching': True,
@ -280,7 +292,27 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
'release_timestamp': 1358554297, 'release_timestamp': 1358554297,
'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg', 'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
'release_date': '20130119', 'release_date': '20130119',
'uploader_url': 'https://i.imgur.com/u3R4I2S_d.png?maxwidth=290&fidelity=grand', 'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand',
'comment_count': int,
'dislike_count': int,
'like_count': int,
},
}, {
# Test with slug
'url': 'https://imgur.com/gallery/classic-steve-carell-gif-cracks-me-up-everytime-repost-downvotes-YcAQlkx',
'add_ies': ['Imgur'],
'info_dict': {
'id': 'YcAQlkx',
'ext': 'mp4',
'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
'timestamp': 1358554297,
'upload_date': '20130119',
'uploader_id': '1648642',
'uploader': 'wittyusernamehere',
'release_timestamp': 1358554297,
'release_date': '20130119',
'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg',
'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand',
'comment_count': int, 'comment_count': int,
'dislike_count': int, 'dislike_count': int,
'like_count': int, 'like_count': int,
@ -317,6 +349,13 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
'title': 'Penguins !', 'title': 'Penguins !',
}, },
'playlist_count': 3, 'playlist_count': 3,
}, {
'url': 'https://imgur.com/t/unmuted/penguins-penguins-6lAn9VQ',
'info_dict': {
'id': '6lAn9VQ',
'title': 'Penguins !',
},
'playlist_count': 3,
}, { }, {
'url': 'https://imgur.com/t/unmuted/kx2uD3C', 'url': 'https://imgur.com/t/unmuted/kx2uD3C',
'add_ies': ['Imgur'], 'add_ies': ['Imgur'],
@ -357,7 +396,7 @@ class ImgurGalleryIE(ImgurGalleryBaseIE):
class ImgurAlbumIE(ImgurGalleryBaseIE): class ImgurAlbumIE(ImgurGalleryBaseIE):
IE_NAME = 'imgur:album' IE_NAME = 'imgur:album'
_VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)' _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?:[^/?#]+-)?(?P<id>[a-zA-Z0-9]+)'
_GALLERY = False _GALLERY = False
_TESTS = [{ _TESTS = [{
# TODO: only static images - replace with animated/video gallery # TODO: only static images - replace with animated/video gallery
@ -372,6 +411,14 @@ class ImgurAlbumIE(ImgurGalleryBaseIE):
'title': 'enen-no-shouboutai', 'title': 'enen-no-shouboutai',
}, },
'playlist_count': 2, 'playlist_count': 2,
}, {
# Test with URL slug
'url': 'https://imgur.com/a/enen-no-shouboutai-iX265HX',
'info_dict': {
'id': 'iX265HX',
'title': 'enen-no-shouboutai',
},
'playlist_count': 2,
}, { }, {
'url': 'https://imgur.com/a/8pih2Ed', 'url': 'https://imgur.com/a/8pih2Ed',
'info_dict': { 'info_dict': {

Some files were not shown because too many files have changed in this diff Show more