diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index 5df13ad9b..20e5e944f 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -63,17 +63,26 @@ body: placeholder: | [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) - [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 - [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 - [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 + [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe) + [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023) + [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2 + [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1 [debug] Proxy map: {} - [debug] Request Handlers: urllib, requests - [debug] Loaded 1893 extractors - [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest + [debug] Request Handlers: urllib, requests, websockets, curl_cffi + [debug] Loaded 1838 extractors + [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest + Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds yt-dlp is up to date (nightly@... 
from yt-dlp/yt-dlp-nightly-builds) [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc render: shell validations: required: true + - type: markdown + attributes: + value: | + > [!CAUTION] + > ### GitHub is experiencing a high volume of malicious spam comments. + > ### If you receive any replies asking you to download a file, do NOT follow the download links! + > + > Note that this issue may be temporarily locked as an anti-spam measure after it is opened. diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 644c87a7e..4aeff7dc6 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -75,17 +75,26 @@ body: placeholder: | [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) - [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 - [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 - [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 + [debug] yt-dlp version nightly@... 
from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe) + [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023) + [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2 + [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1 [debug] Proxy map: {} - [debug] Request Handlers: urllib, requests - [debug] Loaded 1893 extractors - [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest + [debug] Request Handlers: urllib, requests, websockets, curl_cffi + [debug] Loaded 1838 extractors + [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest + Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc render: shell validations: required: true + - type: markdown + attributes: + value: | + > [!CAUTION] + > ### GitHub is experiencing a high volume of malicious spam comments. + > ### If you receive any replies asking you to download a file, do NOT follow the download links! + > + > Note that this issue may be temporarily locked as an anti-spam measure after it is opened. diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index 59d0474c2..2f516ebb7 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -71,17 +71,26 @@ body: placeholder: | [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version nightly@... 
from yt-dlp/yt-dlp [b634ba742] (win_exe) - [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 - [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 - [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 + [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe) + [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023) + [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2 + [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1 [debug] Proxy map: {} - [debug] Request Handlers: urllib, requests - [debug] Loaded 1893 extractors - [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest + [debug] Request Handlers: urllib, requests, websockets, curl_cffi + [debug] Loaded 1838 extractors + [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest + Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc render: shell validations: required: true + - type: markdown + attributes: + value: | + > [!CAUTION] + > ### GitHub is experiencing a high volume of malicious spam comments. + > ### If you receive any replies asking you to download a file, do NOT follow the download links! + > + > Note that this issue may be temporarily locked as an anti-spam measure after it is opened. 
diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index e20739673..201586e9d 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -56,17 +56,26 @@ body: placeholder: | [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) - [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 - [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 - [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 + [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe) + [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023) + [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2 + [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1 [debug] Proxy map: {} - [debug] Request Handlers: urllib, requests - [debug] Loaded 1893 extractors - [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest + [debug] Request Handlers: urllib, requests, websockets, curl_cffi + [debug] Loaded 1838 extractors + [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest + Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds yt-dlp is up to date (nightly@... 
from yt-dlp/yt-dlp-nightly-builds) [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc render: shell validations: required: true + - type: markdown + attributes: + value: | + > [!CAUTION] + > ### GitHub is experiencing a high volume of malicious spam comments. + > ### If you receive any replies asking you to download a file, do NOT follow the download links! + > + > Note that this issue may be temporarily locked as an anti-spam measure after it is opened. diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index e06db9ccf..765de86a2 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -52,15 +52,24 @@ body: placeholder: | [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) - [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 - [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 - [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 + [debug] yt-dlp version nightly@... 
from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe) + [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023) + [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2 + [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1 [debug] Proxy map: {} - [debug] Request Handlers: urllib, requests - [debug] Loaded 1893 extractors - [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest + [debug] Request Handlers: urllib, requests, websockets, curl_cffi + [debug] Loaded 1838 extractors + [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest + Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc render: shell + - type: markdown + attributes: + value: | + > [!CAUTION] + > ### GitHub is experiencing a high volume of malicious spam comments. + > ### If you receive any replies asking you to download a file, do NOT follow the download links! + > + > Note that this issue may be temporarily locked as an anti-spam measure after it is opened. diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml index 571223a9c..198e21bec 100644 --- a/.github/ISSUE_TEMPLATE/6_question.yml +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -58,15 +58,24 @@ body: placeholder: | [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version nightly@... 
from yt-dlp/yt-dlp [b634ba742] (win_exe) - [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 - [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 - [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 + [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe) + [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023) + [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2 + [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1 [debug] Proxy map: {} - [debug] Request Handlers: urllib, requests - [debug] Loaded 1893 extractors - [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest + [debug] Request Handlers: urllib, requests, websockets, curl_cffi + [debug] Loaded 1838 extractors + [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest + Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc render: shell + - type: markdown + attributes: + value: | + > [!CAUTION] + > ### GitHub is experiencing a high volume of malicious spam comments. + > ### If you receive any replies asking you to download a file, do NOT follow the download links! + > + > Note that this issue may be temporarily locked as an anti-spam measure after it is opened. 
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 12ec5b0d8..a211ae165 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -72,7 +72,7 @@ on: default: true type: boolean windows: - description: yt-dlp.exe, yt-dlp_min.exe, yt-dlp_win.zip + description: yt-dlp.exe, yt-dlp_win.zip default: true type: boolean windows32: @@ -199,22 +199,24 @@ jobs: GITHUB_WORKFLOW: build githubToken: ${{ github.token }} # To cache image arch: ${{ matrix.architecture }} - distro: ubuntu18.04 # Standalone executable should be built on minimum supported OS + distro: ubuntu20.04 # Standalone executable should be built on minimum supported OS dockerRunArgs: --volume "${PWD}/repo:/repo" install: | # Installing Python 3.10 from the Deadsnakes repo raises errors apt update - apt -y install zlib1g-dev libffi-dev python3.8 python3.8-dev python3.8-distutils python3-pip - python3.8 -m pip install -U pip setuptools wheel - # Cannot access any files from the repo directory at this stage - python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi secretstorage cffi + apt -y install zlib1g-dev libffi-dev python3.9 python3.9-dev python3.9-distutils python3-pip \ + python3-secretstorage # Cannot build cryptography wheel in virtual armv7 environment + python3.9 -m pip install -U pip wheel 'setuptools>=71.0.2' + # XXX: Keep this in sync with pyproject.toml (it can't be accessed at this stage) and exclude secretstorage + python3.9 -m pip install -U Pyinstaller mutagen pycryptodomex brotli certifi cffi \ + 'requests>=2.32.2,<3' 'urllib3>=1.26.17,<3' 'websockets>=13.0' run: | cd repo - python3.8 devscripts/install_deps.py -o --include build - python3.8 devscripts/install_deps.py --include pyinstaller --include secretstorage # Cached version may be out of date - python3.8 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}" - python3.8 
devscripts/make_lazy_extractors.py - python3.8 -m bundle.pyinstaller + python3.9 devscripts/install_deps.py -o --include build + python3.9 devscripts/install_deps.py --include pyinstaller # Cached versions may be out of date + python3.9 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}" + python3.9 devscripts/make_lazy_extractors.py + python3.9 -m bundle.pyinstaller if ${{ vars.UPDATE_TO_VERIFICATION && 'true' || 'false' }}; then arch="${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}" @@ -240,7 +242,7 @@ jobs: permissions: contents: read actions: write # For cleaning up cache - runs-on: macos-12 + runs-on: macos-13 steps: - uses: actions/checkout@v4 @@ -266,7 +268,7 @@ jobs: # We need to ignore wheels otherwise we break universal2 builds python3 -m pip install -U --no-binary :all: -r requirements.txt # We need to fuse our own universal2 wheels for curl_cffi - python3 -m pip install -U delocate + python3 -m pip install -U 'delocate==0.11.0' mkdir curl_cffi_whls curl_cffi_universal2 python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do @@ -346,7 +348,7 @@ jobs: macos_legacy: needs: process if: inputs.macos_legacy - runs-on: macos-12 + runs-on: macos-13 steps: - uses: actions/checkout@v4 @@ -403,13 +405,13 @@ jobs: steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 - with: # 3.8 is used for Win7 support - python-version: "3.8" + with: + python-version: "3.10" - name: Install Requirements run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds python devscripts/install_deps.py -o --include build python devscripts/install_deps.py --include curl-cffi - python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.7.0-py3-none-any.whl" + python -m pip install -U 
"https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.11.1-py3-none-any.whl" - name: Prepare run: | @@ -419,22 +421,12 @@ jobs: run: | python -m bundle.pyinstaller python -m bundle.pyinstaller --onedir - Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_real.exe Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip - - name: Install Requirements (py2exe) - run: | - python devscripts/install_deps.py --include py2exe - - name: Build (py2exe) - run: | - python -m bundle.py2exe - Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe - Move-Item ./dist/yt-dlp_real.exe ./dist/yt-dlp.exe - - name: Verify --update-to if: vars.UPDATE_TO_VERIFICATION run: | - foreach ($name in @("yt-dlp","yt-dlp_min")) { + foreach ($name in @("yt-dlp")) { Copy-Item "./dist/${name}.exe" "./dist/${name}_downgraded.exe" $version = & "./dist/${name}.exe" --version & "./dist/${name}_downgraded.exe" -v --update-to yt-dlp/yt-dlp@2023.03.04 @@ -450,7 +442,6 @@ jobs: name: build-bin-${{ github.job }} path: | dist/yt-dlp.exe - dist/yt-dlp_min.exe dist/yt-dlp_win.zip compression-level: 0 @@ -463,13 +454,13 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: "3.8" + python-version: "3.10" architecture: "x86" - name: Install Requirements run: | python devscripts/install_deps.py -o --include build python devscripts/install_deps.py - python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.7.0-py3-none-any.whl" + python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.11.1-py3-none-any.whl" - name: Prepare run: | @@ -513,7 +504,8 @@ jobs: - windows32 runs-on: ubuntu-latest steps: - - uses: actions/download-artifact@v4 + - name: Download artifacts + uses: actions/download-artifact@v4 with: path: artifact pattern: build-bin-* @@ -537,13 +529,29 @@ jobs: lock 2022.08.18.36 .+ Python 3\.6 lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lock 2023.11.16 win_x86_exe .+ 
Windows-(?:Vista|2008Server) + lock 2024.10.22 py2exe .+ + lock 2024.10.22 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b + lock 2024.10.22 (?!\w+_exe).+ Python 3\.8 + lock 2024.10.22 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6 lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) + lockV2 yt-dlp/yt-dlp 2024.10.22 py2exe .+ + lockV2 yt-dlp/yt-dlp 2024.10.22 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b + lockV2 yt-dlp/yt-dlp 2024.10.22 (?!\w+_exe).+ Python 3\.8 + lockV2 yt-dlp/yt-dlp 2024.10.22 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server) + lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 py2exe .+ + lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b + lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 (?!\w+_exe).+ Python 3\.8 + lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server) + lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.045052 py2exe .+ + lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b + lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 (?!\w+_exe).+ Python 3\.8 + lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) EOF - name: Sign checksum files diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index fdfdebc65..9a4342a58 100644 --- 
a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -36,16 +36,20 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - # CPython 3.8 is in quick-test - python-version: ['3.9', '3.10', '3.11', '3.12', pypy-3.8, pypy-3.10] + # CPython 3.9 is in quick-test + python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest - python-version: '3.8' + python-version: '3.9' + - os: windows-latest + python-version: '3.10' - os: windows-latest python-version: '3.12' - os: windows-latest - python-version: pypy-3.9 + python-version: '3.13' + - os: windows-latest + python-version: pypy-3.10 steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} @@ -55,7 +59,8 @@ jobs: - name: Install test requirements run: python3 ./devscripts/install_deps.py --include test --include curl-cffi - name: Run tests + timeout-minutes: 15 continue-on-error: False run: | python3 -m yt_dlp -v || true # Print debug head - python3 ./devscripts/run_tests.py core + python3 ./devscripts/run_tests.py --pytest-args '--reruns 2 --reruns-delay 3.0' core diff --git a/.github/workflows/download.yml b/.github/workflows/download.yml index 7256804d9..6849fba9b 100644 --- a/.github/workflows/download.yml +++ b/.github/workflows/download.yml @@ -28,13 +28,13 @@ jobs: fail-fast: true matrix: os: [ubuntu-latest] - python-version: ['3.10', '3.11', '3.12', pypy-3.8, pypy-3.10] + python-version: ['3.10', '3.11', '3.12', '3.13', pypy-3.10] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest - python-version: '3.8' + python-version: '3.9' - os: windows-latest - python-version: pypy-3.9 + python-version: pypy-3.10 steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/.github/workflows/issue-lockdown.yml b/.github/workflows/issue-lockdown.yml new file mode 100644 index 000000000..4b973e2e6 --- /dev/null +++ 
b/.github/workflows/issue-lockdown.yml @@ -0,0 +1,21 @@ +name: Issue Lockdown +on: + issues: + types: [opened] + +permissions: + issues: write + +jobs: + lockdown: + name: Issue Lockdown + if: vars.ISSUE_LOCKDOWN + runs-on: ubuntu-latest + steps: + - name: "Lock new issue" + env: + GH_TOKEN: ${{ github.token }} + ISSUE_NUMBER: ${{ github.event.issue.number }} + REPOSITORY: ${{ github.repository }} + run: | + gh issue lock "${ISSUE_NUMBER}" -R "${REPOSITORY}" diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml index 3afb51a30..1a32bbfe3 100644 --- a/.github/workflows/quick-test.yml +++ b/.github/workflows/quick-test.yml @@ -10,16 +10,17 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Set up Python 3.8 + - name: Set up Python 3.9 uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.9' - name: Install test requirements - run: python3 ./devscripts/install_deps.py --include test + run: python3 ./devscripts/install_deps.py -o --include test - name: Run tests + timeout-minutes: 15 run: | python3 -m yt_dlp -v || true - python3 ./devscripts/run_tests.py core + python3 ./devscripts/run_tests.py --pytest-args '--reruns 2 --reruns-delay 3.0' core check: name: Code check if: "!contains(github.event.head_commit.message, 'ci skip all')" @@ -28,7 +29,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: '3.8' + python-version: '3.9' - name: Install dev dependencies run: python3 ./devscripts/install_deps.py -o --include static-analysis - name: Make lazy extractors diff --git a/.github/workflows/release-master.yml b/.github/workflows/release-master.yml index c49319b17..78445e417 100644 --- a/.github/workflows/release-master.yml +++ b/.github/workflows/release-master.yml @@ -28,3 +28,20 @@ jobs: actions: write # For cleaning up cache id-token: write # mandatory for trusted publishing secrets: inherit + + publish_pypi: + needs: [release] + if: 
vars.MASTER_PYPI_PROJECT != '' + runs-on: ubuntu-latest + permissions: + id-token: write # mandatory for trusted publishing + steps: + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + path: dist + name: build-pypi + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + verbose: true diff --git a/.github/workflows/release-nightly.yml b/.github/workflows/release-nightly.yml index b536c5066..8f7284405 100644 --- a/.github/workflows/release-nightly.yml +++ b/.github/workflows/release-nightly.yml @@ -41,3 +41,20 @@ jobs: actions: write # For cleaning up cache id-token: write # mandatory for trusted publishing secrets: inherit + + publish_pypi: + needs: [release] + if: vars.NIGHTLY_PYPI_PROJECT != '' + runs-on: ubuntu-latest + permissions: + id-token: write # mandatory for trusted publishing + steps: + - name: Download artifacts + uses: actions/download-artifact@v4 + with: + path: dist + name: build-pypi + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + verbose: true diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index fa5ad7e51..26b93e429 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -2,10 +2,6 @@ name: Release on: workflow_call: inputs: - prerelease: - required: false - default: true - type: boolean source: required: false default: '' @@ -18,6 +14,10 @@ on: required: false default: '' type: string + prerelease: + required: false + default: true + type: boolean workflow_dispatch: inputs: source: @@ -204,7 +204,7 @@ jobs: git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com" git add -u git commit -m "Release ${{ env.version }}" \ - -m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all :ci run dl" + -m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all" git push origin --force ${{ github.event.ref }}:release - name: Get target commitish @@ -278,7 +278,17 @@ jobs: make 
clean-cache python -m build --no-isolation . + - name: Upload artifacts + if: github.event_name != 'workflow_dispatch' + uses: actions/upload-artifact@v4 + with: + name: build-pypi + path: | + dist/* + compression-level: 0 + - name: Publish to PyPI + if: github.event_name == 'workflow_dispatch' uses: pypa/gh-action-pypi-publish@release/v1 with: verbose: true @@ -325,7 +335,7 @@ jobs: "(https://github.com/yt-dlp/yt-dlp-master-builds/releases/latest \"Master builds\")"' || '' }} > ./RELEASE_NOTES printf '\n\n' >> ./RELEASE_NOTES cat >> ./RELEASE_NOTES << EOF - #### A description of the various files are in the [README](https://github.com/${{ github.repository }}#release-files) + #### A description of the various files is in the [README](https://github.com/${{ github.repository }}#release-files) --- $(python ./devscripts/make_changelog.py -vv --collapsible) EOF diff --git a/.github/workflows/sanitize-comment.yml b/.github/workflows/sanitize-comment.yml new file mode 100644 index 000000000..45c87cdd4 --- /dev/null +++ b/.github/workflows/sanitize-comment.yml @@ -0,0 +1,17 @@ +name: Sanitize comment + +on: + issue_comment: + types: [created, edited] + +permissions: + issues: write + +jobs: + sanitize-comment: + name: Sanitize comment + if: vars.SANITIZE_COMMENT && !github.event.issue.pull_request + runs-on: ubuntu-latest + steps: + - name: Sanitize comment + uses: yt-dlp/sanitize-comment@v1 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index dbae6476f..fd7b0f121 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -37,14 +37,18 @@ Bugs and suggestions should be reported at: [yt-dlp/yt-dlp/issues](https://githu **Please include the full output of yt-dlp when run with `-vU`**, i.e. **add** `-vU` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. 
It should look similar to this: ``` $ yt-dlp -vU -[debug] Command-line config: ['-v', 'demo.com'] -[debug] Encodings: locale UTF-8, fs utf-8, out utf-8, pref UTF-8 -[debug] yt-dlp version 2021.09.25 (zip) -[debug] Python version 3.8.10 (CPython 64bit) - Linux-5.4.0-74-generic-x86_64-with-glibc2.29 -[debug] exe versions: ffmpeg 4.2.4, ffprobe 4.2.4 +[debug] Command-line config: ['-vU', 'https://www.example.com/'] +[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 +[debug] yt-dlp version nightly@... from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe) +[debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023) +[debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2 +[debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1 [debug] Proxy map: {} -Current Build Hash 25cc412d1d3c0725a1f2f5b7e4682f6fb40e6d15f7024e96f7afd572e9919535 -yt-dlp is up to date (2021.09.25) +[debug] Request Handlers: urllib, requests, websockets, curl_cffi +[debug] Loaded 1838 extractors +[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest +Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds +yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) ... ``` **Do not post screenshots of verbose logs; only plain text is acceptable.** @@ -233,7 +237,7 @@ After you have ensured this site is distributing its content legally, you can fo # * MD5 checksum; start the string with 'md5:', e.g. # 'description': 'md5:098f6bcd4621d373cade4e832627b4f6', # * A regular expression; start the string with 're:', e.g. - # 'thumbnail': r're:^https?://.*\.jpg$', + # 'thumbnail': r're:https?://.*\.jpg$', # * A count of elements in a list; start the string with 'count:', e.g. # 'tags': 'count:10', # * Any Python type, e.g. 
@@ -268,7 +272,7 @@ After you have ensured this site is distributing its content legally, you can fo You can use `hatch fmt` to automatically fix problems. Rules that the linter/formatter enforces should not be disabled with `# noqa` unless a maintainer requests it. The only exception allowed is for old/printf-style string formatting in GraphQL query templates (use `# noqa: UP031`). -1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython and PyPy for Python 3.8 and above. Backward compatibility is not required for even older versions of Python. +1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython >=3.9 and PyPy >=3.10. Backward compatibility is not required for even older versions of Python. 1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: ```shell @@ -302,10 +306,9 @@ Extractors are very fragile by nature since they depend on the layout of the sou For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L119-L440) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp: - `id` (media identifier) - - `title` (media title) - `url` (media download URL) or `formats` -The aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken. While all extractors must return a `title`, they must also allow it's extraction to be non-fatal. +The aforementioned metadata fields are the critical data without which extraction does not make any sense. 
If any of them fail to be extracted, then the extractor is considered broken. All other metadata extraction should be completely non-fatal. For pornographic sites, appropriate `age_limit` must also be returned. diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 2180ecfe2..240197e8a 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -655,3 +655,59 @@ iancmy mokrueger luvyana szantnerb +hugepower +scribblemaniac +Codenade +Demon000 +Deukhoofd +grqz +hibes +Khaoklong51 +kieraneglin +lengzuo +naglis +ndyanx +otovalek +quad +rakslice +sahilsinghss73 +tony-hn +xingchensong +BallzCrasher +coreywright +eric321 +poyhen +tetra-fox +444995 +63427083 +allendema +DarkZeros +DTrombett +imranh2 +KarboniteKream +mikkovedru +pktiuk +rubyevadestaxes +avagordon01 +CounterPillow +JoseAngelB +KBelmin +kesor +MellowKyler +Wesley107772 +a13ssandr0 +ChocoLZS +doe1080 +hugovdev +jshumphrey +julionc +manavchaudhary1 +powergold1 +Sakura286 +SamDecrock +stratus-ss +subrat-lima +gitninja1234 +jkruse +xiaomac +wesson09 diff --git a/Changelog.md b/Changelog.md index 73bf828a6..9dc905309 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,346 @@ # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2024.12.06 + +#### Core changes +- **cookies**: [Add `--cookies-from-browser` support for MS Store Firefox](https://github.com/yt-dlp/yt-dlp/commit/354cb4026cf2191e1a130ec2a627b95cabfbc60a) ([#11731](https://github.com/yt-dlp/yt-dlp/issues/11731)) by [wesson09](https://github.com/wesson09) + +#### Extractor changes +- **bilibili**: [Fix HD formats extraction](https://github.com/yt-dlp/yt-dlp/commit/fca3eb5f8be08d5fab2e18b45b7281a12e566725) ([#11734](https://github.com/yt-dlp/yt-dlp/issues/11734)) by [grqz](https://github.com/grqz) +- **soundcloud**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/2feb28028ee48f2185d2d95076e62accb09b9e2e) ([#11742](https://github.com/yt-dlp/yt-dlp/issues/11742)) by 
[bashonly](https://github.com/bashonly) +- **youtube** + - [Fix `n` sig extraction for player `3bb1f723`](https://github.com/yt-dlp/yt-dlp/commit/a95ee6d8803fca9157adecf63732ab58bf87fd88) ([#11750](https://github.com/yt-dlp/yt-dlp/issues/11750)) by [bashonly](https://github.com/bashonly) (With fixes in [4bd2655](https://github.com/yt-dlp/yt-dlp/commit/4bd2655398aed450456197a6767639114a24eac2)) + - [Fix signature function extraction](https://github.com/yt-dlp/yt-dlp/commit/4c85ccd1366c88cf93982f8350f58eed17355981) ([#11751](https://github.com/yt-dlp/yt-dlp/issues/11751)) by [bashonly](https://github.com/bashonly) + - [Player client maintenance](https://github.com/yt-dlp/yt-dlp/commit/2e49c789d3eebc39af8910705d65a98bca0e4c4f) ([#11724](https://github.com/yt-dlp/yt-dlp/issues/11724)) by [bashonly](https://github.com/bashonly) + +### 2024.12.03 + +#### Core changes +- [Add `playlist_webpage_url` field](https://github.com/yt-dlp/yt-dlp/commit/7d6c259a03bc4707a319e5e8c6eff0278707874b) ([#11613](https://github.com/yt-dlp/yt-dlp/issues/11613)) by [seproDev](https://github.com/seproDev) + +#### Extractor changes +- [Handle fragmented formats in `_remove_duplicate_formats`](https://github.com/yt-dlp/yt-dlp/commit/e0500cbf796323551bbabe5b8ed8c75a511ba47a) ([#11637](https://github.com/yt-dlp/yt-dlp/issues/11637)) by [Grub4K](https://github.com/Grub4K) +- **bilibili** + - [Always try to extract HD formats](https://github.com/yt-dlp/yt-dlp/commit/dc1687648077c5bf64863b307ecc5ab7e029bd8d) ([#10559](https://github.com/yt-dlp/yt-dlp/issues/10559)) by [grqz](https://github.com/grqz) + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/239f5f36fe04603bec59c8b975f6a792f10246db) ([#11667](https://github.com/yt-dlp/yt-dlp/issues/11667)) by [grqz](https://github.com/grqz) (With fixes in [f05a1cd](https://github.com/yt-dlp/yt-dlp/commit/f05a1cd1492fc98dc8d80d2081d632a1879913d2) by [bashonly](https://github.com/bashonly), [grqz](https://github.com/grqz)) + - [Fix subtitles and 
chapters extraction](https://github.com/yt-dlp/yt-dlp/commit/a13a336aa6f906812701abec8101b73b73db8ff7) ([#11708](https://github.com/yt-dlp/yt-dlp/issues/11708)) by [xiaomac](https://github.com/xiaomac) +- **chaturbate**: [Fix support for non-public streams](https://github.com/yt-dlp/yt-dlp/commit/4b5eec0aaa7c02627f27a386591b735b90e681a8) ([#11624](https://github.com/yt-dlp/yt-dlp/issues/11624)) by [jkruse](https://github.com/jkruse) +- **dacast**: [Fix HLS AES formats extraction](https://github.com/yt-dlp/yt-dlp/commit/0a0d80800b9350d1a4c4b18d82cfb77ffbc3c507) ([#11644](https://github.com/yt-dlp/yt-dlp/issues/11644)) by [bashonly](https://github.com/bashonly) +- **dropbox**: [Fix password-protected video extraction](https://github.com/yt-dlp/yt-dlp/commit/00dcde728635633eee969ad4d498b9f233c4a94e) ([#11636](https://github.com/yt-dlp/yt-dlp/issues/11636)) by [bashonly](https://github.com/bashonly) +- **duoplay**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/62cba8a1bedbfc0ddde7267ae57b72bf5f7ea7b1) ([#11588](https://github.com/yt-dlp/yt-dlp/issues/11588)) by [bashonly](https://github.com/bashonly), [glensc](https://github.com/glensc) +- **facebook**: [Support more groups URLs](https://github.com/yt-dlp/yt-dlp/commit/e0f1ae813b36e783e2348ba2a1566e12f5cd8f6e) ([#11576](https://github.com/yt-dlp/yt-dlp/issues/11576)) by [grqz](https://github.com/grqz) +- **instagram**: [Support `share` URLs](https://github.com/yt-dlp/yt-dlp/commit/360aed810ad85db950df586282d256516c98cd2d) ([#11677](https://github.com/yt-dlp/yt-dlp/issues/11677)) by [grqz](https://github.com/grqz) +- **microsoftembed**: [Make format extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/2bea7936323ca4b6f3b9b1fdd892566223e30efa) ([#11654](https://github.com/yt-dlp/yt-dlp/issues/11654)) by [seproDev](https://github.com/seproDev) +- **mitele**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/cd0f934604587ed793e9177f6a127e5dcf99a7dd)
([#11683](https://github.com/yt-dlp/yt-dlp/issues/11683)) by [DarkZeros](https://github.com/DarkZeros) +- **stripchat**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/16336c51d0848a6868a4fa04e749fa03548b4913) ([#11596](https://github.com/yt-dlp/yt-dlp/issues/11596)) by [gitninja1234](https://github.com/gitninja1234) +- **tiktok**: [Deprioritize animated thumbnails](https://github.com/yt-dlp/yt-dlp/commit/910ecc422930bca14e2abe4986f5f92359e3cea8) ([#11645](https://github.com/yt-dlp/yt-dlp/issues/11645)) by [bashonly](https://github.com/bashonly) +- **vk**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/c038a7b187ba24360f14134842a7a2cf897c33b1) ([#11715](https://github.com/yt-dlp/yt-dlp/issues/11715)) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Adjust player clients for site changes](https://github.com/yt-dlp/yt-dlp/commit/0d146c1e36f467af30e87b7af651bdee67b73500) ([#11663](https://github.com/yt-dlp/yt-dlp/issues/11663)) by [bashonly](https://github.com/bashonly) + - tab: [Fix playlists tab extraction](https://github.com/yt-dlp/yt-dlp/commit/fe70f20aedf528fdee332131bc9b6710e54e6f10) ([#11615](https://github.com/yt-dlp/yt-dlp/issues/11615)) by [seproDev](https://github.com/seproDev) + +#### Networking changes +- **Request Handler**: websockets: [Support websockets 14.0+](https://github.com/yt-dlp/yt-dlp/commit/c7316373c0a886f65a07a51e50ee147bb3294c85) ([#11616](https://github.com/yt-dlp/yt-dlp/issues/11616)) by [coletdjnz](https://github.com/coletdjnz) + +#### Misc. 
changes +- **cleanup** + - [Bump ruff to 0.8.x](https://github.com/yt-dlp/yt-dlp/commit/d8fb3490863653182864d2a53522f350d67a9ff8) ([#11608](https://github.com/yt-dlp/yt-dlp/issues/11608)) by [seproDev](https://github.com/seproDev) + - Miscellaneous + - [ccf0a6b](https://github.com/yt-dlp/yt-dlp/commit/ccf0a6b86b7f68a75463804fe485ec240b8635f0) by [bashonly](https://github.com/bashonly), [pzhlkj6612](https://github.com/pzhlkj6612) + - [2b67ac3](https://github.com/yt-dlp/yt-dlp/commit/2b67ac300ac8b44368fb121637d1743cea8c5b6b) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev) + +### 2024.11.18 + +#### Important changes +- **Login with OAuth is no longer supported for YouTube** +Due to a change made by the site, yt-dlp is no longer able to support OAuth login for YouTube. [Read more](https://github.com/yt-dlp/yt-dlp/issues/11462#issuecomment-2471703090) + +#### Core changes +- [Catch broken Cryptodome installations](https://github.com/yt-dlp/yt-dlp/commit/b83ca24eb72e1e558b0185bd73975586c0bc0546) ([#11486](https://github.com/yt-dlp/yt-dlp/issues/11486)) by [seproDev](https://github.com/seproDev) +- **utils** + - [Fix `join_nonempty`, add `**kwargs` to `unpack`](https://github.com/yt-dlp/yt-dlp/commit/39d79c9b9cf23411d935910685c40aa1a2fdb409) ([#11559](https://github.com/yt-dlp/yt-dlp/issues/11559)) by [Grub4K](https://github.com/Grub4K) + - `subs_list_to_dict`: [Add `lang` default parameter](https://github.com/yt-dlp/yt-dlp/commit/c014fbcddcb4c8f79d914ac5bb526758b540ea33) ([#11508](https://github.com/yt-dlp/yt-dlp/issues/11508)) by [Grub4K](https://github.com/Grub4K) + +#### Extractor changes +- [Allow `ext` override for thumbnails](https://github.com/yt-dlp/yt-dlp/commit/eb64ae7d5def6df2aba74fb703e7f168fb299865) ([#11545](https://github.com/yt-dlp/yt-dlp/issues/11545)) by [bashonly](https://github.com/bashonly) +- **adobepass**: [Fix provider requests](https://github.com/yt-dlp/yt-dlp/commit/85fdc66b6e01d19a94b4f39b58e3c0cf23600902) 
([#11472](https://github.com/yt-dlp/yt-dlp/issues/11472)) by [bashonly](https://github.com/bashonly) +- **archive.org**: [Fix comments extraction](https://github.com/yt-dlp/yt-dlp/commit/f2a4983df7a64c4e93b56f79dbd16a781bd90206) ([#11527](https://github.com/yt-dlp/yt-dlp/issues/11527)) by [jshumphrey](https://github.com/jshumphrey) +- **bandlab**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/6365e92589e4bc17b8fffb0125a716d144ad2137) ([#11535](https://github.com/yt-dlp/yt-dlp/issues/11535)) by [seproDev](https://github.com/seproDev) +- **chaturbate** + - [Extract from API and support impersonation](https://github.com/yt-dlp/yt-dlp/commit/720b3dc453c342bc2e8df7dbc0acaab4479de46c) ([#11555](https://github.com/yt-dlp/yt-dlp/issues/11555)) by [powergold1](https://github.com/powergold1) (With fixes in [7cecd29](https://github.com/yt-dlp/yt-dlp/commit/7cecd299e4a5ef1f0f044b2fedc26f17e41f15e3) by [seproDev](https://github.com/seproDev)) + - [Support alternate domains](https://github.com/yt-dlp/yt-dlp/commit/a9f85670d03ab993dc589f21a9ffffcad61392d5) ([#10595](https://github.com/yt-dlp/yt-dlp/issues/10595)) by [manavchaudhary1](https://github.com/manavchaudhary1) +- **cloudflarestream**: [Avoid extraction via videodelivery.net](https://github.com/yt-dlp/yt-dlp/commit/2db8c2e7d57a1784b06057c48e3e91023720d195) ([#11478](https://github.com/yt-dlp/yt-dlp/issues/11478)) by [hugovdev](https://github.com/hugovdev) +- **ctvnews** + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f351440f1dc5b3dfbfc5737b037a869d946056fe) ([#11534](https://github.com/yt-dlp/yt-dlp/issues/11534)) by [bashonly](https://github.com/bashonly), [jshumphrey](https://github.com/jshumphrey) + - [Fix playlist ID extraction](https://github.com/yt-dlp/yt-dlp/commit/f9d98509a898737c12977b2e2117277bada2c196) ([#8892](https://github.com/yt-dlp/yt-dlp/issues/8892)) by [qbnu](https://github.com/qbnu) +- **digitalconcerthall**: [Support login with access/refresh 
tokens](https://github.com/yt-dlp/yt-dlp/commit/f7257588bdff5f0b0452635a66b253a783c97357) ([#11571](https://github.com/yt-dlp/yt-dlp/issues/11571)) by [bashonly](https://github.com/bashonly) +- **facebook**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/bacc31b05a04181b63100c481565256b14813a5e) ([#11513](https://github.com/yt-dlp/yt-dlp/issues/11513)) by [bashonly](https://github.com/bashonly) +- **gamedevtv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/be3579aaf0c3b71a0a3195e1955415d5e4d6b3d8) ([#11368](https://github.com/yt-dlp/yt-dlp/issues/11368)) by [bashonly](https://github.com/bashonly), [stratus-ss](https://github.com/stratus-ss) +- **goplay**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6b43a8d84b881d769b480ba6e20ec691e9d1b92d) ([#11466](https://github.com/yt-dlp/yt-dlp/issues/11466)) by [bashonly](https://github.com/bashonly), [SamDecrock](https://github.com/SamDecrock) +- **kenh14**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/eb15fd5a32d8b35ef515f7a3d1158c03025648ff) ([#3996](https://github.com/yt-dlp/yt-dlp/issues/3996)) by [krichbanana](https://github.com/krichbanana), [pzhlkj6612](https://github.com/pzhlkj6612) +- **litv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/e079ffbda66de150c0a9ebef05e89f61bb4d5f76) ([#11071](https://github.com/yt-dlp/yt-dlp/issues/11071)) by [jiru](https://github.com/jiru) +- **mixchmovie**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/0ec9bfed4d4a52bfb4f8733da1acf0aeeae21e6b) ([#10897](https://github.com/yt-dlp/yt-dlp/issues/10897)) by [Sakura286](https://github.com/Sakura286) +- **patreon**: [Fix comments extraction](https://github.com/yt-dlp/yt-dlp/commit/1d253b0a27110d174c40faf8fb1c999d099e0cde) ([#11530](https://github.com/yt-dlp/yt-dlp/issues/11530)) by [bashonly](https://github.com/bashonly), [jshumphrey](https://github.com/jshumphrey) +- **pialive**: [Add 
extractor](https://github.com/yt-dlp/yt-dlp/commit/d867f99622ef7fba690b08da56c39d739b822bb7) ([#10811](https://github.com/yt-dlp/yt-dlp/issues/10811)) by [ChocoLZS](https://github.com/ChocoLZS) +- **radioradicale**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/70c55cb08f780eab687e881ef42bb5c6007d290b) ([#5607](https://github.com/yt-dlp/yt-dlp/issues/5607)) by [a13ssandr0](https://github.com/a13ssandr0), [pzhlkj6612](https://github.com/pzhlkj6612) +- **reddit**: [Improve error handling](https://github.com/yt-dlp/yt-dlp/commit/7ea2787920cccc6b8ea30791993d114fbd564434) ([#11573](https://github.com/yt-dlp/yt-dlp/issues/11573)) by [bashonly](https://github.com/bashonly) +- **redgifsuser**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/d215fba7edb69d4fa665f43663756fd260b1489f) ([#11531](https://github.com/yt-dlp/yt-dlp/issues/11531)) by [jshumphrey](https://github.com/jshumphrey) +- **rutube**: [Rework extractors](https://github.com/yt-dlp/yt-dlp/commit/e398217aae19bb25f91797bfbe8a3243698d7f45) ([#11480](https://github.com/yt-dlp/yt-dlp/issues/11480)) by [seproDev](https://github.com/seproDev) +- **sonylivseries**: [Add `sort_order` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/2009cb27e17014787bf63eaa2ada51293d54f22a) ([#11569](https://github.com/yt-dlp/yt-dlp/issues/11569)) by [bashonly](https://github.com/bashonly) +- **soop**: [Fix thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/c699bafc5038b59c9afe8c2e69175fb66424c832) ([#11545](https://github.com/yt-dlp/yt-dlp/issues/11545)) by [bashonly](https://github.com/bashonly) +- **spankbang**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/8388ec256f7753b02488788e3cfa771f6e1db247) ([#11542](https://github.com/yt-dlp/yt-dlp/issues/11542)) by [jshumphrey](https://github.com/jshumphrey) +- **spreaker** + - [Support episode pages and access keys](https://github.com/yt-dlp/yt-dlp/commit/c39016f66df76d14284c705736ca73db8055d8de) 
([#11489](https://github.com/yt-dlp/yt-dlp/issues/11489)) by [julionc](https://github.com/julionc) + - [Support podcast and feed pages](https://github.com/yt-dlp/yt-dlp/commit/c6737310619022248f5d0fd13872073cac168453) ([#10968](https://github.com/yt-dlp/yt-dlp/issues/10968)) by [subrat-lima](https://github.com/subrat-lima) +- **youtube** + - [Player client maintenance](https://github.com/yt-dlp/yt-dlp/commit/637d62a3a9fc723d68632c1af25c30acdadeeb85) ([#11528](https://github.com/yt-dlp/yt-dlp/issues/11528)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev) + - [Remove broken OAuth support](https://github.com/yt-dlp/yt-dlp/commit/52c0ffe40ad6e8404d93296f575007b05b04c686) ([#11558](https://github.com/yt-dlp/yt-dlp/issues/11558)) by [bashonly](https://github.com/bashonly) + - tab: [Fix podcasts tab extraction](https://github.com/yt-dlp/yt-dlp/commit/37cd7660eaff397c551ee18d80507702342b0c2b) ([#11567](https://github.com/yt-dlp/yt-dlp/issues/11567)) by [seproDev](https://github.com/seproDev) + +#### Misc. 
changes +- **build** + - [Bump PyInstaller version pin to `>=6.11.1`](https://github.com/yt-dlp/yt-dlp/commit/f9c8deb4e5887ff5150e911ac0452e645f988044) ([#11507](https://github.com/yt-dlp/yt-dlp/issues/11507)) by [bashonly](https://github.com/bashonly) + - [Enable attestations for trusted publishing](https://github.com/yt-dlp/yt-dlp/commit/f13df591d4d7ca8e2f31b35c9c91e69ba9e9b013) ([#11420](https://github.com/yt-dlp/yt-dlp/issues/11420)) by [bashonly](https://github.com/bashonly) + - [Pin `websockets` version to >=13.0,<14](https://github.com/yt-dlp/yt-dlp/commit/240a7d43c8a67ffb86d44dc276805aa43c358dcc) ([#11488](https://github.com/yt-dlp/yt-dlp/issues/11488)) by [bashonly](https://github.com/bashonly) +- **cleanup** + - [Deprecate more compat functions](https://github.com/yt-dlp/yt-dlp/commit/f95a92b3d0169a784ee15a138fbe09d82b2754a1) ([#11439](https://github.com/yt-dlp/yt-dlp/issues/11439)) by [seproDev](https://github.com/seproDev) + - [Remove dead extractors](https://github.com/yt-dlp/yt-dlp/commit/10fc719bc7f1eef469389c5219102266ef411f29) ([#11566](https://github.com/yt-dlp/yt-dlp/issues/11566)) by [doe1080](https://github.com/doe1080) + - Miscellaneous: [da252d9](https://github.com/yt-dlp/yt-dlp/commit/da252d9d322af3e2178ac5eae324809502a0a862) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev) + +### 2024.11.04 + +#### Important changes +- **Beginning with this release, yt-dlp's Python dependencies *must* be installed using the `default` group** +If you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's optional dependencies (which were previously included by default). 
[Read more](https://github.com/yt-dlp/yt-dlp/pull/11255) +- **The minimum *required* Python version has been raised to 3.9** +Python 3.8 reached its end-of-life on 2024.10.07, and yt-dlp has now removed support for it. As an unfortunate side effect, the official `yt-dlp.exe` and `yt-dlp_x86.exe` binaries are no longer supported on Windows 7. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086) + +#### Core changes +- [Allow thumbnails with `.jpe` extension](https://github.com/yt-dlp/yt-dlp/commit/5bc5fb2835ea59bdf326bd12176d74d2c7348a95) ([#11408](https://github.com/yt-dlp/yt-dlp/issues/11408)) by [bashonly](https://github.com/bashonly) +- [Expand paths in `--plugin-dirs`](https://github.com/yt-dlp/yt-dlp/commit/914af9a0cf51c9a3f74aa88d952bee8334c67511) ([#11334](https://github.com/yt-dlp/yt-dlp/issues/11334)) by [bashonly](https://github.com/bashonly) +- [Fix `--netrc` empty string parsing for Python <=3.10](https://github.com/yt-dlp/yt-dlp/commit/88402b714ec124633933737bc156b172a3dec3d6) ([#11414](https://github.com/yt-dlp/yt-dlp/issues/11414)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) +- [Populate format sorting fields before dependent fields](https://github.com/yt-dlp/yt-dlp/commit/5c880ef42e9c2b2fc412f6d69dad37d34fb75a62) ([#11353](https://github.com/yt-dlp/yt-dlp/issues/11353)) by [Grub4K](https://github.com/Grub4K) +- [Prioritize AV1](https://github.com/yt-dlp/yt-dlp/commit/3945677a75e94a1fecc085432d791e1c21220cd3) ([#11153](https://github.com/yt-dlp/yt-dlp/issues/11153)) by [seproDev](https://github.com/seproDev) +- [Remove Python 3.8 support](https://github.com/yt-dlp/yt-dlp/commit/d784464399b600ba9516bbcec6286f11d68974dd) ([#11321](https://github.com/yt-dlp/yt-dlp/issues/11321)) by [bashonly](https://github.com/bashonly) +- **aes**: [Fix GCM pad length calculation](https://github.com/yt-dlp/yt-dlp/commit/beae2db127d3b5017cbcf685da9de7a9ef496541) ([#11438](https://github.com/yt-dlp/yt-dlp/issues/11438)) by 
[seproDev](https://github.com/seproDev) +- **cookies**: [Support chrome table version 24](https://github.com/yt-dlp/yt-dlp/commit/4613096f2e6eab9dcbac0e98b6cec760bbc99375) ([#11425](https://github.com/yt-dlp/yt-dlp/issues/11425)) by [kesor](https://github.com/kesor), [seproDev](https://github.com/seproDev) +- **utils** + - [Allow partial application for more functions](https://github.com/yt-dlp/yt-dlp/commit/b6dc2c49e8793c6dfa21275e61caf49ec1148b81) ([#11391](https://github.com/yt-dlp/yt-dlp/issues/11391)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) (With fixes in [422195e](https://github.com/yt-dlp/yt-dlp/commit/422195ec70a00b0d2002b238cacbae7790c57fdf) by [Grub4K](https://github.com/Grub4K)) + - [Fix `find_element` by class](https://github.com/yt-dlp/yt-dlp/commit/f93c16395cea1fe9ffc3c594d3e019c3b214544c) ([#11402](https://github.com/yt-dlp/yt-dlp/issues/11402)) by [bashonly](https://github.com/bashonly) + - [Fix and improve `find_element` and `find_elements`](https://github.com/yt-dlp/yt-dlp/commit/b103aca24d35b72b405c340357dc01a0ed534281) ([#11443](https://github.com/yt-dlp/yt-dlp/issues/11443)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) + +#### Extractor changes +- [Resolve `language` to ISO639-2 for ISM formats](https://github.com/yt-dlp/yt-dlp/commit/21cdcf03a237a0c4979c941d5a5385cae44c7906) ([#11359](https://github.com/yt-dlp/yt-dlp/issues/11359)) by [bashonly](https://github.com/bashonly) +- **ardmediathek**: [Extract chapters](https://github.com/yt-dlp/yt-dlp/commit/59f8dd8239c31f00b708da53b39b1e2e9409b6e6) ([#11442](https://github.com/yt-dlp/yt-dlp/issues/11442)) by [iw0nderhow](https://github.com/iw0nderhow) +- **bfmtv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/754940e9a558565d6bd3c0c529802569b1d0ae4e) ([#11444](https://github.com/yt-dlp/yt-dlp/issues/11444)) by [seproDev](https://github.com/seproDev) +- **bluesky**: [Add 
extractor](https://github.com/yt-dlp/yt-dlp/commit/5c7a5aaab27e9c3cb367b663a6136ca58866e547) ([#11055](https://github.com/yt-dlp/yt-dlp/issues/11055)) by [MellowKyler](https://github.com/MellowKyler), [seproDev](https://github.com/seproDev) +- **ccma**: [Support new 3cat.cat domain](https://github.com/yt-dlp/yt-dlp/commit/330335386d4f7603d92d6796798375336005275e) ([#11222](https://github.com/yt-dlp/yt-dlp/issues/11222)) by [JoseAngelB](https://github.com/JoseAngelB) +- **chzzk**: video: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/9c6534da81e485b2325b3489ee4128943e6d3e4b) ([#11228](https://github.com/yt-dlp/yt-dlp/issues/11228)) by [hui1601](https://github.com/hui1601) +- **cnn**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/9acf79c91a8c6c55ca972747c6858e784e2da351) ([#10185](https://github.com/yt-dlp/yt-dlp/issues/10185)) by [kylegustavo](https://github.com/kylegustavo), [seproDev](https://github.com/seproDev) +- **dailymotion** + - [Improve embed extraction](https://github.com/yt-dlp/yt-dlp/commit/a403dcf9be20b49cbb3017328f4aaa352fb6d685) ([#10843](https://github.com/yt-dlp/yt-dlp/issues/10843)) by [bashonly](https://github.com/bashonly), [pzhlkj6612](https://github.com/pzhlkj6612) + - [Support shortened URLs](https://github.com/yt-dlp/yt-dlp/commit/d1358231371f20fa23020fa9176be3b56119873e) ([#11374](https://github.com/yt-dlp/yt-dlp/issues/11374)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev) +- **facebook**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/ec9b25043f399de6a591d8370d32bf0e66c117f2) ([#11343](https://github.com/yt-dlp/yt-dlp/issues/11343)) by [kclauhk](https://github.com/kclauhk) +- **generic**: [Do not impersonate by default](https://github.com/yt-dlp/yt-dlp/commit/c29f5a7fae93a08f3cfbb6127b2faa75145b06a0) ([#11336](https://github.com/yt-dlp/yt-dlp/issues/11336)) by [bashonly](https://github.com/bashonly) +- **nfl**: [Fix 
extractors](https://github.com/yt-dlp/yt-dlp/commit/838f4385de8300a4dd4e7ffbbf0e5b7b85fb52c2) ([#11409](https://github.com/yt-dlp/yt-dlp/issues/11409)) by [bashonly](https://github.com/bashonly) +- **niconicouser**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6abef74232c0fc695cd803c18ae446cacb129389) ([#11324](https://github.com/yt-dlp/yt-dlp/issues/11324)) by [Wesley107772](https://github.com/Wesley107772) +- **soundcloud**: [Extract artists](https://github.com/yt-dlp/yt-dlp/commit/f101e5d34c97c608156ad5396714c2a2edca966a) ([#11377](https://github.com/yt-dlp/yt-dlp/issues/11377)) by [seproDev](https://github.com/seproDev) +- **tumblr**: [Support more URLs](https://github.com/yt-dlp/yt-dlp/commit/b03267bf0675eeb8df5baf1daac7cf67840c91a5) ([#6057](https://github.com/yt-dlp/yt-dlp/issues/6057)) by [selfisekai](https://github.com/selfisekai), [seproDev](https://github.com/seproDev) +- **twitter**: [Remove cookies migration workaround](https://github.com/yt-dlp/yt-dlp/commit/76802f461332d444e596437c42374fa237fa5174) ([#11392](https://github.com/yt-dlp/yt-dlp/issues/11392)) by [bashonly](https://github.com/bashonly) +- **vimeo**: [Fix API retries](https://github.com/yt-dlp/yt-dlp/commit/57212a5f97ce367590aaa5c3e9a135eead8f81f7) ([#11351](https://github.com/yt-dlp/yt-dlp/issues/11351)) by [bashonly](https://github.com/bashonly) +- **yle_areena**: [Support live events](https://github.com/yt-dlp/yt-dlp/commit/a6783a3b9905e547f6c1d4df9d7c7999feda8afa) ([#11358](https://github.com/yt-dlp/yt-dlp/issues/11358)) by [bashonly](https://github.com/bashonly), [CounterPillow](https://github.com/CounterPillow) +- **youtube**: [Adjust OAuth refresh token handling](https://github.com/yt-dlp/yt-dlp/commit/d569a8845254d90ce13ad74ae76695e8d6441068) ([#11414](https://github.com/yt-dlp/yt-dlp/issues/11414)) by [bashonly](https://github.com/bashonly) + +#### Misc. 
changes +- **build** + - [Disable attestations for trusted publishing](https://github.com/yt-dlp/yt-dlp/commit/428ffb75aa3534b275cf54de42693a4d261519da) ([#11418](https://github.com/yt-dlp/yt-dlp/issues/11418)) by [bashonly](https://github.com/bashonly) + - [Move optional dependencies to the `default` group](https://github.com/yt-dlp/yt-dlp/commit/87884f15580910e4e0fe0e1db73508debc657471) ([#11255](https://github.com/yt-dlp/yt-dlp/issues/11255)) by [bashonly](https://github.com/bashonly) + - [Use Ubuntu 20.04 and Python 3.9 for Linux ARM builds](https://github.com/yt-dlp/yt-dlp/commit/dd2e24446954246a2ec4d4a7e95531f52a14b351) ([#8638](https://github.com/yt-dlp/yt-dlp/issues/8638)) by [bashonly](https://github.com/bashonly) +- **cleanup** + - Miscellaneous + - [ea9e35d](https://github.com/yt-dlp/yt-dlp/commit/ea9e35d85fba5eab341cdcaf1eaed69b57f7e465) by [bashonly](https://github.com/bashonly) + - [c998238](https://github.com/yt-dlp/yt-dlp/commit/c998238c2e76c62d1d29962c6e8ebe916cc7913b) by [bashonly](https://github.com/bashonly), [KBelmin](https://github.com/KBelmin) + - [197d0b0](https://github.com/yt-dlp/yt-dlp/commit/197d0b03b6a3c8fe4fa5ace630eeffec629bf72c) by [avagordon01](https://github.com/avagordon01), [bashonly](https://github.com/bashonly), [grqz](https://github.com/grqz), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev) +- **devscripts**: `make_changelog`: [Parse full commit message for fixes](https://github.com/yt-dlp/yt-dlp/commit/0a3991edae0e10f2ea41ece9fdea5e48f789f1de) ([#11366](https://github.com/yt-dlp/yt-dlp/issues/11366)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) + +### 2024.10.22 + +#### Important changes +- **Following this release, yt-dlp's Python dependencies *must* be installed using the `default` group** +If you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's 
optional dependencies (which were previously included by default). [Read more](https://github.com/yt-dlp/yt-dlp/pull/11255) +- **py2exe is no longer supported** +This release's `yt-dlp_min.exe` will be the last, and it's actually a PyInstaller-bundled executable so that yt-dlp users updating their py2exe build with `-U` will be automatically migrated. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10087) + +#### Core changes +- [Add extractor helpers](https://github.com/yt-dlp/yt-dlp/commit/d710a6ca7c622705c0c8c8a3615916f531137d5d) ([#10653](https://github.com/yt-dlp/yt-dlp/issues/10653)) by [Grub4K](https://github.com/Grub4K) +- [Add option `--plugin-dirs`](https://github.com/yt-dlp/yt-dlp/commit/0f593dca9fa995d88eb763170a932da61c8f24dc) ([#11277](https://github.com/yt-dlp/yt-dlp/issues/11277)) by [coletdjnz](https://github.com/coletdjnz), [imranh2](https://github.com/imranh2) +- **cookies**: [Fix compatibility for Python <=3.9 in traceback](https://github.com/yt-dlp/yt-dlp/commit/c5f0f58efd8c3930de8202c15a5c53b1b635bd51) by [Grub4K](https://github.com/Grub4K) +- **utils** + - `Popen`: [Reset PyInstaller environment](https://github.com/yt-dlp/yt-dlp/commit/fbc66e3ab35743cc847a21223c67d88bb463cd9c) ([#11258](https://github.com/yt-dlp/yt-dlp/issues/11258)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) + - `sanitize_path`: [Reimplement function](https://github.com/yt-dlp/yt-dlp/commit/85b87c991af25dcb35630fa94580fd418e78ee33) ([#11198](https://github.com/yt-dlp/yt-dlp/issues/11198)) by [Grub4K](https://github.com/Grub4K) + +#### Extractor changes +- **adobepass**: [Use newer user-agent for provider redirect request](https://github.com/yt-dlp/yt-dlp/commit/dcfeea4dd5e5686821350baa6c7767a011944867) ([#11250](https://github.com/yt-dlp/yt-dlp/issues/11250)) by [bashonly](https://github.com/bashonly) +- **afreecatv**: [Adapt extractors to new sooplive.co.kr 
domain](https://github.com/yt-dlp/yt-dlp/commit/46fe60ff19395698a87113b2944453779e04ab9d) ([#11266](https://github.com/yt-dlp/yt-dlp/issues/11266)) by [63427083](https://github.com/63427083), [bashonly](https://github.com/bashonly) +- **cda**: [Support folders](https://github.com/yt-dlp/yt-dlp/commit/c4d95f67ddc522297bb1fea875255cf94b34d595) ([#10786](https://github.com/yt-dlp/yt-dlp/issues/10786)) by [pktiuk](https://github.com/pktiuk) +- **cwtv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/9d43dcb2c5c38f443f84dfc126cd32720e1a1ad6) ([#11230](https://github.com/yt-dlp/yt-dlp/issues/11230)) by [bashonly](https://github.com/bashonly) +- **drtv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f4338714241b11d9d43768ae71a25f5e952f677d) ([#11141](https://github.com/yt-dlp/yt-dlp/issues/11141)) by [444995](https://github.com/444995) +- **funk**: [Extend `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/8de431ec97a4b62b73df8f686b6e21e462775336) ([#11269](https://github.com/yt-dlp/yt-dlp/issues/11269)) by [seproDev](https://github.com/seproDev) +- **gem.cbc.ca**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/40054cb4a7ebbea30d335d444e6f58b298a3baa0) ([#11196](https://github.com/yt-dlp/yt-dlp/issues/11196)) by [DavidSkrundz](https://github.com/DavidSkrundz) +- **generic**: [Impersonate browser by default](https://github.com/yt-dlp/yt-dlp/commit/edfd095b1917701c5046bd51f9542897c17d41a7) ([#11206](https://github.com/yt-dlp/yt-dlp/issues/11206)) by [Grub4K](https://github.com/Grub4K) +- **imgur** + - [Fix thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/87408ccfd772ddf31a8323d8151c24f9577cbc9f) ([#11298](https://github.com/yt-dlp/yt-dlp/issues/11298)) by [seproDev](https://github.com/seproDev) + - [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/5af774d7a36c00bea618c7047c9326532cd3f616) ([#11075](https://github.com/yt-dlp/yt-dlp/issues/11075)) by [Deer-Spangle](https://github.com/Deer-Spangle) +- 
**patreon**: campaign: [Stricter URL matching](https://github.com/yt-dlp/yt-dlp/commit/babb70960595e2146f06f81affc29c7e713e34e2) ([#11235](https://github.com/yt-dlp/yt-dlp/issues/11235)) by [bashonly](https://github.com/bashonly) +- **reddit**: [Detect and raise when login is required](https://github.com/yt-dlp/yt-dlp/commit/cba7868502f04175fecf9ab3e363296aee7ebec2) ([#11202](https://github.com/yt-dlp/yt-dlp/issues/11202)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **substack**: [Resolve podcast file extensions](https://github.com/yt-dlp/yt-dlp/commit/3148c1822f66533998278f0a1cf842b9bea1526a) ([#11275](https://github.com/yt-dlp/yt-dlp/issues/11275)) by [bashonly](https://github.com/bashonly) +- **telecinco**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/0b7ec08816fb196cd41d392f8331b4eb8366c4f8) ([#11142](https://github.com/yt-dlp/yt-dlp/issues/11142)) by [bashonly](https://github.com/bashonly), [DarkZeros](https://github.com/DarkZeros) +- **tubitv**: [Strip extra whitespace from titles](https://github.com/yt-dlp/yt-dlp/commit/e68b4c19af122876561a41f2dd8093fae7b417c7) ([#10795](https://github.com/yt-dlp/yt-dlp/issues/10795)) by [allendema](https://github.com/allendema) +- **tver**: [Support series URLs](https://github.com/yt-dlp/yt-dlp/commit/ceaea731b6e314dbbdfb2e358d7677785ed0b4fc) ([#9507](https://github.com/yt-dlp/yt-dlp/issues/9507)) by [pzhlkj6612](https://github.com/pzhlkj6612), [vvto33](https://github.com/vvto33) +- **twitter**: spaces: [Allow extraction when not logged in](https://github.com/yt-dlp/yt-dlp/commit/679c68240a26481ea7c07cc0c014745631ea8481) ([#11289](https://github.com/yt-dlp/yt-dlp/issues/11289)) by [rubyevadestaxes](https://github.com/rubyevadestaxes) +- **weverse**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5310fa87f6cb7f66bf42e2520878952fbf6b1652) ([#11215](https://github.com/yt-dlp/yt-dlp/issues/11215)) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Fix `comment_count` 
extraction](https://github.com/yt-dlp/yt-dlp/commit/7af1ddaaf2a6a0a750373a9ab53c7770af4f9fe4) ([#11274](https://github.com/yt-dlp/yt-dlp/issues/11274)) by [bashonly](https://github.com/bashonly) + - [Remove broken `android_producer` client](https://github.com/yt-dlp/yt-dlp/commit/fed53d70bdb7d3e37ef63dd7fcf0ef74356167fd) ([#11297](https://github.com/yt-dlp/yt-dlp/issues/11297)) by [bashonly](https://github.com/bashonly) + - [Remove broken age-restriction workaround](https://github.com/yt-dlp/yt-dlp/commit/ec2f4bf0823a13043f98f5bd0bf6677837bf09dc) ([#11297](https://github.com/yt-dlp/yt-dlp/issues/11297)) by [bashonly](https://github.com/bashonly) + - [Support logging in with OAuth](https://github.com/yt-dlp/yt-dlp/commit/b8635c1d4779da195e71aa281f73aaad702c935e) ([#11001](https://github.com/yt-dlp/yt-dlp/issues/11001)) by [coletdjnz](https://github.com/coletdjnz) + +#### Misc. changes +- **build** + - [Migrate `py2exe` builds to `win_exe`](https://github.com/yt-dlp/yt-dlp/commit/a886cf3e900f4a2ec00af705f883539269545609) ([#11256](https://github.com/yt-dlp/yt-dlp/issues/11256)) by [bashonly](https://github.com/bashonly) + - [Use `macos-13` image for macOS builds](https://github.com/yt-dlp/yt-dlp/commit/64d84d75ca8c19ec06558cc7c511f5f4f7a822bc) ([#11236](https://github.com/yt-dlp/yt-dlp/issues/11236)) by [bashonly](https://github.com/bashonly) + - `make_lazy_extractors`: [Force running without plugins](https://github.com/yt-dlp/yt-dlp/commit/1a830394a21a81a3e9918f9e175abc9fbb21f089) ([#11205](https://github.com/yt-dlp/yt-dlp/issues/11205)) by [Grub4K](https://github.com/Grub4K) +- **cleanup**: Miscellaneous: [67adeb7](https://github.com/yt-dlp/yt-dlp/commit/67adeb7bab00662ba55d473e405b301abb42fe61) by [bashonly](https://github.com/bashonly), [DTrombett](https://github.com/DTrombett), [grqz](https://github.com/grqz), [Grub4K](https://github.com/Grub4K), [KarboniteKream](https://github.com/KarboniteKream), [mikkovedru](https://github.com/mikkovedru), 
[seproDev](https://github.com/seproDev) +- **test**: [Allow running tests explicitly](https://github.com/yt-dlp/yt-dlp/commit/16eb28026a2ddf5608d0a628ef15949b8d3805a9) ([#11203](https://github.com/yt-dlp/yt-dlp/issues/11203)) by [Grub4K](https://github.com/Grub4K) + +### 2024.10.07 + +#### Core changes +- **cookies**: [Fix cookie load error handling](https://github.com/yt-dlp/yt-dlp/commit/e59c82a74cda5139eb3928c75b0bd45484dbe7f0) ([#11140](https://github.com/yt-dlp/yt-dlp/issues/11140)) by [Grub4K](https://github.com/Grub4K) + +#### Extractor changes +- **applepodcasts**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6328e2e67a4e126e08af382e6a387073082d5c5f) ([#10903](https://github.com/yt-dlp/yt-dlp/issues/10903)) by [coreywright](https://github.com/coreywright) +- **cwtv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4b7bec66d8100978b82bb24110ed44e2a7749931) ([#11135](https://github.com/yt-dlp/yt-dlp/issues/11135)) by [kclauhk](https://github.com/kclauhk) +- **instagram** + - [Do not hardcode user-agent](https://github.com/yt-dlp/yt-dlp/commit/079a7bc334281d3c13d347770ae5f9f2b7da471a) ([#11155](https://github.com/yt-dlp/yt-dlp/issues/11155)) by [poyhen](https://github.com/poyhen) + - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/cf85cba5d9496bd2689e1070005b4d1b4cd3dc6d) ([#11156](https://github.com/yt-dlp/yt-dlp/issues/11156)) by [tetra-fox](https://github.com/tetra-fox) +- **noodlemagazine**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/ccb23e1bac9768d1c70535beb744e668ed4a2720) ([#11144](https://github.com/yt-dlp/yt-dlp/issues/11144)) by [BallzCrasher](https://github.com/BallzCrasher) +- **patreon**: [Extract all m3u8 formats for locked posts](https://github.com/yt-dlp/yt-dlp/commit/f91645aceaf13926cf35be2c1dfef61b3aab97fb) ([#11138](https://github.com/yt-dlp/yt-dlp/issues/11138)) by [bashonly](https://github.com/bashonly) +- **youtube**: [Change default player clients to 
`ios,mweb`](https://github.com/yt-dlp/yt-dlp/commit/de2062753a188060d76f587e45becce61fe399f9) ([#11190](https://github.com/yt-dlp/yt-dlp/issues/11190)) by [seproDev](https://github.com/seproDev) + +#### Postprocessor changes +- **xattrmetadata**: [Try to write each attribute](https://github.com/yt-dlp/yt-dlp/commit/3a193346eeb27ac2959ff30c370adb899ec94732) ([#11115](https://github.com/yt-dlp/yt-dlp/issues/11115)) by [eric321](https://github.com/eric321) + +#### Misc. changes +- **ci**: [Rerun failed tests](https://github.com/yt-dlp/yt-dlp/commit/b31b81d85f00601710d4fac590c3e4efb4133283) ([#11143](https://github.com/yt-dlp/yt-dlp/issues/11143)) by [Grub4K](https://github.com/Grub4K) +- **cleanup**: Miscellaneous: [1a176d8](https://github.com/yt-dlp/yt-dlp/commit/1a176d874e6772cd898ce507379ea388e96ee3f7) by [bashonly](https://github.com/bashonly) + +### 2024.09.27 + +#### Important changes +- **The minimum *recommended* Python version has been raised to 3.9** +Since Python 3.8 will reach end-of-life in October 2024, support for it will be dropped soon. 
[Read more](https://github.com/yt-dlp/yt-dlp/issues/10086) + +#### Core changes +- [Allow `none` arg to negate `--convert-subs` and `--convert-thumbnails`](https://github.com/yt-dlp/yt-dlp/commit/c08e0b20b5edd8957b8318716bc14e896d1b96f4) ([#11066](https://github.com/yt-dlp/yt-dlp/issues/11066)) by [kieraneglin](https://github.com/kieraneglin) +- [Fix format sorting bug with vp9.2 vcodec](https://github.com/yt-dlp/yt-dlp/commit/8f4ea14680c7865d8ffac10a9174205d1d84ada7) ([#10884](https://github.com/yt-dlp/yt-dlp/issues/10884)) by [rakslice](https://github.com/rakslice) +- [Raise minimum recommended Python version to 3.9](https://github.com/yt-dlp/yt-dlp/commit/cca534cd9e6850c70244f225a4a1895ef4bcdbec) ([#11098](https://github.com/yt-dlp/yt-dlp/issues/11098)) by [bashonly](https://github.com/bashonly) +- **cookies**: [Improve error message for Windows `--cookies-from-browser chrome` issue](https://github.com/yt-dlp/yt-dlp/commit/b397a64691421ace5df09457c2a764821a2dc6f2) ([#11090](https://github.com/yt-dlp/yt-dlp/issues/11090)) by [seproDev](https://github.com/seproDev) +- **utils**: `mimetype2ext`: [Recognize `aacp` as `aac`](https://github.com/yt-dlp/yt-dlp/commit/cc85596d5b59f0c14e9381b3675f619c1e12e597) ([#10860](https://github.com/yt-dlp/yt-dlp/issues/10860)) by [bashonly](https://github.com/bashonly) + +#### Extractor changes +- [Fix JW Player format parsing](https://github.com/yt-dlp/yt-dlp/commit/409f8e9e3b4bde81ef76fc563256f876d2ff8099) ([#10956](https://github.com/yt-dlp/yt-dlp/issues/10956)) by [seproDev](https://github.com/seproDev) +- [Handle decode errors when reading responses](https://github.com/yt-dlp/yt-dlp/commit/325001317d97f4545d66fac44c4ba772c6f45f22) ([#10868](https://github.com/yt-dlp/yt-dlp/issues/10868)) by [bashonly](https://github.com/bashonly) +- **abc.net.au**: iview, showseries: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/7f909046f4dc0fba472b4963145aef6e0d42491b) ([#11101](https://github.com/yt-dlp/yt-dlp/issues/11101)) by 
[bashonly](https://github.com/bashonly) +- **adn**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/cc88a54bb1ef285154775f8a6a413335ce4c71ce) ([#10749](https://github.com/yt-dlp/yt-dlp/issues/10749)) by [infanf](https://github.com/infanf) +- **asobistage**: [Support redirected URLs](https://github.com/yt-dlp/yt-dlp/commit/a7d3235c84dac57a127cbe0ff38f7f7c2fdd8fa0) ([#10768](https://github.com/yt-dlp/yt-dlp/issues/10768)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **bandcamp**: user: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/5d0176547f16a3642cd71627126e9dfc24981e20) ([#10328](https://github.com/yt-dlp/yt-dlp/issues/10328)) by [bashonly](https://github.com/bashonly), [quad](https://github.com/quad) +- **beacon**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b4760c778d0c92c6e3f2bc8346cd72c8f08595ae) ([#9901](https://github.com/yt-dlp/yt-dlp/issues/9901)) by [Deukhoofd](https://github.com/Deukhoofd) +- **bilibili** + - [Fix chapters and subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/a2000bc85730c950351d78bb818493dc39dca3cb) ([#11099](https://github.com/yt-dlp/yt-dlp/issues/11099)) by [bashonly](https://github.com/bashonly) + - [Fix festival URL support](https://github.com/yt-dlp/yt-dlp/commit/b43bd864851f2862e26caa85461c5d825d49d463) ([#10740](https://github.com/yt-dlp/yt-dlp/issues/10740)) by [bashonly](https://github.com/bashonly), [grqz](https://github.com/grqz) +- **biliintl**: [Fix referer header](https://github.com/yt-dlp/yt-dlp/commit/a06bb586795ebab87a2356923acfc674d6f0e152) ([#11003](https://github.com/yt-dlp/yt-dlp/issues/11003)) by [Khaoklong51](https://github.com/Khaoklong51) +- **dropbox**: [Fix password-protected video support](https://github.com/yt-dlp/yt-dlp/commit/63da31b3b29af90062d8a72a905ffe4b5e499042) ([#10735](https://github.com/yt-dlp/yt-dlp/issues/10735)) by [ndyanx](https://github.com/ndyanx) +- **ertgr**: [Fix video 
extraction](https://github.com/yt-dlp/yt-dlp/commit/416686ed0cf792ec44ab059f3b229dd776077e14) ([#11091](https://github.com/yt-dlp/yt-dlp/issues/11091)) by [seproDev](https://github.com/seproDev) +- **eurosport**: [Support local URL variants](https://github.com/yt-dlp/yt-dlp/commit/f0bb28504c8c2b75ee3e5796aed50de2a7f90a1b) ([#10785](https://github.com/yt-dlp/yt-dlp/issues/10785)) by [seproDev](https://github.com/seproDev) +- **facebook** + - ads: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d62fef7e07d454c0d2ba2d69fb96d691dba1ded0) ([#10704](https://github.com/yt-dlp/yt-dlp/issues/10704)) by [kclauhk](https://github.com/kclauhk) + - reel: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/0e1b941c6b2caa688b0d3332e723d16dbafa4311) by [lengzuo](https://github.com/lengzuo) +- **germanupa**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/124f058b546d652a359c67025bb479789bfbef0b) ([#10538](https://github.com/yt-dlp/yt-dlp/issues/10538)) by [grqz](https://github.com/grqz) +- **hgtvde**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a555389c9bb32e589e00b4664974423fb7b04dcd) ([#10992](https://github.com/yt-dlp/yt-dlp/issues/10992)) by [bashonly](https://github.com/bashonly), [rdamas](https://github.com/rdamas) +- **huya**: video: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/25c1cdaa2650563494d3bf00a38f72d0d9486bff) ([#10686](https://github.com/yt-dlp/yt-dlp/issues/10686)) by [hugepower](https://github.com/hugepower) +- **iprima**: [Fix zoom URL support](https://github.com/yt-dlp/yt-dlp/commit/4a27b8f092f7f7c10b7a334d3535c97c2af02f0a) ([#10959](https://github.com/yt-dlp/yt-dlp/issues/10959)) by [otovalek](https://github.com/otovalek) +- **khanacademy**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0fba08485b6445b72b5b63ae23ca2a73fa5d967f) ([#10913](https://github.com/yt-dlp/yt-dlp/issues/10913)) by [seproDev](https://github.com/seproDev) +- **kick** + - clips: [Support new URL 
format](https://github.com/yt-dlp/yt-dlp/commit/0aa4426e9a35f7f8e184f1f2082b3b313c1448f7) ([#11107](https://github.com/yt-dlp/yt-dlp/issues/11107)) by [bashonly](https://github.com/bashonly) + - vod: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/173d54c151b987409e3eb09552d8d89ed8fc50f7) ([#10988](https://github.com/yt-dlp/yt-dlp/issues/10988)) by [bashonly](https://github.com/bashonly), [grqz](https://github.com/grqz) +- **kika**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e6f48ca80821939c1fd11ec2a0cdbf2fba9b258a) ([#5788](https://github.com/yt-dlp/yt-dlp/issues/5788)) by [1100101](https://github.com/1100101) +- **lnkgo**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/fa83d0b36bc43d30fe9241c1e923f4614864b758) ([#10904](https://github.com/yt-dlp/yt-dlp/issues/10904)) by [naglis](https://github.com/naglis) +- **loom**: [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/7509d692b37a7ec6230ea75bfe1e44a8de5eefce) ([#10760](https://github.com/yt-dlp/yt-dlp/issues/10760)) by [kclauhk](https://github.com/kclauhk) +- **mediaklikk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/e2b3634e299be9c16a247ece3b1858d83889c324) ([#11083](https://github.com/yt-dlp/yt-dlp/issues/11083)) by [szantnerb](https://github.com/szantnerb) +- **mojevideo**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/28b0ecba2af5b4919f198474b3d00a76ef322c31) ([#11019](https://github.com/yt-dlp/yt-dlp/issues/11019)) by [04-pasha-04](https://github.com/04-pasha-04), [pzhlkj6612](https://github.com/pzhlkj6612) +- **niconico**: [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/eabb4680fdb09ba1f48d174a700a2e3b43f82add) ([#11103](https://github.com/yt-dlp/yt-dlp/issues/11103)) by [bashonly](https://github.com/bashonly) +- **nzz**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4a9bc8c3630378bc29f0266126b503f6190c0430) ([#10461](https://github.com/yt-dlp/yt-dlp/issues/10461)) by 
[1-Byte](https://github.com/1-Byte) +- **patreoncampaign**: [Support API URLs](https://github.com/yt-dlp/yt-dlp/commit/232e6db30c474d1b387e405342f34173ceeaf832) ([#10734](https://github.com/yt-dlp/yt-dlp/issues/10734)) by [bashonly](https://github.com/bashonly), [hibes](https://github.com/hibes) +- **pinterest**: [Extend `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c8c078fe28b0ffc15ef9646346c00c592fe71a78) ([#10867](https://github.com/yt-dlp/yt-dlp/issues/10867)) by [bashonly](https://github.com/bashonly), [sahilsinghss73](https://github.com/sahilsinghss73) +- **radiko**: [Extract unique `id` values](https://github.com/yt-dlp/yt-dlp/commit/c8d096c5ce111411fbdbe2abb8fed54f317a6182) ([#10726](https://github.com/yt-dlp/yt-dlp/issues/10726)) by [garret1317](https://github.com/garret1317) +- **rtp**: [Support more subpages](https://github.com/yt-dlp/yt-dlp/commit/d02df303d8e49390599db9f34482697e4d1cf5b2) ([#10787](https://github.com/yt-dlp/yt-dlp/issues/10787)) by [Demon000](https://github.com/Demon000) +- **rumblechannel**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/ad0b857f459a6d390fbf124183916218c52f223a) ([#11049](https://github.com/yt-dlp/yt-dlp/issues/11049)) by [tony-hn](https://github.com/tony-hn) +- **rutube**: [Support livestreams](https://github.com/yt-dlp/yt-dlp/commit/41be32e78c3845000dbac188ffb90ea3ea7c4dfa) ([#10844](https://github.com/yt-dlp/yt-dlp/issues/10844)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **samplefocus**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/46f4c80bc363ee8116c33d37f65202e6c3470954) ([#10947](https://github.com/yt-dlp/yt-dlp/issues/10947)) by [seproDev](https://github.com/seproDev) +- **screenrec**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/36f9e602ad55679764bc75a4f67f7562b1d6adcf) ([#10917](https://github.com/yt-dlp/yt-dlp/issues/10917)) by [naglis](https://github.com/naglis) +- **sen**: [Add 
extractor](https://github.com/yt-dlp/yt-dlp/commit/41a241ca6ffb95b3d9aaf4f42106ca8cba9af1a6) ([#10952](https://github.com/yt-dlp/yt-dlp/issues/10952)) by [seproDev](https://github.com/seproDev) +- **servus**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/300c91274f7ea5b1b0528fc5ee11cf1a61d4079e) ([#10944](https://github.com/yt-dlp/yt-dlp/issues/10944)) by [seproDev](https://github.com/seproDev) +- **snapchatspotlight**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b37417e4f934fd8909788b493d017777155b0ae5) ([#11030](https://github.com/yt-dlp/yt-dlp/issues/11030)) by [seproDev](https://github.com/seproDev) +- **svtpage**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5a8a05aebb49693e78e1123015837ed5e961ff76) ([#11010](https://github.com/yt-dlp/yt-dlp/issues/11010)) by [diman8](https://github.com/diman8) +- **tenplay**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d8d473002b654ab0e7b97ead869f58b4361eeae1) ([#10928](https://github.com/yt-dlp/yt-dlp/issues/10928)) by [aarubui](https://github.com/aarubui) +- **tiktok**: [Fix web formats extraction](https://github.com/yt-dlp/yt-dlp/commit/3ad0b7f422d547204df687b6d0b2d9110fff3990) ([#11074](https://github.com/yt-dlp/yt-dlp/issues/11074)) by [bashonly](https://github.com/bashonly) +- **twitter**: spaces: [Support video spaces](https://github.com/yt-dlp/yt-dlp/commit/bef1d4d6fc9493fda7f75e2289c07c507d10092f) ([#10789](https://github.com/yt-dlp/yt-dlp/issues/10789)) by [bashonly](https://github.com/bashonly) +- **vidflex**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e978c312d6550a6ae4c9df18001afb1b420cb72f) ([#10002](https://github.com/yt-dlp/yt-dlp/issues/10002)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **vimeo** + - [Always try to extract original format](https://github.com/yt-dlp/yt-dlp/commit/4115c24d157c5b5f63089d75c4e0f51d1f8b4489) ([#10721](https://github.com/yt-dlp/yt-dlp/issues/10721)) by [bashonly](https://github.com/bashonly) (With fixes in 
[e8e6a98](https://github.com/yt-dlp/yt-dlp/commit/e8e6a982a1b659eed434d225d7922f632bac6568) by [seproDev](https://github.com/seproDev)) + - [Fix HLS audio format sorting](https://github.com/yt-dlp/yt-dlp/commit/a1b4ac2b8ed8e6eaa56044d439f1e0d00c2ba218) ([#11082](https://github.com/yt-dlp/yt-dlp/issues/11082)) by [fireattack](https://github.com/fireattack) +- **watchespn**: [Improve auth support](https://github.com/yt-dlp/yt-dlp/commit/7adff8caf152dcf96d03aff69ed8545c0a63567c) ([#10910](https://github.com/yt-dlp/yt-dlp/issues/10910)) by [ischmidt20](https://github.com/ischmidt20) +- **wistia**: [Support password-protected videos](https://github.com/yt-dlp/yt-dlp/commit/9f5c9a90898c5a1e672922d9cd799716c73cee34) ([#11100](https://github.com/yt-dlp/yt-dlp/issues/11100)) by [bashonly](https://github.com/bashonly) +- **ximalaya**: [Add VIP support](https://github.com/yt-dlp/yt-dlp/commit/3dfd720d098b4d49d69cfc77e6376f22bcd90934) ([#10832](https://github.com/yt-dlp/yt-dlp/issues/10832)) by [seproDev](https://github.com/seproDev), [xingchensong](https://github.com/xingchensong) +- **xinpianchang**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3aa0156e05662923d130ddbc1c82596e38c01a00) ([#10950](https://github.com/yt-dlp/yt-dlp/issues/10950)) by [seproDev](https://github.com/seproDev) +- **yleareena**: [Support podcasts](https://github.com/yt-dlp/yt-dlp/commit/48d629d461e05b1b19f5e53dc959bb9ebe95da42) ([#11104](https://github.com/yt-dlp/yt-dlp/issues/11104)) by [bashonly](https://github.com/bashonly) +- **youtube** + - [Add `po_token`, `visitor_data`, `data_sync_id` extractor args](https://github.com/yt-dlp/yt-dlp/commit/3a3bd00037e9908e87da4fa9f2ad772aa34dc60e) ([#10648](https://github.com/yt-dlp/yt-dlp/issues/10648)) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [seproDev](https://github.com/seproDev) (With fixes in [fa2be9a](https://github.com/yt-dlp/yt-dlp/commit/fa2be9a7c63babede07480151363e54eee5702bd) by 
[bashonly](https://github.com/bashonly)) + - [Support excluding `player_client`s in extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/49f3741a820ed142f6866317c2e7d247b130960e) ([#10710](https://github.com/yt-dlp/yt-dlp/issues/10710)) by [bashonly](https://github.com/bashonly) + - clip: [Prioritize `https` formats](https://github.com/yt-dlp/yt-dlp/commit/1d84b780cf33a1d84756825ac23f990a905703df) ([#11102](https://github.com/yt-dlp/yt-dlp/issues/11102)) by [bashonly](https://github.com/bashonly) + - tab: [Fix shorts tab extraction](https://github.com/yt-dlp/yt-dlp/commit/9431777b4c37129a6093080c77ca59960afbb9d7) ([#10938](https://github.com/yt-dlp/yt-dlp/issues/10938)) by [seproDev](https://github.com/seproDev) + +#### Networking changes +- [Fix handler not being added to RequestError](https://github.com/yt-dlp/yt-dlp/commit/d1c4d88b2d912e8da5e76db455562ca63b1af690) ([#10955](https://github.com/yt-dlp/yt-dlp/issues/10955)) by [coletdjnz](https://github.com/coletdjnz) +- [Pin `curl-cffi` version to < 0.7.2](https://github.com/yt-dlp/yt-dlp/commit/5bb1aa04dafce13ba9de707ea53169fab58b5207) ([#11092](https://github.com/yt-dlp/yt-dlp/issues/11092)) by [bashonly](https://github.com/bashonly) +- **Request Handler**: websockets: [Upgrade websockets to 13.0](https://github.com/yt-dlp/yt-dlp/commit/6f9e6537434562d513d0c9b68ced8a61ade94a64) ([#10815](https://github.com/yt-dlp/yt-dlp/issues/10815)) by [coletdjnz](https://github.com/coletdjnz) + +#### Misc. 
changes +- **build** + - [Bump PyInstaller version pin to `>=6.10.0`](https://github.com/yt-dlp/yt-dlp/commit/fb8b7f226d251e521a89b23c415e249e5b788e5c) ([#10709](https://github.com/yt-dlp/yt-dlp/issues/10709)) by [bashonly](https://github.com/bashonly) + - [Pin `delocate` version for `macos`](https://github.com/yt-dlp/yt-dlp/commit/7e41628ff523b3fe373b0981a5db441358980dab) ([#10901](https://github.com/yt-dlp/yt-dlp/issues/10901)) by [bashonly](https://github.com/bashonly) +- **ci** + - [Add comment sanitization workflow](https://github.com/yt-dlp/yt-dlp/commit/b6200bdcf3a9415ae36859188f9a57e3e461c696) ([#10915](https://github.com/yt-dlp/yt-dlp/issues/10915)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) + - [Add issue tracker anti-spam protection](https://github.com/yt-dlp/yt-dlp/commit/ad9a8115aa29a1a95c961b16fcf129a228d98f50) ([#10861](https://github.com/yt-dlp/yt-dlp/issues/10861)) by [bashonly](https://github.com/bashonly) +- **cleanup**: Miscellaneous: [c6387ab](https://github.com/yt-dlp/yt-dlp/commit/c6387abc1af9842bb0541288a5610abba9b1ab51) by [bashonly](https://github.com/bashonly), [Codenade](https://github.com/Codenade), [coletdjnz](https://github.com/coletdjnz), [grqz](https://github.com/grqz), [Grub4K](https://github.com/Grub4K), [pzhlkj6612](https://github.com/pzhlkj6612), [seproDev](https://github.com/seproDev) + +### 2024.08.06 + +#### Core changes +- **jsinterp**: [Improve `slice` implementation](https://github.com/yt-dlp/yt-dlp/commit/bb8bf1db993f59752d20b73b861bd55e40cf0e31) ([#10664](https://github.com/yt-dlp/yt-dlp/issues/10664)) by [seproDev](https://github.com/seproDev) + +#### Extractor changes +- **discoveryplusitaly**: [Support sport and olympics URLs](https://github.com/yt-dlp/yt-dlp/commit/e7d73bc4531ee3f91a46b15e218dcc1fbeb6226c) ([#10655](https://github.com/yt-dlp/yt-dlp/issues/10655)) by [bashonly](https://github.com/bashonly) +- **gem.cbc.ca**: live: [Fix 
extractor](https://github.com/yt-dlp/yt-dlp/commit/fc5eecfa31c9571b6031cc3968aaa0394be55d7a) ([#10565](https://github.com/yt-dlp/yt-dlp/issues/10565)) by [bashonly](https://github.com/bashonly), [scribblemaniac](https://github.com/scribblemaniac) +- **niconico**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4d9231208332d4c32364b8cd814bff8b20232cae) ([#10677](https://github.com/yt-dlp/yt-dlp/issues/10677)) by [bashonly](https://github.com/bashonly) +- **olympics**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/919540a9644e55deb78cdd6751757ec8fdaf76f4) ([#10625](https://github.com/yt-dlp/yt-dlp/issues/10625)) by [bashonly](https://github.com/bashonly) +- **youku**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0088c6de23d832b117061a33e984dc452d992e9c) ([#10626](https://github.com/yt-dlp/yt-dlp/issues/10626)) by [hugepower](https://github.com/hugepower) +- **youtube** + - [Change default player clients to `ios,web_creator`](https://github.com/yt-dlp/yt-dlp/commit/406f4c2e47502fffc1b0c210b4ee6487c89a44cb) ([#10674](https://github.com/yt-dlp/yt-dlp/issues/10674)) by [bashonly](https://github.com/bashonly) + - [Fix `n` function name extraction for player `b12cc44b`](https://github.com/yt-dlp/yt-dlp/commit/c86891eb9434b4d7eec426d38c0c625b5e13cb2f) ([#10668](https://github.com/yt-dlp/yt-dlp/issues/10668)) by [seproDev](https://github.com/seproDev) + ### 2024.08.01 #### Core changes diff --git a/README.md b/README.md index dd78012a8..1db4ed2a5 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![YT-DLP](https://raw.githubusercontent.com/yt-dlp/yt-dlp/master/.github/banner.svg)](#readme) [![Release version](https://img.shields.io/github/v/release/yt-dlp/yt-dlp?color=brightgreen&label=Download&style=for-the-badge)](#installation "Installation") -[![PyPi](https://img.shields.io/badge/-PyPi-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp "PyPi") 
+[![PyPI](https://img.shields.io/badge/-PyPI-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp "PyPI") [![Donate](https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)](Collaborators.md#collaborators "Donate") [![Matrix](https://img.shields.io/matrix/yt-dlp:matrix.org?color=brightgreen&labelColor=555555&label=&logo=element&style=for-the-badge)](https://matrix.to/#/#yt-dlp:matrix.org "Matrix") [![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)](https://discord.gg/H5MNcFW63r "Discord") @@ -81,7 +81,7 @@ yt-dlp is a feature-rich command-line audio/video downloader with support for [t [![Windows](https://img.shields.io/badge/-Windows_x64-blue.svg?style=for-the-badge&logo=windows)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe) [![Unix](https://img.shields.io/badge/-Linux/BSD-red.svg?style=for-the-badge&logo=linux)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp) [![MacOS](https://img.shields.io/badge/-MacOS-lightblue.svg?style=for-the-badge&logo=apple)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos) -[![PyPi](https://img.shields.io/badge/-PyPi-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp) +[![PyPI](https://img.shields.io/badge/-PyPI-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp) [![Source Tarball](https://img.shields.io/badge/-Source_tar-green.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) [![Other variants](https://img.shields.io/badge/-Other-grey.svg?style=for-the-badge)](#release-files) [![All versions](https://img.shields.io/badge/-All_Versions-lightgrey.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases) @@ -98,15 +98,14 @@ You can install yt-dlp using [the 
binaries](#release-files), [pip](https://pypi. File|Description :---|:--- [yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform-independent [zipimport](https://docs.python.org/3/library/zipimport.html) binary. Needs Python (recommended for **Linux/BSD**) -[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win7 SP1+) standalone x64 binary (recommended for **Windows**) +[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win8+) standalone x64 binary (recommended for **Windows**) [yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|Universal MacOS (10.15+) standalone executable (recommended for **MacOS**) #### Alternatives File|Description :---|:--- -[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win7 SP1+) standalone x86 (32-bit) binary -[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`
([Not recommended](#standalone-py2exe-builds-windows)) +[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win8+) standalone x86 (32-bit) binary [yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary [yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary [yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary @@ -173,11 +172,11 @@ python3 -m pip install -U --pre "yt-dlp[default]" ``` ## DEPENDENCIES -Python versions 3.8+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly. +Python versions 3.9+ (CPython) and 3.10+ (PyPy) are supported. Other versions and implementations may or may not work correctly. While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly recommended @@ -200,7 +199,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting. -* [**curl_cffi**](https://github.com/yifeikong/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/yifeikong/curl_cffi/blob/main/LICENSE) +* [**curl_cffi**](https://github.com/lexiforest/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lexiforest/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/lexiforest/curl_cffi/blob/main/LICENSE) * Can be installed with the `curl-cffi` group, e.g. 
`pip install "yt-dlp[default,curl-cffi]"` * Currently included in `yt-dlp.exe`, `yt-dlp_linux` and `yt-dlp_macos` builds @@ -254,31 +253,19 @@ On some systems, you may need to use `py` or `python` instead of `python3`. **Important**: Running `pyinstaller` directly **instead of** using `python -m bundle.pyinstaller` is **not** officially supported. This may or may not work correctly. ### Platform-independent Binary (UNIX) -You will need the build tools `python` (3.8+), `zip`, `make` (GNU), `pandoc`\* and `pytest`\*. +You will need the build tools `python` (3.9+), `zip`, `make` (GNU), `pandoc`\* and `pytest`\*. After installing these, simply run `make`. You can also run `make yt-dlp` instead to compile only the binary without updating any of the additional files. (The build tools marked with **\*** are not needed for this) -### Standalone Py2Exe Builds (Windows) - -While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi`/`requests` and need VC++14** on the target computer to run. - -If you wish to build it anyway, install Python (if it is not already installed) and you can run the following commands: - -``` -py devscripts/install_deps.py --include py2exe -py devscripts/make_lazy_extractors.py -py -m bundle.py2exe -``` - ### Related scripts * **`devscripts/install_deps.py`** - Install dependencies for yt-dlp. * **`devscripts/update-version.py`** - Update the version number based on the current date. * **`devscripts/set-variant.py`** - Set the build variant of the executable. * **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file. -* **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. 
Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading. +* **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS` to something nonempty to forcefully disable lazy extractor loading. Note: See their `--help` for more info. @@ -348,8 +335,16 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git containing directory ("-" for stdin). Can be used multiple times and inside other configuration files - --flat-playlist Do not extract the videos of a playlist, - only list them + --plugin-dirs PATH Path to an additional directory to search + for plugins. This option can be used + multiple times to add multiple directories. + Note that this currently only works for + extractor plugins; postprocessor plugins can + only be loaded from the default plugin + directories + --flat-playlist Do not extract a playlist's URL result + entries; some entry metadata may be missing + and downloading may be bypassed --no-flat-playlist Fully extract the videos of a playlist (default) --live-from-start Download livestreams from the start. @@ -444,10 +439,10 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git E.g. "--date today-2weeks" downloads only videos uploaded on the same day two weeks ago --datebefore DATE Download only videos uploaded on or before - this date. The date formats accepted is the + this date. The date formats accepted are the same as --date --dateafter DATE Download only videos uploaded on or after - this date. The date formats accepted is the + this date. The date formats accepted are the same as --date --match-filters FILTER Generic video filter. 
Any "OUTPUT TEMPLATE" field can be compared with a number or a @@ -459,17 +454,17 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git conditions. Use a "\" to escape "&" or quotes if needed. If used multiple times, the filter matches if at least one of the - conditions is met. E.g. --match-filter - !is_live --match-filter "like_count>?100 & + conditions is met. E.g. --match-filters + !is_live --match-filters "like_count>?100 & description~='(?i)\bcats \& dogs\b'" matches only videos that are not live OR those that have a like count more than 100 (or the like field is not available) and also has a description that contains the phrase "cats & - dogs" (caseless). Use "--match-filter -" to + dogs" (caseless). Use "--match-filters -" to interactively ask whether to download each video - --no-match-filters Do not use any --match-filter (default) + --no-match-filters Do not use any --match-filters (default) --break-match-filters FILTER Same as "--match-filters" but stops the download process when a video is rejected --no-break-match-filters Do not use any --break-match-filters (default) @@ -485,12 +480,13 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git --no-download-archive Do not use archive file (default) --max-downloads NUMBER Abort after downloading NUMBER files --break-on-existing Stop the download process when encountering - a file that is in the archive + a file that is in the archive supplied with + the --download-archive option --no-break-on-existing Do not stop the download process when encountering a file that is in the archive (default) --break-per-input Alters --max-downloads, --break-on-existing, - --break-match-filter, and autonumber to + --break-match-filters, and autonumber to reset per input URL --no-break-per-input --break-on-existing and similar options terminates the entire download queue @@ -732,16 +728,16 @@ If you fork the project on GitHub, you can run your fork's [build 
workflow](.git used. This option can be used multiple times --print-to-file [WHEN:]TEMPLATE FILE Append given template to the file. The - values of WHEN and TEMPLATE are same as that - of --print. FILE uses the same syntax as the - output template. This option can be used - multiple times + values of WHEN and TEMPLATE are the same as + that of --print. FILE uses the same syntax + as the output template. This option can be + used multiple times -j, --dump-json Quiet, but print JSON information for each video. Simulate unless --no-simulate is used. See "OUTPUT TEMPLATE" for a description of available keys -J, --dump-single-json Quiet, but print JSON information for each - url or infojson passed. Simulate unless + URL or infojson passed. Simulate unless --no-simulate is used. If the URL refers to a playlist, the whole playlist information is dumped in a single line @@ -816,9 +812,9 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git --no-audio-multistreams Only one audio stream is downloaded for each output file (default) --prefer-free-formats Prefer video formats with free containers - over non-free ones of same quality. Use with - "-S ext" to strictly prefer free containers - irrespective of quality + over non-free ones of the same quality. Use + with "-S ext" to strictly prefer free + containers irrespective of quality --no-prefer-free-formats Don't give any special preference to free containers (default) --check-formats Make sure formats are selected only from @@ -843,15 +839,17 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git (default) (Alias: --no-write-automatic-subs) --list-subs List available subtitles of each video. Simulate unless --no-simulate is used - --sub-format FORMAT Subtitle format; accepts formats preference, - e.g. "srt" or "ass/srt/best" + --sub-format FORMAT Subtitle format; accepts formats preference + separated by "/", e.g. 
"srt" or "ass/srt/best" --sub-langs LANGS Languages of the subtitles to download (can be regex) or "all" separated by commas, e.g. - --sub-langs "en.*,ja". You can prefix the - language code with a "-" to exclude it from - the requested languages, e.g. --sub-langs - all,-live_chat. Use --list-subs for a list - of available language tags + --sub-langs "en.*,ja" (where "en.*" is a + regex pattern that matches "en" followed by + 0 or more of any character). You can prefix + the language code with a "-" to exclude it + from the requested languages, e.g. --sub- + langs all,-live_chat. Use --list-subs for a + list of available language tags ## Authentication Options: -u, --username USERNAME Login with this account ID @@ -899,9 +897,9 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git necessary (currently supported: avi, flv, gif, mkv, mov, mp4, webm, aac, aiff, alac, flac, m4a, mka, mp3, ogg, opus, vorbis, - wav). If target container does not support - the video/audio codec, remuxing will fail. - You can specify multiple rules; e.g. + wav). If the target container does not + support the video/audio codec, remuxing will + fail. You can specify multiple rules; e.g. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 and anything else to mkv --recode-video FORMAT Re-encode the video into another format if @@ -969,29 +967,29 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git are the same as that of --use-postprocessor (default: pre_process) --xattrs Write metadata to the video file's xattrs - (using dublin core and xdg standards) + (using Dublin Core and XDG standards) --concat-playlist POLICY Concatenate videos in a playlist. One of "never", "always", or "multi_video" (default; only when the videos form a single - show). All the video files must have same - codecs and number of streams to be - concatable. The "pl_video:" prefix can be + show). 
All the video files must have the + same codecs and number of streams to be + concatenable. The "pl_video:" prefix can be used with "--paths" and "--output" to set the output filename for the concatenated files. See "OUTPUT TEMPLATE" for details --fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the - default; fix file if we can, warn - otherwise), force (try fixing even if file - already exists) + default; fix the file if we can, warn + otherwise), force (try fixing even if the + file already exists) --ffmpeg-location PATH Location of the ffmpeg binary; either the path to the binary or its containing directory --exec [WHEN:]CMD Execute a command, optionally prefixed with when to execute it, separated by a ":". Supported values of "WHEN" are the same as that of --use-postprocessor (default: - after_move). Same syntax as the output + after_move). The same syntax as the output template can be used to pass any field as arguments to the command. If no fields are passed, %(filepath,_filename|)q is appended @@ -999,12 +997,16 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git be used multiple times --no-exec Remove any previously defined --exec --convert-subs FORMAT Convert the subtitles to another format - (currently supported: ass, lrc, srt, vtt) - (Alias: --convert-subtitles) + (currently supported: ass, lrc, srt, vtt). + Use "--convert-subs none" to disable + conversion (default) (Alias: --convert- + subtitles) --convert-thumbnails FORMAT Convert the thumbnails to another format (currently supported: jpg, png, webp). You can specify multiple rules using similar - syntax as --remux-video + syntax as "--remux-video". Use "--convert- + thumbnails none" to disable conversion + (default) --split-chapters Split video into multiple files based on internal chapters. 
The "chapter:" prefix can be used with "--paths" and "--output" to set @@ -1025,7 +1027,7 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git --no-force-keyframes-at-cuts Do not force keyframes around the chapters when cutting/splitting (default) --use-postprocessor NAME[:ARGS] - The (case sensitive) name of plugin + The (case-sensitive) name of plugin postprocessors to be enabled, and (optionally) arguments to be passed to it, separated by a colon ":". ARGS are a @@ -1038,8 +1040,8 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git --print/--output), "before_dl" (before each video download), "post_process" (after each video download; default), "after_move" - (after moving video file to its final - locations), "after_video" (after downloading + (after moving the video file to its final + location), "after_video" (after downloading and processing all formats of a video), or "playlist" (at end of playlist). This option can be used multiple times to add different @@ -1057,7 +1059,7 @@ Make chapter entries for, or remove various segments (sponsor, music_offtopic, poi_highlight, chapter, all and default (=all). You can prefix the category with a "-" to exclude it. See [1] - for description of the categories. E.g. + for descriptions of the categories. E.g. --sponsorblock-mark all,-preview [1] https://wiki.sponsor.ajay.app/w/Segment_Categories --sponsorblock-remove CATS SponsorBlock categories to be removed from @@ -1089,7 +1091,7 @@ Make chapter entries for, or remove various segments (sponsor, (Alias: --no-allow-dynamic-mpd) --hls-split-discontinuity Split HLS playlists to different formats at discontinuities such as ad breaks - --no-hls-split-discontinuity Do not split HLS playlists to different + --no-hls-split-discontinuity Do not split HLS playlists into different formats at discontinuities such as ad breaks (default) --extractor-args IE_KEY:ARGS Pass ARGS arguments to the IE_KEY extractor. 
@@ -1099,7 +1101,7 @@ Make chapter entries for, or remove various segments (sponsor, # CONFIGURATION -You can configure yt-dlp by placing any supported command line option to a configuration file. The configuration is loaded from the following locations: +You can configure yt-dlp by placing any supported command line option in a configuration file. The configuration is loaded from the following locations: 1. **Main Configuration**: * The file given to `--config-location` @@ -1144,7 +1146,7 @@ E.g. with the following configuration file, yt-dlp will always extract the audio -o ~/YouTube/%(title)s.%(ext)s ``` -**Note**: Options in configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary, as if it were a UNIX shell. +**Note**: Options in a configuration file are just the same options aka switches used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary, as if it were a UNIX shell. You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded. @@ -1178,13 +1180,13 @@ As an alternative to using the `.netrc` file, which has the disadvantage of keep E.g. 
To use an encrypted `.netrc` file stored as `.authinfo.gpg` ``` -yt-dlp --netrc-cmd 'gpg --decrypt ~/.authinfo.gpg' https://www.youtube.com/watch?v=BaW_jenozKc +yt-dlp --netrc-cmd 'gpg --decrypt ~/.authinfo.gpg' 'https://www.youtube.com/watch?v=BaW_jenozKc' ``` ### Notes about environment variables * Environment variables are normally specified as `${VARIABLE}`/`$VARIABLE` on UNIX and `%VARIABLE%` on Windows; but is always shown as `${VARIABLE}` in this documentation -* yt-dlp also allow using UNIX-style variables on Windows for path-like options; e.g. `--output`, `--config-location` +* yt-dlp also allows using UNIX-style variables on Windows for path-like options; e.g. `--output`, `--config-location` * If unset, `${XDG_CONFIG_HOME}` defaults to `~/.config` and `${XDG_CACHE_HOME}` to `~/.cache` * On Windows, `~` points to `${HOME}` if present; or, `${USERPROFILE}` or `${HOMEDRIVE}${HOMEPATH}` otherwise * On Windows, `${USERPROFILE}` generally points to `C:\Users\` and `${APPDATA}` to `${USERPROFILE}\AppData\Roaming` @@ -1265,7 +1267,7 @@ The available fields are: - `like_count` (numeric): Number of positive ratings of the video - `dislike_count` (numeric): Number of negative ratings of the video - `repost_count` (numeric): Number of reposts of the video - - `average_rating` (numeric): Average rating give by users, the scale used depends on the webpage + - `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage - `comment_count` (numeric): Number of comments on the video (For some extractors, comments are only downloaded at the end, and so this field cannot be used) - `age_limit` (numeric): Age restriction for the video (years) - `live_status` (string): One of "not_live", "is_live", "is_upcoming", "was_live", "post_live" (was live, but VOD is not yet processed) @@ -1292,10 +1294,11 @@ The available fields are: - `playlist_uploader_id` (string): Nickname or id of the playlist uploader - `playlist_channel` (string): Display 
name of the channel that uploaded the playlist - `playlist_channel_id` (string): Identifier of the channel that uploaded the playlist + - `playlist_webpage_url` (string): URL of the playlist webpage - `webpage_url` (string): A URL to the video webpage which, if given to yt-dlp, should yield the same result again - `webpage_url_basename` (string): The basename of the webpage URL - `webpage_url_domain` (string): The domain of the webpage URL - - `original_url` (string): The URL given by the user (or same as `webpage_url` for playlist entries) + - `original_url` (string): The URL given by the user (or the same as `webpage_url` for playlist entries) - `categories` (list): List of categories the video belongs to - `tags` (list): List of tags assigned to the video - `cast` (list): List of cast members @@ -1372,7 +1375,7 @@ Each aforementioned sequence when referenced in an output template will be repla **Tip**: Look at the `-j` output to identify which fields are available for the particular URL -For numeric sequences you can use [numeric related formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting); e.g. `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`. +For numeric sequences, you can use [numeric related formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting); e.g. `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`. Output templates can also contain arbitrary hierarchical path, e.g. `-o "%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s"` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you. 
@@ -1414,7 +1417,7 @@ $ yt-dlp -P "C:/MyVideos" -o "%(series)s/%(season_number)s - %(season)s/%(episod # Download video as "C:\MyVideos\uploader\title.ext", subtitles as "C:\MyVideos\subs\uploader\title.ext" # and put all temporary files in "C:\MyVideos\tmp" -$ yt-dlp -P "C:/MyVideos" -P "temp:tmp" -P "subtitle:subs" -o "%(uploader)s/%(title)s.%(ext)s" BaW_jenoz --write-subs +$ yt-dlp -P "C:/MyVideos" -P "temp:tmp" -P "subtitle:subs" -o "%(uploader)s/%(title)s.%(ext)s" BaW_jenozKc --write-subs # Download video as "C:\MyVideos\uploader\title.ext" and subtitles as "C:\MyVideos\uploader\subs\title.ext" $ yt-dlp -P "C:/MyVideos" -o "%(uploader)s/%(title)s.%(ext)s" -o "subtitle:%(uploader)s/subs/%(title)s.%(ext)s" BaW_jenozKc --write-subs @@ -1553,9 +1556,9 @@ The available fields are: All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. E.g. `+res` prefers format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. E.g. `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. E.g. `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. E.g. `filesize~1G` prefers the format with filesize closest to 1 GiB. -The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behavior can be changed by using `--format-sort-force`. 
Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,vcodec:vp9.2,channels,acodec,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order. +The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behavior can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,vcodec,channels,acodec,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order. -Note that the default has `vcodec:vp9.2`; i.e. `av1` is not preferred. Similarly, the default for hdr is `hdr:12`; i.e. Dolby Vision is not preferred. These choices are made since DV and AV1 formats are not yet fully compatible with most devices. This may be changed in the future as more devices become capable of smoothly playing back these formats. +Note that the default for hdr is `hdr:12`; i.e. Dolby Vision is not preferred. This choice was made since DV formats are not yet fully compatible with most devices. This may be changed in the future. If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`. 
@@ -1632,11 +1635,11 @@ $ yt-dlp -S "res:480" # or the worst video (that also has audio) if there is no video under 50 MB $ yt-dlp -f "b[filesize<50M] / w" -# Download largest video (that also has audio) but no bigger than 50 MB, +# Download the largest video (that also has audio) but no bigger than 50 MB, # or the smallest video (that also has audio) if there is no video under 50 MB $ yt-dlp -f "b" -S "filesize:50M" -# Download best video (that also has audio) that is closest in size to 50 MB +# Download the best video (that also has audio) that is closest in size to 50 MB $ yt-dlp -f "b" -S "filesize~50M" @@ -1692,7 +1695,7 @@ The metadata obtained by the extractors can be modified by using `--parse-metada The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [Python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups, a single field name, or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields. -Note that these options preserve their relative order, allowing replacements to be made in parsed fields and viceversa. Also, any field thus created can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--embed-metadata`. +Note that these options preserve their relative order, allowing replacements to be made in parsed fields and vice versa. Also, any field thus created can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--embed-metadata`. 
This option also has a few special uses: @@ -1758,7 +1761,7 @@ $ yt-dlp --replace-in-metadata "title,uploader" "[ _]" "-" # EXTRACTOR ARGUMENTS -Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=mediaconnect,web;formats=incomplete" --extractor-args "funimation:version=uncut"` +Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=tv,mweb;formats=incomplete" --extractor-args "funimation:version=uncut"` Note: In CLI, `ARG` can use `-` instead of `_`; e.g. `youtube:player-client"` becomes `youtube:player_client"` @@ -1767,7 +1770,7 @@ The following extractors use this feature: #### youtube * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively -* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mediaconnect`, `mweb`, `android_producer`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,tv` is used, but `tv_embedded`, `web_creator` and `mediaconnect` are added as required for age-gated videos. 
Similarly, the music variants are added for `music.youtube.com` urls. Most `android` clients will be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients. +* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mweb`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,mweb` is used, or `web_creator,mweb` is used when authenticating with cookies. The `_music` variants are added for `music.youtube.com` URLs. Some clients, such as `web` and `android`, require a `po_token` for their formats to be downloadable. Some clients, such as the `_creator` variants, will only work with authentication. Not all clients support authentication via cookies. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web` * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp. * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) @@ -1777,6 +1780,9 @@ The following extractors use this feature: * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others * `innertube_key`: Innertube API key to use for all API requests. 
By default, no API key is used * `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning +* `data_sync_id`: Overrides the account Data Sync ID used in Innertube API requests. This may be needed if you are using an account with `youtube:player_skip=webpage,configs` or `youtubetab:skip=webpage` +* `visitor_data`: Overrides the Visitor Data used in Innertube API requests. This should be used with `player_skip=webpage,configs` and without cookies. Note: this may have adverse effects if used improperly. If a session from a browser is wanted, you should pass cookies instead (which contain the Visitor ID) +* `po_token`: Proof of Origin (PO) Token(s) to use for requesting video playback. Comma separated list of PO Tokens in the format `CLIENT+PO_TOKEN`, e.g. `youtube:po_token=web+XXX,android+YYY` #### youtubetab (YouTube playlists, channels, feeds, etc.) * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) @@ -1788,6 +1794,7 @@ The following extractors use this feature: * `key_query`: Passthrough the master m3u8 URL query to its HLS AES-128 decryption key URI if no value is provided, or else apply the query string given as `key_query=VALUE`. Note that this will have no effect if the key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg * `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. 
Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist * `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live` +* `impersonate`: Target(s) to try and impersonate with the initial webpage request; e.g. `generic:impersonate=safari,chrome-110`. Use `generic:impersonate` to impersonate any available target, and use `generic:impersonate=false` to disable impersonation (default) #### funimation * `language`: Audio languages to extract, e.g. `funimation:language=english,japanese` @@ -1853,7 +1860,7 @@ The following extractors use this feature: * `cdn`: One or more CDN IDs to use with the API call for stream URLs, e.g. `gcp_cdn`, `gs_cdn_pc_app`, `gs_cdn_mobile_web`, `gs_cdn_pc_web` #### soundcloud -* `formats`: Formats to request from the API. Requested values should be in the format of `{protocol}_{extension}` (omitting the bitrate), e.g. `hls_opus,http_aac`. The `*` character functions as a wildcard, e.g. `*_mp3`, and can be passed by itself to request all formats. Known protocols include `http`, `hls` and `hls-aes`; known extensions include `aac`, `opus` and `mp3`. Original `download` formats are always extracted. Default is `http_aac,hls_aac,http_opus,hls_opus,http_mp3,hls_mp3` +* `formats`: Formats to request from the API. Requested values should be in the format of `{protocol}_{codec}`, e.g. `hls_opus,http_aac`. The `*` character functions as a wildcard, e.g. `*_mp3`, and can be passed by itself to request all formats. Known protocols include `http`, `hls` and `hls-aes`; known codecs include `aac`, `opus` and `mp3`. Original `download` formats are always extracted. Default is `http_aac,hls_aac,http_opus,hls_opus,http_mp3,hls_mp3` #### orfon (orf:on) * `prefer_segments_playlist`: Prefer a playlist of program segments instead of a single complete video when available. 
If individual segments are desired, use `--concat-playlist never --extractor-args "orfon:prefer_segments_playlist"` @@ -1861,8 +1868,8 @@ The following extractors use this feature: #### bilibili * `prefer_multi_flv`: Prefer extracting flv formats over mp4 for older videos that still provide legacy formats -#### digitalconcerthall -* `prefer_combined_hls`: Prefer extracting combined/pre-merged video and audio HLS formats. This will exclude 4K/HEVC video and lossless/FLAC audio formats, which are only available as split video/audio HLS formats +#### sonylivseries +* `sort_order`: Episode sort order for series extraction - one of `asc` (ascending, oldest first) or `desc` (descending, newest first). Default is `asc` **Note**: These options may be changed/removed in the future without concern for backward compatibility @@ -1890,6 +1897,7 @@ In other words, the file structure on the disk looks something like: myplugin.py yt-dlp looks for these `yt_dlp_plugins` namespace folders in many locations (see below) and loads in plugins from **all** of them. +Set the environment variable `YTDLP_NO_PLUGINS` to something nonempty to disable loading plugins entirely. See the [wiki for some known plugins](https://github.com/yt-dlp/yt-dlp/wiki/Plugins) @@ -1917,7 +1925,7 @@ Plugins can be installed using various methods and locations. * Plugin packages can be installed and managed using `pip`. See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example. * Note: plugin files between plugin packages installed with pip must have unique filenames. * Any path in `PYTHONPATH` is searched in for the `yt_dlp_plugins` namespace folder. - * Note: This does not apply for Pyinstaller/py2exe builds. + * Note: This does not apply for Pyinstaller builds. `.zip`, `.egg` and `.whl` archives containing a `yt_dlp_plugins` namespace folder in their root are also supported as plugin packages. 
@@ -2151,9 +2159,9 @@ with yt_dlp.YoutubeDL(ydl_opts) as ydl: * **YouTube improvements**: * Supports Clips, Stories (`ytstories:`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`) * Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) **\*** - * Supports some (but not all) age-gated content without cookies * Download livestreams from the start using `--live-from-start` (*experimental*) * Channel URLs download all uploads of the channel, including shorts and live + * Support for [logging in with OAuth](https://github.com/yt-dlp/yt-dlp/wiki/Extractors#logging-in-with-oauth) * **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]` @@ -2177,9 +2185,9 @@ with yt_dlp.YoutubeDL(ydl_opts) as ydl: * **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. 
Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata` -* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-match-filter` etc +* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-match-filters` etc -* **Improvements**: Regex and other operators in `--format`/`--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc +* **Improvements**: Regex and other operators in `--format`/`--match-filters`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc * **Plugins**: Extractors and PostProcessors can be loaded from an external file. 
See [plugins](#plugins) for details @@ -2195,12 +2203,12 @@ Features marked with a **\*** have been back-ported to youtube-dl Some of yt-dlp's default options are different from that of youtube-dl and youtube-dlc: -* yt-dlp supports only [Python 3.8+](## "Windows 7"), and *may* remove support for more versions as they [become EOL](https://devguide.python.org/versions/#python-release-cycle); while [youtube-dl still supports Python 2.6+ and 3.2+](https://github.com/ytdl-org/youtube-dl/issues/30568#issue-1118238743) +* yt-dlp supports only [Python 3.9+](## "Windows 8"), and will remove support for more versions as they [become EOL](https://devguide.python.org/versions/#python-release-cycle); while [youtube-dl still supports Python 2.6+ and 3.2+](https://github.com/ytdl-org/youtube-dl/issues/30568#issue-1118238743) * The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`), no longer work. See [removed options](#Removed) for details * `avconv` is not supported as an alternative to `ffmpeg` * yt-dlp stores config files in slightly different locations to youtube-dl. See [CONFIGURATION](#configuration) for a list of correct locations * The default [output template](#output-template) is `%(title)s [%(id)s].%(ext)s`. There is no real reason for this change. This was changed before yt-dlp was ever made public and now there are no plans to change it back to `%(title)s-%(id)s.%(ext)s`. Instead, you may use `--compat-options filename` -* The default [format sorting](#sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order +* The default [format sorting](#sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. 
You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order. Older versions of yt-dlp preferred VP9 due to its broader compatibility; you can use `--compat-options prefer-vp9-sort` to revert to that format sorting preference. These two compat options cannot be used together * The default format selector is `bv*+ba/b`. This means that if a combined video + audio format that is better than the best video-only format is found, the former will be preferred. Use `-f bv+ba/b` or `--compat-options format-spec` to revert this * Unlike youtube-dlc, yt-dlp does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. You can also use `--compat-options multistreams` to enable both * `--no-abort-on-error` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead @@ -2220,7 +2228,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * `certifi` will be used for SSL root certificates, if installed. If you want to use system certificates (e.g. self-signed), use `--compat-options no-certifi` * yt-dlp's sanitization of invalid characters in filenames is different/smarter than in youtube-dl. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior * ~~yt-dlp tries to parse the external downloader outputs into the standard progress output if possible (Currently implemented: [aria2c](https://github.com/yt-dlp/yt-dlp/issues/5931)). You can use `--compat-options no-external-downloader-progress` to get the downloader output as-is~~ -* yt-dlp versions between 2021.09.01 and 2023.01.02 applies `--match-filter` to nested playlists. 
This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this +* yt-dlp versions between 2021.09.01 and 2023.01.02 applied `--match-filters` to nested playlists. This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this * yt-dlp versions between 2021.11.10 and 2023.06.21 estimated `filesize_approx` values for fragmented/manifest formats. This was added for convenience in [f2fe69](https://github.com/yt-dlp/yt-dlp/commit/f2fe69c7b0d208bdb1f6292b4ae92bc1e1a7444a), but was reverted in [0dff8e](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) due to the potentially extreme inaccuracy of the estimated values. Use `--compat-options manifest-filesize-approx` to keep extracting the estimated values * yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) to be used for standard http requests. * The sub-modules `swfinterp`, `casefold` are removed. 
@@ -2229,11 +2237,11 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu For ease of use, a few more compat options are available: * `--compat-options all`: Use all compat options (**Do NOT use this!**) -* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx,-allow-unsafe-ext` -* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx,-allow-unsafe-ext` +* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx,-allow-unsafe-ext,-prefer-vp9-sort` +* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx,-allow-unsafe-ext,-prefer-vp9-sort` * `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date` * `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx` -* `--compat-options 2023`: Currently does nothing. Use this to enable all future compat options +* `--compat-options 2023`: Same as `--compat-options prefer-vp9-sort`. Use this to enable all future compat options The following compat options restore vulnerable behavior from before security patches: @@ -2266,13 +2274,13 @@ While these options are redundant, they are still expected to be used due to the --get-thumbnail --print thumbnail -e, --get-title --print title -g, --get-url --print urls - --match-title REGEX --match-filter "title ~= (?i)REGEX" - --reject-title REGEX --match-filter "title !~= (?i)REGEX" - --min-views COUNT --match-filter "view_count >=? COUNT" - --max-views COUNT --match-filter "view_count <=? 
COUNT" - --break-on-reject Use --break-match-filter - --user-agent UA --add-header "User-Agent:UA" - --referer URL --add-header "Referer:URL" + --match-title REGEX --match-filters "title ~= (?i)REGEX" + --reject-title REGEX --match-filters "title !~= (?i)REGEX" + --min-views COUNT --match-filters "view_count >=? COUNT" + --max-views COUNT --match-filters "view_count <=? COUNT" + --break-on-reject Use --break-match-filters + --user-agent UA --add-headers "User-Agent:UA" + --referer URL --add-headers "Referer:URL" --playlist-start NUMBER -I NUMBER: --playlist-end NUMBER -I :NUMBER --playlist-reverse -I ::-1 diff --git a/bundle/py2exe.py b/bundle/py2exe.py deleted file mode 100755 index 5b7f4883b..000000000 --- a/bundle/py2exe.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 - -# Allow execution from anywhere -import os -import sys - -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -import warnings - -from py2exe import freeze - -from devscripts.utils import read_version - -VERSION = read_version() - - -def main(): - warnings.warn( - 'py2exe builds do not support pycryptodomex and needs VC++14 to run. 
' - 'It is recommended to run "pyinst.py" to build using pyinstaller instead') - - freeze( - console=[{ - 'script': './yt_dlp/__main__.py', - 'dest_base': 'yt-dlp', - 'icon_resources': [(1, 'devscripts/logo.ico')], - }], - version_info={ - 'version': VERSION, - 'description': 'A feature-rich command-line audio/video downloader', - 'comments': 'Official repository: ', - 'product_name': 'yt-dlp', - 'product_version': VERSION, - }, - options={ - 'bundle_files': 0, - 'compressed': 1, - 'optimize': 2, - 'dist_dir': './dist', - 'excludes': [ - # py2exe cannot import Crypto - 'Crypto', - 'Cryptodome', - # requests >=2.32.0 breaks py2exe builds due to certifi dependency - 'requests', - 'urllib3', - ], - 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], - # Modules that are only imported dynamically must be added here - 'includes': ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated', - 'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated'], - }, - zipfile=None, - ) - - -if __name__ == '__main__': - main() diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index 5189de2d7..079e2f729 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -185,5 +185,59 @@ "action": "add", "when": "6075a029dba70a89675ae1250e7cdfd91f0eba41", "short": "[priority] Security: [[ie/douyutv] Do not use dangerous javascript source/URL](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3v33-3wmw-3785)\n - A dependency on potentially malicious third-party JavaScript code has been removed from the Douyu extractors" + }, + { + "action": "add", + "when": "fb8b7f226d251e521a89b23c415e249e5b788e5c", + "short": "[priority] **The minimum *recommended* Python version has been raised to 3.9**\nSince Python 3.8 will reach end-of-life in October 2024, support for it will be dropped soon. 
[Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)" + }, + { + "action": "change", + "when": "b31b81d85f00601710d4fac590c3e4efb4133283", + "short": "[ci] Rerun failed tests (#11143)", + "authors": ["Grub4K"] + }, + { + "action": "add", + "when": "a886cf3e900f4a2ec00af705f883539269545609", + "short": "[priority] **py2exe is no longer supported**\nThis release's `yt-dlp_min.exe` will be the last, and it's actually a PyInstaller-bundled executable so that yt-dlp users updating their py2exe build with `-U` will be automatically migrated. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10087)" + }, + { + "action": "add", + "when": "a886cf3e900f4a2ec00af705f883539269545609", + "short": "[priority] **Following this release, yt-dlp's Python dependencies *must* be installed using the `default` group**\nIf you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's optional dependencies (which were previously included by default). [Read more](https://github.com/yt-dlp/yt-dlp/pull/11255)" + }, + { + "action": "add", + "when": "87884f15580910e4e0fe0e1db73508debc657471", + "short": "[priority] **Beginning with this release, yt-dlp's Python dependencies *must* be installed using the `default` group**\nIf you're installing yt-dlp with pip/pipx or requiring yt-dlp in your own Python project, you'll need to specify `yt-dlp[default]` if you want to also install yt-dlp's optional dependencies (which were previously included by default). [Read more](https://github.com/yt-dlp/yt-dlp/pull/11255)" + }, + { + "action": "add", + "when": "d784464399b600ba9516bbcec6286f11d68974dd", + "short": "[priority] **The minimum *required* Python version has been raised to 3.9**\nPython 3.8 reached its end-of-life on 2024.10.07, and yt-dlp has now removed support for it. 
As an unfortunate side effect, the official `yt-dlp.exe` and `yt-dlp_x86.exe` binaries are no longer supported on Windows 7. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)" + }, + { + "action": "change", + "when": "914af9a0cf51c9a3f74aa88d952bee8334c67511", + "short": "Expand paths in `--plugin-dirs` (#11334)", + "authors": ["bashonly"] + }, + { + "action": "change", + "when": "c29f5a7fae93a08f3cfbb6127b2faa75145b06a0", + "short": "[ie/generic] Do not impersonate by default (#11336)", + "authors": ["bashonly"] + }, + { + "action": "change", + "when": "57212a5f97ce367590aaa5c3e9a135eead8f81f7", + "short": "[ie/vimeo] Fix API retries (#11351)", + "authors": ["bashonly"] + }, + { + "action": "add", + "when": "52c0ffe40ad6e8404d93296f575007b05b04c686", + "short": "[priority] **Login with OAuth is no longer supported for YouTube**\nDue to a change made by the site, yt-dlp is no longer able to support OAuth login for YouTube. [Read more](https://github.com/yt-dlp/yt-dlp/issues/11462#issuecomment-2471703090)" } ] diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py index 7f3c88bcf..73cf803b8 100644 --- a/devscripts/generate_aes_testdata.py +++ b/devscripts/generate_aes_testdata.py @@ -11,13 +11,12 @@ import codecs import subprocess from yt_dlp.aes import aes_encrypt, key_expansion -from yt_dlp.utils import intlist_to_bytes secret_msg = b'Secret message goes here' def hex_str(int_list): - return codecs.encode(intlist_to_bytes(int_list), 'hex') + return codecs.encode(bytes(int_list), 'hex') def openssl_encode(algo, key, iv): diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py index 00634fb91..7c876101b 100644 --- a/devscripts/make_changelog.py +++ b/devscripts/make_changelog.py @@ -71,14 +71,13 @@ class CommitGroup(enum.Enum): def get(cls, value: str) -> tuple[CommitGroup | None, str | None]: group, _, subgroup = (group.strip().lower() for group in value.partition('/')) - result = 
cls.group_lookup().get(group) - if not result: - if subgroup: - return None, value - subgroup = group - result = cls.subgroup_lookup().get(subgroup) + if result := cls.group_lookup().get(group): + return result, subgroup or None - return result, subgroup or None + if subgroup: + return None, value + + return cls.subgroup_lookup().get(group), group or None @dataclass @@ -136,8 +135,7 @@ class Changelog: first = False yield '\n

Changelog

\n' - group = groups[item] - if group: + if group := groups[item]: yield self.format_module(item.value, group) if self._collapsible: @@ -253,7 +251,7 @@ class CommitRange: ''', re.VERBOSE | re.DOTALL) EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE) REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})') - FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert|Improve)\s+([\da-f]{40})') + FIXES_RE = re.compile(r'(?i:(?:bug\s*)?fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Improve)\s+([\da-f]{40})') UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)') def __init__(self, start, end, default_author=None): @@ -287,11 +285,16 @@ class CommitRange: short = next(lines) skip = short.startswith('Release ') or short == '[version] update' + fix_commitish = None + if match := self.FIXES_RE.search(short): + fix_commitish = match.group(1) + authors = [default_author] if default_author else [] for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR): - match = self.AUTHOR_INDICATOR_RE.match(line) - if match: + if match := self.AUTHOR_INDICATOR_RE.match(line): authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold) + if not fix_commitish and (match := self.FIXES_RE.fullmatch(line)): + fix_commitish = match.group(1) commit = Commit(commit_hash, short, authors) if skip and (self._start or not i): @@ -301,21 +304,17 @@ class CommitRange: logger.debug(f'Reached Release commit, breaking: {commit}') break - revert_match = self.REVERT_RE.fullmatch(commit.short) - if revert_match: - reverts[revert_match.group(1)] = commit + if match := self.REVERT_RE.fullmatch(commit.short): + reverts[match.group(1)] = commit continue - fix_match = self.FIXES_RE.search(commit.short) - if fix_match: - commitish = fix_match.group(1) - fixes[commitish].append(commit) + if fix_commitish: + fixes[fix_commitish].append(commit) commits[commit.hash] = commit for commitish, revert_commit in 
reverts.items(): - reverted = commits.pop(commitish, None) - if reverted: + if reverted := commits.pop(commitish, None): logger.debug(f'{commitish} fully reverted {reverted}') else: commits[revert_commit.hash] = revert_commit @@ -461,8 +460,7 @@ def create_changelog(args): logger.info(f'Loaded {len(commits)} commits') - new_contributors = get_new_contributors(args.contributors_path, commits) - if new_contributors: + if new_contributors := get_new_contributors(args.contributors_path, commits): if args.contributors: write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a') logger.info(f'New contributors: {", ".join(new_contributors)}') diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py index a5d59f3c0..2a418ddbf 100644 --- a/devscripts/make_issue_template.py +++ b/devscripts/make_issue_template.py @@ -32,20 +32,29 @@ VERBOSE_TMPL = ''' placeholder: | [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc'] [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 - [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe) - [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 - [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1 - [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 + [debug] yt-dlp version nightly@... 
from yt-dlp/yt-dlp-nightly-builds [1a176d874] (win_exe) + [debug] Python 3.10.11 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0 (OpenSSL 1.1.1t 7 Feb 2023) + [debug] exe versions: ffmpeg 7.0.2 (setts), ffprobe 7.0.2 + [debug] Optional libraries: Cryptodome-3.21.0, brotli-1.1.0, certifi-2024.08.30, curl_cffi-0.5.10, mutagen-1.47.0, requests-2.32.3, sqlite3-3.40.1, urllib3-2.2.3, websockets-13.1 [debug] Proxy map: {} - [debug] Request Handlers: urllib, requests - [debug] Loaded 1893 extractors - [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest + [debug] Request Handlers: urllib, requests, websockets, curl_cffi + [debug] Loaded 1838 extractors + [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest + Latest version: nightly@... from yt-dlp/yt-dlp-nightly-builds yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds) [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc render: shell validations: required: true + - type: markdown + attributes: + value: | + > [!CAUTION] + > ### GitHub is experiencing a high volume of malicious spam comments. + > ### If you receive any replies asking you download a file, do NOT follow the download links! + > + > Note that this issue may be temporarily locked as an anti-spam measure after it is opened. 
'''.strip() NO_SKIP = ''' diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index d74ea202f..d288d8429 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -2,7 +2,6 @@ # Allow direct execution import os -import shutil import sys sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -34,18 +33,14 @@ MODULE_TEMPLATE = read_file('devscripts/lazy_load_template.py') def main(): + os.environ['YTDLP_NO_PLUGINS'] = 'true' + os.environ['YTDLP_NO_LAZY_EXTRACTORS'] = 'true' + lazy_extractors_filename = get_filename_args(default_outfile='yt_dlp/extractor/lazy_extractors.py') - if os.path.exists(lazy_extractors_filename): - os.remove(lazy_extractors_filename) - _ALL_CLASSES = get_all_ies() # Must be before import - - import yt_dlp.plugins + from yt_dlp.extractor.extractors import _ALL_CLASSES from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor - # Filter out plugins - _ALL_CLASSES = [cls for cls in _ALL_CLASSES if not cls.__module__.startswith(f'{yt_dlp.plugins.PACKAGE_NAME}.')] - DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR}) module_src = '\n'.join(( MODULE_TEMPLATE, @@ -58,20 +53,6 @@ def main(): write_file(lazy_extractors_filename, f'{module_src}\n') -def get_all_ies(): - PLUGINS_DIRNAME = 'ytdlp_plugins' - BLOCKED_DIRNAME = f'{PLUGINS_DIRNAME}_blocked' - if os.path.exists(PLUGINS_DIRNAME): - # os.rename cannot be used, e.g. in Docker. 
See https://github.com/yt-dlp/yt-dlp/pull/4958 - shutil.move(PLUGINS_DIRNAME, BLOCKED_DIRNAME) - try: - from yt_dlp.extractor.extractors import _ALL_CLASSES - finally: - if os.path.exists(BLOCKED_DIRNAME): - shutil.move(BLOCKED_DIRNAME, PLUGINS_DIRNAME) - return _ALL_CLASSES - - def extra_ie_code(ie, base=None): for var in STATIC_CLASS_PROPERTIES: val = getattr(ie, var) diff --git a/devscripts/run_tests.py b/devscripts/run_tests.py index c605aa62c..eb614fe59 100755 --- a/devscripts/run_tests.py +++ b/devscripts/run_tests.py @@ -16,7 +16,7 @@ fix_test_name = functools.partial(re.compile(r'IE(_all|_\d+)?$').sub, r'\1') def parse_args(): parser = argparse.ArgumentParser(description='Run selected yt-dlp tests') parser.add_argument( - 'test', help='a extractor tests, or one of "core" or "download"', nargs='*') + 'test', help='an extractor test, test path, or one of "core" or "download"', nargs='*') parser.add_argument( '-k', help='run a test matching EXPRESSION. Same as "pytest -k"', metavar='EXPRESSION') parser.add_argument( @@ -27,7 +27,6 @@ def parse_args(): def run_tests(*tests, pattern=None, ci=False): run_core = 'core' in tests or (not pattern and not tests) run_download = 'download' in tests - tests = list(map(fix_test_name, tests)) pytest_args = args.pytest_args or os.getenv('HATCH_TEST_ARGS', '') arguments = ['pytest', '-Werror', '--tb=short', *shlex.split(pytest_args)] @@ -41,7 +40,9 @@ def run_tests(*tests, pattern=None, ci=False): arguments.extend(['-m', 'download']) else: arguments.extend( - f'test/test_download.py::TestDownload::test_{test}' for test in tests) + test if '/' in test + else f'test/test_download.py::TestDownload::test_{fix_test_name(test)}' + for test in tests) print(f'Running {arguments}', flush=True) try: diff --git a/pyproject.toml b/pyproject.toml index d5480e1c6..96e2d669a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ maintainers = [ ] description = "A feature-rich command-line audio/video downloader" readme = 
"README.md" -requires-python = ">=3.8" +requires-python = ">=3.9" keywords = [ "youtube-dl", "video-downloader", @@ -29,11 +29,11 @@ classifiers = [ "Environment :: Console", "Programming Language :: Python", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Programming Language :: Python :: Implementation", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", @@ -41,7 +41,10 @@ classifiers = [ "Operating System :: OS Independent", ] dynamic = ["version"] -dependencies = [ +dependencies = [] + +[project.optional-dependencies] +default = [ "brotli; implementation_name=='cpython'", "brotlicffi; implementation_name!='cpython'", "certifi", @@ -49,14 +52,11 @@ dependencies = [ "pycryptodomex", "requests>=2.32.2,<3", "urllib3>=1.26.17,<3", - "websockets>=12.0", + "websockets>=13.0", ] - -[project.optional-dependencies] -default = [] curl-cffi = [ "curl-cffi==0.5.10; os_name=='nt' and implementation_name=='cpython'", - "curl-cffi>=0.5.10,!=0.6.*,<0.8; os_name!='nt' and implementation_name=='cpython'", + "curl-cffi>=0.5.10,!=0.6.*,<0.7.2; os_name!='nt' and implementation_name=='cpython'", ] secretstorage = [ "cffi", @@ -76,16 +76,14 @@ dev = [ ] static-analysis = [ "autopep8~=2.0", - "ruff~=0.5.0", + "ruff~=0.8.0", ] test = [ "pytest~=8.1", + "pytest-rerunfailures~=14.0", ] pyinstaller = [ - "pyinstaller>=6.7.0", # for compat with setuptools>=70 -] -py2exe = [ - "py2exe>=0.12", + "pyinstaller>=6.11.1", # Windows temp cleanup fixed in 6.11.1 ] [project.urls] @@ -162,7 +160,6 @@ lint-fix = "ruff check --fix {args:.}" features = ["test"] dependencies = [ "pytest-randomly~=3.15", - "pytest-rerunfailures~=14.0", "pytest-xdist[psutil]~=3.5", ] @@ -172,13 +169,11 
@@ run-cov = "echo Code coverage not implemented && exit 1" [[tool.hatch.envs.hatch-test.matrix]] python = [ - "3.8", "3.9", "3.10", "3.11", "3.12", - "pypy3.8", - "pypy3.9", + "3.13", "pypy3.10", ] @@ -191,6 +186,7 @@ ignore = [ "E501", # line-too-long "E731", # lambda-assignment "E741", # ambiguous-variable-name + "UP031", # printf-string-formatting "UP036", # outdated-version-block "B006", # mutable-argument-default "B008", # function-call-in-default-argument @@ -263,9 +259,6 @@ select = [ "A002", # builtin-argument-shadowing "C408", # unnecessary-collection-call ] -"yt_dlp/jsinterp.py" = [ - "UP031", # printf-string-formatting -] [tool.ruff.lint.isort] known-first-party = [ @@ -318,6 +311,16 @@ banned-from = [ "yt_dlp.compat.compat_urllib_parse_urlparse".msg = "Use `urllib.parse.urlparse` instead." "yt_dlp.compat.compat_shlex_quote".msg = "Use `yt_dlp.utils.shell_quote` instead." "yt_dlp.utils.error_to_compat_str".msg = "Use `str` instead." +"yt_dlp.utils.bytes_to_intlist".msg = "Use `list` instead." +"yt_dlp.utils.intlist_to_bytes".msg = "Use `bytes` instead." +"yt_dlp.utils.decodeArgument".msg = "Do not use" +"yt_dlp.utils.decodeFilename".msg = "Do not use" +"yt_dlp.utils.encodeFilename".msg = "Do not use" +"yt_dlp.compat.compat_os_name".msg = "Use `os.name` instead." +"yt_dlp.compat.compat_realpath".msg = "Use `os.path.realpath` instead." +"yt_dlp.compat.functools".msg = "Use `functools` instead." +"yt_dlp.utils.decodeOption".msg = "Do not use" +"yt_dlp.utils.compiled_regex_type".msg = "Use `re.Pattern` instead." 
[tool.autopep8] max_line_length = 120 diff --git a/setup.cfg b/setup.cfg index 340cc3b4d..20d40cd30 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,7 +16,7 @@ remove-unused-variables = true [tox:tox] skipsdist = true -envlist = py{38,39,310,311,312},pypy{38,39,310} +envlist = py{39,310,311,312,313},pypy310 skip_missing_interpreters = true [testenv] # tox @@ -29,7 +29,7 @@ setenv = [isort] -py_version = 38 +py_version = 39 multi_line_output = VERTICAL_HANGING_INDENT line_length = 80 reverse_relative = true diff --git a/supportedsites.md b/supportedsites.md index e3bbe03ec..916735e08 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -45,10 +45,6 @@ - **aenetworks:collection** - **aenetworks:show** - **AeonCo** - - **afreecatv**: [*afreecatv*](## "netrc machine") afreecatv.com - - **afreecatv:catchstory**: [*afreecatv*](## "netrc machine") afreecatv.com catch story - - **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com livestreams - - **afreecatv:user** - **AirTV** - **AitubeKZVideo** - **AliExpressLive** @@ -133,6 +129,8 @@ - **Bandcamp:album** - **Bandcamp:user** - **Bandcamp:weekly** + - **Bandlab** + - **BandlabPlaylist** - **BannedVideo** - **bbc**: [*bbc*](## "netrc machine") BBC - **bbc.co.uk**: [*bbc*](## "netrc machine") BBC iPlayer @@ -143,6 +141,7 @@ - **BBVTV**: [*bbvtv*](## "netrc machine") - **BBVTVLive**: [*bbvtv*](## "netrc machine") - **BBVTVRecordings**: [*bbvtv*](## "netrc machine") + - **BeaconTv** - **BeatBumpPlaylist** - **BeatBumpVideo** - **Beatport** @@ -193,6 +192,7 @@ - **blerp** - **blogger.com** - **Bloomberg** + - **Bluesky** - **BokeCC** - **BongaCams** - **Boosty** @@ -250,9 +250,10 @@ - **cbsnews:livevideo**: CBS News Live Videos - **cbssports**: (**Currently broken**) - **cbssports:embed**: (**Currently broken**) - - **CCMA** + - **CCMA**: 3Cat, TV3 and Catalunya Ràdio - **CCTV**: 央视网 - **CDA**: [*cdapl*](## "netrc machine") + - **CDAFolder** - **Cellebrite** - **CeskaTelevize** - **CGTN** @@ -282,8 +283,6 @@ - 
**cmt.com**: (**Currently broken**) - **CNBCVideo** - **CNN** - - **CNNArticle** - - **CNNBlogs** - **CNNIndonesia** - **ComedyCentral** - **ComedyCentralTV** @@ -487,6 +486,7 @@ - **Gab** - **GabTV** - **Gaia**: [*gaia*](## "netrc machine") + - **GameDevTVDashboard**: [*gamedevtv*](## "netrc machine") - **GameJolt** - **GameJoltCommunity** - **GameJoltGame** @@ -505,6 +505,7 @@ - **gem.cbc.ca:playlist** - **Genius** - **GeniusLyrics** + - **Germanupa**: germanupa.de - **GetCourseRu**: [*getcourseru*](## "netrc machine") - **GetCourseRuPlayer** - **Gettr** @@ -580,6 +581,7 @@ - **HungamaAlbumPlaylist** - **HungamaSong** - **huya:live**: huya.com + - **huya:video**: 虎牙视频 - **Hypem** - **Hytale** - **Icareus** @@ -652,6 +654,8 @@ - **Karaoketv** - **Katsomo**: (**Currently broken**) - **KelbyOne**: (**Currently broken**) + - **Kenh14Playlist** + - **Kenh14Video** - **Ketnet** - **khanacademy** - **khanacademy:unit** @@ -660,6 +664,7 @@ - **kick:vod** - **Kicker** - **KickStarter** + - **Kika**: KiKA.de - **kinja:embed** - **KinoPoisk** - **Kommunetv** @@ -684,9 +689,9 @@ - **LastFMPlaylist** - **LastFMUser** - **LaXarxaMes**: [*laxarxames*](## "netrc machine") - - **lbry** - - **lbry:channel** - - **lbry:playlist** + - **lbry**: odysee.com + - **lbry:channel**: odysee.com channels + - **lbry:playlist**: odysee.com playlists - **LCI** - **Lcp** - **LcpPlay** @@ -722,7 +727,6 @@ - **livestream:original** - **Livestreamfails** - **Lnk** - - **LnkGo** - **loc**: Library of Congress - **loom** - **loom:folder** @@ -756,7 +760,7 @@ - **Masters** - **MatchTV** - **MBN**: mbn.co.kr (매일방송) - - **MDR**: MDR.DE and KiKA + - **MDR**: MDR.DE - **MedalTV** - **media.ccc.de** - **media.ccc.de:lists** @@ -785,10 +789,6 @@ - **MicrosoftLearnSession** - **MicrosoftMedius** - **microsoftstream**: Microsoft Stream - - **mildom**: Record ongoing live by specific user in Mildom - - **mildom:clip**: Clip in Mildom - - **mildom:​user:vod**: Download all VODs from specific user in Mildom - - 
**mildom:vod**: VOD in Mildom - **minds** - **minds:channel** - **minds:group** @@ -799,6 +799,7 @@ - **MiTele**: mitele.es - **mixch** - **mixch:archive** + - **mixch:movie** - **mixcloud** - **mixcloud:playlist** - **mixcloud:user** @@ -811,6 +812,7 @@ - **MNetTVLive**: [*mnettv*](## "netrc machine") - **MNetTVRecordings**: [*mnettv*](## "netrc machine") - **MochaVideo** + - **Mojevideo**: mojevideo.sk - **Mojvideo** - **Monstercat** - **MonsterSirenHypergryphMusic** @@ -1042,8 +1044,8 @@ - **Parler**: Posts on parler.com - **parliamentlive.tv**: UK parliament videos - **Parlview**: (**Currently broken**) - - **Patreon** - - **PatreonCampaign** + - **patreon** + - **patreon:campaign** - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) - **PBSKids** - 
**PearVideo** @@ -1060,8 +1062,8 @@ - **PhilharmonieDeParis**: Philharmonie de Paris - **phoenix.de** - **Photobucket** + - **PiaLive** - **Piapro**: [*piapro*](## "netrc machine") - - **PIAULIZAPortal**: ulizaportal.jp - PIA LIVE STREAM - **Picarto** - **PicartoVod** - **Piksel** @@ -1088,8 +1090,6 @@ - **PodbayFMChannel** - **Podchaser** - **podomatic**: (**Currently broken**) - - **Pokemon** - - **PokemonWatch** - **PokerGo**: [*pokergo*](## "netrc machine") - **PokerGoCollection**: [*pokergo*](## "netrc machine") - **PolsatGo** @@ -1160,6 +1160,7 @@ - **RadioJavan**: (**Currently broken**) - **radiokapital** - **radiokapital:show** + - **RadioRadicale** - **RadioZetPodcast** - **radlive** - **radlive:channel** @@ -1285,12 +1286,14 @@ - **Screencast** - **Screencastify** - **ScreencastOMatic** + - **ScreenRec** - **ScrippsNetworks** - **scrippsnetworks:watch** - **Scrolller** - **SCTE**: [*scte*](## "netrc machine") (**Currently broken**) - **SCTECourse**: [*scte*](## "netrc machine") (**Currently broken**) - **sejm** + - **Sen** - **SenalColombiaLive**: (**Currently broken**) - **SenateGov** - **SenateISVP** @@ -1327,11 +1330,16 @@ - **SlidesLive** - **Slutload** - **Smotrim** + - **SnapchatSpotlight** - **Snotr** - **Sohu** - **SohuV** - **SonyLIV**: [*sonyliv*](## "netrc machine") - **SonyLIVSeries** + - **soop**: [*afreecatv*](## "netrc machine") sooplive.co.kr + - **soop:catchstory**: [*afreecatv*](## "netrc machine") sooplive.co.kr catch story + - **soop:live**: [*afreecatv*](## "netrc machine") sooplive.co.kr livestreams + - **soop:user**: [*afreecatv*](## "netrc machine") - **soundcloud**: [*soundcloud*](## "netrc machine") - **soundcloud:playlist**: [*soundcloud*](## "netrc machine") - **soundcloud:related**: [*soundcloud*](## "netrc machine") @@ -1360,9 +1368,7 @@ - **spotify**: Spotify episodes (**Currently broken**) - **spotify:show**: Spotify shows (**Currently broken**) - **Spreaker** - - **SpreakerPage** - **SpreakerShow** - - **SpreakerShowPage** 
- **SpringboardPlatform** - **Sprout** - **SproutVideo** @@ -1438,7 +1444,7 @@ - **TeleQuebecSquat** - **TeleQuebecVideo** - **TeleTask**: (**Currently broken**) - - **Telewebion** + - **Telewebion**: (**Currently broken**) - **Tempo** - **TennisTV**: [*tennistv*](## "netrc machine") - **TenPlay**: [*10play*](## "netrc machine") @@ -1563,6 +1569,8 @@ - **UFCTV**: [*ufctv*](## "netrc machine") - **ukcolumn**: (**Currently broken**) - **UKTVPlay** + - **UlizaPlayer** + - **UlizaPortal**: ulizaportal.jp - **umg:de**: Universal Music Deutschland (**Currently broken**) - **Unistra** - **Unity**: (**Currently broken**) @@ -1580,8 +1588,6 @@ - **Varzesh3**: (**Currently broken**) - **Vbox7** - **Veo** - - **Veoh** - - **veoh:user** - **Vesti**: Вести.Ru (**Currently broken**) - **Vevo** - **VevoPlaylist** @@ -1608,6 +1614,7 @@ - **videomore:season** - **videomore:video** - **VideoPress** + - **Vidflex** - **Vidio**: [*vidio*](## "netrc machine") - **VidioLive**: [*vidio*](## "netrc machine") - **VidioPremier**: [*vidio*](## "netrc machine") @@ -1736,7 +1743,7 @@ - **XiaoHongShu**: 小红书 - **ximalaya**: 喜马拉雅FM - **ximalaya:album**: 喜马拉雅FM 专辑 - - **xinpianchang**: xinpianchang.com (**Currently broken**) + - **Xinpianchang**: 新片场 - **XMinus**: (**Currently broken**) - **XNXX** - **Xstream** @@ -1770,24 +1777,24 @@ - **YouPornStar**: YouPorn Pornstar, with description, sorting and pagination - **YouPornTag**: YouPorn tag (porntags), with sorting, filtering and pagination - **YouPornVideos**: YouPorn video (browse) playlists, with sorting, filtering and pagination - - **youtube**: YouTube - - **youtube:clip** - - **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies) - - **youtube:history**: Youtube watch history; ":ythis" keyword (requires cookies) - - **youtube:​music:search_url**: YouTube music search URLs with selectable sections, e.g. 
#songs - - **youtube:notif**: YouTube notifications; ":ytnotif" keyword (requires cookies) - - **youtube:playlist**: YouTube playlists - - **youtube:recommended**: YouTube recommended videos; ":ytrec" keyword - - **youtube:search**: YouTube search; "ytsearch:" prefix - - **youtube:​search:date**: YouTube search, newest videos first; "ytsearchdate:" prefix - - **youtube:search_url**: YouTube search URLs with sorting and filter support - - **youtube:​shorts:pivot:audio**: YouTube Shorts audio pivot (Shorts using audio of a given video) - - **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies) - - **youtube:tab**: YouTube Tabs - - **youtube:user**: YouTube user videos; "ytuser:" prefix - - **youtube:watchlater**: Youtube watch later list; ":ytwatchlater" keyword (requires cookies) - - **YoutubeLivestreamEmbed**: YouTube livestream embeds - - **YoutubeYtBe**: youtu.be + - **youtube**: [*youtube*](## "netrc machine") YouTube + - **youtube:clip**: [*youtube*](## "netrc machine") + - **youtube:favorites**: [*youtube*](## "netrc machine") YouTube liked videos; ":ytfav" keyword (requires cookies) + - **youtube:history**: [*youtube*](## "netrc machine") Youtube watch history; ":ythis" keyword (requires cookies) + - **youtube:​music:search_url**: [*youtube*](## "netrc machine") YouTube music search URLs with selectable sections, e.g. 
#songs + - **youtube:notif**: [*youtube*](## "netrc machine") YouTube notifications; ":ytnotif" keyword (requires cookies) + - **youtube:playlist**: [*youtube*](## "netrc machine") YouTube playlists + - **youtube:recommended**: [*youtube*](## "netrc machine") YouTube recommended videos; ":ytrec" keyword + - **youtube:search**: [*youtube*](## "netrc machine") YouTube search; "ytsearch:" prefix + - **youtube:​search:date**: [*youtube*](## "netrc machine") YouTube search, newest videos first; "ytsearchdate:" prefix + - **youtube:search_url**: [*youtube*](## "netrc machine") YouTube search URLs with sorting and filter support + - **youtube:​shorts:pivot:audio**: [*youtube*](## "netrc machine") YouTube Shorts audio pivot (Shorts using audio of a given video) + - **youtube:subscriptions**: [*youtube*](## "netrc machine") YouTube subscriptions feed; ":ytsubs" keyword (requires cookies) + - **youtube:tab**: [*youtube*](## "netrc machine") YouTube Tabs + - **youtube:user**: [*youtube*](## "netrc machine") YouTube user videos; "ytuser:" prefix + - **youtube:watchlater**: [*youtube*](## "netrc machine") Youtube watch later list; ":ytwatchlater" keyword (requires cookies) + - **YoutubeLivestreamEmbed**: [*youtube*](## "netrc machine") YouTube livestream embeds + - **YoutubeYtBe**: [*youtube*](## "netrc machine") youtu.be - **Zaiko** - **ZaikoETicket** - **Zapiks** diff --git a/test/helper.py b/test/helper.py index 3b550d192..c776e70b7 100644 --- a/test/helper.py +++ b/test/helper.py @@ -9,7 +9,6 @@ import types import yt_dlp.extractor from yt_dlp import YoutubeDL -from yt_dlp.compat import compat_os_name from yt_dlp.utils import preferredencoding, try_call, write_string, find_available_port if 'pytest' in sys.modules: @@ -49,7 +48,7 @@ def report_warning(message, *args, **kwargs): Print the message to stderr, it will be prefixed with 'WARNING:' If stderr is a tty file the 'WARNING:' will be colored """ - if sys.stderr.isatty() and compat_os_name != 'nt': + if 
sys.stderr.isatty() and os.name != 'nt': _msg_header = '\033[0;33mWARNING:\033[0m' else: _msg_header = 'WARNING:' diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 31e8f8244..54f35ef55 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -53,6 +53,18 @@ class TestInfoExtractor(unittest.TestCase): def test_ie_key(self): self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE) + def test_get_netrc_login_info(self): + for params in [ + {'usenetrc': True, 'netrc_location': './test/testdata/netrc/netrc'}, + {'netrc_cmd': f'{sys.executable} ./test/testdata/netrc/print_netrc.py'}, + ]: + ie = DummyIE(FakeYDL(params)) + self.assertEqual(ie._get_netrc_login_info(netrc_machine='normal_use'), ('user', 'pass')) + self.assertEqual(ie._get_netrc_login_info(netrc_machine='empty_user'), ('', 'pass')) + self.assertEqual(ie._get_netrc_login_info(netrc_machine='empty_pass'), ('user', '')) + self.assertEqual(ie._get_netrc_login_info(netrc_machine='both_empty'), ('', '')) + self.assertEqual(ie._get_netrc_login_info(netrc_machine='nonexistent'), (None, None)) + def test_html_search_regex(self): html = '

Watch this video

' search = lambda re, *args: self.ie._html_search_regex(re, html, *args) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 1847c4ffd..966d27a49 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -15,7 +15,6 @@ import json from test.helper import FakeYDL, assertRegexpMatches, try_rm from yt_dlp import YoutubeDL -from yt_dlp.compat import compat_os_name from yt_dlp.extractor import YoutubeIE from yt_dlp.extractor.common import InfoExtractor from yt_dlp.postprocessor.common import PostProcessor @@ -236,6 +235,35 @@ class TestFormatSelection(unittest.TestCase): downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot') + def test_format_selection_by_vcodec_sort(self): + formats = [ + {'format_id': 'av1-format', 'ext': 'mp4', 'vcodec': 'av1', 'acodec': 'none', 'url': TEST_URL}, + {'format_id': 'vp9-hdr-format', 'ext': 'mp4', 'vcodec': 'vp09.02.50.10.01.09.18.09.00', 'acodec': 'none', 'url': TEST_URL}, + {'format_id': 'vp9-sdr-format', 'ext': 'mp4', 'vcodec': 'vp09.00.50.08', 'acodec': 'none', 'url': TEST_URL}, + {'format_id': 'h265-format', 'ext': 'mp4', 'vcodec': 'h265', 'acodec': 'none', 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + ydl = YDL({'format': 'bestvideo', 'format_sort': ['vcodec:vp9.2']}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'vp9-hdr-format') + + ydl = YDL({'format': 'bestvideo', 'format_sort': ['vcodec:vp9']}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'vp9-sdr-format') + + ydl = YDL({'format': 'bestvideo', 'format_sort': ['+vcodec:vp9.2']}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'vp9-hdr-format') + + ydl = YDL({'format': 'bestvideo', 'format_sort': ['+vcodec:vp9']}) + 
ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'vp9-sdr-format') + def test_format_selection_string_ops(self): formats = [ {'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL}, @@ -810,8 +838,8 @@ class TestYoutubeDL(unittest.TestCase): test('%(filesize)#D', '1Ki') test('%(height)5.2D', ' 1.08k') test('%(title4)#S', 'foo_bar_test') - test('%(title4).10S', ('foo "bar" ', 'foo "bar"' + ('#' if compat_os_name == 'nt' else ' '))) - if compat_os_name == 'nt': + test('%(title4).10S', ('foo "bar" ', 'foo "bar"' + ('#' if os.name == 'nt' else ' '))) + if os.name == 'nt': test('%(title4)q', ('"foo ""bar"" test"', None)) test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', None)) test('%(formats.0.id)#q', ('"id 1"', None)) @@ -874,9 +902,9 @@ class TestYoutubeDL(unittest.TestCase): # Environment variable expansion for prepare_filename os.environ['__yt_dlp_var'] = 'expanded' - envvar = '%__yt_dlp_var%' if compat_os_name == 'nt' else '$__yt_dlp_var' + envvar = '%__yt_dlp_var%' if os.name == 'nt' else '$__yt_dlp_var' test(envvar, (envvar, 'expanded')) - if compat_os_name == 'nt': + if os.name == 'nt': test('%s%', ('%s%', '%s%')) os.environ['s'] = 'expanded' test('%s%', ('%s%', 'expanded')) # %s% should be expanded before escaping %s diff --git a/test/test_aes.py b/test/test_aes.py index 5f975efec..9cd9189bc 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -27,7 +27,6 @@ from yt_dlp.aes import ( pad_block, ) from yt_dlp.dependencies import Cryptodome -from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes # the encrypted data can be generate with 'devscripts/generate_aes_testdata.py' @@ -40,33 +39,33 @@ class TestAES(unittest.TestCase): def test_encrypt(self): msg = b'message' key = list(range(16)) - encrypted = aes_encrypt(bytes_to_intlist(msg), key) - decrypted = intlist_to_bytes(aes_decrypt(encrypted, key)) + encrypted = aes_encrypt(list(msg), key) + decrypted = 
bytes(aes_decrypt(encrypted, key)) self.assertEqual(decrypted, msg) def test_cbc_decrypt(self): data = b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\x27\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd' - decrypted = intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(data), self.key, self.iv)) + decrypted = bytes(aes_cbc_decrypt(list(data), self.key, self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) if Cryptodome.AES: - decrypted = aes_cbc_decrypt_bytes(data, intlist_to_bytes(self.key), intlist_to_bytes(self.iv)) + decrypted = aes_cbc_decrypt_bytes(data, bytes(self.key), bytes(self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) def test_cbc_encrypt(self): - data = bytes_to_intlist(self.secret_msg) - encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv)) + data = list(self.secret_msg) + encrypted = bytes(aes_cbc_encrypt(data, self.key, self.iv)) self.assertEqual( encrypted, b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd') def test_ctr_decrypt(self): - data = bytes_to_intlist(b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08') - decrypted = intlist_to_bytes(aes_ctr_decrypt(data, self.key, self.iv)) + data = list(b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08') + decrypted = bytes(aes_ctr_decrypt(data, self.key, self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) def test_ctr_encrypt(self): - data = bytes_to_intlist(self.secret_msg) - encrypted = intlist_to_bytes(aes_ctr_encrypt(data, self.key, self.iv)) + data = list(self.secret_msg) + encrypted = bytes(aes_ctr_encrypt(data, self.key, self.iv)) self.assertEqual( encrypted, b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08') @@ -75,47 +74,59 @@ class TestAES(unittest.TestCase): data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f.\x08\xb4T\xe4/\x17\xbd' authentication_tag = b'\xe8&I\x80rI\x07\x9d}YWuU@:e' - decrypted = 
intlist_to_bytes(aes_gcm_decrypt_and_verify( - bytes_to_intlist(data), self.key, bytes_to_intlist(authentication_tag), self.iv[:12])) + decrypted = bytes(aes_gcm_decrypt_and_verify( + list(data), self.key, list(authentication_tag), self.iv[:12])) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) if Cryptodome.AES: decrypted = aes_gcm_decrypt_and_verify_bytes( - data, intlist_to_bytes(self.key), authentication_tag, intlist_to_bytes(self.iv[:12])) + data, bytes(self.key), authentication_tag, bytes(self.iv[:12])) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) + def test_gcm_aligned_decrypt(self): + data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f' + authentication_tag = b'\x08\xb1\x9d!&\x98\xd0\xeaRq\x90\xe6;\xb5]\xd8' + + decrypted = bytes(aes_gcm_decrypt_and_verify( + list(data), self.key, list(authentication_tag), self.iv[:12])) + self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg[:16]) + if Cryptodome.AES: + decrypted = aes_gcm_decrypt_and_verify_bytes( + data, bytes(self.key), authentication_tag, bytes(self.iv[:12])) + self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg[:16]) + def test_decrypt_text(self): - password = intlist_to_bytes(self.key).decode() + password = bytes(self.key).decode() encrypted = base64.b64encode( - intlist_to_bytes(self.iv[:8]) + bytes(self.iv[:8]) + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae', ).decode() decrypted = (aes_decrypt_text(encrypted, password, 16)) self.assertEqual(decrypted, self.secret_msg) - password = intlist_to_bytes(self.key).decode() + password = bytes(self.key).decode() encrypted = base64.b64encode( - intlist_to_bytes(self.iv[:8]) + bytes(self.iv[:8]) + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83', ).decode() decrypted = (aes_decrypt_text(encrypted, password, 32)) self.assertEqual(decrypted, self.secret_msg) def test_ecb_encrypt(self): - data = bytes_to_intlist(self.secret_msg) - encrypted = 
intlist_to_bytes(aes_ecb_encrypt(data, self.key)) + data = list(self.secret_msg) + encrypted = bytes(aes_ecb_encrypt(data, self.key)) self.assertEqual( encrypted, b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') def test_ecb_decrypt(self): - data = bytes_to_intlist(b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') - decrypted = intlist_to_bytes(aes_ecb_decrypt(data, self.key, self.iv)) + data = list(b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') + decrypted = bytes(aes_ecb_decrypt(data, self.key, self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) def test_key_expansion(self): key = '4f6bdaa39e2f8cb07f5e722d9edef314' - self.assertEqual(key_expansion(bytes_to_intlist(bytearray.fromhex(key))), [ + self.assertEqual(key_expansion(list(bytearray.fromhex(key))), [ 0x4F, 0x6B, 0xDA, 0xA3, 0x9E, 0x2F, 0x8C, 0xB0, 0x7F, 0x5E, 0x72, 0x2D, 0x9E, 0xDE, 0xF3, 0x14, 0x53, 0x66, 0x20, 0xA8, 0xCD, 0x49, 0xAC, 0x18, 0xB2, 0x17, 0xDE, 0x35, 0x2C, 0xC9, 0x2D, 0x21, 0x8C, 0xBE, 0xDD, 0xD9, 0x41, 0xF7, 0x71, 0xC1, 0xF3, 0xE0, 0xAF, 0xF4, 0xDF, 0x29, 0x82, 0xD5, diff --git a/test/test_compat.py b/test/test_compat.py index e7d97e3e9..b1cc2a818 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -12,12 +12,7 @@ import struct from yt_dlp import compat from yt_dlp.compat import urllib # isort: split -from yt_dlp.compat import ( - compat_etree_fromstring, - compat_expanduser, - compat_urllib_parse_unquote, # noqa: TID251 - compat_urllib_parse_urlencode, # noqa: TID251 -) +from yt_dlp.compat import compat_etree_fromstring, compat_expanduser from yt_dlp.compat.urllib.request import getproxies @@ -43,39 +38,6 @@ class TestCompat(unittest.TestCase): finally: os.environ['HOME'] = old_home or '' - def test_compat_urllib_parse_unquote(self): - self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def') - 
self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def') - self.assertEqual(compat_urllib_parse_unquote(''), '') - self.assertEqual(compat_urllib_parse_unquote('%'), '%') - self.assertEqual(compat_urllib_parse_unquote('%%'), '%%') - self.assertEqual(compat_urllib_parse_unquote('%%%'), '%%%') - self.assertEqual(compat_urllib_parse_unquote('%2F'), '/') - self.assertEqual(compat_urllib_parse_unquote('%2f'), '/') - self.assertEqual(compat_urllib_parse_unquote('%E6%B4%A5%E6%B3%A2'), '津波') - self.assertEqual( - compat_urllib_parse_unquote(''' -%%a'''), - ''' -%%a''') - self.assertEqual( - compat_urllib_parse_unquote('''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%'''), - '''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%Things%''') - - def test_compat_urllib_parse_unquote_plus(self): - self.assertEqual(urllib.parse.unquote_plus('abc%20def'), 'abc def') - self.assertEqual(urllib.parse.unquote_plus('%7e/abc+def'), '~/abc def') - - def test_compat_urllib_parse_urlencode(self): - self.assertEqual(compat_urllib_parse_urlencode({'abc': 'def'}), 'abc=def') - self.assertEqual(compat_urllib_parse_urlencode({'abc': b'def'}), 'abc=def') - self.assertEqual(compat_urllib_parse_urlencode({b'abc': 'def'}), 'abc=def') - self.assertEqual(compat_urllib_parse_urlencode({b'abc': b'def'}), 'abc=def') - self.assertEqual(compat_urllib_parse_urlencode([('abc', 'def')]), 'abc=def') - self.assertEqual(compat_urllib_parse_urlencode([('abc', b'def')]), 'abc=def') - self.assertEqual(compat_urllib_parse_urlencode([(b'abc', 'def')]), 'abc=def') - self.assertEqual(compat_urllib_parse_urlencode([(b'abc', b'def')]), 'abc=def') - def test_compat_etree_fromstring(self): xml = ''' diff --git a/test/test_cookies.py b/test/test_cookies.py index e1271f67e..4b9b9b5a9 100644 --- a/test/test_cookies.py +++ b/test/test_cookies.py @@ -105,6 +105,13 @@ class TestCookies(unittest.TestCase): decryptor = 
LinuxChromeCookieDecryptor('Chrome', Logger()) self.assertEqual(decryptor.decrypt(encrypted_value), value) + def test_chrome_cookie_decryptor_linux_v10_meta24(self): + with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b''}): + encrypted_value = b'v10\x1f\xe4\x0e[\x83\x0c\xcc*kPi \xce\x8d\x1d\xbb\x80\r\x11\t\xbb\x9e^Hy\x94\xf4\x963\x9f\x82\xba\xfe\xa1\xed\xb9\xf1)\x00710\x92\xc8/<\x96B' + value = 'DE' + decryptor = LinuxChromeCookieDecryptor('Chrome', Logger(), meta_version=24) + self.assertEqual(decryptor.decrypt(encrypted_value), value) + def test_chrome_cookie_decryptor_windows_v10(self): with MonkeyPatch(cookies, { '_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2 +
1
+
2
+
3
+

4

+

5

+''' + class TestTraversal: def test_traversal_base(self): @@ -420,6 +443,186 @@ class TestTraversal: assert traverse_obj(morsel, [(None,), any]) == morsel, \ 'Morsel should not be implicitly changed to dict on usage' + def test_traversal_filter(self): + data = [None, False, True, 0, 1, 0.0, 1.1, '', 'str', {}, {0: 0}, [], [1]] + + assert traverse_obj(data, [..., filter]) == [True, 1, 1.1, 'str', {0: 0}, [1]], \ + '`filter` should filter falsy values' + + +class TestTraversalHelpers: + def test_traversal_require(self): + with pytest.raises(ExtractorError): + traverse_obj(_TEST_DATA, ['None', {require('value')}]) + assert traverse_obj(_TEST_DATA, ['str', {require('value')}]) == 'str', \ + '`require` should pass through non `None` values' + + def test_subs_list_to_dict(self): + assert traverse_obj([ + {'name': 'de', 'url': 'https://example.com/subs/de.vtt'}, + {'name': 'en', 'url': 'https://example.com/subs/en1.ass'}, + {'name': 'en', 'url': 'https://example.com/subs/en2.ass'}, + ], [..., { + 'id': 'name', + 'url': 'url', + }, all, {subs_list_to_dict}]) == { + 'de': [{'url': 'https://example.com/subs/de.vtt'}], + 'en': [ + {'url': 'https://example.com/subs/en1.ass'}, + {'url': 'https://example.com/subs/en2.ass'}, + ], + }, 'function should build subtitle dict from list of subtitles' + assert traverse_obj([ + {'name': 'de', 'url': 'https://example.com/subs/de.ass'}, + {'name': 'de'}, + {'name': 'en', 'content': 'content'}, + {'url': 'https://example.com/subs/en'}, + ], [..., { + 'id': 'name', + 'data': 'content', + 'url': 'url', + }, all, {subs_list_to_dict(lang=None)}]) == { + 'de': [{'url': 'https://example.com/subs/de.ass'}], + 'en': [{'data': 'content'}], + }, 'subs with mandatory items missing should be filtered' + assert traverse_obj([ + {'url': 'https://example.com/subs/de.ass', 'name': 'de'}, + {'url': 'https://example.com/subs/en', 'name': 'en'}, + ], [..., { + 'id': 'name', + 'ext': ['url', {determine_ext(default_ext=None)}], + 'url': 'url', + }, all, 
{subs_list_to_dict(ext='ext')}]) == { + 'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}], + 'en': [{'url': 'https://example.com/subs/en', 'ext': 'ext'}], + }, '`ext` should set default ext but leave existing value untouched' + assert traverse_obj([ + {'name': 'en', 'url': 'https://example.com/subs/en2', 'prio': True}, + {'name': 'en', 'url': 'https://example.com/subs/en1', 'prio': False}, + ], [..., { + 'id': 'name', + 'quality': ['prio', {int}], + 'url': 'url', + }, all, {subs_list_to_dict(ext='ext')}]) == {'en': [ + {'url': 'https://example.com/subs/en1', 'ext': 'ext'}, + {'url': 'https://example.com/subs/en2', 'ext': 'ext'}, + ]}, '`quality` key should sort subtitle list accordingly' + assert traverse_obj([ + {'name': 'de', 'url': 'https://example.com/subs/de.ass'}, + {'name': 'de'}, + {'name': 'en', 'content': 'content'}, + {'url': 'https://example.com/subs/en'}, + ], [..., { + 'id': 'name', + 'url': 'url', + 'data': 'content', + }, all, {subs_list_to_dict(lang='en')}]) == { + 'de': [{'url': 'https://example.com/subs/de.ass'}], + 'en': [ + {'data': 'content'}, + {'url': 'https://example.com/subs/en'}, + ], + }, 'optionally provided lang should be used if no id available' + assert traverse_obj([ + {'name': 1, 'url': 'https://example.com/subs/de1'}, + {'name': {}, 'url': 'https://example.com/subs/de2'}, + {'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'}, + {'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'}, + ], [..., { + 'id': 'name', + 'url': 'url', + 'ext': 'ext', + }, all, {subs_list_to_dict(lang=None)}]) == { + 'de': [ + {'url': 'https://example.com/subs/de3'}, + {'url': 'https://example.com/subs/de4'}, + ], + }, 'non str types should be ignored for id and ext' + assert traverse_obj([ + {'name': 1, 'url': 'https://example.com/subs/de1'}, + {'name': {}, 'url': 'https://example.com/subs/de2'}, + {'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'}, + {'name': 'de', 'ext': {}, 'url': 
'https://example.com/subs/de4'}, + ], [..., { + 'id': 'name', + 'url': 'url', + 'ext': 'ext', + }, all, {subs_list_to_dict(lang='de')}]) == { + 'de': [ + {'url': 'https://example.com/subs/de1'}, + {'url': 'https://example.com/subs/de2'}, + {'url': 'https://example.com/subs/de3'}, + {'url': 'https://example.com/subs/de4'}, + ], + }, 'non str types should be replaced by default id' + + def test_trim_str(self): + with pytest.raises(TypeError): + trim_str('positional') + + assert callable(trim_str(start='a')) + assert trim_str(start='ab')('abc') == 'c' + assert trim_str(end='bc')('abc') == 'a' + assert trim_str(start='a', end='c')('abc') == 'b' + assert trim_str(start='ab', end='c')('abc') == '' + assert trim_str(start='a', end='bc')('abc') == '' + assert trim_str(start='ab', end='bc')('abc') == '' + assert trim_str(start='abc', end='abc')('abc') == '' + assert trim_str(start='', end='')('abc') == 'abc' + + def test_unpack(self): + assert unpack(lambda *x: ''.join(map(str, x)))([1, 2, 3]) == '123' + assert unpack(join_nonempty)([1, 2, 3]) == '1-2-3' + assert unpack(join_nonempty, delim=' ')([1, 2, 3]) == '1 2 3' + with pytest.raises(TypeError): + unpack(join_nonempty)() + with pytest.raises(TypeError): + unpack() + + def test_find_element(self): + for improper_kwargs in [ + dict(attr='data-id'), + dict(value='y'), + dict(attr='data-id', value='y', cls='a'), + dict(attr='data-id', value='y', id='x'), + dict(cls='a', id='x'), + dict(cls='a', tag='p'), + dict(cls='[ab]', regex=True), + ]: + with pytest.raises(AssertionError): + find_element(**improper_kwargs)(_TEST_HTML) + + assert find_element(cls='a')(_TEST_HTML) == '1' + assert find_element(cls='a', html=True)(_TEST_HTML) == '
1
' + assert find_element(id='x')(_TEST_HTML) == '2' + assert find_element(id='[ex]')(_TEST_HTML) is None + assert find_element(id='[ex]', regex=True)(_TEST_HTML) == '2' + assert find_element(id='x', html=True)(_TEST_HTML) == '
2
' + assert find_element(attr='data-id', value='y')(_TEST_HTML) == '3' + assert find_element(attr='data-id', value='y(?:es)?')(_TEST_HTML) is None + assert find_element(attr='data-id', value='y(?:es)?', regex=True)(_TEST_HTML) == '3' + assert find_element( + attr='data-id', value='y', html=True)(_TEST_HTML) == '
3
' + + def test_find_elements(self): + for improper_kwargs in [ + dict(tag='p'), + dict(attr='data-id'), + dict(value='y'), + dict(attr='data-id', value='y', cls='a'), + dict(cls='a', tag='div'), + dict(cls='[ab]', regex=True), + ]: + with pytest.raises(AssertionError): + find_elements(**improper_kwargs)(_TEST_HTML) + + assert find_elements(cls='a')(_TEST_HTML) == ['1', '2', '4'] + assert find_elements(cls='a', html=True)(_TEST_HTML) == [ + '
1
', '
2
', '

4

'] + assert find_elements(attr='custom', value='z')(_TEST_HTML) == ['2', '3'] + assert find_elements(attr='custom', value='[ez]')(_TEST_HTML) == [] + assert find_elements(attr='custom', value='[ez]', regex=True)(_TEST_HTML) == ['2', '3', '5'] + class TestDictGet: def test_dict_get(self): diff --git a/test/test_update.py b/test/test_update.py index 63a21e445..23c12d38c 100644 --- a/test/test_update.py +++ b/test/test_update.py @@ -82,16 +82,32 @@ TEST_LOCKFILE_V1 = rf'''{TEST_LOCKFILE_COMMENT} lock 2022.08.18.36 .+ Python 3\.6 lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) +lock 2024.10.22 py2exe .+ +lock 2024.10.22 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b +lock 2024.10.22 (?!\w+_exe).+ Python 3\.8 +lock 2024.10.22 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) ''' TEST_LOCKFILE_V2_TMPL = r'''%s lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6 lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server) +lockV2 yt-dlp/yt-dlp 2024.10.22 py2exe .+ +lockV2 yt-dlp/yt-dlp 2024.10.22 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b +lockV2 yt-dlp/yt-dlp 2024.10.22 (?!\w+_exe).+ Python 3\.8 +lockV2 yt-dlp/yt-dlp 2024.10.22 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7 lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server) +lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 py2exe .+ +lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b +lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 (?!\w+_exe).+ Python 3\.8 +lockV2 yt-dlp/yt-dlp-nightly-builds 2024.10.22.051025 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ 
Python 3\.7 lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server) +lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.045052 py2exe .+ +lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 linux_(?:armv7l|aarch64)_exe .+-glibc2\.(?:[12]?\d|30)\b +lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 (?!\w+_exe).+ Python 3\.8 +lockV2 yt-dlp/yt-dlp-master-builds 2024.10.22.060347 win(?:_x86)?_exe Python 3\.[78].+ Windows-(?:7-|2008ServerR2) ''' TEST_LOCKFILE_V2 = TEST_LOCKFILE_V2_TMPL % TEST_LOCKFILE_COMMENT @@ -145,43 +161,76 @@ class TestUpdate(unittest.TestCase): for lockfile in (TEST_LOCKFILE_V1, TEST_LOCKFILE_V2, TEST_LOCKFILE_ACTUAL, TEST_LOCKFILE_FORK): # Normal operation test(lockfile, 'zip Python 3.12.0', '2023.12.31', '2023.12.31') - test(lockfile, 'zip stable Python 3.12.0', '2023.12.31', '2023.12.31', exact=True) - # Python 3.6 --update should update only to its lock + test(lockfile, 'zip Python 3.12.0', '2023.12.31', '2023.12.31', exact=True) + # py2exe should never update beyond 2024.10.22 + test(lockfile, 'py2exe Python 3.8', '2025.01.01', '2024.10.22') + test(lockfile, 'py2exe Python 3.8', '2025.01.01', None, exact=True) + # Python 3.6 --update should update only to the py3.6 lock test(lockfile, 'zip Python 3.6.0', '2023.11.16', '2022.08.18.36') - # --update-to an exact version later than the lock should return None - test(lockfile, 'zip stable Python 3.6.0', '2023.11.16', None, exact=True) - # Python 3.7 should be able to update to its lock + # Python 3.6 --update-to an exact version later than the py3.6 lock should return None + test(lockfile, 'zip Python 3.6.0', '2023.11.16', None, exact=True) + # Python 3.7 should be able to update to the py3.7 lock test(lockfile, 'zip Python 3.7.0', '2023.11.16', '2023.11.16') - test(lockfile, 'zip stable Python 3.7.1', '2023.11.16', '2023.11.16', exact=True) - # Non-win_x86_exe builds on py3.7 must be locked + test(lockfile, 'zip Python 3.7.1', '2023.11.16', '2023.11.16', 
exact=True) + # Non-win_x86_exe builds on py3.7 must be locked at py3.7 lock test(lockfile, 'zip Python 3.7.1', '2023.12.31', '2023.11.16') - test(lockfile, 'zip stable Python 3.7.1', '2023.12.31', None, exact=True) - test( # Windows Vista w/ win_x86_exe must be locked - lockfile, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-Vista-6.0.6003-SP2', + test(lockfile, 'zip Python 3.7.1', '2023.12.31', None, exact=True) + # Python 3.8 should only update to the py3.8 lock + test(lockfile, 'zip Python 3.8.10', '2025.01.01', '2024.10.22') + test(lockfile, 'zip Python 3.8.110', '2025.01.01', None, exact=True) + test( # Windows Vista w/ win_x86_exe must be locked at Vista lock + lockfile, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-Vista-6.0.6003-SP2', '2023.12.31', '2023.11.16') - test( # Windows 2008Server w/ win_x86_exe must be locked + test( # Windows 2008Server w/ win_x86_exe must be locked at Vista lock lockfile, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-2008Server', '2023.12.31', None, exact=True) - test( # Windows 7 w/ win_x86_exe py3.7 build should be able to update beyond lock - lockfile, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-7-6.1.7601-SP1', - '2023.12.31', '2023.12.31') - test( # Windows 8.1 w/ '2008Server' in platform string should be able to update beyond lock + test( # Windows 7 w/ win_x86_exe py3.7 build should be able to update beyond py3.7 lock + lockfile, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-7-6.1.7601-SP1', + '2023.12.31', '2023.12.31', exact=True) + test( # Windows 7 win_x86_exe should only update to Win7 lock + lockfile, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-7-6.1.7601-SP1', + '2025.01.01', '2024.10.22') + test( # Windows 2008ServerR2 win_exe should only update to Win7 lock + lockfile, 'win_exe Python 3.8.10 (CPython x86 32bit) - Windows-2008ServerR2', + '2025.12.31', '2024.10.22') + test( # Windows 8.1 w/ '2008Server' in platform string should be 
able to update beyond py3.7 lock lockfile, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-post2008Server-6.2.9200', '2023.12.31', '2023.12.31', exact=True) + test( # win_exe built w/Python 3.8 on Windows>=8 should be able to update beyond py3.8 lock + lockfile, 'win_exe Python 3.8.10 (CPython AMD64 64bit) - Windows-10-10.0.20348-SP0', + '2025.01.01', '2025.01.01', exact=True) + test( # linux_armv7l_exe w/glibc2.7 should only update to glibc<2.31 lock + lockfile, 'linux_armv7l_exe Python 3.8.0 (CPython armv7l 32bit) - Linux-6.5.0-1025-azure-armv7l-with-glibc2.7', + '2025.01.01', '2024.10.22') + test( # linux_armv7l_exe w/Python 3.8 and glibc>=2.31 should be able to update beyond py3.8 and glibc<2.31 locks + lockfile, 'linux_armv7l_exe Python 3.8.0 (CPython armv7l 32bit) - Linux-6.5.0-1025-azure-armv7l-with-glibc2.31', + '2025.01.01', '2025.01.01') + test( # linux_armv7l_exe w/glibc2.30 should only update to glibc<2.31 lock + lockfile, 'linux_armv7l_exe Python 3.8.0 (CPython armv7l 64bit) - Linux-6.5.0-1025-azure-aarch64-with-glibc2.30 (OpenSSL', + '2025.01.01', '2024.10.22') + test( # linux_aarch64_exe w/glibc2.17 should only update to glibc<2.31 lock + lockfile, 'linux_aarch64_exe Python 3.8.0 (CPython aarch64 64bit) - Linux-6.5.0-1025-azure-aarch64-with-glibc2.17', + '2025.01.01', '2024.10.22') + test( # linux_aarch64_exe w/glibc2.40 and glibc>=2.31 should be able to update beyond py3.8 and glibc<2.31 locks + lockfile, 'linux_aarch64_exe Python 3.8.0 (CPython aarch64 64bit) - Linux-6.5.0-1025-azure-aarch64-with-glibc2.40', + '2025.01.01', '2025.01.01') + test( # linux_aarch64_exe w/glibc2.3 should only update to glibc<2.31 lock + lockfile, 'linux_aarch64_exe Python 3.8.0 (CPython aarch64 64bit) - Linux-6.5.0-1025-azure-aarch64-with-glibc2.3 (OpenSSL', + '2025.01.01', '2024.10.22') # Forks can block updates to non-numeric tags rather than lock test(TEST_LOCKFILE_FORK, 'zip Python 3.6.3', 'pr0000', None, repo='fork/yt-dlp') - test(TEST_LOCKFILE_FORK, 'zip 
stable Python 3.7.4', 'pr0000', 'pr0000', repo='fork/yt-dlp') - test(TEST_LOCKFILE_FORK, 'zip stable Python 3.7.4', 'pr1234', None, repo='fork/yt-dlp') + test(TEST_LOCKFILE_FORK, 'zip Python 3.7.4', 'pr0000', 'pr0000', repo='fork/yt-dlp') + test(TEST_LOCKFILE_FORK, 'zip Python 3.7.4', 'pr1234', None, repo='fork/yt-dlp') test(TEST_LOCKFILE_FORK, 'zip Python 3.8.1', 'pr1234', 'pr1234', repo='fork/yt-dlp', exact=True) test( - TEST_LOCKFILE_FORK, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-Vista-6.0.6003-SP2', + TEST_LOCKFILE_FORK, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-Vista-6.0.6003-SP2', 'pr1234', None, repo='fork/yt-dlp') test( - TEST_LOCKFILE_FORK, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-7-6.1.7601-SP1', + TEST_LOCKFILE_FORK, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-7-6.1.7601-SP1', '2023.12.31', '2023.12.31', repo='fork/yt-dlp') test(TEST_LOCKFILE_FORK, 'zip Python 3.11.2', 'pr9999', None, repo='fork/yt-dlp', exact=True) - test(TEST_LOCKFILE_FORK, 'zip stable Python 3.12.0', 'pr9999', 'pr9999', repo='fork/yt-dlp') + test(TEST_LOCKFILE_FORK, 'zip Python 3.12.0', 'pr9999', 'pr9999', repo='fork/yt-dlp') def test_query_update(self): ydl = FakeYDL() diff --git a/test/test_utils.py b/test/test_utils.py index a2b459352..b3de14198 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -4,6 +4,7 @@ import os import sys import unittest +import unittest.mock import warnings import datetime as dt @@ -20,7 +21,6 @@ import xml.etree.ElementTree from yt_dlp.compat import ( compat_etree_fromstring, compat_HTMLParseError, - compat_os_name, ) from yt_dlp.utils import ( Config, @@ -48,7 +48,6 @@ from yt_dlp.utils import ( dfxp2srt, encode_base_n, encode_compat_str, - encodeFilename, expand_path, extract_attributes, extract_basic_auth, @@ -68,7 +67,6 @@ from yt_dlp.utils import ( get_elements_html_by_class, get_elements_text_and_html_by_attribute, int_or_none, - intlist_to_bytes, iri_to_uri, is_html, 
js_to_json, @@ -221,9 +219,10 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI') def test_sanitize_path(self): - if sys.platform != 'win32': - return + with unittest.mock.patch('sys.platform', 'win32'): + self._test_sanitize_path() + def _test_sanitize_path(self): self.assertEqual(sanitize_path('abc'), 'abc') self.assertEqual(sanitize_path('abc/def'), 'abc\\def') self.assertEqual(sanitize_path('abc\\def'), 'abc\\def') @@ -256,6 +255,11 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_path('./abc'), 'abc') self.assertEqual(sanitize_path('./../abc'), '..\\abc') + self.assertEqual(sanitize_path('\\abc'), '\\abc') + self.assertEqual(sanitize_path('C:abc'), 'C:abc') + self.assertEqual(sanitize_path('C:abc\\..\\'), 'C:..') + self.assertEqual(sanitize_path('C:\\abc:%(title)s.%(ext)s'), 'C:\\abc#%(title)s.%(ext)s') + def test_sanitize_url(self): self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar') self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar') @@ -337,11 +341,13 @@ class TestUtil(unittest.TestCase): self.assertEqual(remove_start(None, 'A - '), None) self.assertEqual(remove_start('A - B', 'A - '), 'B') self.assertEqual(remove_start('B - A', 'A - '), 'B - A') + self.assertEqual(remove_start('non-empty', ''), 'non-empty') def test_remove_end(self): self.assertEqual(remove_end(None, ' - B'), None) self.assertEqual(remove_end('A - B', ' - B'), 'A') self.assertEqual(remove_end('B - A', ' - B'), 'B - A') + self.assertEqual(remove_end('non-empty', ''), 'non-empty') def test_remove_quotes(self): self.assertEqual(remove_quotes(None), None) @@ -557,10 +563,10 @@ class TestUtil(unittest.TestCase): self.assertEqual(res_data, {'a': 'b', 'c': 'd'}) def test_shell_quote(self): - args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')] + args = ['ffmpeg', '-i', 'ñ€ß\'.mp4'] self.assertEqual( shell_quote(args), - """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg 
-i "ñ€ß'.mp4"''') + """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if os.name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''') def test_float_or_none(self): self.assertEqual(float_or_none('42.42'), 42.42) @@ -921,6 +927,11 @@ class TestUtil(unittest.TestCase): 'acodec': 'none', 'dynamic_range': 'HDR10', }) + self.assertEqual(parse_codecs('vp09.02.50.10.01.09.18.09.00'), { + 'vcodec': 'vp09.02.50.10.01.09.18.09.00', + 'acodec': 'none', + 'dynamic_range': 'HDR10', + }) self.assertEqual(parse_codecs('av01.0.12M.10.0.110.09.16.09.0'), { 'vcodec': 'av01.0.12M.10.0.110.09.16.09.0', 'acodec': 'none', @@ -1295,15 +1306,10 @@ class TestUtil(unittest.TestCase): self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') self.assertEqual(clean_html('a
\xa0b'), 'a\nb') - def test_intlist_to_bytes(self): - self.assertEqual( - intlist_to_bytes([0, 1, 127, 128, 255]), - b'\x00\x01\x7f\x80\xff') - def test_args_to_str(self): self.assertEqual( args_to_str(['foo', 'ba/r', '-baz', '2 be', '']), - 'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""', + 'foo ba/r -baz \'2 be\' \'\'' if os.name != 'nt' else 'foo ba/r -baz "2 be" ""', ) def test_parse_filesize(self): @@ -2103,7 +2109,7 @@ Line 1 assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=') assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz') - @unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows') + @unittest.skipUnless(os.name == 'nt', 'Only relevant on Windows') def test_windows_escaping(self): tests = [ 'test"&', @@ -2137,6 +2143,12 @@ Line 1 assert run_shell(args) == expected assert run_shell(shell_quote(args, shell=True)) == expected + def test_partial_application(self): + assert callable(int_or_none(scale=10)), 'missing positional parameter should apply partially' + assert int_or_none(10, scale=0.1) == 100, 'positionally passed argument should call function' + assert int_or_none(v=10) == 10, 'keyword passed positional should call function' + assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function' + if __name__ == '__main__': unittest.main() diff --git a/test/test_websockets.py b/test/test_websockets.py index 43f20ac65..06112cc0b 100644 --- a/test/test_websockets.py +++ b/test/test_websockets.py @@ -88,7 +88,7 @@ def create_wss_websocket_server(): certfn = os.path.join(TEST_DIR, 'testcert.pem') sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) sslctx.load_cert_chain(certfn, None) - return create_websocket_server(ssl_context=sslctx) + return create_websocket_server(ssl=sslctx) MTLS_CERT_DIR = os.path.join(TEST_DIR, 'testdata', 'certificate') @@ -103,7 +103,7 @@ def 
create_mtls_wss_websocket_server(): sslctx.load_verify_locations(cafile=cacertfn) sslctx.load_cert_chain(certfn, None) - return create_websocket_server(ssl_context=sslctx) + return create_websocket_server(ssl=sslctx) def create_legacy_wss_websocket_server(): @@ -112,7 +112,7 @@ def create_legacy_wss_websocket_server(): sslctx.maximum_version = ssl.TLSVersion.TLSv1_2 sslctx.set_ciphers('SHA1:AESCCM:aDSS:eNULL:aNULL') sslctx.load_cert_chain(certfn, None) - return create_websocket_server(ssl_context=sslctx) + return create_websocket_server(ssl=sslctx) def ws_validate_and_send(rh, req): @@ -139,7 +139,7 @@ class TestWebsSocketRequestHandlerConformance: cls.wss_thread, cls.wss_port = create_wss_websocket_server() cls.wss_base_url = f'wss://127.0.0.1:{cls.wss_port}' - cls.bad_wss_thread, cls.bad_wss_port = create_websocket_server(ssl_context=ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)) + cls.bad_wss_thread, cls.bad_wss_port = create_websocket_server(ssl=ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)) cls.bad_wss_host = f'wss://127.0.0.1:{cls.bad_wss_port}' cls.mtls_wss_thread, cls.mtls_wss_port = create_mtls_wss_websocket_server() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index d37df7a2e..2a99436a6 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -68,6 +68,11 @@ _SIG_TESTS = [ '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', 'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0', ), + ( + 'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js', + '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', + 'MyOSJXtKI3m-uME_jv7-pT12gOFC02RFkGoqWpzE0Cs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', + ), ] _NSIG_TESTS = [ @@ -179,6 +184,14 @@ _NSIG_TESTS = [ 
'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js', '-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw', ), + ( + 'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js', + 'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw', + ), + ( + 'https://www.youtube.com/s/player/3bb1f723/player_ias.vflset/en_US/base.js', + 'gK15nzVyaXE9RsMP3z', 'ZFFWFLPWx9DEgQ', + ), ] @@ -250,8 +263,11 @@ def signature(jscode, sig_input): def n_sig(jscode, sig_input): - funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode) - return JSInterpreter(jscode).call_function(funcname, sig_input) + ie = YoutubeIE(FakeYDL()) + funcname = ie._extract_n_function_name(jscode) + jsi = JSInterpreter(jscode) + func = jsi.extract_function_from_code(*ie._fixup_n_function_code(*jsi.extract_function_code(funcname))) + return func([sig_input]) make_sig_test = t_factory( diff --git a/test/testdata/netrc/netrc b/test/testdata/netrc/netrc new file mode 100644 index 000000000..bafe92fe6 --- /dev/null +++ b/test/testdata/netrc/netrc @@ -0,0 +1,4 @@ +machine normal_use login user password pass +machine empty_user login "" password pass +machine empty_pass login user password "" +machine both_empty login "" password "" diff --git a/test/testdata/netrc/print_netrc.py b/test/testdata/netrc/print_netrc.py new file mode 100644 index 000000000..5c25814f8 --- /dev/null +++ b/test/testdata/netrc/print_netrc.py @@ -0,0 +1,2 @@ +with open('./test/testdata/netrc/netrc', encoding='utf-8') as fp: + print(fp.read()) diff --git a/test/testdata/plugin_packages/testpackage/yt_dlp_plugins/extractor/package.py b/test/testdata/plugin_packages/testpackage/yt_dlp_plugins/extractor/package.py new file mode 100644 index 000000000..b860300d8 --- /dev/null +++ b/test/testdata/plugin_packages/testpackage/yt_dlp_plugins/extractor/package.py @@ -0,0 +1,5 @@ +from yt_dlp.extractor.common import InfoExtractor + + +class PackagePluginIE(InfoExtractor): + pass diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 
9691a1ea7..65b72e026 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -26,8 +26,8 @@ import unicodedata from .cache import Cache from .compat import urllib # isort: split -from .compat import compat_os_name, urllib_req_to_req -from .cookies import LenientSimpleCookie, load_cookies +from .compat import urllib_req_to_req +from .cookies import CookieLoadError, LenientSimpleCookie, load_cookies from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name from .downloader.rtmp import rtmpdump_version from .extractor import gen_extractor_classes, get_info_extractor @@ -109,7 +109,6 @@ from .utils import ( determine_ext, determine_protocol, encode_compat_str, - encodeFilename, escapeHTML, expand_path, extract_basic_auth, @@ -154,7 +153,6 @@ from .utils import ( try_get, url_basename, variadic, - version_tuple, windows_enable_vt_mode, write_json_file, write_string, @@ -168,7 +166,7 @@ from .utils.networking import ( ) from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__ -if compat_os_name == 'nt': +if os.name == 'nt': import ctypes @@ -251,7 +249,7 @@ class YoutubeDL: format_sort_force: Force the given format_sort. see "Sorting Formats" for more details. prefer_free_formats: Whether to prefer video formats with free containers - over non-free ones of same quality. + over non-free ones of the same quality. allow_multiple_video_streams: Allow multiple video streams to be merged into a single file allow_multiple_audio_streams: Allow multiple audio streams to be merged @@ -285,7 +283,7 @@ class YoutubeDL: rejecttitle: Reject downloads for matching titles. logger: Log messages to a logging.Logger instance. logtostderr: Print everything to stderr instead of stdout. - consoletitle: Display progress in console window's titlebar. + consoletitle: Display progress in the console window's titlebar. 
writedescription: Write the video description to a .description file writeinfojson: Write the video description to a .info.json file clean_infojson: Remove internal metadata from the infojson @@ -471,7 +469,7 @@ class YoutubeDL: The following options do not work when used through the API: filename, abort-on-error, multistreams, no-live-chat, format-sort, no-clean-infojson, no-playlist-metafiles, - no-keep-subs, no-attach-info-json, allow-unsafe-ext. + no-keep-subs, no-attach-info-json, allow-unsafe-ext, prefer-vp9-sort. Refer __init__.py for their implementation progress_template: Dictionary of templates for progress outputs. Allowed keys are 'download', 'postprocess', @@ -513,7 +511,7 @@ class YoutubeDL: The following options are used by the extractors: extractor_retries: Number of times to retry for known errors (default: 3) dynamic_mpd: Whether to process dynamic DASH manifests (default: True) - hls_split_discontinuity: Split HLS playlists to different formats at + hls_split_discontinuity: Split HLS playlists into different formats at discontinuities such as ad breaks (default: False) extractor_args: A dictionary of arguments to be passed to the extractors. See "EXTRACTOR ARGUMENTS" for details. @@ -553,7 +551,7 @@ class YoutubeDL: include_ads: - Doesn't work Download ads as well call_home: - Not implemented - Boolean, true iff we are allowed to contact the + Boolean, true if we are allowed to contact the yt-dlp servers for debugging. 
post_hooks: - Register a custom postprocessor A list of functions that get called as the final step @@ -644,7 +642,7 @@ class YoutubeDL: out=stdout, error=sys.stderr, screen=sys.stderr if self.params.get('quiet') else stdout, - console=None if compat_os_name == 'nt' else next( + console=None if os.name == 'nt' else next( filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None), ) @@ -953,7 +951,7 @@ class YoutubeDL: self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once) def _send_console_code(self, code): - if compat_os_name == 'nt' or not self._out_files.console: + if os.name == 'nt' or not self._out_files.console: return self._write_string(code, self._out_files.console) @@ -961,7 +959,7 @@ class YoutubeDL: if not self.params.get('consoletitle', False): return message = remove_terminal_sequences(message) - if compat_os_name == 'nt': + if os.name == 'nt': if ctypes.windll.kernel32.GetConsoleWindow(): # c_wchar_p() might not be necessary if `message` is # already of type unicode() @@ -1118,7 +1116,7 @@ class YoutubeDL: def raise_no_formats(self, info, forced=False, *, msg=None): has_drm = info.get('_has_drm') ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg) - msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!' + msg = msg or (has_drm and 'This video is DRM protected') or 'No video formats found!' 
if forced or not ignored: raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'], expected=has_drm or ignored or expected) @@ -1624,7 +1622,7 @@ class YoutubeDL: while True: try: return func(self, *args, **kwargs) - except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError): + except (CookieLoadError, DownloadCancelled, LazyList.IndexError, PagedList.IndexError): raise except ReExtractInfo as e: if e.expected: @@ -1949,6 +1947,7 @@ class YoutubeDL: 'playlist_uploader_id': ie_result.get('uploader_id'), 'playlist_channel': ie_result.get('channel'), 'playlist_channel_id': ie_result.get('channel_id'), + 'playlist_webpage_url': ie_result.get('webpage_url'), **kwargs, } if strict: @@ -2197,7 +2196,7 @@ class YoutubeDL: def _default_format_spec(self, info_dict): prefer_best = ( self.params['outtmpl']['default'] == '-' - or info_dict.get('is_live') and not self.params.get('live_from_start')) + or (info_dict.get('is_live') and not self.params.get('live_from_start'))) def can_merge(): merger = FFmpegMergerPP(self) @@ -2366,7 +2365,7 @@ class YoutubeDL: vexts=[f['ext'] for f in video_fmts], aexts=[f['ext'] for f in audio_fmts], preferences=(try_call(lambda: self.params['merge_output_format'].split('/')) - or self.params.get('prefer_free_formats') and ('webm', 'mkv'))) + or (self.params.get('prefer_free_formats') and ('webm', 'mkv')))) filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info)) @@ -2850,13 +2849,10 @@ class YoutubeDL: sanitize_string_field(fmt, 'format_id') sanitize_numeric_fields(fmt) fmt['url'] = sanitize_url(fmt['url']) - if fmt.get('ext') is None: - fmt['ext'] = determine_ext(fmt['url']).lower() + FormatSorter._fill_sorting_fields(fmt) if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'): if fmt.get('acodec') is None: fmt['acodec'] = fmt['ext'] - if fmt.get('protocol') is None: - fmt['protocol'] = determine_protocol(fmt) if fmt.get('resolution') is None: fmt['resolution'] = 
self.format_resolution(fmt, default=None) if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none': @@ -3259,9 +3255,9 @@ class YoutubeDL: if full_filename is None: return - if not self._ensure_dir_exists(encodeFilename(full_filename)): + if not self._ensure_dir_exists(full_filename): return - if not self._ensure_dir_exists(encodeFilename(temp_filename)): + if not self._ensure_dir_exists(temp_filename): return if self._write_description('video', info_dict, @@ -3293,16 +3289,16 @@ class YoutubeDL: if self.params.get('writeannotations', False): annofn = self.prepare_filename(info_dict, 'annotation') if annofn: - if not self._ensure_dir_exists(encodeFilename(annofn)): + if not self._ensure_dir_exists(annofn): return - if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)): + if not self.params.get('overwrites', True) and os.path.exists(annofn): self.to_screen('[info] Video annotations are already present') elif not info_dict.get('annotations'): self.report_warning('There are no annotations to write.') else: try: self.to_screen('[info] Writing video annotations to: ' + annofn) - with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: + with open(annofn, 'w', encoding='utf-8') as annofile: annofile.write(info_dict['annotations']) except (KeyError, TypeError): self.report_warning('There are no annotations to write.') @@ -3318,14 +3314,14 @@ class YoutubeDL: f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown') return True linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext')) - if not self._ensure_dir_exists(encodeFilename(linkfn)): + if not self._ensure_dir_exists(linkfn): return False - if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)): + if self.params.get('overwrites', True) and os.path.exists(linkfn): self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present') 
return True try: self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}') - with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', + with open(to_high_limit_path(linkfn), 'w', encoding='utf-8', newline='\r\n' if link_type == 'url' else '\n') as linkfile: template_vars = {'url': url} if link_type == 'desktop': @@ -3356,7 +3352,7 @@ class YoutubeDL: if self.params.get('skip_download'): info_dict['filepath'] = temp_filename - info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) + info_dict['__finaldir'] = os.path.dirname(os.path.abspath(full_filename)) info_dict['__files_to_move'] = files_to_move replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)) info_dict['__write_download_archive'] = self.params.get('force_write_download_archive') @@ -3486,7 +3482,7 @@ class YoutubeDL: self.report_file_already_downloaded(dl_filename) dl_filename = dl_filename or temp_filename - info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) + info_dict['__finaldir'] = os.path.dirname(os.path.abspath(full_filename)) except network_exceptions as err: self.report_error(f'unable to download video data: {err}') @@ -3545,8 +3541,8 @@ class YoutubeDL: and info_dict.get('container') == 'm4a_dash', 'writing DASH m4a. 
Only some players support this container', FFmpegFixupM4aPP) - ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts') - or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None, + ffmpeg_fixup((downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')) + or (info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None), 'Possible MPEG-TS in MP4 container or malformed AAC timestamps', FFmpegFixupM3u8PP) ffmpeg_fixup(downloader == 'dashsegments' @@ -3580,6 +3576,8 @@ class YoutubeDL: def wrapper(*args, **kwargs): try: res = func(*args, **kwargs) + except CookieLoadError: + raise except UnavailableVideoError as e: self.report_error(e) except DownloadCancelled as e: @@ -4068,6 +4066,10 @@ class YoutubeDL: write_debug(f'Proxy map: {self.proxies}') write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}') + if os.environ.get('YTDLP_NO_PLUGINS'): + write_debug('Plugins are forcibly disabled') + return + for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items(): display_list = ['{}{}'.format( klass.__name__, '' if klass.__name__ == name else f' as {name}') @@ -4083,17 +4085,6 @@ class YoutubeDL: if plugin_dirs: write_debug(f'Plugin directories: {plugin_dirs}') - # Not implemented - if False and self.params.get('call_home'): - ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode() - write_debug(f'Public IP address: {ipaddr}') - latest_version = self.urlopen( - 'https://yt-dl.org/latest/version').read().decode() - if version_tuple(latest_version) > version_tuple(__version__): - self.report_warning( - f'You are using an outdated version (newest version: {latest_version})! 
' - 'See https://yt-dl.org/update if you need help updating.') - @functools.cached_property def proxies(self): """Global proxy configuration""" @@ -4113,8 +4104,14 @@ class YoutubeDL: @functools.cached_property def cookiejar(self): """Global cookiejar instance""" - return load_cookies( - self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self) + try: + return load_cookies( + self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self) + except CookieLoadError as error: + cause = error.__context__ + # compat: <=py3.9: `traceback.format_exception` has a different signature + self.report_error(str(cause), tb=''.join(traceback.format_exception(None, cause, cause.__traceback__))) + raise @property def _opener(self): @@ -4300,7 +4297,7 @@ class YoutubeDL: else: try: self.to_screen(f'[info] Writing {label} description to: {descfn}') - with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: + with open(descfn, 'w', encoding='utf-8') as descfile: descfile.write(ie_result['description']) except OSError: self.report_error(f'Cannot write {label} description file {descfn}') @@ -4384,7 +4381,9 @@ class YoutubeDL: return None for idx, t in list(enumerate(thumbnails))[::-1]: - thumb_ext = (f'{t["id"]}.' 
if multiple else '') + determine_ext(t['url'], 'jpg') + thumb_ext = t.get('ext') or determine_ext(t['url'], 'jpg') + if multiple: + thumb_ext = f'{t["id"]}.{thumb_ext}' thumb_display_id = f'{label} thumbnail {t["id"]}' thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext')) thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext')) @@ -4400,7 +4399,7 @@ class YoutubeDL: try: uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {}))) self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}') - with open(encodeFilename(thumb_filename), 'wb') as thumbf: + with open(thumb_filename, 'wb') as thumbf: shutil.copyfileobj(uf, thumbf) ret.append((thumb_filename, thumb_filename_final)) t['filepath'] = thumb_filename diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index c0b8e3b50..20111175b 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -1,8 +1,8 @@ import sys -if sys.version_info < (3, 8): +if sys.version_info < (3, 9): raise ImportError( - f'You are using an unsupported version of Python. Only Python versions 3.8 and above are supported by yt-dlp') # noqa: F541 + f'You are using an unsupported version of Python. 
Only Python versions 3.9 and above are supported by yt-dlp') # noqa: F541 __license__ = 'The Unlicense' @@ -14,8 +14,7 @@ import os import re import traceback -from .compat import compat_os_name -from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS +from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError from .downloader.external import get_external_downloader from .extractor import list_extractor_classes from .extractor.adobepass import MSO_INFO @@ -34,6 +33,7 @@ from .postprocessor import ( ) from .update import Updater from .utils import ( + Config, NO_DEFAULT, POSTPROCESS_WHEN, DateRange, @@ -43,7 +43,6 @@ from .utils import ( GeoUtils, PlaylistEntries, SameFileError, - decodeOption, download_range_func, expand_path, float_or_none, @@ -158,6 +157,9 @@ def set_compat_opts(opts): opts.embed_infojson = False if 'format-sort' in opts.compat_opts: opts.format_sort.extend(FormatSorter.ytdl_default) + elif 'prefer-vp9-sort' in opts.compat_opts: + opts.format_sort.extend(FormatSorter._prefer_vp9_sort) + _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False) _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False) if _video_multistreams_set is False and _audio_multistreams_set is False: @@ -235,6 +237,11 @@ def validate_options(opts): validate_regex('format sorting', f, FormatSorter.regex) # Postprocessor formats + if opts.convertsubtitles == 'none': + opts.convertsubtitles = None + if opts.convertthumbnails == 'none': + opts.convertthumbnails = None + validate_regex('merge output format', opts.merge_output_format, r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS)))) validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE) @@ -874,8 +881,8 @@ def parse_options(argv=None): 'listsubtitles': opts.listsubtitles, 'subtitlesformat': opts.subtitlesformat, 
'subtitleslangs': opts.subtitleslangs, - 'matchtitle': decodeOption(opts.matchtitle), - 'rejecttitle': decodeOption(opts.rejecttitle), + 'matchtitle': opts.matchtitle, + 'rejecttitle': opts.rejecttitle, 'max_downloads': opts.max_downloads, 'prefer_free_formats': opts.prefer_free_formats, 'trim_file_name': opts.trim_file_name, @@ -962,6 +969,11 @@ def _real_main(argv=None): parser, opts, all_urls, ydl_opts = parse_options(argv) + # HACK: Set the plugin dirs early on + # TODO(coletdjnz): remove when plugin globals system is implemented + if opts.plugin_dirs is not None: + Config._plugin_dirs = list(map(expand_path, opts.plugin_dirs)) + # Dump user agent if opts.dump_user_agent: ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent']) @@ -1039,7 +1051,7 @@ def _real_main(argv=None): ydl.warn_if_short_id(args) # Show a useful error message and wait for keypress if not launched from shell on Windows - if not args and compat_os_name == 'nt' and getattr(sys, 'frozen', False): + if not args and os.name == 'nt' and getattr(sys, 'frozen', False): import ctypes.wintypes import msvcrt @@ -1050,7 +1062,7 @@ def _real_main(argv=None): # If we only have a single process attached, then the executable was double clicked # When using `pyinstaller` with `--onefile`, two processes get attached is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI') - if attached_processes == 1 or is_onefile and attached_processes == 2: + if attached_processes == 1 or (is_onefile and attached_processes == 2): print(parser._generate_error_message( 'Do not double-click the executable, instead call it from a command line.\n' 'Please read the README for further information on how to use yt-dlp: ' @@ -1079,7 +1091,7 @@ def main(argv=None): _IN_CLI = True try: _exit(*variadic(_real_main(argv))) - except DownloadError: + except (CookieLoadError, DownloadError): _exit(1) except SameFileError as e: _exit(f'ERROR: {e}') @@ -1097,9 
+1109,9 @@ def main(argv=None): from .extractor import gen_extractors, list_extractors __all__ = [ - 'main', 'YoutubeDL', - 'parse_options', 'gen_extractors', 'list_extractors', + 'main', + 'parse_options', ] diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index abf54a998..9908434a5 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -3,7 +3,6 @@ from math import ceil from .compat import compat_ord from .dependencies import Cryptodome -from .utils import bytes_to_intlist, intlist_to_bytes if Cryptodome.AES: def aes_cbc_decrypt_bytes(data, key, iv): @@ -17,15 +16,15 @@ if Cryptodome.AES: else: def aes_cbc_decrypt_bytes(data, key, iv): """ Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """ - return intlist_to_bytes(aes_cbc_decrypt(*map(bytes_to_intlist, (data, key, iv)))) + return bytes(aes_cbc_decrypt(*map(list, (data, key, iv)))) def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): """ Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """ - return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce)))) + return bytes(aes_gcm_decrypt_and_verify(*map(list, (data, key, tag, nonce)))) def aes_cbc_encrypt_bytes(data, key, iv, **kwargs): - return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs)) + return bytes(aes_cbc_encrypt(*map(list, (data, key, iv)), **kwargs)) BLOCK_SIZE_BYTES = 16 @@ -221,7 +220,7 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce): j0 = [*nonce, 0, 0, 0, 1] else: fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8 - ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big')) + ghash_in = nonce + [0] * fill + list((8 * len(nonce)).to_bytes(8, 'big')) j0 = ghash(hash_subkey, ghash_in) # TODO: add nonce support to aes_ctr_decrypt @@ -230,13 +229,13 @@ def aes_gcm_decrypt_and_verify(data, key, tag, nonce): iv_ctr = inc(j0) decrypted_data 
= aes_ctr_decrypt(data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr))) - pad_len = len(data) // 16 * 16 + pad_len = (BLOCK_SIZE_BYTES - (len(data) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES s_tag = ghash( hash_subkey, data - + [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) # pad - + bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data - + ((len(data) * 8).to_bytes(8, 'big'))), # length of data + + [0] * pad_len # pad + + list((0 * 8).to_bytes(8, 'big') # length of associated data + + ((len(data) * 8).to_bytes(8, 'big'))), # length of data ) if tag != aes_ctr_encrypt(s_tag, key, j0): @@ -300,8 +299,8 @@ def aes_decrypt_text(data, password, key_size_bytes): """ NONCE_LENGTH_BYTES = 8 - data = bytes_to_intlist(base64.b64decode(data)) - password = bytes_to_intlist(password.encode()) + data = list(base64.b64decode(data)) + password = list(password.encode()) key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password)) key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES) @@ -310,7 +309,7 @@ def aes_decrypt_text(data, password, key_size_bytes): cipher = data[NONCE_LENGTH_BYTES:] decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)) - return intlist_to_bytes(decrypted_data) + return bytes(decrypted_data) RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) @@ -535,19 +534,17 @@ def ghash(subkey, data): __all__ = [ 'aes_cbc_decrypt', 'aes_cbc_decrypt_bytes', - 'aes_ctr_decrypt', - 'aes_decrypt_text', - 'aes_decrypt', - 'aes_ecb_decrypt', - 'aes_gcm_decrypt_and_verify', - 'aes_gcm_decrypt_and_verify_bytes', - 'aes_cbc_encrypt', 'aes_cbc_encrypt_bytes', + 'aes_ctr_decrypt', 'aes_ctr_encrypt', + 'aes_decrypt', + 'aes_decrypt_text', + 'aes_ecb_decrypt', 'aes_ecb_encrypt', 'aes_encrypt', - + 'aes_gcm_decrypt_and_verify', + 'aes_gcm_decrypt_and_verify_bytes', 'key_expansion', 'pad_block', 'pkcs7_padding', diff --git 
a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py index d820adaf1..d77962068 100644 --- a/yt_dlp/compat/__init__.py +++ b/yt_dlp/compat/__init__.py @@ -1,5 +1,4 @@ import os -import sys import xml.etree.ElementTree as etree from .compat_utils import passthrough_module @@ -24,33 +23,14 @@ def compat_etree_fromstring(text): return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) -compat_os_name = os._name if os.name == 'java' else os.name - - -def compat_shlex_quote(s): - from ..utils import shell_quote - return shell_quote(s) - - def compat_ord(c): return c if isinstance(c, int) else ord(c) -if compat_os_name == 'nt' and sys.version_info < (3, 8): - # os.path.realpath on Windows does not follow symbolic links - # prior to Python 3.8 (see https://bugs.python.org/issue9949) - def compat_realpath(path): - while os.path.islink(path): - path = os.path.abspath(os.readlink(path)) - return os.path.realpath(path) -else: - compat_realpath = os.path.realpath - - # Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl # See https://github.com/yt-dlp/yt-dlp/issues/792 # https://docs.python.org/3/library/os.path.html#os.path.expanduser -if compat_os_name in ('nt', 'ce'): +if os.name in ('nt', 'ce'): def compat_expanduser(path): HOME = os.environ.get('HOME') if not HOME: diff --git a/yt_dlp/compat/_deprecated.py b/yt_dlp/compat/_deprecated.py index 607bae999..445acc1a0 100644 --- a/yt_dlp/compat/_deprecated.py +++ b/yt_dlp/compat/_deprecated.py @@ -8,16 +8,14 @@ passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn( DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6)) del passthrough_module -import base64 -import urllib.error -import urllib.parse +import functools # noqa: F401 +import os -compat_str = str -compat_b64decode = base64.b64decode +compat_os_name = os.name +compat_realpath = os.path.realpath -compat_urlparse = urllib.parse -compat_parse_qs = urllib.parse.parse_qs 
-compat_urllib_parse_unquote = urllib.parse.unquote -compat_urllib_parse_urlencode = urllib.parse.urlencode -compat_urllib_parse_urlparse = urllib.parse.urlparse + +def compat_shlex_quote(s): + from ..utils import shell_quote + return shell_quote(s) diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py index dfc792eae..dae2c1459 100644 --- a/yt_dlp/compat/_legacy.py +++ b/yt_dlp/compat/_legacy.py @@ -30,7 +30,7 @@ from asyncio import run as compat_asyncio_run # noqa: F401 from re import Pattern as compat_Pattern # noqa: F401 from re import match as compat_Match # noqa: F401 -from . import compat_expanduser, compat_HTMLParseError, compat_realpath +from . import compat_expanduser, compat_HTMLParseError from .compat_utils import passthrough_module from ..dependencies import brotli as compat_brotli # noqa: F401 from ..dependencies import websockets as compat_websockets # noqa: F401 @@ -78,7 +78,7 @@ compat_kwargs = lambda kwargs: kwargs compat_map = map compat_numeric_types = (int, float, complex) compat_os_path_expanduser = compat_expanduser -compat_os_path_realpath = compat_realpath +compat_os_path_realpath = os.path.realpath compat_print = print compat_shlex_split = shlex.split compat_socket_create_connection = socket.create_connection @@ -104,5 +104,12 @@ compat_xml_parse_error = compat_xml_etree_ElementTree_ParseError = etree.ParseEr compat_xpath = lambda xpath: xpath compat_zip = zip workaround_optparse_bug9161 = lambda: None +compat_str = str +compat_b64decode = base64.b64decode +compat_urlparse = urllib.parse +compat_parse_qs = urllib.parse.parse_qs +compat_urllib_parse_unquote = urllib.parse.unquote +compat_urllib_parse_urlencode = urllib.parse.urlencode +compat_urllib_parse_urlparse = urllib.parse.urlparse legacy = [] diff --git a/yt_dlp/compat/compat_utils.py b/yt_dlp/compat/compat_utils.py index d62b7d048..d8b3c45cd 100644 --- a/yt_dlp/compat/compat_utils.py +++ b/yt_dlp/compat/compat_utils.py @@ -57,7 +57,7 @@ def passthrough_module(parent, 
child, allowed_attributes=(..., ), *, callback=la callback(attr) return ret - @functools.lru_cache(maxsize=None) + @functools.cache def from_child(attr): nonlocal child if attr not in allowed_attributes: diff --git a/yt_dlp/compat/functools.py b/yt_dlp/compat/functools.py deleted file mode 100644 index 96689575f..000000000 --- a/yt_dlp/compat/functools.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F405 -from functools import * # noqa: F403 - -from .compat_utils import passthrough_module - -passthrough_module(__name__, 'functools') -del passthrough_module - -try: - _ = cache # >= 3.9 -except NameError: - cache = lru_cache(maxsize=None) diff --git a/yt_dlp/compat/urllib/request.py b/yt_dlp/compat/urllib/request.py index ad9fa83c8..dfc7f4a2d 100644 --- a/yt_dlp/compat/urllib/request.py +++ b/yt_dlp/compat/urllib/request.py @@ -7,9 +7,9 @@ passthrough_module(__name__, 'urllib.request') del passthrough_module -from .. import compat_os_name +import os -if compat_os_name == 'nt': +if os.name == 'nt': # On older Python versions, proxies are extracted from Windows registry erroneously. [1] # If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. 
[2] # It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade @@ -37,4 +37,4 @@ if compat_os_name == 'nt': def getproxies(): return getproxies_environment() or getproxies_registry_patched() -del compat_os_name +del os diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 070d2fcb9..fad323c90 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -25,7 +25,6 @@ from .aes import ( aes_gcm_decrypt_and_verify_bytes, unpad_pkcs7, ) -from .compat import compat_os_name from .dependencies import ( _SECRETSTORAGE_UNAVAILABLE_REASON, secretstorage, @@ -34,6 +33,7 @@ from .dependencies import ( from .minicurses import MultilinePrinter, QuietMultilinePrinter from .utils import ( DownloadError, + YoutubeDLError, Popen, error_to_str, expand_path, @@ -86,24 +86,31 @@ def _create_progress_bar(logger): return printer +class CookieLoadError(YoutubeDLError): + pass + + def load_cookies(cookie_file, browser_specification, ydl): - cookie_jars = [] - if browser_specification is not None: - browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification) - cookie_jars.append( - extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)) + try: + cookie_jars = [] + if browser_specification is not None: + browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification) + cookie_jars.append( + extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)) - if cookie_file is not None: - is_filename = is_path_like(cookie_file) - if is_filename: - cookie_file = expand_path(cookie_file) + if cookie_file is not None: + is_filename = is_path_like(cookie_file) + if is_filename: + cookie_file = expand_path(cookie_file) - jar = YoutubeDLCookieJar(cookie_file) - if not is_filename or os.access(cookie_file, os.R_OK): - jar.load() - cookie_jars.append(jar) + jar = YoutubeDLCookieJar(cookie_file) + 
if not is_filename or os.access(cookie_file, os.R_OK): + jar.load() + cookie_jars.append(jar) - return _merge_cookie_jars(cookie_jars) + return _merge_cookie_jars(cookie_jars) + except Exception: + raise CookieLoadError('failed to load cookies') def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None): @@ -188,7 +195,10 @@ def _extract_firefox_cookies(profile, container, logger): def _firefox_browser_dirs(): if sys.platform in ('cygwin', 'win32'): - yield os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles') + yield from map(os.path.expandvars, ( + R'%APPDATA%\Mozilla\Firefox\Profiles', + R'%LOCALAPPDATA%\Packages\Mozilla.Firefox_n80bbvh6b1yt2\LocalCache\Roaming\Mozilla\Firefox\Profiles', + )) elif sys.platform == 'darwin': yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles') @@ -294,12 +304,18 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger): raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"') logger.debug(f'Extracting cookies from: "{cookie_database_path}"') - decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring) - with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: cursor = None try: cursor = _open_database_copy(cookie_database_path, tmpdir) + + # meta_version is necessary to determine if we need to trim the hash prefix from the cookies + # Ref: https://chromium.googlesource.com/chromium/src/+/b02dcebd7cafab92770734dc2bc317bd07f1d891/net/extras/sqlite/sqlite_persistent_cookie_store.cc#223 + meta_version = int(cursor.execute('SELECT value FROM meta WHERE key = "version"').fetchone()[0]) + decryptor = get_cookie_decryptor( + config['browser_dir'], config['keyring_name'], logger, + keyring=keyring, meta_version=meta_version) + cursor.connection.text_factory = bytes column_names = _get_column_names(cursor, 'cookies') secure_column = 'is_secure' if 'is_secure' in 
column_names else 'secure' @@ -329,7 +345,7 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger): logger.debug(f'cookie version breakdown: {counts}') return jar except PermissionError as error: - if compat_os_name == 'nt' and error.errno == 13: + if os.name == 'nt' and error.errno == 13: message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info' logger.error(message) raise DownloadError(message) # force exit @@ -397,22 +413,23 @@ class ChromeCookieDecryptor: raise NotImplementedError('Must be implemented by sub classes') -def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None): +def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None, meta_version=None): if sys.platform == 'darwin': - return MacChromeCookieDecryptor(browser_keyring_name, logger) + return MacChromeCookieDecryptor(browser_keyring_name, logger, meta_version=meta_version) elif sys.platform in ('win32', 'cygwin'): - return WindowsChromeCookieDecryptor(browser_root, logger) - return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring) + return WindowsChromeCookieDecryptor(browser_root, logger, meta_version=meta_version) + return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring, meta_version=meta_version) class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): - def __init__(self, browser_keyring_name, logger, *, keyring=None): + def __init__(self, browser_keyring_name, logger, *, keyring=None, meta_version=None): self._logger = logger self._v10_key = self.derive_key(b'peanuts') self._empty_key = self.derive_key(b'') self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0} self._browser_keyring_name = browser_keyring_name self._keyring = keyring + self._meta_version = meta_version or 0 @functools.cached_property def _v11_key(self): @@ -441,14 +458,18 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): if version == b'v10': 
self._cookie_counts['v10'] += 1 - return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger) + return _decrypt_aes_cbc_multi( + ciphertext, (self._v10_key, self._empty_key), self._logger, + hash_prefix=self._meta_version >= 24) elif version == b'v11': self._cookie_counts['v11'] += 1 if self._v11_key is None: self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True) return None - return _decrypt_aes_cbc_multi(ciphertext, (self._v11_key, self._empty_key), self._logger) + return _decrypt_aes_cbc_multi( + ciphertext, (self._v11_key, self._empty_key), self._logger, + hash_prefix=self._meta_version >= 24) else: self._logger.warning(f'unknown cookie version: "{version}"', only_once=True) @@ -457,11 +478,12 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): class MacChromeCookieDecryptor(ChromeCookieDecryptor): - def __init__(self, browser_keyring_name, logger): + def __init__(self, browser_keyring_name, logger, meta_version=None): self._logger = logger password = _get_mac_keyring_password(browser_keyring_name, logger) self._v10_key = None if password is None else self.derive_key(password) self._cookie_counts = {'v10': 0, 'other': 0} + self._meta_version = meta_version or 0 @staticmethod def derive_key(password): @@ -479,7 +501,8 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor): self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True) return None - return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger) + return _decrypt_aes_cbc_multi( + ciphertext, (self._v10_key,), self._logger, hash_prefix=self._meta_version >= 24) else: self._cookie_counts['other'] += 1 @@ -489,10 +512,11 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor): class WindowsChromeCookieDecryptor(ChromeCookieDecryptor): - def __init__(self, browser_root, logger): + def __init__(self, browser_root, logger, meta_version=None): self._logger = logger self._v10_key = 
_get_windows_v10_key(browser_root, logger) self._cookie_counts = {'v10': 0, 'other': 0} + self._meta_version = meta_version or 0 def decrypt(self, encrypted_value): version = encrypted_value[:3] @@ -516,7 +540,9 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor): ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length] authentication_tag = raw_ciphertext[-authentication_tag_length:] - return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger) + return _decrypt_aes_gcm( + ciphertext, self._v10_key, nonce, authentication_tag, self._logger, + hash_prefix=self._meta_version >= 24) else: self._cookie_counts['other'] += 1 @@ -1002,10 +1028,12 @@ def pbkdf2_sha1(password, salt, iterations, key_length): return hashlib.pbkdf2_hmac('sha1', password, salt, iterations, key_length) -def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16): +def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16, hash_prefix=False): for key in keys: plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)) try: + if hash_prefix: + return plaintext[32:].decode() return plaintext.decode() except UnicodeDecodeError: pass @@ -1013,7 +1041,7 @@ def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' return None -def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger): +def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger, hash_prefix=False): try: plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce) except ValueError: @@ -1021,6 +1049,8 @@ def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger): return None try: + if hash_prefix: + return plaintext[32:].decode() return plaintext.decode() except UnicodeDecodeError: logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. 
Possibly the key is wrong?', only_once=True) @@ -1053,8 +1083,9 @@ def _decrypt_windows_dpapi(ciphertext, logger): ctypes.byref(blob_out), # pDataOut ) if not ret: - logger.warning('failed to decrypt with DPAPI', only_once=True) - return None + message = 'Failed to decrypt with DPAPI. See https://github.com/yt-dlp/yt-dlp/issues/10927 for more info' + logger.error(message) + raise DownloadError(message) # force exit result = ctypes.string_at(blob_out.pbData, blob_out.cbData) ctypes.windll.kernel32.LocalFree(blob_out.pbData) @@ -1248,8 +1279,8 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar): def _really_save(self, f, ignore_discard, ignore_expires): now = time.time() for cookie in self: - if (not ignore_discard and cookie.discard - or not ignore_expires and cookie.is_expired(now)): + if ((not ignore_discard and cookie.discard) + or (not ignore_expires and cookie.is_expired(now))): continue name, value = cookie.name, cookie.value if value is None: diff --git a/yt_dlp/dependencies/Cryptodome.py b/yt_dlp/dependencies/Cryptodome.py index 2cfa4c952..0e4404d49 100644 --- a/yt_dlp/dependencies/Cryptodome.py +++ b/yt_dlp/dependencies/Cryptodome.py @@ -24,7 +24,7 @@ try: from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401 from Crypto.Hash import CMAC, SHA1 # noqa: F401 from Crypto.PublicKey import RSA # noqa: F401 -except ImportError: +except (ImportError, OSError): __version__ = f'broken {__version__}'.strip() diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 2e3ea2fc4..e8dcb37cc 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -20,9 +20,7 @@ from ..utils import ( Namespace, RetryManager, classproperty, - decodeArgument, deprecation_warning, - encodeFilename, format_bytes, join_nonempty, parse_bytes, @@ -219,7 +217,7 @@ class FileDownloader: def temp_name(self, filename): """Returns a temporary filename for the given filename.""" if self.params.get('nopart', False) or filename == 
'-' or \ - (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))): + (os.path.exists(filename) and not os.path.isfile(filename)): return filename return filename + '.part' @@ -273,7 +271,7 @@ class FileDownloader: """Try to set the last-modified time of the given file.""" if last_modified_hdr is None: return - if not os.path.isfile(encodeFilename(filename)): + if not os.path.isfile(filename): return timestr = last_modified_hdr if timestr is None: @@ -432,13 +430,13 @@ class FileDownloader: """ nooverwrites_and_exists = ( not self.params.get('overwrites', True) - and os.path.exists(encodeFilename(filename)) + and os.path.exists(filename) ) if not hasattr(filename, 'write'): continuedl_and_exists = ( self.params.get('continuedl', True) - and os.path.isfile(encodeFilename(filename)) + and os.path.isfile(filename) and not self.params.get('nopart', False) ) @@ -448,7 +446,7 @@ class FileDownloader: self._hook_progress({ 'filename': filename, 'status': 'finished', - 'total_bytes': os.path.getsize(encodeFilename(filename)), + 'total_bytes': os.path.getsize(filename), }, info_dict) self._finish_multiline_status() return True, False @@ -489,9 +487,7 @@ class FileDownloader: if not self.params.get('verbose', False): return - str_args = [decodeArgument(a) for a in args] - if exe is None: - exe = os.path.basename(str_args[0]) + exe = os.path.basename(args[0]) - self.write_debug(f'{exe} command line: {shell_quote(str_args)}') + self.write_debug(f'{exe} command line: {shell_quote(args)}') diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index ae2372915..7f6b5b45c 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -23,7 +23,6 @@ from ..utils import ( cli_valueless_option, determine_ext, encodeArgument, - encodeFilename, find_available_port, remove_end, traverse_obj, @@ -67,7 +66,7 @@ class ExternalFD(FragmentFD): 'elapsed': time.time() - started, } if filename != '-': - fsize = 
os.path.getsize(encodeFilename(tmpfilename)) + fsize = os.path.getsize(tmpfilename) self.try_rename(tmpfilename, filename) status.update({ 'downloaded_bytes': fsize, @@ -184,9 +183,9 @@ class ExternalFD(FragmentFD): dest.write(decrypt_fragment(fragment, src.read())) src.close() if not self.params.get('keep_fragments', False): - self.try_remove(encodeFilename(fragment_filename)) + self.try_remove(fragment_filename) dest.close() - self.try_remove(encodeFilename(f'{tmpfilename}.frag.urls')) + self.try_remove(f'{tmpfilename}.frag.urls') return 0 def _call_process(self, cmd, info_dict): @@ -508,7 +507,7 @@ class FFmpegFD(ExternalFD): env = None proxy = self.params.get('proxy') if proxy: - if not re.match(r'^[\da-zA-Z]+://', proxy): + if not re.match(r'[\da-zA-Z]+://', proxy): proxy = f'http://{proxy}' if proxy.startswith('socks'): @@ -559,7 +558,7 @@ class FFmpegFD(ExternalFD): selected_formats = info_dict.get('requested_formats') or [info_dict] for i, fmt in enumerate(selected_formats): - is_http = re.match(r'^https?://', fmt['url']) + is_http = re.match(r'https?://', fmt['url']) cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else [] if cookies: args.extend(['-cookies', ''.join( @@ -620,7 +619,7 @@ class FFmpegFD(ExternalFD): args += self._configuration_args(('_o1', '_o', '')) args = [encodeArgument(opt) for opt in args] - args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) + args.append(ffpp._ffmpeg_filename_argument(tmpfilename)) self._debug_cmd(args) piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 0d00196e2..98784e703 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -9,10 +9,9 @@ import time from .common import FileDownloader from .http import HttpFD from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 -from ..compat import compat_os_name from ..networking import Request from 
..networking.exceptions import HTTPError, IncompleteRead -from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj +from ..utils import DownloadError, RetryManager, traverse_obj from ..utils.networking import HTTPHeaderDict from ..utils.progress import ProgressCalculator @@ -152,7 +151,7 @@ class FragmentFD(FileDownloader): if self.__do_ytdl_file(ctx): self._write_ytdl_file(ctx) if not self.params.get('keep_fragments', False): - self.try_remove(encodeFilename(ctx['fragment_filename_sanitized'])) + self.try_remove(ctx['fragment_filename_sanitized']) del ctx['fragment_filename_sanitized'] def _prepare_frag_download(self, ctx): @@ -188,7 +187,7 @@ class FragmentFD(FileDownloader): }) if self.__do_ytdl_file(ctx): - ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))) + ytdl_file_exists = os.path.isfile(self.ytdl_filename(ctx['filename'])) continuedl = self.params.get('continuedl', True) if continuedl and ytdl_file_exists: self._read_ytdl_file(ctx) @@ -390,7 +389,7 @@ class FragmentFD(FileDownloader): def __exit__(self, exc_type, exc_val, exc_tb): pass - if compat_os_name == 'nt': + if os.name == 'nt': def future_result(future): while True: try: diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 0a00d5dab..da2574da7 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -119,12 +119,12 @@ class HlsFD(FragmentFD): self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}') def is_ad_fragment_start(s): - return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s - or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) + return ((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s) + or (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))) def is_ad_fragment_end(s): - return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s - or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) + return 
((s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s) + or (s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))) fragments = [] diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index c0165790d..9c6dd8b79 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -15,7 +15,6 @@ from ..utils import ( ThrottledDownload, XAttrMetadataError, XAttrUnavailableError, - encodeFilename, int_or_none, parse_http_range, try_call, @@ -58,9 +57,8 @@ class HttpFD(FileDownloader): if self.params.get('continuedl', True): # Establish possible resume length - if os.path.isfile(encodeFilename(ctx.tmpfilename)): - ctx.resume_len = os.path.getsize( - encodeFilename(ctx.tmpfilename)) + if os.path.isfile(ctx.tmpfilename): + ctx.resume_len = os.path.getsize(ctx.tmpfilename) ctx.is_resume = ctx.resume_len > 0 @@ -241,7 +239,7 @@ class HttpFD(FileDownloader): ctx.resume_len = byte_counter else: try: - ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename)) + ctx.resume_len = os.path.getsize(ctx.tmpfilename) except FileNotFoundError: ctx.resume_len = 0 raise RetryDownload(e) diff --git a/yt_dlp/downloader/rtmp.py b/yt_dlp/downloader/rtmp.py index d7ffb3b34..1b831e5f3 100644 --- a/yt_dlp/downloader/rtmp.py +++ b/yt_dlp/downloader/rtmp.py @@ -8,7 +8,6 @@ from ..utils import ( Popen, check_executable, encodeArgument, - encodeFilename, get_exe_version, ) @@ -179,7 +178,7 @@ class RtmpFD(FileDownloader): return False while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live: - prevsize = os.path.getsize(encodeFilename(tmpfilename)) + prevsize = os.path.getsize(tmpfilename) self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes') time.sleep(5.0) # This seems to be needed args = [*basic_args, '--resume'] @@ -187,7 +186,7 @@ class RtmpFD(FileDownloader): args += ['--skip', '1'] args = [encodeArgument(a) for a in args] retval = run_rtmpdump(args) - cursize = os.path.getsize(encodeFilename(tmpfilename)) + cursize = 
os.path.getsize(tmpfilename) if prevsize == cursize and retval == RD_FAILED: break # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those @@ -196,7 +195,7 @@ class RtmpFD(FileDownloader): retval = RD_SUCCESS break if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE): - fsize = os.path.getsize(encodeFilename(tmpfilename)) + fsize = os.path.getsize(tmpfilename) self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes') self.try_rename(tmpfilename, filename) self._hook_progress({ diff --git a/yt_dlp/downloader/rtsp.py b/yt_dlp/downloader/rtsp.py index e89269fed..b4b0be7e6 100644 --- a/yt_dlp/downloader/rtsp.py +++ b/yt_dlp/downloader/rtsp.py @@ -2,7 +2,7 @@ import os import subprocess from .common import FileDownloader -from ..utils import check_executable, encodeFilename +from ..utils import check_executable class RtspFD(FileDownloader): @@ -26,7 +26,7 @@ class RtspFD(FileDownloader): retval = subprocess.call(args) if retval == 0: - fsize = os.path.getsize(encodeFilename(tmpfilename)) + fsize = os.path.getsize(tmpfilename) self.to_screen(f'\r[{args[0]}] {fsize} bytes') self.try_rename(tmpfilename, filename) self._hook_progress({ diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py index 961938d44..ddd912ca2 100644 --- a/yt_dlp/downloader/youtube_live_chat.py +++ b/yt_dlp/downloader/youtube_live_chat.py @@ -123,8 +123,8 @@ class YoutubeLiveChatFD(FragmentFD): data, lambda x: x['continuationContents']['liveChatContinuation'], dict) or {} - func = (info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live - or frag_index == 1 and try_refresh_replay_beginning + func = ((info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live) + or (frag_index == 1 and try_refresh_replay_beginning) or parse_actions_replay) return (True, *func(live_chat_continuation)) except HTTPError as err: diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 6ebe0be51..3d01ed6fe 100644 --- 
a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -208,6 +208,10 @@ from .bandcamp import ( BandcampUserIE, BandcampWeeklyIE, ) +from .bandlab import ( + BandlabIE, + BandlabPlaylistIE, +) from .bannedvideo import BannedVideoIE from .bbc import ( BBCIE, @@ -217,6 +221,7 @@ from .bbc import ( BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE, ) +from .beacon import BeaconTvIE from .beatbump import ( BeatBumpPlaylistIE, BeatBumpVideoIE, @@ -277,6 +282,7 @@ from .bleacherreport import ( from .blerp import BlerpIE from .blogger import BloggerIE from .bloomberg import BloombergIE +from .bluesky import BlueskyIE from .bokecc import BokeCCIE from .bongacams import BongaCamsIE from .boosty import BoostyIE @@ -362,7 +368,10 @@ from .ccc import ( ) from .ccma import CCMAIE from .cctv import CCTVIE -from .cda import CDAIE +from .cda import ( + CDAIE, + CDAFolderIE, +) from .cellebrite import CellebriteIE from .ceskatelevize import CeskaTelevizeIE from .cgtn import CGTNIE @@ -397,8 +406,6 @@ from .cmt import CMTIE from .cnbc import CNBCVideoIE from .cnn import ( CNNIE, - CNNArticleIE, - CNNBlogsIE, CNNIndonesiaIE, ) from .comedycentral import ( @@ -705,6 +712,7 @@ from .gab import ( GabTVIE, ) from .gaia import GaiaIE +from .gamedevtv import GameDevTVDashboardIE from .gamejolt import ( GameJoltCommunityIE, GameJoltGameIE, @@ -729,6 +737,7 @@ from .genius import ( GeniusIE, GeniusLyricsIE, ) +from .germanupa import GermanupaIE from .getcourseru import ( GetCourseRuIE, GetCourseRuPlayerIE, @@ -822,7 +831,10 @@ from .hungama import ( HungamaIE, HungamaSongIE, ) -from .huya import HuyaLiveIE +from .huya import ( + HuyaLiveIE, + HuyaVideoIE, +) from .hypem import HypemIE from .hypergryph import MonsterSirenHypergryphMusicIE from .hytale import HytaleIE @@ -934,6 +946,10 @@ from .kaltura import KalturaIE from .kankanews import KankaNewsIE from .karaoketv import KaraoketvIE from .kelbyone import KelbyOneIE +from .kenh14 import ( + Kenh14PlaylistIE, + Kenh14VideoIE, +) 
from .khanacademy import ( KhanAcademyIE, KhanAcademyUnitIE, @@ -945,6 +961,7 @@ from .kick import ( ) from .kicker import KickerIE from .kickstarter import KickStarterIE +from .kika import KikaIE from .kinja import KinjaEmbedIE from .kinopoisk import KinoPoiskIE from .kommunetv import KommunetvIE @@ -1036,10 +1053,7 @@ from .livestream import ( LivestreamShortenerIE, ) from .livestreamfails import LivestreamfailsIE -from .lnkgo import ( - LnkGoIE, - LnkIE, -) +from .lnk import LnkIE from .loom import ( LoomFolderIE, LoomIE, @@ -1125,12 +1139,6 @@ from .microsoftembed import ( MicrosoftMediusIE, ) from .microsoftstream import MicrosoftStreamIE -from .mildom import ( - MildomClipIE, - MildomIE, - MildomUserVodIE, - MildomVodIE, -) from .minds import ( MindsChannelIE, MindsGroupIE, @@ -1150,6 +1158,7 @@ from .mitele import MiTeleIE from .mixch import ( MixchArchiveIE, MixchIE, + MixchMovieIE, ) from .mixcloud import ( MixcloudIE, @@ -1164,6 +1173,7 @@ from .mlb import ( ) from .mlssoccer import MLSSoccerIE from .mocha import MochaVideoIE +from .mojevideo import MojevideoIE from .mojvideo import MojvideoIE from .monstercat import MonstercatIE from .motherless import ( @@ -1510,8 +1520,8 @@ from .pgatour import PGATourIE from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE +from .pialive import PiaLiveIE from .piapro import PiaproIE -from .piaulizaportal import PIAULIZAPortalIE from .picarto import ( PicartoIE, PicartoVodIE, @@ -1548,10 +1558,6 @@ from .podbayfm import ( ) from .podchaser import PodchaserIE from .podomatic import PodomaticIE -from .pokemon import ( - PokemonIE, - PokemonWatchIE, -) from .pokergo import ( PokerGoCollectionIE, PokerGoIE, @@ -1642,6 +1648,7 @@ from .radiokapital import ( RadioKapitalIE, RadioKapitalShowIE, ) +from .radioradicale import RadioRadicaleIE from .radiozet import RadioZetPodcastIE from .radlive import ( RadLiveChannelIE, @@ -1811,6 +1818,7 @@ from .screen9 
import Screen9IE from .screencast import ScreencastIE from .screencastify import ScreencastifyIE from .screencastomatic import ScreencastOMaticIE +from .screenrec import ScreenRecIE from .scrippsnetworks import ( ScrippsNetworksIE, ScrippsNetworksWatchIE, @@ -1821,6 +1829,7 @@ from .scte import ( SCTECourseIE, ) from .sejmpl import SejmIE +from .sen import SenIE from .senalcolombia import SenalColombiaLiveIE from .senategov import ( SenateGovIE, @@ -1876,6 +1885,7 @@ from .slideshare import SlideshareIE from .slideslive import SlidesLiveIE from .slutload import SlutloadIE from .smotrim import SmotrimIE +from .snapchat import SnapchatSpotlightIE from .snotr import SnotrIE from .sohu import ( SohuIE, @@ -1930,9 +1940,7 @@ from .spotify import ( ) from .spreaker import ( SpreakerIE, - SpreakerPageIE, SpreakerShowIE, - SpreakerShowPageIE, ) from .springboardplatform import SpringboardPlatformIE from .sprout import SproutIE @@ -2243,6 +2251,10 @@ from .ufctv import ( ) from .ukcolumn import UkColumnIE from .uktvplay import UKTVPlayIE +from .uliza import ( + UlizaPlayerIE, + UlizaPortalIE, +) from .umg import UMGDeIE from .unistra import UnistraIE from .unity import UnityIE @@ -2271,10 +2283,6 @@ from .utreon import UtreonIE from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE from .veo import VeoIE -from .veoh import ( - VeohIE, - VeohUserIE, -) from .vesti import VestiIE from .vevo import ( VevoIE, @@ -2312,6 +2320,7 @@ from .videomore import ( VideomoreVideoIE, ) from .videopress import VideoPressIE +from .vidflex import VidflexIE from .vidio import ( VidioIE, VidioLiveIE, diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index 7518ba6f0..7296be73b 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -387,17 +387,27 @@ class ABCIViewShowSeriesIE(InfoExtractor): 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$', }, 'playlist_count': 15, + 'skip': 'This program is not currently available in ABC iview', + }, { + 
'url': 'https://iview.abc.net.au/show/inbestigators', + 'info_dict': { + 'id': '175343-1', + 'title': 'Series 1', + 'description': 'md5:b9976935a6450e5b78ce2a940a755685', + 'series': 'The Inbestigators', + 'season': 'Series 1', + 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.+\.jpg', + }, + 'playlist_count': 17, }] def _real_extract(self, url): show_id = self._match_id(url) webpage = self._download_webpage(url, show_id) - webpage_data = self._search_regex( - r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;', - webpage, 'initial state') - video_data = self._parse_json( - unescapeHTML(webpage_data).encode().decode('unicode_escape'), show_id) - video_data = video_data['route']['pageData']['_embedded'] + video_data = self._search_json( + r'window\.__INITIAL_STATE__\s*=\s*[\'"]', webpage, 'initial state', show_id, + transform_source=lambda x: x.encode().decode('unicode_escape'), + end_pattern=r'[\'"]\s*;')['route']['pageData']['_embedded'] highlight = try_get(video_data, lambda x: x['highlightVideo']['shareUrl']) if not self._yes_playlist(show_id, bool(highlight), video_label='highlight video'): diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index 66ab083fe..b1343eed3 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -6,7 +6,6 @@ import hmac import io import json import re -import struct import time import urllib.parse import uuid @@ -18,10 +17,8 @@ from ..networking.exceptions import TransportError from ..utils import ( ExtractorError, OnDemandPagedList, - bytes_to_intlist, decode_base_n, int_or_none, - intlist_to_bytes, time_seconds, traverse_obj, update_url_query, @@ -72,15 +69,15 @@ class AbemaLicenseRH(RequestHandler): }) res = decode_base_n(license_response['k'], table=self._STRTABLE) - encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff)) + encvideokey = list(res.to_bytes(16, 'big')) h = hmac.new( binascii.unhexlify(self._HKEY), (license_response['cid'] + 
self.ie._DEVICE_ID).encode(), digestmod=hashlib.sha256) - enckey = bytes_to_intlist(h.digest()) + enckey = list(h.digest()) - return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey)) + return bytes(aes_ecb_decrypt(encvideokey, enckey)) class AbemaTVBaseIE(InfoExtractor): diff --git a/yt_dlp/extractor/academicearth.py b/yt_dlp/extractor/academicearth.py index d9691cb5c..b997a0288 100644 --- a/yt_dlp/extractor/academicearth.py +++ b/yt_dlp/extractor/academicearth.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class AcademicEarthCourseIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P[^?#/]+)' + _VALID_URL = r'https?://(?:www\.)?academicearth\.org/playlists/(?P[^?#/]+)' IE_NAME = 'AcademicEarth:Course' _TEST = { 'url': 'http://academicearth.org/playlists/laws-of-nature/', diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index 337071794..7dff40556 100644 --- a/yt_dlp/extractor/adn.py +++ b/yt_dlp/extractor/adn.py @@ -11,11 +11,9 @@ from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, ass_subtitles_timecode, - bytes_to_intlist, bytes_to_long, float_or_none, int_or_none, - intlist_to_bytes, join_nonempty, long_to_bytes, parse_iso8601, @@ -49,9 +47,9 @@ class ADNBaseIE(InfoExtractor): class ADNIE(ADNBaseIE): - _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?Pde)/)?video/[^/?#]+/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?Pde)/)?video/[^/?#]+/(?P\d+)' _TESTS = [{ - 'url': 'https://animationdigitalnetwork.com/video/fruits-basket/9841-episode-1-a-ce-soir', + 'url': 'https://animationdigitalnetwork.com/video/558-fruits-basket/9841-episode-1-a-ce-soir', 'md5': '1c9ef066ceb302c86f80c2b371615261', 'info_dict': { 'id': '9841', @@ -71,10 +69,7 @@ class ADNIE(ADNBaseIE): }, 'skip': 'Only available in French and German speaking Europe', }, { - 'url': 
'http://animedigitalnetwork.com/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', - 'only_matching': True, - }, { - 'url': 'https://animationdigitalnetwork.com/de/video/the-eminence-in-shadow/23550-folge-1', + 'url': 'https://animationdigitalnetwork.com/de/video/973-the-eminence-in-shadow/23550-folge-1', 'md5': '5c5651bf5791fa6fcd7906012b9d94e8', 'info_dict': { 'id': '23550', @@ -167,7 +162,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' 'username': username, })) or {}).get('accessToken') if access_token: - self._HEADERS = {'authorization': 'Bearer ' + access_token} + self._HEADERS['Authorization'] = f'Bearer {access_token}' except ExtractorError as e: message = None if isinstance(e.cause, HTTPError) and e.cause.status == 401: @@ -178,6 +173,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' def _real_extract(self, url): lang, video_id = self._match_valid_url(url).group('lang', 'id') + self._HEADERS['X-Target-Distribution'] = lang or 'fr' video_base_url = self._PLAYER_BASE_URL + f'video/{video_id}/' player = self._download_json( video_base_url + 'configuration', video_id, @@ -200,16 +196,16 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link') self._K = ''.join(random.choices('0123456789abcdef', k=16)) - message = bytes_to_intlist(json.dumps({ + message = list(json.dumps({ 'k': self._K, 't': token, - })) + }).encode()) # Sometimes authentication fails for no good reason, retry with # a different random padding links_data = None for _ in range(3): - padded_message = intlist_to_bytes(pkcs1pad(message, 128)) + padded_message = bytes(pkcs1pad(message, 128)) n, e = self._RSA_KEY encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n)) authorization = base64.b64encode(encrypted_message).decode() @@ -218,7 +214,6 @@ Format: 
Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' links_data = self._download_json( links_url, video_id, 'Downloading links JSON metadata', headers={ 'X-Player-Token': authorization, - 'X-Target-Distribution': lang or 'fr', **self._HEADERS, }, query={ 'freeWithAds': 'true', @@ -237,7 +232,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' error = self._parse_json(e.cause.response.read(), video_id) message = error.get('message') - if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country': + if e.cause.status == 403 and error.get('code') == 'player-bad-geolocation-country': self.raise_geo_restricted(msg=message) raise ExtractorError(message) else: @@ -257,6 +252,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' load_balancer_data = self._download_json( load_balancer_url, video_id, f'Downloading {format_id} {quality} JSON metadata', + headers=self._HEADERS, fatal=False) or {} m3u8_url = load_balancer_data.get('location') if not m3u8_url: @@ -277,7 +273,7 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' video = (self._download_json( self._API_BASE_URL + f'video/{video_id}', video_id, - 'Downloading additional video metadata', fatal=False) or {}).get('video') or {} + 'Downloading additional video metadata', fatal=False, headers=self._HEADERS) or {}).get('video') or {} show = video.get('show') or {} return { @@ -299,9 +295,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' class ADNSeasonIE(ADNBaseIE): - _VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.com/(?:(?Pde)/)?video/(?P[^/?#]+)/?(?:$|[#?])' + _VALID_URL = r'https?://(?:www\.)?animationdigitalnetwork\.com/(?:(?Pde)/)?video/(?P\d+)[^/?#]*/?(?:$|[#?])' _TESTS = [{ - 'url': 'https://animationdigitalnetwork.com/video/tokyo-mew-mew-new', + 'url': 'https://animationdigitalnetwork.com/video/911-tokyo-mew-mew-new', 'playlist_count': 12, 
'info_dict': { 'id': '911', @@ -312,16 +308,14 @@ class ADNSeasonIE(ADNBaseIE): def _real_extract(self, url): lang, video_show_slug = self._match_valid_url(url).group('lang', 'id') + self._HEADERS['X-Target-Distribution'] = lang or 'fr' show = self._download_json( f'{self._API_BASE_URL}show/{video_show_slug}/', video_show_slug, 'Downloading show JSON metadata', headers=self._HEADERS)['show'] show_id = str(show['id']) episodes = self._download_json( f'{self._API_BASE_URL}video/show/{show_id}', video_show_slug, - 'Downloading episode list', headers={ - 'X-Target-Distribution': lang or 'fr', - **self._HEADERS, - }, query={ + 'Downloading episode list', headers=self._HEADERS, query={ 'order': 'asc', 'limit': '-1', }) diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index eb7e597e5..f1b877927 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -1355,13 +1355,14 @@ MSO_INFO = { class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' + _MODERN_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0' _MVPD_CACHE = 'ap-mvpd' _DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page' def _download_webpage_handle(self, *args, **kwargs): headers = self.geo_verification_headers() - headers.update(kwargs.get('headers', {})) + headers.update(kwargs.get('headers') or {}) kwargs['headers'] = headers return super()._download_webpage_handle( *args, **kwargs) @@ -1454,7 +1455,11 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en 'no_iframe': 'false', 'domain_name': 'adobe.com', 'redirect_url': url, - }) + }, headers={ + # yt-dlp's default user-agent is usually too old for Comcast_SSO + # See: https://github.com/yt-dlp/yt-dlp/issues/10848 + 'User-Agent': 
self._MODERN_USER_AGENT, + } if mso_id == 'Comcast_SSO' else None) elif not self._cookies_passed: raise_mvpd_required() diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index 815d20537..572d1a389 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -33,21 +33,21 @@ class AfreecaTVBaseIE(InfoExtractor): } response = self._download_json( - 'https://login.afreecatv.com/app/LoginAction.php', None, + 'https://login.sooplive.co.kr/app/LoginAction.php', None, 'Logging in', data=urlencode_postdata(login_form)) _ERRORS = { -4: 'Your account has been suspended due to a violation of our terms and policies.', - -5: 'https://member.afreecatv.com/app/user_delete_progress.php', - -6: 'https://login.afreecatv.com/membership/changeMember.php', - -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.", - -9: 'https://member.afreecatv.com/app/pop_login_block.php', - -11: 'https://login.afreecatv.com/afreeca/second_login.php', - -12: 'https://member.afreecatv.com/app/user_security.php', + -5: 'https://member.sooplive.co.kr/app/user_delete_progress.php', + -6: 'https://login.sooplive.co.kr/membership/changeMember.php', + -8: "Hello! Soop here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. 
\nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.", + -9: 'https://member.sooplive.co.kr/app/pop_login_block.php', + -11: 'https://login.sooplive.co.kr/afreeca/second_login.php', + -12: 'https://member.sooplive.co.kr/app/user_security.php', 0: 'The username does not exist or you have entered the wrong password.', -1: 'The username does not exist or you have entered the wrong password.', -3: 'You have entered your username/password incorrectly.', - -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.', + -7: 'You cannot use your Global Soop account to access Korean Soop.', -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.', -32008: 'You have failed to log in. Please contact our Help Center.', } @@ -61,76 +61,48 @@ class AfreecaTVBaseIE(InfoExtractor): def _call_api(self, endpoint, display_id, data=None, headers=None, query=None): return self._download_json(Request( - f'https://api.m.afreecatv.com/{endpoint}', + f'https://api.m.sooplive.co.kr/{endpoint}', data=data, headers=headers, query=query, extensions={'legacy_ssl': True}), display_id, 'Downloading API JSON', 'Unable to download API JSON') + @staticmethod + def _fixup_thumb(thumb_url): + if not url_or_none(thumb_url): + return None + # Core would determine_ext as 'php' from the url, so we need to provide the real ext + # See: https://github.com/yt-dlp/yt-dlp/issues/11537 + return [{'url': thumb_url, 'ext': 'jpg'}] + class AfreecaTVIE(AfreecaTVBaseIE): - IE_NAME = 'afreecatv' - IE_DESC = 'afreecatv.com' - _VALID_URL = r'''(?x) - https?:// - (?: - (?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)? 
- (?: - /app/(?:index|read_ucc_bbs)\.cgi| - /player/[Pp]layer\.(?:swf|html) - )\?.*?\bnTitleNo=| - vod\.afreecatv\.com/(PLAYER/STATION|player)/ - ) - (?P\d+)/?(?:$|[?#&]) - ''' + IE_NAME = 'soop' + IE_DESC = 'sooplive.co.kr' + _VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/(?:PLAYER/STATION|player)/(?P\d+)/?(?:$|[?#&])' _TESTS = [{ - 'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=', - 'md5': 'f72c89fe7ecc14c1b5ce506c4996046e', + 'url': 'https://vod.sooplive.co.kr/player/96753363', 'info_dict': { - 'id': '36164052', + 'id': '20230108_9FF5BEE1_244432674_1', 'ext': 'mp4', - 'title': '데일리 에이프릴 요정들의 시상식!', - 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', - 'uploader': 'dailyapril', - 'uploader_id': 'dailyapril', - 'upload_date': '20160503', + 'uploader_id': 'rlantnghks', + 'uploader': '페이즈으', + 'duration': 10840, + 'thumbnail': r're:https?://videoimg\.sooplive\.co/.kr/.+', + 'upload_date': '20230108', + 'timestamp': 1673218805, + 'title': '젠지 페이즈', }, - 'skip': 'Video is gone', - }, { - 'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867', - 'info_dict': { - 'id': '36153164', - 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", - 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', - 'uploader': 'dailyapril', - 'uploader_id': 'dailyapril', + 'params': { + 'skip_download': True, }, - 'playlist_count': 2, - 'playlist': [{ - 'md5': 'd8b7c174568da61d774ef0203159bf97', - 'info_dict': { - 'id': '36153164_1', - 'ext': 'mp4', - 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", - 'upload_date': '20160502', - }, - }, { - 'md5': '58f2ce7f6044e34439ab2d50612ab02b', - 'info_dict': { - 'id': '36153164_2', - 'ext': 'mp4', - 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", - 'upload_date': '20160502', - }, - }], - 'skip': 'Video is gone', }, { # non standard key - 'url': 
'http://vod.afreecatv.com/PLAYER/STATION/20515605', + 'url': 'http://vod.sooplive.co.kr/PLAYER/STATION/20515605', 'info_dict': { 'id': '20170411_BE689A0E_190960999_1_2_h', 'ext': 'mp4', 'title': '혼자사는여자집', - 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', + 'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+', 'uploader': '♥이슬이', 'uploader_id': 'dasl8121', 'upload_date': '20170411', @@ -142,12 +114,12 @@ class AfreecaTVIE(AfreecaTVBaseIE): }, }, { # adult content - 'url': 'https://vod.afreecatv.com/player/97267690', + 'url': 'https://vod.sooplive.co.kr/player/97267690', 'info_dict': { 'id': '20180327_27901457_202289533_1', 'ext': 'mp4', 'title': '[생]빨개요♥ (part 1)', - 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', + 'thumbnail': r're:https?://(?:video|st)img\.sooplive\.co\.kr/.+', 'uploader': '[SA]서아', 'uploader_id': 'bjdyrksu', 'upload_date': '20180327', @@ -157,36 +129,17 @@ class AfreecaTVIE(AfreecaTVBaseIE): 'skip_download': True, }, 'skip': 'The VOD does not exist', - }, { - 'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652', - 'only_matching': True, - }, { - 'url': 'https://vod.afreecatv.com/player/96753363', - 'info_dict': { - 'id': '20230108_9FF5BEE1_244432674_1', - 'ext': 'mp4', - 'uploader_id': 'rlantnghks', - 'uploader': '페이즈으', - 'duration': 10840, - 'thumbnail': r're:https?://videoimg\.afreecatv\.com/.+', - 'upload_date': '20230108', - 'timestamp': 1673218805, - 'title': '젠지 페이즈', - }, - 'params': { - 'skip_download': True, - }, }, { # adult content - 'url': 'https://vod.afreecatv.com/player/70395877', + 'url': 'https://vod.sooplive.co.kr/player/70395877', 'only_matching': True, }, { # subscribers only - 'url': 'https://vod.afreecatv.com/player/104647403', + 'url': 'https://vod.sooplive.co.kr/player/104647403', 'only_matching': True, }, { # private - 'url': 'https://vod.afreecatv.com/player/81669846', + 'url': 
'https://vod.sooplive.co.kr/player/81669846', 'only_matching': True, }] @@ -209,8 +162,8 @@ class AfreecaTVIE(AfreecaTVBaseIE): 'title': ('title', {str}), 'uploader': ('writer_nick', {str}), 'uploader_id': ('bj_id', {str}), - 'duration': ('total_file_duration', {functools.partial(int_or_none, scale=1000)}), - 'thumbnail': ('thumb', {url_or_none}), + 'duration': ('total_file_duration', {int_or_none(scale=1000)}), + 'thumbnails': ('thumb', {self._fixup_thumb}), }) entries = [] @@ -233,7 +186,7 @@ class AfreecaTVIE(AfreecaTVBaseIE): 'title': f'{common_info.get("title") or "Untitled"} (part {file_num})', 'formats': formats, **traverse_obj(file_element, { - 'duration': ('duration', {functools.partial(int_or_none, scale=1000)}), + 'duration': ('duration', {int_or_none(scale=1000)}), 'timestamp': ('file_start', {unified_timestamp}), }), }) @@ -262,11 +215,11 @@ class AfreecaTVIE(AfreecaTVBaseIE): class AfreecaTVCatchStoryIE(AfreecaTVBaseIE): - IE_NAME = 'afreecatv:catchstory' - IE_DESC = 'afreecatv.com catch story' - _VALID_URL = r'https?://vod\.afreecatv\.com/player/(?P\d+)/catchstory' + IE_NAME = 'soop:catchstory' + IE_DESC = 'sooplive.co.kr catch story' + _VALID_URL = r'https?://vod\.(?:sooplive\.co\.kr|afreecatv\.com)/player/(?P\d+)/catchstory' _TESTS = [{ - 'url': 'https://vod.afreecatv.com/player/103247/catchstory', + 'url': 'https://vod.sooplive.co.kr/player/103247/catchstory', 'info_dict': { 'id': '103247', }, @@ -281,29 +234,28 @@ class AfreecaTVCatchStoryIE(AfreecaTVBaseIE): return self.playlist_result(self._entries(data), video_id) - @staticmethod - def _entries(data): + def _entries(self, data): # 'files' is always a list with 1 element yield from traverse_obj(data, ( 'data', lambda _, v: v['story_type'] == 'catch', 'catch_list', lambda _, v: v['files'][0]['file'], { 'id': ('files', 0, 'file_info_key', {str}), 'url': ('files', 0, 'file', {url_or_none}), - 'duration': ('files', 0, 'duration', {functools.partial(int_or_none, scale=1000)}), + 'duration': 
('files', 0, 'duration', {int_or_none(scale=1000)}), 'title': ('title', {str}), 'uploader': ('writer_nick', {str}), 'uploader_id': ('writer_id', {str}), - 'thumbnail': ('thumb', {url_or_none}), + 'thumbnails': ('thumb', {self._fixup_thumb}), 'timestamp': ('write_timestamp', {int_or_none}), })) class AfreecaTVLiveIE(AfreecaTVBaseIE): - IE_NAME = 'afreecatv:live' - IE_DESC = 'afreecatv.com livestreams' - _VALID_URL = r'https?://play\.afreeca(?:tv)?\.com/(?P[^/]+)(?:/(?P\d+))?' + IE_NAME = 'soop:live' + IE_DESC = 'sooplive.co.kr livestreams' + _VALID_URL = r'https?://play\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P[^/?#]+)(?:/(?P\d+))?' _TESTS = [{ - 'url': 'https://play.afreecatv.com/pyh3646/237852185', + 'url': 'https://play.sooplive.co.kr/pyh3646/237852185', 'info_dict': { 'id': '237852185', 'ext': 'mp4', @@ -315,30 +267,30 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE): }, 'skip': 'Livestream has ended', }, { - 'url': 'https://play.afreecatv.com/pyh3646/237852185', + 'url': 'https://play.sooplive.co.kr/pyh3646/237852185', 'only_matching': True, }, { - 'url': 'https://play.afreecatv.com/pyh3646', + 'url': 'https://play.sooplive.co.kr/pyh3646', 'only_matching': True, }] - _LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php' + _LIVE_API_URL = 'https://live.sooplive.co.kr/afreeca/player_live_api.php' _WORKING_CDNS = [ - 'gcp_cdn', # live-global-cdn-v02.afreecatv.com - 'gs_cdn_pc_app', # pc-app.stream.afreecatv.com - 'gs_cdn_mobile_web', # mobile-web.stream.afreecatv.com - 'gs_cdn_pc_web', # pc-web.stream.afreecatv.com + 'gcp_cdn', # live-global-cdn-v02.sooplive.co.kr + 'gs_cdn_pc_app', # pc-app.stream.sooplive.co.kr + 'gs_cdn_mobile_web', # mobile-web.stream.sooplive.co.kr + 'gs_cdn_pc_web', # pc-web.stream.sooplive.co.kr ] _BAD_CDNS = [ 'gs_cdn', # chromecast.afreeca.gscdn.com (cannot resolve) - 'gs_cdn_chromecast', # chromecast.stream.afreecatv.com (HTTP Error 400) - 'azure_cdn', # live-global-cdn-v01.afreecatv.com (cannot resolve) - 'aws_cf', # 
live-global-cdn-v03.afreecatv.com (cannot resolve) - 'kt_cdn', # kt.stream.afreecatv.com (HTTP Error 400) + 'gs_cdn_chromecast', # chromecast.stream.sooplive.co.kr (HTTP Error 400) + 'azure_cdn', # live-global-cdn-v01.sooplive.co.kr (cannot resolve) + 'aws_cf', # live-global-cdn-v03.sooplive.co.kr (cannot resolve) + 'kt_cdn', # kt.stream.sooplive.co.kr (HTTP Error 400) ] def _extract_formats(self, channel_info, broadcast_no, aid): - stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com' + stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.sooplive.co.kr' # If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs default_cdn_ids = orderedSet([ @@ -358,7 +310,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE): try: return self._extract_m3u8_formats( m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid}, - headers={'Referer': 'https://play.afreecatv.com/'}) + headers={'Referer': 'https://play.sooplive.co.kr/'}) except ExtractorError as e: if attempt == len(cdn_ids): raise @@ -374,7 +326,13 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE): broadcaster_id = channel_info.get('BJID') or broadcaster_id broadcast_no = channel_info.get('BNO') or broadcast_no if not broadcast_no: - raise UserNotLive(video_id=broadcaster_id) + result = channel_info.get('RESULT') + if result == 0: + raise UserNotLive(video_id=broadcaster_id) + elif result == -6: + self.raise_login_required( + 'This channel is streaming for subscribers only', method='password') + raise ExtractorError('Unable to extract broadcast number') password = self.get_param('videopassword') if channel_info.get('BPWD') == 'Y' and password is None: @@ -403,7 +361,7 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE): formats = self._extract_formats(channel_info, broadcast_no, aid) station_info = traverse_obj(self._download_json( - 'https://st.afreecatv.com/api/get_station_status.php', broadcast_no, + 
'https://st.sooplive.co.kr/api/get_station_status.php', broadcast_no, 'Downloading channel metadata', 'Unable to download channel metadata', query={'szBjId': broadcaster_id}, fatal=False), {dict}) or {} @@ -419,11 +377,11 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE): } -class AfreecaTVUserIE(InfoExtractor): - IE_NAME = 'afreecatv:user' - _VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P[^/]+)/vods/?(?P[^/]+)?' +class AfreecaTVUserIE(AfreecaTVBaseIE): + IE_NAME = 'soop:user' + _VALID_URL = r'https?://ch\.(?:sooplive\.co\.kr|afreecatv\.com)/(?P[^/?#]+)/vods/?(?P[^/?#]+)?' _TESTS = [{ - 'url': 'https://bj.afreecatv.com/ryuryu24/vods/review', + 'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/review', 'info_dict': { '_type': 'playlist', 'id': 'ryuryu24', @@ -431,7 +389,7 @@ class AfreecaTVUserIE(InfoExtractor): }, 'playlist_count': 218, }, { - 'url': 'https://bj.afreecatv.com/parang1995/vods/highlight', + 'url': 'https://ch.sooplive.co.kr/parang1995/vods/highlight', 'info_dict': { '_type': 'playlist', 'id': 'parang1995', @@ -439,7 +397,7 @@ class AfreecaTVUserIE(InfoExtractor): }, 'playlist_count': 997, }, { - 'url': 'https://bj.afreecatv.com/ryuryu24/vods', + 'url': 'https://ch.sooplive.co.kr/ryuryu24/vods', 'info_dict': { '_type': 'playlist', 'id': 'ryuryu24', @@ -447,7 +405,7 @@ class AfreecaTVUserIE(InfoExtractor): }, 'playlist_count': 221, }, { - 'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip', + 'url': 'https://ch.sooplive.co.kr/ryuryu24/vods/balloonclip', 'info_dict': { '_type': 'playlist', 'id': 'ryuryu24', @@ -459,12 +417,12 @@ class AfreecaTVUserIE(InfoExtractor): def _fetch_page(self, user_id, user_type, page): page += 1 - info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id, + info = self._download_json(f'https://chapi.sooplive.co.kr/api/{user_id}/vods/{user_type}', user_id, query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'}, note=f'Downloading {user_type} video page {page}') for 
item in info['data']: yield self.url_result( - f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no']) + f'https://vod.sooplive.co.kr/player/{item["title_no"]}/', AfreecaTVIE, item['title_no']) def _real_extract(self, url): user_id, user_type = self._match_valid_url(url).group('id', 'slug_type') diff --git a/yt_dlp/extractor/allstar.py b/yt_dlp/extractor/allstar.py index 5ea1c30e3..697d83c1e 100644 --- a/yt_dlp/extractor/allstar.py +++ b/yt_dlp/extractor/allstar.py @@ -71,7 +71,7 @@ class AllstarBaseIE(InfoExtractor): 'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {media_url_or_none}}), 'duration': ('clipLength', {int_or_none}), 'filesize': ('clipSizeBytes', {int_or_none}), - 'timestamp': ('createdDate', {functools.partial(int_or_none, scale=1000)}), + 'timestamp': ('createdDate', {int_or_none(scale=1000)}), 'uploader': ('username', {str}), 'uploader_id': ('user', '_id', {str}), 'view_count': ('views', {int_or_none}), diff --git a/yt_dlp/extractor/anvato.py b/yt_dlp/extractor/anvato.py index bf3d60b5e..bd3b19b13 100644 --- a/yt_dlp/extractor/anvato.py +++ b/yt_dlp/extractor/anvato.py @@ -8,10 +8,8 @@ import time from .common import InfoExtractor from ..aes import aes_encrypt from ..utils import ( - bytes_to_intlist, determine_ext, int_or_none, - intlist_to_bytes, join_nonempty, smuggle_url, strip_jsonp, @@ -33,24 +31,6 @@ class AnvatoIE(InfoExtractor): _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' # from anvplayer.min.js _TESTS = [{ - # from https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14 - 'url': 'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:899441', - 'md5': '921919dab3cd0b849ff3d624831ae3e2', - 'info_dict': { - 'id': '899441', - 'ext': 'mp4', - 'title': 'Baker Mayfield\'s game-changing plays from 3-TD game Week 14', - 'description': 'md5:85e05a3cc163f8c344340f220521136d', - 'upload_date': '20201215', - 'timestamp': 1608009755, - 'thumbnail': r're:^https?://.*\.jpg', - 'uploader': 
'NFL', - 'tags': ['Baltimore Ravens at Cleveland Browns (2020-REG-14)', 'Baker Mayfield', 'Game Highlights', - 'Player Highlights', 'Cleveland Browns', 'league'], - 'duration': 157, - 'categories': ['Entertainment', 'Game', 'Highlights'], - }, - }, { # from https://ktla.com/news/99-year-old-woman-learns-to-fly-in-torrance-checks-off-bucket-list-dream/ 'url': 'anvato:X8POa4zpGZMmeiq0wqiO8IP5rMqQM9VN:8032455', 'md5': '837718bcfb3a7778d022f857f7a9b19e', @@ -241,31 +221,6 @@ class AnvatoIE(InfoExtractor): 'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582', } - def _generate_nfl_token(self, anvack, mcp_id): - reroute = self._download_json( - 'https://api.nfl.com/v1/reroute', mcp_id, data=b'grant_type=client_credentials', - headers={'X-Domain-Id': 100}, note='Fetching token info') - token_type = reroute.get('token_type') or 'Bearer' - auth_token = f'{token_type} {reroute["access_token"]}' - response = self._download_json( - 'https://api.nfl.com/v3/shield/', mcp_id, data=json.dumps({ - 'query': '''{ - viewer { - mediaToken(anvack: "%s", id: %s) { - token - } - } -}''' % (anvack, mcp_id), # noqa: UP031 - }).encode(), headers={ - 'Authorization': auth_token, - 'Content-Type': 'application/json', - }, note='Fetching NFL API token') - return traverse_obj(response, ('data', 'viewer', 'mediaToken', 'token')) - - _TOKEN_GENERATORS = { - 'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': _generate_nfl_token, - } - def _server_time(self, access_key, video_id): return int_or_none(traverse_obj(self._download_json( f'{self._API_BASE_URL}/server_time', video_id, query={'anvack': access_key}, @@ -277,8 +232,8 @@ class AnvatoIE(InfoExtractor): server_time = self._server_time(access_key, video_id) input_data = f'{server_time}~{md5_text(video_data_url)}~{md5_text(server_time)}' - auth_secret = intlist_to_bytes(aes_encrypt( - bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY))) + auth_secret = bytes(aes_encrypt( + list(input_data[:64].encode()), 
list(self._AUTH_KEY))) query = { 'X-Anvato-Adst-Auth': base64.b64encode(auth_secret).decode('ascii'), 'rtyp': 'fp', @@ -290,8 +245,6 @@ class AnvatoIE(InfoExtractor): } if extracted_token is not None: api['anvstk2'] = extracted_token - elif self._TOKEN_GENERATORS.get(access_key) is not None: - api['anvstk2'] = self._TOKEN_GENERATORS[access_key](self, access_key, video_id) elif self._ANVACK_TABLE.get(access_key) is not None: api['anvstk'] = md5_text(f'{access_key}|{anvrid}|{server_time}|{self._ANVACK_TABLE[access_key]}') else: diff --git a/yt_dlp/extractor/applepodcasts.py b/yt_dlp/extractor/applepodcasts.py index bd301e904..b99d24e0e 100644 --- a/yt_dlp/extractor/applepodcasts.py +++ b/yt_dlp/extractor/applepodcasts.py @@ -1,27 +1,42 @@ from .common import InfoExtractor from ..utils import ( - clean_html, clean_podcast_url, - get_element_by_class, int_or_none, parse_iso8601, - try_get, ) +from ..utils.traversal import traverse_obj class ApplePodcastsIE(InfoExtractor): _VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P\d+)' _TESTS = [{ + 'url': 'https://podcasts.apple.com/us/podcast/ferreck-dawn-to-the-break-of-dawn-117/id1625658232?i=1000665010654', + 'md5': '82cc219b8cc1dcf8bfc5a5e99b23b172', + 'info_dict': { + 'id': '1000665010654', + 'ext': 'mp3', + 'title': 'Ferreck Dawn - To The Break of Dawn 117', + 'episode': 'Ferreck Dawn - To The Break of Dawn 117', + 'description': 'md5:1fc571102f79dbd0a77bfd71ffda23bc', + 'upload_date': '20240812', + 'timestamp': 1723449600, + 'duration': 3596, + 'series': 'Ferreck Dawn - To The Break of Dawn', + 'thumbnail': 're:.+[.](png|jpe?g|webp)', + }, + }, { 'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777', - 'md5': '41dc31cd650143e530d9423b6b5a344f', + 'md5': 'baf8a6b8b8aa6062dbb4639ed73d0052', 'info_dict': { 'id': '1000482637777', 'ext': 'mp3', 'title': '207 - Whitney Webb Returns', + 'episode': '207 - Whitney Webb Returns', + 
'episode_number': 207, 'description': 'md5:75ef4316031df7b41ced4e7b987f79c6', 'upload_date': '20200705', 'timestamp': 1593932400, - 'duration': 6454, + 'duration': 5369, 'series': 'The Tim Dillon Show', 'thumbnail': 're:.+[.](png|jpe?g|webp)', }, @@ -39,47 +54,24 @@ class ApplePodcastsIE(InfoExtractor): def _real_extract(self, url): episode_id = self._match_id(url) webpage = self._download_webpage(url, episode_id) - episode_data = {} - ember_data = {} - # new page type 2021-11 - amp_data = self._parse_json(self._search_regex( - r'(?s)id="shoebox-media-api-cache-amp-podcasts"[^>]*>\s*({.+?})\s*<', - webpage, 'AMP data', default='{}'), episode_id, fatal=False) or {} - amp_data = try_get(amp_data, - lambda a: self._parse_json( - next(a[x] for x in iter(a) if episode_id in x), - episode_id), - dict) or {} - amp_data = amp_data.get('d') or [] - episode_data = try_get( - amp_data, - lambda a: next(x for x in a - if x['type'] == 'podcast-episodes' and x['id'] == episode_id), - dict) - if not episode_data: - # try pre 2021-11 page type: TODO: consider deleting if no longer used - ember_data = self._parse_json(self._search_regex( - r'(?s)id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<', - webpage, 'ember data'), episode_id) or {} - ember_data = ember_data.get(episode_id) or ember_data - episode_data = try_get(ember_data, lambda x: x['data'], dict) - episode = episode_data['attributes'] - description = episode.get('description') or {} - - series = None - for inc in (amp_data or ember_data.get('included') or []): - if inc.get('type') == 'media/podcast': - series = try_get(inc, lambda x: x['attributes']['name']) - series = series or clean_html(get_element_by_class('podcast-header__identity', webpage)) + server_data = self._search_json( + r'', webpage), (..., {json.loads})) + data = get_first(post_data, ( + 'require', ..., ..., ..., '__bbox', 'require', ..., ..., ..., + 'entryPointRoot', 'otherProps', 'deeplinkAdCard', 'snapshot', {dict})) if not data: raise 
ExtractorError('Unable to extract ad data') title = data.get('title') if not title or title == '{{product.name}}': title = join_nonempty('display_format', 'page_name', delim=' by ', from_dict=data) + markup_id = traverse_obj(data, ('body', '__m', {str})) + markup = traverse_obj(post_data, ( + ..., 'require', ..., ..., ..., '__bbox', 'markup', lambda _, v: v[0].startswith(markup_id), + ..., '__html', {clean_html}, {lambda x: not x.startswith('{{product.') and x}, any)) - info_dict = traverse_obj(data, { - 'description': ('link_description', {str}, {lambda x: x if x != '{{product.description}}' else None}), + info_dict = merge_dicts({ + 'title': title, + 'description': markup or None, + }, traverse_obj(data, { + 'description': ('link_description', {lambda x: x if not x.startswith('{{product.') else None}), 'uploader': ('page_name', {str}), 'uploader_id': ('page_id', {str_or_none}), 'uploader_url': ('page_profile_uri', {url_or_none}), 'timestamp': ('creation_time', {int_or_none}), 'like_count': ('page_like_count', {int_or_none}), - }) + })) entries = [] for idx, entry in enumerate(traverse_obj( - data, (('videos', 'cards'), lambda _, v: any(url_or_none(v[f]) for f in self._FORMATS_MAP))), 1, + data, (('videos', 'cards'), lambda _, v: any(url_or_none(v.get(f)) for f in self._FORMATS_MAP))), 1, ): entries.append({ 'id': f'{video_id}_{idx}', 'title': entry.get('title') or title, - 'description': entry.get('link_description') or info_dict.get('description'), + 'description': traverse_obj(entry, 'body', 'link_description') or info_dict.get('description'), 'thumbnail': url_or_none(entry.get('video_preview_image_url')), 'formats': self._extract_formats(entry), }) diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py index eac70f6a9..f7b883155 100644 --- a/yt_dlp/extractor/fc2.py +++ b/yt_dlp/extractor/fc2.py @@ -14,7 +14,7 @@ from ..utils import ( class FC2IE(InfoExtractor): - _VALID_URL = r'^(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P[^/]+)' + 
_VALID_URL = r'(?:https?://video\.fc2\.com/(?:[^/]+/)*content/|fc2:)(?P[^/]+)' IE_NAME = 'fc2' _NETRC_MACHINE = 'fc2' _TESTS = [{ diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py index d3e61c84f..01b53bcde 100644 --- a/yt_dlp/extractor/funimation.py +++ b/yt_dlp/extractor/funimation.py @@ -193,9 +193,9 @@ class FunimationIE(FunimationBaseIE): for lang, version, fmt in self._get_experiences(episode): experience_id = str(fmt['experienceId']) - if (only_initial_experience and experience_id != initial_experience_id - or requested_languages and lang.lower() not in requested_languages - or requested_versions and version.lower() not in requested_versions): + if ((only_initial_experience and experience_id != initial_experience_id) + or (requested_languages and lang.lower() not in requested_languages) + or (requested_versions and version.lower() not in requested_versions)): continue thumbnails.append({'url': fmt.get('poster')}) duration = max(duration, fmt.get('duration', 0)) diff --git a/yt_dlp/extractor/funk.py b/yt_dlp/extractor/funk.py index 8bdea3fce..ef8ea72a8 100644 --- a/yt_dlp/extractor/funk.py +++ b/yt_dlp/extractor/funk.py @@ -3,7 +3,7 @@ from .nexx import NexxIE class FunkIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.|origin\.)?funk\.net/(?:channel|playlist)/[^/]+/(?P[0-9a-z-]+)-(?P\d+)' + _VALID_URL = r'https?://(?:(?:www|origin|play)\.)?funk\.net/(?:channel|playlist)/[^/?#]+/(?P[0-9a-z-]+)-(?P\d+)' _TESTS = [{ 'url': 'https://www.funk.net/channel/ba-793/die-lustigsten-instrumente-aus-dem-internet-teil-2-1155821', 'md5': '8610449476156f338761a75391b0017d', @@ -27,6 +27,9 @@ class FunkIE(InfoExtractor): }, { 'url': 'https://www.funk.net/playlist/neuesteVideos/kameras-auf-dem-fusion-festival-1618699', 'only_matching': True, + }, { + 'url': 'https://play.funk.net/playlist/neuesteVideos/george-floyd-wenn-die-polizei-toetet-der-fall-2004391', + 'only_matching': True, }] def _real_extract(self, url): diff --git 
a/yt_dlp/extractor/gamedevtv.py b/yt_dlp/extractor/gamedevtv.py new file mode 100644 index 000000000..06e8b7356 --- /dev/null +++ b/yt_dlp/extractor/gamedevtv.py @@ -0,0 +1,141 @@ +import json + +from .common import InfoExtractor +from ..networking.exceptions import HTTPError +from ..utils import ( + ExtractorError, + clean_html, + int_or_none, + join_nonempty, + parse_iso8601, + str_or_none, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class GameDevTVDashboardIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gamedev\.tv/dashboard/courses/(?P\d+)(?:/(?P\d+))?' + _NETRC_MACHINE = 'gamedevtv' + _TESTS = [{ + 'url': 'https://www.gamedev.tv/dashboard/courses/25', + 'info_dict': { + 'id': '25', + 'title': 'Complete Blender Creator 3: Learn 3D Modelling for Beginners', + 'tags': ['blender', 'course', 'all', 'box modelling', 'sculpting'], + 'categories': ['Blender', '3D Art'], + 'thumbnail': 'https://gamedev-files.b-cdn.net/courses/qisc9pmu1jdc.jpg', + 'upload_date': '20220516', + 'timestamp': 1652694420, + 'modified_date': '20241027', + 'modified_timestamp': 1730049658, + }, + 'playlist_count': 100, + }, { + 'url': 'https://www.gamedev.tv/dashboard/courses/63/2279', + 'info_dict': { + 'id': 'df04f4d8-68a4-4756-a71b-9ca9446c3a01', + 'ext': 'mp4', + 'modified_timestamp': 1701695752, + 'upload_date': '20230504', + 'episode': 'MagicaVoxel Community Course Introduction', + 'series_id': '63', + 'title': 'MagicaVoxel Community Course Introduction', + 'timestamp': 1683195397, + 'modified_date': '20231204', + 'categories': ['3D Art', 'MagicaVoxel'], + 'season': 'MagicaVoxel Community Course', + 'tags': ['MagicaVoxel', 'all', 'course'], + 'series': 'MagicaVoxel 3D Art Mini Course', + 'duration': 1405, + 'episode_number': 1, + 'season_number': 1, + 'season_id': '219', + 'description': 'md5:a378738c5bbec1c785d76c067652d650', + 'display_id': '63-219-2279', + 'alt_title': '1_CC_MVX MagicaVoxel Community Course Introduction.mp4', + 'thumbnail': 
'https://vz-23691c65-6fa.b-cdn.net/df04f4d8-68a4-4756-a71b-9ca9446c3a01/thumbnail.jpg', + }, + }] + _API_HEADERS = {} + + def _perform_login(self, username, password): + try: + response = self._download_json( + 'https://api.gamedev.tv/api/students/login', None, 'Logging in', + headers={'Content-Type': 'application/json'}, + data=json.dumps({ + 'email': username, + 'password': password, + 'cart_items': [], + }).encode()) + except ExtractorError as e: + if isinstance(e.cause, HTTPError) and e.cause.status == 401: + raise ExtractorError('Invalid username/password', expected=True) + raise + + self._API_HEADERS['Authorization'] = f'{response["token_type"]} {response["access_token"]}' + + def _real_initialize(self): + if not self._API_HEADERS.get('Authorization'): + self.raise_login_required( + 'This content is only available with purchase', method='password') + + def _entries(self, data, course_id, course_info, selected_lecture): + for section in traverse_obj(data, ('sections', ..., {dict})): + section_info = traverse_obj(section, { + 'season_id': ('id', {str_or_none}), + 'season': ('title', {str}), + 'season_number': ('order', {int_or_none}), + }) + for lecture in traverse_obj(section, ('lectures', lambda _, v: url_or_none(v['video']['playListUrl']))): + if selected_lecture and str(lecture.get('id')) != selected_lecture: + continue + display_id = join_nonempty(course_id, section_info.get('season_id'), lecture.get('id')) + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + lecture['video']['playListUrl'], display_id, 'mp4', m3u8_id='hls') + yield { + **course_info, + **section_info, + 'id': display_id, # fallback + 'display_id': display_id, + 'formats': formats, + 'subtitles': subtitles, + 'series': course_info.get('title'), + 'series_id': course_id, + **traverse_obj(lecture, { + 'id': ('video', 'guid', {str}), + 'title': ('title', {str}), + 'alt_title': ('video', 'title', {str}), + 'description': ('description', {clean_html}), + 'episode': ('title', 
{str}), + 'episode_number': ('order', {int_or_none}), + 'duration': ('video', 'duration_in_sec', {int_or_none}), + 'timestamp': ('video', 'created_at', {parse_iso8601}), + 'modified_timestamp': ('video', 'updated_at', {parse_iso8601}), + 'thumbnail': ('video', 'thumbnailUrl', {url_or_none}), + }), + } + + def _real_extract(self, url): + course_id, lecture_id = self._match_valid_url(url).group('course_id', 'lecture_id') + data = self._download_json( + f'https://api.gamedev.tv/api/courses/my/{course_id}', course_id, + headers=self._API_HEADERS)['data'] + + course_info = traverse_obj(data, { + 'title': ('title', {str}), + 'tags': ('tags', ..., 'name', {str}), + 'categories': ('categories', ..., 'title', {str}), + 'timestamp': ('created_at', {parse_iso8601}), + 'modified_timestamp': ('updated_at', {parse_iso8601}), + 'thumbnail': ('image', {url_or_none}), + }) + + entries = self._entries(data, course_id, course_info, lecture_id) + if lecture_id: + lecture = next(entries, None) + if not lecture: + raise ExtractorError('Lecture not found') + return lecture + return self.playlist_result(entries, course_id, **course_info) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 04cffaa86..320a47772 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -8,6 +8,9 @@ from .common import InfoExtractor from .commonprotocols import RtmpIE from .youtube import YoutubeIE from ..compat import compat_etree_fromstring +from ..cookies import LenientSimpleCookie +from ..networking.exceptions import HTTPError +from ..networking.impersonate import ImpersonateTarget from ..utils import ( KNOWN_EXTENSIONS, MEDIA_EXTENSIONS, @@ -2340,7 +2343,7 @@ class GenericIE(InfoExtractor): default_search = 'fixup_error' if default_search in ('auto', 'auto_warning', 'fixup_error'): - if re.match(r'^[^\s/]+\.[^\s/]+/', url): + if re.match(r'[^\s/]+\.[^\s/]+/', url): self.report_warning('The url doesn\'t specify the protocol, trying with http') return 
self.url_result('http://' + url) elif default_search != 'fixup_error': @@ -2373,6 +2376,11 @@ class GenericIE(InfoExtractor): else: video_id = self._generic_id(url) + # Do not impersonate by default; see https://github.com/yt-dlp/yt-dlp/issues/11335 + impersonate = self._configuration_arg('impersonate', ['false']) + if 'false' in impersonate: + impersonate = None + # Some webservers may serve compressed content of rather big size (e.g. gzipped flac) # making it impossible to download only chunk of the file (yet we need only 512kB to # test whether it's HTML or not). According to yt-dlp default Accept-Encoding @@ -2381,10 +2389,29 @@ class GenericIE(InfoExtractor): # to accept raw bytes and being able to download only a chunk. # It may probably better to solve this by checking Content-Type for application/octet-stream # after a HEAD request, but not sure if we can rely on this. - full_response = self._request_webpage(url, video_id, headers=filter_dict({ - 'Accept-Encoding': 'identity', - 'Referer': smuggled_data.get('referer'), - })) + try: + full_response = self._request_webpage(url, video_id, headers=filter_dict({ + 'Accept-Encoding': 'identity', + 'Referer': smuggled_data.get('referer'), + }), impersonate=impersonate) + except ExtractorError as e: + if not (isinstance(e.cause, HTTPError) and e.cause.status == 403 + and e.cause.response.get_header('cf-mitigated') == 'challenge' + and e.cause.response.extensions.get('impersonate') is None): + raise + cf_cookie_domain = traverse_obj( + LenientSimpleCookie(e.cause.response.get_header('set-cookie')), + ('__cf_bm', 'domain')) + if cf_cookie_domain: + self.write_debug(f'Clearing __cf_bm cookie for {cf_cookie_domain}') + self.cookiejar.clear(domain=cf_cookie_domain, path='/', name='__cf_bm') + msg = 'Got HTTP Error 403 caused by Cloudflare anti-bot challenge; ' + if not self._downloader._impersonate_target_available(ImpersonateTarget()): + msg += ('see https://github.com/yt-dlp/yt-dlp#impersonation for ' + 'how to 
install the required impersonation dependency, and ') + raise ExtractorError( + f'{msg}try again with --extractor-args "generic:impersonate"', expected=True) + new_url = full_response.url if new_url != extract_basic_auth(url)[0]: self.report_following_redirect(new_url) @@ -2400,7 +2427,7 @@ class GenericIE(InfoExtractor): # Check for direct link to a video content_type = full_response.headers.get('Content-Type', '').lower() - m = re.match(r'^(?Paudio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P[^;\s]+)', content_type) + m = re.match(r'(?Paudio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P[^;\s]+)', content_type) if m: self.report_detected('direct video link') headers = filter_dict({'Referer': smuggled_data.get('referer')}) diff --git a/yt_dlp/extractor/germanupa.py b/yt_dlp/extractor/germanupa.py new file mode 100644 index 000000000..e40f016b2 --- /dev/null +++ b/yt_dlp/extractor/germanupa.py @@ -0,0 +1,91 @@ +from .common import InfoExtractor +from .vimeo import VimeoIE +from ..utils import ( + parse_qs, + traverse_obj, + url_or_none, +) + + +class GermanupaIE(InfoExtractor): + IE_DESC = 'germanupa.de' + _VALID_URL = r'https?://germanupa\.de/mediathek/(?P[\w-]+)' + _TESTS = [{ + 'url': 'https://germanupa.de/mediathek/4-figma-beratung-deine-sprechstunde-fuer-figma-fragen', + 'info_dict': { + 'id': '909179246', + 'title': 'Tutorial: #4 Figma Beratung - Deine Sprechstunde für Figma-Fragen', + 'ext': 'mp4', + 'uploader': 'German UPA', + 'uploader_id': 'germanupa', + 'thumbnail': 'https://i.vimeocdn.com/video/1792564420-7415283ccef8bf8702dab8c6b7515555ceeb7a1c11371ffcc133b8e887dbf70e-d_1280', + 'uploader_url': 'https://vimeo.com/germanupa', + 'duration': 3987, + }, + 'expected_warnings': ['Failed to parse XML: not well-formed'], + 'params': {'skip_download': 'm3u8'}, + }, { + 'note': 'audio, uses GenericIE', + 'url': 
'https://germanupa.de/mediathek/live-vom-ux-festival-neuigkeiten-von-figma-jobmarkt-agenturszene-interview-zu-sustainable', + 'info_dict': { + 'id': '1867346676', + 'title': 'Live vom UX Festival: Neuigkeiten von Figma, Jobmarkt, Agenturszene & Interview zu Sustainable UX', + 'ext': 'opus', + 'timestamp': 1720545088, + 'upload_date': '20240709', + 'duration': 3910.557, + 'like_count': int, + 'description': 'md5:db2aed5ff131e177a7b33901e9a8db05', + 'uploader': 'German UPA', + 'repost_count': int, + 'genres': ['Science'], + 'license': 'all-rights-reserved', + 'uploader_url': 'https://soundcloud.com/user-80097677', + 'uploader_id': '471579486', + 'view_count': int, + 'comment_count': int, + 'thumbnail': 'https://i1.sndcdn.com/artworks-oCti2e9GhaZFWBqY-48ybGw-original.jpg', + }, + }, { + 'note': 'Nur für Mitglieder/Just for members', + 'url': 'https://germanupa.de/mediathek/ux-festival-2024-usability-tests-und-ai', + 'info_dict': { + 'id': '986994430', + 'title': 'UX Festival 2024 "Usability Tests und AI" von Lennart Weber', + 'ext': 'mp4', + 'release_date': '20240719', + 'uploader_url': 'https://vimeo.com/germanupa', + 'timestamp': 1721373980, + 'license': 'by-sa', + 'like_count': int, + 'thumbnail': 'https://i.vimeocdn.com/video/1904187064-2a672630c30f9ad787bd390bff3f51d7506a3e8416763ba6dbf465732b165c5c-d_1280', + 'duration': 2146, + 'release_timestamp': 1721373980, + 'uploader': 'German UPA', + 'uploader_id': 'germanupa', + 'upload_date': '20240719', + 'comment_count': int, + }, + 'expected_warnings': ['Failed to parse XML: not well-formed'], + 'skip': 'login required', + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + param_url = traverse_obj( + self._search_regex( + r']+data-src\s*?=\s*?([\'"])(?Phttps://germanupa\.de/media/oembed\?url=(?:(?!\1).)+)\1', + webpage, 'embedded video', default=None, group='url'), + ({parse_qs}, 'url', 0, {url_or_none})) + + if not param_url: + if 
self._search_regex( + r']+class\s*?=\s*?([\'"])(?:(?!\1).)*login-wrapper(?:(?!\1).)*\1', + webpage, 'login wrapper', default=None): + self.raise_login_required('This video is only available for members') + return self.url_result(url, 'Generic') # Fall back to generic to extract audio + + real_url = param_url.replace('https://vimeo.com/', 'https://player.vimeo.com/video/') + return self.url_result(VimeoIE._smuggle_referrer(real_url, url), VimeoIE, video_id) diff --git a/yt_dlp/extractor/getcourseru.py b/yt_dlp/extractor/getcourseru.py index 53b881011..b7581d77e 100644 --- a/yt_dlp/extractor/getcourseru.py +++ b/yt_dlp/extractor/getcourseru.py @@ -52,7 +52,7 @@ class GetCourseRuIE(InfoExtractor): _BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})' _VALID_URL = [ rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P[^?#]+)', - rf'{_BASE_URL_RE}/(:?pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P\d+)', + rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P\d+)', ] _TESTS = [{ 'url': 'http://academymel.online/3video_1', diff --git a/yt_dlp/extractor/golem.py b/yt_dlp/extractor/golem.py index 90d2fe6c2..964bf6519 100644 --- a/yt_dlp/extractor/golem.py +++ b/yt_dlp/extractor/golem.py @@ -7,7 +7,7 @@ from ..utils import ( class GolemIE(InfoExtractor): - _VALID_URL = r'^https?://video\.golem\.de/.+?/(?P.+?)/' + _VALID_URL = r'https?://video\.golem\.de/.+?/(?P.+?)/' _TEST = { 'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html', 'md5': 'c1a2c0a3c863319651c7c992c5ee29bf', diff --git a/yt_dlp/extractor/goplay.py b/yt_dlp/extractor/goplay.py index dfe5afe63..32300f75c 100644 --- a/yt_dlp/extractor/goplay.py +++ b/yt_dlp/extractor/goplay.py @@ -5,56 +5,63 @@ import hashlib import hmac import json import os +import re +import urllib.parse from .common import InfoExtractor from ..utils import ( ExtractorError, + int_or_none, + js_to_json, + remove_end, traverse_obj, - unescapeHTML, ) class 
GoPlayIE(InfoExtractor): - _VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/]+/[^/]+/|)(?P[^/#]+)' + _VALID_URL = r'https?://(www\.)?goplay\.be/video/([^/?#]+/[^/?#]+/|)(?P[^/#]+)' _NETRC_MACHINE = 'goplay' _TESTS = [{ - 'url': 'https://www.goplay.be/video/de-container-cup/de-container-cup-s3/de-container-cup-s3-aflevering-2#autoplay', + 'url': 'https://www.goplay.be/video/de-slimste-mens-ter-wereld/de-slimste-mens-ter-wereld-s22/de-slimste-mens-ter-wereld-s22-aflevering-1', 'info_dict': { - 'id': '9c4214b8-e55d-4e4b-a446-f015f6c6f811', + 'id': '2baa4560-87a0-421b-bffc-359914e3c387', 'ext': 'mp4', - 'title': 'S3 - Aflevering 2', - 'series': 'De Container Cup', - 'season': 'Season 3', - 'season_number': 3, - 'episode': 'Episode 2', - 'episode_number': 2, + 'title': 'S22 - Aflevering 1', + 'description': r're:In aflevering 1 nemen Daan Alferink, Tess Elst en Xander De Rycke .{66}', + 'series': 'De Slimste Mens ter Wereld', + 'episode': 'Episode 1', + 'season_number': 22, + 'episode_number': 1, + 'season': 'Season 22', }, + 'params': {'skip_download': True}, 'skip': 'This video is only available for registered users', }, { - 'url': 'https://www.goplay.be/video/a-family-for-thr-holidays-s1-aflevering-1#autoplay', + 'url': 'https://www.goplay.be/video/1917', 'info_dict': { - 'id': '74e3ed07-748c-49e4-85a0-393a93337dbf', + 'id': '40cac41d-8d29-4ef5-aa11-75047b9f0907', 'ext': 'mp4', - 'title': 'A Family for the Holidays', + 'title': '1917', + 'description': r're:Op het hoogtepunt van de Eerste Wereldoorlog krijgen twee jonge .{94}', }, + 'params': {'skip_download': True}, 'skip': 'This video is only available for registered users', }, { 'url': 'https://www.goplay.be/video/de-mol/de-mol-s11/de-mol-s11-aflevering-1#autoplay', 'info_dict': { - 'id': '03eb8f2f-153e-41cb-9805-0d3a29dab656', + 'id': 'ecb79672-92b9-4cd9-a0d7-e2f0250681ee', 'ext': 'mp4', 'title': 'S11 - Aflevering 1', + 'description': r're:Tien kandidaten beginnen aan hun verovering van Amerika en ontmoeten 
.{102}', 'episode': 'Episode 1', 'series': 'De Mol', 'season_number': 11, 'episode_number': 1, 'season': 'Season 11', }, - 'params': { - 'skip_download': True, - }, + 'params': {'skip_download': True}, 'skip': 'This video is only available for registered users', }] @@ -69,27 +76,42 @@ class GoPlayIE(InfoExtractor): if not self._id_token: raise self.raise_login_required(method='password') - def _real_extract(self, url): - url, display_id = self._match_valid_url(url).group(0, 'display_id') - webpage = self._download_webpage(url, display_id) - video_data_json = self._html_search_regex(r']*>\s*self\.__next_f\.push\(\s*(\[.+?\])\s*\);?\s*', webpage), + (..., {js_to_json}, {json.loads}, ..., {self._find_json}, ...)) + meta = traverse_obj(nextjs_data, ( + ..., lambda _, v: v['meta']['path'] == urllib.parse.urlparse(url).path, 'meta', any)) + + video_id = meta['uuid'] + info_dict = traverse_obj(meta, { + 'title': ('title', {str}), + 'description': ('description', {str.strip}), + }) + + if traverse_obj(meta, ('program', 'subtype')) != 'movie': + for season_data in traverse_obj(nextjs_data, (..., 'children', ..., 'playlists', ...)): + episode_data = traverse_obj( + season_data, ('videos', lambda _, v: v['videoId'] == video_id, any)) + if not episode_data: + continue + + episode_title = traverse_obj( + episode_data, 'contextualTitle', 'episodeTitle', expected_type=str) + info_dict.update({ + 'title': episode_title or info_dict.get('title'), + 'series': remove_end(info_dict.get('title'), f' - {episode_title}'), + 'season_number': traverse_obj(season_data, ('season', {int_or_none})), + 'episode_number': traverse_obj(episode_data, ('episodeNumber', {int_or_none})), + }) + break api = self._download_json( f'https://api.goplay.be/web/v1/videos/long-form/{video_id}', diff --git a/yt_dlp/extractor/hrfensehen.py b/yt_dlp/extractor/hrfensehen.py index 17673d5b8..b5a7b14a5 100644 --- a/yt_dlp/extractor/hrfensehen.py +++ b/yt_dlp/extractor/hrfensehen.py @@ -13,7 +13,7 @@ from ..utils 
import ( class HRFernsehenIE(InfoExtractor): IE_NAME = 'hrfernsehen' - _VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P[0-9]{6})\.html' + _VALID_URL = r'https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P[0-9]{6})\.html' _TESTS = [{ 'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html', 'md5': '5c4e0ba94677c516a2f65a84110fc536', diff --git a/yt_dlp/extractor/huya.py b/yt_dlp/extractor/huya.py index 5663a78a3..f79e032e4 100644 --- a/yt_dlp/extractor/huya.py +++ b/yt_dlp/extractor/huya.py @@ -8,15 +8,19 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, + parse_duration, str_or_none, try_get, unescapeHTML, + unified_strdate, update_url_query, + url_or_none, ) +from ..utils.traversal import traverse_obj class HuyaLiveIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?P[^/#?&]+)(?:\D|$)' + _VALID_URL = r'https?://(?:www\.|m\.)?huya\.com/(?!(?:video/play/))(?P[^/#?&]+)(?:\D|$)' IE_NAME = 'huya:live' IE_DESC = 'huya.com' TESTS = [{ @@ -24,6 +28,7 @@ class HuyaLiveIE(InfoExtractor): 'info_dict': { 'id': '572329', 'title': str, + 'ext': 'flv', 'description': str, 'is_live': True, 'view_count': int, @@ -131,3 +136,76 @@ class HuyaLiveIE(InfoExtractor): fm = base64.b64decode(params['fm']).decode().split('_', 1)[0] ss = hashlib.md5('|'.join([params['seqid'], params['ctype'], params['t']])) return fm, ss + + +class HuyaVideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?huya\.com/video/play/(?P\d+)\.html' + IE_NAME = 'huya:video' + IE_DESC = '虎牙视频' + + _TESTS = [{ + 'url': 'https://www.huya.com/video/play/1002412640.html', + 'info_dict': { + 'id': '1002412640', + 'ext': 'mp4', + 'title': '8月3日', + 'thumbnail': r're:https?://.*\.jpg', + 'duration': 14, + 'uploader': '虎牙-ATS欧卡车队青木', + 'uploader_id': '1564376151', + 'upload_date': '20240803', + 'view_count': int, + 'comment_count': int, + 'like_count': int, + }, + }, + { + 'url': 
'https://www.huya.com/video/play/556054543.html', + 'info_dict': { + 'id': '556054543', + 'ext': 'mp4', + 'title': '我不挑事 也不怕事', + 'thumbnail': r're:https?://.*\.jpg', + 'duration': 1864, + 'uploader': '卡尔', + 'uploader_id': '367138632', + 'upload_date': '20210811', + 'view_count': int, + 'comment_count': int, + 'like_count': int, + }, + }] + + def _real_extract(self, url: str): + video_id = self._match_id(url) + video_data = self._download_json( + 'https://liveapi.huya.com/moment/getMomentContent', video_id, + query={'videoId': video_id})['data']['moment']['videoInfo'] + + formats = [] + for definition in traverse_obj(video_data, ('definitions', lambda _, v: url_or_none(v['url']))): + formats.append({ + 'url': definition['url'], + **traverse_obj(definition, { + 'format_id': ('defName', {str}), + 'width': ('width', {int_or_none}), + 'height': ('height', {int_or_none}), + 'filesize': ('size', {int_or_none}), + }), + }) + + return { + 'id': video_id, + 'formats': formats, + **traverse_obj(video_data, { + 'title': ('videoTitle', {str}), + 'thumbnail': ('videoCover', {url_or_none}), + 'duration': ('videoDuration', {parse_duration}), + 'uploader': ('nickName', {str}), + 'uploader_id': ('uid', {str_or_none}), + 'upload_date': ('videoUploadTime', {unified_strdate}), + 'view_count': ('videoPlayNum', {int_or_none}), + 'comment_count': ('videoCommentNum', {int_or_none}), + 'like_count': ('favorCount', {int_or_none}), + }), + } diff --git a/yt_dlp/extractor/ilpost.py b/yt_dlp/extractor/ilpost.py index 2868f0c62..da203cf5f 100644 --- a/yt_dlp/extractor/ilpost.py +++ b/yt_dlp/extractor/ilpost.py @@ -1,4 +1,3 @@ -import functools from .common import InfoExtractor from ..utils import ( @@ -63,7 +62,7 @@ class IlPostIE(InfoExtractor): 'url': ('podcast_raw_url', {url_or_none}), 'thumbnail': ('image', {url_or_none}), 'timestamp': ('timestamp', {int_or_none}), - 'duration': ('milliseconds', {functools.partial(float_or_none, scale=1000)}), + 'duration': ('milliseconds', 
{float_or_none(scale=1000)}), 'availability': ('free', {lambda v: 'public' if v else 'subscriber_only'}), }), } diff --git a/yt_dlp/extractor/imgur.py b/yt_dlp/extractor/imgur.py index f0c3419d4..e2644e6a4 100644 --- a/yt_dlp/extractor/imgur.py +++ b/yt_dlp/extractor/imgur.py @@ -37,7 +37,7 @@ class ImgurBaseIE(InfoExtractor): class ImgurIE(ImgurBaseIE): - _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|t|topic|r)/)(?:[^/?#]+-)?(?P[a-zA-Z0-9]+)' _TESTS = [{ 'url': 'https://imgur.com/A61SaA1', @@ -54,6 +54,22 @@ class ImgurIE(ImgurBaseIE): 'like_count': int, 'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg', }, + }, { + # Test with URL slug + 'url': 'https://imgur.com/mrw-gifv-is-up-running-without-any-bugs-A61SaA1', + 'info_dict': { + 'id': 'A61SaA1', + 'ext': 'mp4', + 'title': 'MRW gifv is up and running without any bugs', + 'timestamp': 1416446068, + 'upload_date': '20141120', + 'dislike_count': int, + 'comment_count': int, + 'release_timestamp': 1416446068, + 'release_date': '20141120', + 'like_count': int, + 'thumbnail': 'https://i.imgur.com/A61SaA1h.jpg', + }, }, { 'url': 'https://i.imgur.com/A61SaA1.gifv', 'only_matching': True, @@ -92,6 +108,7 @@ class ImgurIE(ImgurBaseIE): 'comment_count': int, 'release_timestamp': 1710491255, 'release_date': '20240315', + 'thumbnail': 'https://i.imgur.com/zV03bd5h.jpg', }, }] @@ -208,7 +225,10 @@ class ImgurIE(ImgurBaseIE): }), get_all=False), 'id': video_id, 'formats': formats, - 'thumbnail': url_or_none(search('thumbnailUrl')), + 'thumbnails': [{ + 'url': thumbnail_url, + 'http_headers': {'Accept': '*/*'}, + }] if (thumbnail_url := search(['thumbnailUrl', 'twitter:image', 'og:image'])) else None, 'http_headers': {'Accept': '*/*'}, } @@ -252,17 +272,9 @@ class ImgurGalleryBaseIE(ImgurBaseIE): class ImgurGalleryIE(ImgurGalleryBaseIE): IE_NAME = 'imgur:gallery' - _VALID_URL = 
r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/?#]+)/(?:[^/?#]+-)?(?P[a-zA-Z0-9]+)' _TESTS = [{ - 'url': 'http://imgur.com/gallery/Q95ko', - 'info_dict': { - 'id': 'Q95ko', - 'title': 'Adding faces make every GIF better', - }, - 'playlist_count': 25, - 'skip': 'Zoinks! You\'ve taken a wrong turn.', - }, { # TODO: static images - replace with animated/video gallery 'url': 'http://imgur.com/topic/Aww/ll5Vk', 'only_matching': True, @@ -280,7 +292,27 @@ class ImgurGalleryIE(ImgurGalleryBaseIE): 'release_timestamp': 1358554297, 'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg', 'release_date': '20130119', - 'uploader_url': 'https://i.imgur.com/u3R4I2S_d.png?maxwidth=290&fidelity=grand', + 'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand', + 'comment_count': int, + 'dislike_count': int, + 'like_count': int, + }, + }, { + # Test with slug + 'url': 'https://imgur.com/gallery/classic-steve-carell-gif-cracks-me-up-everytime-repost-downvotes-YcAQlkx', + 'add_ies': ['Imgur'], + 'info_dict': { + 'id': 'YcAQlkx', + 'ext': 'mp4', + 'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....', + 'timestamp': 1358554297, + 'upload_date': '20130119', + 'uploader_id': '1648642', + 'uploader': 'wittyusernamehere', + 'release_timestamp': 1358554297, + 'release_date': '20130119', + 'thumbnail': 'https://i.imgur.com/YcAQlkxh.jpg', + 'uploader_url': 'https://i.imgur.com/N5Flb2v_d.png?maxwidth=290&fidelity=grand', 'comment_count': int, 'dislike_count': int, 'like_count': int, @@ -317,6 +349,13 @@ class ImgurGalleryIE(ImgurGalleryBaseIE): 'title': 'Penguins !', }, 'playlist_count': 3, + }, { + 'url': 'https://imgur.com/t/unmuted/penguins-penguins-6lAn9VQ', + 'info_dict': { + 'id': '6lAn9VQ', + 'title': 'Penguins !', + }, + 'playlist_count': 3, }, { 'url': 'https://imgur.com/t/unmuted/kx2uD3C', 'add_ies': ['Imgur'], @@ 
-357,7 +396,7 @@ class ImgurGalleryIE(ImgurGalleryBaseIE): class ImgurAlbumIE(ImgurGalleryBaseIE): IE_NAME = 'imgur:album' - _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?:[^/?#]+-)?(?P[a-zA-Z0-9]+)' _GALLERY = False _TESTS = [{ # TODO: only static images - replace with animated/video gallery @@ -372,6 +411,14 @@ class ImgurAlbumIE(ImgurGalleryBaseIE): 'title': 'enen-no-shouboutai', }, 'playlist_count': 2, + }, { + # Test with URL slug + 'url': 'https://imgur.com/a/enen-no-shouboutai-iX265HX', + 'info_dict': { + 'id': 'iX265HX', + 'title': 'enen-no-shouboutai', + }, + 'playlist_count': 2, }, { 'url': 'https://imgur.com/a/8pih2Ed', 'info_dict': { diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index 754f710ae..55086d0b2 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -48,7 +48,6 @@ class InstagramBaseIE(InfoExtractor): 'X-IG-WWW-Claim': '0', 'Origin': 'https://www.instagram.com', 'Accept': '*/*', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36', } def _perform_login(self, username, password): @@ -255,7 +254,7 @@ class InstagramIOSIE(InfoExtractor): class InstagramIE(InstagramBaseIE): - _VALID_URL = r'(?Phttps?://(?:www\.)?instagram\.com(?:/[^/]+)?/(?:p|tv|reels?(?!/audio/))/(?P[^/?#&]+))' + _VALID_URL = r'(?Phttps?://(?:www\.)?instagram\.com(?:/(?!share/)[^/?#]+)?/(?:p|tv|reels?(?!/audio/))/(?P[^/?#&]+))' _EMBED_REGEX = [r']+src=(["\'])(?P(?:https?:)?//(?:www\.)?instagram\.com/p/[^/]+/embed.*?)\1'] _TESTS = [{ 'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc', @@ -435,10 +434,10 @@ class InstagramIE(InstagramBaseIE): 'X-Requested-With': 'XMLHttpRequest', 'Referer': url, }, query={ - 'query_hash': '9f8827793ef34641b2fb195d4d41151c', + 'doc_id': '8845758582119845', 'variables': json.dumps(variables, separators=(',', ':')), }) - 
media.update(traverse_obj(general_info, ('data', 'shortcode_media')) or {}) + media.update(traverse_obj(general_info, ('data', 'xdt_shortcode_media')) or {}) if not general_info: self.report_warning('General metadata extraction failed (some metadata might be missing).', video_id) diff --git a/yt_dlp/extractor/iprima.py b/yt_dlp/extractor/iprima.py index ab26dc5ef..9b91a454b 100644 --- a/yt_dlp/extractor/iprima.py +++ b/yt_dlp/extractor/iprima.py @@ -25,9 +25,29 @@ class IPrimaIE(InfoExtractor): 'id': 'p51388', 'ext': 'mp4', 'title': 'Partička (92)', - 'description': 'md5:859d53beae4609e6dd7796413f1b6cac', - 'upload_date': '20201103', - 'timestamp': 1604437480, + 'description': 'md5:57943f6a50d6188288c3a579d2fd5f01', + 'episode': 'Partička (92)', + 'season': 'Partička', + 'series': 'Prima Partička', + 'episode_number': 92, + 'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-ef6cf9de-c980-4443-92e4-17fe8bccd45c-16x9.jpeg', + }, + 'params': { + 'skip_download': True, # m3u8 download + }, + }, { + 'url': 'https://zoom.iprima.cz/porady/krasy-kanarskych-ostrovu/tenerife-v-risi-ohne', + 'info_dict': { + 'id': 'p1412199', + 'ext': 'mp4', + 'episode_number': 3, + 'episode': 'Tenerife: V říši ohně', + 'description': 'md5:4b4a05c574b5eaef130e68d4811c3f2c', + 'duration': 3111.0, + 'thumbnail': 'https://d31b9s05ygj54s.cloudfront.net/prima-plus/image/video-f66dd7fb-c1a0-47d1-b3bc-7db328d566c5-16x9-1711636518.jpg/t_16x9_medium_1366_768', + 'title': 'Tenerife: V říši ohně', + 'timestamp': 1711825800, + 'upload_date': '20240330', }, 'params': { 'skip_download': True, # m3u8 download @@ -131,6 +151,7 @@ class IPrimaIE(InfoExtractor): video_id = self._search_regex(( r'productId\s*=\s*([\'"])(?Pp\d+)\1', r'pproduct_id\s*=\s*([\'"])(?Pp\d+)\1', + r'let\s+videos\s*=\s*([\'"])(?Pp\d+)\1', ), webpage, 'real id', group='id', default=None) if not video_id: @@ -176,7 +197,7 @@ class IPrimaIE(InfoExtractor): final_result = self._search_json_ld(webpage, video_id, 
default={}) final_result.update({ 'id': video_id, - 'title': title, + 'title': final_result.get('title') or title, 'thumbnail': self._html_search_meta( ['thumbnail', 'og:image', 'twitter:image'], webpage, 'thumbnail', default=None), diff --git a/yt_dlp/extractor/japandiet.py b/yt_dlp/extractor/japandiet.py index 2ef091aff..994da22ae 100644 --- a/yt_dlp/extractor/japandiet.py +++ b/yt_dlp/extractor/japandiet.py @@ -194,11 +194,14 @@ class ShugiinItvVodIE(ShugiinItvBaseIE): class SangiinInstructionIE(InfoExtractor): - _VALID_URL = r'^https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php' + _VALID_URL = r'https?://www\.webtv\.sangiin\.go\.jp/webtv/index\.php' IE_DESC = False # this shouldn't be listed as a supported site def _real_extract(self, url): - raise ExtractorError('Copy the link from the botton below the video description or player, and use the link to download. If there are no button in the frame, get the URL of the frame showing the video.', expected=True) + raise ExtractorError( + 'Copy the link from the button below the video description/player ' + 'and use that link to download. 
If there is no button in the frame, ' + 'get the URL of the frame showing the video.', expected=True) class SangiinIE(InfoExtractor): diff --git a/yt_dlp/extractor/jiocinema.py b/yt_dlp/extractor/jiocinema.py index 30d98ba79..94c85064e 100644 --- a/yt_dlp/extractor/jiocinema.py +++ b/yt_dlp/extractor/jiocinema.py @@ -326,11 +326,11 @@ class JioCinemaIE(JioCinemaBaseIE): # fallback metadata 'title': ('name', {str}), 'description': ('fullSynopsis', {str}), - 'series': ('show', 'name', {str}, {lambda x: x or None}), + 'series': ('show', 'name', {str}, filter), 'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}), - 'season_number': ('episode', 'season', {int_or_none}, {lambda x: x or None}), + 'season_number': ('episode', 'season', {int_or_none}, filter), 'episode': ('fullTitle', {str}), - 'episode_number': ('episode', 'episodeNo', {int_or_none}, {lambda x: x or None}), + 'episode_number': ('episode', 'episodeNo', {int_or_none}, filter), 'age_limit': ('ageNemonic', {parse_age_limit}), 'duration': ('totalDuration', {float_or_none}), 'thumbnail': ('images', {url_or_none}), @@ -338,10 +338,10 @@ class JioCinemaIE(JioCinemaBaseIE): **traverse_obj(metadata, ('result', 0, { 'title': ('fullTitle', {str}), 'description': ('fullSynopsis', {str}), - 'series': ('showName', {str}, {lambda x: x or None}), - 'season': ('seasonName', {str}, {lambda x: x or None}), + 'series': ('showName', {str}, filter), + 'season': ('seasonName', {str}, filter), 'season_number': ('season', {int_or_none}), - 'season_id': ('seasonId', {str}, {lambda x: x or None}), + 'season_id': ('seasonId', {str}, filter), 'episode': ('fullTitle', {str}), 'episode_number': ('episode', {int_or_none}), 'timestamp': ('uploadTime', {int_or_none}), diff --git a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py index e5737b1e9..6d51e32f6 100644 --- a/yt_dlp/extractor/kaltura.py +++ b/yt_dlp/extractor/kaltura.py @@ -22,7 +22,7 @@ class KalturaIE(InfoExtractor): (?: 
kaltura:(?P\w+):(?P\w+)(?::(?P\w+))?| https?:// - (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/ + (?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/ (?: (?: # flash player diff --git a/yt_dlp/extractor/kenh14.py b/yt_dlp/extractor/kenh14.py new file mode 100644 index 000000000..3c46020e8 --- /dev/null +++ b/yt_dlp/extractor/kenh14.py @@ -0,0 +1,160 @@ +from .common import InfoExtractor +from ..utils import ( + clean_html, + extract_attributes, + get_element_by_class, + get_element_html_by_attribute, + get_elements_html_by_class, + int_or_none, + parse_duration, + parse_iso8601, + remove_start, + strip_or_none, + unescapeHTML, + update_url, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class Kenh14VideoIE(InfoExtractor): + _VALID_URL = r'https?://video\.kenh14\.vn/(?:video/)?[\w-]+-(?P[0-9]+)\.chn' + _TESTS = [{ + 'url': 'https://video.kenh14.vn/video/mo-hop-iphone-14-pro-max-nguon-unbox-therapy-316173.chn', + 'md5': '1ed67f9c3a1e74acf15db69590cf6210', + 'info_dict': { + 'id': '316173', + 'ext': 'mp4', + 'title': 'Video mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)', + 'description': 'Video mở hộp iPhone 14 Pro MaxVideo mở hộp iPhone 14 Pro Max (Nguồn: Unbox Therapy)', + 'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$', + 'tags': [], + 'uploader': 'Unbox Therapy', + 'upload_date': '20220517', + 'view_count': int, + 'duration': 722.86, + 'timestamp': 1652764468, + }, + }, { + 'url': 'https://video.kenh14.vn/video-316174.chn', + 'md5': '2b41877d2afaf4a3f487ceda8e5c7cbd', + 'info_dict': { + 'id': '316174', + 'ext': 'mp4', + 'title': 'Khoảnh khắc VĐV nằm gục khóc sau chiến thắng: 7 năm trời Việt Nam mới có HCV kiếm chém nữ, chỉ có 8 tháng để khổ luyện trước khi lên sàn đấu', + 'description': 'md5:de86aa22e143e2b277bce8ec9c6f17dc', + 'thumbnail': r're:^https?://videothumbs\.mediacdn\.vn/.*\.jpg$', + 'tags': [], + 'upload_date': '20220517', + 'view_count': int, + 'duration': 70.04, + 'timestamp': 1652766021, + }, + }, { + 
'url': 'https://video.kenh14.vn/0-344740.chn', + 'md5': 'b843495d5e728142c8870c09b46df2a9', + 'info_dict': { + 'id': '344740', + 'ext': 'mov', + 'title': 'Kỳ Duyên đầy căng thẳng trong buổi ra quân đi Miss Universe, nghi thức tuyên thuệ lần đầu xuất hiện gây nhiều tranh cãi', + 'description': 'md5:2a2dbb4a7397169fb21ee68f09160497', + 'thumbnail': r're:^https?://kenh14cdn\.com/.*\.jpg$', + 'tags': ['kỳ duyên', 'Kỳ Duyên tuyên thuệ', 'miss universe'], + 'uploader': 'Quang Vũ', + 'upload_date': '20241024', + 'view_count': int, + 'duration': 198.88, + 'timestamp': 1729741590, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + attrs = extract_attributes(get_element_html_by_attribute('type', 'VideoStream', webpage) or '') + direct_url = attrs['data-vid'] + + metadata = self._download_json( + 'https://api.kinghub.vn/video/api/v1/detailVideoByGet?FileName={}'.format( + remove_start(direct_url, 'kenh14cdn.com/')), video_id, fatal=False) + + formats = [{'url': f'https://{direct_url}', 'format_id': 'http', 'quality': 1}] + subtitles = {} + video_data = self._download_json( + f'https://{direct_url}.json', video_id, note='Downloading video data', fatal=False) + if hls_url := traverse_obj(video_data, ('hls', {url_or_none})): + fmts, subs = self._extract_m3u8_formats_and_subtitles( + hls_url, video_id, m3u8_id='hls', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + if dash_url := traverse_obj(video_data, ('mpd', {url_or_none})): + fmts, subs = self._extract_mpd_formats_and_subtitles( + dash_url, video_id, mpd_id='dash', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + + return { + **traverse_obj(metadata, { + 'duration': ('duration', {parse_duration}), + 'uploader': ('author', {strip_or_none}), + 'timestamp': ('uploadtime', {parse_iso8601(delimiter=' ')}), + 'view_count': ('views', {int_or_none}), + }), + 'id': video_id, + 
'title': ( + traverse_obj(metadata, ('title', {strip_or_none})) + or clean_html(self._og_search_title(webpage)) + or clean_html(get_element_by_class('vdbw-title', webpage))), + 'formats': formats, + 'subtitles': subtitles, + 'description': ( + clean_html(self._og_search_description(webpage)) + or clean_html(get_element_by_class('vdbw-sapo', webpage))), + 'thumbnail': (self._og_search_thumbnail(webpage) or attrs.get('data-thumb')), + 'tags': traverse_obj(self._html_search_meta('keywords', webpage), ( + {lambda x: x.split(';')}, ..., filter)), + } + + +class Kenh14PlaylistIE(InfoExtractor): + _VALID_URL = r'https?://video\.kenh14\.vn/playlist/[\w-]+-(?P[0-9]+)\.chn' + _TESTS = [{ + 'url': 'https://video.kenh14.vn/playlist/tran-tinh-naked-love-mua-2-71.chn', + 'info_dict': { + 'id': '71', + 'title': 'Trần Tình (Naked love) mùa 2', + 'description': 'md5:e9522339304956dea931722dd72eddb2', + 'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$', + }, + 'playlist_count': 9, + }, { + 'url': 'https://video.kenh14.vn/playlist/0-72.chn', + 'info_dict': { + 'id': '72', + 'title': 'Lau Lại Đầu Từ', + 'description': 'Cùng xem xưa và nay có gì khác biệt nhé!', + 'thumbnail': r're:^https?://kenh14cdn\.com/.*\.png$', + }, + 'playlist_count': 6, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + + category_detail = get_element_by_class('category-detail', webpage) or '' + embed_info = traverse_obj( + self._yield_json_ld(webpage, playlist_id), + (lambda _, v: v['name'] and v['alternateName'], any)) or {} + + return self.playlist_from_matches( + get_elements_html_by_class('video-item', webpage), playlist_id, + (clean_html(get_element_by_class('name', category_detail)) or unescapeHTML(embed_info.get('name'))), + getter=lambda x: 'https://video.kenh14.vn/video/video-{}.chn'.format(extract_attributes(x)['data-id']), + ie=Kenh14VideoIE, playlist_description=( + clean_html(get_element_by_class('description', 
category_detail)) + or unescapeHTML(embed_info.get('alternateName'))), + thumbnail=traverse_obj( + self._og_search_thumbnail(webpage), + ({url_or_none}, {update_url(query=None)}))) diff --git a/yt_dlp/extractor/khanacademy.py b/yt_dlp/extractor/khanacademy.py index 3f03f9e4c..42eef3c92 100644 --- a/yt_dlp/extractor/khanacademy.py +++ b/yt_dlp/extractor/khanacademy.py @@ -15,7 +15,7 @@ from ..utils import ( class KhanAcademyBaseIE(InfoExtractor): _VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P(?:[^/]+/){%s}%s[^?#/&]+)' - _PUBLISHED_CONTENT_VERSION = '171419ab20465d931b356f22d20527f13969bb70' + _PUBLISHED_CONTENT_VERSION = 'dc34750f0572c80f5effe7134082fe351143c1e4' def _parse_video(self, video): return { @@ -39,7 +39,7 @@ class KhanAcademyBaseIE(InfoExtractor): query={ 'fastly_cacheable': 'persist_until_publish', 'pcv': self._PUBLISHED_CONTENT_VERSION, - 'hash': '1242644265', + 'hash': '3712657851', 'variables': json.dumps({ 'path': display_id, 'countryCode': 'US', diff --git a/yt_dlp/extractor/kick.py b/yt_dlp/extractor/kick.py index 1c1b2a177..1f001d421 100644 --- a/yt_dlp/extractor/kick.py +++ b/yt_dlp/extractor/kick.py @@ -1,4 +1,3 @@ -import functools from .common import InfoExtractor from ..networking import HEADRequest @@ -67,7 +66,7 @@ class KickIE(KickBaseIE): @classmethod def suitable(cls, url): - return False if KickClipIE.suitable(url) else super().suitable(url) + return False if (KickVODIE.suitable(url) or KickClipIE.suitable(url)) else super().suitable(url) def _real_extract(self, url): channel = self._match_id(url) @@ -98,25 +97,25 @@ class KickIE(KickBaseIE): class KickVODIE(KickBaseIE): IE_NAME = 'kick:vod' - _VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' + _VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/videos/(?P[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' _TESTS = [{ - 'url': 'https://kick.com/video/e74614f4-5270-4319-90ad-32179f19a45c', + 'url': 
'https://kick.com/xqc/videos/8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea', 'md5': '3870f94153e40e7121a6e46c068b70cb', 'info_dict': { - 'id': 'e74614f4-5270-4319-90ad-32179f19a45c', + 'id': '8dd97a8d-e17f-48fb-8bc3-565f88dbc9ea', 'ext': 'mp4', - 'title': r're:❎ MEGA DRAMA ❎ LIVE ❎ CLICK ❎ ULTIMATE SKILLS .+', + 'title': '18+ #ad 🛑LIVE🛑CLICK🛑DRAMA🛑NEWS🛑STUFF🛑REACT🛑GET IN HHERE🛑BOP BOP🛑WEEEE WOOOO🛑', 'description': 'THE BEST AT ABSOLUTELY EVERYTHING. THE JUICER. LEADER OF THE JUICERS.', 'channel': 'xqc', 'channel_id': '668', 'uploader': 'xQc', 'uploader_id': '676', - 'upload_date': '20240724', - 'timestamp': 1721796562, - 'duration': 18566.0, + 'upload_date': '20240909', + 'timestamp': 1725919141, + 'duration': 10155.0, 'thumbnail': r're:^https?://.*\.jpg', 'view_count': int, - 'categories': ['VALORANT'], + 'categories': ['Just Chatting'], 'age_limit': 0, }, 'params': {'skip_download': 'm3u8'}, @@ -137,7 +136,7 @@ class KickVODIE(KickBaseIE): 'uploader': ('livestream', 'channel', 'user', 'username', {str}), 'uploader_id': ('livestream', 'channel', 'user_id', {int}, {str_or_none}), 'timestamp': ('created_at', {parse_iso8601}), - 'duration': ('livestream', 'duration', {functools.partial(float_or_none, scale=1000)}), + 'duration': ('livestream', 'duration', {float_or_none(scale=1000)}), 'thumbnail': ('livestream', 'thumbnail', {url_or_none}), 'categories': ('livestream', 'categories', ..., 'name', {str}), 'view_count': ('views', {int_or_none}), @@ -148,7 +147,7 @@ class KickVODIE(KickBaseIE): class KickClipIE(KickBaseIE): IE_NAME = 'kick:clips' - _VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+/?\?(?:[^#]+&)?clip=(?Pclip_[\w-]+)' + _VALID_URL = r'https?://(?:www\.)?kick\.com/[\w-]+(?:/clips/|/?\?(?:[^#]+&)?clip=)(?Pclip_[\w-]+)' _TESTS = [{ 'url': 'https://kick.com/mxddy?clip=clip_01GYXVB5Y8PWAPWCWMSBCFB05X', 'info_dict': { @@ -189,6 +188,26 @@ class KickClipIE(KickBaseIE): 'age_limit': 0, }, 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 
'https://kick.com/spreen/clips/clip_01J8RGZRKHXHXXKJEHGRM932A5', + 'info_dict': { + 'id': 'clip_01J8RGZRKHXHXXKJEHGRM932A5', + 'ext': 'mp4', + 'title': 'KLJASLDJKLJKASDLJKDAS', + 'channel': 'spreen', + 'channel_id': '5312671', + 'uploader': 'AnormalBarraBaja', + 'uploader_id': '26518262', + 'duration': 43.0, + 'upload_date': '20240927', + 'timestamp': 1727399987, + 'thumbnail': 'https://clips.kick.com/clips/f2/clip_01J8RGZRKHXHXXKJEHGRM932A5/thumbnail.webp', + 'view_count': int, + 'like_count': int, + 'categories': ['Minecraft'], + 'age_limit': 0, + }, + 'params': {'skip_download': 'm3u8'}, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/kika.py b/yt_dlp/extractor/kika.py new file mode 100644 index 000000000..69f4a3ce0 --- /dev/null +++ b/yt_dlp/extractor/kika.py @@ -0,0 +1,126 @@ +from .common import InfoExtractor +from ..utils import ( + determine_ext, + int_or_none, + parse_duration, + parse_iso8601, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class KikaIE(InfoExtractor): + IE_DESC = 'KiKA.de' + _VALID_URL = r'https?://(?:www\.)?kika\.de/[\w/-]+/videos/(?P[a-z-]+\d+)' + _GEO_COUNTRIES = ['DE'] + + _TESTS = [{ + 'url': 'https://www.kika.de/logo/videos/logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100', + 'md5': 'fbfc8da483719ef06f396e5e5b938c69', + 'info_dict': { + 'id': 'logo-vom-samstag-einunddreissig-august-zweitausendvierundzwanzig-100', + 'ext': 'mp4', + 'upload_date': '20240831', + 'timestamp': 1725126600, + 'season_number': 2024, + 'modified_date': '20240831', + 'episode': 'Episode 476', + 'episode_number': 476, + 'season': 'Season 2024', + 'duration': 634, + 'title': 'logo! vom Samstag, 31. August 2024', + 'modified_timestamp': 1725129983, + }, + }, { + 'url': 'https://www.kika.de/kaltstart/videos/video92498', + 'md5': '710ece827e5055094afeb474beacb7aa', + 'info_dict': { + 'id': 'video92498', + 'ext': 'mp4', + 'title': '7. 
Wo ist Leo?', + 'description': 'md5:fb48396a5b75068bcac1df74f1524920', + 'duration': 436, + 'timestamp': 1702926876, + 'upload_date': '20231218', + 'episode_number': 7, + 'modified_date': '20240319', + 'modified_timestamp': 1710880610, + 'episode': 'Episode 7', + 'season_number': 1, + 'season': 'Season 1', + }, + }, { + 'url': 'https://www.kika.de/bernd-das-brot/astrobrot/videos/video90088', + 'md5': 'ffd1b700d7de0a6616a1d08544c77294', + 'info_dict': { + 'id': 'video90088', + 'ext': 'mp4', + 'upload_date': '20221102', + 'timestamp': 1667390580, + 'duration': 197, + 'modified_timestamp': 1711093771, + 'episode_number': 8, + 'title': 'Es ist nicht leicht, ein Astrobrot zu sein', + 'modified_date': '20240322', + 'description': 'md5:d3641deaf1b5515a160788b2be4159a9', + 'season_number': 1, + 'episode': 'Episode 8', + 'season': 'Season 1', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + doc = self._download_json(f'https://www.kika.de/_next-api/proxy/v1/videos/{video_id}', video_id) + video_assets = self._download_json(doc['assets']['url'], video_id) + + subtitles = {} + if ttml_resource := url_or_none(video_assets.get('videoSubtitle')): + subtitles['de'] = [{ + 'url': ttml_resource, + 'ext': 'ttml', + }] + if webvtt_resource := url_or_none(video_assets.get('webvttUrl')): + subtitles.setdefault('de', []).append({ + 'url': webvtt_resource, + 'ext': 'vtt', + }) + + return { + 'id': video_id, + 'formats': list(self._extract_formats(video_assets, video_id)), + 'subtitles': subtitles, + **traverse_obj(doc, { + 'title': ('title', {str}), + 'description': ('description', {str}), + 'timestamp': ('date', {parse_iso8601}), + 'modified_timestamp': ('modificationDate', {parse_iso8601}), + 'duration': (( + ('durationInSeconds', {int_or_none}), + ('duration', {parse_duration})), any), + 'episode_number': ('episodeNumber', {int_or_none}), + 'season_number': ('season', {int_or_none}), + }), + } + + def _extract_formats(self, media_info, video_id): + for 
media in traverse_obj(media_info, ('assets', lambda _, v: url_or_none(v['url']))): + stream_url = media['url'] + ext = determine_ext(stream_url) + if ext == 'm3u8': + yield from self._extract_m3u8_formats( + stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False) + else: + yield { + 'url': stream_url, + 'format_id': ext, + **traverse_obj(media, { + 'width': ('frameWidth', {int_or_none}), + 'height': ('frameHeight', {int_or_none}), + # NB: filesize is 0 if unknown, bitrate is -1 if unknown + 'filesize': ('fileSize', {int_or_none}, filter), + 'abr': ('bitrateAudio', {int_or_none}, {lambda x: None if x == -1 else x}), + 'vbr': ('bitrateVideo', {int_or_none}, {lambda x: None if x == -1 else x}), + }), + } diff --git a/yt_dlp/extractor/laracasts.py b/yt_dlp/extractor/laracasts.py index 4494c4b79..4a61d6ab1 100644 --- a/yt_dlp/extractor/laracasts.py +++ b/yt_dlp/extractor/laracasts.py @@ -32,7 +32,7 @@ class LaracastsBaseIE(InfoExtractor): VimeoIE, url_transparent=True, **traverse_obj(episode, { 'id': ('id', {int}, {str_or_none}), - 'webpage_url': ('path', {lambda x: urljoin('https://laracasts.com', x)}), + 'webpage_url': ('path', {urljoin('https://laracasts.com')}), 'title': ('title', {clean_html}), 'season_number': ('chapter', {int_or_none}), 'episode_number': ('position', {int_or_none}), @@ -104,7 +104,7 @@ class LaracastsPlaylistIE(LaracastsBaseIE): 'description': ('body', {clean_html}), 'thumbnail': (('large_thumbnail', 'thumbnail'), {url_or_none}, any), 'duration': ('runTime', {parse_duration}), - 'categories': ('taxonomy', 'name', {str}, {lambda x: x and [x]}), + 'categories': ('taxonomy', 'name', {str}, all, filter), 'tags': ('topics', ..., 'name', {str}), 'modified_date': ('lastUpdated', {unified_strdate}), }), diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py index c764d4961..0445b7cbf 100644 --- a/yt_dlp/extractor/lbry.py +++ b/yt_dlp/extractor/lbry.py @@ -66,7 +66,7 @@ class LBRYBaseIE(InfoExtractor): 'license': ('value', 'license', {str}), 
'timestamp': ('timestamp', {int_or_none}), 'release_timestamp': ('value', 'release_time', {int_or_none}), - 'tags': ('value', 'tags', ..., {lambda x: x or None}), + 'tags': ('value', 'tags', ..., filter), 'duration': ('value', stream_type, 'duration', {int_or_none}), 'channel': ('signing_channel', 'value', 'title', {str}), 'channel_id': ('signing_channel', 'claim_id', {str}), @@ -136,6 +136,7 @@ class LBRYBaseIE(InfoExtractor): class LBRYIE(LBRYBaseIE): IE_NAME = 'lbry' + IE_DESC = 'odysee.com' _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf''' (?:\$/(?:download|embed)/)? (?P @@ -364,6 +365,7 @@ class LBRYIE(LBRYBaseIE): class LBRYChannelIE(LBRYBaseIE): IE_NAME = 'lbry:channel' + IE_DESC = 'odysee.com channels' _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + rf'(?P@{LBRYBaseIE._OPT_CLAIM_ID})/?(?:[?&]|$)' _TESTS = [{ 'url': 'https://lbry.tv/@LBRYFoundation:0', @@ -391,6 +393,7 @@ class LBRYChannelIE(LBRYBaseIE): class LBRYPlaylistIE(LBRYBaseIE): IE_NAME = 'lbry:playlist' + IE_DESC = 'odysee.com playlists' _VALID_URL = LBRYBaseIE._BASE_URL_REGEX + r'\$/(?:play)?list/(?P[0-9a-f-]+)' _TESTS = [{ 'url': 'https://odysee.com/$/playlist/ffef782f27486f0ac138bde8777f72ebdd0548c2', diff --git a/yt_dlp/extractor/learningonscreen.py b/yt_dlp/extractor/learningonscreen.py index dcf83144c..f4b51e66c 100644 --- a/yt_dlp/extractor/learningonscreen.py +++ b/yt_dlp/extractor/learningonscreen.py @@ -6,13 +6,11 @@ from ..utils import ( ExtractorError, clean_html, extract_attributes, - get_element_by_class, - get_element_html_by_id, join_nonempty, parse_duration, unified_timestamp, ) -from ..utils.traversal import traverse_obj +from ..utils.traversal import find_element, traverse_obj class LearningOnScreenIE(InfoExtractor): @@ -32,28 +30,24 @@ class LearningOnScreenIE(InfoExtractor): def _real_initialize(self): if not self._get_cookies('https://learningonscreen.ac.uk/').get('PHPSESSID-BOB-LIVE'): - self.raise_login_required( - 'Use --cookies for authentication. 
See ' - ' https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp ' - 'for how to manually pass cookies', method=None) + self.raise_login_required(method='session_cookies') def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) details = traverse_obj(webpage, ( - {functools.partial(get_element_html_by_id, 'programme-details')}, { - 'title': ({functools.partial(re.search, r'

([^<]+)

')}, 1, {clean_html}), + {find_element(id='programme-details', html=True)}, { + 'title': ({find_element(tag='h2')}, {clean_html}), 'timestamp': ( - {functools.partial(get_element_by_class, 'broadcast-date')}, + {find_element(cls='broadcast-date')}, {functools.partial(re.match, r'([^<]+)')}, 1, {unified_timestamp}), 'duration': ( - {functools.partial(get_element_by_class, 'prog-running-time')}, - {clean_html}, {parse_duration}), + {find_element(cls='prog-running-time')}, {clean_html}, {parse_duration}), })) title = details.pop('title', None) or traverse_obj(webpage, ( - {functools.partial(get_element_html_by_id, 'add-to-existing-playlist')}, + {find_element(id='add-to-existing-playlist', html=True)}, {extract_attributes}, 'data-record-title', {clean_html})) entries = self._parse_html5_media_entries( diff --git a/yt_dlp/extractor/listennotes.py b/yt_dlp/extractor/listennotes.py index 61eae95ed..9d68e1830 100644 --- a/yt_dlp/extractor/listennotes.py +++ b/yt_dlp/extractor/listennotes.py @@ -6,12 +6,10 @@ from ..utils import ( extract_attributes, get_element_by_class, get_element_html_by_id, - get_element_text_and_html_by_tag, parse_duration, strip_or_none, - traverse_obj, - try_call, ) +from ..utils.traversal import find_element, traverse_obj class ListenNotesIE(InfoExtractor): @@ -22,14 +20,14 @@ class ListenNotesIE(InfoExtractor): 'info_dict': { 'id': 'KrDgvNb_u1n', 'ext': 'mp3', - 'title': 'md5:32236591a921adf17bbdbf0441b6c0e9', - 'description': 'md5:c581ed197eeddcee55a67cdb547c8cbd', - 'duration': 2148.0, - 'channel': 'Thriving on Overload', + 'title': r're:Tim O’Reilly on noticing things other people .{113}', + 'description': r're:(?s)‘’We shape reality by what we notice and .{27459}', + 'duration': 2215.0, + 'channel': 'Amplifying Cognition', 'channel_id': 'ed84wITivxF', 'episode_id': 'e1312583fa7b4e24acfbb5131050be00', - 'thumbnail': 'https://production.listennotes.com/podcasts/thriving-on-overload-ross-dawson-1wb_KospA3P-ed84wITivxF.300x300.jpg', - 
'channel_url': 'https://www.listennotes.com/podcasts/thriving-on-overload-ross-dawson-ed84wITivxF/', + 'thumbnail': 'https://cdn-images-3.listennotes.com/podcasts/amplifying-cognition-ross-dawson-Iemft4Gdr0k-ed84wITivxF.300x300.jpg', + 'channel_url': 'https://www.listennotes.com/podcasts/amplifying-cognition-ross-dawson-ed84wITivxF/', 'cast': ['Tim O’Reilly', 'Cookie Monster', 'Lao Tzu', 'Wallace Steven', 'Eric Raymond', 'Christine Peterson', 'John Maynard Keyne', 'Ross Dawson'], }, }, { @@ -39,13 +37,13 @@ class ListenNotesIE(InfoExtractor): 'id': 'lwEA3154JzG', 'ext': 'mp3', 'title': 'Episode 177: WireGuard with Jason Donenfeld', - 'description': 'md5:24744f36456a3e95f83c1193a3458594', + 'description': r're:(?s)Jason Donenfeld lead developer joins us this hour to discuss WireGuard, .{3169}', 'duration': 3861.0, 'channel': 'Ask Noah Show', 'channel_id': '4DQTzdS5-j7', 'episode_id': '8c8954b95e0b4859ad1eecec8bf6d3a4', 'channel_url': 'https://www.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-4DQTzdS5-j7/', - 'thumbnail': 'https://production.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-cfbRUw9Gs3F-4DQTzdS5-j7.300x300.jpg', + 'thumbnail': 'https://cdn-images-3.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-gD7vG150cxf-4DQTzdS5-j7.300x300.jpg', 'cast': ['noah showlink', 'noah show', 'noah dashboard', 'jason donenfeld'], }, }] @@ -70,7 +68,7 @@ class ListenNotesIE(InfoExtractor): 'id': audio_id, 'url': data['audio'], 'title': (data.get('data-title') - or try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0]) + or traverse_obj(webpage, ({find_element(tag='h1')}, {clean_html})) or self._html_search_meta(('og:title', 'title', 'twitter:title'), webpage, 'title')), 'description': (self._clean_description(get_element_by_class('ln-text-p', webpage)) or strip_or_none(description)), diff --git a/yt_dlp/extractor/litv.py b/yt_dlp/extractor/litv.py index 93f926a9f..df9d141de 100644 --- a/yt_dlp/extractor/litv.py +++ 
b/yt_dlp/extractor/litv.py @@ -1,30 +1,32 @@ import json +import uuid from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, + join_nonempty, smuggle_url, traverse_obj, try_call, unsmuggle_url, + urljoin, ) class LiTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P[^&]+)' - - _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?content_id=%s' - + _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:[^/?#]+/watch/|vod/[^/?#]+/content\.do\?content_id=)(?P[\w-]+)' + _URL_TEMPLATE = 'https://www.litv.tv/%s/watch/%s' + _GEO_COUNTRIES = ['TW'] _TESTS = [{ - 'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1', + 'url': 'https://www.litv.tv/drama/watch/VOD00041610', 'info_dict': { 'id': 'VOD00041606', 'title': '花千骨', }, 'playlist_count': 51, # 50 episodes + 1 trailer }, { - 'url': 'https://www.litv.tv/vod/drama/content.do?brc_id=root&id=VOD00041610&isUHEnabled=true&autoPlay=1', + 'url': 'https://www.litv.tv/drama/watch/VOD00041610', 'md5': 'b90ff1e9f1d8f5cfcd0a44c3e2b34c7a', 'info_dict': { 'id': 'VOD00041610', @@ -32,16 +34,15 @@ class LiTVIE(InfoExtractor): 'title': '花千骨第1集', 'thumbnail': r're:https?://.*\.jpg$', 'description': '《花千骨》陸劇線上看。十六年前,平靜的村莊內,一名女嬰隨異相出生,途徑此地的蜀山掌門清虛道長算出此女命運非同一般,她體內散發的異香易招惹妖魔。一念慈悲下,他在村莊周邊設下結界阻擋妖魔入侵,讓其年滿十六後去蜀山,並賜名花千骨。', - 'categories': ['奇幻', '愛情', '中國', '仙俠'], + 'categories': ['奇幻', '愛情', '仙俠', '古裝'], 'episode': 'Episode 1', 'episode_number': 1, }, 'params': { 'noplaylist': True, }, - 'skip': 'Georestricted to Taiwan', }, { - 'url': 'https://www.litv.tv/promo/miyuezhuan/?content_id=VOD00044841&', + 'url': 'https://www.litv.tv/drama/watch/VOD00044841', 'md5': '88322ea132f848d6e3e18b32a832b918', 'info_dict': { 'id': 'VOD00044841', @@ -55,94 +56,62 @@ class LiTVIE(InfoExtractor): def _extract_playlist(self, playlist_data, content_type): all_episodes = [ self.url_result(smuggle_url( - 
self._URL_TEMPLATE % (content_type, episode['contentId']), + self._URL_TEMPLATE % (content_type, episode['content_id']), {'force_noplaylist': True})) # To prevent infinite recursion - for episode in traverse_obj(playlist_data, ('seasons', ..., 'episode', lambda _, v: v['contentId']))] + for episode in traverse_obj(playlist_data, ('seasons', ..., 'episodes', lambda _, v: v['content_id']))] - return self.playlist_result(all_episodes, playlist_data['contentId'], playlist_data.get('title')) + return self.playlist_result(all_episodes, playlist_data['content_id'], playlist_data.get('title')) def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + vod_data = self._search_nextjs_data(webpage, video_id)['props']['pageProps'] - if self._search_regex( - r'(?i)]*http-equiv="refresh"\s[^>]*content="[0-9]+;\s*url=https://www\.litv\.tv/"', - webpage, 'meta refresh redirect', default=False, group=0): - raise ExtractorError('No such content found', expected=True) + program_info = traverse_obj(vod_data, ('programInformation', {dict})) or {} + playlist_data = traverse_obj(vod_data, ('seriesTree')) + if playlist_data and self._yes_playlist(program_info.get('series_id'), video_id, smuggled_data): + return self._extract_playlist(playlist_data, program_info.get('content_type')) - program_info = self._parse_json(self._search_regex( - r'var\s+programInfo\s*=\s*([^;]+)', webpage, 'VOD data', default='{}'), - video_id) + asset_id = traverse_obj(program_info, ('assets', 0, 'asset_id', {str})) + if asset_id: # This is a VOD + media_type = 'vod' + else: # This is a live stream + asset_id = program_info['content_id'] + media_type = program_info['content_type'] + puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value) + if puid: + endpoint = 'get-urls' + else: + puid = str(uuid.uuid4()) + endpoint = 'get-urls-no-auth' + video_data = self._download_json( + 
f'https://www.litv.tv/api/{endpoint}', video_id, + data=json.dumps({'AssetId': asset_id, 'MediaType': media_type, 'puid': puid}).encode(), + headers={'Content-Type': 'application/json'}) - # In browsers `getProgramInfo` request is always issued. Usually this - # endpoint gives the same result as the data embedded in the webpage. - # If, for some reason, there are no embedded data, we do an extra request. - if 'assetId' not in program_info: - program_info = self._download_json( - 'https://www.litv.tv/vod/ajax/getProgramInfo', video_id, - query={'contentId': video_id}, - headers={'Accept': 'application/json'}) - - series_id = program_info['seriesId'] - if self._yes_playlist(series_id, video_id, smuggled_data): - playlist_data = self._download_json( - 'https://www.litv.tv/vod/ajax/getSeriesTree', video_id, - query={'seriesId': series_id}, headers={'Accept': 'application/json'}) - return self._extract_playlist(playlist_data, program_info['contentType']) - - video_data = self._parse_json(self._search_regex( - r'uiHlsUrl\s*=\s*testBackendData\(([^;]+)\);', - webpage, 'video data', default='{}'), video_id) - if not video_data: - payload = {'assetId': program_info['assetId']} - puid = try_call(lambda: self._get_cookies('https://www.litv.tv/')['PUID'].value) - if puid: - payload.update({ - 'type': 'auth', - 'puid': puid, - }) - endpoint = 'getUrl' - else: - payload.update({ - 'watchDevices': program_info['watchDevices'], - 'contentType': program_info['contentType'], - }) - endpoint = 'getMainUrlNoAuth' - video_data = self._download_json( - f'https://www.litv.tv/vod/ajax/{endpoint}', video_id, - data=json.dumps(payload).encode(), - headers={'Content-Type': 'application/json'}) - - if not video_data.get('fullpath'): - error_msg = video_data.get('errorMessage') - if error_msg == 'vod.error.outsideregionerror': + if error := traverse_obj(video_data, ('error', {dict})): + error_msg = traverse_obj(error, ('message', {str})) + if error_msg and 'OutsideRegionError' in error_msg: 
self.raise_geo_restricted('This video is available in Taiwan only') - if error_msg: + elif error_msg: raise ExtractorError(f'{self.IE_NAME} said: {error_msg}', expected=True) - raise ExtractorError(f'Unexpected result from {self.IE_NAME}') + raise ExtractorError(f'Unexpected error from {self.IE_NAME}') formats = self._extract_m3u8_formats( - video_data['fullpath'], video_id, ext='mp4', - entry_protocol='m3u8_native', m3u8_id='hls') + video_data['result']['AssetURLs'][0], video_id, ext='mp4', m3u8_id='hls') for a_format in formats: # LiTV HLS segments doesn't like compressions a_format.setdefault('http_headers', {})['Accept-Encoding'] = 'identity' - title = program_info['title'] + program_info.get('secondaryMark', '') - description = program_info.get('description') - thumbnail = program_info.get('imageFile') - categories = [item['name'] for item in program_info.get('category', [])] - episode = int_or_none(program_info.get('episode')) - return { 'id': video_id, 'formats': formats, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'categories': categories, - 'episode_number': episode, + 'title': join_nonempty('title', 'secondary_mark', delim='', from_dict=program_info), + **traverse_obj(program_info, { + 'description': ('description', {str}), + 'thumbnail': ('picture', {urljoin('https://p-cdnstatic.svc.litv.tv/')}), + 'categories': ('genres', ..., 'name', {str}), + 'episode_number': ('episode', {int_or_none}), + }), } diff --git a/yt_dlp/extractor/lnkgo.py b/yt_dlp/extractor/lnk.py similarity index 53% rename from yt_dlp/extractor/lnkgo.py rename to yt_dlp/extractor/lnk.py index 31a7cefd8..593f73410 100644 --- a/yt_dlp/extractor/lnkgo.py +++ b/yt_dlp/extractor/lnk.py @@ -1,86 +1,11 @@ from .common import InfoExtractor from ..utils import ( - clean_html, format_field, int_or_none, - parse_iso8601, unified_strdate, ) -class LnkGoIE(InfoExtractor): - _VALID_URL = 
r'https?://(?:www\.)?lnk(?:go)?\.(?:alfa\.)?lt/(?:visi-video/[^/]+|video)/(?P[A-Za-z0-9-]+)(?:/(?P\d+))?' - _TESTS = [{ - 'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai', - 'info_dict': { - 'id': '10809', - 'ext': 'mp4', - 'title': "Put'ka: Trys Klausimai", - 'upload_date': '20161216', - 'description': 'Seniai matytas Put’ka užduoda tris klausimėlius. Pabandykime surasti atsakymus.', - 'age_limit': 18, - 'duration': 117, - 'thumbnail': r're:^https?://.*\.jpg$', - 'timestamp': 1481904000, - }, - 'params': { - 'skip_download': True, # HLS download - }, - }, { - 'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2', - 'info_dict': { - 'id': '10467', - 'ext': 'mp4', - 'title': 'Nėrdas: Kompiuterio Valymas', - 'upload_date': '20150113', - 'description': 'md5:7352d113a242a808676ff17e69db6a69', - 'age_limit': 18, - 'duration': 346, - 'thumbnail': r're:^https?://.*\.jpg$', - 'timestamp': 1421164800, - }, - 'params': { - 'skip_download': True, # HLS download - }, - }, { - 'url': 'https://lnk.lt/video/neigalieji-tv-bokste/37413', - 'only_matching': True, - }] - _AGE_LIMITS = { - 'N-7': 7, - 'N-14': 14, - 'S': 18, - } - _M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s' - - def _real_extract(self, url): - display_id, video_id = self._match_valid_url(url).groups() - - video_info = self._download_json( - 'https://lnk.lt/api/main/video-page/{}/{}/false'.format(display_id, video_id or '0'), - display_id)['videoConfig']['videoInfo'] - - video_id = str(video_info['id']) - title = video_info['title'] - prefix = 'smil' if video_info.get('isQualityChangeAvailable') else 'mp4' - formats = self._extract_m3u8_formats( - self._M3U8_TEMPL % (prefix, video_info['videoUrl'], video_info.get('secureTokenParams') or ''), - video_id, 'mp4', 'm3u8_native') - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'formats': formats, - 'thumbnail': format_field(video_info, 
'posterImage', 'https://lnk.lt/all-images/%s'), - 'duration': int_or_none(video_info.get('duration')), - 'description': clean_html(video_info.get('htmlDescription')), - 'age_limit': self._AGE_LIMITS.get(video_info.get('pgRating'), 0), - 'timestamp': parse_iso8601(video_info.get('airDate')), - 'view_count': int_or_none(video_info.get('viewsCount')), - } - - class LnkIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?lnk\.lt/[^/]+/(?P\d+)' diff --git a/yt_dlp/extractor/loom.py b/yt_dlp/extractor/loom.py index 1191aa17e..b0878c33e 100644 --- a/yt_dlp/extractor/loom.py +++ b/yt_dlp/extractor/loom.py @@ -92,9 +92,9 @@ class LoomIE(InfoExtractor): }, 'params': {'videopassword': 'seniorinfants2'}, }, { - # embed, transcoded-url endpoint sends empty JSON response + # embed, transcoded-url endpoint sends empty JSON response, split video and audio HLS formats 'url': 'https://www.loom.com/embed/ddcf1c1ad21f451ea7468b1e33917e4e', - 'md5': '8488817242a0db1cb2ad0ea522553cf6', + 'md5': 'b321d261656848c184a94e3b93eae28d', 'info_dict': { 'id': 'ddcf1c1ad21f451ea7468b1e33917e4e', 'ext': 'mp4', @@ -104,6 +104,7 @@ class LoomIE(InfoExtractor): 'timestamp': 1657216459, 'duration': 181, }, + 'params': {'format': 'bestvideo'}, # Test video-only fixup 'expected_warnings': ['Failed to parse JSON'], }] _WEBPAGE_TESTS = [{ @@ -293,7 +294,11 @@ class LoomIE(InfoExtractor): format_url = format_url.replace('-split.m3u8', '.m3u8') m3u8_formats = self._extract_m3u8_formats( format_url, video_id, 'mp4', m3u8_id=f'hls-{format_id}', fatal=False, quality=quality) + # Sometimes only split video/audio formats are available, need to fixup video-only formats + is_not_premerged = 'none' in traverse_obj(m3u8_formats, (..., 'vcodec')) for fmt in m3u8_formats: + if is_not_premerged and fmt.get('vcodec') != 'none': + fmt['acodec'] = 'none' yield { **fmt, 'url': update_url(fmt['url'], query=query), diff --git a/yt_dlp/extractor/lsm.py b/yt_dlp/extractor/lsm.py index f5be08f97..56c06d745 100644 --- 
a/yt_dlp/extractor/lsm.py +++ b/yt_dlp/extractor/lsm.py @@ -114,7 +114,7 @@ class LSMLREmbedIE(InfoExtractor): def _real_extract(self, url): query = parse_qs(url) video_id = traverse_obj(query, ( - ('show', 'id'), 0, {int_or_none}, {lambda x: x or None}, {str_or_none}), get_all=False) + ('show', 'id'), 0, {int_or_none}, filter, {str_or_none}), get_all=False) webpage = self._download_webpage(url, video_id) player_data, media_data = self._search_regex( diff --git a/yt_dlp/extractor/magentamusik.py b/yt_dlp/extractor/magentamusik.py index 5bfc0a154..24c46a152 100644 --- a/yt_dlp/extractor/magentamusik.py +++ b/yt_dlp/extractor/magentamusik.py @@ -57,6 +57,6 @@ class MagentaMusikIE(InfoExtractor): 'duration': ('runtimeInSeconds', {int_or_none}), 'location': ('countriesOfProduction', {list}, {lambda x: join_nonempty(*x, delim=', ')}), 'release_year': ('yearOfProduction', {int_or_none}), - 'categories': ('mainGenre', {str}, {lambda x: x and [x]}), + 'categories': ('mainGenre', {str}, all, filter), })), } diff --git a/yt_dlp/extractor/mailru.py b/yt_dlp/extractor/mailru.py index cca678f14..0496a87f0 100644 --- a/yt_dlp/extractor/mailru.py +++ b/yt_dlp/extractor/mailru.py @@ -126,7 +126,7 @@ class MailRuIE(InfoExtractor): video_data = None # fix meta_url if missing the host address - if re.match(r'^\/\+\/', meta_url): + if re.match(r'\/\+\/', meta_url): meta_url = urljoin('https://my.mail.ru', meta_url) if meta_url: diff --git a/yt_dlp/extractor/mdr.py b/yt_dlp/extractor/mdr.py index 46097fa20..dfda3cc53 100644 --- a/yt_dlp/extractor/mdr.py +++ b/yt_dlp/extractor/mdr.py @@ -13,8 +13,8 @@ from ..utils import ( class MDRIE(InfoExtractor): - IE_DESC = 'MDR.DE and KiKA' - _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P\d+)(?:_.+?)?\.html' + IE_DESC = 'MDR.DE' + _VALID_URL = r'https?://(?:www\.)?mdr\.de/(?:.*)/[a-z-]+-?(?P\d+)(?:_.+?)?\.html' _GEO_COUNTRIES = ['DE'] @@ -34,30 +34,6 @@ class MDRIE(InfoExtractor): 'uploader': 'MITTELDEUTSCHER RUNDFUNK', }, 
'skip': '404 not found', - }, { - 'url': 'http://www.kika.de/baumhaus/videos/video19636.html', - 'md5': '4930515e36b06c111213e80d1e4aad0e', - 'info_dict': { - 'id': '19636', - 'ext': 'mp4', - 'title': 'Baumhaus vom 30. Oktober 2015', - 'duration': 134, - 'uploader': 'KIKA', - }, - 'skip': '404 not found', - }, { - 'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html', - 'md5': '5fe9c4dd7d71e3b238f04b8fdd588357', - 'info_dict': { - 'id': '8182', - 'ext': 'mp4', - 'title': 'Beutolomäus und der geheime Weihnachtswunsch', - 'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd', - 'timestamp': 1482541200, - 'upload_date': '20161224', - 'duration': 4628, - 'uploader': 'KIKA', - }, }, { # audio with alternative playerURL pattern 'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html', @@ -68,28 +44,7 @@ class MDRIE(InfoExtractor): 'duration': 3239, 'uploader': 'MITTELDEUTSCHER RUNDFUNK', }, - }, { - # empty bitrateVideo and bitrateAudio - 'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html', - 'info_dict': { - 'id': '128372', - 'ext': 'mp4', - 'title': 'Der kleine Wichtel kehrt zurück', - 'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a', - 'duration': 4876, - 'timestamp': 1607823300, - 'upload_date': '20201213', - 'uploader': 'ZDF', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html', - 'only_matching': True, - }, { - 'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html', - 'only_matching': True, + 'skip': '404 not found', }, { 'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html', 'only_matching': True, diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py index f51342060..197e91d1d 100644 --- a/yt_dlp/extractor/mediaklikk.py +++ b/yt_dlp/extractor/mediaklikk.py @@ -16,6 +16,15 @@ class 
MediaKlikkIE(InfoExtractor): (?P[^/#?_]+)''' _TESTS = [{ + 'url': 'https://mediaklikk.hu/filmajanlo/cikk/az-ajto/', + 'info_dict': { + 'id': '668177', + 'title': 'Az ajtó', + 'display_id': 'az-ajto', + 'ext': 'mp4', + 'thumbnail': 'https://cdn.cms.mtv.hu/wp-content/uploads/sites/4/2016/01/vlcsnap-2023-07-31-14h18m52s111.jpg', + }, + }, { # (old) mediaklikk. date in html. 'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/', 'info_dict': { @@ -37,6 +46,7 @@ class MediaKlikkIE(InfoExtractor): 'upload_date': '20230903', 'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg', }, + 'skip': 'Webpage redirects to 404 page', }, { # (old) m4sport 'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/', @@ -59,6 +69,7 @@ class MediaKlikkIE(InfoExtractor): 'upload_date': '20230908', 'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg', }, + 'skip': 'Webpage redirects to 404 page', }, { # m4sport with *video/ url and no date 'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/', @@ -69,6 +80,7 @@ class MediaKlikkIE(InfoExtractor): 'ext': 'mp4', 'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png', }, + 'skip': 'Webpage redirects to 404 page', }, { # (old) hirado 'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/', @@ -90,6 +102,7 @@ class MediaKlikkIE(InfoExtractor): 'upload_date': '20230911', 'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg', }, + 'skip': 'Webpage redirects to video list page', }, { # (old) petofilive 'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/', @@ -112,6 +125,7 @@ class MediaKlikkIE(InfoExtractor): 'upload_date': '20230909', 'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg', }, + 'skip': 'Webpage redirects to video list 
page', }] def _real_extract(self, url): @@ -143,14 +157,14 @@ class MediaKlikkIE(InfoExtractor): if not playlist_url: raise ExtractorError('Unable to extract playlist url') - formats = self._extract_wowza_formats( - playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash']) + formats, subtitles = self._extract_m3u8_formats_and_subtitles(playlist_url, video_id) return { 'id': video_id, 'title': title, 'display_id': display_id, 'formats': formats, + 'subtitles': subtitles, 'upload_date': upload_date, 'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage), } diff --git a/yt_dlp/extractor/mediastream.py b/yt_dlp/extractor/mediastream.py index ae0fb2aed..d2a22f98f 100644 --- a/yt_dlp/extractor/mediastream.py +++ b/yt_dlp/extractor/mediastream.py @@ -17,7 +17,7 @@ class MediaStreamBaseIE(InfoExtractor): _BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)' def _extract_mediastream_urls(self, webpage): - yield from traverse_obj(list(self._yield_json_ld(webpage, None, fatal=False)), ( + yield from traverse_obj(list(self._yield_json_ld(webpage, None, default={})), ( lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'), {lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None})) diff --git a/yt_dlp/extractor/mgtv.py b/yt_dlp/extractor/mgtv.py index d5dda06f9..c793626fd 100644 --- a/yt_dlp/extractor/mgtv.py +++ b/yt_dlp/extractor/mgtv.py @@ -16,7 +16,7 @@ from ..utils import ( class MGTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P\d+)\.html' + _VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/[bv]/(?:[^/]+/)*(?P\d+)\.html' IE_DESC = '芒果TV' IE_NAME = 'MangoTV' diff --git a/yt_dlp/extractor/microsoftembed.py b/yt_dlp/extractor/microsoftembed.py index d0135f5a9..2575d6c5e 100644 --- a/yt_dlp/extractor/microsoftembed.py +++ b/yt_dlp/extractor/microsoftembed.py @@ -26,6 +26,7 @@ class MicrosoftEmbedIE(InfoExtractor): 'timestamp': 1631658316, 'upload_date': '20210914', }, + 
'expected_warnings': ['Failed to parse XML: syntax error: line 1, column 0'], }] _API_URL = 'https://prod-video-cms-rt-microsoft-com.akamaized.net/vhs/api/videos/' @@ -36,11 +37,11 @@ class MicrosoftEmbedIE(InfoExtractor): formats = [] for source_type, source in metadata['streams'].items(): if source_type == 'smooth_Streaming': - formats.extend(self._extract_ism_formats(source['url'], video_id, 'mss')) + formats.extend(self._extract_ism_formats(source['url'], video_id, 'mss', fatal=False)) elif source_type == 'apple_HTTP_Live_Streaming': - formats.extend(self._extract_m3u8_formats(source['url'], video_id, 'mp4')) + formats.extend(self._extract_m3u8_formats(source['url'], video_id, 'mp4', fatal=False)) elif source_type == 'mPEG_DASH': - formats.extend(self._extract_mpd_formats(source['url'], video_id)) + formats.extend(self._extract_mpd_formats(source['url'], video_id, fatal=False)) else: formats.append({ 'format_id': source_type, diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py deleted file mode 100644 index 88a2b9e89..000000000 --- a/yt_dlp/extractor/mildom.py +++ /dev/null @@ -1,291 +0,0 @@ -import functools -import json -import uuid - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - OnDemandPagedList, - determine_ext, - dict_get, - float_or_none, - traverse_obj, -) - - -class MildomBaseIE(InfoExtractor): - _GUEST_ID = None - - def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None): - if not self._GUEST_ID: - self._GUEST_ID = f'pc-gp-{uuid.uuid4()}' - - content = self._download_json( - url, video_id, note=note, data=json.dumps(body).encode() if body else None, - headers={'Content-Type': 'application/json'} if body else {}, - query={ - '__guest_id': self._GUEST_ID, - '__platform': 'web', - **(query or {}), - }) - - if content['code'] != 0: - raise ExtractorError( - f'Mildom says: {content["message"]} (code {content["code"]})', - expected=True) - return content['body'] - - -class 
MildomIE(MildomBaseIE): - IE_NAME = 'mildom' - IE_DESC = 'Record ongoing live by specific user in Mildom' - _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/(?P\d+)' - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(f'https://www.mildom.com/{video_id}', video_id) - - enterstudio = self._call_api( - 'https://cloudac.mildom.com/nonolive/gappserv/live/enterstudio', video_id, - note='Downloading live metadata', query={'user_id': video_id}) - result_video_id = enterstudio.get('log_id', video_id) - - servers = self._call_api( - 'https://cloudac.mildom.com/nonolive/gappserv/live/liveserver', result_video_id, - note='Downloading live server list', query={ - 'user_id': video_id, - 'live_server_type': 'hls', - }) - - playback_token = self._call_api( - 'https://cloudac.mildom.com/nonolive/gappserv/live/token', result_video_id, - note='Obtaining live playback token', body={'host_id': video_id, 'type': 'hls'}) - playback_token = traverse_obj(playback_token, ('data', ..., 'token'), get_all=False) - if not playback_token: - raise ExtractorError('Failed to obtain live playback token') - - formats = self._extract_m3u8_formats( - f'{servers["stream_server"]}/{video_id}_master.m3u8?{playback_token}', - result_video_id, 'mp4', headers={ - 'Referer': 'https://www.mildom.com/', - 'Origin': 'https://www.mildom.com', - }) - - for fmt in formats: - fmt.setdefault('http_headers', {})['Referer'] = 'https://www.mildom.com/' - - return { - 'id': result_video_id, - 'title': self._html_search_meta('twitter:description', webpage, default=None) or traverse_obj(enterstudio, 'anchor_intro'), - 'description': traverse_obj(enterstudio, 'intro', 'live_intro', expected_type=str), - 'timestamp': float_or_none(enterstudio.get('live_start_ms'), scale=1000), - 'uploader': self._html_search_meta('twitter:title', webpage, default=None) or traverse_obj(enterstudio, 'loginname'), - 'uploader_id': video_id, - 'formats': formats, - 'is_live': True, - } - - 
-class MildomVodIE(MildomBaseIE): - IE_NAME = 'mildom:vod' - IE_DESC = 'VOD in Mildom' - _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/playback/(?P\d+)/(?P(?P=user_id)-[a-zA-Z0-9]+-?[0-9]*)' - _TESTS = [{ - 'url': 'https://www.mildom.com/playback/10882672/10882672-1597662269', - 'info_dict': { - 'id': '10882672-1597662269', - 'ext': 'mp4', - 'title': '始めてのミルダム配信じゃぃ!', - 'thumbnail': r're:^https?://.*\.(png|jpg)$', - 'upload_date': '20200817', - 'duration': 4138.37, - 'description': 'ゲームをしたくて!', - 'timestamp': 1597662269.0, - 'uploader_id': '10882672', - 'uploader': 'kson組長(けいそん)', - }, - }, { - 'url': 'https://www.mildom.com/playback/10882672/10882672-1597758589870-477', - 'info_dict': { - 'id': '10882672-1597758589870-477', - 'ext': 'mp4', - 'title': '【kson】感染メイズ!麻酔銃で無双する', - 'thumbnail': r're:^https?://.*\.(png|jpg)$', - 'timestamp': 1597759093.0, - 'uploader': 'kson組長(けいそん)', - 'duration': 4302.58, - 'uploader_id': '10882672', - 'description': 'このステージ絶対乗り越えたい', - 'upload_date': '20200818', - }, - }, { - 'url': 'https://www.mildom.com/playback/10882672/10882672-buha9td2lrn97fk2jme0', - 'info_dict': { - 'id': '10882672-buha9td2lrn97fk2jme0', - 'ext': 'mp4', - 'title': '【kson組長】CART RACER!!!', - 'thumbnail': r're:^https?://.*\.(png|jpg)$', - 'uploader_id': '10882672', - 'uploader': 'kson組長(けいそん)', - 'upload_date': '20201104', - 'timestamp': 1604494797.0, - 'duration': 4657.25, - 'description': 'WTF', - }, - }] - - def _real_extract(self, url): - user_id, video_id = self._match_valid_url(url).group('user_id', 'id') - webpage = self._download_webpage(f'https://www.mildom.com/playback/{user_id}/{video_id}', video_id) - - autoplay = self._call_api( - 'https://cloudac.mildom.com/nonolive/videocontent/playback/getPlaybackDetail', video_id, - note='Downloading playback metadata', query={ - 'v_id': video_id, - })['playback'] - - formats = [{ - 'url': autoplay['audio_url'], - 'format_id': 'audio', - 'protocol': 'm3u8_native', - 'vcodec': 'none', - 'acodec': 'aac', - 
'ext': 'm4a', - }] - for fmt in autoplay['video_link']: - formats.append({ - 'format_id': 'video-{}'.format(fmt['name']), - 'url': fmt['url'], - 'protocol': 'm3u8_native', - 'width': fmt['level'] * autoplay['video_width'] // autoplay['video_height'], - 'height': fmt['level'], - 'vcodec': 'h264', - 'acodec': 'aac', - 'ext': 'mp4', - }) - - return { - 'id': video_id, - 'title': self._html_search_meta(('og:description', 'description'), webpage, default=None) or autoplay.get('title'), - 'description': traverse_obj(autoplay, 'video_intro'), - 'timestamp': float_or_none(autoplay.get('publish_time'), scale=1000), - 'duration': float_or_none(autoplay.get('video_length'), scale=1000), - 'thumbnail': dict_get(autoplay, ('upload_pic', 'video_pic')), - 'uploader': traverse_obj(autoplay, ('author_info', 'login_name')), - 'uploader_id': user_id, - 'formats': formats, - } - - -class MildomClipIE(MildomBaseIE): - IE_NAME = 'mildom:clip' - IE_DESC = 'Clip in Mildom' - _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/clip/(?P(?P\d+)-[a-zA-Z0-9]+)' - _TESTS = [{ - 'url': 'https://www.mildom.com/clip/10042245-63921673e7b147ebb0806d42b5ba5ce9', - 'info_dict': { - 'id': '10042245-63921673e7b147ebb0806d42b5ba5ce9', - 'title': '全然違ったよ', - 'timestamp': 1619181890, - 'duration': 59, - 'thumbnail': r're:https?://.+', - 'uploader': 'ざきんぽ', - 'uploader_id': '10042245', - }, - }, { - 'url': 'https://www.mildom.com/clip/10111524-ebf4036e5aa8411c99fb3a1ae0902864', - 'info_dict': { - 'id': '10111524-ebf4036e5aa8411c99fb3a1ae0902864', - 'title': 'かっこいい', - 'timestamp': 1621094003, - 'duration': 59, - 'thumbnail': r're:https?://.+', - 'uploader': '(ルーキー', - 'uploader_id': '10111524', - }, - }, { - 'url': 'https://www.mildom.com/clip/10660174-2c539e6e277c4aaeb4b1fbe8d22cb902', - 'info_dict': { - 'id': '10660174-2c539e6e277c4aaeb4b1fbe8d22cb902', - 'title': 'あ', - 'timestamp': 1614769431, - 'duration': 31, - 'thumbnail': r're:https?://.+', - 'uploader': 'ドルゴルスレンギーン=ダグワドルジ', - 'uploader_id': 
'10660174', - }, - }] - - def _real_extract(self, url): - user_id, video_id = self._match_valid_url(url).group('user_id', 'id') - webpage = self._download_webpage(f'https://www.mildom.com/clip/{video_id}', video_id) - - clip_detail = self._call_api( - 'https://cloudac-cf-jp.mildom.com/nonolive/videocontent/clip/detail', video_id, - note='Downloading playback metadata', query={ - 'clip_id': video_id, - }) - - return { - 'id': video_id, - 'title': self._html_search_meta( - ('og:description', 'description'), webpage, default=None) or clip_detail.get('title'), - 'timestamp': float_or_none(clip_detail.get('create_time')), - 'duration': float_or_none(clip_detail.get('length')), - 'thumbnail': clip_detail.get('cover'), - 'uploader': traverse_obj(clip_detail, ('user_info', 'loginname')), - 'uploader_id': user_id, - - 'url': clip_detail['url'], - 'ext': determine_ext(clip_detail.get('url'), 'mp4'), - } - - -class MildomUserVodIE(MildomBaseIE): - IE_NAME = 'mildom:user:vod' - IE_DESC = 'Download all VODs from specific user in Mildom' - _VALID_URL = r'https?://(?:(?:www|m)\.)mildom\.com/profile/(?P\d+)' - _TESTS = [{ - 'url': 'https://www.mildom.com/profile/10093333', - 'info_dict': { - 'id': '10093333', - 'title': 'Uploads from ねこばたけ', - }, - 'playlist_mincount': 732, - }, { - 'url': 'https://www.mildom.com/profile/10882672', - 'info_dict': { - 'id': '10882672', - 'title': 'Uploads from kson組長(けいそん)', - }, - 'playlist_mincount': 201, - }] - - def _fetch_page(self, user_id, page): - page += 1 - reply = self._call_api( - 'https://cloudac.mildom.com/nonolive/videocontent/profile/playbackList', - user_id, note=f'Downloading page {page}', query={ - 'user_id': user_id, - 'page': page, - 'limit': '30', - }) - if not reply: - return - for x in reply: - v_id = x.get('v_id') - if not v_id: - continue - yield self.url_result(f'https://www.mildom.com/playback/{user_id}/{v_id}') - - def _real_extract(self, url): - user_id = self._match_id(url) - self.to_screen(f'This will download all 
VODs belonging to user. To download ongoing live video, use "https://www.mildom.com/{user_id}" instead') - - profile = self._call_api( - 'https://cloudac.mildom.com/nonolive/gappserv/user/profileV2', user_id, - query={'user_id': user_id}, note='Downloading user profile')['user_info'] - - return self.playlist_result( - OnDemandPagedList(functools.partial(self._fetch_page, user_id), 30), - user_id, f'Uploads from {profile["loginname"]}') diff --git a/yt_dlp/extractor/mit.py b/yt_dlp/extractor/mit.py index e75c540a2..66c3b0793 100644 --- a/yt_dlp/extractor/mit.py +++ b/yt_dlp/extractor/mit.py @@ -65,7 +65,7 @@ class TechTVMITIE(InfoExtractor): class OCWMITIE(InfoExtractor): IE_NAME = 'ocw.mit.edu' - _VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P[a-z0-9\-]+)' + _VALID_URL = r'https?://ocw\.mit\.edu/courses/(?P[a-z0-9\-]+)' _BASE_URL = 'http://ocw.mit.edu/' _TESTS = [ diff --git a/yt_dlp/extractor/mitele.py b/yt_dlp/extractor/mitele.py index ea2998672..76fef337a 100644 --- a/yt_dlp/extractor/mitele.py +++ b/yt_dlp/extractor/mitele.py @@ -1,14 +1,13 @@ -from .telecinco import TelecincoIE +from .telecinco import TelecincoBaseIE from ..utils import ( int_or_none, parse_iso8601, ) -class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE +class MiTeleIE(TelecincoBaseIE): IE_DESC = 'mitele.es' _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P[^/]+)/player' - _TESTS = [{ 'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player', 'info_dict': { @@ -27,6 +26,7 @@ class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE 'timestamp': 1471209401, 'upload_date': '20160814', }, + 'skip': 'HTTP Error 404 Not Found', }, { # no explicit title 'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player', @@ -49,6 +49,26 @@ class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE 'params': { 'skip_download': True, }, + 'skip': 'HTTP Error 404 Not Found', + }, { + 'url': 
'https://www.mitele.es/programas-tv/horizonte/temporada-5/programa-171-40_013480051/player/', + 'info_dict': { + 'id': '7adbe22e-cd41-4787-afa4-36f3da7c2c6f', + 'ext': 'mp4', + 'title': 'Horizonte Temporada 5 Programa 171', + 'description': 'md5:97f1fb712c5ac27e5693a8b3c5c0c6e3', + 'episode': 'Las Zonas de Bajas Emisiones, a debate', + 'episode_number': 171, + 'season': 'Season 5', + 'season_number': 5, + 'series': 'Horizonte', + 'duration': 7012, + 'upload_date': '20240927', + 'timestamp': 1727416450, + 'thumbnail': 'https://album.mediaset.es/eimg/2024/09/27/horizonte-171_9f02.jpg', + 'age_limit': 12, + }, + 'params': {'geo_bypass_country': 'ES'}, }, { 'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player', 'only_matching': True, @@ -60,9 +80,9 @@ class MiTeleIE(TelecincoIE): # XXX: Do not subclass from concrete IE def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - pre_player = self._parse_json(self._search_regex( - r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})', - webpage, 'Pre Player'), display_id)['prePlayer'] + pre_player = self._search_json( + r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=', + webpage, 'Pre Player', display_id)['prePlayer'] title = pre_player['title'] video_info = self._parse_content(pre_player['video'], url) content = pre_player.get('content') or {} diff --git a/yt_dlp/extractor/mixch.py b/yt_dlp/extractor/mixch.py index 9b7c7b89b..4bccc81bd 100644 --- a/yt_dlp/extractor/mixch.py +++ b/yt_dlp/extractor/mixch.py @@ -12,7 +12,7 @@ from ..utils.traversal import traverse_obj class MixchIE(InfoExtractor): IE_NAME = 'mixch' - _VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P\d+)' + _VALID_URL = r'https?://mixch\.tv/u/(?P\d+)' _TESTS = [{ 'url': 'https://mixch.tv/u/16943797/live', @@ -66,7 +66,7 @@ class MixchIE(InfoExtractor): note='Downloading comments', errnote='Failed to download comments'), (..., { 'author': ('name', {str}), 
'author_id': ('user_id', {str_or_none}), - 'id': ('message_id', {str}, {lambda x: x or None}), + 'id': ('message_id', {str}, filter), 'text': ('body', {str}), 'timestamp': ('created', {int}), })) @@ -74,7 +74,7 @@ class MixchIE(InfoExtractor): class MixchArchiveIE(InfoExtractor): IE_NAME = 'mixch:archive' - _VALID_URL = r'https?://(?:www\.)?mixch\.tv/archive/(?P\d+)' + _VALID_URL = r'https?://mixch\.tv/archive/(?P\d+)' _TESTS = [{ 'url': 'https://mixch.tv/archive/421', @@ -116,3 +116,56 @@ class MixchArchiveIE(InfoExtractor): 'formats': self._extract_m3u8_formats(info_json['archiveURL'], video_id), 'thumbnail': traverse_obj(info_json, ('thumbnailURL', {url_or_none})), } + + +class MixchMovieIE(InfoExtractor): + IE_NAME = 'mixch:movie' + _VALID_URL = r'https?://mixch\.tv/m/(?P\w+)' + + _TESTS = [{ + 'url': 'https://mixch.tv/m/Ve8KNkJ5', + 'info_dict': { + 'id': 'Ve8KNkJ5', + 'title': '夏☀️\nムービーへのポイントは本イベントに加算されないので配信にてお願い致します🙇🏻\u200d♀️\n#TGCCAMPUS #ミス東大 #ミス東大2024 ', + 'ext': 'mp4', + 'uploader': 'ミス東大No.5 松藤百香🍑💫', + 'uploader_id': '12299174', + 'channel_follower_count': int, + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'timestamp': 1724070828, + 'uploader_url': 'https://mixch.tv/u/12299174', + 'live_status': 'not_live', + 'upload_date': '20240819', + }, + }, { + 'url': 'https://mixch.tv/m/61DzpIKE', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + data = self._download_json( + f'https://mixch.tv/api-web/movies/{video_id}', video_id) + return { + 'id': video_id, + 'formats': [{ + 'format_id': 'mp4', + 'url': data['movie']['file'], + 'ext': 'mp4', + }], + **traverse_obj(data, { + 'title': ('movie', 'title', {str}), + 'thumbnail': ('movie', 'thumbnailURL', {url_or_none}), + 'uploader': ('ownerInfo', 'name', {str}), + 'uploader_id': ('ownerInfo', 'id', {int}, {str_or_none}), + 'channel_follower_count': ('ownerInfo', 'fan', {int_or_none}), + 'view_count': ('ownerInfo', 'view', {int_or_none}), + 
'like_count': ('movie', 'favCount', {int_or_none}), + 'comment_count': ('movie', 'commentCount', {int_or_none}), + 'timestamp': ('movie', 'published', {int_or_none}), + 'uploader_url': ('ownerInfo', 'id', {lambda x: x and f'https://mixch.tv/u/{x}'}, filter), + }), + 'live_status': 'not_live', + } diff --git a/yt_dlp/extractor/mojevideo.py b/yt_dlp/extractor/mojevideo.py new file mode 100644 index 000000000..145e30697 --- /dev/null +++ b/yt_dlp/extractor/mojevideo.py @@ -0,0 +1,121 @@ +from .common import InfoExtractor +from ..utils import js_to_json, remove_end, update_url_query + + +class MojevideoIE(InfoExtractor): + IE_DESC = 'mojevideo.sk' + _VALID_URL = r'https?://(?:www\.)?mojevideo\.sk/video/(?P\w+)/(?P[\w()]+?)\.html' + + _TESTS = [{ + 'url': 'https://www.mojevideo.sk/video/3d17c/chlapci_dobetonovali_sme_mame_hotovo.html', + 'md5': '384a4628bd2bbd261c5206cf77c38c17', + 'info_dict': { + 'id': '3d17c', + 'ext': 'mp4', + 'title': 'Chlapci dobetónovali sme, máme hotovo!', + 'display_id': 'chlapci_dobetonovali_sme_mame_hotovo', + 'description': 'md5:a0822126044050d304a9ef58c92ddb34', + 'thumbnail': 'https://fs5.mojevideo.sk/imgfb/250236.jpg', + 'duration': 21.0, + 'upload_date': '20230919', + 'timestamp': 1695129706, + 'like_count': int, + 'dislike_count': int, + 'view_count': int, + 'comment_count': int, + }, + }, { + # 720p + 'url': 'https://www.mojevideo.sk/video/14677/den_blbec.html', + 'md5': '517c3e111c53a67d10b429c1f344ba2f', + 'info_dict': { + 'id': '14677', + 'ext': 'mp4', + 'title': 'Deň blbec?', + 'display_id': 'den_blbec', + 'description': 'I maličkosť vám môže zmeniť celý deň. 
Nikdy nezahadzujte žuvačky na zem!', + 'thumbnail': 'https://fs5.mojevideo.sk/imgfb/83575.jpg', + 'duration': 100.0, + 'upload_date': '20120515', + 'timestamp': 1337076481, + 'like_count': int, + 'dislike_count': int, + 'view_count': int, + 'comment_count': int, + }, + }, { + # 1080p + 'url': 'https://www.mojevideo.sk/video/2feb2/band_maid_onset_(instrumental)_live_zepp_tokyo_(full_hd).html', + 'md5': '64599a23d3ac31cf2fe069e4353d8162', + 'info_dict': { + 'id': '2feb2', + 'ext': 'mp4', + 'title': 'BAND-MAID - onset (Instrumental) Live - Zepp Tokyo (Full HD)', + 'display_id': 'band_maid_onset_(instrumental)_live_zepp_tokyo_(full_hd)', + 'description': 'Výborná inštrumentálna skladba od skupiny BAND-MAID.', + 'thumbnail': 'https://fs5.mojevideo.sk/imgfb/196274.jpg', + 'duration': 240.0, + 'upload_date': '20190708', + 'timestamp': 1562576592, + 'like_count': int, + 'dislike_count': int, + 'view_count': int, + 'comment_count': int, + }, + }, { + # 720p + 'url': 'https://www.mojevideo.sk/video/358c8/dva_nissany_skyline_strielaju_v_londyne.html', + 'only_matching': True, + }, { + # 720p + 'url': 'https://www.mojevideo.sk/video/2455d/gopro_hero4_session_nova_sportova_vodotesna_kamera.html', + 'only_matching': True, + }, { + # 1080p + 'url': 'https://www.mojevideo.sk/video/352ee/amd_rx_6800_xt_vs_nvidia_rtx_3080_(test_v_9_hrach).html', + 'only_matching': True, + }, { + # 1080p + 'url': 'https://www.mojevideo.sk/video/2cbeb/trailer_z_avengers_infinity_war.html', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id, display_id = self._match_valid_url(url).groups() + webpage = self._download_webpage(url, video_id) + + video_id_dec = self._search_regex( + r'\bvId\s*=\s*(\d+)', webpage, 'video id', fatal=False) or str(int(video_id, 16)) + video_exp = self._search_regex(r'\bvEx\s*=\s*["\'](\d+)', webpage, 'video expiry') + video_hashes = self._search_json( + r'\bvHash\s*=', webpage, 'video hashes', video_id, + contains_pattern=r'\[(?s:.+)\]', 
transform_source=js_to_json) + + formats = [] + for video_hash, (suffix, quality, format_note) in zip(video_hashes, [ + ('', 1, 'normálna kvalita'), + ('_lq', 0, 'nízka kvalita'), + ('_hd', 2, 'HD-720p'), + ('_fhd', 3, 'FULL HD-1080p'), + ('_2k', 4, '2K-1440p'), + ]): + formats.append({ + 'format_id': f'mp4-{quality}', + 'quality': quality, + 'format_note': format_note, + 'url': update_url_query( + f'https://cache01.mojevideo.sk/securevideos69/{video_id_dec}{suffix}.mp4', { + 'md5': video_hash, + 'expires': video_exp, + }), + }) + + return { + 'id': video_id, + 'display_id': display_id, + 'formats': formats, + 'title': (self._og_search_title(webpage, default=None) + or remove_end(self._html_extract_title(webpage, 'title'), ' - Mojevideo')), + 'description': self._og_search_description(webpage), + **self._search_json_ld(webpage, video_id, default={}), + } diff --git a/yt_dlp/extractor/monstercat.py b/yt_dlp/extractor/monstercat.py index 930c13e27..f17b91f5a 100644 --- a/yt_dlp/extractor/monstercat.py +++ b/yt_dlp/extractor/monstercat.py @@ -4,15 +4,11 @@ from .common import InfoExtractor from ..utils import ( clean_html, extract_attributes, - get_element_by_class, - get_element_html_by_class, - get_element_text_and_html_by_tag, int_or_none, strip_or_none, - traverse_obj, - try_call, unified_strdate, ) +from ..utils.traversal import find_element, traverse_obj class MonstercatIE(InfoExtractor): @@ -26,19 +22,21 @@ class MonstercatIE(InfoExtractor): 'thumbnail': 'https://www.monstercat.com/release/742779548009/cover', 'release_date': '20230711', 'album': 'The Secret Language of Trees', - 'album_artist': 'BT', + 'album_artists': ['BT'], }, }] def _extract_tracks(self, table, album_meta): for td in re.findall(r'((?:(?!)[\w\W])+)', table): # regex by chatgpt due to lack of get_elements_by_tag - title = clean_html(try_call( - lambda: get_element_by_class('d-inline-flex flex-column', td).partition(' ]+data-audiopath[^>]+>)', playlist), 1): entry = 
traverse_obj(extract_attributes(track), { @@ -200,12 +201,12 @@ class NekoHackerIE(InfoExtractor): 'album': 'data-albumtitle', 'duration': ('data-tracktime', {parse_duration}), 'release_date': ('data-releasedate', {lambda x: re.match(r'\d{8}', x.replace('.', ''))}, 0), - 'thumbnail': ('data-albumart', {url_or_none}), }) entries.append({ **entry, + 'thumbnail': url_or_none(player_params.get('artwork')), 'track_number': track_number, - 'artist': 'Neko Hacker', + 'artists': ['Neko Hacker'], 'vcodec': 'none', 'acodec': 'mp3' if entry['ext'] == 'mp3' else None, }) diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py index a759da214..900b8b2a3 100644 --- a/yt_dlp/extractor/neteasemusic.py +++ b/yt_dlp/extractor/neteasemusic.py @@ -36,10 +36,6 @@ class NetEaseMusicBaseIE(InfoExtractor): _API_BASE = 'http://music.163.com/api/' _GEO_BYPASS = False - @staticmethod - def _kilo_or_none(value): - return int_or_none(value, scale=1000) - def _create_eapi_cipher(self, api_path, query_body, cookies): request_text = json.dumps({**query_body, 'header': cookies}, separators=(',', ':')) @@ -101,7 +97,7 @@ class NetEaseMusicBaseIE(InfoExtractor): 'vcodec': 'none', **traverse_obj(song, { 'ext': ('type', {str}), - 'abr': ('br', {self._kilo_or_none}), + 'abr': ('br', {int_or_none(scale=1000)}), 'filesize': ('size', {int_or_none}), }), }) @@ -282,9 +278,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE): **lyric_data, **traverse_obj(info, { 'title': ('name', {str}), - 'timestamp': ('album', 'publishTime', {self._kilo_or_none}), + 'timestamp': ('album', 'publishTime', {int_or_none(scale=1000)}), 'thumbnail': ('album', 'picUrl', {url_or_none}), - 'duration': ('duration', {self._kilo_or_none}), + 'duration': ('duration', {int_or_none(scale=1000)}), 'album': ('album', 'name', {str}), 'average_rating': ('score', {int_or_none}), }), @@ -440,7 +436,7 @@ class NetEaseMusicListIE(NetEaseMusicBaseIE): 'tags': ('tags', ..., {str}), 'uploader': ('creator', 'nickname', {str}), 
'uploader_id': ('creator', 'userId', {str_or_none}), - 'timestamp': ('updateTime', {self._kilo_or_none}), + 'timestamp': ('updateTime', {int_or_none(scale=1000)}), })) if traverse_obj(info, ('playlist', 'specialType')) == 10: metainfo['title'] = f'{metainfo.get("title")} {strftime_or_none(metainfo.get("timestamp"), "%Y-%m-%d")}' @@ -517,10 +513,10 @@ class NetEaseMusicMvIE(NetEaseMusicBaseIE): 'creators': traverse_obj(info, ('artists', ..., 'name')) or [info.get('artistName')], **traverse_obj(info, { 'title': ('name', {str}), - 'description': (('desc', 'briefDesc'), {str}, {lambda x: x or None}), + 'description': (('desc', 'briefDesc'), {str}, filter), 'upload_date': ('publishTime', {unified_strdate}), 'thumbnail': ('cover', {url_or_none}), - 'duration': ('duration', {self._kilo_or_none}), + 'duration': ('duration', {int_or_none(scale=1000)}), 'view_count': ('playCount', {int_or_none}), 'like_count': ('likeCount', {int_or_none}), 'comment_count': ('commentCount', {int_or_none}), @@ -588,7 +584,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE): 'description': ('description', {str}), 'creator': ('dj', 'brand', {str}), 'thumbnail': ('coverUrl', {url_or_none}), - 'timestamp': ('createTime', {self._kilo_or_none}), + 'timestamp': ('createTime', {int_or_none(scale=1000)}), }) if not self._yes_playlist( @@ -598,7 +594,7 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE): return { 'id': str(info['mainSong']['id']), 'formats': formats, - 'duration': traverse_obj(info, ('mainSong', 'duration', {self._kilo_or_none})), + 'duration': traverse_obj(info, ('mainSong', 'duration', {int_or_none(scale=1000)})), **metainfo, } diff --git a/yt_dlp/extractor/nexx.py b/yt_dlp/extractor/nexx.py index cd32892fa..ee1bc281c 100644 --- a/yt_dlp/extractor/nexx.py +++ b/yt_dlp/extractor/nexx.py @@ -371,7 +371,7 @@ class NexxIE(InfoExtractor): # not all videos work via arc, e.g. 
nexx:741:1269984 if not video: # Reverse engineered from JS code (see getDeviceID function) - device_id = f'{random.randint(1, 4)}:{int(time.time())}:{random.randint(1e4, 99999)}{random.randint(1, 9)}' + device_id = f'{random.randint(1, 4)}:{int(time.time())}:{random.randint(10000, 99999)}{random.randint(1, 9)}' result = self._call_api(domain_id, 'session/init', video_id, data={ 'nxp_devh': device_id, diff --git a/yt_dlp/extractor/nfl.py b/yt_dlp/extractor/nfl.py index c537c1c47..59213a44b 100644 --- a/yt_dlp/extractor/nfl.py +++ b/yt_dlp/extractor/nfl.py @@ -11,9 +11,12 @@ from ..utils import ( clean_html, determine_ext, get_element_by_class, - traverse_obj, + int_or_none, + make_archive_id, + url_or_none, urlencode_postdata, ) +from ..utils.traversal import traverse_obj class NFLBaseIE(InfoExtractor): @@ -75,22 +78,15 @@ class NFLBaseIE(InfoExtractor): 'osVersion': '10.0', }, separators=(',', ':')).encode()).decode(), 'networkType': 'other', - 'nflClaimGroupsToAdd': [], - 'nflClaimGroupsToRemove': [], + 'peacockUUID': 'undefined', } _ACCOUNT_INFO = {} - _API_KEY = None + _API_KEY = '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f' _TOKEN = None _TOKEN_EXPIRY = 0 - def _get_account_info(self, url, slug): - if not self._API_KEY: - webpage = self._download_webpage(url, slug, fatal=False) or '' - self._API_KEY = self._search_regex( - r'window\.gigyaApiKey\s*=\s*["\'](\w+)["\'];', webpage, 'API key', - fatal=False) or '3_Qa8TkWpIB8ESCBT8tY2TukbVKgO5F6BJVc7N1oComdwFzI7H2L9NOWdm11i_BY9f' - + def _get_account_info(self): cookies = self._get_cookies('https://auth-id.nfl.com/') login_token = traverse_obj(cookies, ( (f'glt_{self._API_KEY}', lambda k, _: k.startswith('glt_')), {lambda x: x.value}), get_all=False) @@ -103,7 +99,7 @@ class NFLBaseIE(InfoExtractor): 'or else try using --cookies-from-browser instead', expected=True) account = self._download_json( - 'https://auth-id.nfl.com/accounts.getAccountInfo', slug, + 
'https://auth-id.nfl.com/accounts.getAccountInfo', None, note='Downloading account info', data=urlencode_postdata({ 'include': 'profile,data', 'lang': 'en', @@ -111,7 +107,7 @@ class NFLBaseIE(InfoExtractor): 'sdk': 'js_latest', 'login_token': login_token, 'authMode': 'cookie', - 'pageURL': url, + 'pageURL': 'https://www.nfl.com/', 'sdkBuild': traverse_obj(cookies, ( 'gig_canary_ver', {lambda x: x.value.partition('-')[0]}), default='15170'), 'format': 'json', @@ -126,55 +122,78 @@ class NFLBaseIE(InfoExtractor): if len(self._ACCOUNT_INFO) != 3: raise ExtractorError('Failed to retrieve account info with provided cookies', expected=True) - def _get_auth_token(self, url, slug): + def _get_auth_token(self): if self._TOKEN and self._TOKEN_EXPIRY > int(time.time() + 30): return - if not self._ACCOUNT_INFO: - self._get_account_info(url, slug) - token = self._download_json( 'https://api.nfl.com/identity/v3/token%s' % ( '/refresh' if self._ACCOUNT_INFO.get('refreshToken') else ''), - slug, headers={'Content-Type': 'application/json'}, note='Downloading access token', + None, headers={'Content-Type': 'application/json'}, note='Downloading access token', data=json.dumps({**self._CLIENT_DATA, **self._ACCOUNT_INFO}, separators=(',', ':')).encode()) self._TOKEN = token['accessToken'] self._TOKEN_EXPIRY = token['expiresIn'] self._ACCOUNT_INFO['refreshToken'] = token['refreshToken'] + def _extract_video(self, mcp_id, is_live=False): + self._get_auth_token() + data = self._download_json( + f'https://api.nfl.com/play/v1/asset/{mcp_id}', mcp_id, headers={ + 'Authorization': f'Bearer {self._TOKEN}', + 'Accept': 'application/json', + 'Content-Type': 'application/json', + }, data=json.dumps({'init': True, 'live': is_live}, separators=(',', ':')).encode()) + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + data['accessUrl'], mcp_id, 'mp4', m3u8_id='hls') + + return { + 'id': mcp_id, + 'formats': formats, + 'subtitles': subtitles, + 'is_live': is_live, + 
'_old_archive_ids': [make_archive_id(AnvatoIE, mcp_id)], + **traverse_obj(data, ('metadata', { + 'title': ('event', ('def_title', 'friendlyName'), {str}, any), + 'description': ('event', 'def_description', {str}), + 'duration': ('event', 'duration', {int_or_none}), + 'thumbnails': ('thumbnails', ..., 'url', {'url': {url_or_none}}), + })), + } + def _parse_video_config(self, video_config, display_id): video_config = self._parse_json(video_config, display_id) + is_live = traverse_obj(video_config, ('live', {bool})) or False item = video_config['playlist'][0] - mcp_id = item.get('mcpID') - if mcp_id: - info = self.url_result(f'{self._ANVATO_PREFIX}{mcp_id}', AnvatoIE, mcp_id) + if mcp_id := item.get('mcpID'): + return self._extract_video(mcp_id, is_live=is_live) + + info = {'id': item.get('id') or item['entityId']} + + item_url = item['url'] + ext = determine_ext(item_url) + if ext == 'm3u8': + info['formats'] = self._extract_m3u8_formats(item_url, info['id'], 'mp4') else: - media_id = item.get('id') or item['entityId'] - title = item.get('title') - item_url = item['url'] - info = {'id': media_id} - ext = determine_ext(item_url) - if ext == 'm3u8': - info['formats'] = self._extract_m3u8_formats(item_url, media_id, 'mp4') - else: - info['url'] = item_url - if item.get('audio') is True: - info['vcodec'] = 'none' - is_live = video_config.get('live') is True - thumbnails = None - image_url = item.get(item.get('imageSrc')) or item.get(item.get('posterImage')) - if image_url: - thumbnails = [{ - 'url': image_url, - 'ext': determine_ext(image_url, 'jpg'), - }] - info.update({ - 'title': title, - 'is_live': is_live, - 'description': clean_html(item.get('description')), - 'thumbnails': thumbnails, - }) + info['url'] = item_url + if item.get('audio') is True: + info['vcodec'] = 'none' + + thumbnails = None + if image_url := traverse_obj(item, 'imageSrc', 'posterImage', expected_type=url_or_none): + thumbnails = [{ + 'url': image_url, + 'ext': determine_ext(image_url, 'jpg'), + 
}] + + info.update({ + **traverse_obj(item, { + 'title': ('title', {str}), + 'description': ('description', {clean_html}), + }), + 'is_live': is_live, + 'thumbnails': thumbnails, + }) return info @@ -188,24 +207,20 @@ class NFLIE(NFLBaseIE): 'ext': 'mp4', 'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14", 'description': 'md5:85e05a3cc163f8c344340f220521136d', - 'upload_date': '20201215', - 'timestamp': 1608009755, - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'NFL', - 'tags': 'count:6', + 'thumbnail': r're:https?://.+\.jpg', 'duration': 157, - 'categories': 'count:3', + '_old_archive_ids': ['anvato 899441'], }, }, { 'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown', - 'md5': '6886b32c24b463038c760ceb55a34566', + 'md5': '92a517f05bd3eb50fe50244bc621aec8', 'info_dict': { - 'id': 'd87e8790-3e14-11eb-8ceb-ff05c2867f99', + 'id': '8b7c3625-a461-4751-8db4-85f536f2bbd0', 'ext': 'mp3', 'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown', 'description': 'md5:12ada8ee70e6762658c30e223e095075', + 'thumbnail': 'https://static.clubs.nfl.com/image/private/t_editorial_landscape_12_desktop/v1571153441/chiefs/rfljejccnyhhkpkfq855', }, - 'skip': 'HTTP Error 404: Not Found', }, { 'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14', 'only_matching': True, @@ -236,13 +251,16 @@ class NFLArticleIE(NFLBaseIE): def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - entries = [] - for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage): - entries.append(self._parse_video_config(video_config, display_id)) + + def entries(): + for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage): + yield self._parse_video_config(video_config, display_id) + title = clean_html(get_element_by_class( 'nfl-c-article__title', webpage)) or self._html_search_meta( ['og:title', 
'twitter:title'], webpage) - return self.playlist_result(entries, display_id, title) + + return self.playlist_result(entries(), display_id, title) class NFLPlusReplayIE(NFLBaseIE): @@ -307,6 +325,9 @@ class NFLPlusReplayIE(NFLBaseIE): 'all_22': 'All-22', } + def _real_initialize(self): + self._get_account_info() + def _real_extract(self, url): slug, video_id = self._match_valid_url(url).group('slug', 'id') requested_types = self._configuration_arg('type', ['all']) @@ -315,7 +336,7 @@ class NFLPlusReplayIE(NFLBaseIE): requested_types = traverse_obj(self._REPLAY_TYPES, (None, requested_types)) if not video_id: - self._get_auth_token(url, slug) + self._get_auth_token() headers = {'Authorization': f'Bearer {self._TOKEN}'} game_id = self._download_json( f'https://api.nfl.com/football/v2/games/externalId/slug/{slug}', slug, @@ -328,14 +349,13 @@ class NFLPlusReplayIE(NFLBaseIE): 'items', lambda _, v: v['subType'] == requested_types[0], 'mcpPlaybackId'), get_all=False) if video_id: - return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id) + return self._extract_video(video_id) def entries(): for replay in traverse_obj( replays, ('items', lambda _, v: v['mcpPlaybackId'] and v['subType'] in requested_types), ): - video_id = replay['mcpPlaybackId'] - yield self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id) + yield self._extract_video(replay['mcpPlaybackId']) return self.playlist_result(entries(), slug) @@ -362,12 +382,15 @@ class NFLPlusEpisodeIE(NFLBaseIE): 'params': {'skip_download': 'm3u8'}, }] + def _real_initialize(self): + self._get_account_info() + def _real_extract(self, url): slug = self._match_id(url) - self._get_auth_token(url, slug) + self._get_auth_token() video_id = self._download_json( f'https://api.nfl.com/content/v1/videos/episodes/{slug}', slug, headers={ 'Authorization': f'Bearer {self._TOKEN}', })['mcpPlaybackId'] - return self.url_result(f'{self._ANVATO_PREFIX}{video_id}', AnvatoIE, video_id) + return 
self._extract_video(video_id) diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 9d7b010c5..29fc1da1e 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -40,7 +40,6 @@ class NiconicoIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.nicovideo.jp/watch/sm22312215', - 'md5': 'd1a75c0823e2f629128c43e1212760f9', 'info_dict': { 'id': 'sm22312215', 'ext': 'mp4', @@ -56,8 +55,8 @@ class NiconicoIE(InfoExtractor): 'comment_count': int, 'genres': ['未設定'], 'tags': [], - 'expected_protocol': str, }, + 'params': {'skip_download': 'm3u8'}, }, { # File downloaded with and without credentials are different, so omit # the md5 field @@ -77,8 +76,8 @@ class NiconicoIE(InfoExtractor): 'view_count': int, 'genres': ['音楽・サウンド'], 'tags': ['Translation_Request', 'Kagamine_Rin', 'Rin_Original'], - 'expected_protocol': str, }, + 'params': {'skip_download': 'm3u8'}, }, { # 'video exists but is marked as "deleted" # md5 is unstable @@ -112,7 +111,6 @@ class NiconicoIE(InfoExtractor): }, { # video not available via `getflv`; "old" HTML5 video 'url': 'http://www.nicovideo.jp/watch/sm1151009', - 'md5': 'f95a3d259172667b293530cc2e41ebda', 'info_dict': { 'id': 'sm1151009', 'ext': 'mp4', @@ -128,11 +126,10 @@ class NiconicoIE(InfoExtractor): 'comment_count': int, 'genres': ['ゲーム'], 'tags': [], - 'expected_protocol': str, }, + 'params': {'skip_download': 'm3u8'}, }, { # "New" HTML5 video - # md5 is unstable 'url': 'http://www.nicovideo.jp/watch/sm31464864', 'info_dict': { 'id': 'sm31464864', @@ -149,12 +146,11 @@ class NiconicoIE(InfoExtractor): 'comment_count': int, 'genres': ['アニメ'], 'tags': [], - 'expected_protocol': str, }, + 'params': {'skip_download': 'm3u8'}, }, { # Video without owner 'url': 'http://www.nicovideo.jp/watch/sm18238488', - 'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e', 'info_dict': { 'id': 'sm18238488', 'ext': 'mp4', @@ -168,8 +164,8 @@ class NiconicoIE(InfoExtractor): 'comment_count': int, 'genres': ['エンターテイメント'], 'tags': [], 
- 'expected_protocol': str, }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg', 'only_matching': True, @@ -375,11 +371,11 @@ class NiconicoIE(InfoExtractor): 'acodec': 'aac', 'vcodec': 'h264', **traverse_obj(audio_quality, ('metadata', { - 'abr': ('bitrate', {functools.partial(float_or_none, scale=1000)}), + 'abr': ('bitrate', {float_or_none(scale=1000)}), 'asr': ('samplingRate', {int_or_none}), })), **traverse_obj(video_quality, ('metadata', { - 'vbr': ('bitrate', {functools.partial(float_or_none, scale=1000)}), + 'vbr': ('bitrate', {float_or_none(scale=1000)}), 'height': ('resolution', 'height', {int_or_none}), 'width': ('resolution', 'width', {int_or_none}), })), @@ -424,7 +420,7 @@ class NiconicoIE(InfoExtractor): 'x-request-with': 'https://www.nicovideo.jp', })['data']['contentUrl'] # Getting all audio formats results in duplicate video formats which we filter out later - dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id) + dms_fmts = self._extract_m3u8_formats(dms_m3u8_url, video_id, 'mp4') # m3u8 extraction does not provide audio bitrates, so extract from the API data and fix for audio_fmt in traverse_obj(dms_fmts, lambda _, v: v['vcodec'] == 'none'): @@ -432,11 +428,10 @@ class NiconicoIE(InfoExtractor): **audio_fmt, **traverse_obj(audios, (lambda _, v: audio_fmt['format_id'].startswith(v['id']), { 'format_id': ('id', {str}), - 'abr': ('bitRate', {functools.partial(float_or_none, scale=1000)}), + 'abr': ('bitRate', {float_or_none(scale=1000)}), 'asr': ('samplingRate', {int_or_none}), }), get_all=False), 'acodec': 'aac', - 'ext': 'm4a', } # Sort before removing dupes to keep the format dicts with the lowest tbr @@ -458,9 +453,11 @@ class NiconicoIE(InfoExtractor): if video_id.startswith('so'): video_id = self._match_id(handle.url) - api_data = self._parse_json(self._html_search_regex( - 'data-api-data="([^"]+)"', webpage, - 'API data', default='{}'), video_id) + api_data = 
traverse_obj( + self._parse_json(self._html_search_meta('server-response', webpage) or '', video_id), + ('data', 'response', {dict})) + if not api_data: + raise ExtractorError('Server response data not found') except ExtractorError as e: try: api_data = self._download_json( @@ -872,7 +869,7 @@ class NicovideoTagURLIE(NicovideoSearchBaseIE): class NiconicoUserIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P\d+)/?(?:$|[#?])' + _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P\d+)(?:/video)?/?(?:$|[#?])' _TEST = { 'url': 'https://www.nicovideo.jp/user/419948', 'info_dict': { @@ -880,7 +877,7 @@ class NiconicoUserIE(InfoExtractor): }, 'playlist_mincount': 101, } - _API_URL = 'https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s' + _API_URL = 'https://nvapi.nicovideo.jp/v2/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s' _PAGE_SIZE = 100 _API_HEADERS = { @@ -900,12 +897,13 @@ class NiconicoUserIE(InfoExtractor): total_count = int_or_none(json_parsed['data'].get('totalCount')) for entry in json_parsed['data']['items']: count += 1 - yield self.url_result('https://www.nicovideo.jp/watch/{}'.format(entry['id'])) + yield self.url_result( + f'https://www.nicovideo.jp/watch/{entry["essential"]["id"]}', ie=NiconicoIE) page_num += 1 def _real_extract(self, url): list_id = self._match_id(url) - return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key()) + return self.playlist_result(self._entries(list_id), list_id) class NiconicoLiveIE(InfoExtractor): diff --git a/yt_dlp/extractor/noodlemagazine.py b/yt_dlp/extractor/noodlemagazine.py index 6414f46ef..4a73e4779 100644 --- a/yt_dlp/extractor/noodlemagazine.py +++ b/yt_dlp/extractor/noodlemagazine.py @@ -43,14 +43,8 @@ class NoodleMagazineIE(InfoExtractor): def build_url(url_or_path): return urljoin('https://adult.noodlemagazine.com', url_or_path) - headers = {'Referer': url} - player_path = 
self._html_search_regex( - r'<iframe[^>]+\bid="iplayer"[^>]+\bsrc="([^"]+)"', webpage, 'player path') - player_iframe = self._download_webpage( - build_url(player_path), video_id, 'Downloading iframe page', headers=headers) - playlist_url = self._search_regex( - r'window\.playlistUrl\s*=\s*["\']([^"\']+)["\']', player_iframe, 'playlist url') - playlist_info = self._download_json(build_url(playlist_url), video_id, headers=headers) + playlist_info = self._search_json( + r'window\.playlist\s*=', webpage, video_id, 'playlist info') formats = [] for source in traverse_obj(playlist_info, ('sources', lambda _, v: v['file'])): diff --git a/yt_dlp/extractor/nubilesporn.py b/yt_dlp/extractor/nubilesporn.py index c2079d8b0..47c7be61d 100644 --- a/yt_dlp/extractor/nubilesporn.py +++ b/yt_dlp/extractor/nubilesporn.py @@ -10,10 +10,10 @@ from ..utils import ( get_element_html_by_class, get_elements_by_class, int_or_none, - try_call, unified_timestamp, urlencode_postdata, ) +from ..utils.traversal import find_element, find_elements, traverse_obj class NubilesPornIE(InfoExtractor): @@ -70,9 +70,8 @@ class NubilesPornIE(InfoExtractor): url, get_element_by_class('watch-page-video-wrapper', page), video_id)[0] channel_id, channel_name = self._search_regex( - r'/video/website/(?P<id>\d+).+>(?P<name>\w+).com', get_element_html_by_class('site-link', page), + r'/video/website/(?P<id>\d+).+>(?P<name>\w+).com', get_element_html_by_class('site-link', page) or '', 'channel', fatal=False, group=('id', 'name')) or (None, None) - channel_name = re.sub(r'([^A-Z]+)([A-Z]+)', r'\1 \2', channel_name) return { 'id': video_id, @@ -82,14 +81,14 @@ class NubilesPornIE(InfoExtractor): 'thumbnail': media_entries.get('thumbnail'), 'description': clean_html(get_element_html_by_class('content-pane-description', page)), 'timestamp': unified_timestamp(get_element_by_class('date', page)), - 'channel': channel_name, + 'channel': re.sub(r'([^A-Z]+)([A-Z]+)', r'\1 \2', channel_name) if channel_name else None, 'channel_id': channel_id, 
'channel_url': format_field(channel_id, None, 'https://members.nubiles-porn.com/video/website/%s'), 'like_count': int_or_none(get_element_by_id('likecount', page)), 'average_rating': float_or_none(get_element_by_class('score', page)), 'age_limit': 18, - 'categories': try_call(lambda: list(map(clean_html, get_elements_by_class('btn', get_element_by_class('categories', page))))), - 'tags': try_call(lambda: list(map(clean_html, get_elements_by_class('btn', get_elements_by_class('tags', page)[1])))), + 'categories': traverse_obj(page, ({find_element(cls='categories')}, {find_elements(cls='btn')}, ..., {clean_html})), + 'tags': traverse_obj(page, ({find_elements(cls='tags')}, 1, {find_elements(cls='btn')}, ..., {clean_html})), 'cast': get_elements_by_class('content-pane-performer', page), 'availability': 'needs_auth', 'series': channel_name, diff --git a/yt_dlp/extractor/nytimes.py b/yt_dlp/extractor/nytimes.py index 5ec3cdd67..9ef57410a 100644 --- a/yt_dlp/extractor/nytimes.py +++ b/yt_dlp/extractor/nytimes.py @@ -235,7 +235,7 @@ class NYTimesArticleIE(NYTimesBaseIE): details = traverse_obj(block, { 'id': ('sourceId', {str}), 'uploader': ('bylines', ..., 'renderedRepresentation', {str}), - 'duration': (None, (('duration', {lambda x: float_or_none(x, scale=1000)}), ('length', {int_or_none}))), + 'duration': (None, (('duration', {float_or_none(scale=1000)}), ('length', {int_or_none}))), 'timestamp': ('firstPublished', {parse_iso8601}), 'series': ('podcastSeries', {str}), }, get_all=False) diff --git a/yt_dlp/extractor/nzonscreen.py b/yt_dlp/extractor/nzonscreen.py index 5fc516daf..755039804 100644 --- a/yt_dlp/extractor/nzonscreen.py +++ b/yt_dlp/extractor/nzonscreen.py @@ -10,7 +10,7 @@ from ..utils import ( class NZOnScreenIE(InfoExtractor): - _VALID_URL = r'^https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 
'https://www.nzonscreen.com/title/shoop-shoop-diddy-wop-cumma-cumma-wang-dang-1982', 'info_dict': { diff --git a/yt_dlp/extractor/nzz.py b/yt_dlp/extractor/nzz.py index ac3b73156..047c4e1ac 100644 --- a/yt_dlp/extractor/nzz.py +++ b/yt_dlp/extractor/nzz.py @@ -1,9 +1,6 @@ import re from .common import InfoExtractor -from ..utils import ( - extract_attributes, -) class NZZIE(InfoExtractor): @@ -22,19 +19,14 @@ class NZZIE(InfoExtractor): 'playlist_count': 1, }] + def _entries(self, webpage, page_id): + for script in re.findall(r'(?s)<script[^>]* data-hid="jw-video-jw[^>]+>(.+?)</script>', webpage): + settings = self._search_json(r'var\s+settings\s*=[^{]*', script, 'settings', page_id, fatal=False) + if entry := self._parse_jwplayer_data(settings, page_id): + yield entry + def _real_extract(self, url): page_id = self._match_id(url) webpage = self._download_webpage(url, page_id) - entries = [] - for player_element in re.findall( - r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage): - player_params = extract_attributes(player_element) - if player_params.get('data-type') not in ('kaltura_singleArticle',): - self.report_warning('Unsupported player type') - continue - entry_id = player_params['data-id'] - entries.append(self.url_result( - 'kaltura:1750922:' + entry_id, 'Kaltura', entry_id)) - - return self.playlist_result(entries, page_id) + return self.playlist_result(self._entries(webpage, page_id), page_id) diff --git a/yt_dlp/extractor/ondemandkorea.py b/yt_dlp/extractor/ondemandkorea.py index 591b4147e..1921f3fd8 100644 --- a/yt_dlp/extractor/ondemandkorea.py +++ b/yt_dlp/extractor/ondemandkorea.py @@ -115,7 +115,7 @@ class OnDemandKoreaIE(InfoExtractor): **traverse_obj(data, { 'thumbnail': ('episode', 'images', 'thumbnail', {url_or_none}), 'release_date': ('episode', 'release_date', {lambda x: x.replace('-', '')}, {unified_strdate}), - 'duration': ('duration', {functools.partial(float_or_none, scale=1000)}), + 'duration': ('duration', {float_or_none(scale=1000)}), 'age_limit': 
('age_rating', 'name', {lambda x: x.replace('R', '')}, {parse_age_limit}), 'series': ('episode', {if_series(key='program')}, 'title'), 'series_id': ('episode', {if_series(key='program')}, 'id', {str_or_none}), diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py index 9c37a54d6..12c4a2104 100644 --- a/yt_dlp/extractor/orf.py +++ b/yt_dlp/extractor/orf.py @@ -1,5 +1,4 @@ import base64 -import functools import re from .common import InfoExtractor @@ -192,7 +191,7 @@ class ORFPodcastIE(InfoExtractor): 'ext': ('enclosures', 0, 'type', {mimetype2ext}), 'title': 'title', 'description': ('description', {clean_html}), - 'duration': ('duration', {functools.partial(float_or_none, scale=1000)}), + 'duration': ('duration', {float_or_none(scale=1000)}), 'series': ('podcast', 'title'), })), } @@ -494,7 +493,7 @@ class ORFONIE(InfoExtractor): return traverse_obj(api_json, { 'id': ('id', {int}, {str_or_none}), 'age_limit': ('age_classification', {parse_age_limit}), - 'duration': ('exact_duration', {functools.partial(float_or_none, scale=1000)}), + 'duration': ('exact_duration', {float_or_none(scale=1000)}), 'title': (('title', 'headline'), {str}), 'description': (('description', 'teaser_text'), {str}), 'media_type': ('video_type', {str}), diff --git a/yt_dlp/extractor/parler.py b/yt_dlp/extractor/parler.py index 9be288a7d..e5bb3be4e 100644 --- a/yt_dlp/extractor/parler.py +++ b/yt_dlp/extractor/parler.py @@ -1,5 +1,3 @@ -import functools - from .common import InfoExtractor from .youtube import YoutubeIE from ..utils import ( @@ -83,7 +81,7 @@ class ParlerIE(InfoExtractor): 'timestamp': ('date_created', {unified_timestamp}), 'uploader': ('user', 'name', {strip_or_none}), 'uploader_id': ('user', 'username', {str}), - 'uploader_url': ('user', 'username', {functools.partial(urljoin, 'https://parler.com/')}), + 'uploader_url': ('user', 'username', {urljoin('https://parler.com/')}), 'view_count': ('views', {int_or_none}), 'comment_count': ('total_comments', {int_or_none}), 
'repost_count': ('echos', {int_or_none}), diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 7d6e8439c..6bdeaf157 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -1,3 +1,4 @@ +import functools import itertools import urllib.parse @@ -15,20 +16,26 @@ from ..utils import ( parse_iso8601, smuggle_url, str_or_none, - traverse_obj, url_or_none, urljoin, ) +from ..utils.traversal import traverse_obj, value class PatreonBaseIE(InfoExtractor): - USER_AGENT = 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)' + @functools.cached_property + def patreon_user_agent(self): + # Patreon mobile UA is needed to avoid triggering Cloudflare anti-bot protection. + # Newer UA yields higher res m3u8 formats for locked posts, but gives 401 if not logged-in + if self._get_cookies('https://www.patreon.com/').get('session_id'): + return 'Patreon/72.2.28 (Android; Android 14; Scale/2.10)' + return 'Patreon/7.6.28 (Android; Android 11; Scale/2.10)' def _call_api(self, ep, item_id, query=None, headers=None, fatal=True, note=None): if headers is None: headers = {} if 'User-Agent' not in headers: - headers['User-Agent'] = self.USER_AGENT + headers['User-Agent'] = self.patreon_user_agent if query: query.update({'json-api-version': 1.0}) @@ -48,6 +55,7 @@ class PatreonBaseIE(InfoExtractor): class PatreonIE(PatreonBaseIE): + IE_NAME = 'patreon' _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.patreon.com/creation?hid=743933', @@ -111,6 +119,7 @@ class PatreonIE(PatreonBaseIE): 'comment_count': int, 'channel_is_verified': True, 'chapters': 'count:4', + 'timestamp': 1423689666, }, 'params': { 'noplaylist': True, @@ -221,6 +230,7 @@ class PatreonIE(PatreonBaseIE): 'thumbnail': r're:^https?://.+', }, 'params': {'skip_download': 'm3u8'}, + 'expected_warnings': ['Failed to parse XML: not well-formed'], }, { # multiple attachments/embeds 'url': 
'https://www.patreon.com/posts/holy-wars-solos-100601977', @@ -242,6 +252,27 @@ class PatreonIE(PatreonBaseIE): 'thumbnail': r're:^https?://.+', }, 'skip': 'Patron-only content', + }, { + # Contains a comment reply in the 'included' section + 'url': 'https://www.patreon.com/posts/114721679', + 'info_dict': { + 'id': '114721679', + 'ext': 'mp4', + 'upload_date': '20241025', + 'uploader': 'Japanalysis', + 'like_count': int, + 'thumbnail': r're:^https?://.+', + 'comment_count': int, + 'title': 'Karasawa Part 2', + 'description': 'Part 2 of this video https://www.youtube.com/watch?v=Azms2-VTASk', + 'uploader_url': 'https://www.patreon.com/japanalysis', + 'uploader_id': '80504268', + 'channel_url': 'https://www.patreon.com/japanalysis', + 'channel_follower_count': int, + 'timestamp': 1729897015, + 'channel_id': '9346307', + }, + 'params': {'getcomments': True}, }] _RETURN_TYPE = 'video' @@ -326,8 +357,13 @@ class PatreonIE(PatreonBaseIE): if embed_url and (urlh := self._request_webpage( embed_url, video_id, 'Checking embed URL', headers=headers, fatal=False, errnote=False, expected_status=403)): + # Vimeo's Cloudflare anti-bot protection will return HTTP status 200 for 404, so we need + # to check for "Sorry, we couldn&rsquo;t find that page" in the meta description tag + meta_description = clean_html(self._html_search_meta( + 'description', self._webpage_read_content(urlh, embed_url, video_id, fatal=False), default=None)) # Password-protected vids.io embeds return 403 errors w/o --video-password or session cookie - if urlh.status != 403 or VidsIoIE.suitable(embed_url): + if ((urlh.status != 403 and meta_description != 'Sorry, we couldn’t find that page') + or VidsIoIE.suitable(embed_url)): entries.append(self.url_result(smuggle_url(embed_url, headers))) post_file = traverse_obj(attributes, ('post_file', {dict})) @@ -389,26 +425,24 @@ class PatreonIE(PatreonBaseIE): f'posts/{post_id}/comments', post_id, query=params, note=f'Downloading comments page {page}') cursor = 
None - for comment in traverse_obj(response, (('data', ('included', lambda _, v: v['type'] == 'comment')), ...)): + for comment in traverse_obj(response, (('data', 'included'), lambda _, v: v['type'] == 'comment' and v['id'])): count += 1 - comment_id = comment.get('id') - attributes = comment.get('attributes') or {} - if comment_id is None: - continue author_id = traverse_obj(comment, ('relationships', 'commenter', 'data', 'id')) - author_info = traverse_obj( - response, ('included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes'), - get_all=False, expected_type=dict, default={}) yield { - 'id': comment_id, - 'text': attributes.get('body'), - 'timestamp': parse_iso8601(attributes.get('created')), - 'parent': traverse_obj(comment, ('relationships', 'parent', 'data', 'id'), default='root'), - 'author_is_uploader': attributes.get('is_by_creator'), + **traverse_obj(comment, { + 'id': ('id', {str_or_none}), + 'text': ('attributes', 'body', {str}), + 'timestamp': ('attributes', 'created', {parse_iso8601}), + 'parent': ('relationships', 'parent', 'data', ('id', {value('root')}), {str}, any), + 'author_is_uploader': ('attributes', 'is_by_creator', {bool}), + }), + **traverse_obj(response, ( + 'included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes', { + 'author': ('full_name', {str}), + 'author_thumbnail': ('image_url', {url_or_none}), + }), get_all=False), 'author_id': author_id, - 'author': author_info.get('full_name'), - 'author_thumbnail': author_info.get('image_url'), } if count < traverse_obj(response, ('meta', 'count')): @@ -419,15 +453,19 @@ class PatreonIE(PatreonBaseIE): class PatreonCampaignIE(PatreonBaseIE): - - _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?!rss)(?:(?:m/(?P<campaign_id>\d+))|(?P<vanity>[-\w]+))' + IE_NAME = 'patreon:campaign' + _VALID_URL = r'''(?x) + https?://(?:www\.)?patreon\.com/(?: + (?:m|api/campaigns)/(?P<campaign_id>\d+)| + (?P<vanity>(?!creation[?/]|posts/|rss[?/])[\w-]+) + )(?:/posts)?/?(?:$|[?#])''' _TESTS = [{ 'url': 
'https://www.patreon.com/dissonancepod/', 'info_dict': { 'title': 'Cognitive Dissonance Podcast', 'channel_url': 'https://www.patreon.com/dissonancepod', 'id': '80642', - 'description': 'md5:eb2fa8b83da7ab887adeac34da6b7af7', + 'description': r're:(?s).*We produce a weekly news podcast focusing on stories that deal with skepticism and religion.*', 'channel_id': '80642', 'channel': 'Cognitive Dissonance Podcast', 'age_limit': 0, @@ -442,31 +480,46 @@ class PatreonCampaignIE(PatreonBaseIE): 'url': 'https://www.patreon.com/m/4767637/posts', 'info_dict': { 'title': 'Not Just Bikes', - 'channel_follower_count': int, 'id': '4767637', 'channel_id': '4767637', 'channel_url': 'https://www.patreon.com/notjustbikes', - 'description': 'md5:595c6e7dca76ae615b1d38c298a287a1', + 'description': r're:(?s).*Not Just Bikes started as a way to explain why we chose to live in the Netherlands.*', 'age_limit': 0, 'channel': 'Not Just Bikes', 'uploader_url': 'https://www.patreon.com/notjustbikes', - 'uploader': 'Not Just Bikes', + 'uploader': 'Jason', 'uploader_id': '37306634', 'thumbnail': r're:^https?://.*$', }, 'playlist_mincount': 71, + }, { + 'url': 'https://www.patreon.com/api/campaigns/4243769/posts', + 'info_dict': { + 'title': 'Second Thought', + 'channel_follower_count': int, + 'id': '4243769', + 'channel_id': '4243769', + 'channel_url': 'https://www.patreon.com/secondthought', + 'description': r're:(?s).*Second Thought is an educational YouTube channel.*', + 'age_limit': 0, + 'channel': 'Second Thought', + 'uploader_url': 'https://www.patreon.com/secondthought', + 'uploader': 'JT Chapman', + 'uploader_id': '32718287', + 'thumbnail': r're:^https?://.*$', + }, + 'playlist_mincount': 201, }, { 'url': 'https://www.patreon.com/dissonancepod/posts', 'only_matching': True, }, { 'url': 'https://www.patreon.com/m/5932659', 'only_matching': True, + }, { + 'url': 'https://www.patreon.com/api/campaigns/4243769', + 'only_matching': True, }] - @classmethod - def suitable(cls, url): - return 
False if PatreonIE.suitable(url) else super().suitable(url) - def _entries(self, campaign_id): cursor = None params = { @@ -493,7 +546,7 @@ class PatreonCampaignIE(PatreonBaseIE): campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity') if campaign_id is None: - webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.USER_AGENT}) + webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.patreon_user_agent}) campaign_id = self._search_nextjs_data( webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id'] diff --git a/yt_dlp/extractor/pialive.py b/yt_dlp/extractor/pialive.py new file mode 100644 index 000000000..7469135c1 --- /dev/null +++ b/yt_dlp/extractor/pialive.py @@ -0,0 +1,122 @@ +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + clean_html, + extract_attributes, + get_element_by_class, + get_element_html_by_class, + multipart_encode, + str_or_none, + unified_timestamp, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class PiaLiveIE(InfoExtractor): + _VALID_URL = r'https?://player\.pia-live\.jp/stream/(?P<id>[\w-]+)' + _PLAYER_ROOT_URL = 'https://player.pia-live.jp/' + _PIA_LIVE_API_URL = 'https://api.pia-live.jp' + _API_KEY = 'kfds)FKFps-dms9e' + _TESTS = [{ + 'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krUDqGOwN4d61dCWQYOd6CTxl4hjya9dsfEZGsM4uGOUdax60lEI4twsXGXf7crmz8Gk__GhupTrWxA7RFRVt76', + 'info_dict': { + 'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84', + 'display_id': '2431867_001', + 'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)', + 'live_status': 'was_live', + 'comment_count': int, + }, + 'params': { + 'getcomments': True, + 'skip_download': True, + 'ignore_no_formats_error': True, + }, + 'skip': 'The video is no longer available', + }, { + 'url': 
'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krJdu0GVBVbVy01IwpJ6J3qBEm3d9TCTt1d0eWpsZGj7DrOjVOmS7GAWGwyscMgiThopJvzgWC4H5b-7XQjAfRZ', + 'info_dict': { + 'id': '9ce8b8ba-f6d1-4d1f-83a0-18c3148ded93', + 'display_id': '2431867_002', + 'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)', + 'live_status': 'was_live', + 'comment_count': int, + }, + 'params': { + 'getcomments': True, + 'skip_download': True, + 'ignore_no_formats_error': True, + }, + 'skip': 'The video is no longer available', + }] + + def _extract_var(self, variable, html): + return self._search_regex( + rf'(?:var|const|let)\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', + html, f'variable {variable}', group='value') + + def _real_extract(self, url): + video_key = self._match_id(url) + webpage = self._download_webpage(url, video_key) + + program_code = self._extract_var('programCode', webpage) + article_code = self._extract_var('articleCode', webpage) + title = self._html_extract_title(webpage) + + if get_element_html_by_class('play-end', webpage): + raise ExtractorError('The video is no longer available', expected=True, video_id=program_code) + + if start_info := clean_html(get_element_by_class('play-waiting__date', webpage)): + date, time = self._search_regex( + r'(?P<date>\d{4}/\d{1,2}/\d{1,2})\([月火水木金土日]\)(?P