Merge branch 'yt-dlp:master' into pr/6498

This commit is contained in:
bashonly 2023-07-22 13:23:54 -05:00
commit 194bc49c55
No known key found for this signature in database
GPG key ID: 783F096F253D15B0
226 changed files with 6016 additions and 2405 deletions

View file

@ -18,7 +18,7 @@ body:
options: options:
- label: I'm reporting that yt-dlp is broken on a **supported** site - label: I'm reporting that yt-dlp is broken on a **supported** site
required: true required: true
- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true required: true
@ -64,7 +64,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube'] [debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i'] [debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe) [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs [debug] Checking exe version: ffprobe -bsfs
@ -72,8 +72,8 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.06.22, Current version: 2023.06.22 Latest version: 2023.07.06, Current version: 2023.07.06
yt-dlp is up to date (2023.06.22) yt-dlp is up to date (2023.07.06)
<more lines> <more lines>
render: shell render: shell
validations: validations:

View file

@ -18,7 +18,7 @@ body:
options: options:
- label: I'm reporting a new site support request - label: I'm reporting a new site support request
required: true required: true
- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true required: true
@ -76,7 +76,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube'] [debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i'] [debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe) [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs [debug] Checking exe version: ffprobe -bsfs
@ -84,8 +84,8 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.06.22, Current version: 2023.06.22 Latest version: 2023.07.06, Current version: 2023.07.06
yt-dlp is up to date (2023.06.22) yt-dlp is up to date (2023.07.06)
<more lines> <more lines>
render: shell render: shell
validations: validations:

View file

@ -18,7 +18,7 @@ body:
options: options:
- label: I'm requesting a site-specific feature - label: I'm requesting a site-specific feature
required: true required: true
- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true required: true
@ -72,7 +72,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube'] [debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i'] [debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe) [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs [debug] Checking exe version: ffprobe -bsfs
@ -80,8 +80,8 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.06.22, Current version: 2023.06.22 Latest version: 2023.07.06, Current version: 2023.07.06
yt-dlp is up to date (2023.06.22) yt-dlp is up to date (2023.07.06)
<more lines> <more lines>
render: shell render: shell
validations: validations:

View file

@ -18,7 +18,7 @@ body:
options: options:
- label: I'm reporting a bug unrelated to a specific site - label: I'm reporting a bug unrelated to a specific site
required: true required: true
- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true required: true
- label: I've checked that all provided URLs are playable in a browser with the same IP and same login details - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
required: true required: true
@ -57,7 +57,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube'] [debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i'] [debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe) [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs [debug] Checking exe version: ffprobe -bsfs
@ -65,8 +65,8 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.06.22, Current version: 2023.06.22 Latest version: 2023.07.06, Current version: 2023.07.06
yt-dlp is up to date (2023.06.22) yt-dlp is up to date (2023.07.06)
<more lines> <more lines>
render: shell render: shell
validations: validations:

View file

@ -20,7 +20,7 @@ body:
required: true required: true
- label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
required: true required: true
- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true required: true
- label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
required: true required: true
@ -53,7 +53,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube'] [debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i'] [debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe) [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs [debug] Checking exe version: ffprobe -bsfs
@ -61,7 +61,7 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.06.22, Current version: 2023.06.22 Latest version: 2023.07.06, Current version: 2023.07.06
yt-dlp is up to date (2023.06.22) yt-dlp is up to date (2023.07.06)
<more lines> <more lines>
render: shell render: shell

View file

@ -26,7 +26,7 @@ body:
required: true required: true
- label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
required: true required: true
- label: I've verified that I'm running yt-dlp version **2023.06.22** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) - label: I've verified that I'm running yt-dlp version **2023.07.06** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit)
required: true required: true
- label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates
required: true required: true
@ -59,7 +59,7 @@ body:
[debug] Command-line config: ['-vU', 'test:youtube'] [debug] Command-line config: ['-vU', 'test:youtube']
[debug] Portable config "yt-dlp.conf": ['-i'] [debug] Portable config "yt-dlp.conf": ['-i']
[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8 [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
[debug] yt-dlp version 2023.06.22 [9d339c4] (win32_exe) [debug] yt-dlp version 2023.07.06 [9d339c4] (win32_exe)
[debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0 [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
[debug] Checking exe version: ffmpeg -bsfs [debug] Checking exe version: ffmpeg -bsfs
[debug] Checking exe version: ffprobe -bsfs [debug] Checking exe version: ffprobe -bsfs
@ -67,7 +67,7 @@ body:
[debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3 [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
[debug] Proxy map: {} [debug] Proxy map: {}
[debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest
Latest version: 2023.06.22, Current version: 2023.06.22 Latest version: 2023.07.06, Current version: 2023.07.06
yt-dlp is up to date (2023.06.22) yt-dlp is up to date (2023.07.06)
<more lines> <more lines>
render: shell render: shell

65
.github/workflows/codeql.yml vendored Normal file
View file

@ -0,0 +1,65 @@
name: "CodeQL"
on:
push:
branches: [ 'master', 'gh-pages', 'release' ]
pull_request:
# The branches below must be a subset of the branches above
branches: [ 'master' ]
schedule:
- cron: '59 11 * * 5'
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write
strategy:
fail-fast: false
matrix:
language: [ 'python' ]
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
# Use only 'java' to analyze code written in Java, Kotlin or both
# Use only 'javascript' to analyze code written in JavaScript, TypeScript or both
# Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
steps:
- name: Checkout repository
uses: actions/checkout@v3
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.
# For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
# queries: security-extended,security-and-quality
# Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v2
# Command-line programs to run using the OS shell.
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
# If the Autobuild fails above, remove it and uncomment the following three lines.
# Modify them (or add more) to build your code as your project requires; please refer to the EXAMPLE below for guidance.
# - run: |
# echo "Run, Build Application using script"
# ./location_of_script_within_repo/buildscript.sh
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2
with:
category: "/language:${{matrix.language}}"

View file

@ -460,3 +460,10 @@ berkanteber
OverlordQ OverlordQ
rexlambert22 rexlambert22
Ti4eeT4e Ti4eeT4e
AmanSal1
bbilly1
meliber
nnoboa
rdamas
RfadnjdExt
urectanc

View file

@ -4,11 +4,65 @@
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
--> -->
### 2023.07.06
#### Important changes
- Security: [[CVE-2023-35934](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-35934)] Fix [Cookie leak](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj)
- `--add-header Cookie:` is deprecated and auto-scoped to input URL domains
- Cookies are scoped when passed to external downloaders
- Add `cookies` field to info.json and deprecate `http_headers.Cookie`
#### Core changes
- [Allow extractors to mark formats as potentially DRM](https://github.com/yt-dlp/yt-dlp/commit/bc344cd456380999c1ee74554dfd432a38f32ec7) ([#7396](https://github.com/yt-dlp/yt-dlp/issues/7396)) by [pukkandan](https://github.com/pukkandan)
- [Bugfix for b4e0d75848e9447cee2cd3646ce54d4744a7ff56](https://github.com/yt-dlp/yt-dlp/commit/e59e20744eb32ce4b6ea0dece7c673be8376a710) by [pukkandan](https://github.com/pukkandan)
- [Change how `Cookie` headers are handled](https://github.com/yt-dlp/yt-dlp/commit/3121512228487c9c690d3d39bfd2579addf96e07) by [Grub4K](https://github.com/Grub4K)
- [Prevent `Cookie` leaks on HTTP redirect](https://github.com/yt-dlp/yt-dlp/commit/f8b4bcc0a791274223723488bfbfc23ea3276641) by [coletdjnz](https://github.com/coletdjnz)
- **formats**: [Fix best fallback for storyboards](https://github.com/yt-dlp/yt-dlp/commit/906c0bdcd8974340d619e99ccd613c163eb0d0c2) by [pukkandan](https://github.com/pukkandan)
- **outtmpl**: [Pad `playlist_index` etc even with internal formatting](https://github.com/yt-dlp/yt-dlp/commit/47bcd437247152e0af5b3ebc5592db7bb66855c2) by [pukkandan](https://github.com/pukkandan)
- **utils**: clean_podcast_url: [Handle protocol in redirect URL](https://github.com/yt-dlp/yt-dlp/commit/91302ed349f34dc26cc1d661bb45a4b71f4417f7) by [pukkandan](https://github.com/pukkandan)
#### Extractor changes
- **abc**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/8f05fbae2a79ce0713077ccc68b354e63216bf20) ([#7434](https://github.com/yt-dlp/yt-dlp/issues/7434)) by [meliber](https://github.com/meliber)
- **AdultSwim**: [Extract subtitles from m3u8](https://github.com/yt-dlp/yt-dlp/commit/5e16cf92eb496b7c1541a6b1d727cb87542984db) ([#7421](https://github.com/yt-dlp/yt-dlp/issues/7421)) by [nnoboa](https://github.com/nnoboa)
- **crunchyroll**: music: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/5b4b92769afcc398475e481bfa839f1158902fe9) ([#7439](https://github.com/yt-dlp/yt-dlp/issues/7439)) by [AmanSal1](https://github.com/AmanSal1), [rdamas](https://github.com/rdamas)
- **Douyin**: [Fix extraction from webpage](https://github.com/yt-dlp/yt-dlp/commit/a2be9781fbf4d7e4db245c277ca2ecc41cf3a7b2) by [bashonly](https://github.com/bashonly)
- **googledrive**: [Fix source format extraction](https://github.com/yt-dlp/yt-dlp/commit/3b7f5300c577fef40464d46d4e4037a69d51fe82) ([#7395](https://github.com/yt-dlp/yt-dlp/issues/7395)) by [RfadnjdExt](https://github.com/RfadnjdExt)
- **kick**: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/ef8509c300ea50da86aea447eb214d3d6f6db6bb) by [bashonly](https://github.com/bashonly)
- **qdance**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f0a1ff118145b6449982ba401f9a9f656ecd8062) ([#7420](https://github.com/yt-dlp/yt-dlp/issues/7420)) by [bashonly](https://github.com/bashonly)
- **sbs**: [Python 3.7 compat](https://github.com/yt-dlp/yt-dlp/commit/f393bbe724b1fc6c7f754a5da507e807b2b40ad2) by [pukkandan](https://github.com/pukkandan)
- **stacommu**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/af1fd12f675220df6793fc019dff320bc76e8080) ([#7432](https://github.com/yt-dlp/yt-dlp/issues/7432)) by [urectanc](https://github.com/urectanc)
- **twitter**
- [Fix unauthenticated extraction](https://github.com/yt-dlp/yt-dlp/commit/49296437a8e5fa91dacb5446e51ab588474c85d3) ([#7476](https://github.com/yt-dlp/yt-dlp/issues/7476)) by [bashonly](https://github.com/bashonly)
- spaces: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/1cffd621cb371f1563563cfb2fe37d137e8a7bee) ([#7512](https://github.com/yt-dlp/yt-dlp/issues/7512)) by [bashonly](https://github.com/bashonly)
- **vidlii**: [Handle relative URLs](https://github.com/yt-dlp/yt-dlp/commit/ad8902f616ad2541f9b9626738f1393fad89a64c) by [pukkandan](https://github.com/pukkandan)
- **vk**: VKPlay, VKPlayLive: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/8776349ef6b1f644584a92dfa00a05208a48edc4) ([#7358](https://github.com/yt-dlp/yt-dlp/issues/7358)) by [c-basalt](https://github.com/c-basalt)
- **youtube**
- [Add extractor-arg `formats`](https://github.com/yt-dlp/yt-dlp/commit/58786a10f212bd63f9ad1d0b4d9e4d31c3b385e2) by [pukkandan](https://github.com/pukkandan)
- [Avoid false DRM detection](https://github.com/yt-dlp/yt-dlp/commit/94ed638a437fc766699d440e978982e24ce6a30a) ([#7396](https://github.com/yt-dlp/yt-dlp/issues/7396)) by [pukkandan](https://github.com/pukkandan)
- [Fix comments' `is_favorited`](https://github.com/yt-dlp/yt-dlp/commit/89bed013741a776506f60380b7fd89d27d0710b4) ([#7390](https://github.com/yt-dlp/yt-dlp/issues/7390)) by [bbilly1](https://github.com/bbilly1)
- [Ignore incomplete data for comment threads by default](https://github.com/yt-dlp/yt-dlp/commit/4dc4d8473c085900edc841c87c20041233d25b1f) ([#7475](https://github.com/yt-dlp/yt-dlp/issues/7475)) by [coletdjnz](https://github.com/coletdjnz)
- [Process `post_live` over 2 hours](https://github.com/yt-dlp/yt-dlp/commit/d949c10c45bfc359bdacd52e6a180169b8128958) by [pukkandan](https://github.com/pukkandan)
- stories: [Remove](https://github.com/yt-dlp/yt-dlp/commit/90db9a3c00ca80492c6a58c542e4cbf4c2710866) ([#7459](https://github.com/yt-dlp/yt-dlp/issues/7459)) by [pukkandan](https://github.com/pukkandan)
- tab: [Support shorts-only playlists](https://github.com/yt-dlp/yt-dlp/commit/fcbc9ed760be6e3455bbadfaf277b4504b06f068) ([#7425](https://github.com/yt-dlp/yt-dlp/issues/7425)) by [coletdjnz](https://github.com/coletdjnz)
#### Downloader changes
- **aria2c**: [Add `--no-conf`](https://github.com/yt-dlp/yt-dlp/commit/8a8af356e3bba98a7f7d333aff0777d5d92130c8) by [pukkandan](https://github.com/pukkandan)
- **external**: [Scope cookies](https://github.com/yt-dlp/yt-dlp/commit/1ceb657bdd254ad961489e5060f2ccc7d556b729) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz)
- **http**: [Avoid infinite loop when no data is received](https://github.com/yt-dlp/yt-dlp/commit/662ef1e910b72e57957f06589925b2332ba52821) by [pukkandan](https://github.com/pukkandan)
#### Misc. changes
- [Add CodeQL workflow](https://github.com/yt-dlp/yt-dlp/commit/6355b5f1e1e8e7f4ef866d71d51e03baf0e82f17) ([#7497](https://github.com/yt-dlp/yt-dlp/issues/7497)) by [jorgectf](https://github.com/jorgectf)
- **cleanup**: Miscellaneous: [337734d](https://github.com/yt-dlp/yt-dlp/commit/337734d4a8a6500bc65434843db346b5cbd05e81) by [pukkandan](https://github.com/pukkandan)
- **docs**: [Minor fixes](https://github.com/yt-dlp/yt-dlp/commit/b532a3481046e1eabb6232ee8196fb696c356ff6) by [pukkandan](https://github.com/pukkandan)
- **make_changelog**: [Skip reverted commits](https://github.com/yt-dlp/yt-dlp/commit/fa44802809d189fca0f4782263d48d6533384503) by [pukkandan](https://github.com/pukkandan)
### 2023.06.22 ### 2023.06.22
#### Core changes #### Core changes
- [Fix bug in db3ad8a67661d7b234a6954d9c6a4a9b1749f5eb](https://github.com/yt-dlp/yt-dlp/commit/d7cd97e8d8d42b500fea9abb2aa4ac9b0f98b2ad) by [pukkandan](https://github.com/pukkandan) - [Fix bug in db3ad8a67661d7b234a6954d9c6a4a9b1749f5eb](https://github.com/yt-dlp/yt-dlp/commit/d7cd97e8d8d42b500fea9abb2aa4ac9b0f98b2ad) by [pukkandan](https://github.com/pukkandan)
- [Improve `--download-sections`](https://github.com/yt-dlp/yt-dlp/commit/b4e0d75848e9447cee2cd3646ce54d4744a7ff56) by [pukkandan](https://github.com/pukkandan) - [Improve `--download-sections`](https://github.com/yt-dlp/yt-dlp/commit/b4e0d75848e9447cee2cd3646ce54d4744a7ff56) by [pukkandan](https://github.com/pukkandan)
- Support negative time-ranges
- Add `*from-url` to obey time-ranges in URL
- [Indicate `filesize` approximated from `tbr` better](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) by [pukkandan](https://github.com/pukkandan) - [Indicate `filesize` approximated from `tbr` better](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) by [pukkandan](https://github.com/pukkandan)
#### Extractor changes #### Extractor changes
@ -19,7 +73,7 @@
- **nebula**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3f756c8c4095b942cf49788eb0862ceaf57847f2) ([#7156](https://github.com/yt-dlp/yt-dlp/issues/7156)) by [Lamieur](https://github.com/Lamieur), [rohieb](https://github.com/rohieb) - **nebula**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3f756c8c4095b942cf49788eb0862ceaf57847f2) ([#7156](https://github.com/yt-dlp/yt-dlp/issues/7156)) by [Lamieur](https://github.com/Lamieur), [rohieb](https://github.com/rohieb)
- **rheinmaintv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/98cb1eda7a4cf67c96078980dbd63e6c06ad7f7c) ([#7311](https://github.com/yt-dlp/yt-dlp/issues/7311)) by [barthelmannk](https://github.com/barthelmannk) - **rheinmaintv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/98cb1eda7a4cf67c96078980dbd63e6c06ad7f7c) ([#7311](https://github.com/yt-dlp/yt-dlp/issues/7311)) by [barthelmannk](https://github.com/barthelmannk)
- **youtube** - **youtube**
- [Add `ios` to default clients used](https://github.com/yt-dlp/yt-dlp/commit/1e75d97db21152acc764b30a688e516f04b8a142) - [Add `ios` to default clients used](https://github.com/yt-dlp/yt-dlp/commit/1e75d97db21152acc764b30a688e516f04b8a142) by [pukkandan](https://github.com/pukkandan)
- IOS is affected neither by 403 nor by nsig so helps mitigate them preemptively - IOS is affected neither by 403 nor by nsig so helps mitigate them preemptively
- IOS also has higher bit-rate 'premium' formats though they are not labeled as such - IOS also has higher bit-rate 'premium' formats though they are not labeled as such
- [Improve description parsing performance](https://github.com/yt-dlp/yt-dlp/commit/71dc18fa29263a1ff0472c23d81bfc8dd4422d48) ([#7315](https://github.com/yt-dlp/yt-dlp/issues/7315)) by [berkanteber](https://github.com/berkanteber), [pukkandan](https://github.com/pukkandan) - [Improve description parsing performance](https://github.com/yt-dlp/yt-dlp/commit/71dc18fa29263a1ff0472c23d81bfc8dd4422d48) ([#7315](https://github.com/yt-dlp/yt-dlp/issues/7315)) by [berkanteber](https://github.com/berkanteber), [pukkandan](https://github.com/pukkandan)
@ -27,7 +81,7 @@
- [Workaround 403 for android formats](https://github.com/yt-dlp/yt-dlp/commit/81ca451480051d7ce1a31c017e005358345a9149) by [pukkandan](https://github.com/pukkandan) - [Workaround 403 for android formats](https://github.com/yt-dlp/yt-dlp/commit/81ca451480051d7ce1a31c017e005358345a9149) by [pukkandan](https://github.com/pukkandan)
#### Misc. changes #### Misc. changes
- [Revert "Add automatic duplicate issue detection"](https://github.com/yt-dlp/yt-dlp/commit/a4486bfc1dc7057efca9dd3fe70d7fa25c56f700) - [Revert "Add automatic duplicate issue detection"](https://github.com/yt-dlp/yt-dlp/commit/a4486bfc1dc7057efca9dd3fe70d7fa25c56f700) by [pukkandan](https://github.com/pukkandan)
- **cleanup** - **cleanup**
- Miscellaneous - Miscellaneous
- [7f9c6a6](https://github.com/yt-dlp/yt-dlp/commit/7f9c6a63b16e145495479e9f666f5b9e2ee69e2f) by [bashonly](https://github.com/bashonly) - [7f9c6a6](https://github.com/yt-dlp/yt-dlp/commit/7f9c6a63b16e145495479e9f666f5b9e2ee69e2f) by [bashonly](https://github.com/bashonly)

View file

@ -74,7 +74,7 @@ offlinetest: codetest
$(PYTHON) -m pytest -k "not download" $(PYTHON) -m pytest -k "not download"
# XXX: This is hard to maintain # XXX: This is hard to maintain
CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat yt_dlp/compat/urllib yt_dlp/utils yt_dlp/dependencies CODE_FOLDERS = yt_dlp yt_dlp/downloader yt_dlp/extractor yt_dlp/postprocessor yt_dlp/compat yt_dlp/compat/urllib yt_dlp/utils yt_dlp/dependencies yt_dlp/networking
yt-dlp: yt_dlp/*.py yt_dlp/*/*.py yt-dlp: yt_dlp/*.py yt_dlp/*/*.py
mkdir -p zip mkdir -p zip
for d in $(CODE_FOLDERS) ; do \ for d in $(CODE_FOLDERS) ; do \

View file

@ -12,7 +12,7 @@
[![License: Unlicense](https://img.shields.io/badge/-Unlicense-blue.svg?style=for-the-badge)](LICENSE "License") [![License: Unlicense](https://img.shields.io/badge/-Unlicense-blue.svg?style=for-the-badge)](LICENSE "License")
[![CI Status](https://img.shields.io/github/actions/workflow/status/yt-dlp/yt-dlp/core.yml?branch=master&label=Tests&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/actions "CI Status") [![CI Status](https://img.shields.io/github/actions/workflow/status/yt-dlp/yt-dlp/core.yml?branch=master&label=Tests&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/actions "CI Status")
[![Commits](https://img.shields.io/github/commit-activity/m/yt-dlp/yt-dlp?label=commits&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History") [![Commits](https://img.shields.io/github/commit-activity/m/yt-dlp/yt-dlp?label=commits&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History")
[![Last Commit](https://img.shields.io/github/last-commit/yt-dlp/yt-dlp/master?label=&style=for-the-badge&display_timestamp=committer)](https://github.com/yt-dlp/yt-dlp/commits "Commit History") [![Last Commit](https://img.shields.io/github/last-commit/yt-dlp/yt-dlp/master?label=&style=for-the-badge&display_timestamp=committer)](https://github.com/yt-dlp/yt-dlp/pulse/monthly "Last activity")
</div> </div>
<!-- MANPAGE: END EXCLUDED SECTION --> <!-- MANPAGE: END EXCLUDED SECTION -->
@ -76,7 +76,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
# NEW FEATURES # NEW FEATURES
* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@42f2d4**](https://github.com/yt-dlp/yt-dlp/commit/42f2d4) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) * Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@42f2d4**](https://github.com/ytdl-org/youtube-dl/commit/07af47960f3bb262ead02490ce65c8c45c01741e) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))
* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
@ -1323,7 +1323,7 @@ The available fields are:
- `extractor` (string): Name of the extractor - `extractor` (string): Name of the extractor
- `extractor_key` (string): Key name of the extractor - `extractor_key` (string): Key name of the extractor
- `epoch` (numeric): Unix epoch of when the information extraction was completed - `epoch` (numeric): Unix epoch of when the information extraction was completed
- `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start` - `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`, padded with leading zeros to 5 digits
- `video_autonumber` (numeric): Number that will be increased with each video - `video_autonumber` (numeric): Number that will be increased with each video
- `n_entries` (numeric): Total number of extracted items in the playlist - `n_entries` (numeric): Total number of extracted items in the playlist
- `playlist_id` (string): Identifier of the playlist that contains the video - `playlist_id` (string): Identifier of the playlist that contains the video
@ -1509,7 +1509,7 @@ Unless `--video-multistreams` is used, all formats with a video stream except th
## Filtering Formats ## Filtering Formats
You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"` since filters without a selector are interpreted as `best`).
The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals): The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals):
@ -1545,7 +1545,7 @@ Any string comparison may be prefixed with negation `!` in order to produce an o
**Note**: None of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering. **Note**: None of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering.
Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats. Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "bv[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.
Format selectors can also be grouped using parentheses; e.g. `-f "(mp4,webm)[height<480]"` will download the best pre-merged mp4 and webm formats with a height lower than 480. Format selectors can also be grouped using parentheses; e.g. `-f "(mp4,webm)[height<480]"` will download the best pre-merged mp4 and webm formats with a height lower than 480.
@ -1569,7 +1569,7 @@ The available fields are:
- `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other). If `--prefer-free-formats` is used, the order changes to `ogg` > `opus` > `webm` > `mp3` > `m4a` > `aac` - `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other). If `--prefer-free-formats` is used, the order changes to `ogg` > `opus` > `webm` > `mp3` > `m4a` > `aac`
- `ext`: Equivalent to `vext,aext` - `ext`: Equivalent to `vext,aext`
- `filesize`: Exact filesize, if known in advance - `filesize`: Exact filesize, if known in advance
- `fs_approx`: Approximate filesize calculated from the manifests - `fs_approx`: Approximate filesize
- `size`: Exact filesize if available, otherwise approximate filesize - `size`: Exact filesize if available, otherwise approximate filesize
- `height`: Height of video - `height`: Height of video
- `width`: Width of video - `width`: Width of video
@ -1580,7 +1580,7 @@ The available fields are:
- `tbr`: Total average bitrate in KBit/s - `tbr`: Total average bitrate in KBit/s
- `vbr`: Average video bitrate in KBit/s - `vbr`: Average video bitrate in KBit/s
- `abr`: Average audio bitrate in KBit/s - `abr`: Average audio bitrate in KBit/s
- `br`: Equivalent to using `tbr,vbr,abr` - `br`: Average bitrate in KBit/s, `tbr`/`vbr`/`abr`
- `asr`: Audio sample rate in Hz - `asr`: Audio sample rate in Hz
**Deprecation warning**: Many of these fields have (currently undocumented) aliases, that may be removed in a future version. It is recommended to use only the documented field names. **Deprecation warning**: Many of these fields have (currently undocumented) aliases, that may be removed in a future version. It is recommended to use only the documented field names.
@ -1805,8 +1805,7 @@ The following extractors use this feature:
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all` * `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
* E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
* `include_duplicate_formats`: Extract formats with identical content but different URLs or protocol. This is useful if some of the formats are unavailable or throttled. * `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8)
* `include_incomplete_formats`: Extract formats that cannot be downloaded completely (live dash and post-live m3u8)
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
* `innertube_key`: Innertube API key to use for all API requests * `innertube_key`: Innertube API key to use for all API requests
@ -1856,7 +1855,7 @@ The following extractors use this feature:
#### twitter #### twitter
* `legacy_api`: Force usage of the legacy Twitter API instead of the GraphQL API for tweet extraction. Has no effect if login cookies are passed * `legacy_api`: Force usage of the legacy Twitter API instead of the GraphQL API for tweet extraction. Has no effect if login cookies are passed
#### wrestleuniverse #### stacommu, wrestleuniverse
* `device_id`: UUID value assigned by the website and used to enforce device limits for paid livestream content. Can be found in browser local storage * `device_id`: UUID value assigned by the website and used to enforce device limits for paid livestream content. Can be found in browser local storage
#### twitch #### twitch
@ -1954,7 +1953,7 @@ with YoutubeDL() as ydl:
ydl.download(URLS) ydl.download(URLS)
``` ```
Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L184). Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L183) or `help(yt_dlp.YoutubeDL)` in a Python shell. If you are already familiar with the CLI, you can use [`devscripts/cli_to_api.py`](https://github.com/yt-dlp/yt-dlp/blob/master/devscripts/cli_to_api.py) to translate any CLI switches to `YoutubeDL` params.
**Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the [example below](#extracting-information) **Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the [example below](#extracting-information)

View file

@ -1,12 +1,12 @@
[ [
{ {
"action": "add", "action": "add",
"when": "776d1c3f0c9b00399896dd2e40e78e9a43218109", "when": "29cb20bd563c02671b31dd840139e93dd37150a1",
"short": "[priority] **A new release type has been added!**\n * [`nightly`](https://github.com/yt-dlp/yt-dlp/releases/tag/nightly) builds will be made after each push, containing the latest fixes (but also possibly bugs).\n * When using `--update`/`-U`, a release binary will only update to its current channel (either `stable` or `nightly`).\n * The `--update-to` option has been added allowing the user more control over program upgrades (or downgrades).\n * `--update-to` can change the release channel (`stable`, `nightly`) and also upgrade or downgrade to specific tags.\n * **Usage**: `--update-to CHANNEL`, `--update-to TAG`, `--update-to CHANNEL@TAG`" "short": "[priority] **A new release type has been added!**\n * [`nightly`](https://github.com/yt-dlp/yt-dlp/releases/tag/nightly) builds will be made after each push, containing the latest fixes (but also possibly bugs).\n * When using `--update`/`-U`, a release binary will only update to its current channel (either `stable` or `nightly`).\n * The `--update-to` option has been added allowing the user more control over program upgrades (or downgrades).\n * `--update-to` can change the release channel (`stable`, `nightly`) and also upgrade or downgrade to specific tags.\n * **Usage**: `--update-to CHANNEL`, `--update-to TAG`, `--update-to CHANNEL@TAG`"
}, },
{ {
"action": "add", "action": "add",
"when": "776d1c3f0c9b00399896dd2e40e78e9a43218109", "when": "5038f6d713303e0967d002216e7a88652401c22a",
"short": "[priority] **YouTube throttling fixes!**" "short": "[priority] **YouTube throttling fixes!**"
}, },
{ {
@ -38,13 +38,15 @@
}, },
{ {
"action": "change", "action": "change",
"when": "7b37e8b23691613f331bd4ebc9d639dd6f93c972", "when": "b4e0d75848e9447cee2cd3646ce54d4744a7ff56",
"short": "Improve `--download-sections`\n - Support negative time-ranges\n - Add `*from-url` to obey time-ranges in URL" "short": "Improve `--download-sections`\n - Support negative time-ranges\n - Add `*from-url` to obey time-ranges in URL",
"authors": ["pukkandan"]
}, },
{ {
"action": "change", "action": "change",
"when": "1e75d97db21152acc764b30a688e516f04b8a142", "when": "1e75d97db21152acc764b30a688e516f04b8a142",
"short": "[extractor/youtube] Add `ios` to default clients used\n - IOS is affected neither by 403 nor by nsig so helps mitigate them preemptively\n - IOS also has higher bit-rate 'premium' formats though they are not labeled as such" "short": "[extractor/youtube] Add `ios` to default clients used\n - IOS is affected neither by 403 nor by nsig so helps mitigate them preemptively\n - IOS also has higher bit-rate 'premium' formats though they are not labeled as such",
"authors": ["pukkandan"]
}, },
{ {
"action": "change", "action": "change",
@ -55,6 +57,17 @@
{ {
"action": "change", "action": "change",
"when": "a4486bfc1dc7057efca9dd3fe70d7fa25c56f700", "when": "a4486bfc1dc7057efca9dd3fe70d7fa25c56f700",
"short": "[misc] Revert \"Add automatic duplicate issue detection\"" "short": "[misc] Revert \"Add automatic duplicate issue detection\"",
"authors": ["pukkandan"]
},
{
"action": "add",
"when": "1ceb657bdd254ad961489e5060f2ccc7d556b729",
"short": "[priority] Security: [[CVE-2023-35934](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-35934)] Fix [Cookie leak](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj)\n - `--add-header Cookie:` is deprecated and auto-scoped to input URL domains\n - Cookies are scoped when passed to external downloaders\n - Add `cookies` field to info.json and deprecate `http_headers.Cookie`"
},
{
"action": "change",
"when": "b03fa7834579a01cc5fba48c0e73488a16683d48",
"short": "[ie/twitter] Revert 92315c03774cfabb3a921884326beb4b981f786b"
} }
] ]

View file

@ -53,7 +53,9 @@ class CommitGroup(enum.Enum):
'cookies', 'cookies',
'core', 'core',
'dependencies', 'dependencies',
'formats',
'jsinterp', 'jsinterp',
'networking',
'outtmpl', 'outtmpl',
'plugins', 'plugins',
'update', 'update',
@ -68,9 +70,9 @@ class CommitGroup(enum.Enum):
'misc', 'misc',
'test', 'test',
}, },
cls.EXTRACTOR: {'extractor'}, cls.EXTRACTOR: {'extractor', 'ie'},
cls.DOWNLOADER: {'downloader'}, cls.DOWNLOADER: {'downloader', 'fd'},
cls.POSTPROCESSOR: {'postprocessor'}, cls.POSTPROCESSOR: {'postprocessor', 'pp'},
}.items() }.items()
for name in names for name in names
} }
@ -252,6 +254,7 @@ class CommitRange:
(?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))? (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
''', re.VERBOSE | re.DOTALL) ''', re.VERBOSE | re.DOTALL)
EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE) EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert)\s+([\da-f]{40})') FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert)\s+([\da-f]{40})')
UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)') UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')
@ -279,7 +282,7 @@ class CommitRange:
self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}', self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}',
f'{self._start}..{self._end}' if self._start else self._end).stdout f'{self._start}..{self._end}' if self._start else self._end).stdout
commits = {} commits, reverts = {}, {}
fixes = defaultdict(list) fixes = defaultdict(list)
lines = iter(result.splitlines(False)) lines = iter(result.splitlines(False))
for i, commit_hash in enumerate(lines): for i, commit_hash in enumerate(lines):
@ -300,6 +303,11 @@ class CommitRange:
logger.debug(f'Reached Release commit, breaking: {commit}') logger.debug(f'Reached Release commit, breaking: {commit}')
break break
revert_match = self.REVERT_RE.fullmatch(commit.short)
if revert_match:
reverts[revert_match.group(1)] = commit
continue
fix_match = self.FIXES_RE.search(commit.short) fix_match = self.FIXES_RE.search(commit.short)
if fix_match: if fix_match:
commitish = fix_match.group(1) commitish = fix_match.group(1)
@ -307,6 +315,13 @@ class CommitRange:
commits[commit.hash] = commit commits[commit.hash] = commit
for commitish, revert_commit in reverts.items():
reverted = commits.pop(commitish, None)
if reverted:
logger.debug(f'{commit} fully reverted {reverted}')
else:
commits[revert_commit.hash] = revert_commit
for commitish, fix_commits in fixes.items(): for commitish, fix_commits in fixes.items():
if commitish in commits: if commitish in commits:
hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits) hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits)

View file

@ -8,7 +8,6 @@ ignore = E402,E501,E731,E741,W503
max_line_length = 120 max_line_length = 120
per_file_ignores = per_file_ignores =
devscripts/lazy_load_template.py: F401 devscripts/lazy_load_template.py: F401
yt_dlp/utils/__init__.py: F401, F403
[autoflake] [autoflake]

View file

@ -65,7 +65,8 @@ def py2exe_params():
'excludes': ['Crypto', 'Cryptodome'], # py2exe cannot import Crypto 'excludes': ['Crypto', 'Cryptodome'], # py2exe cannot import Crypto
'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
# Modules that are only imported dynamically must be added here # Modules that are only imported dynamically must be added here
'includes': ['yt_dlp.compat._legacy'], 'includes': ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated',
'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated'],
}, },
'zipfile': None, 'zipfile': None,
} }

View file

@ -1136,6 +1136,7 @@
- **puhutv:serie** - **puhutv:serie**
- **Puls4** - **Puls4**
- **Pyvideo** - **Pyvideo**
- **QDance**: [*qdance*](## "netrc machine")
- **QingTing** - **QingTing**
- **qqmusic**: QQ音乐 - **qqmusic**: QQ音乐
- **qqmusic:album**: QQ音乐 - 专辑 - **qqmusic:album**: QQ音乐 - 专辑
@ -1363,6 +1364,8 @@
- **sr:mediathek**: Saarländischer Rundfunk - **sr:mediathek**: Saarländischer Rundfunk
- **SRGSSR** - **SRGSSR**
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
- **StacommuLive**: [*stacommu*](## "netrc machine")
- **StacommuVOD**: [*stacommu*](## "netrc machine")
- **StagePlusVODConcert**: [*stageplus*](## "netrc machine") - **StagePlusVODConcert**: [*stageplus*](## "netrc machine")
- **stanfordoc**: Stanford Open ClassRoom - **stanfordoc**: Stanford Open ClassRoom
- **StarTrek** - **StarTrek**
@ -1647,6 +1650,8 @@
- **vk**: [*vk*](## "netrc machine") VK - **vk**: [*vk*](## "netrc machine") VK
- **vk:uservideos**: [*vk*](## "netrc machine") VK - User's Videos - **vk:uservideos**: [*vk*](## "netrc machine") VK - User's Videos
- **vk:wallpost**: [*vk*](## "netrc machine") - **vk:wallpost**: [*vk*](## "netrc machine")
- **VKPlay**
- **VKPlayLive**
- **vm.tiktok** - **vm.tiktok**
- **Vocaroo** - **Vocaroo**
- **Vodlocker** - **Vodlocker**
@ -1800,7 +1805,6 @@
- **youtube:search:date**: YouTube search, newest videos first; "ytsearchdate:" prefix - **youtube:search:date**: YouTube search, newest videos first; "ytsearchdate:" prefix
- **youtube:search_url**: YouTube search URLs with sorting and filter support - **youtube:search_url**: YouTube search URLs with sorting and filter support
- **youtube:shorts:pivot:audio**: YouTube Shorts audio pivot (Shorts using audio of a given video) - **youtube:shorts:pivot:audio**: YouTube Shorts audio pivot (Shorts using audio of a given video)
- **youtube:stories**: YouTube channel stories; "ytstories:" prefix
- **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies) - **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)
- **youtube:tab**: YouTube Tabs - **youtube:tab**: YouTube Tabs
- **youtube:user**: YouTube user videos; "ytuser:" prefix - **youtube:user**: YouTube user videos; "ytuser:" prefix

View file

@ -11,7 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import copy import copy
import json import json
from test.helper import FakeYDL, assertRegexpMatches from test.helper import FakeYDL, assertRegexpMatches, try_rm
from yt_dlp import YoutubeDL from yt_dlp import YoutubeDL
from yt_dlp.compat import compat_os_name from yt_dlp.compat import compat_os_name
from yt_dlp.extractor import YoutubeIE from yt_dlp.extractor import YoutubeIE
@ -24,6 +24,7 @@ from yt_dlp.utils import (
int_or_none, int_or_none,
match_filter_func, match_filter_func,
) )
from yt_dlp.utils.traversal import traverse_obj
TEST_URL = 'http://localhost/sample.mp4' TEST_URL = 'http://localhost/sample.mp4'
@ -684,7 +685,8 @@ class TestYoutubeDL(unittest.TestCase):
test('%(id)s.%(ext)s', '1234.mp4') test('%(id)s.%(ext)s', '1234.mp4')
test('%(duration_string)s', ('27:46:40', '27-46-40')) test('%(duration_string)s', ('27:46:40', '27-46-40'))
test('%(resolution)s', '1080p') test('%(resolution)s', '1080p')
test('%(playlist_index)s', '001') test('%(playlist_index|)s', '001')
test('%(playlist_index&{}!)s', '1!')
test('%(playlist_autonumber)s', '02') test('%(playlist_autonumber)s', '02')
test('%(autonumber)s', '00001') test('%(autonumber)s', '00001')
test('%(autonumber+2)03d', '005', autonumber_start=3) test('%(autonumber+2)03d', '005', autonumber_start=3)
@ -1213,6 +1215,129 @@ class TestYoutubeDL(unittest.TestCase):
self.assertEqual(downloaded['extractor'], 'Video') self.assertEqual(downloaded['extractor'], 'Video')
self.assertEqual(downloaded['extractor_key'], 'Video') self.assertEqual(downloaded['extractor_key'], 'Video')
def test_header_cookies(self):
    # Exercises loading of cookie strings into the cookiejar, both as explicitly
    # scoped cookies (`headers=False`) and as cookies coming from a `Cookie`
    # header that get scoped to the test URL (`headers=True`).
    from http.cookiejar import Cookie

    ydl = FakeYDL()
    ydl.report_warning = lambda *_, **__: None
    _test_url = 'https://yt.dlp/test'

    def cookie(name, value, version=None, domain='', path='', secure=False, expires=None):
        # Build the expected `Cookie`; the "specified" flags simply mirror
        # whether a domain/path was given
        has_domain = bool(domain)
        return Cookie(
            version=version or 0, name=name, value=value,
            port=None, port_specified=False,
            domain=domain, domain_specified=has_domain, domain_initial_dot=has_domain,
            path=path, path_specified=bool(path),
            secure=secure, expires=expires, discard=False,
            comment=None, comment_url=None, rest={})

    def test(encoded_cookies, cookies, *, headers=False, round_trip=None, error_re=None):
        # `cookies` is the expected jar content after loading `encoded_cookies`.
        # `round_trip` overrides the expected re-encoded `cookies` field and
        # `error_re` is the pattern of the exception expected instead of success.
        def run_checks():
            ydl.cookiejar.clear()
            ydl._load_cookies(encoded_cookies, autoscope=headers)
            if headers:
                ydl._apply_header_cookies(_test_url)
            info = {'url': _test_url}
            ydl._calc_headers(info)
            self.assertCountEqual(
                map(vars, ydl.cookiejar), map(vars, cookies),
                'Extracted cookiejar.Cookie is not the same')
            if not headers:
                expected_field = round_trip or encoded_cookies
                self.assertEqual(
                    info.get('cookies'), expected_field,
                    'Cookie is not the same as round trip')
            # Presumably the name-mangled `__header_cookies` list of YoutubeDL;
            # emptied so that header cookies do not carry over into the next case
            ydl.__dict__['_YoutubeDL__header_cookies'] = []

        with self.subTest(msg=encoded_cookies):
            if error_re:
                with self.assertRaisesRegex(Exception, error_re):
                    run_checks()
            else:
                run_checks()

    # Explicitly scoped cookies
    test('test=value; Domain=.yt.dlp', [cookie('test', 'value', domain='.yt.dlp')])
    test('test=value', [cookie('test', 'value')], error_re=r'Unscoped cookies are not allowed')
    test(
        'cookie1=value1; Domain=.yt.dlp; Path=/test; cookie2=value2; Domain=.yt.dlp; Path=/',
        [
            cookie('cookie1', 'value1', domain='.yt.dlp', path='/test'),
            cookie('cookie2', 'value2', domain='.yt.dlp', path='/'),
        ])
    test(
        'test=value; Domain=.yt.dlp; Path=/test; Secure; Expires=9999999999',
        [cookie('test', 'value', domain='.yt.dlp', path='/test', secure=True, expires=9999999999)])
    test(
        'test="value; "; path=/test; domain=.yt.dlp',
        [cookie('test', 'value; ', domain='.yt.dlp', path='/test')],
        round_trip='test="value\\073 "; Domain=.yt.dlp; Path=/test')
    test(
        'name=; Domain=.yt.dlp',
        [cookie('name', '', domain='.yt.dlp')],
        round_trip='name=""; Domain=.yt.dlp')

    # Cookies passed as a header
    test('test=value', [cookie('test', 'value', domain='.yt.dlp')], headers=True)
    test('cookie1=value; Domain=.yt.dlp; cookie2=value', [], headers=True, error_re=r'Invalid syntax')

    # From here on a deprecation notice is reported as an error, so it surfaces as an exception
    ydl.deprecated_feature = ydl.report_error
    test('test=value', [], headers=True, error_re=r'Passing cookies as a header is a potential security risk')
def test_infojson_cookies(self):
    # Checks that cookies survive a round trip through a written info.json:
    # after processing, after reloading the file, and after re-processing the
    # reloaded data, the `Cookie` header must be gone and the `cookies` field
    # must hold the scoped cookie string.
    TEST_FILE = 'test_infojson_cookies.info.json'
    TEST_URL = 'https://example.com/example.mp4'
    COOKIES = 'a=b; Domain=.example.com; c=d; Domain=.example.com'
    COOKIE_HEADER = {'Cookie': 'a=b; c=d'}

    # Registered before anything is written so that the file is removed even
    # when one of the assertions below fails
    self.addCleanup(try_rm, TEST_FILE)

    ydl = FakeYDL()
    # Instead of downloading, dump the processed info_dict to TEST_FILE
    ydl.process_info = lambda x: ydl._write_info_json('test', x, TEST_FILE)

    def make_info(info_header_cookies=False, fmts_header_cookies=False, cookies_field=False):
        # Build a single-format result carrying cookies in the requested places
        fmt = {'url': TEST_URL}
        if fmts_header_cookies:
            fmt['http_headers'] = COOKIE_HEADER
        if cookies_field:
            fmt['cookies'] = COOKIES
        return _make_result([fmt], http_headers=COOKIE_HEADER if info_header_cookies else None)

    def test(initial_info, note):
        # `note` names the scenario and is only used in failure messages
        result = {}
        result['processed'] = ydl.process_ie_result(initial_info)
        self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL),
                        msg=f'No cookies set in cookiejar after initial process when {note}')
        ydl.cookiejar.clear()
        # Read the JSON text as UTF-8 instead of the platform's locale encoding
        with open(TEST_FILE, encoding='utf-8') as infojson:
            result['loaded'] = ydl.sanitize_info(json.load(infojson), True)
        result['final'] = ydl.process_ie_result(result['loaded'].copy(), download=False)
        self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL),
                        msg=f'No cookies set in cookiejar after final process when {note}')
        ydl.cookiejar.clear()
        for key in ('processed', 'loaded', 'final'):
            info = result[key]
            # Look at the info_dict itself first, then at its first format
            self.assertIsNone(
                traverse_obj(info, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False),
                msg=f'Cookie header not removed in {key} result when {note}')
            self.assertEqual(
                traverse_obj(info, ((None, ('formats', 0)), 'cookies'), get_all=False), COOKIES,
                msg=f'No cookies field found in {key} result when {note}')

    test({'url': TEST_URL, 'http_headers': COOKIE_HEADER, 'id': '1', 'title': 'x'}, 'no formats field')
    test(make_info(info_header_cookies=True), 'info_dict header cookies')
    test(make_info(fmts_header_cookies=True), 'format header cookies')
    test(make_info(info_header_cookies=True, fmts_header_cookies=True), 'info_dict and format header cookies')
    test(make_info(info_header_cookies=True, fmts_header_cookies=True, cookies_field=True), 'all cookies fields')
    test(make_info(cookies_field=True), 'cookies format field')
    test({'url': TEST_URL, 'cookies': COOKIES, 'id': '1', 'title': 'x'}, 'info_dict cookies field only')
def test_add_headers_cookie(self):
    # A `Cookie` entry given through the `http_headers` param must end up in the
    # `cookies` field and the cookiejar for the scoped domain only, and must
    # never be left behind as an `http_headers` cookie.
    def find_cookie_header(info):
        # Look at the info_dict itself first, then at its first format
        lookup = ((None, ('formats', 0)), 'http_headers', 'Cookie')
        return traverse_obj(info, lookup, casesense=False, get_all=False)

    ydl = FakeYDL({'http_headers': {'Cookie': 'a=b'}})
    webpage_url = _make_result([])['webpage_url']
    ydl._apply_header_cookies(webpage_url)  # Scope to input webpage URL: .example.com

    # Format URL on the domain the cookie was scoped to
    matching_url = 'https://example.com/video.mp4'
    info = ydl.process_ie_result(_make_result([{'url': matching_url}]), download=False)
    self.assertIsNone(find_cookie_header(info), msg='http_headers cookies in result info_dict')
    self.assertEqual(info.get('cookies'), 'a=b; Domain=.example.com', msg='No cookies were set in cookies field')
    self.assertIn('a=b', ydl.cookiejar.get_cookie_header(matching_url), msg='No cookies were set in cookiejar')

    # Format URL on an unrelated domain
    foreign_url = 'https://wrong.com/video.mp4'
    info = ydl.process_ie_result(_make_result([{'url': foreign_url}]), download=False)
    self.assertIsNone(find_cookie_header(info), msg='http_headers cookies for wrong domain')
    self.assertFalse(info.get('cookies'), msg='Cookies set in cookies field for wrong domain')
    self.assertFalse(ydl.cookiejar.get_cookie_header(foreign_url), msg='Cookies set in cookiejar for wrong domain')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -17,10 +17,10 @@ from yt_dlp.cookies import YoutubeDLCookieJar
class TestYoutubeDLCookieJar(unittest.TestCase): class TestYoutubeDLCookieJar(unittest.TestCase):
def test_keep_session_cookies(self): def test_keep_session_cookies(self):
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt') cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
cookiejar.load(ignore_discard=True, ignore_expires=True) cookiejar.load()
tf = tempfile.NamedTemporaryFile(delete=False) tf = tempfile.NamedTemporaryFile(delete=False)
try: try:
cookiejar.save(filename=tf.name, ignore_discard=True, ignore_expires=True) cookiejar.save(filename=tf.name)
temp = tf.read().decode() temp = tf.read().decode()
self.assertTrue(re.search( self.assertTrue(re.search(
r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp)) r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp))
@ -32,7 +32,7 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
def test_strip_httponly_prefix(self): def test_strip_httponly_prefix(self):
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt') cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
cookiejar.load(ignore_discard=True, ignore_expires=True) cookiejar.load()
def assert_cookie_has_value(key): def assert_cookie_has_value(key):
self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE') self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE')
@ -42,17 +42,25 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
def test_malformed_cookies(self): def test_malformed_cookies(self):
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt') cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt')
cookiejar.load(ignore_discard=True, ignore_expires=True) cookiejar.load()
# Cookies should be empty since all malformed cookie file entries # Cookies should be empty since all malformed cookie file entries
# will be ignored # will be ignored
self.assertFalse(cookiejar._cookies) self.assertFalse(cookiejar._cookies)
def test_get_cookie_header(self): def test_get_cookie_header(self):
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt') cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
cookiejar.load(ignore_discard=True, ignore_expires=True) cookiejar.load()
header = cookiejar.get_cookie_header('https://www.foobar.foobar') header = cookiejar.get_cookie_header('https://www.foobar.foobar')
self.assertIn('HTTPONLY_COOKIE', header) self.assertIn('HTTPONLY_COOKIE', header)
def test_get_cookies_for_url(self):
    # The session-cookie fixture is expected to yield two cookies for the
    # `www.` host and none for the bare domain.
    jar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
    jar.load()

    matching = jar.get_cookies_for_url('https://www.foobar.foobar/')
    self.assertEqual(len(matching), 2)

    non_matching = jar.get_cookies_for_url('https://foobar.foobar/')
    self.assertFalse(non_matching)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -9,15 +9,16 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import struct import struct
import urllib.parse
from yt_dlp import compat from yt_dlp import compat
from yt_dlp.compat import urllib # isort: split
from yt_dlp.compat import ( from yt_dlp.compat import (
compat_etree_fromstring, compat_etree_fromstring,
compat_expanduser, compat_expanduser,
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
compat_urllib_parse_urlencode, compat_urllib_parse_urlencode,
) )
from yt_dlp.compat.urllib.request import getproxies
class TestCompat(unittest.TestCase): class TestCompat(unittest.TestCase):
@ -28,8 +29,7 @@ class TestCompat(unittest.TestCase):
with self.assertWarns(DeprecationWarning): with self.assertWarns(DeprecationWarning):
compat.WINDOWS_VT_MODE compat.WINDOWS_VT_MODE
# TODO: Test submodule self.assertEqual(urllib.request.getproxies, getproxies)
# compat.asyncio.events # Must not raise error
with self.assertWarns(DeprecationWarning): with self.assertWarns(DeprecationWarning):
compat.compat_pycrypto_AES # Must not raise error compat.compat_pycrypto_AES # Must not raise error

View file

@ -10,10 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import collections import collections
import hashlib import hashlib
import http.client
import json import json
import socket
import urllib.error
from test.helper import ( from test.helper import (
assertGreaterEqual, assertGreaterEqual,
@ -29,6 +26,7 @@ from test.helper import (
import yt_dlp.YoutubeDL # isort: split import yt_dlp.YoutubeDL # isort: split
from yt_dlp.extractor import get_info_extractor from yt_dlp.extractor import get_info_extractor
from yt_dlp.networking.exceptions import HTTPError, TransportError
from yt_dlp.utils import ( from yt_dlp.utils import (
DownloadError, DownloadError,
ExtractorError, ExtractorError,
@ -162,8 +160,7 @@ def generator(test_case, tname):
force_generic_extractor=params.get('force_generic_extractor', False)) force_generic_extractor=params.get('force_generic_extractor', False))
except (DownloadError, ExtractorError) as err: except (DownloadError, ExtractorError) as err:
# Check if the exception is not a network related one # Check if the exception is not a network related one
if (err.exc_info[0] not in (urllib.error.URLError, socket.timeout, UnavailableVideoError, http.client.BadStatusLine) if not isinstance(err.exc_info[1], (TransportError, UnavailableVideoError)) or (isinstance(err.exc_info[1], HTTPError) and err.exc_info[1].status == 503):
or (err.exc_info[0] == urllib.error.HTTPError and err.exc_info[1].code == 503)):
err.msg = f'{getattr(err, "msg", err)} ({tname})' err.msg = f'{getattr(err, "msg", err)} ({tname})'
raise raise
@ -249,7 +246,7 @@ def generator(test_case, tname):
# extractor returns full results even with extract_flat # extractor returns full results even with extract_flat
res_tcs = [{'info_dict': e} for e in res_dict['entries']] res_tcs = [{'info_dict': e} for e in res_dict['entries']]
try_rm_tcs_files(res_tcs) try_rm_tcs_files(res_tcs)
ydl.close()
return test_template return test_template

View file

@ -0,0 +1,139 @@
#!/usr/bin/env python3
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import http.cookiejar
from test.helper import FakeYDL
from yt_dlp.downloader.external import (
Aria2cFD,
AxelFD,
CurlFD,
FFmpegFD,
HttpieFD,
WgetFD,
)
TEST_COOKIE = {
'version': 0,
'name': 'test',
'value': 'ytdlp',
'port': None,
'port_specified': False,
'domain': '.example.com',
'domain_specified': True,
'domain_initial_dot': False,
'path': '/',
'path_specified': True,
'secure': False,
'expires': None,
'discard': False,
'comment': None,
'comment_url': None,
'rest': {},
}
TEST_INFO = {'url': 'http://www.example.com/'}
class TestHttpieFD(unittest.TestCase):
    """Checks the argument list HttpieFD builds, with and without cookies."""

    def test_make_cmd(self):
        with FakeYDL() as ydl:
            fd = HttpieFD(ydl, {})

            plain_cmd = fd._make_cmd('test', TEST_INFO)
            self.assertEqual(
                plain_cmd,
                ['http', '--download', '--output', 'test', 'http://www.example.com/'])

            # Test cookie header is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            cookie_cmd = fd._make_cmd('test', TEST_INFO)
            self.assertEqual(
                cookie_cmd,
                ['http', '--download', '--output', 'test', 'http://www.example.com/', 'Cookie:test=ytdlp'])
class TestAxelFD(unittest.TestCase):
    """Checks the argument list AxelFD builds, with and without cookies."""

    def test_make_cmd(self):
        with FakeYDL() as ydl:
            fd = AxelFD(ydl, {})

            plain_cmd = fd._make_cmd('test', TEST_INFO)
            self.assertEqual(
                plain_cmd,
                ['axel', '-o', 'test', '--', 'http://www.example.com/'])

            # Test cookie header is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            cookie_cmd = fd._make_cmd('test', TEST_INFO)
            self.assertEqual(
                cookie_cmd,
                ['axel', '-o', 'test', '-H', 'Cookie: test=ytdlp', '--max-redirect=0', '--', 'http://www.example.com/'])
class TestWgetFD(unittest.TestCase):
    """Checks that WgetFD only passes --load-cookies once the jar has a cookie."""

    def test_make_cmd(self):
        with FakeYDL() as ydl:
            fd = WgetFD(ydl, {})

            plain_cmd = fd._make_cmd('test', TEST_INFO)
            self.assertNotIn('--load-cookies', plain_cmd)

            # Test cookiejar tempfile arg is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            cookie_cmd = fd._make_cmd('test', TEST_INFO)
            self.assertIn('--load-cookies', cookie_cmd)
class TestCurlFD(unittest.TestCase):
    """Checks that CurlFD only passes --cookie once the jar has a cookie."""

    def test_make_cmd(self):
        with FakeYDL() as ydl:
            fd = CurlFD(ydl, {})

            plain_cmd = fd._make_cmd('test', TEST_INFO)
            self.assertNotIn('--cookie', plain_cmd)

            # Test cookie header is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            # The command is rebuilt for each assertion, as in the original test
            flag_cmd = fd._make_cmd('test', TEST_INFO)
            self.assertIn('--cookie', flag_cmd)
            value_cmd = fd._make_cmd('test', TEST_INFO)
            self.assertIn('test=ytdlp', value_cmd)
class TestAria2cFD(unittest.TestCase):
    """Checks that Aria2cFD only creates a cookies tempfile when the jar has a cookie."""

    def test_make_cmd(self):
        with FakeYDL() as ydl:
            fd = Aria2cFD(ydl, {})

            # Without cookies no tempfile attribute should have been set
            fd._make_cmd('test', TEST_INFO)
            self.assertFalse(hasattr(fd, '_cookies_tempfile'))

            # Test cookiejar tempfile arg is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            cookie_cmd = fd._make_cmd('test', TEST_INFO)
            self.assertIn(f'--load-cookies={fd._cookies_tempfile}', cookie_cmd)
@unittest.skipUnless(FFmpegFD.available(), 'ffmpeg not found')
class TestFFmpegFD(unittest.TestCase):
    """Checks the ffmpeg argument list by capturing what is passed to `_debug_cmd`."""

    # Last argument list handed to _test_cmd
    _args = []

    def _test_cmd(self, args):
        """Record `args`; installed below as the downloader's `_debug_cmd`."""
        self._args = args

    def test_make_cmd(self):
        mp4_info = {**TEST_INFO, 'ext': 'mp4'}
        with FakeYDL() as ydl:
            fd = FFmpegFD(ydl, {})
            fd._debug_cmd = self._test_cmd

            fd._call_downloader('test', dict(mp4_info))
            self.assertEqual(self._args, [
                'ffmpeg', '-y', '-hide_banner', '-i', 'http://www.example.com/',
                '-c', 'copy', '-f', 'mp4', 'file:test'])

            # Test cookies arg is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            fd._call_downloader('test', dict(mp4_info))
            self.assertEqual(self._args, [
                'ffmpeg', '-y', '-hide_banner', '-cookies', 'test=ytdlp; path=/; domain=.example.com;\r\n',
                '-i', 'http://www.example.com/', '-c', 'copy', '-f', 'mp4', 'file:test'])

            # Test with non-url input (ffmpeg reads from stdin '-' for websockets)
            fd._call_downloader('test', {'url': 'x', 'ext': 'mp4'})
            self.assertEqual(self._args, [
                'ffmpeg', '-y', '-hide_banner', '-i', 'x', '-c', 'copy', '-f', 'mp4', 'file:test'])
if __name__ == '__main__':
unittest.main()

View file

@ -16,6 +16,7 @@ from test.helper import http_server_port, try_rm
from yt_dlp import YoutubeDL from yt_dlp import YoutubeDL
from yt_dlp.downloader.http import HttpFD from yt_dlp.downloader.http import HttpFD
from yt_dlp.utils import encodeFilename from yt_dlp.utils import encodeFilename
from yt_dlp.utils._utils import _YDLLogger as FakeLogger
TEST_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DIR = os.path.dirname(os.path.abspath(__file__))
@ -67,17 +68,6 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
assert False assert False
class FakeLogger:
    """Logger stand-in whose debug/warning/error methods discard the message."""

    def debug(self, msg):
        return None

    def warning(self, msg):
        return None

    def error(self, msg):
        return None
class TestHttpFD(unittest.TestCase): class TestHttpFD(unittest.TestCase):
def setUp(self): def setUp(self):
self.httpd = http.server.HTTPServer( self.httpd = http.server.HTTPServer(

View file

@ -1,500 +0,0 @@
#!/usr/bin/env python3
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import gzip
import http.cookiejar
import http.server
import io
import pathlib
import ssl
import tempfile
import threading
import urllib.error
import urllib.request
import zlib
from test.helper import http_server_port
from yt_dlp import YoutubeDL
from yt_dlp.dependencies import brotli
from yt_dlp.utils import sanitized_Request, urlencode_postdata
from .helper import FakeYDL
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
    """Request handler for the local test server used by the HTTP tests.

    Routes are selected from `self.path`; unknown paths get a 404 response.
    """

    protocol_version = 'HTTP/1.1'

    def log_message(self, format, *args):
        # Keep the test output quiet
        pass

    def _send_body(self, status, content_type, payload):
        """Send `status`, Content-Type and Content-Length headers, then `payload`."""
        self.send_response(status)
        self.send_header('Content-Type', content_type)
        # Content-Length is required for persistent connections
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    @staticmethod
    def _gzip(payload):
        """Return `payload` compressed as a gzip stream."""
        buf = io.BytesIO()
        with gzip.GzipFile(fileobj=buf, mode='wb') as f:
            f.write(payload)
        return buf.getvalue()

    def _headers(self):
        """Echo the request headers back as the response body."""
        self._send_body(200, 'application/json', str(self.headers).encode('utf-8'))

    def _redirect(self):
        """Redirect to /method using the status code embedded in the path."""
        status = int(self.path[len('/redirect_'):])
        self.send_response(status)
        self.send_header('Location', '/method')
        self.send_header('Content-Length', '0')
        self.end_headers()

    def _method(self, method, payload=None):
        """Report `method` in a 'Method' header and echo `payload` if there is one."""
        self.send_response(200)
        self.send_header('Content-Length', str(len(payload or '')))
        self.send_header('Method', method)
        self.end_headers()
        if payload:
            self.wfile.write(payload)

    def _status(self, status):
        """Send a small HTML body with the given status code."""
        body = f'<html>{status} NOT FOUND</html>'.encode()
        self._send_body(int(status), 'text/html; charset=utf-8', body)

    def _read_data(self):
        """Read the request body, or return None without a Content-Length header."""
        if 'Content-Length' not in self.headers:
            return None
        return self.rfile.read(int(self.headers['Content-Length']))

    def do_POST(self):
        data = self._read_data()
        path = self.path
        if path.startswith('/redirect_'):
            self._redirect()
        elif path.startswith('/method'):
            self._method('POST', data)
        elif path.startswith('/headers'):
            self._headers()
        else:
            self._status(404)

    def do_HEAD(self):
        path = self.path
        if path.startswith('/redirect_'):
            self._redirect()
        elif path.startswith('/method'):
            self._method('HEAD')
        else:
            self._status(404)

    def do_PUT(self):
        data = self._read_data()
        path = self.path
        if path.startswith('/redirect_'):
            self._redirect()
        elif path.startswith('/method'):
            self._method('PUT', data)
        else:
            self._status(404)

    def do_GET(self):
        html_type = 'text/html; charset=utf-8'
        video_page = b'<html><video src="/vid.mp4" /></html>'
        path = self.path

        if path in ('/video.html', '/%E4%B8%AD%E6%96%87.html', '/%c7%9f'):
            self._send_body(200, html_type, video_page)
        elif path == '/vid.mp4':
            self._send_body(200, 'video/mp4', b'\x00\x00\x00\x00\x20\x66\x74[video]')
        elif path.startswith('/redirect_'):
            self._redirect()
        elif path.startswith('/method'):
            self._method('GET')
        elif path.startswith('/headers'):
            self._headers()
        elif path == '/trailing_garbage':
            self.send_response(200)
            self.send_header('Content-Type', html_type)
            self.send_header('Content-Encoding', 'gzip')
            # Valid gzip stream followed by bytes that are not part of it
            compressed = self._gzip(video_page) + b'trailing garbage'
            self.send_header('Content-Length', str(len(compressed)))
            self.end_headers()
            self.wfile.write(compressed)
        elif path == '/302-non-ascii-redirect':
            new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html'
            self.send_response(301)
            self.send_header('Location', new_url)
            self.send_header('Content-Length', '0')
            self.end_headers()
        elif path == '/content-encoding':
            self._content_encoding(video_page)
        else:
            self._status(404)

    def _content_encoding(self, payload):
        """Apply each encoding named in the 'ytdl-encoding' request header, in order."""
        encodings = self.headers.get('ytdl-encoding', '')
        for encoding in filter(None, (e.strip() for e in encodings.split(','))):
            if encoding == 'br' and brotli:
                payload = brotli.compress(payload)
            elif encoding == 'gzip':
                payload = self._gzip(payload)
            elif encoding == 'deflate':
                payload = zlib.compress(payload)
            elif encoding == 'unsupported':
                payload = b'raw'
                break
            else:
                self._status(415)
                return
        self.send_response(200)
        self.send_header('Content-Encoding', encodings)
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def send_header(self, keyword, value):
        """
        Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
        This is against what is defined in RFC 3986, however we need to test we support this
        since some sites incorrectly do this.
        """
        if keyword.lower() == 'connection':
            return super().send_header(keyword, value)

        if not hasattr(self, '_headers_buffer'):
            self._headers_buffer = []

        header_line = f'{keyword}: {value}\r\n'
        self._headers_buffer.append(header_line.encode())
class FakeLogger:
    """Logger stand-in whose debug/warning/error methods discard the message."""

    def debug(self, msg):
        return None

    def warning(self, msg):
        return None

    def error(self, msg):
        return None
class TestHTTP(unittest.TestCase):
    """Exercises `ydl.urlopen` (via FakeYDL) against local HTTP and HTTPS test servers."""

    def setUp(self):
        # HTTP server
        self.http_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        self.http_port = http_server_port(self.http_httpd)
        # FIXME: we should probably stop the http server thread after each test
        # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041
        self.http_server_thread = threading.Thread(
            target=self.http_httpd.serve_forever, daemon=True)
        self.http_server_thread.start()

        # HTTPS server
        certfn = os.path.join(TEST_DIR, 'testcert.pem')
        self.https_httpd = http.server.ThreadingHTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        sslctx.load_cert_chain(certfn, None)
        self.https_httpd.socket = sslctx.wrap_socket(self.https_httpd.socket, server_side=True)
        self.https_port = http_server_port(self.https_httpd)
        self.https_server_thread = threading.Thread(
            target=self.https_httpd.serve_forever, daemon=True)
        self.https_server_thread.start()

    def _open_encoded(self, ydl, encodings):
        """Request /content-encoding, asking the server to apply `encodings`."""
        request = sanitized_Request(
            f'http://127.0.0.1:{self.http_port}/content-encoding',
            headers={'ytdl-encoding': encodings})
        return ydl.urlopen(request)

    def test_nocheckcertificate(self):
        https_url = f'https://127.0.0.1:{self.https_port}/headers'

        # Without 'nocheckcertificate' the request to the test server must fail
        with FakeYDL({'logger': FakeLogger()}) as ydl:
            with self.assertRaises(urllib.error.URLError):
                ydl.urlopen(sanitized_Request(https_url))

        with FakeYDL({'logger': FakeLogger(), 'nocheckcertificate': True}) as ydl:
            r = ydl.urlopen(sanitized_Request(https_url))
            self.assertEqual(r.status, 200)
            r.close()

    def test_percent_encode(self):
        with FakeYDL() as ydl:
            # Unicode characters should be encoded with uppercase percent-encoding
            res = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/中文.html'))
            self.assertEqual(res.status, 200)
            res.close()

            # don't normalize existing percent encodings
            res = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/%c7%9f'))
            self.assertEqual(res.status, 200)
            res.close()

    def test_unicode_path_redirection(self):
        with FakeYDL() as ydl:
            r = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect'))
            expected_url = f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html'
            self.assertEqual(r.url, expected_url)
            r.close()

    def test_redirect(self):
        with FakeYDL() as ydl:
            def do_req(redirect_status, method):
                """Return (body, 'method' response header) after following the redirect."""
                data = None
                if method in ('POST', 'PUT'):
                    data = b'testdata'
                res = ydl.urlopen(sanitized_Request(
                    f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data))
                return res.read().decode('utf-8'), res.headers.get('method', '')

            # A 303 must either use GET or HEAD for subsequent request
            self.assertEqual(do_req(303, 'POST'), ('', 'GET'))
            self.assertEqual(do_req(303, 'HEAD'), ('', 'HEAD'))

            self.assertEqual(do_req(303, 'PUT'), ('', 'GET'))

            # 301 and 302 turn POST only into a GET
            self.assertEqual(do_req(301, 'POST'), ('', 'GET'))
            self.assertEqual(do_req(301, 'HEAD'), ('', 'HEAD'))
            self.assertEqual(do_req(302, 'POST'), ('', 'GET'))
            self.assertEqual(do_req(302, 'HEAD'), ('', 'HEAD'))

            self.assertEqual(do_req(301, 'PUT'), ('testdata', 'PUT'))
            self.assertEqual(do_req(302, 'PUT'), ('testdata', 'PUT'))

            # 307 and 308 should not change method
            for m in ('POST', 'PUT'):
                self.assertEqual(do_req(307, m), ('testdata', m))
                self.assertEqual(do_req(308, m), ('testdata', m))

            self.assertEqual(do_req(307, 'HEAD'), ('', 'HEAD'))
            self.assertEqual(do_req(308, 'HEAD'), ('', 'HEAD'))

            # These should not redirect and instead raise an HTTPError
            for code in (300, 304, 305, 306):
                with self.assertRaises(urllib.error.HTTPError):
                    do_req(code, 'GET')

    def test_content_type(self):
        # https://github.com/yt-dlp/yt-dlp/commit/379a4f161d4ad3e40932dcf5aca6e6fb9715ab28
        expected_header = 'Content-Type: application/x-www-form-urlencoded'
        with FakeYDL({'nocheckcertificate': True}) as ydl:
            # method should be auto-detected as POST
            r = sanitized_Request(f'https://localhost:{self.https_port}/headers', data=urlencode_postdata({'test': 'test'}))
            headers = ydl.urlopen(r).read().decode('utf-8')
            self.assertIn(expected_header, headers)

            # test http
            r = sanitized_Request(f'http://localhost:{self.http_port}/headers', data=urlencode_postdata({'test': 'test'}))
            headers = ydl.urlopen(r).read().decode('utf-8')
            self.assertIn(expected_header, headers)

    def test_cookiejar(self):
        with FakeYDL() as ydl:
            cookie = http.cookiejar.Cookie(
                0, 'test', 'ytdlp', None, False, '127.0.0.1', True,
                False, '/headers', True, False, None, False, None, None, {})
            ydl.cookiejar.set_cookie(cookie)

            # The /headers route echoes the request headers back as the body
            data = ydl.urlopen(sanitized_Request(f'http://127.0.0.1:{self.http_port}/headers')).read()
            self.assertIn(b'Cookie: test=ytdlp', data)

    def test_no_compression_compat_header(self):
        with FakeYDL() as ydl:
            request = sanitized_Request(
                f'http://127.0.0.1:{self.http_port}/headers',
                headers={'Youtubedl-no-compression': True})
            data = ydl.urlopen(request).read()
            self.assertIn(b'Accept-Encoding: identity', data)
            self.assertNotIn(b'youtubedl-no-compression', data.lower())

    def test_gzip_trailing_garbage(self):
        # https://github.com/ytdl-org/youtube-dl/commit/aa3e950764337ef9800c936f4de89b31c00dfcf5
        # https://github.com/ytdl-org/youtube-dl/commit/6f2ec15cee79d35dba065677cad9da7491ec6e6f
        with FakeYDL() as ydl:
            res = ydl.urlopen(sanitized_Request(f'http://localhost:{self.http_port}/trailing_garbage'))
            data = res.read().decode('utf-8')
            self.assertEqual(data, '<html><video src="/vid.mp4" /></html>')

    @unittest.skipUnless(brotli, 'brotli support is not installed')
    def test_brotli(self):
        with FakeYDL() as ydl:
            res = self._open_encoded(ydl, 'br')
            self.assertEqual(res.headers.get('Content-Encoding'), 'br')
            self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')

    def test_deflate(self):
        with FakeYDL() as ydl:
            res = self._open_encoded(ydl, 'deflate')
            self.assertEqual(res.headers.get('Content-Encoding'), 'deflate')
            self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')

    def test_gzip(self):
        with FakeYDL() as ydl:
            res = self._open_encoded(ydl, 'gzip')
            self.assertEqual(res.headers.get('Content-Encoding'), 'gzip')
            self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')

    def test_multiple_encodings(self):
        # https://www.rfc-editor.org/rfc/rfc9110.html#section-8.4
        with FakeYDL() as ydl:
            for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
                res = self._open_encoded(ydl, pair)
                self.assertEqual(res.headers.get('Content-Encoding'), pair)
                self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')

    def test_unsupported_encoding(self):
        # it should return the raw content
        with FakeYDL() as ydl:
            res = self._open_encoded(ydl, 'unsupported')
            self.assertEqual(res.headers.get('Content-Encoding'), 'unsupported')
            self.assertEqual(res.read(), b'raw')
class TestClientCert(unittest.TestCase):
    """Runs extractions against an HTTPS server that requires a client certificate."""

    def setUp(self):
        self.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate')
        server_cert = os.path.join(TEST_DIR, 'testcert.pem')
        ca_cert = os.path.join(self.certdir, 'ca.crt')

        self.httpd = http.server.HTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler)
        tls_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        # The server rejects clients that do not present a certificate signed by ca.crt
        tls_context.verify_mode = ssl.CERT_REQUIRED
        tls_context.load_verify_locations(cafile=ca_cert)
        tls_context.load_cert_chain(server_cert, None)
        self.httpd.socket = tls_context.wrap_socket(self.httpd.socket, server_side=True)
        self.port = http_server_port(self.httpd)
        self.server_thread = threading.Thread(target=self.httpd.serve_forever, daemon=True)
        self.server_thread.start()

    def _cert_path(self, filename):
        """Return the path of `filename` inside the certificate fixture directory."""
        return os.path.join(self.certdir, filename)

    def _run_test(self, **params):
        """Extract /video.html with the given client-certificate params and check the result URL."""
        options = {
            'logger': FakeLogger(),
            # Disable client-side validation of unacceptable self-signed testcert.pem
            # The test is of a check on the server side, so unaffected
            'nocheckcertificate': True,
        }
        options.update(params)
        ydl = YoutubeDL(options)
        r = ydl.extract_info(f'https://127.0.0.1:{self.port}/video.html')
        self.assertEqual(r['url'], f'https://127.0.0.1:{self.port}/vid.mp4')

    def test_certificate_combined_nopass(self):
        self._run_test(client_certificate=self._cert_path('clientwithkey.crt'))

    def test_certificate_nocombined_nopass(self):
        self._run_test(
            client_certificate=self._cert_path('client.crt'),
            client_certificate_key=self._cert_path('client.key'))

    def test_certificate_combined_pass(self):
        self._run_test(
            client_certificate=self._cert_path('clientwithencryptedkey.crt'),
            client_certificate_password='foobar')

    def test_certificate_nocombined_pass(self):
        self._run_test(
            client_certificate=self._cert_path('client.crt'),
            client_certificate_key=self._cert_path('clientencrypted.key'),
            client_certificate_password='foobar')
def _build_proxy_handler(name):
class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
proxy_name = name
def log_message(self, format, *args):
pass
def do_GET(self):
self.send_response(200)
self.send_header('Content-Type', 'text/plain; charset=utf-8')
self.end_headers()
self.wfile.write(f'{self.proxy_name}: {self.path}'.encode())
return HTTPTestRequestHandler
class TestProxy(unittest.TestCase):
    """Checks which of two local proxy servers ('normal' and 'geo') handles a request."""

    def setUp(self):
        self.proxy = http.server.HTTPServer(
            ('127.0.0.1', 0), _build_proxy_handler('normal'))
        self.port = http_server_port(self.proxy)
        self.proxy_thread = threading.Thread(target=self.proxy.serve_forever, daemon=True)
        self.proxy_thread.start()

        self.geo_proxy = http.server.HTTPServer(
            ('127.0.0.1', 0), _build_proxy_handler('geo'))
        self.geo_port = http_server_port(self.geo_proxy)
        self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever, daemon=True)
        self.geo_proxy_thread.start()

    def test_proxy(self):
        geo_proxy = f'127.0.0.1:{self.geo_port}'
        ydl = YoutubeDL({
            'proxy': f'127.0.0.1:{self.port}',
            'geo_verification_proxy': geo_proxy,
        })
        url = 'http://foo.com/bar'

        # A plain request is expected to be answered by the 'normal' proxy
        body = ydl.urlopen(url).read().decode()
        self.assertEqual(body, f'normal: {url}')

        # A request carrying Ytdl-request-proxy is expected to reach the 'geo' proxy
        req = urllib.request.Request(url, headers={'Ytdl-request-proxy': geo_proxy})
        body = ydl.urlopen(req).read().decode()
        self.assertEqual(body, f'geo: {url}')

    def test_proxy_with_idn(self):
        ydl = YoutubeDL({
            'proxy': f'127.0.0.1:{self.port}',
        })
        url = 'http://中文.tw/'
        body = ydl.urlopen(url).read().decode()
        # b'xn--fiq228c' is '中文'.encode('idna')
        self.assertEqual(body, 'normal: http://xn--fiq228c.tw/')
class TestFileURL(unittest.TestCase):
    """Checks that file:// URLs are refused unless 'enable_file_urls' is set."""

    # See https://github.com/ytdl-org/youtube-dl/issues/8227
    def test_file_urls(self):
        # delete=False so the file survives the handle being closed and can
        # be opened again through its file:// URL
        with tempfile.NamedTemporaryFile(delete=False) as tf:
            tf.write(b'foobar')

        try:
            url = pathlib.Path(tf.name).as_uri()

            with FakeYDL() as ydl:
                self.assertRaisesRegex(
                    urllib.error.URLError, 'file:// URLs are explicitly disabled in yt-dlp for security reasons', ydl.urlopen, url)

            with FakeYDL({'enable_file_urls': True}) as ydl:
                res = ydl.urlopen(url)
                try:
                    self.assertEqual(res.read(), b'foobar')
                finally:
                    # Close even when the assertion fails, so the unlink
                    # below is not attempted on a file that is still open
                    res.close()
        finally:
            # Remove the temp file even when an assertion above fails
            os.unlink(tf.name)
if __name__ == '__main__':
unittest.main()

1382
test/test_networking.py Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,279 @@
#!/usr/bin/env python3
# Allow direct execution
import os
import sys
import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import contextlib
import io
import platform
import random
import ssl
import urllib.error
import warnings
from yt_dlp.cookies import YoutubeDLCookieJar
from yt_dlp.dependencies import certifi
from yt_dlp.networking import Response
from yt_dlp.networking._helper import (
InstanceStoreMixin,
add_accept_encoding_header,
get_redirect_method,
make_socks_proxy_opts,
select_proxy,
ssl_load_certs,
)
from yt_dlp.networking.exceptions import (
HTTPError,
IncompleteRead,
_CompatHTTPError,
)
from yt_dlp.socks import ProxyType
from yt_dlp.utils.networking import HTTPHeaderDict
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
class TestNetworkingUtils:
    """Tests for the helper functions imported from yt_dlp.networking._helper."""

    def test_select_proxy(self):
        proxies = {
            'all': 'socks5://example.com',
            'http': 'http://example.com:1080',
            'no': 'bypass.example.com,yt-dl.org'
        }

        # No 'https' entry, so the 'all' proxy is expected
        assert select_proxy('https://example.com', proxies) == proxies['all']
        assert select_proxy('http://example.com', proxies) == proxies['http']

        # Hosts listed under 'no' are expected to bypass the proxy
        for bypassed_url in ('http://bypass.example.com', 'https://yt-dl.org'):
            assert select_proxy(bypassed_url, proxies) is None

    @pytest.mark.parametrize('socks_proxy,expected', [
        ('socks5h://example.com', {
            'proxytype': ProxyType.SOCKS5,
            'addr': 'example.com',
            'port': 1080,
            'rdns': True,
            'username': None,
            'password': None
        }),
        ('socks5://user:@example.com:5555', {
            'proxytype': ProxyType.SOCKS5,
            'addr': 'example.com',
            'port': 5555,
            'rdns': False,
            'username': 'user',
            'password': ''
        }),
        ('socks4://u%40ser:pa%20ss@127.0.0.1:1080', {
            'proxytype': ProxyType.SOCKS4,
            'addr': '127.0.0.1',
            'port': 1080,
            'rdns': False,
            'username': 'u@ser',
            'password': 'pa ss'
        }),
        ('socks4a://:pa%20ss@127.0.0.1', {
            'proxytype': ProxyType.SOCKS4A,
            'addr': '127.0.0.1',
            'port': 1080,
            'rdns': True,
            'username': '',
            'password': 'pa ss'
        })
    ])
    def test_make_socks_proxy_opts(self, socks_proxy, expected):
        """Each SOCKS proxy URL must produce exactly the expected options dict."""
        opts = make_socks_proxy_opts(socks_proxy)
        assert opts == expected

    def test_make_socks_proxy_unknown(self):
        """A bare 'socks://' scheme must be rejected with a ValueError."""
        with pytest.raises(ValueError, match='Unknown SOCKS proxy version: socks'):
            make_socks_proxy_opts('socks://127.0.0.1')

    @pytest.mark.skipif(not certifi, reason='certifi is not installed')
    def test_load_certifi(self):
        certifi_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        reference_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        ssl_load_certs(certifi_context, use_certifi=True)
        reference_context.load_verify_locations(cafile=certifi.where())
        assert certifi_context.get_ca_certs() == reference_context.get_ca_certs()

        # Test load normal certs
        # XXX: could there be a case where system certs are the same as certifi?
        system_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        ssl_load_certs(system_context, use_certifi=False)
        assert system_context.get_ca_certs() != certifi_context.get_ca_certs()

    @pytest.mark.parametrize('method,status,expected', [
        ('GET', 303, 'GET'),
        ('HEAD', 303, 'HEAD'),
        ('PUT', 303, 'GET'),
        ('POST', 301, 'GET'),
        ('HEAD', 301, 'HEAD'),
        ('POST', 302, 'GET'),
        ('HEAD', 302, 'HEAD'),
        ('PUT', 302, 'PUT'),
        ('POST', 308, 'POST'),
        ('POST', 307, 'POST'),
        ('HEAD', 308, 'HEAD'),
        ('HEAD', 307, 'HEAD'),
    ])
    def test_get_redirect_method(self, method, status, expected):
        """The method used after a redirect depends on the original method and status."""
        redirect_method = get_redirect_method(method, status)
        assert redirect_method == expected

    @pytest.mark.parametrize('headers,supported_encodings,expected', [
        ({'Accept-Encoding': 'br'}, ['gzip', 'br'], {'Accept-Encoding': 'br'}),
        ({}, ['gzip', 'br'], {'Accept-Encoding': 'gzip, br'}),
        ({'Content-type': 'application/json'}, [], {'Content-type': 'application/json', 'Accept-Encoding': 'identity'}),
    ])
    def test_add_accept_encoding_header(self, headers, supported_encodings, expected):
        """The headers are updated in place; the result is compared as HTTPHeaderDict."""
        header_dict = HTTPHeaderDict(headers)
        add_accept_encoding_header(header_dict, supported_encodings)
        assert header_dict == HTTPHeaderDict(expected)
class TestInstanceStoreMixin:
    """Checks that InstanceStoreMixin hands back the same instance for equal
    keyword arguments and a different one when the arguments differ."""

    class FakeInstanceStoreMixin(InstanceStoreMixin):
        def _create_instance(self, **kwargs):
            # A random number stands in for a newly created instance
            return random.randint(0, 1000000)

        def _close_instance(self, instance):
            pass

    def test_mixin(self):
        mixin = self.FakeInstanceStoreMixin()
        assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) == mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}})

        assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'e', 4}}) != mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}})

        # The closing parenthesis used to sit at the very end of this line, so
        # the `!=` was evaluated inside the call and a bool was passed as `d`.
        # The assertion then only checked that the returned instance was truthy.
        assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) != mixin._get_instance(d={'a': 1, 'b': 2, 'g': {'d', 4}})

        assert mixin._get_instance(d={'a': 1}, e=[1, 2, 3]) == mixin._get_instance(d={'a': 1}, e=[1, 2, 3])

        assert mixin._get_instance(d={'a': 1}, e=[1, 2, 3]) != mixin._get_instance(d={'a': 1}, e=[1, 2, 3, 4])

        cookiejar = YoutubeDLCookieJar()
        assert mixin._get_instance(b=[1, 2], c=cookiejar) == mixin._get_instance(b=[1, 2], c=cookiejar)

        assert mixin._get_instance(b=[1, 2], c=cookiejar) != mixin._get_instance(b=[1, 2], c=YoutubeDLCookieJar())

        # Different order
        assert mixin._get_instance(c=cookiejar, b=[1, 2]) == mixin._get_instance(b=[1, 2], c=cookiejar)

        m = mixin._get_instance(t=1234)
        assert mixin._get_instance(t=1234) == m
        # After clearing, the same arguments must produce a new instance
        mixin._clear_instances()
        assert mixin._get_instance(t=1234) != m
class TestNetworkingExceptions:
    """Tests for HTTPError, its urllib-compatible wrapper, and IncompleteRead."""

    @staticmethod
    def create_response(status):
        """Build a Response with body b'test' and the given status code."""
        return Response(
            fp=io.BytesIO(b'test'),
            url='http://example.com',
            headers={'tesT': 'test'},
            status=status)

    @pytest.mark.parametrize('http_error_class', [HTTPError, lambda r: _CompatHTTPError(HTTPError(r))])
    def test_http_error(self, http_error_class):
        response = self.create_response(403)
        error = http_error_class(response)

        assert error.status == 403
        assert str(error) == error.msg == 'HTTP Error 403: Forbidden'
        assert error.reason == response.reason
        assert error.response is response

        body = error.response.read()
        assert body == b'test'
        assert repr(error) == '<HTTPError 403: Forbidden>'

    @pytest.mark.parametrize('http_error_class', [HTTPError, lambda *args, **kwargs: _CompatHTTPError(HTTPError(*args, **kwargs))])
    def test_redirect_http_error(self, http_error_class):
        response = self.create_response(301)
        error = http_error_class(response, redirect_loop=True)
        assert str(error) == error.msg == 'HTTP Error 301: Moved Permanently (redirect loop detected)'
        assert error.reason == 'Moved Permanently'

    def test_compat_http_error(self):
        response = self.create_response(403)
        error = _CompatHTTPError(HTTPError(response))
        assert isinstance(error, HTTPError)
        assert isinstance(error, urllib.error.HTTPError)

        @contextlib.contextmanager
        def raises_deprecation_warning():
            """Fail unless the wrapped code emits exactly one DeprecationWarning."""
            with warnings.catch_warnings(record=True) as caught:
                warnings.simplefilter('always')
                yield

                if not caught:
                    pytest.fail('Did not raise DeprecationWarning')
                if len(caught) > 1:
                    pytest.fail(f'Raised multiple warnings: {caught}')

                category = caught[-1].category
                if not issubclass(category, DeprecationWarning):
                    pytest.fail(f'Expected DeprecationWarning, got {category}')
                caught.clear()

        with raises_deprecation_warning():
            assert error.code == 403

        with raises_deprecation_warning():
            assert error.getcode() == 403

        with raises_deprecation_warning():
            assert error.hdrs is error.response.headers

        with raises_deprecation_warning():
            assert error.info() is error.response.headers

        with raises_deprecation_warning():
            assert error.headers is error.response.headers

        with raises_deprecation_warning():
            assert error.filename == error.response.url

        with raises_deprecation_warning():
            assert error.url == error.response.url

        with raises_deprecation_warning():
            assert error.geturl() == error.response.url

        # Passthrough file operations
        with raises_deprecation_warning():
            assert error.read() == b'test'

        with raises_deprecation_warning():
            assert not error.closed

        with raises_deprecation_warning():
            # Technically Response operations are also passed through, which should not be used.
            assert error.get_header('test') == 'test'

        # Should not raise a warning
        error.close()

    @pytest.mark.skipif(
        platform.python_implementation() == 'PyPy', reason='garbage collector works differently in pypy')
    def test_compat_http_error_autoclose(self):
        # Compat HTTPError should not autoclose response
        response = self.create_response(403)
        # The error object is deliberately discarded straight away
        _CompatHTTPError(HTTPError(response))
        assert not response.closed

    def test_incomplete_read_error(self):
        error = IncompleteRead(b'test', 3, cause='test')
        assert isinstance(error, IncompleteRead)
        assert repr(error) == '<IncompleteRead: 4 bytes read, 3 more expected>'
        assert str(error) == error.msg == '4 bytes read, 3 more expected'
        assert error.partial == b'test'
        assert error.expected == 3
        assert error.cause == 'test'

        # Without an expected count only the bytes read are reported
        error = IncompleteRead(b'aaa')
        assert repr(error) == '<IncompleteRead: 3 bytes read>'
        assert str(error) == '3 bytes read'

View file

@ -51,6 +51,7 @@ from yt_dlp.utils import (
escape_url, escape_url,
expand_path, expand_path,
extract_attributes, extract_attributes,
extract_basic_auth,
find_xpath_attr, find_xpath_attr,
fix_xml_ampersands, fix_xml_ampersands,
float_or_none, float_or_none,
@ -103,7 +104,6 @@ from yt_dlp.utils import (
sanitize_filename, sanitize_filename,
sanitize_path, sanitize_path,
sanitize_url, sanitize_url,
sanitized_Request,
shell_quote, shell_quote,
smuggle_url, smuggle_url,
str_or_none, str_or_none,
@ -132,6 +132,7 @@ from yt_dlp.utils import (
xpath_text, xpath_text,
xpath_with_ns, xpath_with_ns,
) )
from yt_dlp.utils.networking import HTTPHeaderDict
class TestUtil(unittest.TestCase): class TestUtil(unittest.TestCase):
@ -258,15 +259,6 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar') self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
self.assertEqual(sanitize_url('foo bar'), 'foo bar') self.assertEqual(sanitize_url('foo bar'), 'foo bar')
def test_extract_basic_auth(self):
auth_header = lambda url: sanitized_Request(url).get_header('Authorization')
self.assertFalse(auth_header('http://foo.bar'))
self.assertFalse(auth_header('http://:foo.bar'))
self.assertEqual(auth_header('http://@foo.bar'), 'Basic Og==')
self.assertEqual(auth_header('http://:pass@foo.bar'), 'Basic OnBhc3M=')
self.assertEqual(auth_header('http://user:@foo.bar'), 'Basic dXNlcjo=')
self.assertEqual(auth_header('http://user:pass@foo.bar'), 'Basic dXNlcjpwYXNz')
def test_expand_path(self): def test_expand_path(self):
def env(var): def env(var):
return f'%{var}%' if sys.platform == 'win32' else f'${var}' return f'%{var}%' if sys.platform == 'win32' else f'${var}'
@ -668,6 +660,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88) self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
self.assertEqual(parse_duration('01:02:03:050'), 3723.05) self.assertEqual(parse_duration('01:02:03:050'), 3723.05)
self.assertEqual(parse_duration('103:050'), 103.05) self.assertEqual(parse_duration('103:050'), 103.05)
self.assertEqual(parse_duration('1HR 3MIN'), 3780)
self.assertEqual(parse_duration('2hrs 3mins'), 7380)
def test_fix_xml_ampersands(self): def test_fix_xml_ampersands(self):
self.assertEqual( self.assertEqual(
@ -1840,6 +1834,8 @@ Line 1
def test_clean_podcast_url(self): def test_clean_podcast_url(self):
self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3') self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3')
self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3') self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
self.assertEqual(clean_podcast_url('https://pdst.fm/e/2.gum.fm/chtbl.com/track/chrt.fm/track/34D33/pscrb.fm/rss/p/traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661'), 'https://traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661')
self.assertEqual(clean_podcast_url('https://pdst.fm/e/https://mgln.ai/e/441/www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3'), 'https://www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3')
def test_LazyList(self): def test_LazyList(self):
it = list(range(10)) it = list(range(10))
@ -2327,6 +2323,44 @@ Line 1
self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'], self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'],
msg='function on a `re.Match` should give group name as well') msg='function on a `re.Match` should give group name as well')
def test_http_header_dict(self):
    """HTTPHeaderDict title-cases keys, stringifies values and prefers later sources."""
    hdrs = HTTPHeaderDict()

    # Keys are normalised and values are converted to str on assignment
    hdrs['ytdl-test'] = 1
    self.assertEqual(list(hdrs.items()), [('Ytdl-Test', '1')])
    hdrs['Ytdl-test'] = '2'
    self.assertEqual(list(hdrs.items()), [('Ytdl-Test', '2')])
    self.assertIn('ytDl-Test', hdrs)

    # Textual representations match those of a plain dict
    self.assertEqual(str(hdrs), str(dict(hdrs)))
    self.assertEqual(repr(hdrs), str(dict(hdrs)))

    hdrs.update({'X-dlp': 'data'})
    self.assertEqual(set(hdrs.items()), {('Ytdl-Test', '2'), ('X-Dlp', 'data')})
    self.assertEqual(dict(hdrs), {'Ytdl-Test': '2', 'X-Dlp': 'data'})
    self.assertEqual(len(hdrs), 2)
    self.assertEqual(hdrs.copy(), hdrs)

    # Positional mappings are overridden by keyword arguments
    merged = HTTPHeaderDict({'X-dlp': 'data3'}, **hdrs, **{'X-dlp': 'data2'})
    self.assertEqual(set(merged.items()), {('Ytdl-Test', '2'), ('X-Dlp', 'data2')})
    self.assertEqual(len(merged), 2)
    merged.clear()
    self.assertEqual(len(merged), 0)

    # ensure we prefer latter headers
    latest_wins = HTTPHeaderDict({'Ytdl-TeSt': 1}, {'Ytdl-test': 2})
    self.assertEqual(set(latest_wins.items()), {('Ytdl-Test', '2')})
    del latest_wins['ytdl-tesT']
    self.assertEqual(dict(latest_wins), {})

    # Values are stored as given, including a trailing separator
    with_separator = HTTPHeaderDict({'ytdl-test': 'data;'})
    self.assertEqual(set(with_separator.items()), {('Ytdl-Test', 'data;')})
def test_extract_basic_auth(self):
    """extract_basic_auth returns the URL without credentials plus the Basic auth value."""
    # (input URL, expected (url, authorization) pair)
    expectations = (
        ('http://:foo.bar', ('http://:foo.bar', None)),
        ('http://foo.bar', ('http://foo.bar', None)),
        ('http://@foo.bar', ('http://foo.bar', 'Basic Og==')),
        ('http://:pass@foo.bar', ('http://foo.bar', 'Basic OnBhc3M=')),
        ('http://user:@foo.bar', ('http://foo.bar', 'Basic dXNlcjo=')),
        ('http://user:pass@foo.bar', ('http://foo.bar', 'Basic dXNlcjpwYXNz')),
    )
    for url, expected in expectations:
        assert extract_basic_auth(url) == expected
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -1,9 +1,10 @@
import collections import collections
import contextlib import contextlib
import copy
import datetime import datetime
import errno import errno
import fileinput import fileinput
import functools import http.cookiejar
import io import io
import itertools import itertools
import json import json
@ -23,9 +24,9 @@ import traceback
import unicodedata import unicodedata
from .cache import Cache from .cache import Cache
from .compat import urllib # isort: split from .compat import functools, urllib # isort: split
from .compat import compat_os_name, compat_shlex_quote from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
from .cookies import load_cookies from .cookies import LenientSimpleCookie, load_cookies
from .downloader import ( from .downloader import (
DashSegmentsFD, DashSegmentsFD,
FFmpegFD, FFmpegFD,
@ -37,6 +38,16 @@ from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text from .minicurses import format_text
from .networking import HEADRequest, Request, RequestDirector
from .networking.common import _REQUEST_HANDLERS
from .networking.exceptions import (
HTTPError,
NoSupportingHandlers,
RequestError,
SSLError,
_CompatHTTPError,
network_exceptions,
)
from .plugins import directories as plugin_directories from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import ( from .postprocessor import (
@ -75,13 +86,11 @@ from .utils import (
ExtractorError, ExtractorError,
FormatSorter, FormatSorter,
GeoRestrictedError, GeoRestrictedError,
HEADRequest,
ISO3166Utils, ISO3166Utils,
LazyList, LazyList,
MaxDownloadsReached, MaxDownloadsReached,
Namespace, Namespace,
PagedList, PagedList,
PerRequestProxyHandler,
PlaylistEntries, PlaylistEntries,
Popen, Popen,
PostProcessingError, PostProcessingError,
@ -90,9 +99,6 @@ from .utils import (
SameFileError, SameFileError,
UnavailableVideoError, UnavailableVideoError,
UserNotLive, UserNotLive,
YoutubeDLCookieProcessor,
YoutubeDLHandler,
YoutubeDLRedirectHandler,
age_restricted, age_restricted,
args_to_str, args_to_str,
bug_reports_message, bug_reports_message,
@ -105,6 +111,7 @@ from .utils import (
error_to_compat_str, error_to_compat_str,
escapeHTML, escapeHTML,
expand_path, expand_path,
extract_basic_auth,
filter_dict, filter_dict,
float_or_none, float_or_none,
format_bytes, format_bytes,
@ -120,9 +127,6 @@ from .utils import (
locked_file, locked_file,
make_archive_id, make_archive_id,
make_dir, make_dir,
make_HTTPS_handler,
merge_headers,
network_exceptions,
number_of_digits, number_of_digits,
orderedSet, orderedSet,
orderedSet_from_options, orderedSet_from_options,
@ -135,8 +139,6 @@ from .utils import (
sanitize_filename, sanitize_filename,
sanitize_path, sanitize_path,
sanitize_url, sanitize_url,
sanitized_Request,
std_headers,
str_or_none, str_or_none,
strftime_or_none, strftime_or_none,
subtitles_filename, subtitles_filename,
@ -154,6 +156,13 @@ from .utils import (
write_json_file, write_json_file,
write_string, write_string,
) )
from .utils._utils import _YDLLogger
from .utils.networking import (
HTTPHeaderDict,
clean_headers,
clean_proxies,
std_headers,
)
from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__ from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__
if compat_os_name == 'nt': if compat_os_name == 'nt':
@ -568,7 +577,7 @@ class YoutubeDL:
'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels', 'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns', 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
'preference', 'language', 'language_preference', 'quality', 'source_preference', 'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options', 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time' 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
} }
@ -617,7 +626,8 @@ class YoutubeDL:
if self.params.get('no_color'): if self.params.get('no_color'):
if self.params.get('color') is not None: if self.params.get('color') is not None:
self.report_warning('Overwriting params from "color" with "no_color"') self.params.setdefault('_warnings', []).append(
'Overwriting params from "color" with "no_color"')
self.params['color'] = 'no_color' self.params['color'] = 'no_color'
term_allow_color = os.environ.get('TERM', '').lower() != 'dumb' term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'
@ -675,6 +685,13 @@ class YoutubeDL:
raise raise
self.params['compat_opts'] = set(self.params.get('compat_opts', ())) self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
self.__header_cookies = []
self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
self.params['http_headers'].pop('Cookie', None)
self._request_director = self.build_request_director(
sorted(_REQUEST_HANDLERS.values(), key=lambda rh: rh.RH_NAME.lower()))
if auto_init and auto_init != 'no_verbose_header': if auto_init and auto_init != 'no_verbose_header':
self.print_debug_header() self.print_debug_header()
@ -745,9 +762,6 @@ class YoutubeDL:
else self.params['format'] if callable(self.params['format']) else self.params['format'] if callable(self.params['format'])
else self.build_format_selector(self.params['format'])) else self.build_format_selector(self.params['format']))
# Set http_headers defaults according to std_headers
self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
hooks = { hooks = {
'post_hooks': self.add_post_hook, 'post_hooks': self.add_post_hook,
'progress_hooks': self.add_progress_hook, 'progress_hooks': self.add_progress_hook,
@ -764,8 +778,6 @@ class YoutubeDL:
get_postprocessor(pp_def.pop('key'))(self, **pp_def), get_postprocessor(pp_def.pop('key'))(self, **pp_def),
when=when) when=when)
self._setup_opener()
def preload_download_archive(fn): def preload_download_archive(fn):
"""Preload the archive, if any is specified""" """Preload the archive, if any is specified"""
archive = set() archive = set()
@ -941,11 +953,17 @@ class YoutubeDL:
self.save_console_title() self.save_console_title()
return self return self
def save_cookies(self):
    """Save the cookiejar, but only when a `cookiefile` param is set."""
    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save()
def __exit__(self, *args):
    """Leave the context manager: restore the console title, then close this instance."""
    self.restore_console_title()
    self.close()
def close(self):
    """Save cookies and close the request director.

    The request director is closed even if saving the cookies raises;
    the exception still propagates to the caller afterwards.
    """
    try:
        self.save_cookies()
    finally:
        # Must run regardless of the outcome above, otherwise the director is never closed
        self._request_director.close()
def trouble(self, message=None, tb=None, is_error=True): def trouble(self, message=None, tb=None, is_error=True):
"""Determine action to take when a download problem appears. """Determine action to take when a download problem appears.
@ -988,6 +1006,7 @@ class YoutubeDL:
ID='green', ID='green',
DELIM='blue', DELIM='blue',
ERROR='red', ERROR='red',
BAD_FORMAT='light red',
WARNING='yellow', WARNING='yellow',
SUPPRESS='light black', SUPPRESS='light black',
) )
@ -1276,28 +1295,27 @@ class YoutubeDL:
return outer_mobj.group(0) return outer_mobj.group(0)
key = outer_mobj.group('key') key = outer_mobj.group('key')
mobj = re.match(INTERNAL_FORMAT_RE, key) mobj = re.match(INTERNAL_FORMAT_RE, key)
initial_field = mobj.group('fields') if mobj else '' value, replacement, default, last_field = None, None, na, ''
value, replacement, default = None, None, na
while mobj: while mobj:
mobj = mobj.groupdict() mobj = mobj.groupdict()
default = mobj['default'] if mobj['default'] is not None else default default = mobj['default'] if mobj['default'] is not None else default
value = get_value(mobj) value = get_value(mobj)
replacement = mobj['replacement'] last_field, replacement = mobj['fields'], mobj['replacement']
if value is None and mobj['alternate']: if value is None and mobj['alternate']:
mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:]) mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
else: else:
break break
fmt = outer_mobj.group('format')
if fmt == 's' and value is not None and key in field_size_compat_map.keys():
fmt = f'0{field_size_compat_map[key]:d}d'
if None not in (value, replacement): if None not in (value, replacement):
try: try:
value = replacement_formatter.format(replacement, value) value = replacement_formatter.format(replacement, value)
except ValueError: except ValueError:
value, default = None, na value, default = None, na
fmt = outer_mobj.group('format')
if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int):
fmt = f'0{field_size_compat_map[last_field]:d}d'
flags = outer_mobj.group('conversion') or '' flags = outer_mobj.group('conversion') or ''
str_fmt = f'{fmt[:-1]}s' str_fmt = f'{fmt[:-1]}s'
if value is None: if value is None:
@ -1327,7 +1345,7 @@ class YoutubeDL:
value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s', value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
factor=1024 if '#' in flags else 1000) factor=1024 if '#' in flags else 1000)
elif fmt[-1] == 'S': # filename sanitization elif fmt[-1] == 'S': # filename sanitization
value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
elif fmt[-1] == 'c': elif fmt[-1] == 'c':
if value: if value:
value = str(value)[0] value = str(value)[0]
@ -1346,7 +1364,7 @@ class YoutubeDL:
elif fmt[-1] == 'a': elif fmt[-1] == 'a':
value, fmt = ascii(value), str_fmt value, fmt = ascii(value), str_fmt
if fmt[-1] in 'csra': if fmt[-1] in 'csra':
value = sanitizer(initial_field, value) value = sanitizer(last_field, value)
key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format')) key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
TMPL_DICT[key] = value TMPL_DICT[key] = value
@ -1481,7 +1499,10 @@ class YoutubeDL:
return ret return ret
if self.in_download_archive(info_dict): if self.in_download_archive(info_dict):
reason = '%s has already been recorded in the archive' % video_title reason = ''.join((
format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
'has already been recorded in the archive'))
break_opt, break_err = 'break_on_existing', ExistingVideoReached break_opt, break_err = 'break_on_existing', ExistingVideoReached
else: else:
try: try:
@ -1542,7 +1563,8 @@ class YoutubeDL:
temp_id = ie.get_temp_id(url) temp_id = ie.get_temp_id(url)
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}): if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive') self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
'has already been recorded in the archive')
if self.params.get('break_on_existing', False): if self.params.get('break_on_existing', False):
raise ExistingVideoReached() raise ExistingVideoReached()
break break
@ -1630,8 +1652,67 @@ class YoutubeDL:
self.to_screen('') self.to_screen('')
raise raise
def _load_cookies(self, data, *, autoscope=True):
    """Loads cookies from a `Cookie` header

    This tries to work around the security vulnerability of passing cookies to every domain.
    See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj

    @param data         The Cookie header as string to load the cookies from
    @param autoscope    If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
                        If `True`, save cookies for later to be stored in the jar with a limited scope
                        If a URL, save cookies in the jar with the domain of the URL
    @raises ValueError  If `autoscope` is truthy and any parsed cookie has a truthy attribute value
    """
    for cookie in LenientSimpleCookie(data).values():
        # With autoscope only bare name=value pairs are accepted; any attribute value is rejected
        if autoscope and any(cookie.values()):
            raise ValueError('Invalid syntax in Cookie Header')

        domain = cookie.get('domain') or ''
        expiry = cookie.get('expires')
        if expiry == '':  # 0 is valid
            expiry = None
        # Positional arguments of http.cookiejar.Cookie, in order:
        #   version, name, value, port, port_specified,
        #   domain, domain_specified, domain_initial_dot, path, path_specified,
        #   secure, expires, discard, comment, comment_url, rest
        prepared_cookie = http.cookiejar.Cookie(
            cookie.get('version') or 0, cookie.key, cookie.value, None, False,
            domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
            cookie.get('secure') or False, expiry, False, None, None, {})

        if domain:
            # Explicitly scoped cookie: goes straight into the jar
            self.cookiejar.set_cookie(prepared_cookie)
        elif autoscope is True:
            # No domain and no URL to scope to yet: keep the cookie aside
            # (`_apply_header_cookies` reads this list when it is given no explicit cookies)
            self.deprecated_feature(
                'Passing cookies as a header is a potential security risk; '
                'they will be scoped to the domain of the downloaded urls. '
                'Please consider loading cookies from a file or browser instead.')
            self.__header_cookies.append(prepared_cookie)
        elif autoscope:
            # Truthy but not `True`: `autoscope` is passed on as the URL to scope the cookie to
            self.report_warning(
                'The extractor result contains an unscoped cookie as an HTTP header. '
                f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
                only_once=True)
            self._apply_header_cookies(autoscope, [prepared_cookie])
        else:
            # autoscope is falsy and the cookie has no domain: report it and do not store it
            self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
                              tb=False, is_error=False)
def _apply_header_cookies(self, url, cookies=None):
    """Applies stray header cookies to the provided url

    Every cookie is copied, the copy's domain is set to `.<hostname of url>`,
    and the copy is stored in the cookiejar; the input cookies are not modified.
    When `cookies` is empty or not given, the previously saved header cookies are used.
    Nothing happens if `url` has no hostname.

    While this is not ideal, it helps reduce the risk of the cookies being sent
    to an unintended destination while mostly maintaining compatibility.
    """
    hostname = urllib.parse.urlparse(url).hostname
    if not hostname:
        return

    for stray_cookie in cookies or self.__header_cookies:
        scoped_cookie = copy.copy(stray_cookie)
        scoped_cookie.domain = f'.{hostname}'
        self.cookiejar.set_cookie(scoped_cookie)
@_handle_extraction_exceptions @_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process): def __extract_info(self, url, ie, download, extra_info, process):
self._apply_header_cookies(url)
try: try:
ie_result = ie.extract(url) ie_result = ie.extract(url)
except UserNotLive as e: except UserNotLive as e:
@ -2091,8 +2172,6 @@ class YoutubeDL:
allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False), allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
'video': self.params.get('allow_multiple_video_streams', False)} 'video': self.params.get('allow_multiple_video_streams', False)}
check_formats = self.params.get('check_formats') == 'selected'
def _parse_filter(tokens): def _parse_filter(tokens):
filter_parts = [] filter_parts = []
for type, string_, start, _, _ in tokens: for type, string_, start, _, _ in tokens:
@ -2265,10 +2344,19 @@ class YoutubeDL:
return new_dict return new_dict
def _check_formats(formats):
    """Yield `formats`, testing them via `self._check_formats` where needed.

    - `check_formats == 'selected'`: every format is tested
    - `check_formats` set to any other non-None value, or
      `allow_unplayable_formats` enabled: formats are passed through untested
    - otherwise: only formats with a truthy `has_drm` are tested
    """
    check_formats = self.params.get('check_formats')
    if check_formats == 'selected':
        # This must be tested before the `is not None` case below:
        # 'selected' is itself not None and would otherwise never be honoured
        yield from self._check_formats(formats)
        return
    elif check_formats is not None or self.params.get('allow_unplayable_formats'):
        yield from formats
        return

    for f in formats:
        if f.get('has_drm'):
            yield from self._check_formats([f])
        else:
            yield f
def _build_selector_function(selector): def _build_selector_function(selector):
if isinstance(selector, list): # , if isinstance(selector, list): # ,
@ -2407,14 +2495,34 @@ class YoutubeDL:
parsed_selector = _parse_format_selection(iter(TokenIterator(tokens))) parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
return _build_selector_function(parsed_selector) return _build_selector_function(parsed_selector)
def _calc_headers(self, info_dict): def _calc_headers(self, info_dict, load_cookies=False):
res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {}) res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
if 'Youtubedl-No-Compression' in res: # deprecated clean_headers(res)
res.pop('Youtubedl-No-Compression', None)
res['Accept-Encoding'] = 'identity' if load_cookies: # For --load-info-json
cookies = self.cookiejar.get_cookie_header(info_dict['url']) self._load_cookies(res.get('Cookie'), autoscope=info_dict['url']) # compat
self._load_cookies(info_dict.get('cookies'), autoscope=False)
# The `Cookie` header is removed to prevent leaks and unscoped cookies.
# See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
res.pop('Cookie', None)
cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
if cookies: if cookies:
res['Cookie'] = cookies encoder = LenientSimpleCookie()
values = []
for cookie in cookies:
_, value = encoder.value_encode(cookie.value)
values.append(f'{cookie.name}={value}')
if cookie.domain:
values.append(f'Domain={cookie.domain}')
if cookie.path:
values.append(f'Path={cookie.path}')
if cookie.secure:
values.append('Secure')
if cookie.expires:
values.append(f'Expires={cookie.expires}')
if cookie.version:
values.append(f'Version={cookie.version}')
info_dict['cookies'] = '; '.join(values)
if 'X-Forwarded-For' not in res: if 'X-Forwarded-For' not in res:
x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip') x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
@ -2620,10 +2728,10 @@ class YoutubeDL:
if field_preference: if field_preference:
info_dict['_format_sort_fields'] = field_preference info_dict['_format_sort_fields'] = field_preference
# or None ensures --clean-infojson removes it info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it
info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
if not self.params.get('allow_unplayable_formats'): if not self.params.get('allow_unplayable_formats'):
formats = [f for f in formats if not f.get('has_drm')] formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']
if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats): if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
self.report_warning( self.report_warning(
@ -2675,7 +2783,12 @@ class YoutubeDL:
and info_dict.get('duration') and format.get('tbr') and info_dict.get('duration') and format.get('tbr')
and not format.get('filesize') and not format.get('filesize_approx')): and not format.get('filesize') and not format.get('filesize_approx')):
format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8)) format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict)) format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)
# Safeguard against old/insecure infojson when using --load-info-json
if info_dict.get('http_headers'):
info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
info_dict['http_headers'].pop('Cookie', None)
# This is copied to http_headers by the above _calc_headers and can now be removed # This is copied to http_headers by the above _calc_headers and can now be removed
if '__x_forwarded_for_ip' in info_dict: if '__x_forwarded_for_ip' in info_dict:
@ -2772,11 +2885,8 @@ class YoutubeDL:
formats_to_download = list(format_selector({ formats_to_download = list(format_selector({
'formats': formats, 'formats': formats,
'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats), 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
'incomplete_formats': ( 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
# All formats are video-only or or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
# all formats are audio-only
or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
})) }))
if interactive_format_selection and not formats_to_download: if interactive_format_selection and not formats_to_download:
self.report_error('Requested format is not available', tb=False, is_error=False) self.report_error('Requested format is not available', tb=False, is_error=False)
@ -3186,8 +3296,9 @@ class YoutubeDL:
fd, success = None, True fd, success = None, True
if info_dict.get('protocol') or info_dict.get('url'): if info_dict.get('protocol') or info_dict.get('url'):
fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-') fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
if not (fd is FFmpegFD or fd is DashSegmentsFD) and 'no-direct-merge' not in self.params['compat_opts'] and (info_dict.get('section_start') or info_dict.get('section_end')): if fd not in [FFmpegFD, DashSegmentsFD] and 'no-direct-merge' not in self.params['compat_opts'] and (
msg = ('This format cannot be partially downloaded' if FFmpegFD.available() info_dict.get('section_start') or info_dict.get('section_end')):
msg = (f'This format cannot be partially downloaded {fd}' if FFmpegFD.available()
else 'You have requested downloading the video partially, but ffmpeg is not installed') else 'You have requested downloading the video partially, but ffmpeg is not installed')
self.report_error(f'{msg}. Aborting') self.report_error(f'{msg}. Aborting')
return return
@ -3346,7 +3457,7 @@ class YoutubeDL:
postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any(( postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
isinstance(pp, FFmpegVideoConvertorPP) isinstance(pp, FFmpegVideoConvertorPP)
and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None) and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
) for pp in self._pps['post_process']) ) for pp in self._pps['post_process']) or fd == FFmpegFD
if not postprocessed_by_ffmpeg: if not postprocessed_by_ffmpeg:
ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash', ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
@ -3692,7 +3803,7 @@ class YoutubeDL:
def simplified_codec(f, field): def simplified_codec(f, field):
assert field in ('acodec', 'vcodec') assert field in ('acodec', 'vcodec')
codec = f.get(field, 'unknown') codec = f.get(field)
if not codec: if not codec:
return 'unknown' return 'unknown'
elif codec != 'none': elif codec != 'none':
@ -3727,14 +3838,13 @@ class YoutubeDL:
simplified_codec(f, 'acodec'), simplified_codec(f, 'acodec'),
format_field(f, 'abr', '\t%dk', func=round), format_field(f, 'abr', '\t%dk', func=round),
format_field(f, 'asr', '\t%s', func=format_decimal_suffix), format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
join_nonempty( join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None, self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
self._format_out('DRM', 'light red') if f.get('has_drm') else None, (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
format_field(f, 'language', '[%s]'), else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
join_nonempty(format_field(f, 'format_note'), format_field(f, 'format_note'),
format_field(f, 'container', ignore=(None, f.get('ext'))), format_field(f, 'container', ignore=(None, f.get('ext'))),
delim=', '), delim=', '), delim=' '),
delim=' '),
] for f in formats if f.get('preference') is None or f['preference'] >= -1000] ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
header_line = self._list_format_headers( header_line = self._list_format_headers(
'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO', 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
@ -3783,12 +3893,6 @@ class YoutubeDL:
def list_subtitles(self, video_id, subtitles, name='subtitles'): def list_subtitles(self, video_id, subtitles, name='subtitles'):
self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles) self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
def urlopen(self, req):
""" Start an HTTP download """
if isinstance(req, str):
req = sanitized_Request(req)
return self._opener.open(req, timeout=self._socket_timeout)
def print_debug_header(self): def print_debug_header(self):
if not self.params.get('verbose'): if not self.params.get('verbose'):
return return
@ -3877,13 +3981,8 @@ class YoutubeDL:
join_nonempty(*get_package_info(m)) for m in available_dependencies.values() join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
})) or 'none')) })) or 'none'))
self._setup_opener() write_debug(f'Proxy map: {self.proxies}')
proxy_map = {} # write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers)}')
for handler in self._opener.handlers:
if hasattr(handler, 'proxies'):
proxy_map.update(handler.proxies)
write_debug(f'Proxy map: {proxy_map}')
for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items(): for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
display_list = ['%s%s' % ( display_list = ['%s%s' % (
klass.__name__, '' if klass.__name__ == name else f' as {name}') klass.__name__, '' if klass.__name__ == name else f' as {name}')
@ -3911,58 +4010,109 @@ class YoutubeDL:
'See https://yt-dl.org/update if you need help updating.' % 'See https://yt-dl.org/update if you need help updating.' %
latest_version) latest_version)
def _setup_opener(self): @functools.cached_property
if hasattr(self, '_opener'): def proxies(self):
return """Global proxy configuration"""
timeout_val = self.params.get('socket_timeout')
self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
opts_cookiefile = self.params.get('cookiefile')
opts_proxy = self.params.get('proxy') opts_proxy = self.params.get('proxy')
self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
if opts_proxy is not None: if opts_proxy is not None:
if opts_proxy == '': if opts_proxy == '':
proxies = {} opts_proxy = '__noproxy__'
else: proxies = {'all': opts_proxy}
proxies = {'http': opts_proxy, 'https': opts_proxy}
else: else:
proxies = urllib.request.getproxies() proxies = urllib.request.getproxies()
# Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805) # compat. Set HTTPS_PROXY to __noproxy__ to revert
if 'http' in proxies and 'https' not in proxies: if 'http' in proxies and 'https' not in proxies:
proxies['https'] = proxies['http'] proxies['https'] = proxies['http']
proxy_handler = PerRequestProxyHandler(proxies)
debuglevel = 1 if self.params.get('debug_printtraffic') else 0 return proxies
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
redirect_handler = YoutubeDLRedirectHandler()
data_handler = urllib.request.DataHandler()
# When passing our own FileHandler instance, build_opener won't add the @functools.cached_property
# default FileHandler and allows us to disable the file protocol, which def cookiejar(self):
# can be used for malicious purposes (see """Global cookiejar instance"""
# https://github.com/ytdl-org/youtube-dl/issues/8227) return load_cookies(
file_handler = urllib.request.FileHandler() self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
if not self.params.get('enable_file_urls'): @property
def file_open(*args, **kwargs): def _opener(self):
raise urllib.error.URLError( """
'file:// URLs are explicitly disabled in yt-dlp for security reasons. ' Get a urllib OpenerDirector from the Urllib handler (deprecated).
'Use --enable-file-urls to enable at your own risk.') """
file_handler.file_open = file_open self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
handler = self._request_director.handlers['Urllib']
return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
opener = urllib.request.build_opener( def urlopen(self, req):
proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler) """ Start an HTTP download """
if isinstance(req, str):
req = Request(req)
elif isinstance(req, urllib.request.Request):
self.deprecation_warning(
'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
'Use yt_dlp.networking.common.Request instead.')
req = urllib_req_to_req(req)
assert isinstance(req, Request)
# Delete the default user-agent header, which would otherwise apply in # compat: Assume user:pass url params are basic auth
# cases where our custom HTTP handler doesn't come into play url, basic_auth_header = extract_basic_auth(req.url)
# (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details) if basic_auth_header:
opener.addheaders = [] req.headers['Authorization'] = basic_auth_header
self._opener = opener req.url = sanitize_url(url)
clean_proxies(proxies=req.proxies, headers=req.headers)
clean_headers(req.headers)
try:
return self._request_director.send(req)
except NoSupportingHandlers as e:
for ue in e.unsupported_errors:
if not (ue.handler and ue.msg):
continue
if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
raise RequestError(
'file:// URLs are disabled by default in yt-dlp for security reasons. '
'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
raise
except SSLError as e:
if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
raise RequestError(
'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
'Try using --legacy-server-connect', cause=e) from e
raise
except HTTPError as e: # TODO: Remove in a future release
raise _CompatHTTPError(e) from e
def build_request_director(self, handlers):
logger = _YDLLogger(self)
headers = self.params.get('http_headers').copy()
proxies = self.proxies.copy()
clean_headers(headers)
clean_proxies(proxies, headers)
director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
for handler in handlers:
director.add_handler(handler(
logger=logger,
headers=headers,
cookiejar=self.cookiejar,
proxies=proxies,
prefer_system_certs='no-certifi' in self.params['compat_opts'],
verify=not self.params.get('nocheckcertificate'),
**traverse_obj(self.params, {
'verbose': 'debug_printtraffic',
'source_address': 'source_address',
'timeout': 'socket_timeout',
'legacy_ssl_support': 'legacyserverconnect',
'enable_file_urls': 'enable_file_urls',
'client_cert': {
'client_certificate': 'client_certificate',
'client_certificate_key': 'client_certificate_key',
'client_certificate_password': 'client_certificate_password',
},
}),
))
return director
def encode(self, s): def encode(self, s):
if isinstance(s, bytes): if isinstance(s, bytes):
@ -4115,14 +4265,14 @@ class YoutubeDL:
else: else:
self.to_screen(f'[info] Downloading {thumb_display_id} ...') self.to_screen(f'[info] Downloading {thumb_display_id} ...')
try: try:
uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {}))) uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}') self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
with open(encodeFilename(thumb_filename), 'wb') as thumbf: with open(encodeFilename(thumb_filename), 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf) shutil.copyfileobj(uf, thumbf)
ret.append((thumb_filename, thumb_filename_final)) ret.append((thumb_filename, thumb_filename_final))
t['filepath'] = thumb_filename t['filepath'] = thumb_filename
except network_exceptions as err: except network_exceptions as err:
if isinstance(err, urllib.error.HTTPError) and err.code == 404: if isinstance(err, HTTPError) and err.status == 404:
self.to_screen(f'[info] {thumb_display_id.title()} does not exist') self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
else: else:
self.report_warning(f'Unable to download {thumb_display_id}: {err}') self.report_warning(f'Unable to download {thumb_display_id}: {err}')

View file

@ -58,11 +58,11 @@ from .utils import (
read_stdin, read_stdin,
render_table, render_table,
setproctitle, setproctitle,
std_headers,
traverse_obj, traverse_obj,
variadic, variadic,
write_string, write_string,
) )
from .utils.networking import std_headers
from .YoutubeDL import YoutubeDL from .YoutubeDL import YoutubeDL
_IN_CLI = False _IN_CLI = False

View file

@ -18,7 +18,8 @@ def pycryptodome_module():
def get_hidden_imports(): def get_hidden_imports():
yield 'yt_dlp.compat._legacy' yield from ('yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated')
yield from ('yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated')
yield pycryptodome_module() yield pycryptodome_module()
yield from collect_submodules('websockets') yield from collect_submodules('websockets')
# These are auto-detected, but explicitly add them just in case # These are auto-detected, but explicitly add them just in case

View file

@ -1,14 +1,11 @@
import os import os
import sys import sys
import warnings
import xml.etree.ElementTree as etree import xml.etree.ElementTree as etree
from ._deprecated import * # noqa: F401, F403
from .compat_utils import passthrough_module from .compat_utils import passthrough_module
# XXX: Implement this the same way as other DeprecationWarnings without circular import passthrough_module(__name__, '._deprecated')
passthrough_module(__name__, '._legacy', callback=lambda attr: warnings.warn( del passthrough_module
DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=5))
# HTMLParseError has been deprecated in Python 3.3 and removed in # HTMLParseError has been deprecated in Python 3.3 and removed in
@ -70,3 +67,13 @@ if compat_os_name in ('nt', 'ce'):
return userhome + path[i:] return userhome + path[i:]
else: else:
compat_expanduser = os.path.expanduser compat_expanduser = os.path.expanduser
def urllib_req_to_req(urllib_request):
"""Convert urllib Request to a networking Request"""
from ..networking import Request
from ..utils.networking import HTTPHeaderDict
return Request(
urllib_request.get_full_url(), data=urllib_request.data, method=urllib_request.get_method(),
headers=HTTPHeaderDict(urllib_request.headers, urllib_request.unredirected_hdrs),
extensions={'timeout': urllib_request.timeout} if hasattr(urllib_request, 'timeout') else None)

View file

@ -1,4 +1,12 @@
"""Deprecated - New code should avoid these""" """Deprecated - New code should avoid these"""
import warnings
from .compat_utils import passthrough_module
# XXX: Implement this the same way as other DeprecationWarnings without circular import
passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn(
DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6))
del passthrough_module
import base64 import base64
import urllib.error import urllib.error
@ -8,7 +16,6 @@ compat_str = str
compat_b64decode = base64.b64decode compat_b64decode = base64.b64decode
compat_HTTPError = urllib.error.HTTPError
compat_urlparse = urllib.parse compat_urlparse = urllib.parse
compat_parse_qs = urllib.parse.parse_qs compat_parse_qs = urllib.parse.parse_qs
compat_urllib_parse_unquote = urllib.parse.unquote compat_urllib_parse_unquote = urllib.parse.unquote

View file

@ -16,12 +16,12 @@ import shlex
import shutil import shutil
import socket import socket
import struct import struct
import subprocess
import tokenize import tokenize
import urllib.error import urllib.error
import urllib.parse import urllib.parse
import urllib.request import urllib.request
import xml.etree.ElementTree as etree import xml.etree.ElementTree as etree
from subprocess import DEVNULL
# isort: split # isort: split
import asyncio # noqa: F401 import asyncio # noqa: F401
@ -70,6 +70,7 @@ compat_html_parser_HTMLParseError = compat_HTMLParseError
compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser
compat_http_client = http.client compat_http_client = http.client
compat_http_server = http.server compat_http_server = http.server
compat_HTTPError = urllib.error.HTTPError
compat_input = input compat_input = input
compat_integer_types = (int, ) compat_integer_types = (int, )
compat_itertools_count = itertools.count compat_itertools_count = itertools.count
@ -84,7 +85,7 @@ compat_socket_create_connection = socket.create_connection
compat_Struct = struct.Struct compat_Struct = struct.Struct
compat_struct_pack = struct.pack compat_struct_pack = struct.pack
compat_struct_unpack = struct.unpack compat_struct_unpack = struct.unpack
compat_subprocess_get_DEVNULL = lambda: DEVNULL compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL
compat_tokenize_tokenize = tokenize.tokenize compat_tokenize_tokenize = tokenize.tokenize
compat_urllib_error = urllib.error compat_urllib_error = urllib.error
compat_urllib_HTTPError = urllib.error.HTTPError compat_urllib_HTTPError = urllib.error.HTTPError

13
yt_dlp/compat/types.py Normal file
View file

@ -0,0 +1,13 @@
# flake8: noqa: F405
from types import * # noqa: F403
from .compat_utils import passthrough_module
passthrough_module(__name__, 'types')
del passthrough_module
try:
# NB: pypy has builtin NoneType, so checking NameError won't work
from types import NoneType # >= 3.10
except ImportError:
NoneType = type(None)

View file

@ -1,6 +1,9 @@
# flake8: noqa: F405 # flake8: noqa: F405
from urllib import * # noqa: F403 from urllib import * # noqa: F403
del request
from . import request # noqa: F401
from ..compat_utils import passthrough_module from ..compat_utils import passthrough_module
passthrough_module(__name__, 'urllib') passthrough_module(__name__, 'urllib')

View file

@ -41,30 +41,15 @@ from .utils import (
try_call, try_call,
write_string, write_string,
) )
from .utils._utils import _YDLLogger
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'} CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'} SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
class YDLLogger: class YDLLogger(_YDLLogger):
def __init__(self, ydl=None): def warning(self, message, only_once=False): # compat
self._ydl = ydl return super().warning(message, once=only_once)
def debug(self, message):
if self._ydl:
self._ydl.write_debug(message)
def info(self, message):
if self._ydl:
self._ydl.to_screen(f'[Cookies] {message}')
def warning(self, message, only_once=False):
if self._ydl:
self._ydl.report_warning(message, only_once)
def error(self, message):
if self._ydl:
self._ydl.report_error(message)
class ProgressBar(MultilinePrinter): class ProgressBar(MultilinePrinter):
_DELAY, _timer = 0.1, 0 _DELAY, _timer = 0.1, 0
@ -112,7 +97,7 @@ def load_cookies(cookie_file, browser_specification, ydl):
jar = YoutubeDLCookieJar(cookie_file) jar = YoutubeDLCookieJar(cookie_file)
if not is_filename or os.access(cookie_file, os.R_OK): if not is_filename or os.access(cookie_file, os.R_OK):
jar.load(ignore_discard=True, ignore_expires=True) jar.load()
cookie_jars.append(jar) cookie_jars.append(jar)
return _merge_cookie_jars(cookie_jars) return _merge_cookie_jars(cookie_jars)
@ -1228,7 +1213,7 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
file.truncate(0) file.truncate(0)
yield file yield file
def _really_save(self, f, ignore_discard=False, ignore_expires=False): def _really_save(self, f, ignore_discard, ignore_expires):
now = time.time() now = time.time()
for cookie in self: for cookie in self:
if (not ignore_discard and cookie.discard if (not ignore_discard and cookie.discard
@ -1249,7 +1234,7 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
name, value name, value
))) )))
def save(self, filename=None, *args, **kwargs): def save(self, filename=None, ignore_discard=True, ignore_expires=True):
""" """
Save cookies to a file. Save cookies to a file.
Code is taken from CPython 3.6 Code is taken from CPython 3.6
@ -1268,9 +1253,9 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
with self.open(filename, write=True) as f: with self.open(filename, write=True) as f:
f.write(self._HEADER) f.write(self._HEADER)
self._really_save(f, *args, **kwargs) self._really_save(f, ignore_discard, ignore_expires)
def load(self, filename=None, ignore_discard=False, ignore_expires=False): def load(self, filename=None, ignore_discard=True, ignore_expires=True):
"""Load cookies from a file.""" """Load cookies from a file."""
if filename is None: if filename is None:
if self.filename is not None: if self.filename is not None:
@ -1327,6 +1312,13 @@ class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
self.add_cookie_header(cookie_req) self.add_cookie_header(cookie_req)
return cookie_req.get_header('Cookie') return cookie_req.get_header('Cookie')
def get_cookies_for_url(self, url):
"""Generate a list of Cookie objects for a given url"""
# Policy `_now` attribute must be set before calling `_cookies_for_request`
# Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
self._policy._now = self._now = int(time.time())
return self._cookies_for_request(urllib.request.Request(escape_url(sanitize_url(url))))
def clear(self, *args, **kwargs): def clear(self, *args, **kwargs):
with contextlib.suppress(KeyError): with contextlib.suppress(KeyError):
return super().clear(*args, **kwargs) return super().clear(*args, **kwargs)

View file

@ -255,7 +255,8 @@ class FileDownloader:
@wrap_file_access('remove') @wrap_file_access('remove')
def try_remove(self, filename): def try_remove(self, filename):
os.remove(filename) if os.path.isfile(filename):
os.remove(filename)
@wrap_file_access('rename') @wrap_file_access('rename')
def try_rename(self, old_filename, new_filename): def try_rename(self, old_filename, new_filename):
@ -418,7 +419,6 @@ class FileDownloader:
"""Download to a filename using the info from info_dict """Download to a filename using the info from info_dict
Return True on success and False otherwise Return True on success and False otherwise
""" """
nooverwrites_and_exists = ( nooverwrites_and_exists = (
not self.params.get('overwrites', True) not self.params.get('overwrites', True)
and os.path.exists(encodeFilename(filename)) and os.path.exists(encodeFilename(filename))

View file

@ -1,14 +1,16 @@
import enum import enum
import json import json
import os.path import os
import re import re
import subprocess import subprocess
import sys import sys
import tempfile
import time import time
import uuid import uuid
from .fragment import FragmentFD from .fragment import FragmentFD
from ..compat import functools from ..compat import functools
from ..networking import Request
from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
from ..utils import ( from ..utils import (
Popen, Popen,
@ -24,7 +26,6 @@ from ..utils import (
encodeFilename, encodeFilename,
find_available_port, find_available_port,
remove_end, remove_end,
sanitized_Request,
traverse_obj, traverse_obj,
) )
@ -42,6 +43,7 @@ class ExternalFD(FragmentFD):
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
self.report_destination(filename) self.report_destination(filename)
tmpfilename = self.temp_name(filename) tmpfilename = self.temp_name(filename)
self._cookies_tempfile = None
try: try:
started = time.time() started = time.time()
@ -54,6 +56,9 @@ class ExternalFD(FragmentFD):
# should take place # should take place
retval = 0 retval = 0
self.to_screen('[%s] Interrupted by user' % self.get_basename()) self.to_screen('[%s] Interrupted by user' % self.get_basename())
finally:
if self._cookies_tempfile:
self.try_remove(self._cookies_tempfile)
if retval == 0: if retval == 0:
status = { status = {
@ -125,6 +130,16 @@ class ExternalFD(FragmentFD):
self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME, self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME,
keys, *args, **kwargs) keys, *args, **kwargs)
def _write_cookies(self):
if not self.ydl.cookiejar.filename:
tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False)
tmp_cookies.close()
self._cookies_tempfile = tmp_cookies.name
self.to_screen(f'[download] Writing temporary cookies file to "{self._cookies_tempfile}"')
# real_download resets _cookies_tempfile; if it's None then save() will write to cookiejar.filename
self.ydl.cookiejar.save(self._cookies_tempfile)
return self.ydl.cookiejar.filename or self._cookies_tempfile
def _call_downloader(self, tmpfilename, info_dict): def _call_downloader(self, tmpfilename, info_dict):
""" Either overwrite this or implement _make_cmd """ """ Either overwrite this or implement _make_cmd """
cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)] cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
@ -184,6 +199,9 @@ class CurlFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed'] cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
if cookie_header:
cmd += ['--cookie', cookie_header]
if info_dict.get('http_headers') is not None: if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items(): for key, val in info_dict['http_headers'].items():
cmd += ['--header', f'{key}: {val}'] cmd += ['--header', f'{key}: {val}']
@ -214,6 +232,9 @@ class AxelFD(ExternalFD):
if info_dict.get('http_headers') is not None: if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items(): for key, val in info_dict['http_headers'].items():
cmd += ['-H', f'{key}: {val}'] cmd += ['-H', f'{key}: {val}']
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
if cookie_header:
cmd += ['-H', f'Cookie: {cookie_header}', '--max-redirect=0']
cmd += self._configuration_args() cmd += self._configuration_args()
cmd += ['--', info_dict['url']] cmd += ['--', info_dict['url']]
return cmd return cmd
@ -223,7 +244,9 @@ class WgetFD(ExternalFD):
AVAILABLE_OPT = '--version' AVAILABLE_OPT = '--version'
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto'] cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto']
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
cmd += ['--load-cookies', self._write_cookies()]
if info_dict.get('http_headers') is not None: if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items(): for key, val in info_dict['http_headers'].items():
cmd += ['--header', f'{key}: {val}'] cmd += ['--header', f'{key}: {val}']
@ -271,7 +294,7 @@ class Aria2cFD(ExternalFD):
return super()._call_downloader(tmpfilename, info_dict) return super()._call_downloader(tmpfilename, info_dict)
def _make_cmd(self, tmpfilename, info_dict): def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-c', cmd = [self.exe, '-c', '--no-conf',
'--console-log-level=warn', '--summary-interval=0', '--download-result=hide', '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
'--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16'] '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
if 'fragments' in info_dict: if 'fragments' in info_dict:
@ -279,6 +302,8 @@ class Aria2cFD(ExternalFD):
else: else:
cmd += ['--min-split-size', '1M'] cmd += ['--min-split-size', '1M']
if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
cmd += [f'--load-cookies={self._write_cookies()}']
if info_dict.get('http_headers') is not None: if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items(): for key, val in info_dict['http_headers'].items():
cmd += ['--header', f'{key}: {val}'] cmd += ['--header', f'{key}: {val}']
@ -333,13 +358,12 @@ class Aria2cFD(ExternalFD):
'method': method, 'method': method,
'params': [f'token:{rpc_secret}', *params], 'params': [f'token:{rpc_secret}', *params],
}).encode('utf-8') }).encode('utf-8')
request = sanitized_Request( request = Request(
f'http://localhost:{rpc_port}/jsonrpc', f'http://localhost:{rpc_port}/jsonrpc',
data=d, headers={ data=d, headers={
'Content-Type': 'application/json', 'Content-Type': 'application/json',
'Content-Length': f'{len(d)}', 'Content-Length': f'{len(d)}',
'Ytdl-request-proxy': '__noproxy__', }, proxies={'all': None})
})
with self.ydl.urlopen(request) as r: with self.ydl.urlopen(request) as r:
resp = json.load(r) resp = json.load(r)
assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server' assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server'
@ -417,6 +441,14 @@ class HttpieFD(ExternalFD):
if info_dict.get('http_headers') is not None: if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items(): for key, val in info_dict['http_headers'].items():
cmd += [f'{key}:{val}'] cmd += [f'{key}:{val}']
# httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1]
# If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2]
# 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq
# 2: https://httpie.io/docs/cli/sessions
cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
if cookie_header:
cmd += [f'Cookie:{cookie_header}']
return cmd return cmd
@ -527,7 +559,13 @@ class FFmpegFD(ExternalFD):
selected_formats = info_dict.get('requested_formats') or [info_dict] selected_formats = info_dict.get('requested_formats') or [info_dict]
for i, fmt in enumerate(selected_formats): for i, fmt in enumerate(selected_formats):
if fmt.get('http_headers') and re.match(r'^https?://', fmt['url']): is_http = re.match(r'^https?://', fmt['url'])
cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
if cookies:
args.extend(['-cookies', ''.join(
f'{cookie.name}={cookie.value}; path={cookie.path}; domain={cookie.domain};\r\n'
for cookie in cookies)])
if fmt.get('http_headers') and is_http:
# Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
# [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in fmt['http_headers'].items())]) args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in fmt['http_headers'].items())])

View file

@ -3,11 +3,11 @@ import io
import itertools import itertools
import struct import struct
import time import time
import urllib.error
import urllib.parse import urllib.parse
from .fragment import FragmentFD from .fragment import FragmentFD
from ..compat import compat_etree_fromstring from ..compat import compat_etree_fromstring
from ..networking.exceptions import HTTPError
from ..utils import fix_xml_ampersands, xpath_text from ..utils import fix_xml_ampersands, xpath_text
@ -312,7 +312,7 @@ class F4mFD(FragmentFD):
self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.geturl() man_url = urlh.url
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
# (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244 # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244
# and https://github.com/ytdl-org/youtube-dl/issues/7823) # and https://github.com/ytdl-org/youtube-dl/issues/7823)
@ -407,8 +407,8 @@ class F4mFD(FragmentFD):
if box_type == b'mdat': if box_type == b'mdat':
self._append_fragment(ctx, box_data) self._append_fragment(ctx, box_data)
break break
except urllib.error.HTTPError as err: except HTTPError as err:
if live and (err.code == 404 or err.code == 410): if live and (err.status == 404 or err.status == 410):
# We didn't keep up with the live window. Continue # We didn't keep up with the live window. Continue
# with the next available fragment. # with the next available fragment.
msg = 'Fragment %d unavailable' % frag_i msg = 'Fragment %d unavailable' % frag_i

View file

@ -1,24 +1,19 @@
import concurrent.futures import concurrent.futures
import contextlib import contextlib
import http.client
import json import json
import math import math
import os import os
import struct import struct
import time import time
import urllib.error
from .common import FileDownloader from .common import FileDownloader
from .http import HttpFD from .http import HttpFD
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import compat_os_name from ..compat import compat_os_name
from ..utils import ( from ..networking import Request
DownloadError, from ..networking.exceptions import HTTPError, IncompleteRead
RetryManager, from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj
encodeFilename, from ..utils.networking import HTTPHeaderDict
sanitized_Request,
traverse_obj,
)
class HttpQuietDownloader(HttpFD): class HttpQuietDownloader(HttpFD):
@ -75,7 +70,7 @@ class FragmentFD(FileDownloader):
def _prepare_url(self, info_dict, url): def _prepare_url(self, info_dict, url):
headers = info_dict.get('http_headers') headers = info_dict.get('http_headers')
return sanitized_Request(url, None, headers) if headers else url return Request(url, None, headers) if headers else url
def _prepare_and_start_frag_download(self, ctx, info_dict): def _prepare_and_start_frag_download(self, ctx, info_dict):
self._prepare_frag_download(ctx) self._prepare_frag_download(ctx)
@ -300,9 +295,7 @@ class FragmentFD(FileDownloader):
def _finish_frag_download(self, ctx, info_dict): def _finish_frag_download(self, ctx, info_dict):
ctx['dest_stream'].close() ctx['dest_stream'].close()
if self.__do_ytdl_file(ctx): if self.__do_ytdl_file(ctx):
ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename'])) self.try_remove(self.ytdl_filename(ctx['filename']))
if os.path.isfile(ytdl_filename):
self.try_remove(ytdl_filename)
elapsed = time.time() - ctx['started'] elapsed = time.time() - ctx['started']
to_file = ctx['tmpfilename'] != '-' to_file = ctx['tmpfilename'] != '-'
@ -459,7 +452,7 @@ class FragmentFD(FileDownloader):
frag_index = ctx['fragment_index'] = fragment['frag_index'] frag_index = ctx['fragment_index'] = fragment['frag_index']
ctx['last_error'] = None ctx['last_error'] = None
headers = info_dict.get('http_headers', {}).copy() headers = HTTPHeaderDict(info_dict.get('http_headers'))
byte_range = fragment.get('byte_range') byte_range = fragment.get('byte_range')
if byte_range: if byte_range:
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1) headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
@ -479,7 +472,7 @@ class FragmentFD(FileDownloader):
if not self._download_fragment( if not self._download_fragment(
ctx, fragment['url'], info_dict, headers, info_dict.get('request_data')): ctx, fragment['url'], info_dict, headers, info_dict.get('request_data')):
return return
except (urllib.error.HTTPError, http.client.IncompleteRead) as err: except (HTTPError, IncompleteRead) as err:
retry.error = err retry.error = err
continue continue
except DownloadError: # has own retry settings except DownloadError: # has own retry settings

View file

@ -28,7 +28,16 @@ class HlsFD(FragmentFD):
FD_NAME = 'hlsnative' FD_NAME = 'hlsnative'
@staticmethod @staticmethod
def can_download(manifest, info_dict, allow_unplayable_formats=False): def _has_drm(manifest): # TODO: https://github.com/yt-dlp/yt-dlp/pull/5039
return bool(re.search('|'.join((
r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay
r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.apple\.streamingkeydelivery"', # Apple FairPlay
r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.microsoft\.playready"', # Microsoft PlayReady
r'#EXT-X-FAXS-CM:', # Adobe Flash Access
)), manifest))
@classmethod
def can_download(cls, manifest, info_dict, allow_unplayable_formats=False):
UNSUPPORTED_FEATURES = [ UNSUPPORTED_FEATURES = [
# r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
@ -50,13 +59,15 @@ class HlsFD(FragmentFD):
] ]
if not allow_unplayable_formats: if not allow_unplayable_formats:
UNSUPPORTED_FEATURES += [ UNSUPPORTED_FEATURES += [
r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1], but not necessarily DRM
] ]
def check_results(): def check_results():
yield not info_dict.get('is_live') yield not info_dict.get('is_live')
for feature in UNSUPPORTED_FEATURES: for feature in UNSUPPORTED_FEATURES:
yield not re.search(feature, manifest) yield not re.search(feature, manifest)
if not allow_unplayable_formats:
yield not cls._has_drm(manifest)
return all(check_results()) return all(check_results())
def real_download(self, filename, info_dict): def real_download(self, filename, info_dict):
@ -64,7 +75,7 @@ class HlsFD(FragmentFD):
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
man_url = urlh.geturl() man_url = urlh.url
s = urlh.read().decode('utf-8', 'ignore') s = urlh.read().decode('utf-8', 'ignore')
can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
@ -81,14 +92,13 @@ class HlsFD(FragmentFD):
message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, ' message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command') f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command')
if not can_download: if not can_download:
has_drm = re.search('|'.join([ if self._has_drm(s) and not self.params.get('allow_unplayable_formats'):
r'#EXT-X-FAXS-CM:', # Adobe Flash Access if info_dict.get('has_drm') and self.params.get('test'):
r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay self.to_screen(f'[{self.FD_NAME}] This format is DRM protected', skip_eol=True)
]), s) else:
if has_drm and not self.params.get('allow_unplayable_formats'): self.report_error(
self.report_error( 'This format is DRM protected; Try selecting another format with --format or '
'This video is DRM protected; Try selecting another format with --format or ' 'add --check-formats to automatically fallback to the next best format', tb=False)
'add --check-formats to automatically fallback to the next best format')
return False return False
message = message or 'Unsupported features have been detected' message = message or 'Unsupported features have been detected'
fd = FFmpegFD(self.ydl, self.params) fd = FFmpegFD(self.ydl, self.params)

View file

@ -1,12 +1,14 @@
import http.client
import os import os
import random import random
import socket
import ssl
import time import time
import urllib.error
from .common import FileDownloader from .common import FileDownloader
from ..networking import Request
from ..networking.exceptions import (
CertificateVerifyError,
HTTPError,
TransportError,
)
from ..utils import ( from ..utils import (
ContentTooShortError, ContentTooShortError,
RetryManager, RetryManager,
@ -16,18 +18,10 @@ from ..utils import (
encodeFilename, encodeFilename,
int_or_none, int_or_none,
parse_http_range, parse_http_range,
sanitized_Request,
try_call, try_call,
write_xattr, write_xattr,
) )
from ..utils.networking import HTTPHeaderDict
RESPONSE_READ_EXCEPTIONS = (
TimeoutError,
socket.timeout, # compat: py < 3.10
ConnectionError,
ssl.SSLError,
http.client.HTTPException
)
class HttpFD(FileDownloader): class HttpFD(FileDownloader):
@ -46,10 +40,7 @@ class HttpFD(FileDownloader):
ctx.stream = None ctx.stream = None
# Disable compression # Disable compression
headers = {'Accept-Encoding': 'identity'} headers = HTTPHeaderDict({'Accept-Encoding': 'identity'}, info_dict.get('http_headers'))
add_headers = info_dict.get('http_headers')
if add_headers:
headers.update(add_headers)
is_test = self.params.get('test', False) is_test = self.params.get('test', False)
chunk_size = self._TEST_FILE_SIZE if is_test else ( chunk_size = self._TEST_FILE_SIZE if is_test else (
@ -120,10 +111,10 @@ class HttpFD(FileDownloader):
if try_call(lambda: range_end >= ctx.content_len): if try_call(lambda: range_end >= ctx.content_len):
range_end = ctx.content_len - 1 range_end = ctx.content_len - 1
request = sanitized_Request(url, request_data, headers) request = Request(url, request_data, headers)
has_range = range_start is not None has_range = range_start is not None
if has_range: if has_range:
request.add_header('Range', f'bytes={int(range_start)}-{int_or_none(range_end) or ""}') request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}'
# Establish connection # Establish connection
try: try:
ctx.data = self.ydl.urlopen(request) ctx.data = self.ydl.urlopen(request)
@ -154,17 +145,17 @@ class HttpFD(FileDownloader):
self.report_unable_to_resume() self.report_unable_to_resume()
ctx.resume_len = 0 ctx.resume_len = 0
ctx.open_mode = 'wb' ctx.open_mode = 'wb'
ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None)) ctx.data_len = ctx.content_len = int_or_none(ctx.data.headers.get('Content-length', None))
except urllib.error.HTTPError as err: except HTTPError as err:
if err.code == 416: if err.status == 416:
# Unable to resume (requested range not satisfiable) # Unable to resume (requested range not satisfiable)
try: try:
# Open the connection again without the range header # Open the connection again without the range header
ctx.data = self.ydl.urlopen( ctx.data = self.ydl.urlopen(
sanitized_Request(url, request_data, headers)) Request(url, request_data, headers))
content_length = ctx.data.info()['Content-Length'] content_length = ctx.data.headers['Content-Length']
except urllib.error.HTTPError as err: except HTTPError as err:
if err.code < 500 or err.code >= 600: if err.status < 500 or err.status >= 600:
raise raise
else: else:
# Examine the reported length # Examine the reported length
@ -192,17 +183,13 @@ class HttpFD(FileDownloader):
ctx.resume_len = 0 ctx.resume_len = 0
ctx.open_mode = 'wb' ctx.open_mode = 'wb'
return return
elif err.code < 500 or err.code >= 600: elif err.status < 500 or err.status >= 600:
# Unexpected HTTP error # Unexpected HTTP error
raise raise
raise RetryDownload(err) raise RetryDownload(err)
except urllib.error.URLError as err: except CertificateVerifyError:
if isinstance(err.reason, ssl.CertificateError): raise
raise except TransportError as err:
raise RetryDownload(err)
# In urllib.request.AbstractHTTPHandler, the response is partially read on request.
# Any errors that occur during this will not be wrapped by URLError
except RESPONSE_READ_EXCEPTIONS as err:
raise RetryDownload(err) raise RetryDownload(err)
def close_stream(): def close_stream():
@ -212,9 +199,9 @@ class HttpFD(FileDownloader):
ctx.stream = None ctx.stream = None
def download(): def download():
data_len = ctx.data.info().get('Content-length') data_len = ctx.data.headers.get('Content-length')
if ctx.data.info().get('Content-encoding'): if ctx.data.headers.get('Content-encoding'):
# Content-encoding is present, Content-length is not reliable anymore as we are # Content-encoding is present, Content-length is not reliable anymore as we are
# doing auto decompression. (See: https://github.com/yt-dlp/yt-dlp/pull/6176) # doing auto decompression. (See: https://github.com/yt-dlp/yt-dlp/pull/6176)
data_len = None data_len = None
@ -258,7 +245,7 @@ class HttpFD(FileDownloader):
try: try:
# Download and write # Download and write
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
except RESPONSE_READ_EXCEPTIONS as err: except TransportError as err:
retry(err) retry(err)
byte_counter += len(data_block) byte_counter += len(data_block)
@ -339,15 +326,15 @@ class HttpFD(FileDownloader):
elif speed: elif speed:
ctx.throttle_start = None ctx.throttle_start = None
if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
ctx.resume_len = byte_counter
# ctx.block_size = block_size
raise NextFragment()
if ctx.stream is None: if ctx.stream is None:
self.to_stderr('\n') self.to_stderr('\n')
self.report_error('Did not get any data blocks') self.report_error('Did not get any data blocks')
return False return False
if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
ctx.resume_len = byte_counter
raise NextFragment()
if ctx.tmpfilename != '-': if ctx.tmpfilename != '-':
ctx.stream.close() ctx.stream.close()
@ -359,7 +346,7 @@ class HttpFD(FileDownloader):
# Update file modification time # Update file modification time
if self.params.get('updatetime', True): if self.params.get('updatetime', True):
info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None)) info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.headers.get('last-modified', None))
self._hook_progress({ self._hook_progress({
'downloaded_bytes': byte_counter, 'downloaded_bytes': byte_counter,

View file

@ -2,9 +2,9 @@ import binascii
import io import io
import struct import struct
import time import time
import urllib.error
from .fragment import FragmentFD from .fragment import FragmentFD
from ..networking.exceptions import HTTPError
from ..utils import RetryManager from ..utils import RetryManager
u8 = struct.Struct('>B') u8 = struct.Struct('>B')
@ -271,7 +271,7 @@ class IsmFD(FragmentFD):
write_piff_header(ctx['dest_stream'], info_dict['_download_params']) write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
extra_state['ism_track_written'] = True extra_state['ism_track_written'] = True
self._append_fragment(ctx, frag_content) self._append_fragment(ctx, frag_content)
except urllib.error.HTTPError as err: except HTTPError as err:
retry.error = err retry.error = err
continue continue

View file

@ -5,13 +5,8 @@ import time
from . import get_suitable_downloader from . import get_suitable_downloader
from .common import FileDownloader from .common import FileDownloader
from .external import FFmpegFD from .external import FFmpegFD
from ..utils import ( from ..networking import Request
DownloadError, from ..utils import DownloadError, WebSocketsWrapper, str_or_none, try_get
WebSocketsWrapper,
sanitized_Request,
str_or_none,
try_get,
)
class NiconicoDmcFD(FileDownloader): class NiconicoDmcFD(FileDownloader):
@ -33,7 +28,7 @@ class NiconicoDmcFD(FileDownloader):
heartbeat_data = heartbeat_info_dict['data'].encode() heartbeat_data = heartbeat_info_dict['data'].encode()
heartbeat_interval = heartbeat_info_dict.get('interval', 30) heartbeat_interval = heartbeat_info_dict.get('interval', 30)
request = sanitized_Request(heartbeat_url, heartbeat_data) request = Request(heartbeat_url, heartbeat_data)
def heartbeat(): def heartbeat():
try: try:

View file

@ -1,8 +1,8 @@
import json import json
import time import time
import urllib.error
from .fragment import FragmentFD from .fragment import FragmentFD
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
RegexNotFoundError, RegexNotFoundError,
RetryManager, RetryManager,
@ -10,6 +10,7 @@ from ..utils import (
int_or_none, int_or_none,
try_get, try_get,
) )
from ..utils.networking import HTTPHeaderDict
class YoutubeLiveChatFD(FragmentFD): class YoutubeLiveChatFD(FragmentFD):
@ -37,10 +38,7 @@ class YoutubeLiveChatFD(FragmentFD):
start_time = int(time.time() * 1000) start_time = int(time.time() * 1000)
def dl_fragment(url, data=None, headers=None): def dl_fragment(url, data=None, headers=None):
http_headers = info_dict.get('http_headers', {}) http_headers = HTTPHeaderDict(info_dict.get('http_headers'), headers)
if headers:
http_headers = http_headers.copy()
http_headers.update(headers)
return self._download_fragment(ctx, url, info_dict, http_headers, data) return self._download_fragment(ctx, url, info_dict, http_headers, data)
def parse_actions_replay(live_chat_continuation): def parse_actions_replay(live_chat_continuation):
@ -129,7 +127,7 @@ class YoutubeLiveChatFD(FragmentFD):
or frag_index == 1 and try_refresh_replay_beginning or frag_index == 1 and try_refresh_replay_beginning
or parse_actions_replay) or parse_actions_replay)
return (True, *func(live_chat_continuation)) return (True, *func(live_chat_continuation))
except urllib.error.HTTPError as err: except HTTPError as err:
retry.error = err retry.error = err
continue continue
return False, None, None, None return False, None, None, None

View file

@ -15,7 +15,6 @@ from .youtube import ( # Youtube is moved to the top to improve performance
YoutubeSearchURLIE, YoutubeSearchURLIE,
YoutubeMusicSearchURLIE, YoutubeMusicSearchURLIE,
YoutubeSubscriptionsIE, YoutubeSubscriptionsIE,
YoutubeStoriesIE,
YoutubeTruncatedIDIE, YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE, YoutubeTruncatedURLIE,
YoutubeYtBeIE, YoutubeYtBeIE,
@ -215,6 +214,7 @@ from .bild import BildIE
from .bilibili import ( from .bilibili import (
BiliBiliIE, BiliBiliIE,
BiliBiliBangumiIE, BiliBiliBangumiIE,
BiliBiliBangumiSeasonIE,
BiliBiliBangumiMediaIE, BiliBiliBangumiMediaIE,
BiliBiliSearchIE, BiliBiliSearchIE,
BilibiliCategoryIE, BilibiliCategoryIE,
@ -1012,6 +1012,7 @@ from .lynda import (
LyndaCourseIE LyndaCourseIE
) )
from .m6 import M6IE from .m6 import M6IE
from .magellantv import MagellanTVIE
from .magentamusik360 import MagentaMusik360IE from .magentamusik360 import MagentaMusik360IE
from .mailru import ( from .mailru import (
MailRuIE, MailRuIE,
@ -1141,6 +1142,7 @@ from .mtv import (
) )
from .muenchentv import MuenchenTVIE from .muenchentv import MuenchenTVIE
from .murrtube import MurrtubeIE, MurrtubeUserIE from .murrtube import MurrtubeIE, MurrtubeUserIE
from .museai import MuseAIIE
from .musescore import MuseScoreIE from .musescore import MuseScoreIE
from .musicdex import ( from .musicdex import (
MusicdexSongIE, MusicdexSongIE,
@ -1531,6 +1533,7 @@ from .prx import (
) )
from .puls4 import Puls4IE from .puls4 import Puls4IE
from .pyvideo import PyvideoIE from .pyvideo import PyvideoIE
from .qdance import QDanceIE
from .qingting import QingTingIE from .qingting import QingTingIE
from .qqmusic import ( from .qqmusic import (
QQMusicIE, QQMusicIE,
@ -1854,6 +1857,10 @@ from .srgssr import (
SRGSSRPlayIE, SRGSSRPlayIE,
) )
from .srmediathek import SRMediathekIE from .srmediathek import SRMediathekIE
from .stacommu import (
StacommuLiveIE,
StacommuVODIE,
)
from .stanfordoc import StanfordOpenClassroomIE from .stanfordoc import StanfordOpenClassroomIE
from .startv import StarTVIE from .startv import StarTVIE
from .steam import ( from .steam import (
@ -1866,7 +1873,6 @@ from .storyfire import (
StoryFireSeriesIE, StoryFireSeriesIE,
) )
from .streamable import StreamableIE from .streamable import StreamableIE
from .streamanity import StreamanityIE
from .streamcloud import StreamcloudIE from .streamcloud import StreamcloudIE
from .streamcz import StreamCZIE from .streamcz import StreamCZIE
from .streamff import StreamFFIE from .streamff import StreamFFIE
@ -2267,6 +2273,8 @@ from .vk import (
VKIE, VKIE,
VKUserVideosIE, VKUserVideosIE,
VKWallPostIE, VKWallPostIE,
VKPlayIE,
VKPlayLiveIE,
) )
from .vocaroo import VocarooIE from .vocaroo import VocarooIE
from .vodlocker import VodlockerIE from .vodlocker import VodlockerIE

View file

@ -12,6 +12,7 @@ from ..utils import (
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
str_or_none, str_or_none,
traverse_obj,
try_get, try_get,
unescapeHTML, unescapeHTML,
update_url_query, update_url_query,
@ -85,6 +86,15 @@ class ABCIE(InfoExtractor):
'uploader': 'Behind the News', 'uploader': 'Behind the News',
'uploader_id': 'behindthenews', 'uploader_id': 'behindthenews',
} }
}, {
'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540',
'info_dict': {
'id': '102520540',
'title': 'Wagner Group retreating from Russia, leader Prigozhin to move to Belarus',
'ext': 'mp4',
'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.',
'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485',
}
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -107,7 +117,7 @@ class ABCIE(InfoExtractor):
video = True video = True
if mobj is None: if mobj is None:
mobj = re.search(r'(?P<type>)"sources": (?P<json_data>\[[^\]]+\]),', webpage) mobj = re.search(r'(?P<type>)"(?:sources|files|renditions)":\s*(?P<json_data>\[[^\]]+\])', webpage)
if mobj is None: if mobj is None:
mobj = re.search( mobj = re.search(
r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);', r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
@ -121,7 +131,8 @@ class ABCIE(InfoExtractor):
urls_info = self._parse_json( urls_info = self._parse_json(
mobj.group('json_data'), video_id, transform_source=js_to_json) mobj.group('json_data'), video_id, transform_source=js_to_json)
youtube = mobj.group('type') == 'YouTube' youtube = mobj.group('type') == 'YouTube'
video = mobj.group('type') == 'Video' or urls_info[0]['contentType'] == 'video/mp4' video = mobj.group('type') == 'Video' or traverse_obj(
urls_info, (0, ('contentType', 'MIMEType')), get_all=False) == 'video/mp4'
if not isinstance(urls_info, list): if not isinstance(urls_info, list):
urls_info = [urls_info] urls_info = [urls_info]

View file

@ -22,80 +22,23 @@ from ..utils import (
int_or_none, int_or_none,
intlist_to_bytes, intlist_to_bytes,
OnDemandPagedList, OnDemandPagedList,
request_to_url,
time_seconds, time_seconds,
traverse_obj, traverse_obj,
update_url_query, update_url_query,
) )
# NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862)
def add_opener(ydl, handler): # FIXME: Create proper API in .networking
def add_opener(ydl, handler): """Add a handler for opening URLs, like _download_webpage"""
''' Add a handler for opening URLs, like _download_webpage '''
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426 # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605 # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
assert isinstance(ydl._opener, urllib.request.OpenerDirector) rh = ydl._request_director.handlers['Urllib']
ydl._opener.add_handler(handler) if 'abematv-license' in rh._SUPPORTED_URL_SCHEMES:
return
opener = rh._get_instance(cookiejar=ydl.cookiejar, proxies=ydl.proxies)
def remove_opener(ydl, handler): assert isinstance(opener, urllib.request.OpenerDirector)
''' opener.add_handler(handler)
Remove handler(s) for opening URLs rh._SUPPORTED_URL_SCHEMES = (*rh._SUPPORTED_URL_SCHEMES, 'abematv-license')
@param handler Either handler object itself or handler type.
Specifying handler type will remove all handler which isinstance returns True.
'''
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426
# https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L605
opener = ydl._opener
assert isinstance(ydl._opener, urllib.request.OpenerDirector)
if isinstance(handler, (type, tuple)):
find_cp = lambda x: isinstance(x, handler)
else:
find_cp = lambda x: x is handler
removed = []
for meth in dir(handler):
if meth in ["redirect_request", "do_open", "proxy_open"]:
# oops, coincidental match
continue
i = meth.find("_")
protocol = meth[:i]
condition = meth[i + 1:]
if condition.startswith("error"):
j = condition.find("_") + i + 1
kind = meth[j + 1:]
try:
kind = int(kind)
except ValueError:
pass
lookup = opener.handle_error.get(protocol, {})
opener.handle_error[protocol] = lookup
elif condition == "open":
kind = protocol
lookup = opener.handle_open
elif condition == "response":
kind = protocol
lookup = opener.process_response
elif condition == "request":
kind = protocol
lookup = opener.process_request
else:
continue
handlers = lookup.setdefault(kind, [])
if handlers:
handlers[:] = [x for x in handlers if not find_cp(x)]
removed.append(x for x in handlers if find_cp(x))
if removed:
for x in opener.handlers:
if find_cp(x):
x.add_parent(None)
opener.handlers[:] = [x for x in opener.handlers if not find_cp(x)]
class AbemaLicenseHandler(urllib.request.BaseHandler): class AbemaLicenseHandler(urllib.request.BaseHandler):
@ -137,11 +80,11 @@ class AbemaLicenseHandler(urllib.request.BaseHandler):
return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey)) return intlist_to_bytes(aes_ecb_decrypt(encvideokey, enckey))
def abematv_license_open(self, url): def abematv_license_open(self, url):
url = request_to_url(url) url = url.get_full_url() if isinstance(url, urllib.request.Request) else url
ticket = urllib.parse.urlparse(url).netloc ticket = urllib.parse.urlparse(url).netloc
response_data = self._get_videokey_from_ticket(ticket) response_data = self._get_videokey_from_ticket(ticket)
return urllib.response.addinfourl(io.BytesIO(response_data), headers={ return urllib.response.addinfourl(io.BytesIO(response_data), headers={
'Content-Length': len(response_data), 'Content-Length': str(len(response_data)),
}, url=url, code=200) }, url=url, code=200)
@ -213,10 +156,7 @@ class AbemaTVBaseIE(InfoExtractor):
}) })
AbemaTVBaseIE._USERTOKEN = user_data['token'] AbemaTVBaseIE._USERTOKEN = user_data['token']
# don't allow adding it 2 times or more, though it's guarded
remove_opener(self._downloader, AbemaLicenseHandler)
add_opener(self._downloader, AbemaLicenseHandler(self)) add_opener(self._downloader, AbemaLicenseHandler(self))
return self._USERTOKEN return self._USERTOKEN
def _get_media_token(self, invalidate=False, to_show=True): def _get_media_token(self, invalidate=False, to_show=True):

View file

@ -6,10 +6,8 @@ import random
from .common import InfoExtractor from .common import InfoExtractor
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import ( from ..compat import compat_b64decode
compat_HTTPError, from ..networking.exceptions import HTTPError
compat_b64decode,
)
from ..utils import ( from ..utils import (
ass_subtitles_timecode, ass_subtitles_timecode,
bytes_to_intlist, bytes_to_intlist,
@ -142,9 +140,9 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
self._HEADERS = {'authorization': 'Bearer ' + access_token} self._HEADERS = {'authorization': 'Bearer ' + access_token}
except ExtractorError as e: except ExtractorError as e:
message = None message = None
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: if isinstance(e.cause, HTTPError) and e.cause.status == 401:
resp = self._parse_json( resp = self._parse_json(
e.cause.read().decode(), None, fatal=False) or {} e.cause.response.read().decode(), None, fatal=False) or {}
message = resp.get('message') or resp.get('code') message = resp.get('message') or resp.get('code')
self.report_warning(message or self._LOGIN_ERR_MESSAGE) self.report_warning(message or self._LOGIN_ERR_MESSAGE)
@ -195,14 +193,14 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
}) })
break break
except ExtractorError as e: except ExtractorError as e:
if not isinstance(e.cause, compat_HTTPError): if not isinstance(e.cause, HTTPError):
raise e raise e
if e.cause.code == 401: if e.cause.status == 401:
# This usually goes away with a different random pkcs1pad, so retry # This usually goes away with a different random pkcs1pad, so retry
continue continue
error = self._parse_json(e.cause.read(), video_id) error = self._parse_json(e.cause.response.read(), video_id)
message = error.get('message') message = error.get('message')
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country': if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
self.raise_geo_restricted(msg=message) self.raise_geo_restricted(msg=message)

View file

@ -2,11 +2,11 @@ import getpass
import json import json
import re import re
import time import time
import urllib.error
import xml.etree.ElementTree as etree import xml.etree.ElementTree as etree
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urlparse from ..compat import compat_urlparse
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
NO_DEFAULT, NO_DEFAULT,
ExtractorError, ExtractorError,
@ -1394,7 +1394,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
form_page, urlh = form_page_res form_page, urlh = form_page_res
post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url') post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
if not re.match(r'https?://', post_url): if not re.match(r'https?://', post_url):
post_url = compat_urlparse.urljoin(urlh.geturl(), post_url) post_url = compat_urlparse.urljoin(urlh.url, post_url)
form_data = self._hidden_inputs(form_page) form_data = self._hidden_inputs(form_page)
form_data.update(data) form_data.update(data)
return self._download_webpage_handle( return self._download_webpage_handle(
@ -1473,7 +1473,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
elif 'automatically signed in with' in provider_redirect_page: elif 'automatically signed in with' in provider_redirect_page:
# Seems like comcast is rolling up new way of automatically signing customers # Seems like comcast is rolling up new way of automatically signing customers
oauth_redirect_url = self._html_search_regex( oauth_redirect_url = self._html_search_regex(
r'continue:\s*"(https://oauth.xfinity.com/oauth/authorize\?.+)"', provider_redirect_page, r'continue:\s*"(https://oauth\.xfinity\.com/oauth/authorize\?.+)"', provider_redirect_page,
'oauth redirect (signed)') 'oauth redirect (signed)')
# Just need to process the request. No useful data comes back # Just need to process the request. No useful data comes back
self._download_webpage(oauth_redirect_url, video_id, 'Confirming auto login') self._download_webpage(oauth_redirect_url, video_id, 'Confirming auto login')
@ -1619,7 +1619,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
hidden_data['history'] = 1 hidden_data['history'] = 1
provider_login_page_res = self._download_webpage_handle( provider_login_page_res = self._download_webpage_handle(
urlh.geturl(), video_id, 'Sending first bookend', urlh.url, video_id, 'Sending first bookend',
query=hidden_data) query=hidden_data)
provider_association_redirect, urlh = post_form( provider_association_redirect, urlh = post_form(
@ -1629,7 +1629,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
}) })
provider_refresh_redirect_url = extract_redirect_url( provider_refresh_redirect_url = extract_redirect_url(
provider_association_redirect, url=urlh.geturl()) provider_association_redirect, url=urlh.url)
last_bookend_page, urlh = self._download_webpage_handle( last_bookend_page, urlh = self._download_webpage_handle(
provider_refresh_redirect_url, video_id, provider_refresh_redirect_url, video_id,
@ -1638,7 +1638,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
hidden_data['history'] = 3 hidden_data['history'] = 3
mvpd_confirm_page_res = self._download_webpage_handle( mvpd_confirm_page_res = self._download_webpage_handle(
urlh.geturl(), video_id, 'Sending final bookend', urlh.url, video_id, 'Sending final bookend',
query=hidden_data) query=hidden_data)
post_form(mvpd_confirm_page_res, 'Confirming Login') post_form(mvpd_confirm_page_res, 'Confirming Login')
@ -1652,7 +1652,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
hidden_data['history_val'] = 1 hidden_data['history_val'] = 1
provider_login_redirect_page_res = self._download_webpage_handle( provider_login_redirect_page_res = self._download_webpage_handle(
urlh.geturl(), video_id, 'Sending First Bookend', urlh.url, video_id, 'Sending First Bookend',
query=hidden_data) query=hidden_data)
provider_login_redirect_page, urlh = provider_login_redirect_page_res provider_login_redirect_page, urlh = provider_login_redirect_page_res
@ -1680,7 +1680,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
}) })
provider_refresh_redirect_url = extract_redirect_url( provider_refresh_redirect_url = extract_redirect_url(
provider_association_redirect, url=urlh.geturl()) provider_association_redirect, url=urlh.url)
last_bookend_page, urlh = self._download_webpage_handle( last_bookend_page, urlh = self._download_webpage_handle(
provider_refresh_redirect_url, video_id, provider_refresh_redirect_url, video_id,
@ -1690,7 +1690,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
hidden_data['history_val'] = 3 hidden_data['history_val'] = 3
mvpd_confirm_page_res = self._download_webpage_handle( mvpd_confirm_page_res = self._download_webpage_handle(
urlh.geturl(), video_id, 'Sending Final Bookend', urlh.url, video_id, 'Sending Final Bookend',
query=hidden_data) query=hidden_data)
post_form(mvpd_confirm_page_res, 'Confirming Login') post_form(mvpd_confirm_page_res, 'Confirming Login')
@ -1699,7 +1699,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
# based redirect that should be followed. # based redirect that should be followed.
provider_redirect_page, urlh = provider_redirect_page_res provider_redirect_page, urlh = provider_redirect_page_res
provider_refresh_redirect_url = extract_redirect_url( provider_refresh_redirect_url = extract_redirect_url(
provider_redirect_page, url=urlh.geturl()) provider_redirect_page, url=urlh.url)
if provider_refresh_redirect_url: if provider_refresh_redirect_url:
provider_redirect_page_res = self._download_webpage_handle( provider_redirect_page_res = self._download_webpage_handle(
provider_refresh_redirect_url, video_id, provider_refresh_redirect_url, video_id,
@ -1724,7 +1724,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
'requestor_id': requestor_id, 'requestor_id': requestor_id,
}), headers=mvpd_headers) }), headers=mvpd_headers)
except ExtractorError as e: except ExtractorError as e:
if not mso_id and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401: if not mso_id and isinstance(e.cause, HTTPError) and e.cause.status == 401:
raise_mvpd_required() raise_mvpd_required()
raise raise
if '<pendingLogout' in session: if '<pendingLogout' in session:

View file

@ -170,8 +170,10 @@ class AdultSwimIE(TurnerBaseIE):
continue continue
ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type'))) ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type')))
if ext == 'm3u8': if ext == 'm3u8':
info['formats'].extend(self._extract_m3u8_formats( fmts, subs = self._extract_m3u8_formats_and_subtitles(
asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
info['formats'].extend(fmts)
self._merge_subtitles(subs, target=info['subtitles'])
elif ext == 'f4m': elif ext == 'f4m':
continue continue
# info['formats'].extend(self._extract_f4m_formats( # info['formats'].extend(self._extract_f4m_formats(

View file

@ -1,8 +1,8 @@
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import ( from ..utils import (
HEADRequest,
ExtractorError, ExtractorError,
determine_ext, determine_ext,
scale_thumbnails_to_max_format_width, scale_thumbnails_to_max_format_width,
@ -121,7 +121,7 @@ class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE):
canonical_url = self._request_webpage( canonical_url = self._request_webpage(
HEADRequest(url), video_id, HEADRequest(url), video_id,
note='Resolve canonical player URL', note='Resolve canonical player URL',
errnote='Could not resolve canonical player URL').geturl() errnote='Could not resolve canonical player URL').url
_, netloc, _, _, query, _ = urllib.parse.urlparse(canonical_url) _, netloc, _, _, query, _ = urllib.parse.urlparse(canonical_url)
cid = urllib.parse.parse_qs(query)['cid'][0] cid = urllib.parse.parse_qs(query)['cid'][0]

View file

@ -1,16 +1,16 @@
import json import json
import re import re
import urllib.error
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from .naver import NaverBaseIE from .naver import NaverBaseIE
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
from ..compat import compat_HTTPError, compat_urllib_parse_unquote from ..compat import compat_urllib_parse_unquote
from ..networking import HEADRequest
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
KNOWN_EXTENSIONS, KNOWN_EXTENSIONS,
ExtractorError, ExtractorError,
HEADRequest,
bug_reports_message, bug_reports_message,
clean_html, clean_html,
dict_get, dict_get,
@ -899,7 +899,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
video_id, note='Fetching archived video file url', expected_status=True) video_id, note='Fetching archived video file url', expected_status=True)
except ExtractorError as e: except ExtractorError as e:
# HTTP Error 404 is expected if the video is not saved. # HTTP Error 404 is expected if the video is not saved.
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: if isinstance(e.cause, HTTPError) and e.cause.status == 404:
self.raise_no_formats( self.raise_no_formats(
'The requested video is not archived, indexed, or there is an issue with web.archive.org (try again later)', expected=True) 'The requested video is not archived, indexed, or there is an issue with web.archive.org (try again later)', expected=True)
else: else:
@ -926,7 +926,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
info['thumbnails'] = self._extract_thumbnails(video_id) info['thumbnails'] = self._extract_thumbnails(video_id)
if urlh: if urlh:
url = compat_urllib_parse_unquote(urlh.geturl()) url = compat_urllib_parse_unquote(urlh.url)
video_file_url_qs = parse_qs(url) video_file_url_qs = parse_qs(url)
# Attempt to recover any ext & format info from playback url & response headers # Attempt to recover any ext & format info from playback url & response headers
format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))} format = {'url': url, 'filesize': int_or_none(urlh.headers.get('x-archive-orig-content-length'))}
@ -1052,7 +1052,7 @@ class VLiveWebArchiveIE(InfoExtractor):
try: try:
return self._download_webpage(f'https://web.archive.org/web/{timestamp}id_/{url}', video_id, **kwargs) return self._download_webpage(f'https://web.archive.org/web/{timestamp}id_/{url}', video_id, **kwargs)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404: if isinstance(e.cause, HTTPError) and e.cause.status == 404:
raise ExtractorError('Page was not archived', expected=True) raise ExtractorError('Page was not archived', expected=True)
retry.error = e retry.error = e
continue continue

View file

@ -1,5 +1,5 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@ -34,8 +34,8 @@ class AtresPlayerIE(InfoExtractor):
_API_BASE = 'https://api.atresplayer.com/' _API_BASE = 'https://api.atresplayer.com/'
def _handle_error(self, e, code): def _handle_error(self, e, code):
if isinstance(e.cause, compat_HTTPError) and e.cause.code == code: if isinstance(e.cause, HTTPError) and e.cause.status == code:
error = self._parse_json(e.cause.read(), None) error = self._parse_json(e.cause.response.read(), None)
if error.get('error') == 'required_registered': if error.get('error') == 'required_registered':
self.raise_login_required() self.raise_login_required()
raise ExtractorError(error['error_description'], expected=True) raise ExtractorError(error['error_description'], expected=True)

View file

@ -2,11 +2,11 @@ import functools
import itertools import itertools
import json import json
import re import re
import urllib.error
import xml.etree.ElementTree import xml.etree.ElementTree
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError, compat_str, compat_urlparse from ..compat import compat_str, compat_urlparse
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
OnDemandPagedList, OnDemandPagedList,
@ -277,7 +277,7 @@ class BBCCoUkIE(InfoExtractor):
post_url, None, 'Logging in', data=urlencode_postdata(login_form), post_url, None, 'Logging in', data=urlencode_postdata(login_form),
headers={'Referer': self._LOGIN_URL}) headers={'Referer': self._LOGIN_URL})
if self._LOGIN_URL in urlh.geturl(): if self._LOGIN_URL in urlh.url:
error = clean_html(get_element_by_class('form-message', response)) error = clean_html(get_element_by_class('form-message', response))
if error: if error:
raise ExtractorError( raise ExtractorError(
@ -388,8 +388,8 @@ class BBCCoUkIE(InfoExtractor):
href, programme_id, ext='mp4', entry_protocol='m3u8_native', href, programme_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False) m3u8_id=format_id, fatal=False)
except ExtractorError as e: except ExtractorError as e:
if not (isinstance(e.exc_info[1], urllib.error.HTTPError) if not (isinstance(e.exc_info[1], HTTPError)
and e.exc_info[1].code in (403, 404)): and e.exc_info[1].status in (403, 404)):
raise raise
fmts = [] fmts = []
formats.extend(fmts) formats.extend(fmts)
@ -472,7 +472,7 @@ class BBCCoUkIE(InfoExtractor):
return programme_id, title, description, duration, formats, subtitles return programme_id, title, description, duration, formats, subtitles
except ExtractorError as ee: except ExtractorError as ee:
if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404): if not (isinstance(ee.cause, HTTPError) and ee.cause.status == 404):
raise raise
# fallback to legacy playlist # fallback to legacy playlist
@ -983,7 +983,7 @@ class BBCIE(BBCCoUkIE): # XXX: Do not subclass from concrete IE
# Some playlist URL may fail with 500, at the same time # Some playlist URL may fail with 500, at the same time
# the other one may work fine (e.g. # the other one may work fine (e.g.
# http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu) # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500: if isinstance(e.cause, HTTPError) and e.cause.status == 500:
continue continue
raise raise
if entry: if entry:

View file

@ -4,11 +4,11 @@ import hashlib
import itertools import itertools
import math import math
import time import time
import urllib.error
import urllib.parse import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor from .common import InfoExtractor, SearchInfoExtractor
from ..dependencies import Cryptodome from ..dependencies import Cryptodome
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
GeoRestrictedError, GeoRestrictedError,
@ -18,6 +18,7 @@ from ..utils import (
float_or_none, float_or_none,
format_field, format_field,
int_or_none, int_or_none,
join_nonempty,
make_archive_id, make_archive_id,
merge_dicts, merge_dicts,
mimetype2ext, mimetype2ext,
@ -135,6 +136,17 @@ class BilibiliBaseIE(InfoExtractor):
for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))): for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
yield from children yield from children
def _get_episodes_from_season(self, ss_id, url):
season_info = self._download_json(
'https://api.bilibili.com/pgc/web/season/section', ss_id,
note='Downloading season info', query={'season_id': ss_id},
headers={'Referer': url, **self.geo_verification_headers()})
for entry in traverse_obj(season_info, (
'result', 'main_section', 'episodes',
lambda _, v: url_or_none(v['share_url']) and v['id'])):
yield self.url_result(entry['share_url'], BiliBiliBangumiIE, f'ep{entry["id"]}')
class BiliBiliIE(BilibiliBaseIE): class BiliBiliIE(BilibiliBaseIE):
_VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)' _VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
@ -403,76 +415,93 @@ class BiliBiliIE(BilibiliBaseIE):
class BiliBiliBangumiIE(BilibiliBaseIE): class BiliBiliBangumiIE(BilibiliBaseIE):
_VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/(?P<id>(?:ss|ep)\d+)' _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.bilibili.com/bangumi/play/ss897', 'url': 'https://www.bilibili.com/bangumi/play/ep267851',
'info_dict': { 'info_dict': {
'id': 'ss897', 'id': '267851',
'ext': 'mp4', 'ext': 'mp4',
'series': '神的记事本', 'series': '鬼灭之刃',
'season': '神的记事本', 'series_id': '4358',
'season_id': 897, 'season': '鬼灭之刃',
'season_id': '26801',
'season_number': 1, 'season_number': 1,
'episode': '你与旅行包', 'episode': '残酷',
'episode_number': 2, 'episode_id': '267851',
'title': '神的记事本第2话 你与旅行包', 'episode_number': 1,
'duration': 1428.487, 'title': '1 残酷',
'timestamp': 1310809380, 'duration': 1425.256,
'upload_date': '20110716', 'timestamp': 1554566400,
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$', 'upload_date': '20190406',
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
}, },
}, { 'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.'
'url': 'https://www.bilibili.com/bangumi/play/ep508406',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
episode_id = video_id[2:]
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
if '您所在的地区无法观看本片' in webpage: if '您所在的地区无法观看本片' in webpage:
raise GeoRestrictedError('This video is restricted') raise GeoRestrictedError('This video is restricted')
elif ('开通大会员观看' in webpage and '__playinfo__' not in webpage elif '正在观看预览,大会员免费看全片' in webpage:
or '正在观看预览,大会员免费看全片' in webpage):
self.raise_login_required('This video is for premium members only') self.raise_login_required('This video is for premium members only')
play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data'] headers = {'Referer': url, **self.geo_verification_headers()}
play_info = self._download_json(
'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id,
'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
headers=headers)
premium_only = play_info.get('code') == -10403
play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
formats = self.extract_formats(play_info) formats = self.extract_formats(play_info)
if (not formats and '成为大会员抢先看' in webpage if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
and play_info.get('durl') and not play_info.get('dash')):
self.raise_login_required('This video is for premium members only') self.raise_login_required('This video is for premium members only')
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id) bangumi_info = self._download_json(
'https://api.bilibili.com/pgc/view/web/season', video_id, 'Get episode details',
query={'ep_id': episode_id}, headers=headers)['result']
season_id = traverse_obj(initial_state, ('mediaInfo', 'season_id')) episode_number, episode_info = next((
(idx, ep) for idx, ep in enumerate(traverse_obj(
bangumi_info, ('episodes', ..., {dict})), 1)
if str_or_none(ep.get('id')) == episode_id), (1, {}))
season_id = bangumi_info.get('season_id')
season_number = season_id and next(( season_number = season_id and next((
idx + 1 for idx, e in enumerate( idx + 1 for idx, e in enumerate(
traverse_obj(initial_state, ('mediaInfo', 'seasons', ...))) traverse_obj(bangumi_info, ('seasons', ...)))
if e.get('season_id') == season_id if e.get('season_id') == season_id
), None) ), None)
aid = episode_info.get('aid')
return { return {
'id': video_id, 'id': video_id,
'formats': formats, 'formats': formats,
'title': traverse_obj(initial_state, 'h1Title'), **traverse_obj(bangumi_info, {
'episode': traverse_obj(initial_state, ('epInfo', 'long_title')), 'series': ('series', 'series_title', {str}),
'episode_number': int_or_none(traverse_obj(initial_state, ('epInfo', 'title'))), 'series_id': ('series', 'series_id', {str_or_none}),
'series': traverse_obj(initial_state, ('mediaInfo', 'series')), 'thumbnail': ('square_cover', {url_or_none}),
'season': traverse_obj(initial_state, ('mediaInfo', 'season_title')), }),
'season_id': season_id, 'title': join_nonempty('title', 'long_title', delim=' ', from_dict=episode_info),
'episode': episode_info.get('long_title'),
'episode_id': episode_id,
'episode_number': int_or_none(episode_info.get('title')) or episode_number,
'season_id': str_or_none(season_id),
'season_number': season_number, 'season_number': season_number,
'thumbnail': traverse_obj(initial_state, ('epInfo', 'cover')), 'timestamp': int_or_none(episode_info.get('pub_time')),
'timestamp': traverse_obj(initial_state, ('epInfo', 'pub_time')),
'duration': float_or_none(play_info.get('timelength'), scale=1000), 'duration': float_or_none(play_info.get('timelength'), scale=1000),
'subtitles': self.extract_subtitles( 'subtitles': self.extract_subtitles(video_id, aid, episode_info.get('cid')),
video_id, initial_state, traverse_obj(initial_state, ('epInfo', 'cid'))), '__post_extractor': self.extract_comments(aid),
'__post_extractor': self.extract_comments(traverse_obj(initial_state, ('epInfo', 'aid'))), 'http_headers': headers,
'http_headers': {'Referer': url, **self.geo_verification_headers()},
} }
class BiliBiliBangumiMediaIE(InfoExtractor): class BiliBiliBangumiMediaIE(BilibiliBaseIE):
_VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)' _VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.bilibili.com/bangumi/media/md24097891', 'url': 'https://www.bilibili.com/bangumi/media/md24097891',
@ -485,16 +514,26 @@ class BiliBiliBangumiMediaIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
media_id = self._match_id(url) media_id = self._match_id(url)
webpage = self._download_webpage(url, media_id) webpage = self._download_webpage(url, media_id)
ss_id = self._search_json(
r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)['mediaInfo']['season_id']
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id) return self.playlist_result(self._get_episodes_from_season(ss_id, url), media_id)
episode_list = self._download_json(
'https://api.bilibili.com/pgc/web/season/section', media_id,
query={'season_id': initial_state['mediaInfo']['season_id']},
note='Downloading season info')['result']['main_section']['episodes']
return self.playlist_result((
self.url_result(entry['share_url'], BiliBiliBangumiIE, entry['aid']) class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
for entry in episode_list), media_id) _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/ss(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.bilibili.com/bangumi/play/ss26801',
'info_dict': {
'id': '26801'
},
'playlist_mincount': 26
}]
def _real_extract(self, url):
ss_id = self._match_id(url)
return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id)
class BilibiliSpaceBaseIE(InfoExtractor): class BilibiliSpaceBaseIE(InfoExtractor):
@ -575,7 +614,7 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search', response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
playlist_id, note=f'Downloading page {page_idx}', query=query) playlist_id, note=f'Downloading page {page_idx}', query=query)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 412: if isinstance(e.cause, HTTPError) and e.cause.status == 412:
raise ExtractorError( raise ExtractorError(
'Request is blocked by server (412), please add cookies, wait and try later.', expected=True) 'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
raise raise

View file

@ -2,9 +2,9 @@ import functools
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
HEADRequest,
OnDemandPagedList, OnDemandPagedList,
clean_html, clean_html,
get_element_by_class, get_element_by_class,

View file

@ -1,6 +1,6 @@
from .adobepass import AdobePassIE from .adobepass import AdobePassIE
from ..networking import HEADRequest
from ..utils import ( from ..utils import (
HEADRequest,
extract_attributes, extract_attributes,
float_or_none, float_or_none,
get_element_html_by_class, get_element_html_by_class,
@ -155,7 +155,7 @@ class BravoTVIE(AdobePassIE):
chapters = None chapters = None
m3u8_url = self._request_webpage(HEADRequest( m3u8_url = self._request_webpage(HEADRequest(
update_url_query(f'{tp_url}/stream.m3u8', query)), video_id, 'Checking m3u8 URL').geturl() update_url_query(f'{tp_url}/stream.m3u8', query)), video_id, 'Checking m3u8 URL').url
if 'mpeg_cenc' in m3u8_url: if 'mpeg_cenc' in m3u8_url:
self.report_drm(video_id) self.report_drm(video_id)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls') formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')

View file

@ -7,10 +7,10 @@ from .adobepass import AdobePassIE
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_etree_fromstring, compat_etree_fromstring,
compat_HTTPError,
compat_parse_qs, compat_parse_qs,
compat_urlparse, compat_urlparse,
) )
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
clean_html, clean_html,
dict_get, dict_get,
@ -915,8 +915,8 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
json_data = self._download_json(api_url, video_id, headers=headers) json_data = self._download_json(api_url, video_id, headers=headers)
break break
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
json_data = self._parse_json(e.cause.read().decode(), video_id)[0] json_data = self._parse_json(e.cause.response.read().decode(), video_id)[0]
message = json_data.get('message') or json_data['error_code'] message = json_data.get('message') or json_data['error_code']
if json_data.get('error_subcode') == 'CLIENT_GEO': if json_data.get('error_subcode') == 'CLIENT_GEO':
self.raise_geo_restricted(msg=message) self.raise_geo_restricted(msg=message)

View file

@ -64,7 +64,7 @@ class CanalplusIE(InfoExtractor):
# response = self._request_webpage( # response = self._request_webpage(
# HEADRequest(fmt_url), video_id, # HEADRequest(fmt_url), video_id,
# 'Checking if the video is georestricted') # 'Checking if the video is georestricted')
# if '/blocage' in response.geturl(): # if '/blocage' in response.url:
# raise ExtractorError( # raise ExtractorError(
# 'The video is not available in your country', # 'The video is not available in your country',
# expected=True) # expected=True)

View file

@ -7,9 +7,9 @@ import zlib
from .anvato import AnvatoIE from .anvato import AnvatoIE
from .common import InfoExtractor from .common import InfoExtractor
from .paramountplus import ParamountPlusIE from .paramountplus import ParamountPlusIE
from ..networking import HEADRequest
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
HEADRequest,
UserNotLive, UserNotLive,
determine_ext, determine_ext,
float_or_none, float_or_none,

View file

@ -1,20 +1,20 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_urllib_parse_unquote, compat_urllib_parse_urlparse
compat_urllib_parse_unquote, from ..networking import Request
compat_urllib_parse_urlparse,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
sanitized_Request,
str_or_none, str_or_none,
traverse_obj, traverse_obj,
urlencode_postdata, urlencode_postdata,
USER_AGENTS,
) )
USER_AGENTS = {
'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
class CeskaTelevizeIE(InfoExtractor): class CeskaTelevizeIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
@ -97,7 +97,7 @@ class CeskaTelevizeIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
playlist_id = self._match_id(url) playlist_id = self._match_id(url)
webpage, urlh = self._download_webpage_handle(url, playlist_id) webpage, urlh = self._download_webpage_handle(url, playlist_id)
parsed_url = compat_urllib_parse_urlparse(urlh.geturl()) parsed_url = compat_urllib_parse_urlparse(urlh.url)
site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize') site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
playlist_title = self._og_search_title(webpage, default=None) playlist_title = self._og_search_title(webpage, default=None)
if site_name and playlist_title: if site_name and playlist_title:
@ -163,16 +163,16 @@ class CeskaTelevizeIE(InfoExtractor):
entries = [] entries = []
for user_agent in (None, USER_AGENTS['Safari']): for user_agent in (None, USER_AGENTS['Safari']):
req = sanitized_Request( req = Request(
'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/', 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/',
data=urlencode_postdata(data)) data=urlencode_postdata(data))
req.add_header('Content-type', 'application/x-www-form-urlencoded') req.headers['Content-type'] = 'application/x-www-form-urlencoded'
req.add_header('x-addr', '127.0.0.1') req.headers['x-addr'] = '127.0.0.1'
req.add_header('X-Requested-With', 'XMLHttpRequest') req.headers['X-Requested-With'] = 'XMLHttpRequest'
if user_agent: if user_agent:
req.add_header('User-Agent', user_agent) req.headers['User-Agent'] = user_agent
req.add_header('Referer', url) req.headers['Referer'] = url
playlistpage = self._download_json(req, playlist_id, fatal=False) playlistpage = self._download_json(req, playlist_id, fatal=False)
@ -183,8 +183,8 @@ class CeskaTelevizeIE(InfoExtractor):
if playlist_url == 'error_region': if playlist_url == 'error_region':
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) req = Request(compat_urllib_parse_unquote(playlist_url))
req.add_header('Referer', url) req.headers['Referer'] = url
playlist = self._download_json(req, playlist_id, fatal=False) playlist = self._download_json(req, playlist_id, fatal=False)
if not playlist: if not playlist:

View file

@ -1,6 +1,6 @@
import json import json
import urllib.error
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
@ -40,7 +40,7 @@ class CinetecaMilanoIE(InfoExtractor):
'Authorization': try_get(self._get_cookies('https://www.cinetecamilano.it'), lambda x: f'Bearer {x["cnt-token"].value}') or '' 'Authorization': try_get(self._get_cookies('https://www.cinetecamilano.it'), lambda x: f'Bearer {x["cnt-token"].value}') or ''
}) })
except ExtractorError as e: except ExtractorError as e:
if ((isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 500) if ((isinstance(e.cause, HTTPError) and e.cause.status == 500)
or isinstance(e.cause, json.JSONDecodeError)): or isinstance(e.cause, json.JSONDecodeError)):
self.raise_login_required(method='cookies') self.raise_login_required(method='cookies')
raise raise

View file

@ -33,7 +33,7 @@ class CiscoWebexIE(InfoExtractor):
if rcid: if rcid:
webpage = self._download_webpage(url, None, note='Getting video ID') webpage = self._download_webpage(url, None, note='Getting video ID')
url = self._search_regex(self._VALID_URL, webpage, 'redirection url', group='url') url = self._search_regex(self._VALID_URL, webpage, 'redirection url', group='url')
url = self._request_webpage(url, None, note='Resolving final URL').geturl() url = self._request_webpage(url, None, note='Resolving final URL').url
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url)
subdomain = mobj.group('subdomain') subdomain = mobj.group('subdomain')
siteurl = mobj.group('siteurl_1') or mobj.group('siteurl_2') siteurl = mobj.group('siteurl_1') or mobj.group('siteurl_2')
@ -49,7 +49,7 @@ class CiscoWebexIE(InfoExtractor):
'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id), 'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id),
video_id, headers=headers, query={'siteurl': siteurl}, expected_status=(403, 429)) video_id, headers=headers, query={'siteurl': siteurl}, expected_status=(403, 429))
if urlh.getcode() == 403: if urlh.status == 403:
if stream['code'] == 53004: if stream['code'] == 53004:
self.raise_login_required() self.raise_login_required()
if stream['code'] == 53005: if stream['code'] == 53005:
@ -59,7 +59,7 @@ class CiscoWebexIE(InfoExtractor):
'This video is protected by a password, use the --video-password option', expected=True) 'This video is protected by a password, use the --video-password option', expected=True)
raise ExtractorError(f'{self.IE_NAME} said: {stream["code"]} - {stream["message"]}', expected=True) raise ExtractorError(f'{self.IE_NAME} said: {stream["code"]} - {stream["message"]}', expected=True)
if urlh.getcode() == 429: if urlh.status == 429:
self.raise_login_required( self.raise_login_required(
f'{self.IE_NAME} asks you to solve a CAPTCHA. Solve CAPTCHA in browser and', f'{self.IE_NAME} asks you to solve a CAPTCHA. Solve CAPTCHA in browser and',
method='cookies') method='cookies')

View file

@ -17,15 +17,26 @@ import subprocess
import sys import sys
import time import time
import types import types
import urllib.error
import urllib.parse import urllib.parse
import urllib.request import urllib.request
import xml.etree.ElementTree import xml.etree.ElementTree
from ..compat import functools # isort: split from ..compat import functools # isort: split
from ..compat import compat_etree_fromstring, compat_expanduser, compat_os_name from ..compat import (
compat_etree_fromstring,
compat_expanduser,
compat_os_name,
urllib_req_to_req,
)
from ..cookies import LenientSimpleCookie from ..cookies import LenientSimpleCookie
from ..downloader.f4m import get_base_url, remove_encrypted_media from ..downloader.f4m import get_base_url, remove_encrypted_media
from ..downloader.hls import HlsFD
from ..networking import HEADRequest, Request
from ..networking.exceptions import (
HTTPError,
IncompleteRead,
network_exceptions,
)
from ..utils import ( from ..utils import (
IDENTITY, IDENTITY,
JSON_LD_RE, JSON_LD_RE,
@ -34,7 +45,6 @@ from ..utils import (
FormatSorter, FormatSorter,
GeoRestrictedError, GeoRestrictedError,
GeoUtils, GeoUtils,
HEADRequest,
LenientJSONDecoder, LenientJSONDecoder,
Popen, Popen,
RegexNotFoundError, RegexNotFoundError,
@ -60,7 +70,6 @@ from ..utils import (
js_to_json, js_to_json,
mimetype2ext, mimetype2ext,
netrc_from_content, netrc_from_content,
network_exceptions,
orderedSet, orderedSet,
parse_bitrate, parse_bitrate,
parse_codecs, parse_codecs,
@ -70,7 +79,6 @@ from ..utils import (
parse_resolution, parse_resolution,
sanitize_filename, sanitize_filename,
sanitize_url, sanitize_url,
sanitized_Request,
smuggle_url, smuggle_url,
str_or_none, str_or_none,
str_to_int, str_to_int,
@ -82,8 +90,6 @@ from ..utils import (
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
unified_timestamp, unified_timestamp,
update_Request,
update_url_query,
url_basename, url_basename,
url_or_none, url_or_none,
urlhandle_detect_ext, urlhandle_detect_ext,
@ -224,7 +230,8 @@ class InfoExtractor:
width : height ratio as float. width : height ratio as float.
* no_resume The server does not support resuming the * no_resume The server does not support resuming the
(HTTP or RTMP) download. Boolean. (HTTP or RTMP) download. Boolean.
* has_drm The format has DRM and cannot be downloaded. Boolean * has_drm True if the format has DRM and cannot be downloaded.
'maybe' if the format may have DRM and has to be tested before download.
* extra_param_to_segment_url A query string to append to each * extra_param_to_segment_url A query string to append to each
fragment's URL, or to update each existing query string fragment's URL, or to update each existing query string
with. Only applied by the native HLS/DASH downloaders. with. Only applied by the native HLS/DASH downloaders.
@ -726,7 +733,7 @@ class InfoExtractor:
e.ie = e.ie or self.IE_NAME, e.ie = e.ie or self.IE_NAME,
e.traceback = e.traceback or sys.exc_info()[2] e.traceback = e.traceback or sys.exc_info()[2]
raise raise
except http.client.IncompleteRead as e: except IncompleteRead as e:
raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url)) raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url))
except (KeyError, StopIteration) as e: except (KeyError, StopIteration) as e:
raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url)) raise ExtractorError('An extractor error has occurred.', cause=e, video_id=self.get_temp_id(url))
@ -785,20 +792,25 @@ class InfoExtractor:
@staticmethod @staticmethod
def __can_accept_status_code(err, expected_status): def __can_accept_status_code(err, expected_status):
assert isinstance(err, urllib.error.HTTPError) assert isinstance(err, HTTPError)
if expected_status is None: if expected_status is None:
return False return False
elif callable(expected_status): elif callable(expected_status):
return expected_status(err.code) is True return expected_status(err.status) is True
else: else:
return err.code in variadic(expected_status) return err.status in variadic(expected_status)
def _create_request(self, url_or_request, data=None, headers=None, query=None): def _create_request(self, url_or_request, data=None, headers=None, query=None):
if isinstance(url_or_request, urllib.request.Request): if isinstance(url_or_request, urllib.request.Request):
return update_Request(url_or_request, data=data, headers=headers, query=query) self._downloader.deprecation_warning(
if query: 'Passing a urllib.request.Request to _create_request() is deprecated. '
url_or_request = update_url_query(url_or_request, query) 'Use yt_dlp.networking.common.Request instead.')
return sanitized_Request(url_or_request, data, headers or {}) url_or_request = urllib_req_to_req(url_or_request)
elif not isinstance(url_or_request, Request):
url_or_request = Request(url_or_request)
url_or_request.update(data=data, headers=headers, query=query)
return url_or_request
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None): def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None, expected_status=None):
""" """
@ -834,14 +846,9 @@ class InfoExtractor:
try: try:
return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query)) return self._downloader.urlopen(self._create_request(url_or_request, data, headers, query))
except network_exceptions as err: except network_exceptions as err:
if isinstance(err, urllib.error.HTTPError): if isinstance(err, HTTPError):
if self.__can_accept_status_code(err, expected_status): if self.__can_accept_status_code(err, expected_status):
# Retain reference to error to prevent file object from return err.response
# being closed before it can be read. Works around the
# effects of <https://bugs.python.org/issue15002>
# introduced in Python 3.4.1.
err.fp._error = err
return err.fp
if errnote is False: if errnote is False:
return False return False
@ -973,11 +980,11 @@ class InfoExtractor:
if prefix is not None: if prefix is not None:
webpage_bytes = prefix + webpage_bytes webpage_bytes = prefix + webpage_bytes
if self.get_param('dump_intermediate_pages', False): if self.get_param('dump_intermediate_pages', False):
self.to_screen('Dumping request to ' + urlh.geturl()) self.to_screen('Dumping request to ' + urlh.url)
dump = base64.b64encode(webpage_bytes).decode('ascii') dump = base64.b64encode(webpage_bytes).decode('ascii')
self._downloader.to_screen(dump) self._downloader.to_screen(dump)
if self.get_param('write_pages'): if self.get_param('write_pages'):
filename = self._request_dump_filename(urlh.geturl(), video_id) filename = self._request_dump_filename(urlh.url, video_id)
self.to_screen(f'Saving request to {filename}') self.to_screen(f'Saving request to {filename}')
with open(filename, 'wb') as outf: with open(filename, 'wb') as outf:
outf.write(webpage_bytes) outf.write(webpage_bytes)
@ -1035,7 +1042,7 @@ class InfoExtractor:
fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
if self.get_param('load_pages'): if self.get_param('load_pages'):
url_or_request = self._create_request(url_or_request, data, headers, query) url_or_request = self._create_request(url_or_request, data, headers, query)
filename = self._request_dump_filename(url_or_request.full_url, video_id) filename = self._request_dump_filename(url_or_request.url, video_id)
self.to_screen(f'Loading request from {filename}') self.to_screen(f'Loading request from {filename}')
try: try:
with open(filename, 'rb') as dumpf: with open(filename, 'rb') as dumpf:
@ -1109,7 +1116,7 @@ class InfoExtractor:
while True: while True:
try: try:
return self.__download_webpage(url_or_request, video_id, note, errnote, None, fatal, *args, **kwargs) return self.__download_webpage(url_or_request, video_id, note, errnote, None, fatal, *args, **kwargs)
except http.client.IncompleteRead as e: except IncompleteRead as e:
try_count += 1 try_count += 1
if try_count >= tries: if try_count >= tries:
raise e raise e
@ -1806,7 +1813,7 @@ class InfoExtractor:
return [] return []
manifest, urlh = res manifest, urlh = res
manifest_url = urlh.geturl() manifest_url = urlh.url
return self._parse_f4m_formats( return self._parse_f4m_formats(
manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id, manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id,
@ -1965,7 +1972,7 @@ class InfoExtractor:
return [], {} return [], {}
m3u8_doc, urlh = res m3u8_doc, urlh = res
m3u8_url = urlh.geturl() m3u8_url = urlh.url
return self._parse_m3u8_formats_and_subtitles( return self._parse_m3u8_formats_and_subtitles(
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol, m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
@ -1979,11 +1986,7 @@ class InfoExtractor:
errnote=None, fatal=True, data=None, headers={}, query={}, errnote=None, fatal=True, data=None, headers={}, query={},
video_id=None): video_id=None):
formats, subtitles = [], {} formats, subtitles = [], {}
has_drm = HlsFD._has_drm(m3u8_doc)
has_drm = re.search('|'.join([
r'#EXT-X-FAXS-CM:', # Adobe Flash Access
r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay
]), m3u8_doc)
def format_url(url): def format_url(url):
return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url) return url if re.match(r'^https?://', url) else urllib.parse.urljoin(m3u8_url, url)
@ -2247,7 +2250,7 @@ class InfoExtractor:
return [], {} return [], {}
smil, urlh = res smil, urlh = res
smil_url = urlh.geturl() smil_url = urlh.url
namespace = self._parse_smil_namespace(smil) namespace = self._parse_smil_namespace(smil)
@ -2270,7 +2273,7 @@ class InfoExtractor:
return {} return {}
smil, urlh = res smil, urlh = res
smil_url = urlh.geturl() smil_url = urlh.url
return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params) return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
@ -2462,7 +2465,7 @@ class InfoExtractor:
return [] return []
xspf, urlh = res xspf, urlh = res
xspf_url = urlh.geturl() xspf_url = urlh.url
return self._parse_xspf( return self._parse_xspf(
xspf, playlist_id, xspf_url=xspf_url, xspf, playlist_id, xspf_url=xspf_url,
@ -2533,7 +2536,7 @@ class InfoExtractor:
return [], {} return [], {}
# We could have been redirected to a new url when we retrieved our mpd file. # We could have been redirected to a new url when we retrieved our mpd file.
mpd_url = urlh.geturl() mpd_url = urlh.url
mpd_base_url = base_url(mpd_url) mpd_base_url = base_url(mpd_url)
return self._parse_mpd_formats_and_subtitles( return self._parse_mpd_formats_and_subtitles(
@ -2919,7 +2922,7 @@ class InfoExtractor:
if ism_doc is None: if ism_doc is None:
return [], {} return [], {}
return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id) return self._parse_ism_formats_and_subtitles(ism_doc, urlh.url, ism_id)
def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None): def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
""" """

View file

@ -4,7 +4,7 @@ import re
import time import time
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
float_or_none, float_or_none,
@ -113,7 +113,7 @@ class CrackleIE(InfoExtractor):
errnote='Unable to download media JSON') errnote='Unable to download media JSON')
except ExtractorError as e: except ExtractorError as e:
# 401 means geo restriction, trying next country # 401 means geo restriction, trying next country
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: if isinstance(e.cause, HTTPError) and e.cause.status == 401:
continue continue
raise raise

View file

@ -1,7 +1,7 @@
import base64 import base64
import urllib.error
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
@ -27,11 +27,24 @@ class CrunchyrollBaseIE(InfoExtractor):
_AUTH_HEADERS = None _AUTH_HEADERS = None
_API_ENDPOINT = None _API_ENDPOINT = None
_BASIC_AUTH = None _BASIC_AUTH = None
_QUERY = {} _CLIENT_ID = ('cr_web', 'noaihdevm_6iyg0a8l0q')
_LOCALE_LOOKUP = {
'ar': 'ar-SA',
'de': 'de-DE',
'': 'en-US',
'es': 'es-419',
'es-es': 'es-ES',
'fr': 'fr-FR',
'it': 'it-IT',
'pt-br': 'pt-BR',
'pt-pt': 'pt-PT',
'ru': 'ru-RU',
'hi': 'hi-IN',
}
@property @property
def is_logged_in(self): def is_logged_in(self):
return self._get_cookies(self._BASE_URL).get('etp_rt') return bool(self._get_cookies(self._BASE_URL).get('etp_rt'))
def _perform_login(self, username, password): def _perform_login(self, username, password):
if self.is_logged_in: if self.is_logged_in:
@ -62,49 +75,49 @@ class CrunchyrollBaseIE(InfoExtractor):
if not self.is_logged_in: if not self.is_logged_in:
raise ExtractorError('Login succeeded but did not set etp_rt cookie') raise ExtractorError('Login succeeded but did not set etp_rt cookie')
def _update_query(self, lang):
if lang in CrunchyrollBaseIE._QUERY:
return
webpage = self._download_webpage(
f'{self._BASE_URL}/{lang}', None, note=f'Retrieving main page (lang={lang or None})')
initial_state = self._search_json(r'__INITIAL_STATE__\s*=', webpage, 'initial state', None)
CrunchyrollBaseIE._QUERY[lang] = traverse_obj(initial_state, {
'locale': ('localization', 'locale'),
}) or None
if CrunchyrollBaseIE._BASIC_AUTH:
return
app_config = self._search_json(r'__APP_CONFIG__\s*=', webpage, 'app config', None)
cx_api_param = app_config['cxApiParams']['accountAuthClientId' if self.is_logged_in else 'anonClientId']
self.write_debug(f'Using cxApiParam={cx_api_param}')
CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
def _update_auth(self): def _update_auth(self):
if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds(): if CrunchyrollBaseIE._AUTH_HEADERS and CrunchyrollBaseIE._AUTH_REFRESH > time_seconds():
return return
assert CrunchyrollBaseIE._BASIC_AUTH, '_update_query needs to be called at least one time beforehand' if not CrunchyrollBaseIE._BASIC_AUTH:
cx_api_param = self._CLIENT_ID[self.is_logged_in]
self.write_debug(f'Using cxApiParam={cx_api_param}')
CrunchyrollBaseIE._BASIC_AUTH = 'Basic ' + base64.b64encode(f'{cx_api_param}:'.encode()).decode()
grant_type = 'etp_rt_cookie' if self.is_logged_in else 'client_id' grant_type = 'etp_rt_cookie' if self.is_logged_in else 'client_id'
auth_response = self._download_json( try:
f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}', auth_response = self._download_json(
headers={'Authorization': CrunchyrollBaseIE._BASIC_AUTH}, data=f'grant_type={grant_type}'.encode()) f'{self._BASE_URL}/auth/v1/token', None, note=f'Authenticating with grant_type={grant_type}',
headers={'Authorization': CrunchyrollBaseIE._BASIC_AUTH}, data=f'grant_type={grant_type}'.encode())
except ExtractorError as error:
if isinstance(error.cause, HTTPError) and error.cause.status == 403:
raise ExtractorError(
'Request blocked by Cloudflare; navigate to Crunchyroll in your browser, '
'then pass the fresh cookies (with --cookies-from-browser or --cookies) '
'and your browser\'s User-Agent (with --user-agent)', expected=True)
raise
CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']} CrunchyrollBaseIE._AUTH_HEADERS = {'Authorization': auth_response['token_type'] + ' ' + auth_response['access_token']}
CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10) CrunchyrollBaseIE._AUTH_REFRESH = time_seconds(seconds=traverse_obj(auth_response, ('expires_in', {float_or_none}), default=300) - 10)
def _locale_from_language(self, language):
config_locale = self._configuration_arg('metadata', ie_key=CrunchyrollBetaIE, casesense=True)
return config_locale[0] if config_locale else self._LOCALE_LOOKUP.get(language)
def _call_base_api(self, endpoint, internal_id, lang, note=None, query={}): def _call_base_api(self, endpoint, internal_id, lang, note=None, query={}):
self._update_query(lang)
self._update_auth() self._update_auth()
if not endpoint.startswith('/'): if not endpoint.startswith('/'):
endpoint = f'/{endpoint}' endpoint = f'/{endpoint}'
query = query.copy()
locale = self._locale_from_language(lang)
if locale:
query['locale'] = locale
return self._download_json( return self._download_json(
f'{self._BASE_URL}{endpoint}', internal_id, note or f'Calling API: {endpoint}', f'{self._BASE_URL}{endpoint}', internal_id, note or f'Calling API: {endpoint}',
headers=CrunchyrollBaseIE._AUTH_HEADERS, query={**CrunchyrollBaseIE._QUERY[lang], **query}) headers=CrunchyrollBaseIE._AUTH_HEADERS, query=query)
def _call_api(self, path, internal_id, lang, note='api', query={}): def _call_api(self, path, internal_id, lang, note='api', query={}):
if not path.startswith(f'/content/v2/{self._API_ENDPOINT}/'): if not path.startswith(f'/content/v2/{self._API_ENDPOINT}/'):
@ -114,7 +127,7 @@ class CrunchyrollBaseIE(InfoExtractor):
result = self._call_base_api( result = self._call_base_api(
path, internal_id, lang, f'Downloading {note} JSON ({self._API_ENDPOINT})', query=query) path, internal_id, lang, f'Downloading {note} JSON ({self._API_ENDPOINT})', query=query)
except ExtractorError as error: except ExtractorError as error:
if isinstance(error.cause, urllib.error.HTTPError) and error.cause.code == 404: if isinstance(error.cause, HTTPError) and error.cause.status == 404:
return None return None
raise raise
@ -206,7 +219,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
IE_NAME = 'crunchyroll' IE_NAME = 'crunchyroll'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?://(?:beta\.|www\.)?crunchyroll\.com/ https?://(?:beta\.|www\.)?crunchyroll\.com/
(?P<lang>(?:\w{2}(?:-\w{2})?/)?) (?:(?P<lang>\w{2}(?:-\w{2})?)/)?
watch/(?!concert|musicvideo)(?P<id>\w+)''' watch/(?!concert|musicvideo)(?P<id>\w+)'''
_TESTS = [{ _TESTS = [{
# Premium only # Premium only
@ -304,7 +317,7 @@ class CrunchyrollBetaIE(CrunchyrollCmsBaseIE):
}, },
'playlist_mincount': 5, 'playlist_mincount': 5,
}, { }, {
'url': 'https://www.crunchyroll.com/watch/GY2P1Q98Y', 'url': 'https://www.crunchyroll.com/de/watch/GY2P1Q98Y',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy', 'url': 'https://beta.crunchyroll.com/pt-br/watch/G8WUN8VKP/the-ruler-of-conspiracy',
@ -490,8 +503,21 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?://(?:www\.)?crunchyroll\.com/ https?://(?:www\.)?crunchyroll\.com/
(?P<lang>(?:\w{2}(?:-\w{2})?/)?) (?P<lang>(?:\w{2}(?:-\w{2})?/)?)
watch/(?P<type>concert|musicvideo)/(?P<id>\w{10})''' watch/(?P<type>concert|musicvideo)/(?P<id>\w+)'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79',
'info_dict': {
'ext': 'mp4',
'id': 'MV5B02C79',
'display_id': 'egaono-hana',
'title': 'Egaono Hana',
'track': 'Egaono Hana',
'artist': 'Goose house',
'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$',
'genre': ['J-Pop'],
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C', 'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C',
'info_dict': { 'info_dict': {
'ext': 'mp4', 'ext': 'mp4',
@ -519,11 +545,14 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE):
}, },
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, { }, {
'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field', 'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79/egaono-hana',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135/live-is-smile-always-364joker-at-yokohama-arena', 'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135/live-is-smile-always-364joker-at-yokohama-arena',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field',
'only_matching': True,
}] }]
_API_ENDPOINT = 'music' _API_ENDPOINT = 'music'

View file

@ -1,10 +1,8 @@
import time import time
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..networking import HEADRequest
int_or_none, from ..utils import int_or_none
HEADRequest,
)
class CultureUnpluggedIE(InfoExtractor): class CultureUnpluggedIE(InfoExtractor):

View file

@ -1,9 +1,9 @@
import hashlib import hashlib
import re import re
import time import time
import urllib.error
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
classproperty, classproperty,
@ -105,7 +105,7 @@ class DacastVODIE(DacastBaseIE):
formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4', m3u8_id='hls') formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4', m3u8_id='hls')
except ExtractorError as e: except ExtractorError as e:
# CDN will randomly respond with 403 # CDN will randomly respond with 403
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 403: if isinstance(e.cause, HTTPError) and e.cause.status == 403:
retry.error = e retry.error = e
continue continue
raise raise

View file

@ -3,7 +3,7 @@ import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
OnDemandPagedList, OnDemandPagedList,
@ -68,9 +68,9 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
None, 'Downloading Access Token', None, 'Downloading Access Token',
data=urlencode_postdata(data))['access_token'] data=urlencode_postdata(data))['access_token']
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: if isinstance(e.cause, HTTPError) and e.cause.status == 400:
raise ExtractorError(self._parse_json( raise ExtractorError(self._parse_json(
e.cause.read().decode(), xid)['error_description'], expected=True) e.cause.response.read().decode(), xid)['error_description'], expected=True)
raise raise
self._set_dailymotion_cookie('access_token' if username else 'client_token', token) self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
self._HEADERS['Authorization'] = 'Bearer ' + token self._HEADERS['Authorization'] = 'Bearer ' + token

View file

@ -3,8 +3,8 @@ import string
from .discoverygo import DiscoveryGoBaseIE from .discoverygo import DiscoveryGoBaseIE
from ..compat import compat_urllib_parse_unquote from ..compat import compat_urllib_parse_unquote
from ..networking.exceptions import HTTPError
from ..utils import ExtractorError from ..utils import ExtractorError
from ..compat import compat_HTTPError
class DiscoveryIE(DiscoveryGoBaseIE): class DiscoveryIE(DiscoveryGoBaseIE):
@ -100,9 +100,9 @@ class DiscoveryIE(DiscoveryGoBaseIE):
self._API_BASE_URL + 'streaming/video/' + video_id, self._API_BASE_URL + 'streaming/video/' + video_id,
display_id, 'Downloading streaming JSON metadata', headers=headers) display_id, 'Downloading streaming JSON metadata', headers=headers)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): if isinstance(e.cause, HTTPError) and e.cause.status in (401, 403):
e_description = self._parse_json( e_description = self._parse_json(
e.cause.read().decode(), display_id)['description'] e.cause.response.read().decode(), display_id)['description']
if 'resource not available for country' in e_description: if 'resource not available for country' in e_description:
self.raise_geo_restricted(countries=self._GEO_COUNTRIES) self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
if 'Authorized Networks' in e_description: if 'Authorized Networks' in e_description:

View file

@ -2,7 +2,7 @@ import json
import uuid import uuid
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
@ -39,7 +39,7 @@ class DPlayBaseIE(InfoExtractor):
return f'Bearer {token}' return f'Bearer {token}'
def _process_errors(self, e, geo_countries): def _process_errors(self, e, geo_countries):
info = self._parse_json(e.cause.read().decode('utf-8'), None) info = self._parse_json(e.cause.response.read().decode('utf-8'), None)
error = info['errors'][0] error = info['errors'][0]
error_code = error.get('code') error_code = error.get('code')
if error_code == 'access.denied.geoblocked': if error_code == 'access.denied.geoblocked':
@ -87,7 +87,7 @@ class DPlayBaseIE(InfoExtractor):
'include': 'images,primaryChannel,show,tags' 'include': 'images,primaryChannel,show,tags'
}) })
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: if isinstance(e.cause, HTTPError) and e.cause.status == 400:
self._process_errors(e, geo_countries) self._process_errors(e, geo_countries)
raise raise
video_id = video['data']['id'] video_id = video['data']['id']
@ -99,7 +99,7 @@ class DPlayBaseIE(InfoExtractor):
streaming = self._download_video_playback_info( streaming = self._download_video_playback_info(
disco_base, video_id, headers) disco_base, video_id, headers)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: if isinstance(e.cause, HTTPError) and e.cause.status == 403:
self._process_errors(e, geo_countries) self._process_errors(e, geo_countries)
raise raise
for format_dict in streaming: for format_dict in streaming:

View file

@ -2,7 +2,7 @@ import functools
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@ -111,8 +111,8 @@ class EaglePlatformIE(InfoExtractor):
response = super(EaglePlatformIE, self)._download_json( response = super(EaglePlatformIE, self)._download_json(
url_or_request, video_id, *args, **kwargs) url_or_request, video_id, *args, **kwargs)
except ExtractorError as ee: except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError): if isinstance(ee.cause, HTTPError):
response = self._parse_json(ee.cause.read().decode('utf-8'), video_id) response = self._parse_json(ee.cause.response.read().decode('utf-8'), video_id)
self._handle_error(response) self._handle_error(response)
raise raise
return response return response

View file

@ -1,10 +1,6 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..networking import Request
float_or_none, from ..utils import float_or_none, int_or_none, parse_iso8601
int_or_none,
parse_iso8601,
sanitized_Request,
)
class EitbIE(InfoExtractor): class EitbIE(InfoExtractor):
@ -54,7 +50,7 @@ class EitbIE(InfoExtractor):
hls_url = media.get('HLS_SURL') hls_url = media.get('HLS_SURL')
if hls_url: if hls_url:
request = sanitized_Request( request = Request(
'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/', 'http://mam.eitb.eus/mam/REST/ServiceMultiweb/DomainRestrictedSecurity/TokenAuth/',
headers={'Referer': url}) headers={'Referer': url})
token_data = self._download_json( token_data = self._download_json(

View file

@ -52,7 +52,7 @@ class EpornerIE(InfoExtractor):
webpage, urlh = self._download_webpage_handle(url, display_id) webpage, urlh = self._download_webpage_handle(url, display_id)
video_id = self._match_id(urlh.geturl()) video_id = self._match_id(urlh.url)
hash = self._search_regex( hash = self._search_regex(
r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash') r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash')

View file

@ -41,7 +41,7 @@ class EttuTvIE(InfoExtractor):
'device': 'desktop', 'device': 'desktop',
}) })
stream_response = self._download_json(player_settings['streamAccess'], video_id, data={}) stream_response = self._download_json(player_settings['streamAccess'], video_id, data=b'')
formats, subtitles = self._extract_m3u8_formats_and_subtitles( formats, subtitles = self._extract_m3u8_formats_and_subtitles(
stream_response['data']['stream'], video_id, 'mp4') stream_response['data']['stream'], video_id, 'mp4')

View file

@ -8,6 +8,8 @@ from ..compat import (
compat_str, compat_str,
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
) )
from ..networking import Request
from ..networking.exceptions import network_exceptions
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
clean_html, clean_html,
@ -19,11 +21,10 @@ from ..utils import (
int_or_none, int_or_none,
js_to_json, js_to_json,
merge_dicts, merge_dicts,
network_exceptions,
parse_count, parse_count,
parse_qs, parse_qs,
qualities, qualities,
sanitized_Request, str_or_none,
traverse_obj, traverse_obj,
try_get, try_get,
url_or_none, url_or_none,
@ -90,16 +91,16 @@ class FacebookIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '274175099429670', 'id': '274175099429670',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Asif Nawab Butt', 'title': 'Asif',
'description': 'Asif Nawab Butt', 'description': '',
'uploader': 'Asif Nawab Butt', 'uploader': 'Asif Nawab Butt',
'upload_date': '20140506', 'upload_date': '20140506',
'timestamp': 1399398998, 'timestamp': 1399398998,
'thumbnail': r're:^https?://.*', 'thumbnail': r're:^https?://.*',
'uploader_id': 'pfbid04scW44U4P9iTyLZAGy8y8W3pR3i2VugvHCimiRudUAVbN3MPp9eXBaYFcgVworZwl',
'duration': 131.03,
'concurrent_view_count': int,
}, },
'expected_warnings': [
'title'
]
}, { }, {
'note': 'Video with DASH manifest', 'note': 'Video with DASH manifest',
'url': 'https://www.facebook.com/video.php?v=957955867617029', 'url': 'https://www.facebook.com/video.php?v=957955867617029',
@ -151,7 +152,7 @@ class FacebookIE(InfoExtractor):
# have 1080P, but only up to 720p in swf params # have 1080P, but only up to 720p in swf params
# data.video.story.attachments[].media # data.video.story.attachments[].media
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/', 'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
'md5': '3f3798adb2b73423263e59376f1f5eb7', 'md5': 'ca63897a90c9452efee5f8c40d080e25',
'info_dict': { 'info_dict': {
'id': '10155529876156509', 'id': '10155529876156509',
'ext': 'mp4', 'ext': 'mp4',
@ -162,6 +163,9 @@ class FacebookIE(InfoExtractor):
'uploader': 'CNN', 'uploader': 'CNN',
'thumbnail': r're:^https?://.*', 'thumbnail': r're:^https?://.*',
'view_count': int, 'view_count': int,
'uploader_id': '100059479812265',
'concurrent_view_count': int,
'duration': 44.478,
}, },
}, { }, {
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
@ -170,12 +174,16 @@ class FacebookIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '1417995061575415', 'id': '1417995061575415',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Ukrainian Scientists Worldwide | Довгоочікуване відео', 'title': 'Довгоочікуване відео | By Yaroslav - Facebook',
'description': 'Довгоочікуване відео', 'description': 'Довгоочікуване відео',
'timestamp': 1486648771, 'timestamp': 1486648217,
'upload_date': '20170209', 'upload_date': '20170209',
'uploader': 'Yaroslav Korpan', 'uploader': 'Yaroslav Korpan',
'uploader_id': '100000948048708', 'uploader_id': 'pfbid029y8j22EwH3ikeqgH3SEP9G3CAi9kmWKgXJJG9s5geV7mo3J2bvURqHCdgucRgAyhl',
'concurrent_view_count': int,
'thumbnail': r're:^https?://.*',
'view_count': int,
'duration': 11736.446,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -192,9 +200,7 @@ class FacebookIE(InfoExtractor):
'uploader': 'La Guía Del Varón', 'uploader': 'La Guía Del Varón',
'thumbnail': r're:^https?://.*', 'thumbnail': r're:^https?://.*',
}, },
'params': { 'skip': 'Requires logging in',
'skip_download': True,
},
}, { }, {
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/', 'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
@ -208,9 +214,7 @@ class FacebookIE(InfoExtractor):
'uploader': 'Elisabeth Ahtn', 'uploader': 'Elisabeth Ahtn',
'uploader_id': '100013949973717', 'uploader_id': '100013949973717',
}, },
'params': { 'skip': 'Requires logging in',
'skip_download': True,
},
}, { }, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True, 'only_matching': True,
@ -252,7 +256,11 @@ class FacebookIE(InfoExtractor):
'timestamp': 1527084179, 'timestamp': 1527084179,
'upload_date': '20180523', 'upload_date': '20180523',
'uploader': 'ESL One Dota 2', 'uploader': 'ESL One Dota 2',
'uploader_id': '234218833769558', 'uploader_id': '100066514874195',
'duration': 4524.212,
'view_count': int,
'thumbnail': r're:^https?://.*',
'concurrent_view_count': int,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -262,8 +270,17 @@ class FacebookIE(InfoExtractor):
'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/', 'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/',
'info_dict': { 'info_dict': {
'id': '106560053808006', 'id': '106560053808006',
'ext': 'mp4',
'title': 'Josef',
'thumbnail': r're:^https?://.*',
'concurrent_view_count': int,
'uploader_id': 'pfbid02gXHbDwxumkaKJQaTGUf3znYfYzTuidGEWawiramNx4YamSj2afwYSRkpcjtHtMRJl',
'timestamp': 1549275572,
'duration': 3.413,
'uploader': 'Josef Novak',
'description': '',
'upload_date': '20190204',
}, },
'playlist_count': 2,
}, { }, {
# data.video.story.attachments[].media # data.video.story.attachments[].media
'url': 'https://www.facebook.com/watch/?v=647537299265662', 'url': 'https://www.facebook.com/watch/?v=647537299265662',
@ -276,6 +293,7 @@ class FacebookIE(InfoExtractor):
'id': '10157667649866271', 'id': '10157667649866271',
}, },
'playlist_count': 3, 'playlist_count': 3,
'skip': 'Requires logging in',
}, { }, {
# data.nodes[].comet_sections.content.story.attachments[].style_type_renderer.attachment.media # data.nodes[].comet_sections.content.story.attachments[].style_type_renderer.attachment.media
'url': 'https://m.facebook.com/Alliance.Police.Department/posts/4048563708499330', 'url': 'https://m.facebook.com/Alliance.Police.Department/posts/4048563708499330',
@ -319,7 +337,7 @@ class FacebookIE(InfoExtractor):
} }
def _perform_login(self, username, password): def _perform_login(self, username, password):
login_page_req = sanitized_Request(self._LOGIN_URL) login_page_req = Request(self._LOGIN_URL)
self._set_cookie('facebook.com', 'locale', 'en_US') self._set_cookie('facebook.com', 'locale', 'en_US')
login_page = self._download_webpage(login_page_req, None, login_page = self._download_webpage(login_page_req, None,
note='Downloading login page', note='Downloading login page',
@ -340,8 +358,8 @@ class FacebookIE(InfoExtractor):
'timezone': '-60', 'timezone': '-60',
'trynum': '1', 'trynum': '1',
} }
request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form)) request = Request(self._LOGIN_URL, urlencode_postdata(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.headers['Content-Type'] = 'application/x-www-form-urlencoded'
try: try:
login_results = self._download_webpage(request, None, login_results = self._download_webpage(request, None,
note='Logging in', errnote='unable to fetch login page') note='Logging in', errnote='unable to fetch login page')
@ -367,8 +385,8 @@ class FacebookIE(InfoExtractor):
'h': h, 'h': h,
'name_action_selected': 'dont_save', 'name_action_selected': 'dont_save',
} }
check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form)) check_req = Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
check_req.add_header('Content-Type', 'application/x-www-form-urlencoded') check_req.headers['Content-Type'] = 'application/x-www-form-urlencoded'
check_response = self._download_webpage(check_req, None, check_response = self._download_webpage(check_req, None,
note='Confirming login') note='Confirming login')
if re.search(r'id="checkpointSubmitButton"', check_response) is not None: if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
@ -497,6 +515,13 @@ class FacebookIE(InfoExtractor):
entries = [] entries = []
def parse_graphql_video(video): def parse_graphql_video(video):
v_id = video.get('videoId') or video.get('id') or video_id
reel_info = traverse_obj(
video, ('creation_story', 'short_form_video_context', 'playback_video', {dict}))
if reel_info:
video = video['creation_story']
video['owner'] = traverse_obj(video, ('short_form_video_context', 'video_owner'))
video.update(reel_info)
formats = [] formats = []
q = qualities(['sd', 'hd']) q = qualities(['sd', 'hd'])
for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'), for key, format_id in (('playable_url', 'sd'), ('playable_url_quality_hd', 'hd'),
@ -513,15 +538,15 @@ class FacebookIE(InfoExtractor):
'url': playable_url, 'url': playable_url,
}) })
extract_dash_manifest(video, formats) extract_dash_manifest(video, formats)
v_id = video.get('videoId') or video.get('id') or video_id
info = { info = {
'id': v_id, 'id': v_id,
'formats': formats, 'formats': formats,
'thumbnail': traverse_obj( 'thumbnail': traverse_obj(
video, ('thumbnailImage', 'uri'), ('preferred_thumbnail', 'image', 'uri')), video, ('thumbnailImage', 'uri'), ('preferred_thumbnail', 'image', 'uri')),
'uploader_id': try_get(video, lambda x: x['owner']['id']), 'uploader_id': traverse_obj(video, ('owner', 'id', {str_or_none})),
'timestamp': int_or_none(video.get('publish_time')), 'timestamp': traverse_obj(video, 'publish_time', 'creation_time', expected_type=int_or_none),
'duration': float_or_none(video.get('playable_duration_in_ms'), 1000), 'duration': (float_or_none(video.get('playable_duration_in_ms'), 1000)
or float_or_none(video.get('length_in_second'))),
} }
process_formats(info) process_formats(info)
description = try_get(video, lambda x: x['savable_description']['text']) description = try_get(video, lambda x: x['savable_description']['text'])
@ -782,18 +807,18 @@ class FacebookReelIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'https://www.facebook.com/reel/1195289147628387', 'url': 'https://www.facebook.com/reel/1195289147628387',
'md5': 'c4ff9a7182ff9ff7d6f7a83603bae831', 'md5': 'f13dd37f2633595982db5ed8765474d3',
'info_dict': { 'info_dict': {
'id': '1195289147628387', 'id': '1195289147628387',
'ext': 'mp4', 'ext': 'mp4',
'title': 'md5:9f5b142921b2dc57004fa13f76005f87', 'title': 'md5:b05800b5b1ad56c0ca78bd3807b6a61e',
'description': 'md5:24ea7ef062215d295bdde64e778f5474', 'description': 'md5:22f03309b216ac84720183961441d8db',
'uploader': 'Beast Camp Training', 'uploader': 'md5:723e6cb3091241160f20b3c5dc282af1',
'uploader_id': '1738535909799870', 'uploader_id': '100040874179269',
'duration': 9.536, 'duration': 9.579,
'thumbnail': r're:^https?://.*', 'timestamp': 1637502609,
'upload_date': '20211121', 'upload_date': '20211121',
'timestamp': 1637502604, 'thumbnail': r're:^https?://.*',
} }
}] }]

View file

@ -3,11 +3,11 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_parse_qs from ..compat import compat_parse_qs
from ..dependencies import websockets from ..dependencies import websockets
from ..networking import Request
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
WebSocketsWrapper, WebSocketsWrapper,
js_to_json, js_to_json,
sanitized_Request,
traverse_obj, traverse_obj,
update_url_query, update_url_query,
urlencode_postdata, urlencode_postdata,
@ -57,7 +57,7 @@ class FC2IE(InfoExtractor):
} }
login_data = urlencode_postdata(login_form_strs) login_data = urlencode_postdata(login_form_strs)
request = sanitized_Request( request = Request(
'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data) 'https://secure.id.fc2.com/index.php?mode=login&switch_language=en', login_data)
login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in') login_results = self._download_webpage(request, None, note='Logging in', errnote='Unable to log in')
@ -66,7 +66,7 @@ class FC2IE(InfoExtractor):
return False return False
# this is also needed # this is also needed
login_redir = sanitized_Request('http://id.fc2.com/?mode=redirect&login=done') login_redir = Request('http://id.fc2.com/?mode=redirect&login=done')
self._download_webpage( self._download_webpage(
login_redir, None, note='Login redirect', errnote='Login redirect failed') login_redir, None, note='Login redirect', errnote='Login redirect failed')

View file

@ -1,8 +1,6 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_str
compat_str, from ..networking.exceptions import HTTPError
compat_HTTPError,
)
from ..utils import ( from ..utils import (
qualities, qualities,
strip_or_none, strip_or_none,
@ -40,8 +38,8 @@ class FilmOnIE(InfoExtractor):
'https://www.filmon.com/api/vod/movie?id=%s' % video_id, 'https://www.filmon.com/api/vod/movie?id=%s' % video_id,
video_id)['response'] video_id)['response']
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError): if isinstance(e.cause, HTTPError):
errmsg = self._parse_json(e.cause.read().decode(), video_id)['reason'] errmsg = self._parse_json(e.cause.response.read().decode(), video_id)['reason']
raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True) raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
raise raise
@ -124,8 +122,8 @@ class FilmOnChannelIE(InfoExtractor):
channel_data = self._download_json( channel_data = self._download_json(
'http://www.filmon.com/api-v2/channel/' + channel_id, channel_id)['data'] 'http://www.filmon.com/api-v2/channel/' + channel_id, channel_id)['data']
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError): if isinstance(e.cause, HTTPError):
errmsg = self._parse_json(e.cause.read().decode(), channel_id)['message'] errmsg = self._parse_json(e.cause.response.read().decode(), channel_id)['message']
raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True) raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True)
raise raise

View file

@ -3,10 +3,10 @@ import uuid
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_HTTPError,
compat_str, compat_str,
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
) )
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@ -68,9 +68,9 @@ class FOXIE(InfoExtractor):
'https://api3.fox.com/v2.0/' + path, 'https://api3.fox.com/v2.0/' + path,
video_id, data=data, headers=headers) video_id, data=data, headers=headers)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: if isinstance(e.cause, HTTPError) and e.cause.status == 403:
entitlement_issues = self._parse_json( entitlement_issues = self._parse_json(
e.cause.read().decode(), video_id)['entitlementIssues'] e.cause.response.read().decode(), video_id)['entitlementIssues']
for e in entitlement_issues: for e in entitlement_issues:
if e.get('errorCode') == 1005: if e.get('errorCode') == 1005:
raise ExtractorError( raise ExtractorError(
@ -123,8 +123,8 @@ class FOXIE(InfoExtractor):
try: try:
m3u8_url = self._download_json(release_url, video_id)['playURL'] m3u8_url = self._download_json(release_url, video_id)['playURL']
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: if isinstance(e.cause, HTTPError) and e.cause.status == 403:
error = self._parse_json(e.cause.read().decode(), video_id) error = self._parse_json(e.cause.response.read().decode(), video_id)
if error.get('exception') == 'GeoLocationBlocked': if error.get('exception') == 'GeoLocationBlocked':
self.raise_geo_restricted(countries=['US']) self.raise_geo_restricted(countries=['US'])
raise ExtractorError(error['description'], expected=True) raise ExtractorError(error['description'], expected=True)

View file

@ -1,6 +1,7 @@
from .common import InfoExtractor from .common import InfoExtractor
from .uplynk import UplynkPreplayIE from .uplynk import UplynkPreplayIE
from ..utils import HEADRequest, float_or_none, make_archive_id, smuggle_url from ..networking import HEADRequest
from ..utils import float_or_none, make_archive_id, smuggle_url
class FoxSportsIE(InfoExtractor): class FoxSportsIE(InfoExtractor):
@ -35,7 +36,7 @@ class FoxSportsIE(InfoExtractor):
'x-api-key': 'cf289e299efdfa39fb6316f259d1de93', 'x-api-key': 'cf289e299efdfa39fb6316f259d1de93',
}) })
preplay_url = self._request_webpage( preplay_url = self._request_webpage(
HEADRequest(data['url']), video_id, 'Fetching preplay URL').geturl() HEADRequest(data['url']), video_id, 'Fetching preplay URL').url
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',

View file

@ -1,5 +1,5 @@
from ..utils import HEADRequest
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest
class FujiTVFODPlus7IE(InfoExtractor): class FujiTVFODPlus7IE(InfoExtractor):

View file

@ -3,7 +3,7 @@ import re
import string import string
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
determine_ext, determine_ext,
@ -46,8 +46,8 @@ class FunimationBaseIE(InfoExtractor):
})) }))
FunimationBaseIE._TOKEN = data['token'] FunimationBaseIE._TOKEN = data['token']
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: if isinstance(e.cause, HTTPError) and e.cause.status == 401:
error = self._parse_json(e.cause.read().decode(), None)['error'] error = self._parse_json(e.cause.response.read().decode(), None)['error']
raise ExtractorError(error, expected=True) raise ExtractorError(error, expected=True)
raise raise

View file

@ -2,13 +2,8 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from .kaltura import KalturaIE from .kaltura import KalturaIE
from ..utils import ( from ..networking import HEADRequest, Request
HEADRequest, from ..utils import remove_start, smuggle_url, urlencode_postdata
remove_start,
sanitized_Request,
smuggle_url,
urlencode_postdata,
)
class GDCVaultIE(InfoExtractor): class GDCVaultIE(InfoExtractor):
@ -138,8 +133,8 @@ class GDCVaultIE(InfoExtractor):
'password': password, 'password': password,
} }
request = sanitized_Request(login_url, urlencode_postdata(login_form)) request = Request(login_url, urlencode_postdata(login_form))
request.add_header('Content-Type', 'application/x-www-form-urlencoded') request.headers['Content-Type'] = 'application/x-www-form-urlencoded'
self._download_webpage(request, display_id, 'Logging in') self._download_webpage(request, display_id, 'Logging in')
start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page') start_page = self._download_webpage(webpage_url, display_id, 'Getting authenticated video page')
self._download_webpage(logout_url, display_id, 'Logging out') self._download_webpage(logout_url, display_id, 'Logging out')
@ -163,7 +158,7 @@ class GDCVaultIE(InfoExtractor):
video_url = 'http://www.gdcvault.com' + direct_url video_url = 'http://www.gdcvault.com' + direct_url
# resolve the url so that we can detect the correct extension # resolve the url so that we can detect the correct extension
video_url = self._request_webpage( video_url = self._request_webpage(
HEADRequest(video_url), video_id).geturl() HEADRequest(video_url), video_id).url
return { return {
'id': video_id, 'id': video_id,

View file

@ -2431,7 +2431,7 @@ class GenericIE(InfoExtractor):
'Accept-Encoding': 'identity', 'Accept-Encoding': 'identity',
**smuggled_data.get('http_headers', {}) **smuggled_data.get('http_headers', {})
}) })
new_url = full_response.geturl() new_url = full_response.url
url = urllib.parse.urlparse(url)._replace(scheme=urllib.parse.urlparse(new_url).scheme).geturl() url = urllib.parse.urlparse(url)._replace(scheme=urllib.parse.urlparse(new_url).scheme).geturl()
if new_url != extract_basic_auth(url)[0]: if new_url != extract_basic_auth(url)[0]:
self.report_following_redirect(new_url) self.report_following_redirect(new_url)
@ -2529,12 +2529,12 @@ class GenericIE(InfoExtractor):
return self.playlist_result( return self.playlist_result(
self._parse_xspf( self._parse_xspf(
doc, video_id, xspf_url=url, doc, video_id, xspf_url=url,
xspf_base_url=full_response.geturl()), xspf_base_url=full_response.url),
video_id) video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles( info_dict['formats'], info_dict['subtitles'] = self._parse_mpd_formats_and_subtitles(
doc, doc,
mpd_base_url=full_response.geturl().rpartition('/')[0], mpd_base_url=full_response.url.rpartition('/')[0],
mpd_url=url) mpd_url=url)
self._extra_manifest_info(info_dict, url) self._extra_manifest_info(info_dict, url)
self.report_detected('DASH manifest') self.report_detected('DASH manifest')
@ -2562,7 +2562,7 @@ class GenericIE(InfoExtractor):
self._downloader.write_debug('Looking for embeds') self._downloader.write_debug('Looking for embeds')
embeds = list(self._extract_embeds(original_url, webpage, urlh=full_response, info_dict=info_dict)) embeds = list(self._extract_embeds(original_url, webpage, urlh=full_response, info_dict=info_dict))
if len(embeds) == 1: if len(embeds) == 1:
return {**info_dict, **embeds[0]} return merge_dicts(embeds[0], info_dict)
elif embeds: elif embeds:
return self.playlist_result(embeds, **info_dict) return self.playlist_result(embeds, **info_dict)
raise UnsupportedError(url) raise UnsupportedError(url)
@ -2572,7 +2572,7 @@ class GenericIE(InfoExtractor):
info_dict = types.MappingProxyType(info_dict) # Prevents accidental mutation info_dict = types.MappingProxyType(info_dict) # Prevents accidental mutation
video_id = traverse_obj(info_dict, 'display_id', 'id') or self._generic_id(url) video_id = traverse_obj(info_dict, 'display_id', 'id') or self._generic_id(url)
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})
actual_url = urlh.geturl() if urlh else url actual_url = urlh.url if urlh else url
# Sometimes embedded video player is hidden behind percent encoding # Sometimes embedded video player is hidden behind percent encoding
# (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448) # (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)

View file

@ -8,8 +8,8 @@ from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_str, compat_str,
) )
from ..networking import HEADRequest
from ..utils import ( from ..utils import (
HEADRequest,
ExtractorError, ExtractorError,
float_or_none, float_or_none,
orderedSet, orderedSet,

View file

@ -5,7 +5,9 @@ from ..compat import compat_parse_qs
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
determine_ext, determine_ext,
extract_attributes,
get_element_by_class, get_element_by_class,
get_element_html_by_id,
int_or_none, int_or_none,
lowercase_escape, lowercase_escape,
try_get, try_get,
@ -34,6 +36,7 @@ class GoogleDriveIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Big Buck Bunny.mp4', 'title': 'Big Buck Bunny.mp4',
'duration': 45, 'duration': 45,
'thumbnail': 'https://drive.google.com/thumbnail?id=0ByeS4oOUV-49Zzh4R1J6R09zazQ',
} }
}, { }, {
# video can't be watched anonymously due to view count limit reached, # video can't be watched anonymously due to view count limit reached,
@ -207,10 +210,10 @@ class GoogleDriveIE(InfoExtractor):
'export': 'download', 'export': 'download',
}) })
def request_source_file(source_url, kind): def request_source_file(source_url, kind, data=None):
return self._request_webpage( return self._request_webpage(
source_url, video_id, note='Requesting %s file' % kind, source_url, video_id, note='Requesting %s file' % kind,
errnote='Unable to request %s file' % kind, fatal=False) errnote='Unable to request %s file' % kind, fatal=False, data=data)
urlh = request_source_file(source_url, 'source') urlh = request_source_file(source_url, 'source')
if urlh: if urlh:
def add_source_format(urlh): def add_source_format(urlh):
@ -225,7 +228,7 @@ class GoogleDriveIE(InfoExtractor):
# Using original URLs may result in redirect loop due to # Using original URLs may result in redirect loop due to
# google.com's cookies mistakenly used for googleusercontent.com # google.com's cookies mistakenly used for googleusercontent.com
# redirect URLs (see #23919). # redirect URLs (see #23919).
'url': urlh.geturl(), 'url': urlh.url,
'ext': determine_ext(title, 'mp4').lower(), 'ext': determine_ext(title, 'mp4').lower(),
'format_id': 'source', 'format_id': 'source',
'quality': 1, 'quality': 1,
@ -237,14 +240,10 @@ class GoogleDriveIE(InfoExtractor):
urlh, url, video_id, note='Downloading confirmation page', urlh, url, video_id, note='Downloading confirmation page',
errnote='Unable to confirm download', fatal=False) errnote='Unable to confirm download', fatal=False)
if confirmation_webpage: if confirmation_webpage:
confirm = self._search_regex( confirmed_source_url = extract_attributes(
r'confirm=([^&"\']+)', confirmation_webpage, get_element_html_by_id('download-form', confirmation_webpage) or '').get('action')
'confirmation code', default=None) if confirmed_source_url:
if confirm: urlh = request_source_file(confirmed_source_url, 'confirmed source', data=b'')
confirmed_source_url = update_url_query(source_url, {
'confirm': confirm,
})
urlh = request_source_file(confirmed_source_url, 'confirmed source')
if urlh and urlh.headers.get('Content-Disposition'): if urlh and urlh.headers.get('Content-Disposition'):
add_source_format(urlh) add_source_format(urlh)
else: else:

View file

@ -126,7 +126,7 @@ class HKETVIE(InfoExtractor):
# If we ever wanted to provide the final resolved URL that # If we ever wanted to provide the final resolved URL that
# does not require cookies, albeit with a shorter lifespan: # does not require cookies, albeit with a shorter lifespan:
# urlh = self._downloader.urlopen(file_url) # urlh = self._downloader.urlopen(file_url)
# resolved_url = urlh.geturl() # resolved_url = urlh.url
label = fmt.get('label') label = fmt.get('label')
h = self._FORMAT_HEIGHTS.get(label) h = self._FORMAT_HEIGHTS.get(label)
w = h * width // height if h and width and height else None w = h * width // height if h and width and height else None

View file

@ -1,11 +1,7 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_b64decode from ..compat import compat_b64decode
from ..utils import ( from ..networking import HEADRequest, Request
ExtractorError, from ..utils import ExtractorError, urlencode_postdata
HEADRequest,
sanitized_Request,
urlencode_postdata,
)
class HotNewHipHopIE(InfoExtractor): class HotNewHipHopIE(InfoExtractor):
@ -36,9 +32,9 @@ class HotNewHipHopIE(InfoExtractor):
('mediaType', 's'), ('mediaType', 's'),
('mediaId', video_id), ('mediaId', video_id),
]) ])
r = sanitized_Request( r = Request(
'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata) 'http://www.hotnewhiphop.com/ajax/media/getActions/', data=reqdata)
r.add_header('Content-Type', 'application/x-www-form-urlencoded') r.headers['Content-Type'] = 'application/x-www-form-urlencoded'
mkd = self._download_json( mkd = self._download_json(
r, video_id, note='Requesting media key', r, video_id, note='Requesting media key',
errnote='Could not download media key') errnote='Could not download media key')
@ -50,7 +46,7 @@ class HotNewHipHopIE(InfoExtractor):
req = self._request_webpage( req = self._request_webpage(
redirect_req, video_id, redirect_req, video_id,
note='Resolving final URL', errnote='Could not resolve final URL') note='Resolving final URL', errnote='Could not resolve final URL')
video_url = req.geturl() video_url = req.url
if video_url.endswith('.html'): if video_url.endswith('.html'):
raise ExtractorError('Redirect failed') raise ExtractorError('Redirect failed')

View file

@ -6,7 +6,8 @@ import time
import uuid import uuid
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError, compat_str from ..compat import compat_str
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
determine_ext, determine_ext,
@ -233,7 +234,7 @@ class HotStarIE(HotStarBaseIE):
'height': int_or_none(playback_set.get('height')), 'height': int_or_none(playback_set.get('height')),
}] }]
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: if isinstance(e.cause, HTTPError) and e.cause.status == 403:
geo_restricted = True geo_restricted = True
continue continue

View file

@ -1,13 +1,13 @@
import json import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError from ..networking import Request
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
clean_html, clean_html,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
parse_age_limit, parse_age_limit,
sanitized_Request,
try_get, try_get,
) )
@ -42,7 +42,7 @@ class HRTiBaseIE(InfoExtractor):
'application_version': self._APP_VERSION 'application_version': self._APP_VERSION
} }
req = sanitized_Request(self._API_URL, data=json.dumps(app_data).encode('utf-8')) req = Request(self._API_URL, data=json.dumps(app_data).encode('utf-8'))
req.get_method = lambda: 'PUT' req.get_method = lambda: 'PUT'
resources = self._download_json( resources = self._download_json(
@ -73,8 +73,8 @@ class HRTiBaseIE(InfoExtractor):
self._login_url, None, note='Logging in', errnote='Unable to log in', self._login_url, None, note='Logging in', errnote='Unable to log in',
data=json.dumps(auth_data).encode('utf-8')) data=json.dumps(auth_data).encode('utf-8'))
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 406: if isinstance(e.cause, HTTPError) and e.cause.status == 406:
auth_info = self._parse_json(e.cause.read().encode('utf-8'), None) auth_info = self._parse_json(e.cause.response.read().encode('utf-8'), None)
else: else:
raise raise

View file

@ -1,8 +1,9 @@
import re import re
import urllib.error import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_parse_qs from ..compat import compat_parse_qs
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
determine_ext, determine_ext,
@ -27,9 +28,9 @@ class IGNBaseIE(InfoExtractor):
try: try:
return self._call_api(slug) return self._call_api(slug)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404: if isinstance(e.cause, HTTPError) and e.cause.status == 404:
e.cause.args = e.cause.args or [ e.cause.args = e.cause.args or [
e.cause.geturl(), e.cause.getcode(), e.cause.reason] e.cause.response.url, e.cause.status, e.cause.reason]
raise ExtractorError( raise ExtractorError(
'Content not found: expired?', cause=e.cause, 'Content not found: expired?', cause=e.cause,
expected=True) expected=True)
@ -226,7 +227,7 @@ class IGNVideoIE(IGNBaseIE):
parsed_url._replace(path=parsed_url.path.rsplit('/', 1)[0] + '/embed')) parsed_url._replace(path=parsed_url.path.rsplit('/', 1)[0] + '/embed'))
webpage, urlh = self._download_webpage_handle(embed_url, video_id) webpage, urlh = self._download_webpage_handle(embed_url, video_id)
new_url = urlh.geturl() new_url = urlh.url
ign_url = compat_parse_qs( ign_url = compat_parse_qs(
urllib.parse.urlparse(new_url).query).get('url', [None])[-1] urllib.parse.urlparse(new_url).query).get('url', [None])[-1]
if ign_url: if ign_url:
@ -323,14 +324,14 @@ class IGNArticleIE(IGNBaseIE):
try: try:
return self._call_api(slug) return self._call_api(slug)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, urllib.error.HTTPError): if isinstance(e.cause, HTTPError):
e.cause.args = e.cause.args or [ e.cause.args = e.cause.args or [
e.cause.geturl(), e.cause.getcode(), e.cause.reason] e.cause.response.url, e.cause.status, e.cause.reason]
if e.cause.code == 404: if e.cause.status == 404:
raise ExtractorError( raise ExtractorError(
'Content not found: expired?', cause=e.cause, 'Content not found: expired?', cause=e.cause,
expected=True) expected=True)
elif e.cause.code == 503: elif e.cause.status == 503:
self.report_warning(error_to_compat_str(e.cause)) self.report_warning(error_to_compat_str(e.cause))
return return
raise raise

View file

@ -1,7 +1,7 @@
import json import json
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@ -52,9 +52,9 @@ class ImgGamingBaseIE(InfoExtractor):
return self._call_api( return self._call_api(
stream_path, media_id)['playerUrlCallback'] stream_path, media_id)['playerUrlCallback']
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: if isinstance(e.cause, HTTPError) and e.cause.status == 403:
raise ExtractorError( raise ExtractorError(
self._parse_json(e.cause.read().decode(), media_id)['messages'][0], self._parse_json(e.cause.response.read().decode(), media_id)['messages'][0],
expected=True) expected=True)
raise raise

View file

@ -3,9 +3,9 @@ import itertools
import json import json
import re import re
import time import time
import urllib.error
from .common import InfoExtractor from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
decode_base_n, decode_base_n,
@ -442,7 +442,7 @@ class InstagramIE(InstagramBaseIE):
shared_data = self._search_json( shared_data = self._search_json(
r'window\._sharedData\s*=', webpage, 'shared data', video_id, fatal=False) or {} r'window\._sharedData\s*=', webpage, 'shared data', video_id, fatal=False) or {}
if shared_data and self._LOGIN_URL not in urlh.geturl(): if shared_data and self._LOGIN_URL not in urlh.url:
media.update(traverse_obj( media.update(traverse_obj(
shared_data, ('entry_data', 'PostPage', 0, 'graphql', 'shortcode_media'), shared_data, ('entry_data', 'PostPage', 0, 'graphql', 'shortcode_media'),
('entry_data', 'PostPage', 0, 'media'), expected_type=dict) or {}) ('entry_data', 'PostPage', 0, 'media'), expected_type=dict) or {})
@ -589,7 +589,7 @@ class InstagramPlaylistBaseIE(InstagramBaseIE):
except ExtractorError as e: except ExtractorError as e:
# if it's an error caused by a bad query, and there are # if it's an error caused by a bad query, and there are
# more GIS templates to try, ignore it and keep trying # more GIS templates to try, ignore it and keep trying
if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 403: if isinstance(e.cause, HTTPError) and e.cause.status == 403:
if gis_tmpl != gis_tmpls[-1]: if gis_tmpl != gis_tmpls[-1]:
continue continue
raise raise

View file

@ -81,7 +81,7 @@ class IPrimaIE(InfoExtractor):
note='Logging in') note='Logging in')
# a profile may need to be selected first, even when there is only a single one # a profile may need to be selected first, even when there is only a single one
if '/profile-select' in login_handle.geturl(): if '/profile-select' in login_handle.url:
profile_id = self._search_regex( profile_id = self._search_regex(
r'data-identifier\s*=\s*["\']?(\w+)', profile_select_html, 'profile id') r'data-identifier\s*=\s*["\']?(\w+)', profile_select_html, 'profile id')
@ -89,7 +89,7 @@ class IPrimaIE(InfoExtractor):
f'{self._AUTH_ROOT}/user/profile-select-perform/{profile_id}', None, f'{self._AUTH_ROOT}/user/profile-select-perform/{profile_id}', None,
query={'continueUrl': '/user/login?redirect_uri=/user/'}, note='Selecting profile') query={'continueUrl': '/user/login?redirect_uri=/user/'}, note='Selecting profile')
code = traverse_obj(login_handle.geturl(), ({parse_qs}, 'code', 0)) code = traverse_obj(login_handle.url, ({parse_qs}, 'code', 0))
if not code: if not code:
raise ExtractorError('Login failed', expected=True) raise ExtractorError('Login failed', expected=True)

View file

@ -527,7 +527,7 @@ class IqIE(InfoExtractor):
if player_js_cache: if player_js_cache:
return player_js_cache return player_js_cache
webpack_js_url = self._proto_relative_url(self._search_regex( webpack_js_url = self._proto_relative_url(self._search_regex(
r'<script src="((?:https?)?//stc.iqiyipic.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL')) r'<script src="((?:https?:)?//stc\.iqiyipic\.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL'))
webpack_js = self._download_webpage(webpack_js_url, video_id, note='Downloading webpack JS', errnote='Unable to download webpack JS') webpack_js = self._download_webpage(webpack_js_url, video_id, note='Downloading webpack JS', errnote='Unable to download webpack JS')
webpack_map = self._search_json( webpack_map = self._search_json(
r'["\']\s*\+\s*', webpack_js, 'JS locations', video_id, r'["\']\s*\+\s*', webpack_js, 'JS locations', video_id,

View file

@ -1,5 +1,5 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_HTTPError from ..networking.exceptions import HTTPError
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@ -101,8 +101,8 @@ class KakaoIE(InfoExtractor):
cdn_api_base, video_id, query=query, cdn_api_base, video_id, query=query,
note='Downloading video URL for profile %s' % profile_name) note='Downloading video URL for profile %s' % profile_name)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: if isinstance(e.cause, HTTPError) and e.cause.status == 403:
resp = self._parse_json(e.cause.read().decode(), video_id) resp = self._parse_json(e.cause.response.read().decode(), video_id)
if resp.get('code') == 'GeoBlocked': if resp.get('code') == 'GeoBlocked':
self.raise_geo_restricted() self.raise_geo_restricted()
raise raise

View file

@ -1,7 +1,6 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import ( from ..utils import (
HEADRequest,
UserNotLive, UserNotLive,
float_or_none, float_or_none,
merge_dicts, merge_dicts,
@ -30,7 +29,7 @@ class KickBaseIE(InfoExtractor):
class KickIE(KickBaseIE): class KickIE(KickBaseIE):
_VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w_]+)' _VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w-]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://kick.com/yuppy', 'url': 'https://kick.com/yuppy',
'info_dict': { 'info_dict': {

View file

@ -91,7 +91,7 @@ class KuwoIE(KuwoBaseIE):
webpage, urlh = self._download_webpage_handle( webpage, urlh = self._download_webpage_handle(
url, song_id, note='Download song detail info', url, song_id, note='Download song detail info',
errnote='Unable to get song detail info') errnote='Unable to get song detail info')
if song_id not in urlh.geturl() or '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage: if song_id not in urlh.url or '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage:
raise ExtractorError('this song has been offline because of copyright issues', expected=True) raise ExtractorError('this song has been offline because of copyright issues', expected=True)
song_name = self._html_search_regex( song_name = self._html_search_regex(

View file

@ -1,13 +1,8 @@
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..networking import HEADRequest
float_or_none, from ..utils import float_or_none, int_or_none, parse_duration, unified_strdate
HEADRequest,
int_or_none,
parse_duration,
unified_strdate,
)
class LA7IE(InfoExtractor): class LA7IE(InfoExtractor):

View file

@ -3,9 +3,9 @@ import json
import urllib.parse import urllib.parse
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
HEADRequest,
OnDemandPagedList, OnDemandPagedList,
UnsupportedError, UnsupportedError,
determine_ext, determine_ext,
@ -266,7 +266,7 @@ class LBRYIE(LBRYBaseIE):
# HEAD request returns redirect response to m3u8 URL if available # HEAD request returns redirect response to m3u8 URL if available
final_url = self._request_webpage( final_url = self._request_webpage(
HEADRequest(streaming_url), display_id, headers=headers, HEADRequest(streaming_url), display_id, headers=headers,
note='Downloading streaming redirect url info').geturl() note='Downloading streaming redirect url info').url
elif result.get('value_type') == 'stream': elif result.get('value_type') == 'stream':
claim_id, is_live = result['signing_channel']['claim_id'], True claim_id, is_live = result['signing_channel']['claim_id'], True

Some files were not shown because too many files have changed in this diff Show more