mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-20 10:26:21 +01:00
Merge remote-tracking branch 'origin/master' into yt-live-from-start-range
This commit is contained in:
commit
444e02ef3b
74 changed files with 5099 additions and 1935 deletions
2
.github/workflows/build.yml
vendored
2
.github/workflows/build.yml
vendored
|
@ -192,7 +192,7 @@ jobs:
|
||||||
- name: Install Requirements
|
- name: Install Requirements
|
||||||
run: |
|
run: |
|
||||||
brew install coreutils
|
brew install coreutils
|
||||||
/usr/bin/python3 -m pip install -U --user pip Pyinstaller -r requirements.txt
|
/usr/bin/python3 -m pip install -U --user pip Pyinstaller==5.8 -r requirements.txt
|
||||||
|
|
||||||
- name: Prepare
|
- name: Prepare
|
||||||
run: |
|
run: |
|
||||||
|
|
28
README.md
28
README.md
|
@ -463,15 +463,11 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
|
||||||
specified by --proxy (or none, if the option
|
specified by --proxy (or none, if the option
|
||||||
is not present) is used for the actual
|
is not present) is used for the actual
|
||||||
downloading
|
downloading
|
||||||
--geo-bypass Bypass geographic restriction via faking
|
--xff VALUE How to fake X-Forwarded-For HTTP header to
|
||||||
X-Forwarded-For HTTP header (default)
|
try bypassing geographic restriction. One of
|
||||||
--no-geo-bypass Do not bypass geographic restriction via
|
"default" (Only when known to be useful),
|
||||||
faking X-Forwarded-For HTTP header
|
"never", a two-letter ISO 3166-1 alpha-2 country
|
||||||
--geo-bypass-country CODE Force bypass geographic restriction with
|
code, or an IP block in CIDR notation
|
||||||
explicitly provided two-letter ISO 3166-2
|
|
||||||
country code
|
|
||||||
--geo-bypass-ip-block IP_BLOCK Force bypass geographic restriction with
|
|
||||||
explicitly provided IP block in CIDR notation
|
|
||||||
|
|
||||||
## Video Selection:
|
## Video Selection:
|
||||||
-I, --playlist-items ITEM_SPEC Comma separated playlist_index of the items
|
-I, --playlist-items ITEM_SPEC Comma separated playlist_index of the items
|
||||||
|
@ -752,6 +748,7 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
|
||||||
## Verbosity and Simulation Options:
|
## Verbosity and Simulation Options:
|
||||||
-q, --quiet Activate quiet mode. If used with --verbose,
|
-q, --quiet Activate quiet mode. If used with --verbose,
|
||||||
print the log to stderr
|
print the log to stderr
|
||||||
|
--no-quiet Deactivate quiet mode. (Default)
|
||||||
--no-warnings Ignore warnings
|
--no-warnings Ignore warnings
|
||||||
-s, --simulate Do not download the video and do not write
|
-s, --simulate Do not download the video and do not write
|
||||||
anything to disk
|
anything to disk
|
||||||
|
@ -1246,7 +1243,7 @@ The field names themselves (the part inside the parenthesis) can also have some
|
||||||
|
|
||||||
1. **Alternatives**: Alternate fields can be specified separated with a `,`. E.g. `%(release_date>%Y,upload_date>%Y|Unknown)s`
|
1. **Alternatives**: Alternate fields can be specified separated with a `,`. E.g. `%(release_date>%Y,upload_date>%Y|Unknown)s`
|
||||||
|
|
||||||
1. **Replacement**: A replacement value can be specified using a `&` separator. If the field is *not* empty, this replacement value will be used instead of the actual field content. This is done after alternate fields are considered; thus the replacement is used if *any* of the alternative fields is *not* empty.
|
1. **Replacement**: A replacement value can be specified using a `&` separator. The replacement is interpreted according to the [`str.format` mini-language](https://docs.python.org/3/library/string.html#format-specification-mini-language), with the field's value as the format argument. If the field is *not* empty, this replacement value will be used instead of the actual field content. This is done after alternate fields are considered; thus the replacement is used if *any* of the alternate fields is *not* empty. E.g. `%(chapters&has chapters|no chapters)s`, `%(title&TITLE={:>20}|NO TITLE)s`
|
||||||
|
|
||||||
1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-placeholder`. E.g. `%(uploader|Unknown)s`
|
1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-placeholder`. E.g. `%(uploader|Unknown)s`
|
||||||
|
|
||||||
|
@ -1797,7 +1794,10 @@ The following extractors use this feature:
|
||||||
* `approximate_date`: Extract approximate `upload_date` and `timestamp` in flat-playlist. This may cause date-based filters to be slightly off
|
* `approximate_date`: Extract approximate `upload_date` and `timestamp` in flat-playlist. This may cause date-based filters to be slightly off
|
||||||
|
|
||||||
#### generic
|
#### generic
|
||||||
* `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments. Does not apply to ffmpeg
|
* `fragment_query`: Pass through any query in mpd/m3u8 manifest URLs to their fragments if no value is provided, or else apply the query string given as `fragment_query=VALUE`. Does not apply to ffmpeg
|
||||||
|
* `variant_query`: Pass through the master m3u8 URL query to its variant playlist URLs if no value is provided, or else apply the query string given as `variant_query=VALUE`
|
||||||
|
* `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist
|
||||||
|
* `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live`
|
||||||
|
|
||||||
#### funimation
|
#### funimation
|
||||||
* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
|
* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
|
||||||
|
@ -1833,7 +1833,7 @@ The following extractors use this feature:
|
||||||
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
|
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
|
||||||
|
|
||||||
#### twitter
|
#### twitter
|
||||||
* `force_graphql`: Force usage of the GraphQL API. By default it will only be used if login cookies are provided
|
* `legacy_api`: Force usage of the legacy Twitter API instead of the GraphQL API for tweet extraction. Has no effect if login cookies are passed
|
||||||
|
|
||||||
**Note**: These options may be changed/removed in the future without concern for backward compatibility
|
**Note**: These options may be changed/removed in the future without concern for backward compatibility
|
||||||
|
|
||||||
|
@ -2164,6 +2164,10 @@ While these options still work, their use is not recommended since there are oth
|
||||||
--youtube-skip-hls-manifest --extractor-args "youtube:skip=hls" (Alias: --no-youtube-include-hls-manifest)
|
--youtube-skip-hls-manifest --extractor-args "youtube:skip=hls" (Alias: --no-youtube-include-hls-manifest)
|
||||||
--youtube-include-dash-manifest Default (Alias: --no-youtube-skip-dash-manifest)
|
--youtube-include-dash-manifest Default (Alias: --no-youtube-skip-dash-manifest)
|
||||||
--youtube-include-hls-manifest Default (Alias: --no-youtube-skip-hls-manifest)
|
--youtube-include-hls-manifest Default (Alias: --no-youtube-skip-hls-manifest)
|
||||||
|
--geo-bypass --xff "default"
|
||||||
|
--no-geo-bypass --xff "never"
|
||||||
|
--geo-bypass-country CODE --xff CODE
|
||||||
|
--geo-bypass-ip-block IP_BLOCK --xff IP_BLOCK
|
||||||
|
|
||||||
|
|
||||||
#### Developer options
|
#### Developer options
|
||||||
|
|
|
@ -1406,6 +1406,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
'acodec': 'AACL',
|
'acodec': 'AACL',
|
||||||
'protocol': 'ism',
|
'protocol': 'ism',
|
||||||
|
'audio_channels': 2,
|
||||||
'_download_params': {
|
'_download_params': {
|
||||||
'stream_type': 'audio',
|
'stream_type': 'audio',
|
||||||
'duration': 8880746666,
|
'duration': 8880746666,
|
||||||
|
@ -1419,9 +1420,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'bits_per_sample': 16,
|
'bits_per_sample': 16,
|
||||||
'nal_unit_length_field': 4
|
'nal_unit_length_field': 4
|
||||||
},
|
},
|
||||||
'audio_ext': 'isma',
|
|
||||||
'video_ext': 'none',
|
|
||||||
'abr': 128,
|
|
||||||
}, {
|
}, {
|
||||||
'format_id': 'video-100',
|
'format_id': 'video-100',
|
||||||
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
|
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
|
||||||
|
@ -1445,9 +1443,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'bits_per_sample': 16,
|
'bits_per_sample': 16,
|
||||||
'nal_unit_length_field': 4
|
'nal_unit_length_field': 4
|
||||||
},
|
},
|
||||||
'video_ext': 'ismv',
|
|
||||||
'audio_ext': 'none',
|
|
||||||
'vbr': 100,
|
|
||||||
}, {
|
}, {
|
||||||
'format_id': 'video-326',
|
'format_id': 'video-326',
|
||||||
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
|
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
|
||||||
|
@ -1471,9 +1466,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'bits_per_sample': 16,
|
'bits_per_sample': 16,
|
||||||
'nal_unit_length_field': 4
|
'nal_unit_length_field': 4
|
||||||
},
|
},
|
||||||
'video_ext': 'ismv',
|
|
||||||
'audio_ext': 'none',
|
|
||||||
'vbr': 326,
|
|
||||||
}, {
|
}, {
|
||||||
'format_id': 'video-698',
|
'format_id': 'video-698',
|
||||||
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
|
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
|
||||||
|
@ -1497,9 +1489,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'bits_per_sample': 16,
|
'bits_per_sample': 16,
|
||||||
'nal_unit_length_field': 4
|
'nal_unit_length_field': 4
|
||||||
},
|
},
|
||||||
'video_ext': 'ismv',
|
|
||||||
'audio_ext': 'none',
|
|
||||||
'vbr': 698,
|
|
||||||
}, {
|
}, {
|
||||||
'format_id': 'video-1493',
|
'format_id': 'video-1493',
|
||||||
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
|
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
|
||||||
|
@ -1523,9 +1512,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'bits_per_sample': 16,
|
'bits_per_sample': 16,
|
||||||
'nal_unit_length_field': 4
|
'nal_unit_length_field': 4
|
||||||
},
|
},
|
||||||
'video_ext': 'ismv',
|
|
||||||
'audio_ext': 'none',
|
|
||||||
'vbr': 1493,
|
|
||||||
}, {
|
}, {
|
||||||
'format_id': 'video-4482',
|
'format_id': 'video-4482',
|
||||||
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
|
'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest',
|
||||||
|
@ -1549,9 +1535,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'bits_per_sample': 16,
|
'bits_per_sample': 16,
|
||||||
'nal_unit_length_field': 4
|
'nal_unit_length_field': 4
|
||||||
},
|
},
|
||||||
'video_ext': 'ismv',
|
|
||||||
'audio_ext': 'none',
|
|
||||||
'vbr': 4482,
|
|
||||||
}],
|
}],
|
||||||
{
|
{
|
||||||
'eng': [
|
'eng': [
|
||||||
|
@ -1575,34 +1558,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'ec-3_test',
|
'ec-3_test',
|
||||||
'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||||
[{
|
[{
|
||||||
'format_id': 'audio_deu_1-224',
|
|
||||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
|
||||||
'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
|
||||||
'ext': 'isma',
|
|
||||||
'tbr': 224,
|
|
||||||
'asr': 48000,
|
|
||||||
'vcodec': 'none',
|
|
||||||
'acodec': 'EC-3',
|
|
||||||
'protocol': 'ism',
|
|
||||||
'_download_params':
|
|
||||||
{
|
|
||||||
'stream_type': 'audio',
|
|
||||||
'duration': 370000000,
|
|
||||||
'timescale': 10000000,
|
|
||||||
'width': 0,
|
|
||||||
'height': 0,
|
|
||||||
'fourcc': 'EC-3',
|
|
||||||
'language': 'deu',
|
|
||||||
'codec_private_data': '00063F000000AF87FBA7022DFB42A4D405CD93843BDD0700200F00',
|
|
||||||
'sampling_rate': 48000,
|
|
||||||
'channels': 6,
|
|
||||||
'bits_per_sample': 16,
|
|
||||||
'nal_unit_length_field': 4
|
|
||||||
},
|
|
||||||
'audio_ext': 'isma',
|
|
||||||
'video_ext': 'none',
|
|
||||||
'abr': 224,
|
|
||||||
}, {
|
|
||||||
'format_id': 'audio_deu-127',
|
'format_id': 'audio_deu-127',
|
||||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||||
'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||||
|
@ -1612,8 +1567,9 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
'acodec': 'AACL',
|
'acodec': 'AACL',
|
||||||
'protocol': 'ism',
|
'protocol': 'ism',
|
||||||
'_download_params':
|
'language': 'deu',
|
||||||
{
|
'audio_channels': 2,
|
||||||
|
'_download_params': {
|
||||||
'stream_type': 'audio',
|
'stream_type': 'audio',
|
||||||
'duration': 370000000,
|
'duration': 370000000,
|
||||||
'timescale': 10000000,
|
'timescale': 10000000,
|
||||||
|
@ -1627,9 +1583,32 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'bits_per_sample': 16,
|
'bits_per_sample': 16,
|
||||||
'nal_unit_length_field': 4
|
'nal_unit_length_field': 4
|
||||||
},
|
},
|
||||||
'audio_ext': 'isma',
|
}, {
|
||||||
'video_ext': 'none',
|
'format_id': 'audio_deu_1-224',
|
||||||
'abr': 127,
|
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||||
|
'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||||
|
'ext': 'isma',
|
||||||
|
'tbr': 224,
|
||||||
|
'asr': 48000,
|
||||||
|
'vcodec': 'none',
|
||||||
|
'acodec': 'EC-3',
|
||||||
|
'protocol': 'ism',
|
||||||
|
'language': 'deu',
|
||||||
|
'audio_channels': 6,
|
||||||
|
'_download_params': {
|
||||||
|
'stream_type': 'audio',
|
||||||
|
'duration': 370000000,
|
||||||
|
'timescale': 10000000,
|
||||||
|
'width': 0,
|
||||||
|
'height': 0,
|
||||||
|
'fourcc': 'EC-3',
|
||||||
|
'language': 'deu',
|
||||||
|
'codec_private_data': '00063F000000AF87FBA7022DFB42A4D405CD93843BDD0700200F00',
|
||||||
|
'sampling_rate': 48000,
|
||||||
|
'channels': 6,
|
||||||
|
'bits_per_sample': 16,
|
||||||
|
'nal_unit_length_field': 4
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'format_id': 'video_deu-23',
|
'format_id': 'video_deu-23',
|
||||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||||
|
@ -1641,8 +1620,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'vcodec': 'AVC1',
|
'vcodec': 'AVC1',
|
||||||
'acodec': 'none',
|
'acodec': 'none',
|
||||||
'protocol': 'ism',
|
'protocol': 'ism',
|
||||||
'_download_params':
|
'language': 'deu',
|
||||||
{
|
'_download_params': {
|
||||||
'stream_type': 'video',
|
'stream_type': 'video',
|
||||||
'duration': 370000000,
|
'duration': 370000000,
|
||||||
'timescale': 10000000,
|
'timescale': 10000000,
|
||||||
|
@ -1655,9 +1634,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'bits_per_sample': 16,
|
'bits_per_sample': 16,
|
||||||
'nal_unit_length_field': 4
|
'nal_unit_length_field': 4
|
||||||
},
|
},
|
||||||
'video_ext': 'ismv',
|
|
||||||
'audio_ext': 'none',
|
|
||||||
'vbr': 23,
|
|
||||||
}, {
|
}, {
|
||||||
'format_id': 'video_deu-403',
|
'format_id': 'video_deu-403',
|
||||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||||
|
@ -1669,8 +1645,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'vcodec': 'AVC1',
|
'vcodec': 'AVC1',
|
||||||
'acodec': 'none',
|
'acodec': 'none',
|
||||||
'protocol': 'ism',
|
'protocol': 'ism',
|
||||||
'_download_params':
|
'language': 'deu',
|
||||||
{
|
'_download_params': {
|
||||||
'stream_type': 'video',
|
'stream_type': 'video',
|
||||||
'duration': 370000000,
|
'duration': 370000000,
|
||||||
'timescale': 10000000,
|
'timescale': 10000000,
|
||||||
|
@ -1683,9 +1659,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'bits_per_sample': 16,
|
'bits_per_sample': 16,
|
||||||
'nal_unit_length_field': 4
|
'nal_unit_length_field': 4
|
||||||
},
|
},
|
||||||
'video_ext': 'ismv',
|
|
||||||
'audio_ext': 'none',
|
|
||||||
'vbr': 403,
|
|
||||||
}, {
|
}, {
|
||||||
'format_id': 'video_deu-680',
|
'format_id': 'video_deu-680',
|
||||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||||
|
@ -1697,8 +1670,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'vcodec': 'AVC1',
|
'vcodec': 'AVC1',
|
||||||
'acodec': 'none',
|
'acodec': 'none',
|
||||||
'protocol': 'ism',
|
'protocol': 'ism',
|
||||||
'_download_params':
|
'language': 'deu',
|
||||||
{
|
'_download_params': {
|
||||||
'stream_type': 'video',
|
'stream_type': 'video',
|
||||||
'duration': 370000000,
|
'duration': 370000000,
|
||||||
'timescale': 10000000,
|
'timescale': 10000000,
|
||||||
|
@ -1711,9 +1684,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'bits_per_sample': 16,
|
'bits_per_sample': 16,
|
||||||
'nal_unit_length_field': 4
|
'nal_unit_length_field': 4
|
||||||
},
|
},
|
||||||
'video_ext': 'ismv',
|
|
||||||
'audio_ext': 'none',
|
|
||||||
'vbr': 680,
|
|
||||||
}, {
|
}, {
|
||||||
'format_id': 'video_deu-1253',
|
'format_id': 'video_deu-1253',
|
||||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||||
|
@ -1725,8 +1695,9 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'vcodec': 'AVC1',
|
'vcodec': 'AVC1',
|
||||||
'acodec': 'none',
|
'acodec': 'none',
|
||||||
'protocol': 'ism',
|
'protocol': 'ism',
|
||||||
'_download_params':
|
'vbr': 1253,
|
||||||
{
|
'language': 'deu',
|
||||||
|
'_download_params': {
|
||||||
'stream_type': 'video',
|
'stream_type': 'video',
|
||||||
'duration': 370000000,
|
'duration': 370000000,
|
||||||
'timescale': 10000000,
|
'timescale': 10000000,
|
||||||
|
@ -1739,9 +1710,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'bits_per_sample': 16,
|
'bits_per_sample': 16,
|
||||||
'nal_unit_length_field': 4
|
'nal_unit_length_field': 4
|
||||||
},
|
},
|
||||||
'video_ext': 'ismv',
|
|
||||||
'audio_ext': 'none',
|
|
||||||
'vbr': 1253,
|
|
||||||
}, {
|
}, {
|
||||||
'format_id': 'video_deu-2121',
|
'format_id': 'video_deu-2121',
|
||||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||||
|
@ -1753,8 +1721,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'vcodec': 'AVC1',
|
'vcodec': 'AVC1',
|
||||||
'acodec': 'none',
|
'acodec': 'none',
|
||||||
'protocol': 'ism',
|
'protocol': 'ism',
|
||||||
'_download_params':
|
'language': 'deu',
|
||||||
{
|
'_download_params': {
|
||||||
'stream_type': 'video',
|
'stream_type': 'video',
|
||||||
'duration': 370000000,
|
'duration': 370000000,
|
||||||
'timescale': 10000000,
|
'timescale': 10000000,
|
||||||
|
@ -1767,9 +1735,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'bits_per_sample': 16,
|
'bits_per_sample': 16,
|
||||||
'nal_unit_length_field': 4
|
'nal_unit_length_field': 4
|
||||||
},
|
},
|
||||||
'video_ext': 'ismv',
|
|
||||||
'audio_ext': 'none',
|
|
||||||
'vbr': 2121,
|
|
||||||
}, {
|
}, {
|
||||||
'format_id': 'video_deu-3275',
|
'format_id': 'video_deu-3275',
|
||||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||||
|
@ -1781,8 +1746,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'vcodec': 'AVC1',
|
'vcodec': 'AVC1',
|
||||||
'acodec': 'none',
|
'acodec': 'none',
|
||||||
'protocol': 'ism',
|
'protocol': 'ism',
|
||||||
'_download_params':
|
'language': 'deu',
|
||||||
{
|
'_download_params': {
|
||||||
'stream_type': 'video',
|
'stream_type': 'video',
|
||||||
'duration': 370000000,
|
'duration': 370000000,
|
||||||
'timescale': 10000000,
|
'timescale': 10000000,
|
||||||
|
@ -1795,9 +1760,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'bits_per_sample': 16,
|
'bits_per_sample': 16,
|
||||||
'nal_unit_length_field': 4
|
'nal_unit_length_field': 4
|
||||||
},
|
},
|
||||||
'video_ext': 'ismv',
|
|
||||||
'audio_ext': 'none',
|
|
||||||
'vbr': 3275,
|
|
||||||
}, {
|
}, {
|
||||||
'format_id': 'video_deu-5300',
|
'format_id': 'video_deu-5300',
|
||||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||||
|
@ -1809,8 +1771,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'vcodec': 'AVC1',
|
'vcodec': 'AVC1',
|
||||||
'acodec': 'none',
|
'acodec': 'none',
|
||||||
'protocol': 'ism',
|
'protocol': 'ism',
|
||||||
'_download_params':
|
'language': 'deu',
|
||||||
{
|
'_download_params': {
|
||||||
'stream_type': 'video',
|
'stream_type': 'video',
|
||||||
'duration': 370000000,
|
'duration': 370000000,
|
||||||
'timescale': 10000000,
|
'timescale': 10000000,
|
||||||
|
@ -1823,9 +1785,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'bits_per_sample': 16,
|
'bits_per_sample': 16,
|
||||||
'nal_unit_length_field': 4
|
'nal_unit_length_field': 4
|
||||||
},
|
},
|
||||||
'video_ext': 'ismv',
|
|
||||||
'audio_ext': 'none',
|
|
||||||
'vbr': 5300,
|
|
||||||
}, {
|
}, {
|
||||||
'format_id': 'video_deu-8079',
|
'format_id': 'video_deu-8079',
|
||||||
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest',
|
||||||
|
@ -1837,8 +1796,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'vcodec': 'AVC1',
|
'vcodec': 'AVC1',
|
||||||
'acodec': 'none',
|
'acodec': 'none',
|
||||||
'protocol': 'ism',
|
'protocol': 'ism',
|
||||||
'_download_params':
|
'language': 'deu',
|
||||||
{
|
'_download_params': {
|
||||||
'stream_type': 'video',
|
'stream_type': 'video',
|
||||||
'duration': 370000000,
|
'duration': 370000000,
|
||||||
'timescale': 10000000,
|
'timescale': 10000000,
|
||||||
|
@ -1851,9 +1810,6 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||||
'bits_per_sample': 16,
|
'bits_per_sample': 16,
|
||||||
'nal_unit_length_field': 4
|
'nal_unit_length_field': 4
|
||||||
},
|
},
|
||||||
'video_ext': 'ismv',
|
|
||||||
'audio_ext': 'none',
|
|
||||||
'vbr': 8079,
|
|
||||||
}],
|
}],
|
||||||
{},
|
{},
|
||||||
),
|
),
|
||||||
|
|
|
@ -822,6 +822,10 @@ class TestYoutubeDL(unittest.TestCase):
|
||||||
test('%(title&foo|baz)s.bar', 'baz.bar')
|
test('%(title&foo|baz)s.bar', 'baz.bar')
|
||||||
test('%(x,id&foo|baz)s.bar', 'foo.bar')
|
test('%(x,id&foo|baz)s.bar', 'foo.bar')
|
||||||
test('%(x,title&foo|baz)s.bar', 'baz.bar')
|
test('%(x,title&foo|baz)s.bar', 'baz.bar')
|
||||||
|
test('%(id&a\nb|)s', ('a\nb', 'a b'))
|
||||||
|
test('%(id&hi {:>10} {}|)s', 'hi 1234 1234')
|
||||||
|
test(R'%(id&{0} {}|)s', 'NA')
|
||||||
|
test(R'%(id&{0.1}|)s', 'NA')
|
||||||
|
|
||||||
# Laziness
|
# Laziness
|
||||||
def gen():
|
def gen():
|
||||||
|
|
|
@ -445,6 +445,22 @@ class TestJSInterpreter(unittest.TestCase):
|
||||||
jsi = JSInterpreter('function x(){return 1236566549 << 5}')
|
jsi = JSInterpreter('function x(){return 1236566549 << 5}')
|
||||||
self.assertEqual(jsi.call_function('x'), 915423904)
|
self.assertEqual(jsi.call_function('x'), 915423904)
|
||||||
|
|
||||||
|
def test_negative(self):
|
||||||
|
jsi = JSInterpreter("function f(){return 2 * -2.0;}")
|
||||||
|
self.assertEqual(jsi.call_function('f'), -4)
|
||||||
|
|
||||||
|
jsi = JSInterpreter('function f(){return 2 - - -2;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 0)
|
||||||
|
|
||||||
|
jsi = JSInterpreter('function f(){return 2 - - - -2;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 4)
|
||||||
|
|
||||||
|
jsi = JSInterpreter('function f(){return 2 - + + - -2;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 0)
|
||||||
|
|
||||||
|
jsi = JSInterpreter('function f(){return 2 + - + - -2;}')
|
||||||
|
self.assertEqual(jsi.call_function('f'), 0)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -1195,6 +1195,13 @@ class TestUtil(unittest.TestCase):
|
||||||
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
self.assertEqual(js_to_json('42a1'), '42"a1"')
|
||||||
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
self.assertEqual(js_to_json('42a-1'), '42"a"-1')
|
||||||
|
|
||||||
|
def test_js_to_json_template_literal(self):
|
||||||
|
self.assertEqual(js_to_json('`Hello ${name}`', {'name': '"world"'}), '"Hello world"')
|
||||||
|
self.assertEqual(js_to_json('`${name}${name}`', {'name': '"X"'}), '"XX"')
|
||||||
|
self.assertEqual(js_to_json('`${name}${name}`', {'name': '5'}), '"55"')
|
||||||
|
self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
|
||||||
|
self.assertEqual(js_to_json('`${name}`', {}), '"name"')
|
||||||
|
|
||||||
def test_extract_attributes(self):
|
def test_extract_attributes(self):
|
||||||
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
|
||||||
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
|
||||||
|
@ -2014,6 +2021,8 @@ Line 1
|
||||||
msg='nested `...` queries should work')
|
msg='nested `...` queries should work')
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, (..., ..., 'index')), range(4),
|
self.assertCountEqual(traverse_obj(_TEST_DATA, (..., ..., 'index')), range(4),
|
||||||
msg='`...` query result should be flattened')
|
msg='`...` query result should be flattened')
|
||||||
|
self.assertEqual(traverse_obj(iter(range(4)), ...), list(range(4)),
|
||||||
|
msg='`...` should accept iterables')
|
||||||
|
|
||||||
# Test function as key
|
# Test function as key
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
|
self.assertEqual(traverse_obj(_TEST_DATA, lambda x, y: x == 'urls' and isinstance(y, list)),
|
||||||
|
@ -2021,6 +2030,8 @@ Line 1
|
||||||
msg='function as query key should perform a filter based on (key, value)')
|
msg='function as query key should perform a filter based on (key, value)')
|
||||||
self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), {'str'},
|
self.assertCountEqual(traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)), {'str'},
|
||||||
msg='exceptions in the query function should be catched')
|
msg='exceptions in the query function should be catched')
|
||||||
|
self.assertEqual(traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0), [0, 2],
|
||||||
|
msg='function key should accept iterables')
|
||||||
if __debug__:
|
if __debug__:
|
||||||
with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
|
with self.assertRaises(Exception, msg='Wrong function signature should raise in debug'):
|
||||||
traverse_obj(_TEST_DATA, lambda a: ...)
|
traverse_obj(_TEST_DATA, lambda a: ...)
|
||||||
|
@ -2045,6 +2056,17 @@ Line 1
|
||||||
with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
|
with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'):
|
||||||
traverse_obj(_TEST_DATA, {str.upper, str})
|
traverse_obj(_TEST_DATA, {str.upper, str})
|
||||||
|
|
||||||
|
# Test `slice` as a key
|
||||||
|
_SLICE_DATA = [0, 1, 2, 3, 4]
|
||||||
|
self.assertEqual(traverse_obj(_TEST_DATA, ('dict', slice(1))), None,
|
||||||
|
msg='slice on a dictionary should not throw')
|
||||||
|
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1)), _SLICE_DATA[:1],
|
||||||
|
msg='slice key should apply slice to sequence')
|
||||||
|
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 2)), _SLICE_DATA[1:2],
|
||||||
|
msg='slice key should apply slice to sequence')
|
||||||
|
self.assertEqual(traverse_obj(_SLICE_DATA, slice(1, 4, 2)), _SLICE_DATA[1:4:2],
|
||||||
|
msg='slice key should apply slice to sequence')
|
||||||
|
|
||||||
# Test alternative paths
|
# Test alternative paths
|
||||||
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
|
self.assertEqual(traverse_obj(_TEST_DATA, 'fail', 'str'), 'str',
|
||||||
msg='multiple `paths` should be treated as alternative paths')
|
msg='multiple `paths` should be treated as alternative paths')
|
||||||
|
@ -2228,6 +2250,12 @@ Line 1
|
||||||
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
|
self.assertEqual(traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)),
|
||||||
traverse_string=True), ['s', 'r'],
|
traverse_string=True), ['s', 'r'],
|
||||||
msg='branching should result in list if `traverse_string`')
|
msg='branching should result in list if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj({}, (0, ...), traverse_string=True), [],
|
||||||
|
msg='branching should result in list if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj({}, (0, lambda x, y: True), traverse_string=True), [],
|
||||||
|
msg='branching should result in list if `traverse_string`')
|
||||||
|
self.assertEqual(traverse_obj({}, (0, slice(1)), traverse_string=True), [],
|
||||||
|
msg='branching should result in list if `traverse_string`')
|
||||||
|
|
||||||
# Test is_user_input behavior
|
# Test is_user_input behavior
|
||||||
_IS_USER_INPUT_DATA = {'range8': list(range(8))}
|
_IS_USER_INPUT_DATA = {'range8': list(range(8))}
|
||||||
|
|
|
@ -142,6 +142,10 @@ _NSIG_TESTS = [
|
||||||
'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
|
'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js',
|
||||||
'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
|
'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A',
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/6f20102c/player_ias.vflset/en_US/base.js',
|
||||||
|
'lE8DhoDmKqnmJJ', 'pJTTX6XyJP2BYw',
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -21,7 +21,7 @@ import tokenize
|
||||||
import traceback
|
import traceback
|
||||||
import unicodedata
|
import unicodedata
|
||||||
import urllib.request
|
import urllib.request
|
||||||
from string import ascii_letters
|
from string import Formatter, ascii_letters
|
||||||
|
|
||||||
from .cache import Cache
|
from .cache import Cache
|
||||||
from .compat import compat_os_name, compat_shlex_quote
|
from .compat import compat_os_name, compat_shlex_quote
|
||||||
|
@ -1161,7 +1161,7 @@ class YoutubeDL:
|
||||||
}
|
}
|
||||||
MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
|
MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
|
||||||
MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
|
MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
|
||||||
INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
|
INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
|
||||||
(?P<negate>-)?
|
(?P<negate>-)?
|
||||||
(?P<fields>{FIELD_RE})
|
(?P<fields>{FIELD_RE})
|
||||||
(?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
|
(?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
|
||||||
|
@ -1242,6 +1242,14 @@ class YoutubeDL:
|
||||||
return list(obj)
|
return list(obj)
|
||||||
return repr(obj)
|
return repr(obj)
|
||||||
|
|
||||||
|
class _ReplacementFormatter(Formatter):
|
||||||
|
def get_field(self, field_name, args, kwargs):
|
||||||
|
if field_name.isdigit():
|
||||||
|
return args[0], -1
|
||||||
|
raise ValueError('Unsupported field')
|
||||||
|
|
||||||
|
replacement_formatter = _ReplacementFormatter()
|
||||||
|
|
||||||
def create_key(outer_mobj):
|
def create_key(outer_mobj):
|
||||||
if not outer_mobj.group('has_key'):
|
if not outer_mobj.group('has_key'):
|
||||||
return outer_mobj.group(0)
|
return outer_mobj.group(0)
|
||||||
|
@ -1263,7 +1271,13 @@ class YoutubeDL:
|
||||||
if fmt == 's' and value is not None and key in field_size_compat_map.keys():
|
if fmt == 's' and value is not None and key in field_size_compat_map.keys():
|
||||||
fmt = f'0{field_size_compat_map[key]:d}d'
|
fmt = f'0{field_size_compat_map[key]:d}d'
|
||||||
|
|
||||||
value = default if value is None else value if replacement is None else replacement
|
if value is None:
|
||||||
|
value = default
|
||||||
|
elif replacement is not None:
|
||||||
|
try:
|
||||||
|
value = replacement_formatter.format(replacement, value)
|
||||||
|
except ValueError:
|
||||||
|
value = na
|
||||||
|
|
||||||
flags = outer_mobj.group('conversion') or ''
|
flags = outer_mobj.group('conversion') or ''
|
||||||
str_fmt = f'{fmt[:-1]}s'
|
str_fmt = f'{fmt[:-1]}s'
|
||||||
|
@ -1668,7 +1682,7 @@ class YoutubeDL:
|
||||||
self.add_extra_info(info_copy, extra_info)
|
self.add_extra_info(info_copy, extra_info)
|
||||||
info_copy, _ = self.pre_process(info_copy)
|
info_copy, _ = self.pre_process(info_copy)
|
||||||
self._fill_common_fields(info_copy, False)
|
self._fill_common_fields(info_copy, False)
|
||||||
self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
|
self.__forced_printings(info_copy)
|
||||||
self._raise_pending_errors(info_copy)
|
self._raise_pending_errors(info_copy)
|
||||||
if self.params.get('force_write_download_archive', False):
|
if self.params.get('force_write_download_archive', False):
|
||||||
self.record_download_archive(info_copy)
|
self.record_download_archive(info_copy)
|
||||||
|
@ -1937,7 +1951,7 @@ class YoutubeDL:
|
||||||
'!=': operator.ne,
|
'!=': operator.ne,
|
||||||
}
|
}
|
||||||
operator_rex = re.compile(r'''(?x)\s*
|
operator_rex = re.compile(r'''(?x)\s*
|
||||||
(?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
|
(?P<key>[\w.-]+)\s*
|
||||||
(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
|
||||||
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
|
(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
|
||||||
''' % '|'.join(map(re.escape, OPERATORS.keys())))
|
''' % '|'.join(map(re.escape, OPERATORS.keys())))
|
||||||
|
@ -2710,7 +2724,7 @@ class YoutubeDL:
|
||||||
self.list_formats(info_dict)
|
self.list_formats(info_dict)
|
||||||
if list_only:
|
if list_only:
|
||||||
# Without this printing, -F --print-json will not work
|
# Without this printing, -F --print-json will not work
|
||||||
self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
|
self.__forced_printings(info_dict)
|
||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
format_selector = self.format_selector
|
format_selector = self.format_selector
|
||||||
|
@ -2870,6 +2884,12 @@ class YoutubeDL:
|
||||||
if info_dict is None:
|
if info_dict is None:
|
||||||
return
|
return
|
||||||
info_copy = info_dict.copy()
|
info_copy = info_dict.copy()
|
||||||
|
info_copy.setdefault('filename', self.prepare_filename(info_dict))
|
||||||
|
if info_dict.get('requested_formats') is not None:
|
||||||
|
# For RTMP URLs, also include the playpath
|
||||||
|
info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
|
||||||
|
elif info_dict.get('url'):
|
||||||
|
info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
|
||||||
info_copy['formats_table'] = self.render_formats_table(info_dict)
|
info_copy['formats_table'] = self.render_formats_table(info_dict)
|
||||||
info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
|
info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
|
||||||
info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
|
info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
|
||||||
|
@ -2895,46 +2915,36 @@ class YoutubeDL:
|
||||||
tmpl = format_tmpl(tmpl)
|
tmpl = format_tmpl(tmpl)
|
||||||
self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
|
self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
|
||||||
if self._ensure_dir_exists(filename):
|
if self._ensure_dir_exists(filename):
|
||||||
with open(filename, 'a', encoding='utf-8') as f:
|
with open(filename, 'a', encoding='utf-8', newline='') as f:
|
||||||
f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
|
f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
|
||||||
|
|
||||||
def __forced_printings(self, info_dict, filename, incomplete):
|
return info_copy
|
||||||
def print_mandatory(field, actual_field=None):
|
|
||||||
if actual_field is None:
|
|
||||||
actual_field = field
|
|
||||||
if (self.params.get('force%s' % field, False)
|
|
||||||
and (not incomplete or info_dict.get(actual_field) is not None)):
|
|
||||||
self.to_stdout(info_dict[actual_field])
|
|
||||||
|
|
||||||
def print_optional(field):
|
|
||||||
if (self.params.get('force%s' % field, False)
|
|
||||||
and info_dict.get(field) is not None):
|
|
||||||
self.to_stdout(info_dict[field])
|
|
||||||
|
|
||||||
info_dict = info_dict.copy()
|
|
||||||
if filename is not None:
|
|
||||||
info_dict['filename'] = filename
|
|
||||||
if info_dict.get('requested_formats') is not None:
|
|
||||||
# For RTMP URLs, also include the playpath
|
|
||||||
info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
|
|
||||||
elif info_dict.get('url'):
|
|
||||||
info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
|
|
||||||
|
|
||||||
|
def __forced_printings(self, info_dict, filename=None, incomplete=True):
|
||||||
if (self.params.get('forcejson')
|
if (self.params.get('forcejson')
|
||||||
or self.params['forceprint'].get('video')
|
or self.params['forceprint'].get('video')
|
||||||
or self.params['print_to_file'].get('video')):
|
or self.params['print_to_file'].get('video')):
|
||||||
self.post_extract(info_dict)
|
self.post_extract(info_dict)
|
||||||
self._forceprint('video', info_dict)
|
if filename:
|
||||||
|
info_dict['filename'] = filename
|
||||||
|
info_copy = self._forceprint('video', info_dict)
|
||||||
|
|
||||||
print_mandatory('title')
|
def print_field(field, actual_field=None, optional=False):
|
||||||
print_mandatory('id')
|
if actual_field is None:
|
||||||
print_mandatory('url', 'urls')
|
actual_field = field
|
||||||
print_optional('thumbnail')
|
if self.params.get(f'force{field}') and (
|
||||||
print_optional('description')
|
info_copy.get(field) is not None or (not optional and not incomplete)):
|
||||||
print_optional('filename')
|
self.to_stdout(info_copy[actual_field])
|
||||||
if self.params.get('forceduration') and info_dict.get('duration') is not None:
|
|
||||||
self.to_stdout(formatSeconds(info_dict['duration']))
|
print_field('title')
|
||||||
print_mandatory('format')
|
print_field('id')
|
||||||
|
print_field('url', 'urls')
|
||||||
|
print_field('thumbnail', optional=True)
|
||||||
|
print_field('description', optional=True)
|
||||||
|
print_field('filename', optional=True)
|
||||||
|
if self.params.get('forceduration') and info_copy.get('duration') is not None:
|
||||||
|
self.to_stdout(formatSeconds(info_copy['duration']))
|
||||||
|
print_field('format')
|
||||||
|
|
||||||
if self.params.get('forcejson'):
|
if self.params.get('forcejson'):
|
||||||
self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
|
self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
|
||||||
|
@ -3316,7 +3326,7 @@ class YoutubeDL:
|
||||||
or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
|
or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
|
||||||
'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
|
'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
|
||||||
FFmpegFixupM3u8PP)
|
FFmpegFixupM3u8PP)
|
||||||
ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
|
ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
|
||||||
'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
|
'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
|
||||||
|
|
||||||
ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
|
ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
|
||||||
|
@ -3482,7 +3492,7 @@ class YoutubeDL:
|
||||||
*files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
|
*files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
|
||||||
return infodict
|
return infodict
|
||||||
|
|
||||||
def run_all_pps(self, key, info, *, additional_pps=None):
|
def run_all_pps(self, key, info, *, additional_pps=None, fatal=True):
|
||||||
if key != 'video':
|
if key != 'video':
|
||||||
self._forceprint(key, info)
|
self._forceprint(key, info)
|
||||||
for pp in (additional_pps or []) + self._pps[key]:
|
for pp in (additional_pps or []) + self._pps[key]:
|
||||||
|
|
|
@ -412,12 +412,17 @@ def validate_options(opts):
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
raise ValueError(f'Invalid playlist-items {opts.playlist_items!r}: {err}')
|
raise ValueError(f'Invalid playlist-items {opts.playlist_items!r}: {err}')
|
||||||
|
|
||||||
geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country
|
opts.geo_bypass_country, opts.geo_bypass_ip_block = None, None
|
||||||
if geo_bypass_code is not None:
|
if opts.geo_bypass.lower() not in ('default', 'never'):
|
||||||
try:
|
try:
|
||||||
GeoUtils.random_ipv4(geo_bypass_code)
|
GeoUtils.random_ipv4(opts.geo_bypass)
|
||||||
except Exception:
|
except Exception:
|
||||||
raise ValueError('unsupported geo-bypass country or ip-block')
|
raise ValueError(f'Unsupported --xff "{opts.geo_bypass}"')
|
||||||
|
if len(opts.geo_bypass) == 2:
|
||||||
|
opts.geo_bypass_country = opts.geo_bypass
|
||||||
|
else:
|
||||||
|
opts.geo_bypass_ip_block = opts.geo_bypass
|
||||||
|
opts.geo_bypass = opts.geo_bypass.lower() != 'never'
|
||||||
|
|
||||||
opts.match_filter = match_filter_func(opts.match_filter, opts.breaking_match_filter)
|
opts.match_filter = match_filter_func(opts.match_filter, opts.breaking_match_filter)
|
||||||
|
|
||||||
|
@ -720,7 +725,8 @@ def parse_options(argv=None):
|
||||||
'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename',
|
'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename',
|
||||||
'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl'
|
'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl'
|
||||||
))
|
))
|
||||||
opts.quiet = opts.quiet or any_getting or opts.print_json or bool(opts.forceprint)
|
if opts.quiet is None:
|
||||||
|
opts.quiet = any_getting or opts.print_json or bool(opts.forceprint)
|
||||||
|
|
||||||
playlist_pps = [pp for pp in postprocessors if pp.get('when') == 'playlist']
|
playlist_pps = [pp for pp in postprocessors if pp.get('when') == 'playlist']
|
||||||
write_playlist_infojson = (opts.writeinfojson and not opts.clean_infojson
|
write_playlist_infojson = (opts.writeinfojson and not opts.clean_infojson
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
import types
|
from ..compat.compat_utils import passthrough_module
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import Cryptodome as _parent
|
import Cryptodome as _parent
|
||||||
|
@ -6,9 +6,11 @@ except ImportError:
|
||||||
try:
|
try:
|
||||||
import Crypto as _parent
|
import Crypto as _parent
|
||||||
except (ImportError, SyntaxError): # Old Crypto gives SyntaxError in newer Python
|
except (ImportError, SyntaxError): # Old Crypto gives SyntaxError in newer Python
|
||||||
_parent = types.ModuleType('no_Cryptodome')
|
_parent = passthrough_module(__name__, 'no_Cryptodome')
|
||||||
__bool__ = lambda: False
|
__bool__ = lambda: False
|
||||||
|
|
||||||
|
del passthrough_module
|
||||||
|
|
||||||
__version__ = ''
|
__version__ = ''
|
||||||
AES = PKCS1_v1_5 = Blowfish = PKCS1_OAEP = SHA1 = CMAC = RSA = None
|
AES = PKCS1_v1_5 = Blowfish = PKCS1_OAEP = SHA1 = CMAC = RSA = None
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -254,6 +254,14 @@ from .br import (
|
||||||
BRMediathekIE,
|
BRMediathekIE,
|
||||||
)
|
)
|
||||||
from .bravotv import BravoTVIE
|
from .bravotv import BravoTVIE
|
||||||
|
from .brainpop import (
|
||||||
|
BrainPOPIE,
|
||||||
|
BrainPOPJrIE,
|
||||||
|
BrainPOPELLIE,
|
||||||
|
BrainPOPEspIE,
|
||||||
|
BrainPOPFrIE,
|
||||||
|
BrainPOPIlIE,
|
||||||
|
)
|
||||||
from .breakcom import BreakIE
|
from .breakcom import BreakIE
|
||||||
from .breitbart import BreitBartIE
|
from .breitbart import BreitBartIE
|
||||||
from .brightcove import (
|
from .brightcove import (
|
||||||
|
@ -298,7 +306,10 @@ from .cbc import (
|
||||||
CBCGemPlaylistIE,
|
CBCGemPlaylistIE,
|
||||||
CBCGemLiveIE,
|
CBCGemLiveIE,
|
||||||
)
|
)
|
||||||
from .cbs import CBSIE
|
from .cbs import (
|
||||||
|
CBSIE,
|
||||||
|
ParamountPressExpressIE,
|
||||||
|
)
|
||||||
from .cbslocal import (
|
from .cbslocal import (
|
||||||
CBSLocalIE,
|
CBSLocalIE,
|
||||||
CBSLocalArticleIE,
|
CBSLocalArticleIE,
|
||||||
|
@ -345,6 +356,7 @@ from .ciscolive import (
|
||||||
)
|
)
|
||||||
from .ciscowebex import CiscoWebexIE
|
from .ciscowebex import CiscoWebexIE
|
||||||
from .cjsw import CJSWIE
|
from .cjsw import CJSWIE
|
||||||
|
from .clipchamp import ClipchampIE
|
||||||
from .cliphunter import CliphunterIE
|
from .cliphunter import CliphunterIE
|
||||||
from .clippit import ClippitIE
|
from .clippit import ClippitIE
|
||||||
from .cliprs import ClipRsIE
|
from .cliprs import ClipRsIE
|
||||||
|
@ -441,6 +453,10 @@ from .deezer import (
|
||||||
)
|
)
|
||||||
from .democracynow import DemocracynowIE
|
from .democracynow import DemocracynowIE
|
||||||
from .detik import DetikEmbedIE
|
from .detik import DetikEmbedIE
|
||||||
|
from .dlf import (
|
||||||
|
DLFIE,
|
||||||
|
DLFCorpusIE,
|
||||||
|
)
|
||||||
from .dfb import DFBIE
|
from .dfb import DFBIE
|
||||||
from .dhm import DHMIE
|
from .dhm import DHMIE
|
||||||
from .digg import DiggIE
|
from .digg import DiggIE
|
||||||
|
@ -674,10 +690,18 @@ from .gfycat import GfycatIE
|
||||||
from .giantbomb import GiantBombIE
|
from .giantbomb import GiantBombIE
|
||||||
from .giga import GigaIE
|
from .giga import GigaIE
|
||||||
from .glide import GlideIE
|
from .glide import GlideIE
|
||||||
|
from .globalplayer import (
|
||||||
|
GlobalPlayerLiveIE,
|
||||||
|
GlobalPlayerLivePlaylistIE,
|
||||||
|
GlobalPlayerAudioIE,
|
||||||
|
GlobalPlayerAudioEpisodeIE,
|
||||||
|
GlobalPlayerVideoIE
|
||||||
|
)
|
||||||
from .globo import (
|
from .globo import (
|
||||||
GloboIE,
|
GloboIE,
|
||||||
GloboArticleIE,
|
GloboArticleIE,
|
||||||
)
|
)
|
||||||
|
from .gmanetwork import GMANetworkVideoIE
|
||||||
from .go import GoIE
|
from .go import GoIE
|
||||||
from .godtube import GodTubeIE
|
from .godtube import GodTubeIE
|
||||||
from .gofile import GofileIE
|
from .gofile import GofileIE
|
||||||
|
@ -709,13 +733,16 @@ from .hearthisat import HearThisAtIE
|
||||||
from .heise import HeiseIE
|
from .heise import HeiseIE
|
||||||
from .hellporno import HellPornoIE
|
from .hellporno import HellPornoIE
|
||||||
from .helsinki import HelsinkiIE
|
from .helsinki import HelsinkiIE
|
||||||
from .hentaistigma import HentaiStigmaIE
|
|
||||||
from .hgtv import HGTVComShowIE
|
from .hgtv import HGTVComShowIE
|
||||||
from .hketv import HKETVIE
|
from .hketv import HKETVIE
|
||||||
from .hidive import HiDiveIE
|
from .hidive import HiDiveIE
|
||||||
from .historicfilms import HistoricFilmsIE
|
from .historicfilms import HistoricFilmsIE
|
||||||
from .hitbox import HitboxIE, HitboxLiveIE
|
from .hitbox import HitboxIE, HitboxLiveIE
|
||||||
from .hitrecord import HitRecordIE
|
from .hitrecord import HitRecordIE
|
||||||
|
from .hollywoodreporter import (
|
||||||
|
HollywoodReporterIE,
|
||||||
|
HollywoodReporterPlaylistIE,
|
||||||
|
)
|
||||||
from .holodex import HolodexIE
|
from .holodex import HolodexIE
|
||||||
from .hotnewhiphop import HotNewHipHopIE
|
from .hotnewhiphop import HotNewHipHopIE
|
||||||
from .hotstar import (
|
from .hotstar import (
|
||||||
|
@ -727,6 +754,7 @@ from .hotstar import (
|
||||||
)
|
)
|
||||||
from .howcast import HowcastIE
|
from .howcast import HowcastIE
|
||||||
from .howstuffworks import HowStuffWorksIE
|
from .howstuffworks import HowStuffWorksIE
|
||||||
|
from .hrefli import HrefLiRedirectIE
|
||||||
from .hrfensehen import HRFernsehenIE
|
from .hrfensehen import HRFernsehenIE
|
||||||
from .hrti import (
|
from .hrti import (
|
||||||
HRTiIE,
|
HRTiIE,
|
||||||
|
@ -936,10 +964,6 @@ from .limelight import (
|
||||||
LimelightChannelIE,
|
LimelightChannelIE,
|
||||||
LimelightChannelListIE,
|
LimelightChannelListIE,
|
||||||
)
|
)
|
||||||
from .line import (
|
|
||||||
LineLiveIE,
|
|
||||||
LineLiveChannelIE,
|
|
||||||
)
|
|
||||||
from .linkedin import (
|
from .linkedin import (
|
||||||
LinkedInIE,
|
LinkedInIE,
|
||||||
LinkedInLearningIE,
|
LinkedInLearningIE,
|
||||||
|
@ -1219,6 +1243,8 @@ from .nhk import (
|
||||||
NhkForSchoolBangumiIE,
|
NhkForSchoolBangumiIE,
|
||||||
NhkForSchoolSubjectIE,
|
NhkForSchoolSubjectIE,
|
||||||
NhkForSchoolProgramListIE,
|
NhkForSchoolProgramListIE,
|
||||||
|
NhkRadioNewsPageIE,
|
||||||
|
NhkRadiruIE,
|
||||||
)
|
)
|
||||||
from .nhl import NHLIE
|
from .nhl import NHLIE
|
||||||
from .nick import (
|
from .nick import (
|
||||||
|
@ -1390,6 +1416,7 @@ from .periscope import (
|
||||||
PeriscopeIE,
|
PeriscopeIE,
|
||||||
PeriscopeUserIE,
|
PeriscopeUserIE,
|
||||||
)
|
)
|
||||||
|
from .pgatour import PGATourIE
|
||||||
from .philharmoniedeparis import PhilharmonieDeParisIE
|
from .philharmoniedeparis import PhilharmonieDeParisIE
|
||||||
from .phoenix import PhoenixIE
|
from .phoenix import PhoenixIE
|
||||||
from .photobucket import PhotobucketIE
|
from .photobucket import PhotobucketIE
|
||||||
|
@ -1606,6 +1633,11 @@ from .rtnews import (
|
||||||
from .rtp import RTPIE
|
from .rtp import RTPIE
|
||||||
from .rtrfm import RTRFMIE
|
from .rtrfm import RTRFMIE
|
||||||
from .rts import RTSIE
|
from .rts import RTSIE
|
||||||
|
from .rtvcplay import (
|
||||||
|
RTVCPlayIE,
|
||||||
|
RTVCPlayEmbedIE,
|
||||||
|
RTVCKalturaIE,
|
||||||
|
)
|
||||||
from .rtve import (
|
from .rtve import (
|
||||||
RTVEALaCartaIE,
|
RTVEALaCartaIE,
|
||||||
RTVEAudioIE,
|
RTVEAudioIE,
|
||||||
|
@ -1675,6 +1707,7 @@ from .scte import (
|
||||||
)
|
)
|
||||||
from .scrolller import ScrolllerIE
|
from .scrolller import ScrolllerIE
|
||||||
from .seeker import SeekerIE
|
from .seeker import SeekerIE
|
||||||
|
from .senalcolombia import SenalColombiaLiveIE
|
||||||
from .senategov import SenateISVPIE, SenateGovIE
|
from .senategov import SenateISVPIE, SenateGovIE
|
||||||
from .sendtonews import SendtoNewsIE
|
from .sendtonews import SendtoNewsIE
|
||||||
from .servus import ServusIE
|
from .servus import ServusIE
|
||||||
|
@ -1772,6 +1805,7 @@ from .spike import (
|
||||||
BellatorIE,
|
BellatorIE,
|
||||||
ParamountNetworkIE,
|
ParamountNetworkIE,
|
||||||
)
|
)
|
||||||
|
from .stageplus import StagePlusVODConcertIE
|
||||||
from .startrek import StarTrekIE
|
from .startrek import StarTrekIE
|
||||||
from .stitcher import (
|
from .stitcher import (
|
||||||
StitcherIE,
|
StitcherIE,
|
||||||
|
@ -1954,6 +1988,7 @@ from .traileraddict import TrailerAddictIE
|
||||||
from .triller import (
|
from .triller import (
|
||||||
TrillerIE,
|
TrillerIE,
|
||||||
TrillerUserIE,
|
TrillerUserIE,
|
||||||
|
TrillerShortIE,
|
||||||
)
|
)
|
||||||
from .trilulilu import TriluliluIE
|
from .trilulilu import TriluliluIE
|
||||||
from .trovo import (
|
from .trovo import (
|
||||||
|
@ -2280,6 +2315,8 @@ from .weibo import (
|
||||||
WeiboMobileIE
|
WeiboMobileIE
|
||||||
)
|
)
|
||||||
from .weiqitv import WeiqiTVIE
|
from .weiqitv import WeiqiTVIE
|
||||||
|
from .wevidi import WeVidiIE
|
||||||
|
from .whyp import WhypIE
|
||||||
from .wikimedia import WikimediaIE
|
from .wikimedia import WikimediaIE
|
||||||
from .willow import WillowIE
|
from .willow import WillowIE
|
||||||
from .wimtv import WimTVIE
|
from .wimtv import WimTVIE
|
||||||
|
@ -2334,8 +2371,6 @@ from .xxxymovies import XXXYMoviesIE
|
||||||
from .yahoo import (
|
from .yahoo import (
|
||||||
YahooIE,
|
YahooIE,
|
||||||
YahooSearchIE,
|
YahooSearchIE,
|
||||||
YahooGyaOPlayerIE,
|
|
||||||
YahooGyaOIE,
|
|
||||||
YahooJapanNewsIE,
|
YahooJapanNewsIE,
|
||||||
)
|
)
|
||||||
from .yandexdisk import YandexDiskIE
|
from .yandexdisk import YandexDiskIE
|
||||||
|
|
|
@ -436,6 +436,16 @@ class AbemaTVIE(AbemaTVBaseIE):
|
||||||
if 3 not in ondemand_types:
|
if 3 not in ondemand_types:
|
||||||
# cannot acquire decryption key for these streams
|
# cannot acquire decryption key for these streams
|
||||||
self.report_warning('This is a premium-only stream')
|
self.report_warning('This is a premium-only stream')
|
||||||
|
info.update(traverse_obj(api_response, {
|
||||||
|
'series': ('series', 'title'),
|
||||||
|
'season': ('season', 'title'),
|
||||||
|
'season_number': ('season', 'sequence'),
|
||||||
|
'episode_number': ('episode', 'number'),
|
||||||
|
}))
|
||||||
|
if not title:
|
||||||
|
title = traverse_obj(api_response, ('episode', 'title'))
|
||||||
|
if not description:
|
||||||
|
description = traverse_obj(api_response, ('episode', 'content'))
|
||||||
|
|
||||||
m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
|
m3u8_url = f'https://vod-abematv.akamaized.net/program/{video_id}/playlist.m3u8'
|
||||||
elif video_type == 'slots':
|
elif video_type == 'slots':
|
||||||
|
|
|
@ -1573,7 +1573,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||||
}), headers={
|
}), headers={
|
||||||
'Content-Type': 'application/x-www-form-urlencoded'
|
'Content-Type': 'application/x-www-form-urlencoded'
|
||||||
})
|
})
|
||||||
elif mso_id == 'Spectrum':
|
elif mso_id in ('Spectrum', 'Charter_Direct'):
|
||||||
# Spectrum's login for is dynamically loaded via JS so we need to hardcode the flow
|
# Spectrum's login for is dynamically loaded via JS so we need to hardcode the flow
|
||||||
# as a one-off implementation.
|
# as a one-off implementation.
|
||||||
provider_redirect_page, urlh = provider_redirect_page_res
|
provider_redirect_page, urlh = provider_redirect_page_res
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .vimeo import VimeoIE
|
from .vimeo import VimeoIE
|
||||||
|
from ..utils import ExtractorError, traverse_obj, url_or_none
|
||||||
|
|
||||||
|
|
||||||
class AeonCoIE(InfoExtractor):
|
class AeonCoIE(InfoExtractor):
|
||||||
|
@ -19,22 +20,55 @@ class AeonCoIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it',
|
'url': 'https://aeon.co/videos/dazzling-timelapse-shows-how-microbes-spoil-our-food-and-sometimes-enrich-it',
|
||||||
'md5': '4e5f3dad9dbda0dbfa2da41a851e631e',
|
'md5': '03582d795382e49f2fd0b427b55de409',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '728595228',
|
'id': '759576926',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Wrought',
|
'title': 'Wrought',
|
||||||
'thumbnail': 'https://i.vimeocdn.com/video/1484618528-c91452611f9a4e4497735a533da60d45b2fe472deb0c880f0afaab0cd2efb22a-d_1280',
|
'thumbnail': 'https://i.vimeocdn.com/video/1525599692-84614af88e446612f49ca966cf8f80eab2c73376bedd80555741c521c26f9a3e-d_1280',
|
||||||
'uploader': 'Biofilm Productions',
|
'uploader': 'Aeon Video',
|
||||||
'uploader_id': 'user140352216',
|
'uploader_id': 'aeonvideo',
|
||||||
'uploader_url': 'https://vimeo.com/user140352216',
|
'uploader_url': 'https://vimeo.com/aeonvideo',
|
||||||
'duration': 1344
|
'duration': 1344
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://aeon.co/videos/chew-over-the-prisoners-dilemma-and-see-if-you-can-find-the-rational-path-out',
|
||||||
|
'md5': '1cfda0bf3ae24df17d00f2c0cb6cc21b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'emyi4z-O0ls',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'How to outsmart the Prisoner’s Dilemma - Lucas Husted',
|
||||||
|
'thumbnail': 'https://i.ytimg.com/vi_webp/emyi4z-O0ls/maxresdefault.webp',
|
||||||
|
'uploader': 'TED-Ed',
|
||||||
|
'uploader_id': '@TEDEd',
|
||||||
|
'uploader_url': 'https://www.youtube.com/@TEDEd',
|
||||||
|
'duration': 344,
|
||||||
|
'upload_date': '20200827',
|
||||||
|
'channel_id': 'UCsooa4yRKGN_zEE8iknghZA',
|
||||||
|
'playable_in_embed': True,
|
||||||
|
'description': 'md5:c0959524f08cb60f96fd010f3dfb17f3',
|
||||||
|
'categories': ['Education'],
|
||||||
|
'like_count': int,
|
||||||
|
'channel': 'TED-Ed',
|
||||||
|
'chapters': 'count:7',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCsooa4yRKGN_zEE8iknghZA',
|
||||||
|
'tags': 'count:26',
|
||||||
|
'availability': 'public',
|
||||||
|
'channel_follower_count': int,
|
||||||
|
'view_count': int,
|
||||||
|
'age_limit': 0,
|
||||||
|
'live_status': 'not_live',
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
vimeo_id = self._search_regex(r'hosterId":\s*"(?P<id>[0-9]+)', webpage, 'vimeo id')
|
embed_url = traverse_obj(self._yield_json_ld(webpage, video_id), (
|
||||||
vimeo_url = VimeoIE._smuggle_referrer(f'https://player.vimeo.com/video/{vimeo_id}', 'https://aeon.co')
|
lambda _, v: v['@type'] == 'VideoObject', 'embedUrl', {url_or_none}), get_all=False)
|
||||||
return self.url_result(vimeo_url, VimeoIE)
|
if not embed_url:
|
||||||
|
raise ExtractorError('No embed URL found in webpage')
|
||||||
|
if 'player.vimeo.com' in embed_url:
|
||||||
|
embed_url = VimeoIE._smuggle_referrer(embed_url, 'https://aeon.co/')
|
||||||
|
return self.url_result(embed_url)
|
||||||
|
|
|
@ -26,6 +26,7 @@ from ..utils import (
|
||||||
srt_subtitles_timecode,
|
srt_subtitles_timecode,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
|
unified_timestamp,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
|
@ -133,7 +134,7 @@ class BilibiliBaseIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class BiliBiliIE(BilibiliBaseIE):
|
class BiliBiliIE(BilibiliBaseIE):
|
||||||
_VALID_URL = r'https?://www\.bilibili\.com/video/[aAbB][vV](?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.bilibili.com/video/BV13x41117TL',
|
'url': 'https://www.bilibili.com/video/BV13x41117TL',
|
||||||
|
@ -281,19 +282,60 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True},
|
'params': {'skip_download': True},
|
||||||
|
}, {
|
||||||
|
'note': 'video redirects to festival page',
|
||||||
|
'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'BV1wP4y1P72h',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
|
||||||
|
'timestamp': 1643947497,
|
||||||
|
'upload_date': '20220204',
|
||||||
|
'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
|
||||||
|
'uploader': '叨叨冯聊音乐',
|
||||||
|
'duration': 246.719,
|
||||||
|
'uploader_id': '528182630',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': True},
|
||||||
|
}, {
|
||||||
|
'note': 'newer festival video',
|
||||||
|
'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'BV1ay4y1d77f',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '【崩坏3新春剧场】为特别的你送上祝福!',
|
||||||
|
'timestamp': 1674273600,
|
||||||
|
'upload_date': '20230121',
|
||||||
|
'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
|
||||||
|
'uploader': '果蝇轰',
|
||||||
|
'duration': 1111.722,
|
||||||
|
'uploader_id': '8469526',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': True},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
|
||||||
play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
|
|
||||||
|
|
||||||
video_data = initial_state['videoData']
|
is_festival = 'videoData' not in initial_state
|
||||||
|
if is_festival:
|
||||||
|
video_data = initial_state['videoInfo']
|
||||||
|
else:
|
||||||
|
play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
|
||||||
|
video_data = initial_state['videoData']
|
||||||
|
|
||||||
video_id, title = video_data['bvid'], video_data.get('title')
|
video_id, title = video_data['bvid'], video_data.get('title')
|
||||||
|
|
||||||
# Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
|
# Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
|
||||||
page_list_json = traverse_obj(
|
page_list_json = not is_festival and traverse_obj(
|
||||||
self._download_json(
|
self._download_json(
|
||||||
'https://api.bilibili.com/x/player/pagelist', video_id,
|
'https://api.bilibili.com/x/player/pagelist', video_id,
|
||||||
fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
|
fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
|
||||||
|
@ -316,20 +358,39 @@ class BiliBiliIE(BilibiliBaseIE):
|
||||||
|
|
||||||
cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
|
cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
|
||||||
|
|
||||||
|
festival_info = {}
|
||||||
|
if is_festival:
|
||||||
|
play_info = self._download_json(
|
||||||
|
'https://api.bilibili.com/x/player/playurl', video_id,
|
||||||
|
query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
|
||||||
|
note='Extracting festival video formats')['data']
|
||||||
|
|
||||||
|
festival_info = traverse_obj(initial_state, {
|
||||||
|
'uploader': ('videoInfo', 'upName'),
|
||||||
|
'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
|
||||||
|
'like_count': ('videoStatus', 'like', {int_or_none}),
|
||||||
|
'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
|
||||||
|
}, get_all=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
**traverse_obj(initial_state, {
|
||||||
|
'uploader': ('upData', 'name'),
|
||||||
|
'uploader_id': ('upData', 'mid', {str_or_none}),
|
||||||
|
'like_count': ('videoData', 'stat', 'like', {int_or_none}),
|
||||||
|
'tags': ('tags', ..., 'tag_name'),
|
||||||
|
'thumbnail': ('videoData', 'pic', {url_or_none}),
|
||||||
|
}),
|
||||||
|
**festival_info,
|
||||||
|
**traverse_obj(video_data, {
|
||||||
|
'description': 'desc',
|
||||||
|
'timestamp': ('pubdate', {int_or_none}),
|
||||||
|
'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
|
||||||
|
'comment_count': ('stat', 'reply', {int_or_none}),
|
||||||
|
}, get_all=False),
|
||||||
'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
|
'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
|
||||||
'formats': self.extract_formats(play_info),
|
'formats': self.extract_formats(play_info),
|
||||||
'_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
|
'_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': traverse_obj(initial_state, ('videoData', 'desc')),
|
|
||||||
'view_count': traverse_obj(initial_state, ('videoData', 'stat', 'view')),
|
|
||||||
'uploader': traverse_obj(initial_state, ('upData', 'name')),
|
|
||||||
'uploader_id': traverse_obj(initial_state, ('upData', 'mid')),
|
|
||||||
'like_count': traverse_obj(initial_state, ('videoData', 'stat', 'like')),
|
|
||||||
'comment_count': traverse_obj(initial_state, ('videoData', 'stat', 'reply')),
|
|
||||||
'tags': traverse_obj(initial_state, ('tags', ..., 'tag_name')),
|
|
||||||
'thumbnail': traverse_obj(initial_state, ('videoData', 'pic')),
|
|
||||||
'timestamp': traverse_obj(initial_state, ('videoData', 'pubdate')),
|
|
||||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||||
'chapters': self._get_chapters(aid, cid),
|
'chapters': self._get_chapters(aid, cid),
|
||||||
'subtitles': self.extract_subtitles(video_id, aid, cid),
|
'subtitles': self.extract_subtitles(video_id, aid, cid),
|
||||||
|
@ -996,6 +1057,53 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||||
'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
|
'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
|
||||||
'upload_date': '20221212',
|
'upload_date': '20221212',
|
||||||
'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
|
'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# episode comment extraction
|
||||||
|
'url': 'https://www.bilibili.tv/en/play/34580/340317',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '340317',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'timestamp': 1604057820,
|
||||||
|
'upload_date': '20201030',
|
||||||
|
'episode_number': 5,
|
||||||
|
'title': 'E5 - My Own Steel',
|
||||||
|
'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
|
||||||
|
'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
|
||||||
|
'episode': 'Episode 5',
|
||||||
|
'comment_count': int,
|
||||||
|
'chapters': [{
|
||||||
|
'start_time': 0,
|
||||||
|
'end_time': 61.0,
|
||||||
|
'title': '<Untitled Chapter 1>'
|
||||||
|
}, {
|
||||||
|
'start_time': 61.0,
|
||||||
|
'end_time': 134.0,
|
||||||
|
'title': 'Intro'
|
||||||
|
}, {
|
||||||
|
'start_time': 1290.0,
|
||||||
|
'end_time': 1379.0,
|
||||||
|
'title': 'Outro'
|
||||||
|
}],
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'getcomments': True
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# user generated content comment extraction
|
||||||
|
'url': 'https://www.bilibili.tv/en/video/2045730385',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2045730385',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
|
||||||
|
'timestamp': 1667891924,
|
||||||
|
'upload_date': '20221108',
|
||||||
|
'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
|
||||||
|
'comment_count': int,
|
||||||
|
'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'getcomments': True
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# episode id without intro and outro
|
# episode id without intro and outro
|
||||||
|
@ -1055,11 +1163,69 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||||
|
|
||||||
# XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
|
# XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
|
||||||
return merge_dicts(
|
return merge_dicts(
|
||||||
self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id), {
|
self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
|
||||||
'title': self._html_search_meta('og:title', webpage),
|
'title': self._html_search_meta('og:title', webpage),
|
||||||
'description': self._html_search_meta('og:description', webpage)
|
'description': self._html_search_meta('og:description', webpage)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
def _get_comments_reply(self, root_id, next_id=0, display_id=None):
    """Yield every reply to the root comment ``root_id``.

    Pages through the ``reply/web/detail`` endpoint, starting at cursor
    ``next_id``, until the API reports ``is_end`` or stops handing out a
    usable cursor.

    @param root_id     ID of the top-level comment whose replies are fetched
    @param next_id     Cursor value to start from (0 for the first page)
    @param display_id  ID used for log/progress messages
    """
    # Iterate instead of recursing: long reply threads would otherwise grow
    # the generator chain (and the call stack) by one level per page.
    while True:
        page = self._download_json(
            'https://api.bilibili.tv/reply/web/detail', display_id,
            note=f'Downloading reply comment of {root_id} - {next_id}',
            query={
                'platform': 'web',
                'ps': 20,  # comment's reply per page (default: 3)
                'root': root_id,
                'next': next_id,
            })

        for reply in traverse_obj(page, ('data', 'replies', ...)):
            yield {
                'author': traverse_obj(reply, ('member', 'name')),
                'author_id': traverse_obj(reply, ('member', 'mid')),
                'author_thumbnail': traverse_obj(reply, ('member', 'face')),
                'text': traverse_obj(reply, ('content', 'message')),
                'id': reply.get('rpid'),
                'like_count': int_or_none(reply.get('like_count')),
                'parent': reply.get('parent'),
                'timestamp': unified_timestamp(reply.get('ctime_text')),
            }

        # A response without `data`/`cursor` (e.g. an API error payload) used
        # to raise KeyError here; treat it as the end of the thread instead.
        cursor = traverse_obj(page, ('data', 'cursor'), expected_type=dict) or {}
        upcoming = cursor.get('next')
        if cursor.get('is_end') or upcoming is None or upcoming == next_id:
            # `upcoming == next_id` guards against re-requesting the same page forever
            break
        next_id = upcoming
|
||||||
|
|
||||||
|
def _get_comments(self, video_id, ep_id):
    """Yield top-level comments of a video, each followed by its replies.

    @param video_id  ID passed to the API as ``oid`` and used for log messages
    @param ep_id     Truthy for series episodes (API ``type`` 3),
                     falsy for user generated content (API ``type`` 1)
    """
    for page_num in itertools.count(0):
        page = self._download_json(
            'https://api.bilibili.tv/reply/web/root', video_id,
            note=f'Downloading comment page {page_num + 1}',
            query={
                'platform': 'web',
                'pn': page_num,  # page number
                'ps': 20,  # comment per page (default: 20)
                'oid': video_id,
                'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content
                'sort_type': 1,  # 1: best, 2: recent
            })

        for comment in traverse_obj(page, ('data', 'replies', ...)):
            yield {
                'author': traverse_obj(comment, ('member', 'name')),
                'author_id': traverse_obj(comment, ('member', 'mid')),
                'author_thumbnail': traverse_obj(comment, ('member', 'face')),
                'text': traverse_obj(comment, ('content', 'message')),
                'id': comment.get('rpid'),
                'like_count': int_or_none(comment.get('like_count')),
                'timestamp': unified_timestamp(comment.get('ctime_text')),
                'author_is_uploader': bool(traverse_obj(comment, ('member', 'type'))),
            }
            # `count` is non-zero when the comment has replies of its own
            if comment.get('count'):
                yield from self._get_comments_reply(comment.get('rpid'), display_id=video_id)

        # Stop on `is_end`, and also when the response carries no cursor at
        # all (e.g. an error payload) - otherwise this loop never terminates.
        cursor = traverse_obj(page, ('data', 'cursor'), expected_type=dict)
        if not cursor or cursor.get('is_end'):
            break
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
|
season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
|
||||||
video_id = ep_id or aid
|
video_id = ep_id or aid
|
||||||
|
@ -1087,7 +1253,8 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||||
**self._extract_video_metadata(url, video_id, season_id),
|
**self._extract_video_metadata(url, video_id, season_id),
|
||||||
'formats': self._get_formats(ep_id=ep_id, aid=aid),
|
'formats': self._get_formats(ep_id=ep_id, aid=aid),
|
||||||
'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
|
'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
|
||||||
'chapters': chapters
|
'chapters': chapters,
|
||||||
|
'__post_extractor': self.extract_comments(video_id, ep_id)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -77,7 +77,10 @@ class BitChuteIE(InfoExtractor):
|
||||||
def _check_format(self, video_url, video_id):
|
def _check_format(self, video_url, video_id):
|
||||||
urls = orderedSet(
|
urls = orderedSet(
|
||||||
re.sub(r'(^https?://)(seed\d+)(?=\.bitchute\.com)', fr'\g<1>{host}', video_url)
|
re.sub(r'(^https?://)(seed\d+)(?=\.bitchute\.com)', fr'\g<1>{host}', video_url)
|
||||||
for host in (r'\g<2>', 'seed150', 'seed151', 'seed152', 'seed153'))
|
for host in (r'\g<2>', 'seed122', 'seed125', 'seed126', 'seed128',
|
||||||
|
'seed132', 'seed150', 'seed151', 'seed152', 'seed153',
|
||||||
|
'seed167', 'seed171', 'seed177', 'seed305', 'seed307',
|
||||||
|
'seedp29xb', 'zb10-7gsop1v78'))
|
||||||
for url in urls:
|
for url in urls:
|
||||||
try:
|
try:
|
||||||
response = self._request_webpage(
|
response = self._request_webpage(
|
||||||
|
|
318
yt_dlp/extractor/brainpop.py
Normal file
318
yt_dlp/extractor/brainpop.py
Normal file
|
@ -0,0 +1,318 @@
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
classproperty,
|
||||||
|
int_or_none,
|
||||||
|
traverse_obj,
|
||||||
|
urljoin
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class BrainPOPBaseIE(InfoExtractor):
    """Shared URL matching, format assembly and login for BrainPOP extractors.

    Subclasses set ``_ORIGIN`` (the site root that ``_VALID_URL`` is built
    from) and ``_VIDEO_URL`` / ``_HLS_URL`` (bases that ``_assemble_formats``
    joins media slugs onto).
    """
    _NETRC_MACHINE = 'brainpop'
    _ORIGIN = ''  # So that _VALID_URL doesn't crash
    _LOGIN_ERRORS = {
        1502: 'The username and password you entered did not match.',  # LOGIN_FAILED
        1503: 'Payment method is expired.',  # LOGIN_FAILED_ACCOUNT_NOT_ACTIVE
        1506: 'Your BrainPOP plan has expired.',  # LOGIN_FAILED_ACCOUNT_EXPIRED
        1507: 'Terms not accepted.',  # LOGIN_FAILED_TERMS_NOT_ACCEPTED
        1508: 'Account not activated.',  # LOGIN_FAILED_SUBSCRIPTION_NOT_ACTIVE
        1512: 'The maximum number of devices permitted are logged in with your account right now.',  # LOGIN_FAILED_LOGIN_LIMIT_REACHED
        1513: 'You are trying to access your account from outside of its allowed IP range.',  # LOGIN_FAILED_INVALID_IP
        1514: 'Individual accounts are not included in your plan. Try again with your shared username and password.',  # LOGIN_FAILED_MBP_DISABLED
        1515: 'Account not activated.',  # LOGIN_FAILED_TEACHER_NOT_ACTIVE
        1523: 'That username and password won\'t work on this BrainPOP site.',  # LOGIN_FAILED_NO_ACCESS
        1524: 'You\'ll need to join a class before you can login.',  # LOGIN_FAILED_STUDENT_NO_PERIOD
        1526: 'Your account is locked. Reset your password, or ask a teacher or administrator for help.',  # LOGIN_FAILED_ACCOUNT_LOCKED
    }

    @classproperty
    def _VALID_URL(cls):
        """Build the URL regex from ``_ORIGIN``, accepting http and an optional ``www.``."""
        root = re.escape(cls._ORIGIN).replace(r'https:', r'https?:').replace(r'www\.', r'(?:www\.)?')
        return rf'{root}/(?P<slug>[^/]+/[^/]+/(?P<id>[^/?#&]+))'

    def _assemble_formats(self, slug, format_id, display_id, token='', extra_fields=None):
        """Return the HLS formats plus one direct format for a media ``slug``.

        @param slug          Media path, joined onto ``_HLS_URL`` / ``_VIDEO_URL``
        @param format_id     ``format_id`` of the direct format; the HLS
                             formats use ``{format_id}-hls`` as ``m3u8_id``
        @param display_id    ID used for log messages
        @param token         Query string appended to both URLs
        @param extra_fields  Fields merged into every returned format
        """
        formats = self._extract_m3u8_formats(
            f'{urljoin(self._HLS_URL, slug)}.m3u8?{token}',
            display_id, 'mp4', m3u8_id=f'{format_id}-hls', fatal=False)
        formats.append({
            'format_id': format_id,
            'url': f'{urljoin(self._VIDEO_URL, slug)}?{token}',
        })
        if extra_fields:
            for fmt in formats:
                fmt.update(extra_fields)
        return formats

    def _extract_adaptive_formats(self, data, token, display_id, key_format='%s', extra_fields=None):
        """Collect formats for the high/low variants found in ``data``.

        For each of ``high`` and ``low`` the key ``key_format % quality`` is
        looked up, both as-is and with an ``ad_`` prefix (marked as
        "Audio description" and given a lower ``source_preference``).

        @param data          Mapping of format keys to media slugs
        @param token         Query string passed on to ``_assemble_formats``
        @param display_id    ID used for log messages
        @param key_format    %-template applied to ``high`` / ``low``
        @param extra_fields  Fields merged into every returned format
        """
        extra_fields = extra_fields or {}
        formats = []
        additional_key_formats = {
            '%s': {},
            'ad_%s': {
                'format_note': 'Audio description',
                'source_preference': -2,
            },
        }
        for additional_key_format, additional_key_fields in additional_key_formats.items():
            for key_quality, key_index in enumerate(('high', 'low')):
                full_key_index = additional_key_format % (key_format % key_index)
                if not data.get(full_key_index):
                    continue
                formats.extend(self._assemble_formats(data[full_key_index], full_key_index, display_id, token, {
                    'quality': -1 - key_quality,  # 'high' ranks above 'low'
                    **additional_key_fields,
                    **extra_fields,
                }))
        return formats

    def _perform_login(self, username, password):
        """Log in via the BrainPOP API; emit a warning (not an error) on failure."""
        login_res = self._download_json(
            'https://api.brainpop.com/api/login', None,
            data=json.dumps({'username': username, 'password': password}).encode(),
            headers={
                'Content-Type': 'application/json',
                'Referer': self._ORIGIN,
            }, note='Logging in', errnote='Unable to log in', expected_status=400)
        status_code = int_or_none(login_res.get('status_code'))
        # Any status other than 1505 is reported as a failed login
        if status_code != 1505:
            # Resolve the reason *before* formatting: an f-string is always
            # truthy, so chaining `or` after it never reached the fallback
            # and produced "Unable to login: None".
            reason = (
                self._LOGIN_ERRORS.get(status_code)
                or login_res.get('message')
                or f'Got status code {status_code}')
            self.report_warning(f'Unable to login: {reason}')
|
||||||
|
|
||||||
|
|
||||||
|
class BrainPOPIE(BrainPOPBaseIE):
    """Extractor for movies on www.brainpop.com, backed by the JSON content API."""
    _ORIGIN = 'https://www.brainpop.com'
    _VIDEO_URL = 'https://svideos.brainpop.com'
    _HLS_URL = 'https://hls.brainpop.com'
    _CDN_URL = 'https://cdn.brainpop.com'
    _TESTS = [{
        'url': 'https://www.brainpop.com/health/conflictresolution/martinlutherkingjr/movie?ref=null',
        'md5': '3ead374233ae74c7f1b0029a01c972f0',
        'info_dict': {
            'id': '1f3259fa457292b4',
            'ext': 'mp4',
            'title': 'Martin Luther King, Jr.',
            'display_id': 'martinlutherkingjr',
            'description': 'md5:f403dbb2bf3ccc7cf4c59d9e43e3c349',
        },
    }, {
        'url': 'https://www.brainpop.com/science/space/bigbang/',
        'md5': '9a1ff0e77444dd9e437354eb669c87ec',
        'info_dict': {
            'id': 'acae52cd48c99acf',
            'ext': 'mp4',
            'title': 'Big Bang',
            'display_id': 'bigbang',
            'description': 'md5:3e53b766b0f116f631b13f4cae185d38',
        },
        'skip': 'Requires login',
    }]

    def _real_extract(self, url):
        """Fetch movie and topic JSON for ``url`` and build the info dict."""
        slug, display_id = self._match_valid_url(url).group('slug', 'id')
        movie_data = self._download_json(
            f'https://api.brainpop.com/api/content/published/bp/en/{slug}/movie?full=1', display_id,
            'Downloading movie data JSON', 'Unable to download movie data')['data']
        # Topic metadata is optional: fall back to the copy embedded in the movie data
        topic_data = traverse_obj(self._download_json(
            f'https://api.brainpop.com/api/content/published/bp/en/{slug}?full=1', display_id,
            'Downloading topic data JSON', 'Unable to download topic data', fatal=False),
            ('data', 'topic'), expected_type=dict) or movie_data['topic']

        if not traverse_obj(movie_data, ('access', 'allow')):
            reason = traverse_obj(movie_data, ('access', 'reason'))
            # `reason` may be absent; the bare `'logged' in reason` raised
            # TypeError on None instead of reporting the access problem.
            if reason and 'logged' in reason:
                self.raise_login_required(reason, metadata_available=True)
            else:
                self.raise_no_formats(
                    reason or 'Access to this movie is not allowed', video_id=display_id)

        # Both raise_* calls above can be downgraded to warnings, so keep
        # going with whatever is available instead of failing on a missing key.
        # NOTE(review): assumes `feature` may be absent when access is denied - confirm.
        movie_feature = movie_data.get('feature') or {}
        movie_feature_data = movie_feature.get('data') or {}

        formats = self._extract_adaptive_formats(
            movie_feature_data, movie_feature_data.get('token', ''), display_id, '%s_v2', {
                'language': movie_feature.get('language') or 'en',
                'language_preference': 10,
            })
        for lang, localized_feature in traverse_obj(
                movie_feature, 'localization', default={}, expected_type=dict).items():
            formats.extend(self._extract_adaptive_formats(
                localized_feature, localized_feature.get('token', ''), display_id, '%s_v2', {
                    'language': lang,
                    'language_preference': -10,
                }))

        # TODO: Do localization fields also have subtitles?
        subtitles = {}
        # Keys of the form `subtitles_<lang>` hold CDN-relative subtitle paths
        for name, sub_path in movie_feature_data.items():
            lang = self._search_regex(
                r'^subtitles_(?P<lang>\w+)$', name, 'subtitle metadata', default=None)
            if lang and sub_path:
                subtitles.setdefault(lang, []).append({
                    'url': urljoin(self._CDN_URL, sub_path),
                })

        return {
            'id': topic_data['topic_id'],
            'display_id': display_id,
            'title': topic_data.get('name'),
            'description': topic_data.get('synopsis'),
            'formats': formats,
            'subtitles': subtitles,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class BrainPOPLegacyBaseIE(BrainPOPBaseIE):
    # Base for the BrainPOP sites whose topic data is read from a
    # `var content = {...};` assignment in the page rather than the JSON API.

    def _parse_js_topic_data(self, topic_data, display_id, token):
        """Turn the page's topic object into an info dict.

        @param topic_data  Topic mapping; must contain `movies` and `EntryID`
        @param display_id  ID used for log messages and as `display_id`
        @param token       Query string passed on to the format URLs
        """
        # TODO: Are there non-burned subtitles?
        available_formats = self._extract_adaptive_formats(
            topic_data['movies'], token, display_id)

        info = {
            'id': topic_data['EntryID'],
            'display_id': display_id,
        }
        # Optional metadata: source key -> info dict field
        for field, source_key in (
                ('title', 'name'), ('alt_title', 'title'), ('description', 'synopsis')):
            info[field] = topic_data.get(source_key)
        info['formats'] = available_formats
        return info

    def _real_extract(self, url):
        """Download the page, locate the topic object and video token, and parse them."""
        display_id = self._match_valid_url(url).group('id')
        webpage = self._download_webpage(url, display_id)

        content = self._search_json(
            r'var\s+content\s*=\s*', webpage, 'content data',
            display_id, end_pattern=';')
        topic_data = content['category']['unit']['topic']

        token = self._search_regex(
            r'ec_token\s*:\s*[\'"]([^\'"]+)', webpage, 'video token')
        return self._parse_js_topic_data(topic_data, display_id, token)
|
||||||
|
|
||||||
|
|
||||||
|
class BrainPOPJrIE(BrainPOPLegacyBaseIE):
    # BrainPOP Jr. - extraction logic is inherited; only the hosts differ.
    _ORIGIN = 'https://jr.brainpop.com'  # site root that _VALID_URL is built from
    _HLS_URL = 'https://hls-jr.brainpop.com'  # base for the .m3u8 manifests
    _VIDEO_URL = 'https://svideos-jr.brainpop.com'  # base for the direct video URLs
    _CDN_URL = 'https://cdn-jr.brainpop.com'

    _TESTS = [
        {
            'url': 'https://jr.brainpop.com/health/feelingsandsel/emotions/',
            'md5': '04e0561bb21770f305a0ce6cf0d869ab',
            'info_dict': {
                'id': '347',
                'ext': 'mp4',
                'title': 'Emotions',
                'display_id': 'emotions',
            },
        },
        {
            'url': 'https://jr.brainpop.com/science/habitats/arctichabitats/',
            'md5': 'b0ed063bbd1910df00220ee29340f5d6',
            'info_dict': {
                'id': '29',
                'ext': 'mp4',
                'title': 'Arctic Habitats',
                'display_id': 'arctichabitats',
            },
            'skip': 'Requires login',
        },
    ]
|
||||||
|
|
||||||
|
|
||||||
|
class BrainPOPELLIE(BrainPOPLegacyBaseIE):
    # BrainPOP ELL - note that the media hosts use the `-esl` suffix,
    # unlike the `ell.` site origin.
    _ORIGIN = 'https://ell.brainpop.com'  # site root that _VALID_URL is built from
    _HLS_URL = 'https://hls-esl.brainpop.com'  # base for the .m3u8 manifests
    _VIDEO_URL = 'https://svideos-esl.brainpop.com'  # base for the direct video URLs
    _CDN_URL = 'https://cdn-esl.brainpop.com'

    _TESTS = [
        {
            'url': 'https://ell.brainpop.com/level1/unit1/lesson1/',
            'md5': 'a2012700cfb774acb7ad2e8834eed0d0',
            'info_dict': {
                'id': '1',
                'ext': 'mp4',
                'title': 'Lesson 1',
                'display_id': 'lesson1',
                'alt_title': 'Personal Pronouns',
            },
        },
        {
            'url': 'https://ell.brainpop.com/level3/unit6/lesson5/',
            'md5': 'be19c8292c87b24aacfb5fda2f3f8363',
            'info_dict': {
                'id': '101',
                'ext': 'mp4',
                'title': 'Lesson 5',
                'display_id': 'lesson5',
                'alt_title': 'Review: Unit 6',
            },
            'skip': 'Requires login',
        },
    ]
|
||||||
|
|
||||||
|
|
||||||
|
class BrainPOPEspIE(BrainPOPLegacyBaseIE):
    # Spanish-language site - extraction logic is inherited; only the
    # description, origin and CDN path differ from the other legacy sites.
    IE_DESC = 'BrainPOP Español'
    _ORIGIN = 'https://esp.brainpop.com'  # site root that _VALID_URL is built from
    _HLS_URL = 'https://hls.brainpop.com'  # base for the .m3u8 manifests
    _VIDEO_URL = 'https://svideos.brainpop.com'  # base for the direct video URLs
    _CDN_URL = 'https://cdn.brainpop.com/mx'

    _TESTS = [
        {
            'url': 'https://esp.brainpop.com/ciencia/la_diversidad_de_la_vida/ecosistemas/',
            'md5': 'cb3f062db2b3c5240ddfcfde7108f8c9',
            'info_dict': {
                'id': '3893',
                'ext': 'mp4',
                'title': 'Ecosistemas',
                'display_id': 'ecosistemas',
                'description': 'md5:80fc55b07e241f8c8f2aa8d74deaf3c3',
            },
        },
        {
            'url': 'https://esp.brainpop.com/espanol/la_escritura/emily_dickinson/',
            'md5': '98c1b9559e0e33777209c425cda7dac4',
            'info_dict': {
                'id': '7146',
                'ext': 'mp4',
                'title': 'Emily Dickinson',
                'display_id': 'emily_dickinson',
                'description': 'md5:2795ad87b1d239c9711c1e92ab5a978b',
            },
            'skip': 'Requires login',
        },
    ]
|
||||||
|
|
||||||
|
|
||||||
|
class BrainPOPFrIE(BrainPOPLegacyBaseIE):
    # French-language site - extraction logic is inherited; only the
    # description, origin and CDN path differ from the other legacy sites.
    IE_DESC = 'BrainPOP Français'
    _ORIGIN = 'https://fr.brainpop.com'  # site root that _VALID_URL is built from
    _HLS_URL = 'https://hls.brainpop.com'  # base for the .m3u8 manifests
    _VIDEO_URL = 'https://svideos.brainpop.com'  # base for the direct video URLs
    _CDN_URL = 'https://cdn.brainpop.com/fr'

    _TESTS = [
        {
            'url': 'https://fr.brainpop.com/sciencesdelaterre/energie/sourcesdenergie/',
            'md5': '97e7f48af8af93f8a2be11709f239371',
            'info_dict': {
                'id': '1651',
                'ext': 'mp4',
                'title': 'Sources d\'énergie',
                'display_id': 'sourcesdenergie',
                'description': 'md5:7eece350f019a21ef9f64d4088b2d857',
            },
        },
        {
            'url': 'https://fr.brainpop.com/francais/ecrire/plagiat/',
            'md5': '0cf2b4f89804d0dd4a360a51310d445a',
            'info_dict': {
                'id': '5803',
                'ext': 'mp4',
                'title': 'Plagiat',
                'display_id': 'plagiat',
                'description': 'md5:4496d87127ace28e8b1eda116e77cd2b',
            },
            'skip': 'Requires login',
        },
    ]
|
||||||
|
|
||||||
|
|
||||||
|
class BrainPOPIlIE(BrainPOPLegacyBaseIE):
    # Hebrew-language site - extraction logic is inherited; only the
    # description, origin and CDN path differ from the other legacy sites.
    IE_DESC = 'BrainPOP Hebrew'
    _ORIGIN = 'https://il.brainpop.com'  # site root that _VALID_URL is built from
    _HLS_URL = 'https://hls.brainpop.com'  # base for the .m3u8 manifests
    _VIDEO_URL = 'https://svideos.brainpop.com'  # base for the direct video URLs
    _CDN_URL = 'https://cdn.brainpop.com/he'

    _TESTS = [
        {
            'url': 'https://il.brainpop.com/category_9/subcategory_150/subjects_3782/',
            'md5': '9e4ea9dc60ecd385a6e5ca12ccf31641',
            'info_dict': {
                'id': '3782',
                'ext': 'mp4',
                'title': 'md5:e993632fcda0545d9205602ec314ad67',
                'display_id': 'subjects_3782',
                'description': 'md5:4cc084a8012beb01f037724423a4d4ed',
            },
        },
    ]
|
|
@ -1,117 +1,185 @@
|
||||||
import re
|
|
||||||
|
|
||||||
from .adobepass import AdobePassIE
|
from .adobepass import AdobePassIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
smuggle_url,
|
extract_attributes,
|
||||||
update_url_query,
|
|
||||||
int_or_none,
|
|
||||||
float_or_none,
|
float_or_none,
|
||||||
try_get,
|
get_element_html_by_class,
|
||||||
dict_get,
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
|
parse_age_limit,
|
||||||
|
remove_end,
|
||||||
|
str_or_none,
|
||||||
|
traverse_obj,
|
||||||
|
unescapeHTML,
|
||||||
|
unified_timestamp,
|
||||||
|
update_url_query,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BravoTVIE(AdobePassIE):
|
class BravoTVIE(AdobePassIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<req_id>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<site>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
|
'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
|
||||||
'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'epL0pmK1kQlT',
|
'id': '3923059',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The Top Chef Season 16 Winner Is...',
|
'title': 'The Top Chef Season 16 Winner Is...',
|
||||||
'description': 'Find out who takes the title of Top Chef!',
|
'description': 'Find out who takes the title of Top Chef!',
|
||||||
'uploader': 'NBCU-BRAV',
|
|
||||||
'upload_date': '20190314',
|
'upload_date': '20190314',
|
||||||
'timestamp': 1552591860,
|
'timestamp': 1552591860,
|
||||||
'season_number': 16,
|
'season_number': 16,
|
||||||
'episode_number': 15,
|
'episode_number': 15,
|
||||||
'series': 'Top Chef',
|
'series': 'Top Chef',
|
||||||
'episode': 'The Top Chef Season 16 Winner Is...',
|
'episode': 'The Top Chef Season 16 Winner Is...',
|
||||||
'duration': 190.0,
|
'duration': 190.357,
|
||||||
}
|
'season': 'Season 16',
|
||||||
|
'thumbnail': r're:^https://.+\.jpg',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
'url': 'https://www.bravotv.com/top-chef/season-20/episode-1/london-calling',
|
||||||
'only_matching': True,
|
'info_dict': {
|
||||||
|
'id': '9000234570',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'London Calling',
|
||||||
|
'description': 'md5:5af95a8cbac1856bd10e7562f86bb759',
|
||||||
|
'upload_date': '20230310',
|
||||||
|
'timestamp': 1678410000,
|
||||||
|
'season_number': 20,
|
||||||
|
'episode_number': 1,
|
||||||
|
'series': 'Top Chef',
|
||||||
|
'episode': 'London Calling',
|
||||||
|
'duration': 3266.03,
|
||||||
|
'season': 'Season 20',
|
||||||
|
'chapters': 'count:7',
|
||||||
|
'thumbnail': r're:^https://.+\.jpg',
|
||||||
|
'age_limit': 14,
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
'skip': 'This video requires AdobePass MSO credentials',
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-1/closing-night',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3692045',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Closing Night',
|
||||||
|
'description': 'md5:3170065c5c2f19548d72a4cbc254af63',
|
||||||
|
'upload_date': '20180401',
|
||||||
|
'timestamp': 1522623600,
|
||||||
|
'season_number': 1,
|
||||||
|
'episode_number': 1,
|
||||||
|
'series': 'In Ice Cold Blood',
|
||||||
|
'episode': 'Closing Night',
|
||||||
|
'duration': 2629.051,
|
||||||
|
'season': 'Season 1',
|
||||||
|
'chapters': 'count:6',
|
||||||
|
'thumbnail': r're:^https://.+\.jpg',
|
||||||
|
'age_limit': 14,
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
'skip': 'This video requires AdobePass MSO credentials',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
|
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3974019',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
|
||||||
|
'description': 'md5:f9d638dd6946a1c1c0533a9c6100eae5',
|
||||||
|
'upload_date': '20190617',
|
||||||
|
'timestamp': 1560790800,
|
||||||
|
'season_number': 2,
|
||||||
|
'episode_number': 16,
|
||||||
|
'series': 'In Ice Cold Blood',
|
||||||
|
'episode': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
|
||||||
|
'duration': 68.235,
|
||||||
|
'season': 'Season 2',
|
||||||
|
'thumbnail': r're:^https://.+\.jpg',
|
||||||
|
'age_limit': 14,
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
site, display_id = self._match_valid_url(url).groups()
|
site, display_id = self._match_valid_url(url).group('site', 'id')
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
settings = self._parse_json(self._search_regex(
|
settings = self._search_json(
|
||||||
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
|
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>', webpage, 'settings', display_id)
|
||||||
display_id)
|
tve = extract_attributes(get_element_html_by_class('tve-video-deck-app', webpage) or '')
|
||||||
info = {}
|
|
||||||
query = {
|
query = {
|
||||||
'mbr': 'true',
|
'manifest': 'm3u',
|
||||||
|
'formats': 'm3u,mpeg4',
|
||||||
}
|
}
|
||||||
account_pid, release_pid = [None] * 2
|
|
||||||
tve = settings.get('ls_tve')
|
|
||||||
if tve:
|
if tve:
|
||||||
query['manifest'] = 'm3u'
|
account_pid = tve.get('data-mpx-media-account-pid') or 'HNK2IC'
|
||||||
mobj = re.search(r'<[^>]+id="pdk-player"[^>]+data-url=["\']?(?:https?:)?//player\.theplatform\.com/p/([^/]+)/(?:[^/]+/)*select/([^?#&"\']+)', webpage)
|
account_id = tve['data-mpx-media-account-id']
|
||||||
if mobj:
|
metadata = self._parse_json(
|
||||||
account_pid, tp_path = mobj.groups()
|
tve.get('data-normalized-video', ''), display_id, fatal=False, transform_source=unescapeHTML)
|
||||||
release_pid = tp_path.strip('/').split('/')[-1]
|
video_id = tve.get('data-guid') or metadata['guid']
|
||||||
else:
|
if tve.get('data-entitlement') == 'auth':
|
||||||
account_pid = 'HNK2IC'
|
auth = traverse_obj(settings, ('tve_adobe_auth', {dict})) or {}
|
||||||
tp_path = release_pid = tve['release_pid']
|
site = remove_end(site, 'tv')
|
||||||
if tve.get('entitlement') == 'auth':
|
release_pid = tve['data-release-pid']
|
||||||
adobe_pass = settings.get('tve_adobe_auth', {})
|
|
||||||
if site == 'bravotv':
|
|
||||||
site = 'bravo'
|
|
||||||
resource = self._get_mvpd_resource(
|
resource = self._get_mvpd_resource(
|
||||||
adobe_pass.get('adobePassResourceId') or site,
|
tve.get('data-adobe-pass-resource-id') or auth.get('adobePassResourceId') or site,
|
||||||
tve['title'], release_pid, tve.get('rating'))
|
tve['data-title'], release_pid, tve.get('data-rating'))
|
||||||
query['auth'] = self._extract_mvpd_auth(
|
query.update({
|
||||||
url, release_pid,
|
'switch': 'HLSServiceSecure',
|
||||||
adobe_pass.get('adobePassRequestorId') or site, resource)
|
'auth': self._extract_mvpd_auth(
|
||||||
|
url, release_pid, auth.get('adobePassRequestorId') or site, resource),
|
||||||
|
})
|
||||||
|
|
||||||
else:
|
else:
|
||||||
shared_playlist = settings['ls_playlist']
|
ls_playlist = traverse_obj(settings, ('ls_playlist', ..., {dict}), get_all=False) or {}
|
||||||
account_pid = shared_playlist['account_pid']
|
account_pid = ls_playlist.get('mpxMediaAccountPid') or 'PHSl-B'
|
||||||
metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']]
|
account_id = ls_playlist['mpxMediaAccountId']
|
||||||
tp_path = release_pid = metadata.get('release_pid')
|
video_id = ls_playlist['defaultGuid']
|
||||||
if not release_pid:
|
metadata = traverse_obj(
|
||||||
release_pid = metadata['guid']
|
ls_playlist, ('videos', lambda _, v: v['guid'] == video_id, {dict}), get_all=False)
|
||||||
tp_path = 'media/guid/2140479951/' + release_pid
|
|
||||||
info.update({
|
|
||||||
'title': metadata['title'],
|
|
||||||
'description': metadata.get('description'),
|
|
||||||
'season_number': int_or_none(metadata.get('season_num')),
|
|
||||||
'episode_number': int_or_none(metadata.get('episode_num')),
|
|
||||||
})
|
|
||||||
query['switch'] = 'progressive'
|
|
||||||
|
|
||||||
tp_url = 'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path)
|
|
||||||
|
|
||||||
|
tp_url = f'https://link.theplatform.com/s/{account_pid}/media/guid/{account_id}/{video_id}'
|
||||||
tp_metadata = self._download_json(
|
tp_metadata = self._download_json(
|
||||||
update_url_query(tp_url, {'format': 'preview'}),
|
update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)
|
||||||
display_id, fatal=False)
|
|
||||||
if tp_metadata:
|
|
||||||
info.update({
|
|
||||||
'title': tp_metadata.get('title'),
|
|
||||||
'description': tp_metadata.get('description'),
|
|
||||||
'duration': float_or_none(tp_metadata.get('duration'), 1000),
|
|
||||||
'season_number': int_or_none(
|
|
||||||
dict_get(tp_metadata, ('pl1$seasonNumber', 'nbcu$seasonNumber'))),
|
|
||||||
'episode_number': int_or_none(
|
|
||||||
dict_get(tp_metadata, ('pl1$episodeNumber', 'nbcu$episodeNumber'))),
|
|
||||||
# For some reason the series is sometimes wrapped into a single element array.
|
|
||||||
'series': try_get(
|
|
||||||
dict_get(tp_metadata, ('pl1$show', 'nbcu$show')),
|
|
||||||
lambda x: x[0] if isinstance(x, list) else x,
|
|
||||||
expected_type=str),
|
|
||||||
'episode': dict_get(
|
|
||||||
tp_metadata, ('pl1$episodeName', 'nbcu$episodeName', 'title')),
|
|
||||||
})
|
|
||||||
|
|
||||||
info.update({
|
seconds_or_none = lambda x: float_or_none(x, 1000)
|
||||||
'_type': 'url_transparent',
|
chapters = traverse_obj(tp_metadata, ('chapters', ..., {
|
||||||
'id': release_pid,
|
'start_time': ('startTime', {seconds_or_none}),
|
||||||
'url': smuggle_url(update_url_query(tp_url, query), {'force_smil_url': True}),
|
'end_time': ('endTime', {seconds_or_none}),
|
||||||
'ie_key': 'ThePlatform',
|
}))
|
||||||
})
|
# prune pointless single chapters that span the entire duration from short videos
|
||||||
return info
|
if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
|
||||||
|
chapters = None
|
||||||
|
|
||||||
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||||
|
update_url_query(f'{tp_url}/stream.m3u8', query), video_id, 'mp4', m3u8_id='hls')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'chapters': chapters,
|
||||||
|
**merge_dicts(traverse_obj(tp_metadata, {
|
||||||
|
'title': 'title',
|
||||||
|
'description': 'description',
|
||||||
|
'duration': ('duration', {seconds_or_none}),
|
||||||
|
'timestamp': ('pubDate', {seconds_or_none}),
|
||||||
|
'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
|
||||||
|
'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
|
||||||
|
'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),
|
||||||
|
'episode': (('title', 'pl1$episodeNumber', 'nbcu$episodeNumber'), {str_or_none}),
|
||||||
|
'age_limit': ('ratings', ..., 'rating', {parse_age_limit}),
|
||||||
|
}, get_all=False), traverse_obj(metadata, {
|
||||||
|
'title': 'title',
|
||||||
|
'description': 'description',
|
||||||
|
'duration': ('durationInSeconds', {int_or_none}),
|
||||||
|
'timestamp': ('airDate', {unified_timestamp}),
|
||||||
|
'thumbnail': ('thumbnailUrl', {url_or_none}),
|
||||||
|
'season_number': ('seasonNumber', {int_or_none}),
|
||||||
|
'episode_number': ('episodeNumber', {int_or_none}),
|
||||||
|
'episode': 'episodeTitle',
|
||||||
|
'series': 'show',
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
|
|
@ -575,6 +575,7 @@ class BrightcoveNewBaseIE(AdobePassIE):
|
||||||
self.raise_no_formats(
|
self.raise_no_formats(
|
||||||
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
|
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
|
||||||
|
|
||||||
|
headers.pop('Authorization', None) # or else http formats will give error 400
|
||||||
for f in formats:
|
for f in formats:
|
||||||
f.setdefault('http_headers', {}).update(headers)
|
f.setdefault('http_headers', {}).update(headers)
|
||||||
|
|
||||||
|
@ -895,8 +896,9 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||||
store_pk(policy_key)
|
store_pk(policy_key)
|
||||||
return policy_key
|
return policy_key
|
||||||
|
|
||||||
api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
|
token = smuggled_data.get('token')
|
||||||
headers = {}
|
api_url = f'https://{"edge-auth" if token else "edge"}.api.brightcove.com/playback/v1/accounts/{account_id}/{content_type}s/{video_id}'
|
||||||
|
headers = {'Authorization': f'Bearer {token}'} if token else {}
|
||||||
referrer = smuggled_data.get('referrer') # XXX: notice the spelling/case of the key
|
referrer = smuggled_data.get('referrer') # XXX: notice the spelling/case of the key
|
||||||
if referrer:
|
if referrer:
|
||||||
headers.update({
|
headers.update({
|
||||||
|
|
|
@ -8,14 +8,16 @@ from ..compat import (
|
||||||
compat_str,
|
compat_str,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
join_nonempty,
|
join_nonempty,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
parse_iso8601,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
|
traverse_obj,
|
||||||
try_get,
|
try_get,
|
||||||
ExtractorError,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -404,7 +406,7 @@ class CBCGemIE(InfoExtractor):
|
||||||
|
|
||||||
class CBCGemPlaylistIE(InfoExtractor):
|
class CBCGemPlaylistIE(InfoExtractor):
|
||||||
IE_NAME = 'gem.cbc.ca:playlist'
|
IE_NAME = 'gem.cbc.ca:playlist'
|
||||||
_VALID_URL = r'https?://gem\.cbc\.ca/media/(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)'
|
_VALID_URL = r'https?://gem\.cbc\.ca/(?:media/)?(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# TV show playlist, all public videos
|
# TV show playlist, all public videos
|
||||||
'url': 'https://gem.cbc.ca/media/schitts-creek/s06',
|
'url': 'https://gem.cbc.ca/media/schitts-creek/s06',
|
||||||
|
@ -414,6 +416,9 @@ class CBCGemPlaylistIE(InfoExtractor):
|
||||||
'title': 'Season 6',
|
'title': 'Season 6',
|
||||||
'description': 'md5:6a92104a56cbeb5818cc47884d4326a2',
|
'description': 'md5:6a92104a56cbeb5818cc47884d4326a2',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://gem.cbc.ca/schitts-creek/s06',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/'
|
_API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/'
|
||||||
|
|
||||||
|
@ -473,49 +478,90 @@ class CBCGemPlaylistIE(InfoExtractor):
|
||||||
|
|
||||||
class CBCGemLiveIE(InfoExtractor):
|
class CBCGemLiveIE(InfoExtractor):
|
||||||
IE_NAME = 'gem.cbc.ca:live'
|
IE_NAME = 'gem.cbc.ca:live'
|
||||||
_VALID_URL = r'https?://gem\.cbc\.ca/live/(?P<id>\d+)'
|
_VALID_URL = r'https?://gem\.cbc\.ca/live(?:-event)?/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TESTS = [
|
||||||
'url': 'https://gem.cbc.ca/live/920604739687',
|
{
|
||||||
'info_dict': {
|
'url': 'https://gem.cbc.ca/live/920604739687',
|
||||||
'title': 'Ottawa',
|
'info_dict': {
|
||||||
'description': 'The live TV channel and local programming from Ottawa',
|
'title': 'Ottawa',
|
||||||
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg',
|
'description': 'The live TV channel and local programming from Ottawa',
|
||||||
'is_live': True,
|
'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg',
|
||||||
'id': 'AyqZwxRqh8EH',
|
'is_live': True,
|
||||||
'ext': 'mp4',
|
'id': 'AyqZwxRqh8EH',
|
||||||
'timestamp': 1492106160,
|
'ext': 'mp4',
|
||||||
'upload_date': '20170413',
|
'timestamp': 1492106160,
|
||||||
'uploader': 'CBCC-NEW',
|
'upload_date': '20170413',
|
||||||
|
'uploader': 'CBCC-NEW',
|
||||||
|
},
|
||||||
|
'skip': 'Live might have ended',
|
||||||
},
|
},
|
||||||
'skip': 'Live might have ended',
|
{
|
||||||
}
|
'url': 'https://gem.cbc.ca/live/44',
|
||||||
|
'info_dict': {
|
||||||
# It's unclear where the chars at the end come from, but they appear to be
|
'id': '44',
|
||||||
# constant. Might need updating in the future.
|
'ext': 'mp4',
|
||||||
# There are two URLs, some livestreams are in one, and some
|
'is_live': True,
|
||||||
# in the other. The JSON schema is the same for both.
|
'title': r're:^Ottawa [0-9\-: ]+',
|
||||||
_API_URLS = ['https://tpfeed.cbc.ca/f/ExhSPC/t_t3UKJR6MAT', 'https://tpfeed.cbc.ca/f/ExhSPC/FNiv9xQx_BnT']
|
'description': 'The live TV channel and local programming from Ottawa',
|
||||||
|
'live_status': 'is_live',
|
||||||
|
'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*'
|
||||||
|
},
|
||||||
|
'params': {'skip_download': True},
|
||||||
|
'skip': 'Live might have ended',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://gem.cbc.ca/live-event/10835',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '10835',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'is_live': True,
|
||||||
|
'title': r're:^The National \| Biden’s trip wraps up, Paltrow testifies, Bird flu [0-9\-: ]+',
|
||||||
|
'description': 'March 24, 2023 | President Biden’s Ottawa visit ends with big pledges from both countries. Plus, Gwyneth Paltrow testifies in her ski collision trial.',
|
||||||
|
'live_status': 'is_live',
|
||||||
|
'thumbnail': r're:https://images.gem.cbc.ca/v1/cbc-gem/live/.*',
|
||||||
|
'timestamp': 1679706000,
|
||||||
|
'upload_date': '20230325',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': True},
|
||||||
|
'skip': 'Live might have ended',
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
video_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['data']
|
||||||
|
|
||||||
for api_url in self._API_URLS:
|
# Two types of metadata JSON
|
||||||
video_info = next((
|
if not video_info.get('formattedIdMedia'):
|
||||||
stream for stream in self._download_json(api_url, video_id)['entries']
|
video_info = traverse_obj(
|
||||||
if stream.get('guid') == video_id), None)
|
video_info, (('freeTv', ('streams', ...)), 'items', lambda _, v: v['key'] == video_id, {dict}),
|
||||||
if video_info:
|
get_all=False, default={})
|
||||||
break
|
|
||||||
else:
|
video_stream_id = video_info.get('formattedIdMedia')
|
||||||
|
if not video_stream_id:
|
||||||
raise ExtractorError('Couldn\'t find video metadata, maybe this livestream is now offline', expected=True)
|
raise ExtractorError('Couldn\'t find video metadata, maybe this livestream is now offline', expected=True)
|
||||||
|
|
||||||
|
stream_data = self._download_json(
|
||||||
|
'https://services.radio-canada.ca/media/validation/v2/', video_id, query={
|
||||||
|
'appCode': 'mpx',
|
||||||
|
'connectionType': 'hd',
|
||||||
|
'deviceType': 'ipad',
|
||||||
|
'idMedia': video_stream_id,
|
||||||
|
'multibitrate': 'true',
|
||||||
|
'output': 'json',
|
||||||
|
'tech': 'hls',
|
||||||
|
'manifestType': 'desktop',
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
|
||||||
'ie_key': 'ThePlatform',
|
|
||||||
'url': video_info['content'][0]['url'],
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_info.get('title'),
|
'formats': self._extract_m3u8_formats(stream_data['url'], video_id, 'mp4', live=True),
|
||||||
'description': video_info.get('description'),
|
|
||||||
'tags': try_get(video_info, lambda x: x['keywords'].split(', ')),
|
|
||||||
'thumbnail': video_info.get('cbc$staticImage'),
|
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
|
**traverse_obj(video_info, {
|
||||||
|
'title': 'title',
|
||||||
|
'description': 'description',
|
||||||
|
'thumbnail': ('images', 'card', 'url'),
|
||||||
|
'timestamp': ('airDate', {parse_iso8601}),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,14 @@
|
||||||
|
from .brightcove import BrightcoveNewIE
|
||||||
|
from .common import InfoExtractor
|
||||||
from .theplatform import ThePlatformFeedIE
|
from .theplatform import ThePlatformFeedIE
|
||||||
|
from .youtube import YoutubeIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
extract_attributes,
|
||||||
|
get_element_html_by_id,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
|
smuggle_url,
|
||||||
xpath_element,
|
xpath_element,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
|
@ -162,3 +168,110 @@ class CBSIE(CBSBaseIE):
|
||||||
'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
|
'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
|
||||||
'thumbnail': url_or_none(xpath_text(video_data, 'previewImageURL')),
|
'thumbnail': url_or_none(xpath_text(video_data, 'previewImageURL')),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
class ParamountPressExpressIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?paramountpressexpress\.com(?:/[\w-]+)+/(?P<yt>yt-)?video/?\?watch=(?P<id>[\w-]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.paramountpressexpress.com/cbs-entertainment/shows/survivor/video/?watch=pnzew7e2hx',
|
||||||
|
'md5': '56631dbcadaab980d1fc47cb7b76cba4',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6322981580112',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'I’m Felicia',
|
||||||
|
'description': 'md5:88fad93f8eede1c9c8f390239e4c6290',
|
||||||
|
'uploader_id': '6055873637001',
|
||||||
|
'upload_date': '20230320',
|
||||||
|
'timestamp': 1679334960,
|
||||||
|
'duration': 49.557,
|
||||||
|
'thumbnail': r're:^https://.+\.jpg',
|
||||||
|
'tags': [],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.paramountpressexpress.com/cbs-entertainment/video/?watch=2s5eh8kppc',
|
||||||
|
'md5': 'edcb03e3210b88a3e56c05aa863e0e5b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6323036027112',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '‘Y&R’ Set Visit: Jerry O’Connell Quizzes Cast on Pre-Love Scene Rituals and More',
|
||||||
|
'description': 'md5:b929867a357aac5544b783d834c78383',
|
||||||
|
'uploader_id': '6055873637001',
|
||||||
|
'upload_date': '20230321',
|
||||||
|
'timestamp': 1679430180,
|
||||||
|
'duration': 132.032,
|
||||||
|
'thumbnail': r're:^https://.+\.jpg',
|
||||||
|
'tags': [],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.paramountpressexpress.com/paramount-plus/yt-video/?watch=OX9wJWOcqck',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'OX9wJWOcqck',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Rugrats | Season 2 Official Trailer | Paramount+',
|
||||||
|
'description': 'md5:1f7e26f5625a9f0d6564d9ad97a9f7de',
|
||||||
|
'uploader': 'Paramount Plus',
|
||||||
|
'uploader_id': '@paramountplus',
|
||||||
|
'uploader_url': 'http://www.youtube.com/@paramountplus',
|
||||||
|
'channel': 'Paramount Plus',
|
||||||
|
'channel_id': 'UCrRttZIypNTA1Mrfwo745Sg',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCrRttZIypNTA1Mrfwo745Sg',
|
||||||
|
'upload_date': '20230316',
|
||||||
|
'duration': 88,
|
||||||
|
'age_limit': 0,
|
||||||
|
'availability': 'public',
|
||||||
|
'live_status': 'not_live',
|
||||||
|
'playable_in_embed': True,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'channel_follower_count': int,
|
||||||
|
'thumbnail': 'https://i.ytimg.com/vi/OX9wJWOcqck/maxresdefault.jpg',
|
||||||
|
'categories': ['Entertainment'],
|
||||||
|
'tags': ['Rugrats'],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.paramountpressexpress.com/showtime/yt-video/?watch=_ljssSoDLkw',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '_ljssSoDLkw',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Lavell Crawford: THEE Lavell Crawford Comedy Special Official Trailer | SHOWTIME',
|
||||||
|
'description': 'md5:39581bcc3fd810209b642609f448af70',
|
||||||
|
'uploader': 'SHOWTIME',
|
||||||
|
'uploader_id': '@Showtime',
|
||||||
|
'uploader_url': 'http://www.youtube.com/@Showtime',
|
||||||
|
'channel': 'SHOWTIME',
|
||||||
|
'channel_id': 'UCtwMWJr2BFPkuJTnSvCESSQ',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCtwMWJr2BFPkuJTnSvCESSQ',
|
||||||
|
'upload_date': '20230209',
|
||||||
|
'duration': 49,
|
||||||
|
'age_limit': 0,
|
||||||
|
'availability': 'public',
|
||||||
|
'live_status': 'not_live',
|
||||||
|
'playable_in_embed': True,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'channel_follower_count': int,
|
||||||
|
'thumbnail': 'https://i.ytimg.com/vi_webp/_ljssSoDLkw/maxresdefault.webp',
|
||||||
|
'categories': ['People & Blogs'],
|
||||||
|
'tags': 'count:27',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id, is_youtube = self._match_valid_url(url).group('id', 'yt')
|
||||||
|
if is_youtube:
|
||||||
|
return self.url_result(display_id, YoutubeIE)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'\bvideo_id\s*=\s*["\'](\d+)["\']\s*,', webpage, 'Brightcove ID')
|
||||||
|
token = self._search_regex(r'\btoken\s*=\s*["\']([\w.-]+)["\']', webpage, 'token')
|
||||||
|
|
||||||
|
player = extract_attributes(get_element_html_by_id('vcbrightcoveplayer', webpage) or '')
|
||||||
|
account_id = player.get('data-account') or '6055873637001'
|
||||||
|
player_id = player.get('data-player') or 'OtLKgXlO9F'
|
||||||
|
embed = player.get('data-embed') or 'default'
|
||||||
|
|
||||||
|
return self.url_result(smuggle_url(
|
||||||
|
f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}',
|
||||||
|
{'token': token}), BrightcoveNewIE)
|
||||||
|
|
61
yt_dlp/extractor/clipchamp.py
Normal file
61
yt_dlp/extractor/clipchamp.py
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
traverse_obj,
|
||||||
|
unified_timestamp,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ClipchampIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?clipchamp\.com/watch/(?P<id>[\w-]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://clipchamp.com/watch/gRXZ4ZhdDaU',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'gRXZ4ZhdDaU',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Untitled video',
|
||||||
|
'uploader': 'Alexander Schwartz',
|
||||||
|
'timestamp': 1680805580,
|
||||||
|
'upload_date': '20230406',
|
||||||
|
'thumbnail': r're:^https?://.+\.jpg',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}]
|
||||||
|
|
||||||
|
_STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s'
|
||||||
|
_STREAM_URL_QUERY = {'parentOrigin': 'https://clipchamp.com'}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['video']
|
||||||
|
|
||||||
|
storage_location = data.get('storage_location')
|
||||||
|
if storage_location != 'cf_stream':
|
||||||
|
raise ExtractorError(f'Unsupported clip storage location "{storage_location}"')
|
||||||
|
|
||||||
|
path = data['download_url']
|
||||||
|
iframe = self._download_webpage(
|
||||||
|
f'https://iframe.cloudflarestream.com/{path}', video_id, 'Downloading player iframe')
|
||||||
|
subdomain = self._search_regex(
|
||||||
|
r'\bcustomer-domain-prefix=["\']([\w-]+)["\']', iframe,
|
||||||
|
'subdomain', fatal=False) or 'customer-2ut9yn3y6fta1yxe'
|
||||||
|
|
||||||
|
formats = self._extract_mpd_formats(
|
||||||
|
self._STREAM_URL_TMPL % (subdomain, path, 'mpd'), video_id,
|
||||||
|
query=self._STREAM_URL_QUERY, fatal=False, mpd_id='dash')
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
self._STREAM_URL_TMPL % (subdomain, path, 'm3u8'), video_id, 'mp4',
|
||||||
|
query=self._STREAM_URL_QUERY, fatal=False, m3u8_id='hls'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'formats': formats,
|
||||||
|
'uploader': ' '.join(traverse_obj(data, ('creator', ('first_name', 'last_name'), {str}))) or None,
|
||||||
|
**traverse_obj(data, {
|
||||||
|
'title': ('project', 'project_name', {str}),
|
||||||
|
'timestamp': ('created_at', {unified_timestamp}),
|
||||||
|
'thumbnail': ('thumbnail_url', {url_or_none}),
|
||||||
|
}),
|
||||||
|
}
|
|
@ -2998,6 +2998,8 @@ class InfoExtractor:
|
||||||
'protocol': 'ism',
|
'protocol': 'ism',
|
||||||
'fragments': fragments,
|
'fragments': fragments,
|
||||||
'has_drm': ism_doc.find('Protection') is not None,
|
'has_drm': ism_doc.find('Protection') is not None,
|
||||||
|
'language': stream_language,
|
||||||
|
'audio_channels': int_or_none(track.get('Channels')),
|
||||||
'_download_params': {
|
'_download_params': {
|
||||||
'stream_type': stream_type,
|
'stream_type': stream_type,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
|
@ -3528,8 +3530,8 @@ class InfoExtractor:
|
||||||
@classmethod
|
@classmethod
|
||||||
def is_single_video(cls, url):
|
def is_single_video(cls, url):
|
||||||
"""Returns whether the URL is of a single video, None if unknown"""
|
"""Returns whether the URL is of a single video, None if unknown"""
|
||||||
assert cls.suitable(url), 'The URL must be suitable for the extractor'
|
if cls.suitable(url):
|
||||||
return {'video': True, 'playlist': False}.get(cls._RETURN_TYPE)
|
return {'video': True, 'playlist': False}.get(cls._RETURN_TYPE)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def is_suitable(cls, age_limit):
|
def is_suitable(cls, age_limit):
|
||||||
|
@ -3671,18 +3673,22 @@ class InfoExtractor:
|
||||||
'start_time': start_function(chapter),
|
'start_time': start_function(chapter),
|
||||||
'title': title_function(chapter),
|
'title': title_function(chapter),
|
||||||
} for chapter in chapter_list or []]
|
} for chapter in chapter_list or []]
|
||||||
if not strict:
|
if strict:
|
||||||
|
warn = self.report_warning
|
||||||
|
else:
|
||||||
|
warn = self.write_debug
|
||||||
chapter_list.sort(key=lambda c: c['start_time'] or 0)
|
chapter_list.sort(key=lambda c: c['start_time'] or 0)
|
||||||
|
|
||||||
chapters = [{'start_time': 0}]
|
chapters = [{'start_time': 0}]
|
||||||
for idx, chapter in enumerate(chapter_list):
|
for idx, chapter in enumerate(chapter_list):
|
||||||
if chapter['start_time'] is None:
|
if chapter['start_time'] is None:
|
||||||
self.report_warning(f'Incomplete chapter {idx}')
|
warn(f'Incomplete chapter {idx}')
|
||||||
elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
|
elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
|
||||||
chapters.append(chapter)
|
chapters.append(chapter)
|
||||||
elif chapter not in chapters:
|
elif chapter not in chapters:
|
||||||
self.report_warning(
|
issue = (f'{chapter["start_time"]} > {duration}' if chapter['start_time'] > duration
|
||||||
f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')
|
else f'{chapter["start_time"]} < {chapters[-1]["start_time"]}')
|
||||||
|
warn(f'Invalid start time ({issue}) for chapter "{chapter["title"]}"')
|
||||||
return chapters[1:]
|
return chapters[1:]
|
||||||
|
|
||||||
def _extract_chapters_from_description(self, description, duration):
|
def _extract_chapters_from_description(self, description, duration):
|
||||||
|
|
192
yt_dlp/extractor/dlf.py
Normal file
192
yt_dlp/extractor/dlf.py
Normal file
|
@ -0,0 +1,192 @@
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
extract_attributes,
|
||||||
|
int_or_none,
|
||||||
|
traverse_obj,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DLFBaseIE(InfoExtractor):
    # Shared pieces for the Deutschlandfunk extractors: the common URL prefix,
    # the regex locating the "Anhören" (listen) buttons, and the parser that
    # turns one such button into an info dict.
    _VALID_URL_BASE = r'https?://(?:www\.)?deutschlandfunk\.de/'
    _BUTTON_REGEX = r'(<button[^>]+alt="Anhören"[^>]+data-audio-diraid[^>]*>)'

    def _parse_button_attrs(self, button, audio_id=None):
        """Build an info dict from the HTML of a single audio <button> tag.

        @param button    The opening <button ...> tag as matched by _BUTTON_REGEX
        @param audio_id  ID to use for the entry; when omitted, the tag's
                         "data-audio-diraid" attribute is used instead
        @returns         A dict with 'id', 'extractor', 'extractor_key', 'formats'
                         and whatever metadata attributes the tag carries
        @raises KeyError if no audio_id is given and the tag has no
                         "data-audio-diraid" attribute
        """
        attrs = extract_attributes(button)
        audio_id = audio_id or attrs['data-audio-diraid']

        # The source URL can live in any of these attributes; take the first
        # one that holds a valid URL
        url = traverse_obj(
            attrs, 'data-audio-download-src', 'data-audio', 'data-audioreference',
            'data-audio-src', expected_type=url_or_none)

        if not url:
            # No usable source on this button: return no formats instead of
            # a malformed format entry whose 'url' is None
            formats = []
        else:
            ext = determine_ext(url)
            if ext == 'm3u8':
                formats = self._extract_m3u8_formats(url, audio_id, fatal=False)
            else:
                # Direct audio file
                formats = [{'url': url, 'ext': ext, 'vcodec': 'none'}]

        return {
            'id': audio_id,
            # Entries are always attributed to the single-audio extractor, even
            # when they are produced while parsing a multi-button page
            'extractor_key': DLFIE.ie_key(),
            'extractor': DLFIE.IE_NAME,
            **traverse_obj(attrs, {
                'title': (('data-audiotitle', 'data-audio-title', 'data-audio-download-tracking-title'), {str}),
                'duration': (('data-audioduration', 'data-audio-duration'), {int_or_none}),
                'thumbnail': ('data-audioimage', {url_or_none}),
                'uploader': 'data-audio-producer',
                'series': 'data-audio-series',
                'channel': 'data-audio-origin-site-name',
                'webpage_url': ('data-audio-download-tracking-path', {url_or_none}),
            }, get_all=False),
            'formats': formats,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class DLFIE(DLFBaseIE):
    # Extractor for a single Deutschlandfunk audio page; such URLs end in
    # "-dlf-<8 hex digits>-100.html" and the hex part is used as the ID
    IE_NAME = 'dlf'
    _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'[\w-]+-dlf-(?P<id>[\da-f]{8})-100\.html'
    _TESTS = [
        # Audio as an HLS stream
        {
            'url': 'https://www.deutschlandfunk.de/tanz-der-saiteninstrumente-das-wild-strings-trio-aus-slowenien-dlf-03a3eb19-100.html',
            'info_dict': {
                'id': '03a3eb19',
                'title': r're:Tanz der Saiteninstrumente [-/] Das Wild Strings Trio aus Slowenien',
                'ext': 'm4a',
                'duration': 3298,
                'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
                'uploader': 'Deutschlandfunk',
                'series': 'On Stage',
                'channel': 'deutschlandfunk',
            },
            'params': {
                'skip_download': 'm3u8',
            },
            'skip': 'This webpage no longer exists',
        },
        {
            'url': 'https://www.deutschlandfunk.de/russische-athleten-kehren-zurueck-auf-die-sportbuehne-ein-gefaehrlicher-tueroeffner-dlf-d9cc1856-100.html',
            'info_dict': {
                'id': 'd9cc1856',
                'title': 'Russische Athleten kehren zurück auf die Sportbühne: Ein gefährlicher Türöffner',
                'ext': 'mp3',
                'duration': 291,
                'thumbnail': 'https://assets.deutschlandfunk.de/FALLBACK-IMAGE-AUDIO/512x512.png?t=1603714364673',
                'uploader': 'Deutschlandfunk',
                'series': 'Kommentare und Themen der Woche',
                'channel': 'deutschlandfunk',
            },
        },
    ]

    def _real_extract(self, url):
        """Download the page and parse its first audio button into an info dict."""
        audio_id = self._match_id(url)
        webpage = self._download_webpage(url, audio_id)

        # The first button matching _BUTTON_REGEX is the one that gets parsed
        button_html = self._search_regex(self._BUTTON_REGEX, webpage, 'button')
        return self._parse_button_attrs(button_html, audio_id)
|
||||||
|
|
||||||
|
|
||||||
|
class DLFCorpusIE(DLFBaseIE):
    # Playlist extractor for Deutschlandfunk pages carrying several audio
    # buttons. The negative lookahead in _VALID_URL excludes the single-audio
    # "-dlf-<8 hex digits>" URLs matched by DLFIE.
    IE_NAME = 'dlf:corpus'
    IE_DESC = 'DLF Multi-feed Archives'
    _VALID_URL = DLFBaseIE._VALID_URL_BASE + r'(?P<id>(?![\w-]+-dlf-[\da-f]{8})[\w-]+-\d+)\.html'
    _TESTS = [
        # Recorded news broadcast with referrals to related broadcasts
        {
            'url': 'https://www.deutschlandfunk.de/fechten-russland-belarus-ukraine-protest-100.html',
            'info_dict': {
                'id': 'fechten-russland-belarus-ukraine-protest-100',
                'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
                'description': 'md5:91340aab29c71aa7518ad5be13d1e8ad',
            },
            'playlist_mincount': 5,
            'playlist': [
                {
                    'info_dict': {
                        'id': '1fc5d64a',
                        'title': r're:Wiederzulassung als neutrale Athleten [-/] Was die Rückkehr russischer und belarussischer Sportler beim Fechten bedeutet',
                        'ext': 'mp3',
                        'duration': 252,
                        'thumbnail': 'https://assets.deutschlandfunk.de/aad16241-6b76-4a09-958b-96d0ee1d6f57/512x512.jpg?t=1679480020313',
                        'uploader': 'Deutschlandfunk',
                        'series': 'Sport',
                        'channel': 'deutschlandfunk',
                    },
                },
                {
                    'info_dict': {
                        'id': '2ada145f',
                        'title': r're:(?:Sportpolitik / )?Fechtverband votiert für Rückkehr russischer Athleten',
                        'ext': 'mp3',
                        'duration': 336,
                        'thumbnail': 'https://assets.deutschlandfunk.de/FILE_93982766f7317df30409b8a184ac044a/512x512.jpg?t=1678547581005',
                        'uploader': 'Deutschlandfunk',
                        'series': 'Deutschlandfunk Nova',
                        'channel': 'deutschlandfunk-nova',
                    },
                },
                {
                    'info_dict': {
                        'id': '5e55e8c9',
                        'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
                        'ext': 'mp3',
                        'duration': 187,
                        'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
                        'uploader': 'Deutschlandfunk',
                        'series': 'Sport am Samstag',
                        'channel': 'deutschlandfunk',
                    },
                },
                {
                    'info_dict': {
                        'id': '47e1a096',
                        'title': r're:Rückkehr Russlands im Fechten [-/] "Fassungslos, dass es einfach so passiert ist"',
                        'ext': 'mp3',
                        'duration': 602,
                        'thumbnail': 'https://assets.deutschlandfunk.de/da4c494a-21cc-48b4-9cc7-40e09fd442c2/512x512.jpg?t=1678562155770',
                        'uploader': 'Deutschlandfunk',
                        'series': 'Sport am Samstag',
                        'channel': 'deutschlandfunk',
                    },
                },
                {
                    'info_dict': {
                        'id': '5e55e8c9',
                        'title': r're:Wiederzulassung von Russland und Belarus [-/] "Herumlavieren" des Fechter-Bundes sorgt für Unverständnis',
                        'ext': 'mp3',
                        'duration': 187,
                        'thumbnail': 'https://assets.deutschlandfunk.de/a595989d-1ed1-4a2e-8370-b64d7f11d757/512x512.jpg?t=1679173825412',
                        'uploader': 'Deutschlandfunk',
                        'series': 'Sport am Samstag',
                        'channel': 'deutschlandfunk',
                    },
                },
            ],
        },
        # Podcast feed with tag buttons, playlist count fluctuates
        {
            'url': 'https://www.deutschlandfunk.de/kommentare-und-themen-der-woche-100.html',
            'info_dict': {
                'id': 'kommentare-und-themen-der-woche-100',
                'title': 'Meinung - Kommentare und Themen der Woche',
                'description': 'md5:2901bbd65cd2d45e116d399a099ce5d5',
            },
            'playlist_mincount': 10,
        },
        # Podcast feed with no description
        {
            'url': 'https://www.deutschlandfunk.de/podcast-tolle-idee-100.html',
            'info_dict': {
                'id': 'podcast-tolle-idee-100',
                'title': 'Wissenschaftspodcast - Tolle Idee! - Was wurde daraus?',
            },
            'playlist_mincount': 11,
        },
    ]

    def _real_extract(self, url):
        """Return a playlist with one entry per audio button found on the page."""
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        # Both meta lookups are optional (default=None); a page may lack them
        description = self._html_search_meta(
            ['description', 'og:description', 'twitter:description'], webpage, default=None)
        title = self._html_search_meta(
            ['og:title', 'twitter:title'], webpage, default=None)

        # Entries are produced lazily; each button supplies its own ID through
        # its "data-audio-diraid" attribute since no audio_id is passed
        button_tags = re.findall(self._BUTTON_REGEX, webpage)
        entries = (self._parse_button_attrs(tag) for tag in button_tags)

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'description': description,
            'title': title,
            'entries': entries,
        }
|
|
@ -12,7 +12,6 @@ from ..utils import (
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
try_get,
|
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
@ -25,7 +24,7 @@ class DRTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:www\.)?dr\.dk/(?:tv/se|nyheder|(?:radio|lyd)(?:/ondemand)?)/(?:[^/]+/)*|
|
(?:www\.)?dr\.dk/(?:tv/se|nyheder|(?P<radio>radio|lyd)(?:/ondemand)?)/(?:[^/]+/)*|
|
||||||
(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
|
(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
|
||||||
)
|
)
|
||||||
(?P<id>[\da-z_-]+)
|
(?P<id>[\da-z_-]+)
|
||||||
|
@ -80,7 +79,7 @@ class DRTVIE(InfoExtractor):
|
||||||
'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
|
'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
|
||||||
'timestamp': 1546628400,
|
'timestamp': 1546628400,
|
||||||
'upload_date': '20190104',
|
'upload_date': '20190104',
|
||||||
'duration': 3504.618,
|
'duration': 3504.619,
|
||||||
'formats': 'mincount:20',
|
'formats': 'mincount:20',
|
||||||
'release_year': 2017,
|
'release_year': 2017,
|
||||||
'season_id': 'urn:dr:mu:bundle:5afc03ad6187a4065ca5fd35',
|
'season_id': 'urn:dr:mu:bundle:5afc03ad6187a4065ca5fd35',
|
||||||
|
@ -101,14 +100,16 @@ class DRTVIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Bonderøven 2019 (1:8)',
|
'title': 'Bonderøven 2019 (1:8)',
|
||||||
'description': 'md5:b6dcfe9b6f0bea6703e9a0092739a5bd',
|
'description': 'md5:b6dcfe9b6f0bea6703e9a0092739a5bd',
|
||||||
'timestamp': 1603188600,
|
'timestamp': 1654856100,
|
||||||
'upload_date': '20201020',
|
'upload_date': '20220610',
|
||||||
'duration': 2576.6,
|
'duration': 2576.6,
|
||||||
'season': 'Bonderøven 2019',
|
'season': 'Bonderøven 2019',
|
||||||
'season_id': 'urn:dr:mu:bundle:5c201667a11fa01ca4528ce5',
|
'season_id': 'urn:dr:mu:bundle:5c201667a11fa01ca4528ce5',
|
||||||
'release_year': 2019,
|
'release_year': 2019,
|
||||||
'season_number': 2019,
|
'season_number': 2019,
|
||||||
'series': 'Frank & Kastaniegaarden'
|
'series': 'Frank & Kastaniegaarden',
|
||||||
|
'episode_number': 1,
|
||||||
|
'episode': 'Episode 1',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -140,10 +141,26 @@ class DRTVIE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'this video has been removed',
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.dr.dk/lyd/p4kbh/regionale-nyheder-kh4/regionale-nyheder-2023-03-14-10-30-9',
|
||||||
|
'info_dict': {
|
||||||
|
'ext': 'mp4',
|
||||||
|
'id': '14802310112',
|
||||||
|
'timestamp': 1678786200,
|
||||||
|
'duration': 120.043,
|
||||||
|
'season_id': 'urn:dr:mu:bundle:63a4f7c87140143504b6710f',
|
||||||
|
'series': 'P4 København regionale nyheder',
|
||||||
|
'upload_date': '20230314',
|
||||||
|
'release_year': 0,
|
||||||
|
'description': 'Hør seneste regionale nyheder fra P4 København.',
|
||||||
|
'season': 'Regionale nyheder',
|
||||||
|
'title': 'Regionale nyheder',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
raw_video_id = self._match_id(url)
|
raw_video_id, is_radio_url = self._match_valid_url(url).group('id', 'radio')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, raw_video_id)
|
webpage = self._download_webpage(url, raw_video_id)
|
||||||
|
|
||||||
|
@ -170,15 +187,17 @@ class DRTVIE(InfoExtractor):
|
||||||
programcard_url = '%s/%s' % (_PROGRAMCARD_BASE, video_id)
|
programcard_url = '%s/%s' % (_PROGRAMCARD_BASE, video_id)
|
||||||
else:
|
else:
|
||||||
programcard_url = _PROGRAMCARD_BASE
|
programcard_url = _PROGRAMCARD_BASE
|
||||||
page = self._parse_json(
|
if is_radio_url:
|
||||||
self._search_regex(
|
video_id = self._search_nextjs_data(
|
||||||
r'data\s*=\s*({.+?})\s*(?:;|</script)', webpage,
|
webpage, raw_video_id)['props']['pageProps']['episode']['productionNumber']
|
||||||
'data'), '1')['cache']['page']
|
else:
|
||||||
page = page[list(page.keys())[0]]
|
json_data = self._search_json(
|
||||||
item = try_get(
|
r'window\.__data\s*=', webpage, 'data', raw_video_id)
|
||||||
page, (lambda x: x['item'], lambda x: x['entries'][0]['item']),
|
video_id = traverse_obj(json_data, (
|
||||||
dict)
|
'cache', 'page', ..., (None, ('entries', 0)), 'item', 'customId',
|
||||||
video_id = item['customId'].split(':')[-1]
|
{lambda x: x.split(':')[-1]}), get_all=False)
|
||||||
|
if not video_id:
|
||||||
|
raise ExtractorError('Unable to extract video id')
|
||||||
query['productionnumber'] = video_id
|
query['productionnumber'] = video_id
|
||||||
|
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
|
@ -269,10 +288,11 @@ class DRTVIE(InfoExtractor):
|
||||||
f['vcodec'] = 'none'
|
f['vcodec'] = 'none'
|
||||||
formats.extend(f4m_formats)
|
formats.extend(f4m_formats)
|
||||||
elif target == 'HLS':
|
elif target == 'HLS':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
uri, video_id, 'mp4', entry_protocol='m3u8_native',
|
uri, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
quality=preference, m3u8_id=format_id,
|
quality=preference, m3u8_id=format_id, fatal=False)
|
||||||
fatal=False))
|
formats.extend(fmts)
|
||||||
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
else:
|
else:
|
||||||
bitrate = link.get('Bitrate')
|
bitrate = link.get('Bitrate')
|
||||||
if bitrate:
|
if bitrate:
|
||||||
|
|
|
@ -14,6 +14,7 @@ from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
UnsupportedError,
|
UnsupportedError,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
determine_protocol,
|
||||||
dict_get,
|
dict_get,
|
||||||
extract_basic_auth,
|
extract_basic_auth,
|
||||||
format_field,
|
format_field,
|
||||||
|
@ -32,6 +33,7 @@ from ..utils import (
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
|
update_url_query,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urljoin,
|
urljoin,
|
||||||
variadic,
|
variadic,
|
||||||
|
@ -866,7 +868,7 @@ class GenericIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# Video.js embed, multiple formats
|
# Youtube embed, formerly: Video.js embed, multiple formats
|
||||||
'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
|
'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'yygqldloqIk',
|
'id': 'yygqldloqIk',
|
||||||
|
@ -893,6 +895,7 @@ class GenericIE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': '404 Not Found',
|
||||||
},
|
},
|
||||||
# rtl.nl embed
|
# rtl.nl embed
|
||||||
{
|
{
|
||||||
|
@ -2168,6 +2171,33 @@ class GenericIE(InfoExtractor):
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'note': 'Live HLS direct link',
|
||||||
|
'url': 'https://d18j67ugtrocuq.cloudfront.net/out/v1/2767aec339144787926bd0322f72c6e9/index.m3u8',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'index',
|
||||||
|
'title': r're:index',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'live_status': 'is_live',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'm3u8',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'note': 'Video.js VOD HLS',
|
||||||
|
'url': 'https://gist.githubusercontent.com/bashonly/2aae0862c50f4a4b84f220c315767208/raw/e3380d413749dabbe804c9c2d8fd9a45142475c7/videojs_hls_test.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'videojs_hls_test',
|
||||||
|
'title': 'video',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'age_limit': 0,
|
||||||
|
'duration': 1800,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'm3u8',
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_following_redirect(self, new_url):
|
def report_following_redirect(self, new_url):
|
||||||
|
@ -2184,12 +2214,41 @@ class GenericIE(InfoExtractor):
|
||||||
|
|
||||||
self._downloader.write_debug(f'Identified {num} {name}{format_field(note, None, "; %s")}')
|
self._downloader.write_debug(f'Identified {num} {name}{format_field(note, None, "; %s")}')
|
||||||
|
|
||||||
def _fragment_query(self, url):
|
def _extra_manifest_info(self, info, manifest_url):
    """Apply the manifest-related extractor arguments to `info` in place.

    Reads the 'fragment_query', 'hls_key', 'variant_query' and 'is_live'
    configuration arguments and updates `info` (and its formats) accordingly.
    For HLS formats it also tries to tell live streams from VOD.

    @param info          The info dict being built; modified in place
    @param manifest_url  URL of the manifest, used as the fallback source of
                         the query string
    """
    segment_query = self._configuration_arg('fragment_query', [None], casesense=True)[0]
    if segment_query is not None:
        # Precedence: query part of the argument (if it parses as a URL), then
        # the raw argument, then the manifest URL's own query
        info['extra_param_to_segment_url'] = (
            urllib.parse.urlparse(segment_query).query or segment_query
            or urllib.parse.urlparse(manifest_url).query or None)

    def hex_or_none(value):
        # Accept only hexadecimal strings, optionally prefixed with "0x"
        if re.fullmatch(r'(0x)?[\da-f]+', value, re.IGNORECASE):
            return value
        return None

    # The first 'hls_key' value is tried both as a key URI and as a hex key;
    # the second value, if any, is the IV. 'hls_aes' is always assigned.
    info['hls_aes'] = traverse_obj(self._configuration_arg('hls_key', casesense=True), {
        'uri': (0, {url_or_none}), 'key': (0, {hex_or_none}), 'iv': (1, {hex_or_none}),
    }) or None

    format_query = self._configuration_arg('variant_query', [None], casesense=True)[0]
    if format_query is not None:
        params = urllib.parse.parse_qs(
            urllib.parse.urlparse(format_query).query or format_query
            or urllib.parse.urlparse(manifest_url).query)
        for fmt in self._downloader._get_formats(info):
            fmt['url'] = update_url_query(fmt['url'], params)

    # Attempt to detect live HLS or set VOD duration
    hls_format = None
    for candidate in self._downloader._get_formats(info):
        if determine_protocol(candidate) == 'm3u8_native':
            hls_format = candidate
            break
    if not hls_format:
        return

    live_arg = self._configuration_arg('is_live', [None])[0]
    if live_arg is not None:
        # An explicit argument overrides detection; any value other than
        # 'false' counts as live
        info['live_status'] = 'not_live' if live_arg == 'false' else 'is_live'
        return

    headers = hls_format.get('http_headers') or info.get('http_headers')
    duration = self._extract_m3u8_vod_duration(
        hls_format['url'], info.get('id'), note='Checking m3u8 live status',
        errnote='Failed to download m3u8 media playlist', headers=headers)
    if not duration:
        # No VOD duration could be determined, so treat the stream as live
        info['live_status'] = 'is_live'
    info['duration'] = info.get('duration') or duration
|
||||||
|
|
||||||
def _extract_rss(self, url, video_id, doc):
|
def _extract_rss(self, url, video_id, doc):
|
||||||
NS_MAP = {
|
NS_MAP = {
|
||||||
|
@ -2397,10 +2456,8 @@ class GenericIE(InfoExtractor):
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
if format_id.endswith('mpegurl') or ext == 'm3u8':
|
if format_id.endswith('mpegurl') or ext == 'm3u8':
|
||||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4', headers=headers)
|
||||||
info_dict.update(self._fragment_query(url))
|
|
||||||
elif format_id.endswith('mpd') or format_id.endswith('dash+xml') or ext == 'mpd':
|
elif format_id.endswith('mpd') or format_id.endswith('dash+xml') or ext == 'mpd':
|
||||||
formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers)
|
formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id, headers=headers)
|
||||||
info_dict.update(self._fragment_query(url))
|
|
||||||
elif format_id == 'f4m' or ext == 'f4m':
|
elif format_id == 'f4m' or ext == 'f4m':
|
||||||
formats = self._extract_f4m_formats(url, video_id, headers=headers)
|
formats = self._extract_f4m_formats(url, video_id, headers=headers)
|
||||||
else:
|
else:
|
||||||
|
@ -2415,6 +2472,7 @@ class GenericIE(InfoExtractor):
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'http_headers': headers or None,
|
'http_headers': headers or None,
|
||||||
})
|
})
|
||||||
|
self._extra_manifest_info(info_dict, url)
|
||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
if not self.get_param('test', False) and not is_intentional:
|
if not self.get_param('test', False) and not is_intentional:
|
||||||
|
@ -2427,7 +2485,7 @@ class GenericIE(InfoExtractor):
|
||||||
if first_bytes.startswith(b'#EXTM3U'):
|
if first_bytes.startswith(b'#EXTM3U'):
|
||||||
self.report_detected('M3U playlist')
|
self.report_detected('M3U playlist')
|
||||||
info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
|
info_dict['formats'], info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
|
||||||
info_dict.update(self._fragment_query(url))
|
self._extra_manifest_info(info_dict, url)
|
||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
# Maybe it's a direct link to a video?
|
# Maybe it's a direct link to a video?
|
||||||
|
@ -2478,7 +2536,7 @@ class GenericIE(InfoExtractor):
|
||||||
doc,
|
doc,
|
||||||
mpd_base_url=full_response.geturl().rpartition('/')[0],
|
mpd_base_url=full_response.geturl().rpartition('/')[0],
|
||||||
mpd_url=url)
|
mpd_url=url)
|
||||||
info_dict.update(self._fragment_query(url))
|
self._extra_manifest_info(info_dict, url)
|
||||||
self.report_detected('DASH manifest')
|
self.report_detected('DASH manifest')
|
||||||
return info_dict
|
return info_dict
|
||||||
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
|
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
|
||||||
|
@ -2567,8 +2625,7 @@ class GenericIE(InfoExtractor):
|
||||||
varname = mobj.group(1)
|
varname = mobj.group(1)
|
||||||
sources = variadic(self._parse_json(
|
sources = variadic(self._parse_json(
|
||||||
mobj.group(2), video_id, transform_source=js_to_json, fatal=False) or [])
|
mobj.group(2), video_id, transform_source=js_to_json, fatal=False) or [])
|
||||||
formats = []
|
formats, subtitles, src = [], {}, None
|
||||||
subtitles = {}
|
|
||||||
for source in sources:
|
for source in sources:
|
||||||
src = source.get('src')
|
src = source.get('src')
|
||||||
if not src or not isinstance(src, str):
|
if not src or not isinstance(src, str):
|
||||||
|
@ -2591,8 +2648,6 @@ class GenericIE(InfoExtractor):
|
||||||
m3u8_id='hls', fatal=False)
|
m3u8_id='hls', fatal=False)
|
||||||
formats.extend(fmts)
|
formats.extend(fmts)
|
||||||
self._merge_subtitles(subs, target=subtitles)
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
for fmt in formats:
|
|
||||||
fmt.update(self._fragment_query(src))
|
|
||||||
|
|
||||||
if not formats:
|
if not formats:
|
||||||
formats.append({
|
formats.append({
|
||||||
|
@ -2608,11 +2663,11 @@ class GenericIE(InfoExtractor):
|
||||||
for sub_match in re.finditer(rf'(?s){re.escape(varname)}' r'\.addRemoteTextTrack\(({.+?})\s*,\s*(?:true|false)\)', webpage):
|
for sub_match in re.finditer(rf'(?s){re.escape(varname)}' r'\.addRemoteTextTrack\(({.+?})\s*,\s*(?:true|false)\)', webpage):
|
||||||
sub = self._parse_json(
|
sub = self._parse_json(
|
||||||
sub_match.group(1), video_id, transform_source=js_to_json, fatal=False) or {}
|
sub_match.group(1), video_id, transform_source=js_to_json, fatal=False) or {}
|
||||||
src = str_or_none(sub.get('src'))
|
sub_src = str_or_none(sub.get('src'))
|
||||||
if not src:
|
if not sub_src:
|
||||||
continue
|
continue
|
||||||
subtitles.setdefault(dict_get(sub, ('language', 'srclang')) or 'und', []).append({
|
subtitles.setdefault(dict_get(sub, ('language', 'srclang')) or 'und', []).append({
|
||||||
'url': urllib.parse.urljoin(url, src),
|
'url': urllib.parse.urljoin(url, sub_src),
|
||||||
'name': sub.get('label'),
|
'name': sub.get('label'),
|
||||||
'http_headers': {
|
'http_headers': {
|
||||||
'Referer': actual_url,
|
'Referer': actual_url,
|
||||||
|
@ -2620,7 +2675,10 @@ class GenericIE(InfoExtractor):
|
||||||
})
|
})
|
||||||
if formats or subtitles:
|
if formats or subtitles:
|
||||||
self.report_detected('video.js embed')
|
self.report_detected('video.js embed')
|
||||||
return [{'formats': formats, 'subtitles': subtitles}]
|
info_dict = {'formats': formats, 'subtitles': subtitles}
|
||||||
|
if formats:
|
||||||
|
self._extra_manifest_info(info_dict, src)
|
||||||
|
return [info_dict]
|
||||||
|
|
||||||
# Look for generic KVS player (before json-ld bc of some urls that break otherwise)
|
# Look for generic KVS player (before json-ld bc of some urls that break otherwise)
|
||||||
found = self._search_regex((
|
found = self._search_regex((
|
||||||
|
@ -2795,10 +2853,10 @@ class GenericIE(InfoExtractor):
|
||||||
return [self._extract_xspf_playlist(video_url, video_id)]
|
return [self._extract_xspf_playlist(video_url, video_id)]
|
||||||
elif ext == 'm3u8':
|
elif ext == 'm3u8':
|
||||||
entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers)
|
entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_m3u8_formats_and_subtitles(video_url, video_id, ext='mp4', headers=headers)
|
||||||
entry_info_dict.update(self._fragment_query(video_url))
|
self._extra_manifest_info(entry_info_dict, video_url)
|
||||||
elif ext == 'mpd':
|
elif ext == 'mpd':
|
||||||
entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers)
|
entry_info_dict['formats'], entry_info_dict['subtitles'] = self._extract_mpd_formats_and_subtitles(video_url, video_id, headers=headers)
|
||||||
entry_info_dict.update(self._fragment_query(video_url))
|
self._extra_manifest_info(entry_info_dict, video_url)
|
||||||
elif ext == 'f4m':
|
elif ext == 'f4m':
|
||||||
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers)
|
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id, headers=headers)
|
||||||
elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
|
elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
|
||||||
|
|
|
@ -10,7 +10,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class GeniusIE(InfoExtractor):
|
class GeniusIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?genius\.com/videos/(?P<id>[^?/#]+)'
|
_VALID_URL = r'https?://(?:www\.)?genius\.com/(?:videos|(?P<article>a))/(?P<id>[^?/#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://genius.com/videos/Vince-staples-breaks-down-the-meaning-of-when-sparks-fly',
|
'url': 'https://genius.com/videos/Vince-staples-breaks-down-the-meaning-of-when-sparks-fly',
|
||||||
'md5': '64c2ad98cfafcfda23bfa0ad0c512f4c',
|
'md5': '64c2ad98cfafcfda23bfa0ad0c512f4c',
|
||||||
|
@ -41,19 +41,37 @@ class GeniusIE(InfoExtractor):
|
||||||
'timestamp': 1631209167,
|
'timestamp': 1631209167,
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://genius.com/a/cordae-anderson-paak-break-down-the-meaning-of-two-tens',
|
||||||
|
'md5': 'f98a4e03b16b0a2821bd6e52fb3cc9d7',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6321509903112',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Cordae & Anderson .Paak Breaks Down The Meaning Of “Two Tens”',
|
||||||
|
'description': 'md5:1255f0e1161d07342ce56a8464ac339d',
|
||||||
|
'tags': ['song id: 5457554'],
|
||||||
|
'uploader_id': '4863540648001',
|
||||||
|
'duration': 361.813,
|
||||||
|
'upload_date': '20230301',
|
||||||
|
'timestamp': 1677703908,
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id, is_article = self._match_valid_url(url).group('id', 'article')
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
metadata = self._search_json(
|
metadata = self._search_json(
|
||||||
r'<meta content="', webpage, 'metadata', display_id, transform_source=unescapeHTML)
|
r'<meta content="', webpage, 'metadata', display_id,
|
||||||
video_id = traverse_obj(
|
end_pattern=r'"\s+itemprop="page_data"', transform_source=unescapeHTML)
|
||||||
metadata, ('video', 'provider_id'),
|
video_id = traverse_obj(metadata, (
|
||||||
('dfp_kv', lambda _, x: x['name'] == 'brightcove_video_id', 'values', 0), get_all=False)
|
(('article', 'media', ...), ('video', None)),
|
||||||
|
('provider_id', ('dfp_kv', lambda _, v: v['name'] == 'brightcove_video_id', 'values', ...))),
|
||||||
|
get_all=False)
|
||||||
if not video_id:
|
if not video_id:
|
||||||
raise ExtractorError('Brightcove video id not found in webpage')
|
# Not all article pages have videos, expect the error
|
||||||
|
raise ExtractorError('Brightcove video ID not found in webpage', expected=bool(is_article))
|
||||||
|
|
||||||
config = self._search_json(r'var\s*APP_CONFIG\s*=', webpage, 'config', video_id, default={})
|
config = self._search_json(r'var\s*APP_CONFIG\s*=', webpage, 'config', video_id, default={})
|
||||||
account_id = config.get('brightcove_account_id', '4863540648001')
|
account_id = config.get('brightcove_account_id', '4863540648001')
|
||||||
|
@ -68,7 +86,7 @@ class GeniusIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class GeniusLyricsIE(InfoExtractor):
|
class GeniusLyricsIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?genius\.com/(?P<id>[^?/#]+)-lyrics[?/#]?'
|
_VALID_URL = r'https?://(?:www\.)?genius\.com/(?P<id>[^?/#]+)-lyrics(?:[?/#]|$)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://genius.com/Lil-baby-heyy-lyrics',
|
'url': 'https://genius.com/Lil-baby-heyy-lyrics',
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 2,
|
||||||
|
|
254
yt_dlp/extractor/globalplayer.py
Executable file
254
yt_dlp/extractor/globalplayer.py
Executable file
|
@ -0,0 +1,254 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
join_nonempty,
|
||||||
|
parse_duration,
|
||||||
|
str_or_none,
|
||||||
|
traverse_obj,
|
||||||
|
unified_strdate,
|
||||||
|
unified_timestamp,
|
||||||
|
urlhandle_detect_ext,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalPlayerBaseIE(InfoExtractor):
    # Helpers shared by the Global Player extractors

    def _get_page_props(self, url, video_id):
        """Download the page and return the 'pageProps' object of its Next.js data."""
        nextjs_data = self._search_nextjs_data(
            self._download_webpage(url, video_id), video_id)
        return nextjs_data['props']['pageProps']

    def _request_ext(self, url, video_id):
        """Request `url` and detect the media extension from the response handle."""
        # Server rejects HEAD requests
        handle = self._request_webpage(
            url, video_id, note='Determining source extension')
        return urlhandle_detect_ext(handle)

    def _extract_audio(self, episode, series):
        """Merge series-level and episode-level metadata into one audio info dict.

        Episode values are applied last, so they override series values where
        both define a field (e.g. 'thumbnail').
        """
        series_fields = traverse_obj(series, {
            'series': 'title',
            'series_id': 'id',
            'thumbnail': 'imageUrl',
            'uploader': 'itunesAuthor',  # podcasts only
        })
        episode_fields = traverse_obj(episode, {
            'id': 'id',
            'description': ('description', {clean_html}),
            'duration': ('duration', {parse_duration}),
            'thumbnail': 'imageUrl',
            'url': 'streamUrl',
            'timestamp': (('pubDate', 'startDate'), {unified_timestamp}),
            'title': 'title',
        }, get_all=False)

        return {
            'vcodec': 'none',
            **series_fields,
            **episode_fields,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalPlayerLiveIE(GlobalPlayerBaseIE):
    # Live radio station pages: /live/<brand>/<region>
    _VALID_URL = r'https?://www\.globalplayer\.com/live/(?P<id>\w+)/\w+'
    _TESTS = [{
        'url': 'https://www.globalplayer.com/live/smoothchill/uk/',
        'info_dict': {
            'id': '2mx1E',
            'ext': 'aac',
            'display_id': 'smoothchill-uk',
            'title': 're:^Smooth Chill.+$',
            'thumbnail': 'https://herald.musicradio.com/media/f296ade8-50c9-4f60-911f-924e96873620.png',
            'description': 'Music To Chill To',
            'live_status': 'is_live',
        },
    }, {
        # national station
        'url': 'https://www.globalplayer.com/live/heart/uk/',
        'info_dict': {
            'id': '2mwx4',
            'ext': 'aac',
            'description': 'turn up the feel good!',
            'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
            'live_status': 'is_live',
            'title': 're:^Heart UK.+$',
            'display_id': 'heart-uk',
        },
    }, {
        # regional variation
        'url': 'https://www.globalplayer.com/live/heart/london/',
        'info_dict': {
            'id': 'AMqg',
            'ext': 'aac',
            'thumbnail': 'https://herald.musicradio.com/media/49b9e8cb-15bf-4bf2-8c28-a4850cc6b0f3.png',
            'title': 're:^Heart London.+$',
            'live_status': 'is_live',
            'display_id': 'heart-london',
            'description': 'turn up the feel good!',
        },
    }]

    def _real_extract(self, url):
        """Return a live, audio-only info dict built from the page's `station` props."""
        video_id = self._match_id(url)
        page_props = self._get_page_props(url, video_id)
        station = page_props['station']
        stream_url = station['streamUrl']

        info = {
            'id': station['id'],
            # Prefer "<brandSlug>-<slug>"; fall back to the legacy prefix when both are empty
            'display_id': join_nonempty('brandSlug', 'slug', from_dict=station) or station.get('legacyStationPrefix'),
            'url': stream_url,
            'ext': self._request_ext(stream_url, video_id),
            'vcodec': 'none',
            'is_live': True,
        }
        info.update(traverse_obj(station, {
            'title': (('name', 'brandName'), {str_or_none}),
            'description': 'tagline',
            'thumbnail': 'brandLogo',
        }, get_all=False))
        return info
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalPlayerLivePlaylistIE(GlobalPlayerBaseIE):
    # "Live playlist" pages: /playlists/<id>
    _VALID_URL = r'https?://www\.globalplayer\.com/playlists/(?P<id>\w+)'
    _TESTS = [{
        # "live playlist"
        'url': 'https://www.globalplayer.com/playlists/8bLk/',
        'info_dict': {
            'id': '8bLk',
            'ext': 'aac',
            'live_status': 'is_live',
            'description': 'md5:e10f5e10b01a7f2c14ba815509fbb38d',
            'thumbnail': 'https://images.globalplayer.com/images/551379?width=450&signature=oMLPZIoi5_dBSHnTMREW0Xg76mA=',
            'title': 're:^Classic FM Hall of Fame.+$',
        },
    }]

    def _real_extract(self, url):
        """Return a live, audio-only info dict built from the page's `playlistData` props."""
        video_id = self._match_id(url)
        page_props = self._get_page_props(url, video_id)
        station = page_props['playlistData']
        stream_url = station['streamUrl']

        info = {
            'id': video_id,
            'url': stream_url,
            'ext': self._request_ext(stream_url, video_id),
            'vcodec': 'none',
            'is_live': True,
        }
        info.update(traverse_obj(station, {
            'title': 'title',
            'description': 'description',
            'thumbnail': 'image',
        }))
        return info
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalPlayerAudioIE(GlobalPlayerBaseIE):
    # Series pages (podcast or radio catchup), returned as a playlist of episodes
    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)/|catchup/\w+/\w+/)(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [{
        # podcast
        'url': 'https://www.globalplayer.com/podcasts/42KuaM/',
        'playlist_mincount': 5,
        'info_dict': {
            'id': '42KuaM',
            'title': 'Filthy Ritual',
            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
            'categories': ['Society & Culture', 'True Crime'],
            'uploader': 'Global',
            'description': 'md5:da5b918eac9ae319454a10a563afacf9',
        },
    }, {
        # radio catchup
        'url': 'https://www.globalplayer.com/catchup/lbc/uk/46vyD7z/',
        'playlist_mincount': 3,
        'info_dict': {
            'id': '46vyD7z',
            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
            'title': 'Nick Ferrari',
            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
        },
    }]

    def _real_extract(self, url):
        """Return a playlist whose entries are the series' episodes that have an id and a stream URL."""
        video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
        props = self._get_page_props(url, video_id)
        # The `podcast` group only matches /podcasts/ URLs; catchup pages use a different props key
        series = props['podcastInfo' if podcast else 'catchupInfo']

        # Keep only episodes carrying both an id and a stream URL
        playable_episodes = traverse_obj(
            series, ('episodes', lambda _, v: v['id'] and v['streamUrl']))

        playlist = {
            '_type': 'playlist',
            'id': video_id,
            'entries': [self._extract_audio(ep, series) for ep in playable_episodes],
            'categories': traverse_obj(series, ('categories', ..., 'name')) or None,
        }
        playlist.update(traverse_obj(series, {
            'description': 'description',
            'thumbnail': 'imageUrl',
            'title': 'title',
            'uploader': 'itunesAuthor',  # podcasts only
        }))
        return playlist
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalPlayerAudioEpisodeIE(GlobalPlayerBaseIE):
    # Single episode pages (podcast or radio catchup)
    _VALID_URL = r'https?://www\.globalplayer\.com/(?:(?P<podcast>podcasts)|catchup/\w+/\w+)/episodes/(?P<id>\w+)/?(?:$|[?#])'
    _TESTS = [{
        # podcast
        'url': 'https://www.globalplayer.com/podcasts/episodes/7DrfNnE/',
        'info_dict': {
            'id': '7DrfNnE',
            'ext': 'mp3',
            'title': 'Filthy Ritual - Trailer',
            'description': 'md5:1f1562fd0f01b4773b590984f94223e0',
            'thumbnail': 'md5:60286e7d12d795bd1bbc9efc6cee643e',
            'duration': 225.0,
            'timestamp': 1681254900,
            'series': 'Filthy Ritual',
            'series_id': '42KuaM',
            'upload_date': '20230411',
            'uploader': 'Global',
        },
    }, {
        # radio catchup
        'url': 'https://www.globalplayer.com/catchup/lbc/uk/episodes/2zGq26Vcv1fCWhddC4JAwETXWe/',
        'info_dict': {
            'id': '2zGq26Vcv1fCWhddC4JAwETXWe',
            'ext': 'm4a',
            'timestamp': 1682056800,
            'series': 'Nick Ferrari',
            'thumbnail': 'md5:4df24d8a226f5b2508efbcc6ae874ebf',
            'upload_date': '20230421',
            'series_id': '46vyD7z',
            'description': 'Nick Ferrari At Breakfast is Leading Britain\'s Conversation.',
            'title': 'Nick Ferrari',
            'duration': 10800.0,
        },
    }]

    def _real_extract(self, url):
        """Return the audio info dict for one episode, with its parent series taken from the episode itself."""
        video_id, podcast = self._match_valid_url(url).group('id', 'podcast')
        props = self._get_page_props(url, video_id)
        episode = props['podcastEpisode' if podcast else 'catchupEpisode']

        # The parent series is the first dict found under 'podcast' or 'show'; may be absent
        series = traverse_obj(episode, 'podcast', 'show', expected_type=dict) or {}
        return self._extract_audio(episode, series)
|
||||||
|
|
||||||
|
|
||||||
|
class GlobalPlayerVideoIE(GlobalPlayerBaseIE):
    # Video pages: /videos/<id>
    _VALID_URL = r'https?://www\.globalplayer\.com/videos/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.globalplayer.com/videos/2JsSZ7Gm2uP/',
        'info_dict': {
            'id': '2JsSZ7Gm2uP',
            'ext': 'mp4',
            'description': 'md5:6a9f063c67c42f218e42eee7d0298bfd',
            'thumbnail': 'md5:d4498af48e15aae4839ce77b97d39550',
            'upload_date': '20230420',
            'title': 'Treble Malakai Bayoh sings a sublime Handel aria at Classic FM Live',
        },
    }]

    def _real_extract(self, url):
        """Return an info dict built from the page's `videoData` props."""
        video_id = self._match_id(url)
        page_props = self._get_page_props(url, video_id)
        meta = page_props['videoData']

        info = {'id': video_id}
        info.update(traverse_obj(meta, {
            'url': 'url',
            'thumbnail': ('image', 'url'),
            'title': 'title',
            'upload_date': ('publish_date', {unified_strdate}),
            'description': 'description',
        }))
        return info
|
83
yt_dlp/extractor/gmanetwork.py
Normal file
83
yt_dlp/extractor/gmanetwork.py
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .dailymotion import DailymotionIE
|
||||||
|
from .youtube import YoutubeIE
|
||||||
|
|
||||||
|
|
||||||
|
class GMANetworkVideoIE(InfoExtractor):
    # gmanetwork.com video pages; playback is delegated to YouTube or Dailymotion
    _VALID_URL = r'https?://(?:www)\.gmanetwork\.com/(?:\w+/){3}(?P<id>\d+)/(?P<display_id>[\w-]+)/video'
    _TESTS = [{
        'url': 'https://www.gmanetwork.com/fullepisodes/home/running_man_philippines/168677/running-man-philippines-catch-the-thief-full-chapter-2/video?section=home',
        'info_dict': {
            'id': '28BqW0AXPe0',
            'ext': 'mp4',
            'upload_date': '20220919',
            'uploader_url': 'http://www.youtube.com/channel/UChsoPNR5x-wdSO2GrOSIWqQ',
            'like_count': int,
            'view_count': int,
            'uploader': 'YoüLOL',
            'channel_id': 'UChsoPNR5x-wdSO2GrOSIWqQ',
            'duration': 5313,
            'comment_count': int,
            'tags': 'count:22',
            'uploader_id': 'UChsoPNR5x-wdSO2GrOSIWqQ',
            'title': 'Running Man Philippines: Catch the Thief (FULL CHAPTER 2)',
            'channel_url': 'https://www.youtube.com/channel/UChsoPNR5x-wdSO2GrOSIWqQ',
            'thumbnail': 'https://i.ytimg.com/vi/28BqW0AXPe0/maxresdefault.jpg',
            'release_timestamp': 1663594212,
            'age_limit': 0,
            'channel_follower_count': int,
            'categories': ['Entertainment'],
            'description': 'md5:811bdcea74f9c48051824e494756e926',
            'live_status': 'not_live',
            'playable_in_embed': True,
            'channel': 'YoüLOL',
            'availability': 'public',
            'release_date': '20220919',
        },
    }, {
        'url': 'https://www.gmanetwork.com/fullepisodes/home/more_than_words/87059/more-than-words-full-episode-80/video?section=home',
        'info_dict': {
            'id': 'yiDOExw2aSA',
            'ext': 'mp4',
            'live_status': 'not_live',
            'channel': 'GMANetwork',
            'like_count': int,
            'channel_follower_count': int,
            'description': 'md5:6d00cd658394fa1a5071200d3ed4be05',
            'duration': 1419,
            'age_limit': 0,
            'comment_count': int,
            'upload_date': '20181003',
            'thumbnail': 'https://i.ytimg.com/vi_webp/yiDOExw2aSA/maxresdefault.webp',
            'availability': 'public',
            'playable_in_embed': True,
            'channel_id': 'UCKL5hAuzgFQsyrsQKgU0Qng',
            'title': 'More Than Words: Full Episode 80 (Finale)',
            'uploader_id': 'GMANETWORK',
            'categories': ['Entertainment'],
            'uploader': 'GMANetwork',
            'channel_url': 'https://www.youtube.com/channel/UCKL5hAuzgFQsyrsQKgU0Qng',
            'tags': 'count:29',
            'view_count': int,
            'uploader_url': 'http://www.youtube.com/user/GMANETWORK',
        },
    }]

    def _real_extract(self, url):
        """Hand the video off to YoutubeIE or DailymotionIE, using the page first and the content API second."""
        content_id, display_id = self._match_valid_url(url).group('id', 'display_id')
        webpage = self._download_webpage(url, display_id)

        # webpage route
        youtube_id = self._search_regex(
            r'var\s*YOUTUBE_VIDEO\s*=\s*[\'"]+(?P<yt_id>[\w-]+)', webpage, 'youtube_id', fatal=False)
        if youtube_id:
            return self.url_result(youtube_id, YoutubeIE, youtube_id)

        # api call route
        # more info at https://aphrodite.gmanetwork.com/fullepisodes/assets/fullepisodes/js/dist/fullepisodes_video.js?v=1.1.11
        network_url = self._search_regex(
            r'NETWORK_URL\s*=\s*[\'"](?P<url>[^\'"]+)', webpage, 'network_url')
        api_url = f'{network_url}api/data/content/video/{content_id}'
        json_data = self._download_json(api_url, display_id)

        # A non-empty 'video_file' is passed to YoutubeIE; otherwise use the Dailymotion entry
        youtube_file = json_data.get('video_file')
        if youtube_file:
            return self.url_result(youtube_file, YoutubeIE, youtube_file)

        dailymotion_file = json_data['dailymotion_file']
        return self.url_result(dailymotion_file, DailymotionIE, dailymotion_file)
|
|
@ -3,6 +3,7 @@ import functools
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
|
float_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
)
|
)
|
||||||
|
@ -19,7 +20,9 @@ class GronkhIE(InfoExtractor):
|
||||||
'title': 'H.O.R.D.E. - DAS ZWEiTE ZEiTALTER 🎲 Session 1',
|
'title': 'H.O.R.D.E. - DAS ZWEiTE ZEiTALTER 🎲 Session 1',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'thumbnail': 'https://01.cdn.vod.farm/preview/9e2555d3a23bf4e5c5b7c6b3b70a9d84.jpg',
|
'thumbnail': 'https://01.cdn.vod.farm/preview/9e2555d3a23bf4e5c5b7c6b3b70a9d84.jpg',
|
||||||
'upload_date': '20221111'
|
'upload_date': '20221111',
|
||||||
|
'chapters': 'count:3',
|
||||||
|
'duration': 31463,
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True}
|
'params': {'skip_download': True}
|
||||||
}, {
|
}, {
|
||||||
|
@ -30,7 +33,8 @@ class GronkhIE(InfoExtractor):
|
||||||
'title': 'GTV0536, 2021-10-01 - MARTHA IS DEAD #FREiAB1830 !FF7 !horde !archiv',
|
'title': 'GTV0536, 2021-10-01 - MARTHA IS DEAD #FREiAB1830 !FF7 !horde !archiv',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'thumbnail': 'https://01.cdn.vod.farm/preview/6436746cce14e25f751260a692872b9b.jpg',
|
'thumbnail': 'https://01.cdn.vod.farm/preview/6436746cce14e25f751260a692872b9b.jpg',
|
||||||
'upload_date': '20211001'
|
'upload_date': '20211001',
|
||||||
|
'duration': 32058,
|
||||||
},
|
},
|
||||||
'params': {'skip_download': True}
|
'params': {'skip_download': True}
|
||||||
}, {
|
}, {
|
||||||
|
@ -56,6 +60,12 @@ class GronkhIE(InfoExtractor):
|
||||||
'upload_date': unified_strdate(data_json.get('created_at')),
|
'upload_date': unified_strdate(data_json.get('created_at')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
'duration': float_or_none(data_json.get('source_length')),
|
||||||
|
'chapters': traverse_obj(data_json, (
|
||||||
|
'chapters', lambda _, v: float_or_none(v['offset']) is not None, {
|
||||||
|
'title': 'title',
|
||||||
|
'start_time': ('offset', {float_or_none}),
|
||||||
|
})) or None,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,37 +0,0 @@
|
||||||
from .common import InfoExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class HentaiStigmaIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'^https?://hentai\.animestigma\.com/(?P<id>[^/]+)'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://hentai.animestigma.com/inyouchuu-etsu-bonus/',
|
|
||||||
'md5': '4e3d07422a68a4cc363d8f57c8bf0d23',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'inyouchuu-etsu-bonus',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Inyouchuu Etsu Bonus',
|
|
||||||
'age_limit': 18,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
title = self._html_search_regex(
|
|
||||||
r'<h2[^>]+class="posttitle"[^>]*><a[^>]*>([^<]+)</a>',
|
|
||||||
webpage, 'title')
|
|
||||||
wrap_url = self._html_search_regex(
|
|
||||||
r'<iframe[^>]+src="([^"]+mp4)"', webpage, 'wrapper url')
|
|
||||||
wrap_webpage = self._download_webpage(wrap_url, video_id)
|
|
||||||
|
|
||||||
video_url = self._html_search_regex(
|
|
||||||
r'file\s*:\s*"([^"]+)"', wrap_webpage, 'video url')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'url': video_url,
|
|
||||||
'title': title,
|
|
||||||
'age_limit': 18,
|
|
||||||
}
|
|
72
yt_dlp/extractor/hollywoodreporter.py
Normal file
72
yt_dlp/extractor/hollywoodreporter.py
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
import functools
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .jwplatform import JWPlatformIE
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
OnDemandPagedList,
|
||||||
|
extract_attributes,
|
||||||
|
get_element_by_class,
|
||||||
|
get_element_html_by_class,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class HollywoodReporterIE(InfoExtractor):
    # hollywoodreporter.com video pages; playback is delegated to JWPlatform or YouTube
    _VALID_URL = r'https?://(?:www\.)?hollywoodreporter\.com/video/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.hollywoodreporter.com/video/chris-pine-michelle-rodriguez-dungeons-dragons-cast-directors-on-what-it-took-to-make-film-sxsw-2023/',
        'info_dict': {
            'id': 'zH4jZaR5',
            'ext': 'mp4',
            'title': 'md5:a9a1c073770a32f178955997712c4bd9',
            'description': 'The cast and directors of \'Dungeons & Dragons: Honor Among Thieves\' talk about their new film.',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/zH4jZaR5/poster.jpg?width=720',
            'upload_date': '20230312',
            'timestamp': 1678586423,
            'duration': 242.0,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Read the video card's data attributes and dispatch on its showcase type.

        Raises ExtractorError for any showcase type other than 'jwplayer' or 'youtube'.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The video ID and its hosting type are attributes on the card's link element
        card_html = get_element_html_by_class('vlanding-video-card__link', webpage) or ''
        data = extract_attributes(card_html)
        video_id = data['data-video-showcase-trigger']
        showcase_type = data['data-video-showcase-type']

        if showcase_type == 'jwplayer':
            return self.url_result(f'jwplatform:{video_id}', JWPlatformIE)
        if showcase_type == 'youtube':
            return self.url_result(video_id, 'Youtube')

        raise ExtractorError(f'Unsupported showcase type "{showcase_type}"')
|
||||||
|
|
||||||
|
|
||||||
|
class HollywoodReporterPlaylistIE(InfoExtractor):
    # hollywoodreporter.com video category pages: /vcategory/<slug>-<id>
    _VALID_URL = r'https?://(?:www\.)?hollywoodreporter\.com/vcategory/(?P<slug>[\w-]+)-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.hollywoodreporter.com/vcategory/heat-vision-breakdown-57822/',
        'playlist_mincount': 109,
        'info_dict': {
            'id': '57822',
            'title': 'heat-vision-breakdown',
        },
    }]

    def _fetch_page(self, slug, pl_id, page):
        """Yield a url_result for each video linked in one category page.

        `page` is zero-based; the site's page numbers start at 1.
        """
        page += 1
        page_url = f'https://www.hollywoodreporter.com/vcategory/{slug}-{pl_id}/page/{page}/'
        webpage = self._download_webpage(
            page_url, pl_id, note=f'Downloading playlist page {page}')
        section = get_element_by_class('video-playlist-river', webpage) or ''

        link_re = r'<a[^>]+href="([^"]+)"[^>]+class="c-title__link'
        for link in re.finditer(link_re, section):
            yield self.url_result(link.group(1), HollywoodReporterIE)

    def _real_extract(self, url):
        """Return a lazily paged playlist using the numeric ID as id and the slug as title."""
        slug, pl_id = self._match_valid_url(url).group('slug', 'id')
        fetch_page = functools.partial(self._fetch_page, slug, pl_id)
        entries = OnDemandPagedList(fetch_page, 15)
        return self.playlist_result(entries, pl_id, slug)
|
15
yt_dlp/extractor/hrefli.py
Normal file
15
yt_dlp/extractor/hrefli.py
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class HrefLiRedirectIE(InfoExtractor):
    # href.li redirect links: everything after "?" is the target URL
    IE_NAME = 'href.li'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://href\.li/\?(?P<url>.+)'

    _TESTS = [{
        'url': 'https://href.li/?https://www.reddit.com/r/cats/comments/12bluel/my_cat_helps_me_with_water/?utm_source=share&utm_medium=android_app&utm_name=androidcss&utm_term=1&utm_content=share_button',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result for the target URL captured from the query part."""
        target_url = self._match_valid_url(url).group('url')
        return self.url_result(target_url)
|
|
@ -1,239 +1,199 @@
|
||||||
import itertools
|
import functools
|
||||||
import re
|
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
import hashlib
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
OnDemandPagedList,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
remove_end,
|
qualities,
|
||||||
strip_or_none,
|
traverse_obj,
|
||||||
unified_strdate,
|
unified_timestamp,
|
||||||
url_or_none,
|
|
||||||
urljoin,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class IwaraBaseIE(InfoExtractor):
|
class IwaraIE(InfoExtractor):
|
||||||
_BASE_REGEX = r'(?P<base_url>https?://(?:www\.|ecchi\.)?iwara\.tv)'
|
IE_NAME = 'iwara'
|
||||||
|
_VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos?/(?P<id>[a-zA-Z0-9]+)'
|
||||||
def _extract_playlist(self, base_url, webpage):
|
|
||||||
for path in re.findall(r'class="title">\s*<a[^<]+href="([^"]+)', webpage):
|
|
||||||
yield self.url_result(urljoin(base_url, path))
|
|
||||||
|
|
||||||
|
|
||||||
class IwaraIE(IwaraBaseIE):
|
|
||||||
_VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/videos/(?P<id>[a-zA-Z0-9]+)'
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD',
|
# this video cannot be played because of migration
|
||||||
# md5 is unstable
|
'only_matching': True,
|
||||||
|
'url': 'https://www.iwara.tv/video/k2ayoueezfkx6gvq',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'amVwUl1EHpAD9RD',
|
'id': 'k2ayoueezfkx6gvq',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '【MMD R-18】ガールフレンド carry_me_off',
|
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'thumbnail': 'https://i.iwara.tv/sites/default/files/videos/thumbnails/7951/thumbnail-7951_0001.png',
|
'title': 'Defeat of Irybelda - アイリベルダの敗北',
|
||||||
'uploader': 'Reimu丨Action',
|
'description': 'md5:70278abebe706647a8b4cb04cf23e0d3',
|
||||||
'upload_date': '20150828',
|
'uploader': 'Inwerwm',
|
||||||
'description': 'md5:1d4905ce48c66c9299c617f08e106e0f',
|
'uploader_id': 'inwerwm',
|
||||||
|
'tags': 'count:1',
|
||||||
|
'like_count': 6133,
|
||||||
|
'view_count': 1050343,
|
||||||
|
'comment_count': 1,
|
||||||
|
'timestamp': 1677843869,
|
||||||
|
'modified_timestamp': 1679056362,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://ecchi.iwara.tv/videos/Vb4yf2yZspkzkBO',
|
'url': 'https://iwara.tv/video/1ywe1sbkqwumpdxz5/',
|
||||||
'md5': '7e5f1f359cd51a027ba4a7b7710a50f0',
|
'md5': '20691ce1473ec2766c0788e14c60ce66',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc',
|
'id': '1ywe1sbkqwumpdxz5',
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '[3D Hentai] Kyonyu × Genkai × Emaki Shinobi Girls.mp4',
|
|
||||||
'age_limit': 18,
|
|
||||||
},
|
|
||||||
'add_ie': ['GoogleDrive'],
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq',
|
|
||||||
# md5 is unstable
|
|
||||||
'info_dict': {
|
|
||||||
'id': '6liAP9s2Ojc',
|
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)',
|
'title': 'Aponia 阿波尼亚SEX Party Tonight 手动脱衣 大奶 裸腿',
|
||||||
'description': 'md5:590c12c0df1443d833fbebe05da8c47a',
|
'description': 'md5:0c4c310f2e0592d68b9f771d348329ca',
|
||||||
'upload_date': '20160910',
|
'uploader': '龙也zZZ',
|
||||||
'uploader': 'aMMDsork',
|
'uploader_id': 'user792540',
|
||||||
'uploader_id': 'UCVOFyOSCyFkXTYYHITtqB7A',
|
'tags': [
|
||||||
|
'uncategorized'
|
||||||
|
],
|
||||||
|
'like_count': 1809,
|
||||||
|
'view_count': 25156,
|
||||||
|
'comment_count': 1,
|
||||||
|
'timestamp': 1678732213,
|
||||||
|
'modified_timestamp': 1679110271,
|
||||||
},
|
},
|
||||||
'add_ie': ['Youtube'],
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _extract_formats(self, video_id, fileurl):
|
||||||
|
up = urllib.parse.urlparse(fileurl)
|
||||||
|
q = urllib.parse.parse_qs(up.query)
|
||||||
|
paths = up.path.rstrip('/').split('/')
|
||||||
|
# https://github.com/yt-dlp/yt-dlp/issues/6549#issuecomment-1473771047
|
||||||
|
x_version = hashlib.sha1('_'.join((paths[-1], q['expires'][0], '5nFp9kmbNnHdAFhaqMvt')).encode()).hexdigest()
|
||||||
|
|
||||||
|
preference = qualities(['preview', '360', '540', 'Source'])
|
||||||
|
|
||||||
|
files = self._download_json(fileurl, video_id, headers={'X-Version': x_version})
|
||||||
|
for fmt in files:
|
||||||
|
yield traverse_obj(fmt, {
|
||||||
|
'format_id': 'name',
|
||||||
|
'url': ('src', ('view', 'download'), {self._proto_relative_url}),
|
||||||
|
'ext': ('type', {mimetype2ext}),
|
||||||
|
'quality': ('name', {preference}),
|
||||||
|
'height': ('name', {int_or_none}),
|
||||||
|
}, get_all=False)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
video_data = self._download_json(f'https://api.iwara.tv/video/{video_id}', video_id, expected_status=lambda x: True)
|
||||||
|
errmsg = video_data.get('message')
|
||||||
|
# at this point we can actually get uploaded user info, but do we need it?
|
||||||
|
if errmsg == 'errors.privateVideo':
|
||||||
|
self.raise_login_required('Private video. Login if you have permissions to watch')
|
||||||
|
elif errmsg:
|
||||||
|
raise ExtractorError(f'Iwara says: {errmsg}')
|
||||||
|
|
||||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
if not video_data.get('fileUrl'):
|
||||||
|
if video_data.get('embedUrl'):
|
||||||
hostname = urllib.parse.urlparse(urlh.geturl()).hostname
|
return self.url_result(video_data.get('embedUrl'))
|
||||||
# ecchi is 'sexy' in Japanese
|
raise ExtractorError('This video is unplayable', expected=True)
|
||||||
age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0
|
|
||||||
|
|
||||||
video_data = self._download_json('http://www.iwara.tv/api/video/%s' % video_id, video_id)
|
|
||||||
|
|
||||||
if not video_data:
|
|
||||||
iframe_url = self._html_search_regex(
|
|
||||||
r'<iframe[^>]+src=([\'"])(?P<url>[^\'"]+)\1',
|
|
||||||
webpage, 'iframe URL', group='url')
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': iframe_url,
|
|
||||||
'age_limit': age_limit,
|
|
||||||
}
|
|
||||||
|
|
||||||
title = remove_end(self._html_extract_title(webpage), ' | Iwara')
|
|
||||||
|
|
||||||
thumbnail = self._html_search_regex(
|
|
||||||
r'poster=[\'"]([^\'"]+)', webpage, 'thumbnail', default=None)
|
|
||||||
|
|
||||||
uploader = self._html_search_regex(
|
|
||||||
r'class="username">([^<]+)', webpage, 'uploader', fatal=False)
|
|
||||||
|
|
||||||
upload_date = unified_strdate(self._html_search_regex(
|
|
||||||
r'作成日:([^\s]+)', webpage, 'upload_date', fatal=False))
|
|
||||||
|
|
||||||
description = strip_or_none(self._search_regex(
|
|
||||||
r'<p>(.+?(?=</div))', webpage, 'description', fatal=False,
|
|
||||||
flags=re.DOTALL))
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for a_format in video_data:
|
|
||||||
format_uri = url_or_none(a_format.get('uri'))
|
|
||||||
if not format_uri:
|
|
||||||
continue
|
|
||||||
format_id = a_format.get('resolution')
|
|
||||||
height = int_or_none(self._search_regex(
|
|
||||||
r'(\d+)p', format_id, 'height', default=None))
|
|
||||||
formats.append({
|
|
||||||
'url': self._proto_relative_url(format_uri, 'https:'),
|
|
||||||
'format_id': format_id,
|
|
||||||
'ext': mimetype2ext(a_format.get('mime')) or 'mp4',
|
|
||||||
'height': height,
|
|
||||||
'width': int_or_none(height / 9.0 * 16.0 if height else None),
|
|
||||||
'quality': 1 if format_id == 'Source' else 0,
|
|
||||||
})
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'age_limit': 18 if video_data.get('rating') == 'ecchi' else 0, # ecchi is 'sexy' in Japanese
|
||||||
'age_limit': age_limit,
|
**traverse_obj(video_data, {
|
||||||
'formats': formats,
|
'title': 'title',
|
||||||
'thumbnail': self._proto_relative_url(thumbnail, 'https:'),
|
'description': 'body',
|
||||||
'uploader': uploader,
|
'uploader': ('user', 'name'),
|
||||||
'upload_date': upload_date,
|
'uploader_id': ('user', 'username'),
|
||||||
'description': description,
|
'tags': ('tags', ..., 'id'),
|
||||||
|
'like_count': 'numLikes',
|
||||||
|
'view_count': 'numViews',
|
||||||
|
'comment_count': 'numComments',
|
||||||
|
'timestamp': ('createdAt', {unified_timestamp}),
|
||||||
|
'modified_timestamp': ('updatedAt', {unified_timestamp}),
|
||||||
|
'thumbnail': ('file', 'id', {str}, {
|
||||||
|
lambda x: f'https://files.iwara.tv/image/thumbnail/{x}/thumbnail-00.jpg'}),
|
||||||
|
}),
|
||||||
|
'formats': list(self._extract_formats(video_id, video_data.get('fileUrl'))),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class IwaraPlaylistIE(IwaraBaseIE):
|
class IwaraUserIE(InfoExtractor):
|
||||||
_VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/playlist/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?iwara\.tv/profile/(?P<id>[^/?#&]+)'
|
||||||
IE_NAME = 'iwara:playlist'
|
|
||||||
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://ecchi.iwara.tv/playlist/best-enf',
|
|
||||||
'info_dict': {
|
|
||||||
'title': 'Best enf',
|
|
||||||
'uploader': 'Jared98112',
|
|
||||||
'id': 'best-enf',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 1097,
|
|
||||||
}, {
|
|
||||||
# urlencoded
|
|
||||||
'url': 'https://ecchi.iwara.tv/playlist/%E3%83%97%E3%83%AC%E3%82%A4%E3%83%AA%E3%82%B9%E3%83%88-2',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'プレイリスト-2',
|
|
||||||
'title': 'プレイリスト',
|
|
||||||
'uploader': 'mainyu',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 91,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
playlist_id, base_url = self._match_valid_url(url).group('id', 'base_url')
|
|
||||||
playlist_id = urllib.parse.unquote(playlist_id)
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'id': playlist_id,
|
|
||||||
'title': self._html_search_regex(r'class="title"[^>]*>([^<]+)', webpage, 'title', fatal=False),
|
|
||||||
'uploader': self._html_search_regex(r'<h2>([^<]+)', webpage, 'uploader', fatal=False),
|
|
||||||
'entries': self._extract_playlist(base_url, webpage),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class IwaraUserIE(IwaraBaseIE):
|
|
||||||
_VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/users/(?P<id>[^/?#&]+)'
|
|
||||||
IE_NAME = 'iwara:user'
|
IE_NAME = 'iwara:user'
|
||||||
|
_PER_PAGE = 32
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'note': 'number of all videos page is just 1 page. less than 40 videos',
|
'url': 'https://iwara.tv/profile/user792540/videos',
|
||||||
'url': 'https://ecchi.iwara.tv/users/infinityyukarip',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Uploaded videos from Infinity_YukariP',
|
'id': 'user792540',
|
||||||
'id': 'infinityyukarip',
|
|
||||||
'uploader': 'Infinity_YukariP',
|
|
||||||
'uploader_id': 'infinityyukarip',
|
|
||||||
},
|
},
|
||||||
'playlist_mincount': 39,
|
'playlist_mincount': 80,
|
||||||
}, {
|
}, {
|
||||||
'note': 'no even all videos page. probably less than 10 videos',
|
'url': 'https://iwara.tv/profile/theblackbirdcalls/videos',
|
||||||
'url': 'https://ecchi.iwara.tv/users/mmd-quintet',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'Uploaded videos from mmd quintet',
|
|
||||||
'id': 'mmd-quintet',
|
|
||||||
'uploader': 'mmd quintet',
|
|
||||||
'uploader_id': 'mmd-quintet',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 6,
|
|
||||||
}, {
|
|
||||||
'note': 'has paging. more than 40 videos',
|
|
||||||
'url': 'https://ecchi.iwara.tv/users/theblackbirdcalls',
|
|
||||||
'info_dict': {
|
|
||||||
'title': 'Uploaded videos from TheBlackbirdCalls',
|
|
||||||
'id': 'theblackbirdcalls',
|
'id': 'theblackbirdcalls',
|
||||||
'uploader': 'TheBlackbirdCalls',
|
|
||||||
'uploader_id': 'theblackbirdcalls',
|
|
||||||
},
|
},
|
||||||
'playlist_mincount': 420,
|
'playlist_mincount': 723,
|
||||||
}, {
|
}, {
|
||||||
'note': 'foreign chars in URL. there must be foreign characters in URL',
|
'url': 'https://iwara.tv/profile/user792540',
|
||||||
'url': 'https://ecchi.iwara.tv/users/ぶた丼',
|
'only_matching': True,
|
||||||
'info_dict': {
|
}, {
|
||||||
'title': 'Uploaded videos from ぶた丼',
|
'url': 'https://iwara.tv/profile/theblackbirdcalls',
|
||||||
'id': 'ぶた丼',
|
'only_matching': True,
|
||||||
'uploader': 'ぶた丼',
|
|
||||||
'uploader_id': 'ぶた丼',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 170,
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _entries(self, playlist_id, base_url):
|
def _entries(self, playlist_id, user_id, page):
|
||||||
webpage = self._download_webpage(
|
videos = self._download_json(
|
||||||
f'{base_url}/users/{playlist_id}', playlist_id)
|
'https://api.iwara.tv/videos', playlist_id,
|
||||||
videos_url = self._search_regex(r'<a href="(/users/[^/]+/videos)(?:\?[^"]+)?">', webpage, 'all videos url', default=None)
|
note=f'Downloading page {page}',
|
||||||
if not videos_url:
|
query={
|
||||||
yield from self._extract_playlist(base_url, webpage)
|
'page': page,
|
||||||
return
|
'sort': 'date',
|
||||||
|
'user': user_id,
|
||||||
videos_url = urljoin(base_url, videos_url)
|
'limit': self._PER_PAGE,
|
||||||
|
})
|
||||||
for n in itertools.count(1):
|
for x in traverse_obj(videos, ('results', ..., 'id')):
|
||||||
page = self._download_webpage(
|
yield self.url_result(f'https://iwara.tv/video/{x}')
|
||||||
videos_url, playlist_id, note=f'Downloading playlist page {n}',
|
|
||||||
query={'page': str(n - 1)} if n > 1 else {})
|
|
||||||
yield from self._extract_playlist(
|
|
||||||
base_url, page)
|
|
||||||
|
|
||||||
if f'page={n}' not in page:
|
|
||||||
break
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id, base_url = self._match_valid_url(url).group('id', 'base_url')
|
playlist_id = self._match_id(url)
|
||||||
playlist_id = urllib.parse.unquote(playlist_id)
|
user_info = self._download_json(
|
||||||
|
f'https://api.iwara.tv/profile/{playlist_id}', playlist_id,
|
||||||
|
note='Requesting user info')
|
||||||
|
user_id = traverse_obj(user_info, ('user', 'id'))
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
self._entries(playlist_id, base_url), playlist_id)
|
OnDemandPagedList(
|
||||||
|
functools.partial(self._entries, playlist_id, user_id),
|
||||||
|
self._PER_PAGE),
|
||||||
|
playlist_id, traverse_obj(user_info, ('user', 'name')))
|
||||||
|
|
||||||
|
|
||||||
|
class IwaraPlaylistIE(InfoExtractor):
|
||||||
|
# the ID is an UUID but I don't think it's necessary to write concrete regex
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?iwara\.tv/playlist/(?P<id>[0-9a-f-]+)'
|
||||||
|
IE_NAME = 'iwara:playlist'
|
||||||
|
_PER_PAGE = 32
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://iwara.tv/playlist/458e5486-36a4-4ac0-b233-7e9eef01025f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '458e5486-36a4-4ac0-b233-7e9eef01025f',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 3,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _entries(self, playlist_id, first_page, page):
|
||||||
|
videos = self._download_json(
|
||||||
|
'https://api.iwara.tv/videos', playlist_id, f'Downloading page {page}',
|
||||||
|
query={'page': page, 'limit': self._PER_PAGE}) if page else first_page
|
||||||
|
for x in traverse_obj(videos, ('results', ..., 'id')):
|
||||||
|
yield self.url_result(f'https://iwara.tv/video/{x}')
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
playlist_id = self._match_id(url)
|
||||||
|
page_0 = self._download_json(
|
||||||
|
f'https://api.iwara.tv/playlist/{playlist_id}?page=0&limit={self._PER_PAGE}', playlist_id,
|
||||||
|
note='Requesting playlist info')
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
OnDemandPagedList(
|
||||||
|
functools.partial(self._entries, playlist_id, page_0),
|
||||||
|
self._PER_PAGE),
|
||||||
|
playlist_id, traverse_obj(page_0, ('title', 'name')))
|
||||||
|
|
|
@ -8,14 +8,16 @@ class JWPlatformIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
|
_VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview|manifest)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
|
'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
|
||||||
'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
|
'md5': '3aa16e4f6860e6e78b7df5829519aed3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'nPripu9l',
|
'id': 'nPripu9l',
|
||||||
'ext': 'mov',
|
'ext': 'mp4',
|
||||||
'title': 'Big Buck Bunny Trailer',
|
'title': 'Big Buck Bunny Trailer',
|
||||||
'description': 'Big Buck Bunny is a short animated film by the Blender Institute. It is made using free and open source software.',
|
'description': 'Big Buck Bunny is a short animated film by the Blender Institute. It is made using free and open source software.',
|
||||||
'upload_date': '20081127',
|
'upload_date': '20081127',
|
||||||
'timestamp': 1227796140,
|
'timestamp': 1227796140,
|
||||||
|
'duration': 32.0,
|
||||||
|
'thumbnail': 'https://cdn.jwplayer.com/v2/media/nPripu9l/poster.jpg?width=720',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js',
|
'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js',
|
||||||
|
@ -37,18 +39,31 @@ class JWPlatformIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# Player url not surrounded by quotes
|
# Player url not surrounded by quotes
|
||||||
'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/darling-berlin',
|
'url': 'https://www.deutsche-kinemathek.de/en/online/streaming/school-trip',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'R10NQdhY',
|
'id': 'jUxh5uin',
|
||||||
'title': 'Playgirl',
|
'title': 'Klassenfahrt',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'upload_date': '20220624',
|
'upload_date': '20230109',
|
||||||
'thumbnail': 'https://cdn.jwplayer.com/v2/media/R10NQdhY/poster.jpg?width=720',
|
'thumbnail': 'https://cdn.jwplayer.com/v2/media/jUxh5uin/poster.jpg?width=720',
|
||||||
'timestamp': 1656064800,
|
'timestamp': 1673270298,
|
||||||
'description': 'BRD 1966, Will Tremper',
|
'description': '',
|
||||||
'duration': 5146.0,
|
'duration': 5193.0,
|
||||||
},
|
},
|
||||||
'params': {'allowed_extractors': ['generic', 'jwplatform']},
|
'params': {'allowed_extractors': ['generic', 'jwplatform']},
|
||||||
|
}, {
|
||||||
|
# iframe src attribute includes backslash before URL string
|
||||||
|
'url': 'https://www.elespectador.com/colombia/video-asi-se-evito-la-fuga-de-john-poulos-presunto-feminicida-de-valentina-trespalacios-explicacion',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'QD3gsexj',
|
||||||
|
'title': 'Así se evitó la fuga de John Poulos, presunto feminicida de Valentina Trespalacios',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20230127',
|
||||||
|
'thumbnail': 'https://cdn.jwplayer.com/v2/media/QD3gsexj/poster.jpg?width=720',
|
||||||
|
'timestamp': 1674862986,
|
||||||
|
'description': 'md5:128fd74591c4e1fc2da598c5cb6f5ce4',
|
||||||
|
'duration': 263.0,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -57,7 +72,7 @@ class JWPlatformIE(InfoExtractor):
|
||||||
# <input value=URL> is used by hyland.com
|
# <input value=URL> is used by hyland.com
|
||||||
# if we find <iframe>, dont look for <input>
|
# if we find <iframe>, dont look for <input>
|
||||||
ret = re.findall(
|
ret = re.findall(
|
||||||
r'<%s[^>]+?%s=["\']?((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
|
r'<%s[^>]+?%s=\\?["\']?((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})' % (tag, key),
|
||||||
webpage)
|
webpage)
|
||||||
if ret:
|
if ret:
|
||||||
return ret
|
return ret
|
||||||
|
|
|
@ -14,7 +14,7 @@ from ..utils import (
|
||||||
|
|
||||||
class KickBaseIE(InfoExtractor):
|
class KickBaseIE(InfoExtractor):
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._request_webpage(HEADRequest('https://kick.com/'), None, 'Setting up session')
|
self._request_webpage(HEADRequest('https://kick.com/'), None, 'Setting up session', fatal=False)
|
||||||
xsrf_token = self._get_cookies('https://kick.com/').get('XSRF-TOKEN')
|
xsrf_token = self._get_cookies('https://kick.com/').get('XSRF-TOKEN')
|
||||||
if not xsrf_token:
|
if not xsrf_token:
|
||||||
self.write_debug('kick.com did not set XSRF-TOKEN cookie')
|
self.write_debug('kick.com did not set XSRF-TOKEN cookie')
|
||||||
|
|
|
@ -1,33 +1,24 @@
|
||||||
|
import itertools
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none, format_field
|
from ..utils import int_or_none, parse_qs, traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class LastFMPlaylistBaseIE(InfoExtractor):
|
class LastFMPlaylistBaseIE(InfoExtractor):
|
||||||
def _entries(self, url, playlist_id):
|
def _entries(self, url, playlist_id):
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
single_page = traverse_obj(parse_qs(url), ('page', -1, {int_or_none}))
|
||||||
start_page_number = int_or_none(self._search_regex(
|
for page in itertools.count(single_page or 1):
|
||||||
r'\bpage=(\d+)', url, 'page', default=None)) or 1
|
|
||||||
last_page_number = int_or_none(self._search_regex(
|
|
||||||
r'>(\d+)</a>[^<]*</li>[^<]*<li[^>]+class="pagination-next', webpage, 'last_page', default=None))
|
|
||||||
|
|
||||||
for page_number in range(start_page_number, (last_page_number or start_page_number) + 1):
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
url, playlist_id,
|
url, playlist_id, f'Downloading page {page}', query={'page': page})
|
||||||
note='Downloading page %d%s' % (page_number, format_field(last_page_number, None, ' of %d')),
|
videos = re.findall(r'data-youtube-url="([^"]+)"', webpage)
|
||||||
query={'page': page_number})
|
yield from videos
|
||||||
page_entries = [
|
if single_page or not videos:
|
||||||
self.url_result(player_url, 'Youtube')
|
return
|
||||||
for player_url in set(re.findall(r'data-youtube-url="([^"]+)"', webpage))
|
|
||||||
]
|
|
||||||
|
|
||||||
for e in page_entries:
|
|
||||||
yield e
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
return self.playlist_result(self._entries(url, playlist_id), playlist_id)
|
return self.playlist_from_matches(self._entries(url, playlist_id), playlist_id, ie='Youtube')
|
||||||
|
|
||||||
|
|
||||||
class LastFMPlaylistIE(LastFMPlaylistBaseIE):
|
class LastFMPlaylistIE(LastFMPlaylistBaseIE):
|
||||||
|
@ -37,7 +28,7 @@ class LastFMPlaylistIE(LastFMPlaylistBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'Oasis',
|
'id': 'Oasis',
|
||||||
},
|
},
|
||||||
'playlist_count': 11,
|
'playlist_mincount': 11,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.last.fm/music/Oasis',
|
'url': 'https://www.last.fm/music/Oasis',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -73,6 +64,18 @@ class LastFMUserIE(LastFMPlaylistBaseIE):
|
||||||
'id': '12319471',
|
'id': '12319471',
|
||||||
},
|
},
|
||||||
'playlist_count': 30,
|
'playlist_count': 30,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.last.fm/user/naamloos1/playlists/12543760',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '12543760',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 80,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.last.fm/user/naamloos1/playlists/12543760?page=3',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '12543760',
|
||||||
|
},
|
||||||
|
'playlist_count': 32,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,143 +0,0 @@
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
|
||||||
format_field,
|
|
||||||
int_or_none,
|
|
||||||
str_or_none,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class LineLiveBaseIE(InfoExtractor):
|
|
||||||
_API_BASE_URL = 'https://live-api.line-apps.com/web/v4.0/channel/'
|
|
||||||
|
|
||||||
def _parse_broadcast_item(self, item):
|
|
||||||
broadcast_id = compat_str(item['id'])
|
|
||||||
title = item['title']
|
|
||||||
is_live = item.get('isBroadcastingNow')
|
|
||||||
|
|
||||||
thumbnails = []
|
|
||||||
for thumbnail_id, thumbnail_url in (item.get('thumbnailURLs') or {}).items():
|
|
||||||
if not thumbnail_url:
|
|
||||||
continue
|
|
||||||
thumbnails.append({
|
|
||||||
'id': thumbnail_id,
|
|
||||||
'url': thumbnail_url,
|
|
||||||
})
|
|
||||||
|
|
||||||
channel = item.get('channel') or {}
|
|
||||||
channel_id = str_or_none(channel.get('id'))
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': broadcast_id,
|
|
||||||
'title': title,
|
|
||||||
'thumbnails': thumbnails,
|
|
||||||
'timestamp': int_or_none(item.get('createdAt')),
|
|
||||||
'channel': channel.get('name'),
|
|
||||||
'channel_id': channel_id,
|
|
||||||
'channel_url': format_field(channel_id, None, 'https://live.line.me/channels/%s'),
|
|
||||||
'duration': int_or_none(item.get('archiveDuration')),
|
|
||||||
'view_count': int_or_none(item.get('viewerCount')),
|
|
||||||
'comment_count': int_or_none(item.get('chatCount')),
|
|
||||||
'is_live': is_live,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class LineLiveIE(LineLiveBaseIE):
|
|
||||||
_VALID_URL = r'https?://live\.line\.me/channels/(?P<channel_id>\d+)/broadcast/(?P<id>\d+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://live.line.me/channels/5833718/broadcast/18373277',
|
|
||||||
'md5': '2c15843b8cb3acd55009ddcb2db91f7c',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '18373277',
|
|
||||||
'title': '2021/12/05 (15分犬)定例譲渡会🐶',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'timestamp': 1638674925,
|
|
||||||
'upload_date': '20211205',
|
|
||||||
'thumbnail': 'md5:e1f5817e60f4a72b7e43377cf308d7ef',
|
|
||||||
'channel_url': 'https://live.line.me/channels/5833718',
|
|
||||||
'channel': 'Yahooニュース掲載🗞プロフ見てね🐕🐕',
|
|
||||||
'channel_id': '5833718',
|
|
||||||
'duration': 937,
|
|
||||||
'view_count': int,
|
|
||||||
'comment_count': int,
|
|
||||||
'is_live': False,
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
# archiveStatus == 'DELETED'
|
|
||||||
'url': 'https://live.line.me/channels/4778159/broadcast/16378488',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
channel_id, broadcast_id = self._match_valid_url(url).groups()
|
|
||||||
broadcast = self._download_json(
|
|
||||||
self._API_BASE_URL + '%s/broadcast/%s' % (channel_id, broadcast_id),
|
|
||||||
broadcast_id)
|
|
||||||
item = broadcast['item']
|
|
||||||
info = self._parse_broadcast_item(item)
|
|
||||||
protocol = 'm3u8' if info['is_live'] else 'm3u8_native'
|
|
||||||
formats = []
|
|
||||||
for k, v in (broadcast.get(('live' if info['is_live'] else 'archived') + 'HLSURLs') or {}).items():
|
|
||||||
if not v:
|
|
||||||
continue
|
|
||||||
if k == 'abr':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
v, broadcast_id, 'mp4', protocol,
|
|
||||||
m3u8_id='hls', fatal=False))
|
|
||||||
continue
|
|
||||||
f = {
|
|
||||||
'ext': 'mp4',
|
|
||||||
'format_id': 'hls-' + k,
|
|
||||||
'protocol': protocol,
|
|
||||||
'url': v,
|
|
||||||
}
|
|
||||||
if not k.isdigit():
|
|
||||||
f['vcodec'] = 'none'
|
|
||||||
formats.append(f)
|
|
||||||
if not formats:
|
|
||||||
archive_status = item.get('archiveStatus')
|
|
||||||
if archive_status != 'ARCHIVED':
|
|
||||||
self.raise_no_formats('this video has been ' + archive_status.lower(), expected=True)
|
|
||||||
info['formats'] = formats
|
|
||||||
return info
|
|
||||||
|
|
||||||
|
|
||||||
class LineLiveChannelIE(LineLiveBaseIE):
|
|
||||||
_VALID_URL = r'https?://live\.line\.me/channels/(?P<id>\d+)(?!/broadcast/\d+)(?:[/?&#]|$)'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'https://live.line.me/channels/5893542',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '5893542',
|
|
||||||
'title': 'いくらちゃんだよぉ🦒',
|
|
||||||
'description': 'md5:4d418087973ad081ceb1b3481f0b1816',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 29
|
|
||||||
}
|
|
||||||
|
|
||||||
def _archived_broadcasts_entries(self, archived_broadcasts, channel_id):
|
|
||||||
while True:
|
|
||||||
for row in (archived_broadcasts.get('rows') or []):
|
|
||||||
share_url = str_or_none(row.get('shareURL'))
|
|
||||||
if not share_url:
|
|
||||||
continue
|
|
||||||
info = self._parse_broadcast_item(row)
|
|
||||||
info.update({
|
|
||||||
'_type': 'url',
|
|
||||||
'url': share_url,
|
|
||||||
'ie_key': LineLiveIE.ie_key(),
|
|
||||||
})
|
|
||||||
yield info
|
|
||||||
if not archived_broadcasts.get('hasNextPage'):
|
|
||||||
return
|
|
||||||
archived_broadcasts = self._download_json(
|
|
||||||
self._API_BASE_URL + channel_id + '/archived_broadcasts',
|
|
||||||
channel_id, query={
|
|
||||||
'lastId': info['id'],
|
|
||||||
})
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
channel_id = self._match_id(url)
|
|
||||||
channel = self._download_json(self._API_BASE_URL + channel_id, channel_id)
|
|
||||||
return self.playlist_result(
|
|
||||||
self._archived_broadcasts_entries(channel.get('archivedBroadcasts') or {}, channel_id),
|
|
||||||
channel_id, channel.get('title'), channel.get('information'))
|
|
|
@ -8,12 +8,12 @@ from ..utils import (
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
traverse_obj,
|
traverse_obj
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class MedalTVIE(InfoExtractor):
|
class MedalTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?medal\.tv/(?P<path>games/[^/?#&]+/clips)/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?medal\.tv/games/[^/?#&]+/clips/(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://medal.tv/games/valorant/clips/jTBFnLKdLy15K',
|
'url': 'https://medal.tv/games/valorant/clips/jTBFnLKdLy15K',
|
||||||
'md5': '6930f8972914b6b9fdc2bb3918098ba0',
|
'md5': '6930f8972914b6b9fdc2bb3918098ba0',
|
||||||
|
@ -80,25 +80,14 @@ class MedalTVIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
path = self._match_valid_url(url).group('path')
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
next_data = self._search_json(
|
hydration_data = self._search_json(
|
||||||
'<script[^>]*__NEXT_DATA__[^>]*>', webpage,
|
r'<script[^>]*>[^<]*\bhydrationData\s*=', webpage,
|
||||||
'next data', video_id, end_pattern='</script>', fatal=False)
|
'next data', video_id, end_pattern='</script>', fatal=False)
|
||||||
|
|
||||||
build_id = next_data.get('buildId')
|
clip = traverse_obj(hydration_data, ('clips', ...), get_all=False)
|
||||||
if not build_id:
|
|
||||||
raise ExtractorError(
|
|
||||||
'Could not find build ID.', video_id=video_id)
|
|
||||||
|
|
||||||
locale = next_data.get('locale', 'en')
|
|
||||||
|
|
||||||
api_response = self._download_json(
|
|
||||||
f'https://medal.tv/_next/data/{build_id}/{locale}/{path}/{video_id}.json', video_id)
|
|
||||||
|
|
||||||
clip = traverse_obj(api_response, ('pageProps', 'clip')) or {}
|
|
||||||
if not clip:
|
if not clip:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Could not find video information.', video_id=video_id)
|
'Could not find video information.', video_id=video_id)
|
||||||
|
@ -152,7 +141,7 @@ class MedalTVIE(InfoExtractor):
|
||||||
|
|
||||||
# Necessary because the id of the author is not known in advance.
|
# Necessary because the id of the author is not known in advance.
|
||||||
# Won't raise an issue if no profile can be found as this is optional.
|
# Won't raise an issue if no profile can be found as this is optional.
|
||||||
author = traverse_obj(api_response, ('pageProps', 'profile')) or {}
|
author = traverse_obj(hydration_data, ('profiles', ...), get_all=False) or {}
|
||||||
author_id = str_or_none(author.get('userId'))
|
author_id = str_or_none(author.get('userId'))
|
||||||
author_url = format_field(author_id, None, 'https://medal.tv/users/%s')
|
author_url = format_field(author_id, None, 'https://medal.tv/users/%s')
|
||||||
|
|
||||||
|
|
|
@ -2,16 +2,44 @@ import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
remove_end,
|
remove_end,
|
||||||
str_or_none,
|
|
||||||
strip_or_none,
|
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class MediaStreamIE(InfoExtractor):
|
class MediaStreamBaseIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://mdstrm.com/(?:embed|live-stream)/(?P<id>\w+)'
|
_EMBED_BASE_URL = 'https://mdstrm.com/embed'
|
||||||
|
_BASE_URL_RE = r'https?://mdstrm\.com/(?:embed|live-stream)'
|
||||||
|
|
||||||
|
def _extract_mediastream_urls(self, webpage):
|
||||||
|
yield from traverse_obj(list(self._yield_json_ld(webpage, None)), (
|
||||||
|
lambda _, v: v['@type'] == 'VideoObject', ('embedUrl', 'contentUrl'),
|
||||||
|
{lambda x: x if re.match(rf'{self._BASE_URL_RE}/\w+', x) else None}))
|
||||||
|
|
||||||
|
for mobj in re.finditer(r'<script[^>]+>[^>]*playerMdStream\.mdstreamVideo\(\s*[\'"](?P<video_id>\w+)', webpage):
|
||||||
|
yield f'{self._EMBED_BASE_URL}/{mobj.group("video_id")}'
|
||||||
|
|
||||||
|
yield from re.findall(
|
||||||
|
rf'<iframe[^>]+\bsrc="({self._BASE_URL_RE}/\w+)', webpage)
|
||||||
|
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'''(?x)
|
||||||
|
<(?:div|ps-mediastream)[^>]+
|
||||||
|
(class="[^"]*MediaStreamVideoPlayer)[^"]*"[^>]+
|
||||||
|
data-video-id="(?P<video_id>\w+)"
|
||||||
|
(?:\s*data-video-type="(?P<video_type>[^"]+))?
|
||||||
|
(?:[^>]*>\s*<div[^>]+\1[^"]*"[^>]+data-mediastream=["\'][^>]+
|
||||||
|
https://mdstrm\.com/(?P<live>live-stream))?
|
||||||
|
''', webpage):
|
||||||
|
|
||||||
|
video_type = 'live-stream' if mobj.group('video_type') == 'live' or mobj.group('live') else 'embed'
|
||||||
|
yield f'https://mdstrm.com/{video_type}/{mobj.group("video_id")}'
|
||||||
|
|
||||||
|
|
||||||
|
class MediaStreamIE(MediaStreamBaseIE):
|
||||||
|
_VALID_URL = MediaStreamBaseIE._BASE_URL_RE + r'/(?P<id>\w+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://mdstrm.com/embed/6318e3f1d1d316083ae48831',
|
'url': 'https://mdstrm.com/embed/6318e3f1d1d316083ae48831',
|
||||||
|
@ -23,6 +51,7 @@ class MediaStreamIE(InfoExtractor):
|
||||||
'thumbnail': r're:^https?://[^?#]+6318e3f1d1d316083ae48831',
|
'thumbnail': r're:^https?://[^?#]+6318e3f1d1d316083ae48831',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_WEBPAGE_TESTS = [{
|
_WEBPAGE_TESTS = [{
|
||||||
|
@ -35,9 +64,7 @@ class MediaStreamIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'live_status': 'is_live',
|
'live_status': 'is_live',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {'skip_download': 'Livestream'},
|
||||||
'skip_download': 'Livestream'
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.multimedios.com/television/clases-de-llaves-y-castigos-quien-sabe-mas',
|
'url': 'https://www.multimedios.com/television/clases-de-llaves-y-castigos-quien-sabe-mas',
|
||||||
'md5': 'de31f0b1ecc321fb35bf22d58734ea40',
|
'md5': 'de31f0b1ecc321fb35bf22d58734ea40',
|
||||||
|
@ -48,6 +75,7 @@ class MediaStreamIE(InfoExtractor):
|
||||||
'thumbnail': 're:^https?://[^?#]+63731bab8ec9b308a2c9ed28',
|
'thumbnail': 're:^https?://[^?#]+63731bab8ec9b308a2c9ed28',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.americatv.com.pe/videos/esto-es-guerra/facundo-gonzalez-sufrio-fuerte-golpe-durante-competencia-frente-hugo-garcia-eeg-noticia-139120',
|
'url': 'https://www.americatv.com.pe/videos/esto-es-guerra/facundo-gonzalez-sufrio-fuerte-golpe-durante-competencia-frente-hugo-garcia-eeg-noticia-139120',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -57,6 +85,7 @@ class MediaStreamIE(InfoExtractor):
|
||||||
'thumbnail': 're:^https?://[^?#]+63756df1c638b008a5659dec',
|
'thumbnail': 're:^https?://[^?#]+63756df1c638b008a5659dec',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.americatv.com.pe/videos/al-fondo-hay-sitio/nuevas-lomas-town-bernardo-mata-se-enfrento-sujeto-luchar-amor-macarena-noticia-139083',
|
'url': 'https://www.americatv.com.pe/videos/al-fondo-hay-sitio/nuevas-lomas-town-bernardo-mata-se-enfrento-sujeto-luchar-amor-macarena-noticia-139083',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -66,26 +95,12 @@ class MediaStreamIE(InfoExtractor):
|
||||||
'thumbnail': 're:^https?://[^?#]+637307669609130f74cd3a6e',
|
'thumbnail': 're:^https?://[^?#]+637307669609130f74cd3a6e',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
def _extract_from_webpage(self, url, webpage):
|
||||||
def _extract_embed_urls(cls, url, webpage):
|
for embed_url in self._extract_mediastream_urls(webpage):
|
||||||
for mobj in re.finditer(r'<script[^>]+>[^>]*playerMdStream.mdstreamVideo\(\s*[\'"](?P<video_id>\w+)', webpage):
|
yield self.url_result(embed_url, MediaStreamIE, None)
|
||||||
yield f'https://mdstrm.com/embed/{mobj.group("video_id")}'
|
|
||||||
|
|
||||||
yield from re.findall(
|
|
||||||
r'<iframe[^>]src\s*=\s*"(https://mdstrm.com/[\w-]+/\w+)', webpage)
|
|
||||||
|
|
||||||
for mobj in re.finditer(
|
|
||||||
r'''(?x)
|
|
||||||
<(?:div|ps-mediastream)[^>]+
|
|
||||||
class\s*=\s*"[^"]*MediaStreamVideoPlayer[^"]*"[^>]+
|
|
||||||
data-video-id\s*=\s*"(?P<video_id>\w+)\s*"
|
|
||||||
(?:\s*data-video-type\s*=\s*"(?P<video_type>[^"]+))?
|
|
||||||
''', webpage):
|
|
||||||
|
|
||||||
video_type = 'live-stream' if mobj.group('video_type') == 'live' else 'embed'
|
|
||||||
yield f'https://mdstrm.com/{video_type}/{mobj.group("video_id")}'
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
@ -94,7 +109,7 @@ class MediaStreamIE(InfoExtractor):
|
||||||
if 'Debido a tu ubicación no puedes ver el contenido' in webpage:
|
if 'Debido a tu ubicación no puedes ver el contenido' in webpage:
|
||||||
self.raise_geo_restricted()
|
self.raise_geo_restricted()
|
||||||
|
|
||||||
player_config = self._search_json(r'window.MDSTRM.OPTIONS\s*=', webpage, 'metadata', video_id)
|
player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)
|
||||||
|
|
||||||
formats, subtitles = [], {}
|
formats, subtitles = [], {}
|
||||||
for video_format in player_config['src']:
|
for video_format in player_config['src']:
|
||||||
|
@ -122,7 +137,7 @@ class MediaStreamIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class WinSportsVideoIE(InfoExtractor):
|
class WinSportsVideoIE(MediaStreamBaseIE):
|
||||||
_VALID_URL = r'https?://www\.winsports\.co/videos/(?P<id>[\w-]+)'
|
_VALID_URL = r'https?://www\.winsports\.co/videos/(?P<id>[\w-]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -158,21 +173,36 @@ class WinSportsVideoIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
},
|
},
|
||||||
'params': {'skip_download': 'm3u8'},
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.winsports.co/videos/bucaramanga-se-quedo-con-el-grito-de-gol-en-la-garganta',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6402adb62bbf3b18d454e1b0',
|
||||||
|
'display_id': 'bucaramanga-se-quedo-con-el-grito-de-gol-en-la-garganta',
|
||||||
|
'title': '⚽Bucaramanga se quedó con el grito de gol en la garganta',
|
||||||
|
'description': 'Gol anulado Bucaramanga',
|
||||||
|
'thumbnail': r're:^https?://[^?#]+6402adb62bbf3b18d454e1b0',
|
||||||
|
'ext': 'mp4',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
json_ld = self._search_json_ld(webpage, display_id, expected_type='VideoObject', default={})
|
data = self._search_json(
|
||||||
media_setting_json = self._search_json(
|
r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">', webpage, 'data', display_id)
|
||||||
r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">', webpage, 'drupal-setting-json', display_id)
|
|
||||||
|
|
||||||
mediastream_id = traverse_obj(
|
mediastream_url = urljoin(f'{self._EMBED_BASE_URL}/', (
|
||||||
media_setting_json, ('settings', 'mediastream_formatter', ..., 'mediastream_id', {str_or_none}),
|
traverse_obj(data, (
|
||||||
get_all=False) or json_ld.get('url')
|
(('settings', 'mediastream_formatter', ..., 'mediastream_id'), 'url'), {str}), get_all=False)
|
||||||
if not mediastream_id:
|
or next(self._extract_mediastream_urls(webpage), None)))
|
||||||
|
|
||||||
|
if not mediastream_url:
|
||||||
self.raise_no_formats('No MediaStream embed found in webpage')
|
self.raise_no_formats('No MediaStream embed found in webpage')
|
||||||
|
|
||||||
|
title = clean_html(remove_end(
|
||||||
|
self._search_json_ld(webpage, display_id, expected_type='VideoObject', default={}).get('title')
|
||||||
|
or self._og_search_title(webpage), '| Win Sports'))
|
||||||
|
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
urljoin('https://mdstrm.com/embed/', mediastream_id), MediaStreamIE, display_id, url_transparent=True,
|
mediastream_url, MediaStreamIE, display_id, url_transparent=True, display_id=display_id, video_title=title)
|
||||||
display_id=display_id, video_title=strip_or_none(remove_end(json_ld.get('title'), '| Win Sports')))
|
|
||||||
|
|
|
@ -12,9 +12,13 @@ from ..utils import (
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
UserNotLive,
|
UserNotLive,
|
||||||
clean_html,
|
clean_html,
|
||||||
|
determine_ext,
|
||||||
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
mimetype2ext,
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
|
remove_end,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
try_get,
|
try_get,
|
||||||
|
@ -22,7 +26,6 @@ from ..utils import (
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
url_basename,
|
url_basename,
|
||||||
xpath_attr,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -660,6 +663,7 @@ class NBCStationsIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Large Structure Fire in Downtown LA Prompts Smoke Odor Advisory',
|
'title': 'Large Structure Fire in Downtown LA Prompts Smoke Odor Advisory',
|
||||||
'description': 'md5:417ed3c2d91fe9d301e6db7b0942f182',
|
'description': 'md5:417ed3c2d91fe9d301e6db7b0942f182',
|
||||||
|
'duration': 112.513,
|
||||||
'timestamp': 1661135892,
|
'timestamp': 1661135892,
|
||||||
'upload_date': '20220822',
|
'upload_date': '20220822',
|
||||||
'uploader': 'NBC 4',
|
'uploader': 'NBC 4',
|
||||||
|
@ -676,6 +680,7 @@ class NBCStationsIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Huracán complica que televidente de Tucson reciba reembolso',
|
'title': 'Huracán complica que televidente de Tucson reciba reembolso',
|
||||||
'description': 'md5:af298dc73aab74d4fca6abfb12acb6cf',
|
'description': 'md5:af298dc73aab74d4fca6abfb12acb6cf',
|
||||||
|
'duration': 172.406,
|
||||||
'timestamp': 1660886507,
|
'timestamp': 1660886507,
|
||||||
'upload_date': '20220819',
|
'upload_date': '20220819',
|
||||||
'uploader': 'Telemundo Arizona',
|
'uploader': 'Telemundo Arizona',
|
||||||
|
@ -685,6 +690,22 @@ class NBCStationsIE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': 'm3u8',
|
'skip_download': 'm3u8',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# direct mp4 link
|
||||||
|
'url': 'https://www.nbcboston.com/weather/video-weather/highs-near-freezing-in-boston-on-wednesday/2961135/',
|
||||||
|
'md5': '9bf8c41dc7abbb75b1a44f1491a4cc85',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2961135',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Highs Near Freezing in Boston on Wednesday',
|
||||||
|
'description': 'md5:3ec486609a926c99f00a3512e6c0e85b',
|
||||||
|
'duration': 235.669,
|
||||||
|
'timestamp': 1675268656,
|
||||||
|
'upload_date': '20230201',
|
||||||
|
'uploader': '',
|
||||||
|
'channel_id': 'WBTS',
|
||||||
|
'channel': 'nbcboston',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_RESOLUTIONS = {
|
_RESOLUTIONS = {
|
||||||
|
@ -711,7 +732,7 @@ class NBCStationsIE(InfoExtractor):
|
||||||
if not video_data:
|
if not video_data:
|
||||||
raise ExtractorError('No video metadata found in webpage', expected=True)
|
raise ExtractorError('No video metadata found in webpage', expected=True)
|
||||||
|
|
||||||
info, formats, subtitles = {}, [], {}
|
info, formats = {}, []
|
||||||
is_live = int_or_none(video_data.get('mpx_is_livestream')) == 1
|
is_live = int_or_none(video_data.get('mpx_is_livestream')) == 1
|
||||||
query = {
|
query = {
|
||||||
'formats': 'MPEG-DASH none,M3U none,MPEG-DASH none,MPEG4,MP3',
|
'formats': 'MPEG-DASH none,M3U none,MPEG-DASH none,MPEG4,MP3',
|
||||||
|
@ -747,13 +768,14 @@ class NBCStationsIE(InfoExtractor):
|
||||||
|
|
||||||
video_url = traverse_obj(video_data, ((None, ('video', 'meta')), 'mp4_url'), get_all=False)
|
video_url = traverse_obj(video_data, ((None, ('video', 'meta')), 'mp4_url'), get_all=False)
|
||||||
if video_url:
|
if video_url:
|
||||||
|
ext = determine_ext(video_url)
|
||||||
height = self._search_regex(r'\d+-(\d+)p', url_basename(video_url), 'height', default=None)
|
height = self._search_regex(r'\d+-(\d+)p', url_basename(video_url), 'height', default=None)
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'ext': 'mp4',
|
'ext': ext,
|
||||||
'width': int_or_none(self._RESOLUTIONS.get(height)),
|
'width': int_or_none(self._RESOLUTIONS.get(height)),
|
||||||
'height': int_or_none(height),
|
'height': int_or_none(height),
|
||||||
'format_id': 'http-mp4',
|
'format_id': f'http-{ext}',
|
||||||
})
|
})
|
||||||
|
|
||||||
info.update({
|
info.update({
|
||||||
|
@ -770,14 +792,25 @@ class NBCStationsIE(InfoExtractor):
|
||||||
smil = self._download_xml(
|
smil = self._download_xml(
|
||||||
f'https://link.theplatform.com/s/{pdk_acct}/{player_id}', video_id,
|
f'https://link.theplatform.com/s/{pdk_acct}/{player_id}', video_id,
|
||||||
note='Downloading SMIL data', query=query, fatal=is_live)
|
note='Downloading SMIL data', query=query, fatal=is_live)
|
||||||
if smil:
|
subtitles = self._parse_smil_subtitles(smil, default_ns) if smil else {}
|
||||||
manifest_url = xpath_attr(smil, f'.//{{{default_ns}}}video', 'src', fatal=is_live)
|
for video in smil.findall(self._xpath_ns('.//video', default_ns)) if smil else []:
|
||||||
subtitles = self._parse_smil_subtitles(smil, default_ns)
|
info['duration'] = float_or_none(remove_end(video.get('dur'), 'ms'), 1000)
|
||||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
video_src_url = video.get('src')
|
||||||
manifest_url, video_id, 'mp4', m3u8_id='hls', fatal=is_live,
|
ext = mimetype2ext(video.get('type'), default=determine_ext(video_src_url))
|
||||||
live=is_live, errnote='No HLS formats found')
|
if ext == 'm3u8':
|
||||||
formats.extend(fmts)
|
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
self._merge_subtitles(subs, target=subtitles)
|
video_src_url, video_id, 'mp4', m3u8_id='hls', fatal=is_live,
|
||||||
|
live=is_live, errnote='No HLS formats found')
|
||||||
|
formats.extend(fmts)
|
||||||
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
|
elif video_src_url:
|
||||||
|
formats.append({
|
||||||
|
'url': video_src_url,
|
||||||
|
'format_id': f'https-{ext}',
|
||||||
|
'ext': ext,
|
||||||
|
'width': int_or_none(video.get('width')),
|
||||||
|
'height': int_or_none(video.get('height')),
|
||||||
|
})
|
||||||
|
|
||||||
if not formats:
|
if not formats:
|
||||||
self.raise_no_formats('No video content found in webpage', expected=True)
|
self.raise_no_formats('No video content found in webpage', expected=True)
|
||||||
|
|
|
@ -5,7 +5,7 @@ import urllib.error
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import ExtractorError, parse_iso8601
|
from ..utils import ExtractorError, parse_iso8601
|
||||||
|
|
||||||
_BASE_URL_RE = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
|
_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
|
||||||
|
|
||||||
|
|
||||||
class NebulaBaseIE(InfoExtractor):
|
class NebulaBaseIE(InfoExtractor):
|
||||||
|
@ -183,6 +183,10 @@ class NebulaIE(NebulaBaseIE):
|
||||||
'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
|
'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _fetch_video_metadata(self, slug):
|
def _fetch_video_metadata(self, slug):
|
||||||
|
|
|
@ -6,7 +6,8 @@ from ..utils import (
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
urljoin
|
urljoin,
|
||||||
|
url_or_none
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -334,3 +335,140 @@ class NhkForSchoolProgramListIE(InfoExtractor):
|
||||||
for x in traverse_obj(bangumi_list, ('part', ..., 'part-video-dasid')) or []]
|
for x in traverse_obj(bangumi_list, ('part', ..., 'part-video-dasid')) or []]
|
||||||
|
|
||||||
return self.playlist_result(bangumis, program_id, title, description)
|
return self.playlist_result(bangumis, program_id, title, description)
|
||||||
|
|
||||||
|
|
||||||
|
class NhkRadiruIE(InfoExtractor):
|
||||||
|
_GEO_COUNTRIES = ['JP']
|
||||||
|
IE_DESC = 'NHK らじる (Radiru/Rajiru)'
|
||||||
|
_VALID_URL = r'https?://www\.nhk\.or\.jp/radio/(?:player/ondemand|ondemand/detail)\.html\?p=(?P<site>[\da-zA-Z]+)_(?P<corner>[\da-zA-Z]+)(?:_(?P<headline>[\da-zA-Z]+))?'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=0449_01_3853544',
|
||||||
|
'skip': 'Episode expired on 2023-04-16',
|
||||||
|
'info_dict': {
|
||||||
|
'channel': 'NHK-FM',
|
||||||
|
'description': 'md5:94b08bdeadde81a97df4ec882acce3e9',
|
||||||
|
'ext': 'm4a',
|
||||||
|
'id': '0449_01_3853544',
|
||||||
|
'series': 'ジャズ・トゥナイト',
|
||||||
|
'thumbnail': 'https://www.nhk.or.jp/prog/img/449/g449.jpg',
|
||||||
|
'timestamp': 1680969600,
|
||||||
|
'title': 'ジャズ・トゥナイト NEWジャズ特集',
|
||||||
|
'upload_date': '20230408',
|
||||||
|
'release_timestamp': 1680962400,
|
||||||
|
'release_date': '20230408',
|
||||||
|
'was_live': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# playlist, airs every weekday so it should _hopefully_ be okay forever
|
||||||
|
'url': 'https://www.nhk.or.jp/radio/ondemand/detail.html?p=0458_01',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0458_01',
|
||||||
|
'title': 'ベストオブクラシック',
|
||||||
|
'description': '世界中の上質な演奏会をじっくり堪能する本格派クラシック番組。',
|
||||||
|
'channel': 'NHK-FM',
|
||||||
|
'thumbnail': 'https://www.nhk.or.jp/prog/img/458/g458.jpg',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 3,
|
||||||
|
}, {
|
||||||
|
# one with letters in the id
|
||||||
|
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F300_06_3738470',
|
||||||
|
'note': 'Expires on 2024-03-31',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'F300_06_3738470',
|
||||||
|
'ext': 'm4a',
|
||||||
|
'title': '有島武郎「一房のぶどう」',
|
||||||
|
'description': '朗読:川野一宇(ラジオ深夜便アンカー)\r\n\r\n(2016年12月8日放送「ラジオ深夜便『アンカー朗読シリーズ』」より)',
|
||||||
|
'channel': 'NHKラジオ第1、NHK-FM',
|
||||||
|
'timestamp': 1635757200,
|
||||||
|
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F300/img/corner/box_109_thumbnail.jpg',
|
||||||
|
'release_date': '20161207',
|
||||||
|
'series': 'らじる文庫 by ラジオ深夜便 ',
|
||||||
|
'release_timestamp': 1481126700,
|
||||||
|
'upload_date': '20211101',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# news
|
||||||
|
'url': 'https://www.nhk.or.jp/radio/player/ondemand.html?p=F261_01_3855109',
|
||||||
|
'skip': 'Expires on 2023-04-17',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'F261_01_3855109',
|
||||||
|
'ext': 'm4a',
|
||||||
|
'channel': 'NHKラジオ第1',
|
||||||
|
'timestamp': 1681635900,
|
||||||
|
'release_date': '20230416',
|
||||||
|
'series': 'NHKラジオニュース',
|
||||||
|
'title': '午後6時のNHKニュース',
|
||||||
|
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
|
||||||
|
'upload_date': '20230416',
|
||||||
|
'release_timestamp': 1681635600,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _extract_episode_info(self, headline, programme_id, series_meta):
|
||||||
|
episode_id = f'{programme_id}_{headline["headline_id"]}'
|
||||||
|
episode = traverse_obj(headline, ('file_list', 0, {dict}))
|
||||||
|
|
||||||
|
return {
|
||||||
|
**series_meta,
|
||||||
|
'id': episode_id,
|
||||||
|
'formats': self._extract_m3u8_formats(episode.get('file_name'), episode_id, fatal=False),
|
||||||
|
'container': 'm4a_dash', # force fixup, AAC-only HLS
|
||||||
|
'was_live': True,
|
||||||
|
'series': series_meta.get('title'),
|
||||||
|
'thumbnail': url_or_none(headline.get('headline_image')) or series_meta.get('thumbnail'),
|
||||||
|
**traverse_obj(episode, {
|
||||||
|
'title': 'file_title',
|
||||||
|
'description': 'file_title_sub',
|
||||||
|
'timestamp': ('open_time', {unified_timestamp}),
|
||||||
|
'release_timestamp': ('aa_vinfo4', {lambda x: x.split('_')[0]}, {unified_timestamp}),
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
site_id, corner_id, headline_id = self._match_valid_url(url).group('site', 'corner', 'headline')
|
||||||
|
programme_id = f'{site_id}_{corner_id}'
|
||||||
|
|
||||||
|
if site_id == 'F261':
|
||||||
|
json_url = 'https://www.nhk.or.jp/s-media/news/news-site/list/v1/all.json'
|
||||||
|
else:
|
||||||
|
json_url = f'https://www.nhk.or.jp/radioondemand/json/{site_id}/bangumi_{programme_id}.json'
|
||||||
|
|
||||||
|
meta = self._download_json(json_url, programme_id)['main']
|
||||||
|
|
||||||
|
series_meta = traverse_obj(meta, {
|
||||||
|
'title': 'program_name',
|
||||||
|
'channel': 'media_name',
|
||||||
|
'thumbnail': (('thumbnail_c', 'thumbnail_p'), {url_or_none}),
|
||||||
|
}, get_all=False)
|
||||||
|
|
||||||
|
if headline_id:
|
||||||
|
return self._extract_episode_info(
|
||||||
|
traverse_obj(meta, (
|
||||||
|
'detail_list', lambda _, v: v['headline_id'] == headline_id), get_all=False),
|
||||||
|
programme_id, series_meta)
|
||||||
|
|
||||||
|
def entries():
|
||||||
|
for headline in traverse_obj(meta, ('detail_list', ..., {dict})):
|
||||||
|
yield self._extract_episode_info(headline, programme_id, series_meta)
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
entries(), programme_id, playlist_description=meta.get('site_detail'), **series_meta)
|
||||||
|
|
||||||
|
|
||||||
|
class NhkRadioNewsPageIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://www\.nhk\.or\.jp/radionews/?(?:$|[?#])'
|
||||||
|
_TESTS = [{
|
||||||
|
# airs daily, on-the-hour most hours
|
||||||
|
'url': 'https://www.nhk.or.jp/radionews/',
|
||||||
|
'playlist_mincount': 5,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'F261_01',
|
||||||
|
'thumbnail': 'https://www.nhk.or.jp/radioondemand/json/F261/img/RADIONEWS_640.jpg',
|
||||||
|
'description': 'md5:bf2c5b397e44bc7eb26de98d8f15d79d',
|
||||||
|
'channel': 'NHKラジオ第1',
|
||||||
|
'title': 'NHKラジオニュース',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
return self.url_result('https://www.nhk.or.jp/radio/ondemand/detail.html?p=F261_01', NhkRadiruIE)
|
||||||
|
|
|
@ -477,23 +477,32 @@ class NiconicoIE(InfoExtractor):
|
||||||
user_id_str = session_api_data.get('serviceUserId')
|
user_id_str = session_api_data.get('serviceUserId')
|
||||||
|
|
||||||
thread_ids = traverse_obj(api_data, ('comment', 'threads', lambda _, v: v['isActive']))
|
thread_ids = traverse_obj(api_data, ('comment', 'threads', lambda _, v: v['isActive']))
|
||||||
raw_danmaku = self._extract_all_comments(video_id, thread_ids, user_id_str, comment_user_key)
|
legacy_danmaku = self._extract_legacy_comments(video_id, thread_ids, user_id_str, comment_user_key) or []
|
||||||
if not raw_danmaku:
|
|
||||||
|
new_comments = traverse_obj(api_data, ('comment', 'nvComment'))
|
||||||
|
new_danmaku = self._extract_new_comments(
|
||||||
|
new_comments.get('server'), video_id,
|
||||||
|
new_comments.get('params'), new_comments.get('threadKey'))
|
||||||
|
|
||||||
|
if not legacy_danmaku and not new_danmaku:
|
||||||
self.report_warning(f'Failed to get comments. {bug_reports_message()}')
|
self.report_warning(f'Failed to get comments. {bug_reports_message()}')
|
||||||
return
|
return
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'comments': [{
|
'comments': [{
|
||||||
'ext': 'json',
|
'ext': 'json',
|
||||||
'data': json.dumps(raw_danmaku),
|
'data': json.dumps(legacy_danmaku + new_danmaku),
|
||||||
}],
|
}],
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extract_all_comments(self, video_id, threads, user_id, user_key):
|
def _extract_legacy_comments(self, video_id, threads, user_id, user_key):
|
||||||
auth_data = {
|
auth_data = {
|
||||||
'user_id': user_id,
|
'user_id': user_id,
|
||||||
'userkey': user_key,
|
'userkey': user_key,
|
||||||
} if user_id and user_key else {'user_id': ''}
|
} if user_id and user_key else {'user_id': ''}
|
||||||
|
|
||||||
|
api_url = traverse_obj(threads, (..., 'server'), get_all=False)
|
||||||
|
|
||||||
# Request Start
|
# Request Start
|
||||||
post_data = [{'ping': {'content': 'rs:0'}}]
|
post_data = [{'ping': {'content': 'rs:0'}}]
|
||||||
for i, thread in enumerate(threads):
|
for i, thread in enumerate(threads):
|
||||||
|
@ -532,17 +541,32 @@ class NiconicoIE(InfoExtractor):
|
||||||
# Request Final
|
# Request Final
|
||||||
post_data.append({'ping': {'content': 'rf:0'}})
|
post_data.append({'ping': {'content': 'rf:0'}})
|
||||||
|
|
||||||
for api_url in self._COMMENT_API_ENDPOINTS:
|
return self._download_json(
|
||||||
comments = self._download_json(
|
f'{api_url}/api.json', video_id, data=json.dumps(post_data).encode(), fatal=False,
|
||||||
api_url, video_id, data=json.dumps(post_data).encode(), fatal=False,
|
headers={
|
||||||
headers={
|
'Referer': f'https://www.nicovideo.jp/watch/{video_id}',
|
||||||
'Referer': 'https://www.nicovideo.jp/watch/%s' % video_id,
|
'Origin': 'https://www.nicovideo.jp',
|
||||||
'Origin': 'https://www.nicovideo.jp',
|
'Content-Type': 'text/plain;charset=UTF-8',
|
||||||
'Content-Type': 'text/plain;charset=UTF-8',
|
},
|
||||||
},
|
note='Downloading comments', errnote=f'Failed to access endpoint {api_url}')
|
||||||
note='Downloading comments', errnote=f'Failed to access endpoint {api_url}')
|
|
||||||
if comments:
|
def _extract_new_comments(self, endpoint, video_id, params, thread_key):
|
||||||
return comments
|
comments = self._download_json(
|
||||||
|
f'{endpoint}/v1/threads', video_id, data=json.dumps({
|
||||||
|
'additionals': {},
|
||||||
|
'params': params,
|
||||||
|
'threadKey': thread_key,
|
||||||
|
}).encode(), fatal=False,
|
||||||
|
headers={
|
||||||
|
'Referer': 'https://www.nicovideo.jp/',
|
||||||
|
'Origin': 'https://www.nicovideo.jp',
|
||||||
|
'Content-Type': 'text/plain;charset=UTF-8',
|
||||||
|
'x-client-os-type': 'others',
|
||||||
|
'x-frontend-id': '6',
|
||||||
|
'x-frontend-version': '0',
|
||||||
|
},
|
||||||
|
note='Downloading comments (new)', errnote='Failed to download comments (new)')
|
||||||
|
return traverse_obj(comments, ('data', 'threads', ..., 'comments', ...))
|
||||||
|
|
||||||
|
|
||||||
class NiconicoPlaylistBaseIE(InfoExtractor):
|
class NiconicoPlaylistBaseIE(InfoExtractor):
|
||||||
|
@ -636,10 +660,10 @@ class NiconicoPlaylistIE(NiconicoPlaylistBaseIE):
|
||||||
|
|
||||||
class NiconicoSeriesIE(InfoExtractor):
|
class NiconicoSeriesIE(InfoExtractor):
|
||||||
IE_NAME = 'niconico:series'
|
IE_NAME = 'niconico:series'
|
||||||
_VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/series/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp(?:/user/\d+)?|nico\.ms)/series/(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.nicovideo.jp/series/110226',
|
'url': 'https://www.nicovideo.jp/user/44113208/series/110226',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '110226',
|
'id': '110226',
|
||||||
'title': 'ご立派ァ!のシリーズ',
|
'title': 'ご立派ァ!のシリーズ',
|
||||||
|
@ -659,7 +683,7 @@ class NiconicoSeriesIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
list_id = self._match_id(url)
|
list_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(f'https://www.nicovideo.jp/series/{list_id}', list_id)
|
webpage = self._download_webpage(url, list_id)
|
||||||
|
|
||||||
title = self._search_regex(
|
title = self._search_regex(
|
||||||
(r'<title>「(.+)(全',
|
(r'<title>「(.+)(全',
|
||||||
|
@ -667,10 +691,9 @@ class NiconicoSeriesIE(InfoExtractor):
|
||||||
webpage, 'title', fatal=False)
|
webpage, 'title', fatal=False)
|
||||||
if title:
|
if title:
|
||||||
title = unescapeHTML(title)
|
title = unescapeHTML(title)
|
||||||
playlist = [
|
json_data = next(self._yield_json_ld(webpage, None, fatal=False))
|
||||||
self.url_result(f'https://www.nicovideo.jp/watch/{v_id}', video_id=v_id)
|
return self.playlist_from_matches(
|
||||||
for v_id in re.findall(r'data-href=[\'"](?:https://www\.nicovideo\.jp)?/watch/([a-z0-9]+)', webpage)]
|
traverse_obj(json_data, ('itemListElement', ..., 'url')), list_id, title, ie=NiconicoIE)
|
||||||
return self.playlist_result(playlist, list_id, title)
|
|
||||||
|
|
||||||
|
|
||||||
class NiconicoHistoryIE(NiconicoPlaylistBaseIE):
|
class NiconicoHistoryIE(NiconicoPlaylistBaseIE):
|
||||||
|
|
|
@ -1,13 +1,14 @@
|
||||||
|
import functools
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .youtube import YoutubeIE
|
from .youtube import YoutubeIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
format_field,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
urlencode_postdata,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -24,7 +25,7 @@ class ParlerIE(InfoExtractor):
|
||||||
'thumbnail': 'https://bl-images.parler.com/videos/6ce7cdf3-a27a-4d72-bf9c-d3e17ce39a66/thumbnail.jpeg',
|
'thumbnail': 'https://bl-images.parler.com/videos/6ce7cdf3-a27a-4d72-bf9c-d3e17ce39a66/thumbnail.jpeg',
|
||||||
'title': 'Parler video #df79fdba-07cc-48fe-b085-3293897520d7',
|
'title': 'Parler video #df79fdba-07cc-48fe-b085-3293897520d7',
|
||||||
'description': 'md5:6f220bde2df4a97cbb89ac11f1fd8197',
|
'description': 'md5:6f220bde2df4a97cbb89ac11f1fd8197',
|
||||||
'timestamp': 1659744000,
|
'timestamp': 1659785481,
|
||||||
'upload_date': '20220806',
|
'upload_date': '20220806',
|
||||||
'uploader': 'Tulsi Gabbard',
|
'uploader': 'Tulsi Gabbard',
|
||||||
'uploader_id': 'TulsiGabbard',
|
'uploader_id': 'TulsiGabbard',
|
||||||
|
@ -34,78 +35,57 @@ class ParlerIE(InfoExtractor):
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
'url': 'https://parler.com/feed/a7406eb4-91e5-4793-b5e3-ade57a24e287',
|
|
||||||
'md5': '11687e2f5bb353682cee338d181422ed',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'a7406eb4-91e5-4793-b5e3-ade57a24e287',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'thumbnail': 'https://bl-images.parler.com/videos/317827a8-1e48-4cbc-981f-7dd17d4c1183/thumbnail.jpeg',
|
|
||||||
'title': 'Parler video #a7406eb4-91e5-4793-b5e3-ade57a24e287',
|
|
||||||
'description': 'This man should run for office',
|
|
||||||
'timestamp': 1659657600,
|
|
||||||
'upload_date': '20220805',
|
|
||||||
'uploader': 'Benny Johnson',
|
|
||||||
'uploader_id': 'BennyJohnson',
|
|
||||||
'uploader_url': 'https://parler.com/BennyJohnson',
|
|
||||||
'view_count': int,
|
|
||||||
'comment_count': int,
|
|
||||||
'repost_count': int,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
'url': 'https://parler.com/feed/f23b85c1-6558-470f-b9ff-02c145f28da5',
|
'url': 'https://parler.com/feed/f23b85c1-6558-470f-b9ff-02c145f28da5',
|
||||||
'md5': 'eaba1ff4a10fe281f5ce74e930ab2cb4',
|
'md5': 'eaba1ff4a10fe281f5ce74e930ab2cb4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'r5vkSaz8PxQ',
|
'id': 'r5vkSaz8PxQ',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'thumbnail': 'https://i.ytimg.com/vi_webp/r5vkSaz8PxQ/maxresdefault.webp',
|
|
||||||
'title': 'Tom MacDonald Names Reaction',
|
|
||||||
'description': 'md5:33c21f0d35ae6dc2edf3007d6696baea',
|
|
||||||
'upload_date': '20220716',
|
|
||||||
'duration': 1267,
|
|
||||||
'uploader': 'Mahesh Chookolingo',
|
|
||||||
'uploader_id': 'maheshchookolingo',
|
|
||||||
'uploader_url': 'http://www.youtube.com/user/maheshchookolingo',
|
|
||||||
'channel': 'Mahesh Chookolingo',
|
|
||||||
'channel_id': 'UCox6YeMSY1PQInbCtTaZj_w',
|
|
||||||
'channel_url': 'https://www.youtube.com/channel/UCox6YeMSY1PQInbCtTaZj_w',
|
|
||||||
'categories': ['Entertainment'],
|
|
||||||
'tags': list,
|
|
||||||
'availability': 'public',
|
|
||||||
'live_status': 'not_live',
|
'live_status': 'not_live',
|
||||||
'view_count': int,
|
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
|
'duration': 1267,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'channel_follower_count': int,
|
'channel_follower_count': int,
|
||||||
'age_limit': 0,
|
'channel_id': 'UCox6YeMSY1PQInbCtTaZj_w',
|
||||||
|
'upload_date': '20220716',
|
||||||
|
'thumbnail': 'https://i.ytimg.com/vi/r5vkSaz8PxQ/maxresdefault.jpg',
|
||||||
|
'tags': 'count:17',
|
||||||
|
'availability': 'public',
|
||||||
|
'categories': ['Entertainment'],
|
||||||
'playable_in_embed': True,
|
'playable_in_embed': True,
|
||||||
|
'channel': 'Who Knows What! With Mahesh & Friends',
|
||||||
|
'title': 'Tom MacDonald Names Reaction',
|
||||||
|
'uploader': 'Who Knows What! With Mahesh & Friends',
|
||||||
|
'uploader_id': '@maheshchookolingo',
|
||||||
|
'age_limit': 0,
|
||||||
|
'description': 'md5:33c21f0d35ae6dc2edf3007d6696baea',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCox6YeMSY1PQInbCtTaZj_w',
|
||||||
|
'view_count': int,
|
||||||
|
'uploader_url': 'http://www.youtube.com/@maheshchookolingo',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
data = self._download_json(
|
data = self._download_json(f'https://api.parler.com/v0/public/parleys/{video_id}',
|
||||||
'https://parler.com/open-api/ParleyDetailEndpoint.php', video_id,
|
video_id)['data']
|
||||||
data=urlencode_postdata({'uuid': video_id}))['data'][0]
|
if data.get('link'):
|
||||||
primary = data['primary']
|
return self.url_result(data['link'], YoutubeIE)
|
||||||
|
|
||||||
embed = self._parse_json(primary.get('V2LINKLONG') or '', video_id, fatal=False)
|
|
||||||
if embed:
|
|
||||||
return self.url_result(embed[0], YoutubeIE)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': traverse_obj(primary, ('video_data', 'videoSrc')),
|
'title': strip_or_none(data.get('title')) or '',
|
||||||
'thumbnail': traverse_obj(primary, ('video_data', 'thumbnailUrl')),
|
**traverse_obj(data, {
|
||||||
'title': '',
|
'url': ('video', 'videoSrc'),
|
||||||
'description': strip_or_none(clean_html(primary.get('full_body'))) or None,
|
'thumbnail': ('video', 'thumbnailUrl'),
|
||||||
'timestamp': unified_timestamp(primary.get('date_created')),
|
'description': ('body', {clean_html}),
|
||||||
'uploader': strip_or_none(primary.get('name')),
|
'timestamp': ('date_created', {unified_timestamp}),
|
||||||
'uploader_id': strip_or_none(primary.get('username')),
|
'uploader': ('user', 'name', {strip_or_none}),
|
||||||
'uploader_url': format_field(strip_or_none(primary.get('username')), None, 'https://parler.com/%s'),
|
'uploader_id': ('user', 'username', {str}),
|
||||||
'view_count': int_or_none(primary.get('view_count')),
|
'uploader_url': ('user', 'username', {functools.partial(urljoin, 'https://parler.com/')}),
|
||||||
'comment_count': int_or_none(traverse_obj(data, ('engagement', 'commentCount'))),
|
'view_count': ('views', {int_or_none}),
|
||||||
'repost_count': int_or_none(traverse_obj(data, ('engagement', 'echoCount'))),
|
'comment_count': ('total_comments', {int_or_none}),
|
||||||
|
'repost_count': ('echos', {int_or_none}),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
47
yt_dlp/extractor/pgatour.py
Normal file
47
yt_dlp/extractor/pgatour.py
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
from .brightcove import BrightcoveNewIE
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class PGATourIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?pgatour\.com/video/[\w-]+/(?P<tc>T)?(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.pgatour.com/video/competition/T6322447785112/adam-hadwin-2023-the-players-round-4-18th-hole-shot-1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6322447785112',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Adam Hadwin | 2023 THE PLAYERS | Round 4 | 18th hole | Shot 1',
|
||||||
|
'uploader_id': '6116716431001',
|
||||||
|
'upload_date': '20230312',
|
||||||
|
'timestamp': 1678653136,
|
||||||
|
'duration': 20.011,
|
||||||
|
'thumbnail': r're:^https://.+\.jpg',
|
||||||
|
'tags': 'count:7',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.pgatour.com/video/features/6322506425112/follow-the-players-trophy-on-championship-sunday',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6322506425112',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Follow THE PLAYERS trophy on Championship Sunday',
|
||||||
|
'description': 'md5:4d29e4bdfa03694a0ebfd08950398568',
|
||||||
|
'uploader_id': '6082840763001',
|
||||||
|
'upload_date': '20230313',
|
||||||
|
'timestamp': 1678739835,
|
||||||
|
'duration': 123.435,
|
||||||
|
'thumbnail': r're:^https://.+\.jpg',
|
||||||
|
'tags': 'count:8',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id, is_tourcast = self._match_valid_url(url).group('id', 'tc')
|
||||||
|
|
||||||
|
# From https://www.pgatour.com/_next/static/chunks/pages/_app-8bcf849560daf38d.js
|
||||||
|
account_id = '6116716431001' if is_tourcast else '6082840763001'
|
||||||
|
player_id = 'Vsd5Umu8r' if is_tourcast else 'FWIBYMBPj'
|
||||||
|
|
||||||
|
return self.url_result(
|
||||||
|
f'https://players.brightcove.net/{account_id}/{player_id}_default/index.html?videoId={video_id}',
|
||||||
|
BrightcoveNewIE)
|
|
@ -1,42 +1,60 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none, urljoin
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
int_or_none,
|
||||||
|
get_element_by_class,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class PornezIE(InfoExtractor):
    # The numeric/alphanumeric id is only present in "/video<id>/" URLs;
    # "/watch/<slug>/" URLs carry no id and it is recovered from the page.
    _VALID_URL = r'https?://(?:www\.)?pornez\.net/(?:video(?P<id>\w+)|watch)/'
    _TESTS = [{
        'url': 'https://pornez.net/video344819/mistresst-funny_penis_names-wmv/',
        'info_dict': {
            'id': '344819',
            'ext': 'mp4',
            'title': 'mistresst funny_penis_names wmv',
            'thumbnail': r're:^https?://.*\.jpg$',
            'age_limit': 18,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://pornez.net/watch/leana+lovings+stiff+for+stepdaughter/',
        'info_dict': {
            'id': '156161',
            'ext': 'mp4',
            'title': 'Watch leana lovings stiff for stepdaughter porn video.',
            'age_limit': 18,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://pornez.net/videovzs27fj/tutor4k-e14-blue-wave-1080p-nbq-tutor4k-e14-blue-wave/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract the video embedded (through an iframe) in a pornez.net page."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        if not video_id:
            # "/watch/" URLs: take the id from the "?p=<id>" shortlink in the page
            video_id = self._search_regex(
                r'<link[^>]+\bhref=["\']https?://pornez\.net/\?p=(\w+)["\']', webpage, 'id')

        iframe_src = self._html_search_regex(r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe')
        # Resolve the iframe location once and use the absolute URL both for the
        # download and as the base URL for the media sources found inside it;
        # a relative "src" is not a usable base for resolving those sources.
        iframe_url = urljoin('https://pornez.net', iframe_src)
        iframe = self._download_webpage(iframe_url, video_id)

        entries = self._parse_html5_media_entries(iframe_url, iframe, video_id)[0]
        for fmt in entries['formats']:
            # The height is encoded in the playlist name ("..._<height>.m3u8").
            # Formats whose URL does not follow that scheme are kept untouched
            # instead of aborting the whole extraction.
            height = self._search_regex(r'_(\d+)\.m3u8', fmt['url'], 'height', default=None)
            if height is None:
                continue
            fmt['format_id'] = '%sp' % height
            fmt['height'] = int_or_none(height)

        entries.update({
            'id': video_id,
            'title': (clean_html(get_element_by_class('video-title', webpage))
                      or self._html_search_meta(
                          ['twitter:title', 'og:title', 'description'], webpage, 'title', default=None)),
            'thumbnail': self._html_search_meta(['thumbnailUrl'], webpage, 'thumb', default=None),
            'age_limit': 18,
        })
        return entries
|
||||||
|
|
|
@ -58,6 +58,11 @@ class PornHubBaseIE(InfoExtractor):
|
||||||
def _real_initialize(self):
    # Start out logged out; _login() returns early once this flag is true.
    self._logged_in = False
|
||||||
|
|
||||||
|
def _set_age_cookies(self, host):
|
||||||
|
self._set_cookie(host, 'age_verified', '1')
|
||||||
|
self._set_cookie(host, 'accessAgeDisclaimerPH', '1')
|
||||||
|
self._set_cookie(host, 'accessPH', '1')
|
||||||
|
|
||||||
def _login(self, host):
|
def _login(self, host):
|
||||||
if self._logged_in:
|
if self._logged_in:
|
||||||
return
|
return
|
||||||
|
@ -267,8 +272,7 @@ class PornHubIE(PornHubBaseIE):
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
self._login(host)
|
self._login(host)
|
||||||
|
self._set_age_cookies(host)
|
||||||
self._set_cookie(host, 'age_verified', '1')
|
|
||||||
|
|
||||||
def dl_webpage(platform):
|
def dl_webpage(platform):
|
||||||
self._set_cookie(host, 'platform', platform)
|
self._set_cookie(host, 'platform', platform)
|
||||||
|
@ -569,6 +573,7 @@ class PornHubUserIE(PornHubPlaylistBaseIE):
|
||||||
mobj = self._match_valid_url(url)
|
mobj = self._match_valid_url(url)
|
||||||
user_id = mobj.group('id')
|
user_id = mobj.group('id')
|
||||||
videos_url = '%s/videos' % mobj.group('url')
|
videos_url = '%s/videos' % mobj.group('url')
|
||||||
|
self._set_age_cookies(mobj.group('host'))
|
||||||
page = self._extract_page(url)
|
page = self._extract_page(url)
|
||||||
if page:
|
if page:
|
||||||
videos_url = update_url_query(videos_url, {'page': page})
|
videos_url = update_url_query(videos_url, {'page': page})
|
||||||
|
@ -633,6 +638,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
||||||
item_id = mobj.group('id')
|
item_id = mobj.group('id')
|
||||||
|
|
||||||
self._login(host)
|
self._login(host)
|
||||||
|
self._set_age_cookies(host)
|
||||||
|
|
||||||
return self.playlist_result(self._entries(url, host, item_id), item_id)
|
return self.playlist_result(self._entries(url, host, item_id), item_id)
|
||||||
|
|
||||||
|
@ -812,5 +818,6 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
||||||
item_id = mobj.group('id')
|
item_id = mobj.group('id')
|
||||||
|
|
||||||
self._login(host)
|
self._login(host)
|
||||||
|
self._set_age_cookies(host)
|
||||||
|
|
||||||
return self.playlist_result(self._entries(mobj.group('url'), host, item_id), item_id)
|
return self.playlist_result(self._entries(mobj.group('url'), host, item_id), item_id)
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
import random
|
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
@ -9,12 +8,14 @@ from ..utils import (
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
urlencode_postdata,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class RedditIE(InfoExtractor):
|
class RedditIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?P<subdomain>[^/]+\.)?reddit(?:media)?\.com/(?P<slug>(?:r|user)/[^/]+/comments/(?P<id>[^/?#&]+))'
|
_NETRC_MACHINE = 'reddit'
|
||||||
|
_VALID_URL = r'https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
|
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -109,6 +110,46 @@ class RedditIE(InfoExtractor):
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'channel_id': 'dumbfuckers_club',
|
'channel_id': 'dumbfuckers_club',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# post link without subreddit
|
||||||
|
'url': 'https://www.reddit.com/comments/124pp33',
|
||||||
|
'md5': '15eec9d828adcef4468b741a7e45a395',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'antsenjc2jqa1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'display_id': '124pp33',
|
||||||
|
'title': 'Harmless prank of some old friends',
|
||||||
|
'uploader': 'Dudezila',
|
||||||
|
'channel_id': 'ContagiousLaughter',
|
||||||
|
'duration': 17,
|
||||||
|
'upload_date': '20230328',
|
||||||
|
'timestamp': 1680012043,
|
||||||
|
'thumbnail': r're:^https?://.*\.(?:jpg|png)',
|
||||||
|
'age_limit': 0,
|
||||||
|
'comment_count': int,
|
||||||
|
'dislike_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# quarantined subreddit post
|
||||||
|
'url': 'https://old.reddit.com/r/GenZedong/comments/12fujy3/based_hasan/',
|
||||||
|
'md5': '3156ea69e3c1f1b6259683c5abd36e71',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '8bwtclfggpsa1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'display_id': '12fujy3',
|
||||||
|
'title': 'Based Hasan?',
|
||||||
|
'uploader': 'KingNigelXLII',
|
||||||
|
'channel_id': 'GenZedong',
|
||||||
|
'duration': 16,
|
||||||
|
'upload_date': '20230408',
|
||||||
|
'timestamp': 1680979138,
|
||||||
|
'age_limit': 0,
|
||||||
|
'comment_count': int,
|
||||||
|
'dislike_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
},
|
||||||
|
'skip': 'Requires account that has opted-in to the GenZedong subreddit',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
|
'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -137,21 +178,45 @@ class RedditIE(InfoExtractor):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _perform_login(self, username, password):
    """Log in to Reddit with the given credentials.

    Raises ExtractorError when Reddit requires a captcha before login,
    when the login API reports errors, or when the login response does
    not contain a session cookie.
    """
    captcha = self._download_json(
        'https://www.reddit.com/api/requires_captcha/login.json', None,
        'Checking login requirement')['required']
    if captcha:
        raise ExtractorError('Reddit is requiring captcha before login', expected=True)

    # The username ends up as a URL path segment, so percent-encode it
    # rather than letting characters such as "/", "?" or "#" alter the URL.
    login_url = 'https://www.reddit.com/api/login/' + urllib.parse.quote(username, safe='')
    login = self._download_json(
        login_url, None, data=urlencode_postdata({
            'op': 'login-main',
            'user': username,
            'passwd': password,
            'api_type': 'json',
        }), note='Logging in', errnote='Login request failed')

    # Index 1 of every error entry is used as its message; keep only strings
    # so that an unexpected entry cannot make the join below fail.
    errors = '; '.join(traverse_obj(login, ('json', 'errors', ..., 1, {str})))
    if errors:
        raise ExtractorError(f'Unable to login, Reddit API says {errors}', expected=True)
    elif not traverse_obj(login, ('json', 'data', 'cookie', {str})):
        raise ExtractorError('Unable to login, no cookie was returned')
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
subdomain, slug, video_id = self._match_valid_url(url).group('subdomain', 'slug', 'id')
|
host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id')
|
||||||
|
|
||||||
self._set_cookie('.reddit.com', 'reddit_session', self._gen_session_id())
|
data = self._download_json(
|
||||||
self._set_cookie('.reddit.com', '_options', '%7B%22pref_quarantine_optin%22%3A%20true%7D')
|
f'https://{host}/{slug}/.json', video_id, fatal=False, expected_status=403)
|
||||||
data = self._download_json(f'https://{subdomain}reddit.com/{slug}/.json', video_id, fatal=False)
|
|
||||||
if not data:
|
if not data:
|
||||||
# Fall back to old.reddit.com in case the requested subdomain fails
|
fallback_host = 'old.reddit.com' if host != 'old.reddit.com' else 'www.reddit.com'
|
||||||
data = self._download_json(f'https://old.reddit.com/{slug}/.json', video_id)
|
self.to_screen(f'{host} request failed, retrying with {fallback_host}')
|
||||||
|
data = self._download_json(
|
||||||
|
f'https://{fallback_host}/{slug}/.json', video_id, expected_status=403)
|
||||||
|
|
||||||
|
if traverse_obj(data, 'error') == 403:
|
||||||
|
reason = data.get('reason')
|
||||||
|
if reason == 'quarantined':
|
||||||
|
self.raise_login_required('Quarantined subreddit; an account that has opted in is required')
|
||||||
|
elif reason == 'private':
|
||||||
|
self.raise_login_required('Private subreddit; an account that has been approved is required')
|
||||||
|
else:
|
||||||
|
raise ExtractorError(f'HTTP Error 403 Forbidden; reason given: {reason}')
|
||||||
|
|
||||||
data = data[0]['data']['children'][0]['data']
|
data = data[0]['data']['children'][0]['data']
|
||||||
video_url = data['url']
|
video_url = data['url']
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,12 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import extract_attributes, int_or_none, remove_start, traverse_obj
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
|
int_or_none,
|
||||||
|
remove_start,
|
||||||
|
str_or_none,
|
||||||
|
traverse_obj,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class RozhlasIE(InfoExtractor):
|
class RozhlasIE(InfoExtractor):
|
||||||
|
@ -50,7 +57,7 @@ class RozhlasVltavaIE(InfoExtractor):
|
||||||
'url': 'https://wave.rozhlas.cz/papej-masicko-porcujeme-a-bilancujeme-filmy-a-serialy-ktere-letos-zabily-8891337',
|
'url': 'https://wave.rozhlas.cz/papej-masicko-porcujeme-a-bilancujeme-filmy-a-serialy-ktere-letos-zabily-8891337',
|
||||||
'md5': 'ba2fdbc1242fc16771c7695d271ec355',
|
'md5': 'ba2fdbc1242fc16771c7695d271ec355',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 8891337,
|
'id': '8891337',
|
||||||
'title': 'md5:21f99739d04ab49d8c189ec711eef4ec',
|
'title': 'md5:21f99739d04ab49d8c189ec711eef4ec',
|
||||||
},
|
},
|
||||||
'playlist_count': 1,
|
'playlist_count': 1,
|
||||||
|
@ -69,7 +76,7 @@ class RozhlasVltavaIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://wave.rozhlas.cz/poslechnete-si-neklid-podcastovy-thriller-o-vine-strachu-a-vztahu-ktery-zasel-8554744',
|
'url': 'https://wave.rozhlas.cz/poslechnete-si-neklid-podcastovy-thriller-o-vine-strachu-a-vztahu-ktery-zasel-8554744',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 8554744,
|
'id': '8554744',
|
||||||
'title': 'Poslechněte si Neklid. Podcastový thriller o vině, strachu a vztahu, který zašel příliš daleko',
|
'title': 'Poslechněte si Neklid. Podcastový thriller o vině, strachu a vztahu, který zašel příliš daleko',
|
||||||
},
|
},
|
||||||
'playlist_count': 5,
|
'playlist_count': 5,
|
||||||
|
@ -139,27 +146,62 @@ class RozhlasVltavaIE(InfoExtractor):
|
||||||
'chapter_number': 5,
|
'chapter_number': 5,
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
}, {
|
||||||
|
'url': 'https://dvojka.rozhlas.cz/karel-siktanc-cerny-jezdec-bily-kun-napinava-pohadka-o-tajemnem-prizraku-8946969',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '8946969',
|
||||||
|
'title': 'Karel Šiktanc: Černý jezdec, bílý kůň. Napínavá pohádka o tajemném přízraku',
|
||||||
|
},
|
||||||
|
'playlist_count': 1,
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'id': '10631121',
|
||||||
|
'ext': 'm4a',
|
||||||
|
'title': 'Karel Šiktanc: Černý jezdec, bílý kůň. Napínavá pohádka o tajemném přízraku',
|
||||||
|
'description': 'Karel Šiktanc: Černý jezdec, bílý kůň',
|
||||||
|
'duration': 2656,
|
||||||
|
'artist': 'Tvůrčí skupina Drama a literatura',
|
||||||
|
'channel_id': 'dvojka',
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
'params': {'skip_download': 'dash'},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_video(self, entry):
    """Build the info dict for a single playlist entry of the player data."""
    ga_path = ('meta', 'ga')  # common prefix of the metadata lookups below
    audio_id = entry['meta']['ga']['contentId']

    formats = []
    # Only audio links that carry a valid URL are considered
    valid_links = traverse_obj(entry, ('audioLinks', lambda _, v: url_or_none(v['url'])))
    for link in valid_links:
        variant = link.get('variant')
        if variant == 'dash':
            formats.extend(self._extract_mpd_formats(
                link['url'], audio_id, mpd_id=variant, fatal=False))
            continue
        if variant == 'hls':
            formats.extend(self._extract_m3u8_formats(
                link['url'], audio_id, 'm4a', m3u8_id=variant, fatal=False))
            continue
        # Any other variant is a direct audio file; the variant name is
        # used as extension, format id and audio codec alike.
        formats.append({
            'url': link['url'],
            'ext': variant,
            'format_id': variant,
            'abr': int_or_none(link.get('bitrate')),
            'acodec': variant,
            'vcodec': 'none',
        })

    chapter_number = traverse_obj(entry, (*ga_path, 'contentSerialPart', {int_or_none}))
    # A chapter title is only reported when there is a (non-zero) chapter number
    chapter = None
    if chapter_number:
        chapter = traverse_obj(entry, (*ga_path, 'contentNameShort'))

    info = {
        'id': audio_id,
        'chapter': chapter,
        'chapter_number': chapter_number,
        'formats': formats,
    }
    info.update(traverse_obj(entry, {
        'title': (*ga_path, 'contentName'),
        'description': 'title',
        'duration': ('duration', {int_or_none}),
        'artist': (*ga_path, 'contentAuthor'),
        'channel_id': (*ga_path, 'contentCreator'),
    }))
    return info
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -173,7 +215,7 @@ class RozhlasVltavaIE(InfoExtractor):
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
'id': data.get('embedId'),
|
'id': str_or_none(data.get('embedId')) or video_id,
|
||||||
'title': traverse_obj(data, ('series', 'title')),
|
'title': traverse_obj(data, ('series', 'title')),
|
||||||
'entries': map(self._extract_video, data['playlist']),
|
'entries': map(self._extract_video, data['playlist']),
|
||||||
}
|
}
|
||||||
|
|
285
yt_dlp/extractor/rtvcplay.py
Normal file
285
yt_dlp/extractor/rtvcplay.py
Normal file
|
@ -0,0 +1,285 @@
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor, ExtractorError
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
float_or_none,
|
||||||
|
js_to_json,
|
||||||
|
mimetype2ext,
|
||||||
|
traverse_obj,
|
||||||
|
urljoin,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class RTVCPlayBaseIE(InfoExtractor):
    # Shared URL prefix; subclasses append their own path pattern to it
    _BASE_VALID_URL = r'https?://(?:www\.)?rtvcplay\.co'

    def _extract_player_config(self, webpage, video_id):
        """Return the `config` object declared in one of the page's <script> tags."""
        # Collapse JS string concatenations ('" + "') before searching for the JSON
        joined_page = re.sub(r'"\s*\+\s*"', '', webpage)
        config_start = r'<script\b[^>]*>[^<]*(?:var|let|const)\s+config\s*='
        return self._search_json(
            config_start, joined_page, 'player_config', video_id, transform_source=js_to_json)

    def _extract_formats_and_subtitles_player_config(self, player_config, video_id):
        """Return (formats, subtitles) for the sources listed in the player config."""
        formats = []
        subtitles = {}
        # Sources without a valid URL are skipped
        sources = traverse_obj(player_config, ('sources', ..., lambda _, v: url_or_none(v['url'])))
        for src in sources:
            src_url = src['url']
            # Prefer the declared mimetype, fall back to the URL's extension
            ext = mimetype2ext(src.get('mimetype'), default=determine_ext(src_url))
            if ext != 'm3u8':
                formats.append({
                    'url': src_url,
                    'ext': ext,
                })
                continue
            hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
                src_url, video_id, 'mp4', fatal=False)
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subtitles, target=subtitles)

        return formats, subtitles
|
||||||
|
|
||||||
|
|
||||||
|
class RTVCPlayIE(RTVCPlayBaseIE):
    # "/embed/..." URLs are excluded here (negative lookahead on the category)
    _VALID_URL = RTVCPlayBaseIE._BASE_VALID_URL + r'/(?P<category>(?!embed)[^/]+)/(?:[^?#]+/)?(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://www.rtvcplay.co/en-vivo/canal-institucional',
        'info_dict': {
            'id': 'canal-institucional',
            'title': r're:^Canal Institucional',
            'description': 'md5:eff9e548394175928059320c006031ea',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.rtvcplay.co/en-vivo/senal-colombia',
        'info_dict': {
            'id': 'senal-colombia',
            'title': r're:^Señal Colombia',
            'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.rtvcplay.co/en-vivo/radio-nacional',
        'info_dict': {
            'id': 'radio-nacional',
            'title': r're:^Radio Nacional',
            'description': 'md5:5de009bc6a9fa79d2a6cf0b73f977d53',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.rtvcplay.co/peliculas-ficcion/senoritas',
        'md5': '1288ee6f6d1330d880f98bff2ed710a3',
        'info_dict': {
            'id': 'senoritas',
            'title': 'Señoritas',
            'description': 'md5:f095a2bb52cb6cf279daf6302f86fb32',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.rtvcplay.co/competencias-basicas-ciudadanas-y-socioemocionales/profe-en-tu-casa/james-regresa-clases-28022022',
        'md5': 'f040a7380a269ad633cf837384d5e9fc',
        'info_dict': {
            'id': 'james-regresa-clases-28022022',
            'title': 'James regresa a clases - 28/02/2022',
            'description': 'md5:c5dcdf757c7ab29305e8763c6007e675',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.rtvcplay.co/peliculas-documentales/llinas-el-cerebro-y-el-universo',
        'info_dict': {
            'id': 'llinas-el-cerebro-y-el-universo',
            'title': 'Llinás, el cerebro y el universo',
            'description': 'md5:add875bf2309bb52b3e8b9b06116d9b0',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 3,
    }, {
        'url': 'https://www.rtvcplay.co/competencias-basicas-ciudadanas-y-socioemocionales/profe-en-tu-casa',
        'info_dict': {
            'id': 'profe-en-tu-casa',
            'title': 'Profe en tu casa',
            'description': 'md5:47dbe20e263194413b1db2a2805a4f2e',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 537,
    }, {
        'url': 'https://www.rtvcplay.co/series-al-oido/relato-de-un-naufrago-una-travesia-del-periodismo-a-la-literatura',
        'info_dict': {
            'id': 'relato-de-un-naufrago-una-travesia-del-periodismo-a-la-literatura',
            'title': 'Relato de un náufrago: una travesía del periodismo a la literatura',
            'description': 'md5:6da28fdca4a5a568ea47ef65ef775603',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://www.rtvcplay.co/series-al-oido/diez-versiones',
        'info_dict': {
            'id': 'diez-versiones',
            'title': 'Diez versiones',
            'description': 'md5:997471ed971cb3fd8e41969457675306',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        },
        'playlist_mincount': 20,
    }]

    def _real_extract(self, url):
        """Extract a single video/live channel, or a playlist for program and podcast pages."""
        mobj = self._match_valid_url(url)
        video_id, category = mobj.group('id', 'category')
        webpage = self._download_webpage(url, video_id)

        page_state = self._search_json(
            r'window\.__RTVCPLAY_STATE__\s*=', webpage, 'hydration',
            video_id, transform_source=js_to_json)
        hydration = page_state['content']['currentContent']

        # On-demand videos fill the asset id into an HLS URL template;
        # otherwise the channel's own HLS URL (if any) is used.
        asset_id = traverse_obj(hydration, ('video', 'assetid'))
        hls_url = (
            hydration['base_url_hls'].replace('[node:field_asset_id]', asset_id)
            if asset_id else traverse_obj(hydration, ('channel', 'hls')))

        metadata = traverse_obj(hydration, {
            'title': 'title',
            'description': 'description',
            'thumbnail': ((('channel', 'image', 'logo'), ('resource', 'image', 'cover_desktop')), 'path'),
        }, get_all=False)

        if hls_url:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls_url, video_id, 'mp4')
            return {
                'id': video_id,
                'formats': formats,
                'subtitles': subtitles,
                'is_live': category == 'en-vivo',
                **metadata,
            }

        # Without a stream URL it's probably a program's page
        seasons = traverse_obj(
            hydration, ('widgets', lambda _, y: y['type'] == 'seasonList', 'contents'),
            get_all=False)
        if seasons:
            entries = []
            for season in seasons:
                season_info = traverse_obj(season, {
                    'season': 'title',
                    'season_number': ('season', {int_or_none}),
                })
                for episode in traverse_obj(season, ('contents', ...)):
                    episode_info = traverse_obj(episode, {
                        'title': 'title',
                        'thumbnail': ('image', 'cover', 'path'),
                        'episode_number': ('chapter_number', {int_or_none}),
                    })
                    entries.append(self.url_result(
                        urljoin(url, episode['slug']), url_transparent=True,
                        **season_info, **episode_info))

            return self.playlist_result(entries, video_id, **metadata)

        # No seasons either: fall back to the podcast episode list
        podcast_episodes = hydration.get('audios')
        if not podcast_episodes:
            raise ExtractorError('Could not find asset_id nor program playlist nor podcast episodes')

        podcast_entries = []
        for episode in podcast_episodes:
            podcast_entries.append(self.url_result(
                episode['file'], url_transparent=True, **traverse_obj(episode, {
                    'title': 'title',
                    'description': ('description', {clean_html}),
                    'episode_number': ('chapter_number', {float_or_none}, {int_or_none}),
                    'season_number': ('season', {int_or_none}),
                })))
        return self.playlist_result(podcast_entries, video_id, **metadata)
|
||||||
|
|
||||||
|
|
||||||
|
class RTVCPlayEmbedIE(RTVCPlayBaseIE):
    _VALID_URL = RTVCPlayBaseIE._BASE_VALID_URL + r'/embed/(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://www.rtvcplay.co/embed/72b0e699-248b-4929-a4a8-3782702fa7f9',
        'md5': 'ed529aeaee7aa2a72afe91ac7d1177a8',
        'info_dict': {
            'id': '72b0e699-248b-4929-a4a8-3782702fa7f9',
            'title': 'Tráiler: Señoritas',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'ext': 'mp4',
        }
    }]

    def _real_extract(self, url):
        """Extract an embedded player page; metadata comes from the CMS API when an asset id is known."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        player_config = self._extract_player_config(webpage, video_id)
        formats, subtitles = self._extract_formats_and_subtitles_player_config(player_config, video_id)

        # The metadata request is optional (non-fatal) and skipped without an asset id
        asset_id = traverse_obj(player_config, ('rtvcplay', 'assetid'))
        if asset_id:
            metadata = self._download_json(
                f'https://cms.rtvcplay.co/api/v1/video/asset-id/{asset_id}', video_id, fatal=False)
        else:
            metadata = {}

        info = {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        }
        info.update(traverse_obj(metadata, {
            'title': 'title',
            'description': 'description',
            'thumbnail': ('image', ..., 'thumbnail', 'path'),
        }, get_all=False))
        return info
|
||||||
|
|
||||||
|
|
||||||
|
class RTVCKalturaIE(RTVCPlayBaseIE):
    _VALID_URL = r'https?://media\.rtvc\.gov\.co/kalturartvc/(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://media.rtvc.gov.co/kalturartvc/indexSC.html',
        'info_dict': {
            'id': 'indexSC',
            'title': r're:^Señal Colombia',
            'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }]

    def _real_extract(self, url):
        """Extract a live channel page; always reported as live."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        player_config = self._extract_player_config(webpage, video_id)
        formats, subtitles = self._extract_formats_and_subtitles_player_config(player_config, video_id)

        # The channel metadata request is optional (non-fatal) and skipped without a channel id
        channel_id = traverse_obj(player_config, ('rtvcplay', 'channelId'))
        if channel_id:
            metadata = self._download_json(
                f'https://cms.rtvcplay.co/api/v1/taxonomy_term/streaming/{channel_id}', video_id, fatal=False)
        else:
            metadata = {}

        # Add the formats of the HLS URL given in the channel metadata to
        # those already found in the player config
        channel_hls_url = traverse_obj(metadata, ('channel', 'hls'))
        channel_formats, channel_subtitles = self._extract_m3u8_formats_and_subtitles(
            channel_hls_url, video_id, 'mp4', fatal=False)
        formats.extend(channel_formats)
        self._merge_subtitles(channel_subtitles, target=subtitles)

        info = {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': True,
        }
        info.update(traverse_obj(metadata, {
            'title': 'title',
            'description': 'description',
            'thumbnail': ('channel', 'image', 'logo', 'path'),
        }))
        return info
|
|
@ -7,8 +7,11 @@ from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
UnsupportedError,
|
UnsupportedError,
|
||||||
clean_html,
|
clean_html,
|
||||||
|
determine_ext,
|
||||||
|
format_field,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
join_nonempty,
|
||||||
parse_count,
|
parse_count,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
|
@ -164,7 +167,13 @@ class RumbleEmbedIE(InfoExtractor):
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for ext, ext_info in (video.get('ua') or {}).items():
|
for ext, ext_info in (video.get('ua') or {}).items():
|
||||||
for height, video_info in (ext_info or {}).items():
|
if isinstance(ext_info, dict):
|
||||||
|
for height, video_info in ext_info.items():
|
||||||
|
if not traverse_obj(video_info, ('meta', 'h', {int_or_none})):
|
||||||
|
video_info.setdefault('meta', {})['h'] = height
|
||||||
|
ext_info = ext_info.values()
|
||||||
|
|
||||||
|
for video_info in ext_info:
|
||||||
meta = video_info.get('meta') or {}
|
meta = video_info.get('meta') or {}
|
||||||
if not video_info.get('url'):
|
if not video_info.get('url'):
|
||||||
continue
|
continue
|
||||||
|
@ -175,12 +184,16 @@ class RumbleEmbedIE(InfoExtractor):
|
||||||
video_info['url'], video_id,
|
video_info['url'], video_id,
|
||||||
ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live'))
|
ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live'))
|
||||||
continue
|
continue
|
||||||
|
timeline = ext == 'timeline'
|
||||||
|
if timeline:
|
||||||
|
ext = determine_ext(video_info['url'])
|
||||||
formats.append({
|
formats.append({
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
|
'acodec': 'none' if timeline else None,
|
||||||
'url': video_info['url'],
|
'url': video_info['url'],
|
||||||
'format_id': '%s-%sp' % (ext, height),
|
'format_id': join_nonempty(ext, format_field(meta, 'h', '%sp')),
|
||||||
'height': int_or_none(height),
|
'format_note': 'Timeline' if timeline else None,
|
||||||
'fps': video.get('fps'),
|
'fps': None if timeline else video.get('fps'),
|
||||||
**traverse_obj(meta, {
|
**traverse_obj(meta, {
|
||||||
'tbr': 'bitrate',
|
'tbr': 'bitrate',
|
||||||
'filesize': 'size',
|
'filesize': 'size',
|
||||||
|
@ -247,6 +260,43 @@ class RumbleIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.rumble.com/vDMUM1?key=value',
|
'url': 'http://www.rumble.com/vDMUM1?key=value',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'note': 'timeline format',
|
||||||
|
'url': 'https://rumble.com/v2ea9qb-the-u.s.-cannot-hide-this-in-ukraine-anymore-redacted-with-natali-and-clayt.html',
|
||||||
|
'md5': '40d61fec6c0945bca3d0e1dc1aa53d79',
|
||||||
|
'params': {'format': 'wv'},
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'v2bou5f',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'uploader': 'Redacted News',
|
||||||
|
'upload_date': '20230322',
|
||||||
|
'timestamp': 1679445010,
|
||||||
|
'title': 'The U.S. CANNOT hide this in Ukraine anymore | Redacted with Natali and Clayton Morris',
|
||||||
|
'duration': 892,
|
||||||
|
'channel': 'Redacted News',
|
||||||
|
'description': 'md5:aaad0c5c3426d7a361c29bdaaced7c42',
|
||||||
|
'channel_url': 'https://rumble.com/c/Redacted',
|
||||||
|
'live_status': 'not_live',
|
||||||
|
'thumbnail': 'https://sp.rmbl.ws/s8/1/d/x/2/O/dx2Oi.qR4e-small-The-U.S.-CANNOT-hide-this-i.jpg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://rumble.com/v2e7fju-the-covid-twitter-files-drop-protecting-fauci-while-censoring-the-truth-wma.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'v2blzyy',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'live_status': 'was_live',
|
||||||
|
'release_timestamp': 1679446804,
|
||||||
|
'description': 'md5:2ac4908ccfecfb921f8ffa4b30c1e636',
|
||||||
|
'release_date': '20230322',
|
||||||
|
'timestamp': 1679445692,
|
||||||
|
'duration': 4435,
|
||||||
|
'upload_date': '20230322',
|
||||||
|
'title': 'The Covid Twitter Files Drop: Protecting Fauci While Censoring The Truth w/Matt Taibbi',
|
||||||
|
'uploader': 'Kim Iversen',
|
||||||
|
'channel_url': 'https://rumble.com/c/KimIversen',
|
||||||
|
'channel': 'Kim Iversen',
|
||||||
|
'thumbnail': 'https://sp.rmbl.ws/s8/1/6/b/w/O/6bwOi.qR4e-small-The-Covid-Twitter-Files-Dro.jpg',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_WEBPAGE_TESTS = [{
|
_WEBPAGE_TESTS = [{
|
||||||
|
|
|
@ -1,7 +1,13 @@
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
smuggle_url,
|
HEADRequest,
|
||||||
ExtractorError,
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
parse_iso8601,
|
||||||
|
traverse_obj,
|
||||||
|
update_url_query,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -11,7 +17,7 @@ class SBSIE(InfoExtractor):
|
||||||
https?://(?:www\.)?sbs\.com\.au/(?:
|
https?://(?:www\.)?sbs\.com\.au/(?:
|
||||||
ondemand(?:
|
ondemand(?:
|
||||||
/video/(?:single/)?|
|
/video/(?:single/)?|
|
||||||
/movie/[^/]+/|
|
/(?:movie|tv-program)/[^/]+/|
|
||||||
/(?:tv|news)-series/(?:[^/]+/){3}|
|
/(?:tv|news)-series/(?:[^/]+/){3}|
|
||||||
.*?\bplay=|/watch/
|
.*?\bplay=|/watch/
|
||||||
)|news/(?:embeds/)?video/
|
)|news/(?:embeds/)?video/
|
||||||
|
@ -27,18 +33,21 @@ class SBSIE(InfoExtractor):
|
||||||
# Original URL is handled by the generic IE which finds the iframe:
|
# Original URL is handled by the generic IE which finds the iframe:
|
||||||
# http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation
|
# http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation
|
||||||
'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
|
'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed',
|
||||||
'md5': '3150cf278965eeabb5b4cea1c963fe0a',
|
'md5': '31f84a7a19b53635db63c73f8ab0c4a7',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '_rFBPRPO4pMR',
|
'id': '320403011771', # '_rFBPRPO4pMR',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Dingo Conservation (The Feed)',
|
'title': 'Dingo Conservation (The Feed)',
|
||||||
'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',
|
'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5',
|
||||||
'thumbnail': r're:http://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
'duration': 308,
|
'duration': 308,
|
||||||
'timestamp': 1408613220,
|
'timestamp': 1408613220,
|
||||||
'upload_date': '20140821',
|
'upload_date': '20140821',
|
||||||
'uploader': 'SBSC',
|
'uploader': 'SBSC',
|
||||||
|
'tags': None,
|
||||||
|
'categories': None,
|
||||||
},
|
},
|
||||||
|
'expected_warnings': ['Unable to download JSON metadata'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
|
'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -70,34 +79,80 @@ class SBSIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.sbs.com.au/ondemand/tv-series/the-handmaids-tale/season-5/the-handmaids-tale-s5-ep1/2065631811776',
|
'url': 'https://www.sbs.com.au/ondemand/tv-series/the-handmaids-tale/season-5/the-handmaids-tale-s5-ep1/2065631811776',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.sbs.com.au/ondemand/tv-program/autun-romes-forgotten-sister/2116212803602',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
_GEO_COUNTRIES = ['AU']
|
||||||
|
_AUS_TV_PARENTAL_GUIDELINES = {
|
||||||
|
'P': 0,
|
||||||
|
'C': 7,
|
||||||
|
'G': 0,
|
||||||
|
'PG': 0,
|
||||||
|
'M': 14,
|
||||||
|
'MA15+': 15,
|
||||||
|
'MAV15+': 15,
|
||||||
|
'R18+': 18,
|
||||||
|
}
|
||||||
|
_PLAYER_API = 'https://www.sbs.com.au/api/v3'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
player_params = self._download_json(
|
formats, subtitles = self._extract_smil_formats_and_subtitles(
|
||||||
'http://www.sbs.com.au/api/video_pdkvars/id/%s?form=json' % video_id, video_id)
|
update_url_query(f'{self._PLAYER_API}/video_smil', {'id': video_id}), video_id)
|
||||||
|
|
||||||
error = player_params.get('error')
|
if not formats:
|
||||||
if error:
|
urlh = self._request_webpage(
|
||||||
error_message = 'Sorry, The video you are looking for does not exist.'
|
HEADRequest('https://sbs-vod-prod-01.akamaized.net/'), video_id,
|
||||||
video_data = error.get('results') or {}
|
note='Checking geo-restriction', fatal=False, expected_status=403)
|
||||||
error_code = error.get('errorCode')
|
if urlh:
|
||||||
if error_code == 'ComingSoon':
|
error_reasons = urlh.headers.get_all('x-error-reason') or []
|
||||||
error_message = '%s is not yet available.' % video_data.get('title', '')
|
if 'geo-blocked' in error_reasons:
|
||||||
elif error_code in ('Forbidden', 'intranetAccessOnly'):
|
self.raise_geo_restricted(countries=['AU'])
|
||||||
error_message = 'Sorry, This video cannot be accessed via this website'
|
self.raise_no_formats('No formats are available', video_id=video_id)
|
||||||
elif error_code == 'Expired':
|
|
||||||
error_message = 'Sorry, %s is no longer available.' % video_data.get('title', '')
|
|
||||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
|
|
||||||
|
|
||||||
urls = player_params['releaseUrls']
|
media = traverse_obj(self._download_json(
|
||||||
theplatform_url = (urls.get('progressive') or urls.get('html')
|
f'{self._PLAYER_API}/video_stream', video_id, fatal=False,
|
||||||
or urls.get('standard') or player_params['relatedItemsURL'])
|
query={'id': video_id, 'context': 'tv'}), ('video_object', {dict})) or {}
|
||||||
|
|
||||||
|
media.update(self._download_json(
|
||||||
|
f'https://catalogue.pr.sbsod.com/mpx-media/{video_id}',
|
||||||
|
video_id, fatal=not media) or {})
|
||||||
|
|
||||||
|
# For named episodes, use the catalogue's title to set episode, rather than generic 'Episode N'.
|
||||||
|
if traverse_obj(media, ('partOfSeries', {dict})):
|
||||||
|
media['epName'] = traverse_obj(media, ('title', {str}))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
|
||||||
'ie_key': 'ThePlatform',
|
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'url': smuggle_url(self._proto_relative_url(theplatform_url), {'force_smil_url': True}),
|
**traverse_obj(media, {
|
||||||
'is_live': player_params.get('streamType') == 'live',
|
'title': ('name', {str}),
|
||||||
|
'description': ('description', {str}),
|
||||||
|
'channel': ('taxonomy', 'channel', 'name', {str}),
|
||||||
|
'series': ((('partOfSeries', 'name'), 'seriesTitle'), {str}),
|
||||||
|
'series_id': ((('partOfSeries', 'uuid'), 'seriesID'), {str}),
|
||||||
|
'season_number': ('seasonNumber', {int_or_none}),
|
||||||
|
'episode': ('epName', {str}),
|
||||||
|
'episode_number': ('episodeNumber', {int_or_none}),
|
||||||
|
'timestamp': (('datePublished', ('publication', 'startDate')), {parse_iso8601}),
|
||||||
|
'release_year': ('releaseYear', {int_or_none}),
|
||||||
|
'duration': ('duration', ({float_or_none}, {parse_duration})),
|
||||||
|
'is_live': ('liveStream', {bool}),
|
||||||
|
'age_limit': (
|
||||||
|
('classificationID', 'contentRating'), {str.upper}, {self._AUS_TV_PARENTAL_GUIDELINES.get}),
|
||||||
|
}, get_all=False),
|
||||||
|
**traverse_obj(media, {
|
||||||
|
'categories': (('genres', ...), ('taxonomy', ('genre', 'subgenre'), 'name'), {str}),
|
||||||
|
'tags': (('consumerAdviceTexts', ('sbsSubCertification', 'consumerAdvice')), ..., {str}),
|
||||||
|
'thumbnails': ('thumbnails', lambda _, v: url_or_none(v['contentUrl']), {
|
||||||
|
'id': ('name', {str}),
|
||||||
|
'url': 'contentUrl',
|
||||||
|
'width': ('width', {int_or_none}),
|
||||||
|
'height': ('height', {int_or_none}),
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'uploader': 'SBSC',
|
||||||
}
|
}
|
||||||
|
|
31
yt_dlp/extractor/senalcolombia.py
Normal file
31
yt_dlp/extractor/senalcolombia.py
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .rtvcplay import RTVCKalturaIE
|
||||||
|
|
||||||
|
|
||||||
|
class SenalColombiaLiveIE(InfoExtractor):
    # Live-stream page of senalcolombia.tv. The page carries a Drupal settings
    # <script> tag; its 'envivosrc' entry is delegated to RTVCKalturaIE.
    _VALID_URL = r'https?://(?:www\.)?senalcolombia\.tv/(?P<id>senal-en-vivo)'

    _TESTS = [{
        'url': 'https://www.senalcolombia.tv/senal-en-vivo',
        'info_dict': {
            'id': 'indexSC',
            'title': 're:^Señal Colombia',
            'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }]

    def _real_extract(self, url):
        page_slug = self._match_id(url)

        # Matches the opening <script> tag whose data-drupal-selector mentions
        # "drupal-settings-json"
        settings_tag_re = r'<script\b[^>]*data-drupal-selector\s*=\s*"[^"]*drupal-settings-json[^"]*"[^>]*>'
        drupal_settings = self._search_json(
            settings_tag_re, self._download_webpage(url, page_slug), 'hydration', page_slug)

        # A missing 'envivosrc' key raises KeyError here, exactly as a direct lookup would
        live_source = drupal_settings['envivosrc']
        return self.url_result(live_source, RTVCKalturaIE, page_slug)
|
518
yt_dlp/extractor/stageplus.py
Normal file
518
yt_dlp/extractor/stageplus.py
Normal file
|
@ -0,0 +1,518 @@
|
||||||
|
import json
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
traverse_obj,
|
||||||
|
try_call,
|
||||||
|
unified_timestamp,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class StagePlusVODConcertIE(InfoExtractor):
|
||||||
|
_NETRC_MACHINE = 'stageplus'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?stage-plus\.com/video/(?P<id>vod_concert_\w+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.stage-plus.com/video/vod_concert_APNM8GRFDPHMASJKBSPJACG',
|
||||||
|
'playlist_count': 6,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'vod_concert_APNM8GRFDPHMASJKBSPJACG',
|
||||||
|
'title': 'Yuja Wang plays Rachmaninoff\'s Piano Concerto No. 2 – from Odeonsplatz',
|
||||||
|
'description': 'md5:50f78ec180518c9bdb876bac550996fc',
|
||||||
|
'artist': ['Yuja Wang', 'Lorenzo Viotti'],
|
||||||
|
'upload_date': '20230331',
|
||||||
|
'timestamp': 1680249600,
|
||||||
|
'release_date': '20210709',
|
||||||
|
'release_timestamp': 1625788800,
|
||||||
|
'thumbnails': 'count:3',
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'performance_work_A1IN4PJFE9MM2RJ3CLBMUSJBBSOJAD9O',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Piano Concerto No. 2 in C Minor, Op. 18',
|
||||||
|
'description': 'md5:50f78ec180518c9bdb876bac550996fc',
|
||||||
|
'upload_date': '20230331',
|
||||||
|
'timestamp': 1680249600,
|
||||||
|
'release_date': '20210709',
|
||||||
|
'release_timestamp': 1625788800,
|
||||||
|
'duration': 2207,
|
||||||
|
'chapters': 'count:5',
|
||||||
|
'artist': ['Yuja Wang'],
|
||||||
|
'composer': ['Sergei Rachmaninoff'],
|
||||||
|
'album': 'Yuja Wang plays Rachmaninoff\'s Piano Concerto No. 2 – from Odeonsplatz',
|
||||||
|
'album_artist': ['Yuja Wang', 'Lorenzo Viotti'],
|
||||||
|
'track': 'Piano Concerto No. 2 in C Minor, Op. 18',
|
||||||
|
'track_number': 1,
|
||||||
|
'genre': 'Instrumental Concerto',
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}]
|
||||||
|
|
||||||
|
# TODO: Prune this after livestream and/or album extractors are added
|
||||||
|
_GRAPHQL_QUERY = '''query videoDetailPage($videoId: ID!, $sliderItemsFirst: Int = 24) {
|
||||||
|
node(id: $videoId) {
|
||||||
|
__typename
|
||||||
|
...LiveConcertFields
|
||||||
|
... on LiveConcert {
|
||||||
|
artists {
|
||||||
|
edges {
|
||||||
|
role {
|
||||||
|
...RoleFields
|
||||||
|
}
|
||||||
|
node {
|
||||||
|
id
|
||||||
|
name
|
||||||
|
sortName
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
isAtmos
|
||||||
|
maxResolution
|
||||||
|
groups {
|
||||||
|
id
|
||||||
|
name
|
||||||
|
typeDisplayName
|
||||||
|
}
|
||||||
|
shortDescription
|
||||||
|
performanceWorks {
|
||||||
|
...livePerformanceWorkFields
|
||||||
|
}
|
||||||
|
totalDuration
|
||||||
|
sliders {
|
||||||
|
...contentContainerFields
|
||||||
|
}
|
||||||
|
vodConcert {
|
||||||
|
__typename
|
||||||
|
id
|
||||||
|
}
|
||||||
|
}
|
||||||
|
...VideoFields
|
||||||
|
... on Video {
|
||||||
|
artists {
|
||||||
|
edges {
|
||||||
|
role {
|
||||||
|
...RoleFields
|
||||||
|
}
|
||||||
|
node {
|
||||||
|
id
|
||||||
|
name
|
||||||
|
sortName
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
isAtmos
|
||||||
|
maxResolution
|
||||||
|
isLossless
|
||||||
|
description
|
||||||
|
productionDate
|
||||||
|
takedownDate
|
||||||
|
sliders {
|
||||||
|
...contentContainerFields
|
||||||
|
}
|
||||||
|
}
|
||||||
|
...VodConcertFields
|
||||||
|
... on VodConcert {
|
||||||
|
artists {
|
||||||
|
edges {
|
||||||
|
role {
|
||||||
|
...RoleFields
|
||||||
|
}
|
||||||
|
node {
|
||||||
|
id
|
||||||
|
name
|
||||||
|
sortName
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
isAtmos
|
||||||
|
maxResolution
|
||||||
|
groups {
|
||||||
|
id
|
||||||
|
name
|
||||||
|
typeDisplayName
|
||||||
|
}
|
||||||
|
performanceWorks {
|
||||||
|
...PerformanceWorkFields
|
||||||
|
}
|
||||||
|
shortDescription
|
||||||
|
productionDate
|
||||||
|
takedownDate
|
||||||
|
sliders {
|
||||||
|
...contentContainerFields
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment LiveConcertFields on LiveConcert {
|
||||||
|
endTime
|
||||||
|
id
|
||||||
|
pictures {
|
||||||
|
...PictureFields
|
||||||
|
}
|
||||||
|
reruns {
|
||||||
|
...liveConcertRerunFields
|
||||||
|
}
|
||||||
|
publicationLevel
|
||||||
|
startTime
|
||||||
|
streamStartTime
|
||||||
|
subtitle
|
||||||
|
title
|
||||||
|
typeDisplayName
|
||||||
|
stream {
|
||||||
|
...liveStreamFields
|
||||||
|
}
|
||||||
|
trailerStream {
|
||||||
|
...streamFields
|
||||||
|
}
|
||||||
|
geoAccessCountries
|
||||||
|
geoAccessMode
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment PictureFields on Picture {
|
||||||
|
id
|
||||||
|
url
|
||||||
|
type
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment liveConcertRerunFields on LiveConcertRerun {
|
||||||
|
streamStartTime
|
||||||
|
endTime
|
||||||
|
startTime
|
||||||
|
stream {
|
||||||
|
...rerunStreamFields
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment rerunStreamFields on RerunStream {
|
||||||
|
publicationLevel
|
||||||
|
streamType
|
||||||
|
url
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment liveStreamFields on LiveStream {
|
||||||
|
publicationLevel
|
||||||
|
streamType
|
||||||
|
url
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment streamFields on Stream {
|
||||||
|
publicationLevel
|
||||||
|
streamType
|
||||||
|
url
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment RoleFields on Role {
|
||||||
|
__typename
|
||||||
|
id
|
||||||
|
type
|
||||||
|
displayName
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment livePerformanceWorkFields on LivePerformanceWork {
|
||||||
|
__typename
|
||||||
|
id
|
||||||
|
artists {
|
||||||
|
...artistWithRoleFields
|
||||||
|
}
|
||||||
|
groups {
|
||||||
|
edges {
|
||||||
|
node {
|
||||||
|
id
|
||||||
|
name
|
||||||
|
typeDisplayName
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
work {
|
||||||
|
...workFields
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment artistWithRoleFields on ArtistWithRoleConnection {
|
||||||
|
edges {
|
||||||
|
role {
|
||||||
|
...RoleFields
|
||||||
|
}
|
||||||
|
node {
|
||||||
|
id
|
||||||
|
name
|
||||||
|
sortName
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment workFields on Work {
|
||||||
|
id
|
||||||
|
title
|
||||||
|
movements {
|
||||||
|
id
|
||||||
|
title
|
||||||
|
}
|
||||||
|
composers {
|
||||||
|
id
|
||||||
|
name
|
||||||
|
}
|
||||||
|
genre {
|
||||||
|
id
|
||||||
|
title
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment contentContainerFields on CuratedContentContainer {
|
||||||
|
__typename
|
||||||
|
...SliderFields
|
||||||
|
...BannerFields
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment SliderFields on Slider {
|
||||||
|
id
|
||||||
|
headline
|
||||||
|
items(first: $sliderItemsFirst) {
|
||||||
|
edges {
|
||||||
|
node {
|
||||||
|
id
|
||||||
|
__typename
|
||||||
|
...AlbumFields
|
||||||
|
...ArtistFields
|
||||||
|
...EpochFields
|
||||||
|
...GenreFields
|
||||||
|
...GroupFields
|
||||||
|
...LiveConcertFields
|
||||||
|
...PartnerFields
|
||||||
|
...PerformanceWorkFields
|
||||||
|
...VideoFields
|
||||||
|
...VodConcertFields
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment AlbumFields on Album {
|
||||||
|
artistAndGroupDisplayInfo
|
||||||
|
id
|
||||||
|
pictures {
|
||||||
|
...PictureFields
|
||||||
|
}
|
||||||
|
title
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment ArtistFields on Artist {
|
||||||
|
id
|
||||||
|
name
|
||||||
|
roles {
|
||||||
|
...RoleFields
|
||||||
|
}
|
||||||
|
pictures {
|
||||||
|
...PictureFields
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment EpochFields on Epoch {
|
||||||
|
id
|
||||||
|
endYear
|
||||||
|
pictures {
|
||||||
|
...PictureFields
|
||||||
|
}
|
||||||
|
startYear
|
||||||
|
title
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment GenreFields on Genre {
|
||||||
|
id
|
||||||
|
pictures {
|
||||||
|
...PictureFields
|
||||||
|
}
|
||||||
|
title
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment GroupFields on Group {
|
||||||
|
id
|
||||||
|
name
|
||||||
|
typeDisplayName
|
||||||
|
pictures {
|
||||||
|
...PictureFields
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment PartnerFields on Partner {
|
||||||
|
id
|
||||||
|
name
|
||||||
|
typeDisplayName
|
||||||
|
subtypeDisplayName
|
||||||
|
pictures {
|
||||||
|
...PictureFields
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment PerformanceWorkFields on PerformanceWork {
|
||||||
|
__typename
|
||||||
|
id
|
||||||
|
artists {
|
||||||
|
...artistWithRoleFields
|
||||||
|
}
|
||||||
|
groups {
|
||||||
|
edges {
|
||||||
|
node {
|
||||||
|
id
|
||||||
|
name
|
||||||
|
typeDisplayName
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
work {
|
||||||
|
...workFields
|
||||||
|
}
|
||||||
|
stream {
|
||||||
|
...streamFields
|
||||||
|
}
|
||||||
|
vodConcert {
|
||||||
|
__typename
|
||||||
|
id
|
||||||
|
}
|
||||||
|
duration
|
||||||
|
cuePoints {
|
||||||
|
mark
|
||||||
|
title
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment VideoFields on Video {
|
||||||
|
id
|
||||||
|
archiveReleaseDate
|
||||||
|
title
|
||||||
|
subtitle
|
||||||
|
pictures {
|
||||||
|
...PictureFields
|
||||||
|
}
|
||||||
|
stream {
|
||||||
|
...streamFields
|
||||||
|
}
|
||||||
|
trailerStream {
|
||||||
|
...streamFields
|
||||||
|
}
|
||||||
|
duration
|
||||||
|
typeDisplayName
|
||||||
|
duration
|
||||||
|
geoAccessCountries
|
||||||
|
geoAccessMode
|
||||||
|
publicationLevel
|
||||||
|
takedownDate
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment VodConcertFields on VodConcert {
|
||||||
|
id
|
||||||
|
archiveReleaseDate
|
||||||
|
pictures {
|
||||||
|
...PictureFields
|
||||||
|
}
|
||||||
|
subtitle
|
||||||
|
title
|
||||||
|
typeDisplayName
|
||||||
|
totalDuration
|
||||||
|
geoAccessCountries
|
||||||
|
geoAccessMode
|
||||||
|
trailerStream {
|
||||||
|
...streamFields
|
||||||
|
}
|
||||||
|
publicationLevel
|
||||||
|
takedownDate
|
||||||
|
}
|
||||||
|
|
||||||
|
fragment BannerFields on Banner {
|
||||||
|
description
|
||||||
|
link
|
||||||
|
pictures {
|
||||||
|
...PictureFields
|
||||||
|
}
|
||||||
|
title
|
||||||
|
}'''
|
||||||
|
|
||||||
|
_TOKEN = None
|
||||||
|
|
||||||
|
def _perform_login(self, username, password):
    # Posts the credentials as a compact JSON body to the OAuth token endpoint
    # and keeps the returned access token (if any) in self._TOKEN.
    request_headers = {
        'Content-Type': 'application/json',
        'Origin': 'https://www.stage-plus.com',
    }
    credentials = {
        'grant_type': 'password',
        'username': username,
        'password': password,
        'device_info': 'Chrome (Windows)',
        # A new random device id is generated on every login call
        'client_device_id': str(uuid.uuid4()),
    }
    response = self._download_json(
        'https://audience.api.stageplus.io/oauth/token', None,
        headers=request_headers,
        data=json.dumps(credentials, separators=(',', ':')).encode(),
        note='Logging in')

    access_token = response.get('access_token')
    if access_token:
        self._TOKEN = access_token
|
||||||
|
|
||||||
|
def _real_initialize(self):
    # Nothing to do when a token is already set; otherwise fall back to the
    # 'dgplus_access_token' cookie for www.stage-plus.com and demand a login
    # when that is unavailable as well.
    if not self._TOKEN:
        cookie_token = try_call(
            lambda: self._get_cookies('https://www.stage-plus.com/')['dgplus_access_token'].value)
        self._TOKEN = cookie_token
        if not cookie_token:
            self.raise_login_required()
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    # Fetches the concert node through one GraphQL request and returns a
    # playlist with one entry per performance work that has a stream URL.
    concert_id = self._match_id(url)

    graphql_body = json.dumps({
        'query': self._GRAPHQL_QUERY,
        'variables': {'videoId': concert_id},
        'operationName': 'videoDetailPage'
    }, separators=(',', ':')).encode()
    response = self._download_json(
        'https://audience.api.stageplus.io/graphql', concert_id,
        headers={
            'authorization': f'Bearer {self._TOKEN}',
            'content-type': 'application/json',
            'Origin': 'https://www.stage-plus.com',
        }, data=graphql_body)
    concert = response['data']['node']

    # Concert-level fields; they are also merged into every entry below
    concert_meta = traverse_obj(concert, {
        'title': 'title',
        'description': ('shortDescription', {str}),
        'artist': ('artists', 'edges', ..., 'node', 'name'),
        'timestamp': ('archiveReleaseDate', {unified_timestamp}),
        'release_timestamp': ('productionDate', {unified_timestamp}),
    })

    # NOTE(review): the PictureFields fragment in _GRAPHQL_QUERY requests
    # id/url/type but no 'name' - confirm the 'id' lookup below ever yields a value
    picture_path = ('pictures', lambda _, v: url_or_none(v['url']), {
        'id': 'name',
        'url': 'url',
    })
    thumbnails = traverse_obj(concert, picture_path) or None

    # The same dict is passed to the m3u8 request and stored on every entry
    m3u8_headers = {'jwt': self._TOKEN}

    # Per-work fields; these override the concert-level values of the same name
    work_fields = {
        'title': ('work', 'title'),
        'track': ('work', 'title'),
        'duration': ('duration', {float_or_none}),
        'chapters': (
            # Cue points whose 'mark' is not numeric are dropped
            'cuePoints', lambda _, v: float_or_none(v['mark']) is not None, {
                'title': 'title',
                'start_time': ('mark', {float_or_none}),
            }),
        'artist': ('artists', 'edges', ..., 'node', 'name'),
        'composer': ('work', 'composers', ..., 'name'),
        'genre': ('work', 'genre', 'title'),
    }

    # Only works with an id and a valid stream URL are kept
    playable_works = traverse_obj(concert, (
        'performanceWorks', lambda _, v: v['id'] and url_or_none(v['stream']['url'])))

    entries = []
    for track_number, work in enumerate(playable_works, 1):
        work_id = work['id']
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            work['stream']['url'], work_id, 'mp4', m3u8_id='hls', headers=m3u8_headers)
        entry = {
            'id': work_id,
            'formats': formats,
            'subtitles': subtitles,
            'http_headers': m3u8_headers,
            'album': concert_meta.get('title'),
            'album_artist': concert_meta.get('artist'),
            'track_number': track_number,
            **concert_meta,
            **traverse_obj(work, work_fields),
        }
        entries.append(entry)

    return self.playlist_result(entries, concert_id, thumbnails=thumbnails, **concert_meta)
|
|
@ -38,11 +38,23 @@ class TelecaribePlayIE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': 'Livestream',
|
'skip_download': 'Livestream',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.play.telecaribe.co/liveplus',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'liveplus',
|
||||||
|
'title': r're:^Señal en vivo Plus',
|
||||||
|
'live_status': 'is_live',
|
||||||
|
'ext': 'mp4',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'Livestream',
|
||||||
|
},
|
||||||
|
'skip': 'Geo-restricted to Colombia',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _download_player_webpage(self, webpage, display_id):
|
def _download_player_webpage(self, webpage, display_id):
|
||||||
page_id = self._search_regex(
|
page_id = self._search_regex(
|
||||||
(r'window.firstPageId\s*=\s*["\']([^"\']+)', r'<div[^>]+id\s*=\s*"pageBackground_([^"]+)'),
|
(r'window\.firstPageId\s*=\s*["\']([^"\']+)', r'<div[^>]+id\s*=\s*"pageBackground_([^"]+)'),
|
||||||
webpage, 'page_id')
|
webpage, 'page_id')
|
||||||
|
|
||||||
props = self._download_json(self._search_regex(
|
props = self._download_json(self._search_regex(
|
||||||
|
@ -59,14 +71,16 @@ class TelecaribePlayIE(InfoExtractor):
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
player = self._download_player_webpage(webpage, display_id)
|
player = self._download_player_webpage(webpage, display_id)
|
||||||
|
|
||||||
if display_id != 'live':
|
livestream_url = self._search_regex(
|
||||||
|
r'(?:let|const|var)\s+source\s*=\s*["\']([^"\']+)', player, 'm3u8 url', default=None)
|
||||||
|
|
||||||
|
if not livestream_url:
|
||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
re.findall(r'<a[^>]+href\s*=\s*"([^"]+\.mp4)', player), display_id,
|
re.findall(r'<a[^>]+href\s*=\s*"([^"]+\.mp4)', player), display_id,
|
||||||
self._get_clean_title(self._og_search_title(webpage)))
|
self._get_clean_title(self._og_search_title(webpage)))
|
||||||
|
|
||||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||||
self._search_regex(r'(?:let|const|var)\s+source\s*=\s*["\']([^"\']+)', player, 'm3u8 url'),
|
livestream_url, display_id, 'mp4', live=True)
|
||||||
display_id, 'mp4')
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': display_id,
|
'id': display_id,
|
||||||
|
|
|
@ -5,15 +5,22 @@ from ..utils import extract_attributes
|
||||||
|
|
||||||
|
|
||||||
class TheSunIE(InfoExtractor):
|
class TheSunIE(InfoExtractor):
|
||||||
_VALID_URL = r'https://(?:www\.)?thesun\.co\.uk/[^/]+/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?the-?sun(\.co\.uk|\.com)/[^/]+/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://www.thesun.co.uk/tvandshowbiz/2261604/orlando-bloom-and-katy-perry-post-adorable-instagram-video-together-celebrating-thanksgiving-after-split-rumours/',
|
'url': 'https://www.thesun.co.uk/tvandshowbiz/2261604/orlando-bloom-and-katy-perry-post-adorable-instagram-video-together-celebrating-thanksgiving-after-split-rumours/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2261604',
|
'id': '2261604',
|
||||||
'title': 'md5:cba22f48bad9218b64d5bbe0e16afddf',
|
'title': 'md5:cba22f48bad9218b64d5bbe0e16afddf',
|
||||||
},
|
},
|
||||||
'playlist_count': 2,
|
'playlist_count': 2,
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://www.the-sun.com/entertainment/7611415/1000lb-sisters-fans-rip-amy-dangerous-health-decision/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '7611415',
|
||||||
|
'title': 'md5:e0b9b976f79dc770e5c80f22f40bb844',
|
||||||
|
},
|
||||||
|
'playlist_count': 1,
|
||||||
|
}]
|
||||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
import random
|
import random
|
||||||
|
import re
|
||||||
import string
|
import string
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
@ -12,15 +13,19 @@ from ..utils import (
|
||||||
LazyList,
|
LazyList,
|
||||||
UnsupportedError,
|
UnsupportedError,
|
||||||
UserNotLive,
|
UserNotLive,
|
||||||
|
determine_ext,
|
||||||
|
format_field,
|
||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
get_first,
|
get_first,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
join_nonempty,
|
join_nonempty,
|
||||||
|
merge_dicts,
|
||||||
qualities,
|
qualities,
|
||||||
remove_start,
|
remove_start,
|
||||||
srt_subtitles_timecode,
|
srt_subtitles_timecode,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
|
try_call,
|
||||||
try_get,
|
try_get,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
@ -200,6 +205,16 @@ class TikTokBaseIE(InfoExtractor):
|
||||||
|
|
||||||
known_resolutions = {}
|
known_resolutions = {}
|
||||||
|
|
||||||
|
def mp3_meta(url):
    # Format-field overrides for URLs whose extension is 'mp3'; every other
    # URL gets an empty dict, i.e. no overrides.
    if determine_ext(url) != 'mp3':
        return {}
    return {
        'format_note': 'Music track',
        'ext': 'mp3',
        'acodec': 'mp3',
        'vcodec': 'none',
        'width': None,
        'height': None,
    }
|
||||||
|
|
||||||
def extract_addr(addr, add_meta={}):
|
def extract_addr(addr, add_meta={}):
|
||||||
parsed_meta, res = parse_url_key(addr.get('url_key', ''))
|
parsed_meta, res = parse_url_key(addr.get('url_key', ''))
|
||||||
if res:
|
if res:
|
||||||
|
@ -215,7 +230,8 @@ class TikTokBaseIE(InfoExtractor):
|
||||||
'source_preference': -2 if 'aweme/v1' in url else -1, # Downloads from API might get blocked
|
'source_preference': -2 if 'aweme/v1' in url else -1, # Downloads from API might get blocked
|
||||||
**add_meta, **parsed_meta,
|
**add_meta, **parsed_meta,
|
||||||
'format_note': join_nonempty(
|
'format_note': join_nonempty(
|
||||||
add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None, delim=' ')
|
add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else None, delim=' '),
|
||||||
|
**mp3_meta(url),
|
||||||
} for url in addr.get('url_list') or []]
|
} for url in addr.get('url_list') or []]
|
||||||
|
|
||||||
# Hack: Add direct video links first to prioritize them when removing duplicate formats
|
# Hack: Add direct video links first to prioritize them when removing duplicate formats
|
||||||
|
@ -271,17 +287,15 @@ class TikTokBaseIE(InfoExtractor):
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
for cover_id in ('cover', 'ai_dynamic_cover', 'animated_cover', 'ai_dynamic_cover_bak',
|
for cover_id in ('cover', 'ai_dynamic_cover', 'animated_cover', 'ai_dynamic_cover_bak',
|
||||||
'origin_cover', 'dynamic_cover'):
|
'origin_cover', 'dynamic_cover'):
|
||||||
cover = video_info.get(cover_id)
|
for cover_url in traverse_obj(video_info, (cover_id, 'url_list', ...)):
|
||||||
if cover:
|
thumbnails.append({
|
||||||
for cover_url in cover['url_list']:
|
'id': cover_id,
|
||||||
thumbnails.append({
|
'url': cover_url,
|
||||||
'id': cover_id,
|
})
|
||||||
'url': cover_url,
|
|
||||||
})
|
|
||||||
|
|
||||||
stats_info = aweme_detail.get('statistics', {})
|
stats_info = aweme_detail.get('statistics') or {}
|
||||||
author_info = aweme_detail.get('author', {})
|
author_info = aweme_detail.get('author') or {}
|
||||||
music_info = aweme_detail.get('music', {})
|
music_info = aweme_detail.get('music') or {}
|
||||||
user_url = self._UPLOADER_URL_FORMAT % (traverse_obj(author_info,
|
user_url = self._UPLOADER_URL_FORMAT % (traverse_obj(author_info,
|
||||||
'sec_uid', 'id', 'uid', 'unique_id',
|
'sec_uid', 'id', 'uid', 'unique_id',
|
||||||
expected_type=str_or_none, get_all=False))
|
expected_type=str_or_none, get_all=False))
|
||||||
|
@ -303,20 +317,27 @@ class TikTokBaseIE(InfoExtractor):
|
||||||
'extractor_key': TikTokIE.ie_key(),
|
'extractor_key': TikTokIE.ie_key(),
|
||||||
'extractor': TikTokIE.IE_NAME,
|
'extractor': TikTokIE.IE_NAME,
|
||||||
'webpage_url': self._create_url(author_info.get('uid'), aweme_id),
|
'webpage_url': self._create_url(author_info.get('uid'), aweme_id),
|
||||||
'title': aweme_detail.get('desc'),
|
**traverse_obj(aweme_detail, {
|
||||||
'description': aweme_detail.get('desc'),
|
'title': ('desc', {str}),
|
||||||
'view_count': int_or_none(stats_info.get('play_count')),
|
'description': ('desc', {str}),
|
||||||
'like_count': int_or_none(stats_info.get('digg_count')),
|
'timestamp': ('create_time', {int_or_none}),
|
||||||
'repost_count': int_or_none(stats_info.get('share_count')),
|
}),
|
||||||
'comment_count': int_or_none(stats_info.get('comment_count')),
|
**traverse_obj(stats_info, {
|
||||||
'uploader': str_or_none(author_info.get('unique_id')),
|
'view_count': 'play_count',
|
||||||
'creator': str_or_none(author_info.get('nickname')),
|
'like_count': 'digg_count',
|
||||||
'uploader_id': str_or_none(author_info.get('uid')),
|
'repost_count': 'share_count',
|
||||||
|
'comment_count': 'comment_count',
|
||||||
|
}, expected_type=int_or_none),
|
||||||
|
**traverse_obj(author_info, {
|
||||||
|
'uploader': 'unique_id',
|
||||||
|
'uploader_id': 'uid',
|
||||||
|
'creator': 'nickname',
|
||||||
|
'channel_id': 'sec_uid',
|
||||||
|
}, expected_type=str_or_none),
|
||||||
'uploader_url': user_url,
|
'uploader_url': user_url,
|
||||||
'track': music_track,
|
'track': music_track,
|
||||||
'album': str_or_none(music_info.get('album')) or None,
|
'album': str_or_none(music_info.get('album')) or None,
|
||||||
'artist': music_author or None,
|
'artist': music_author or None,
|
||||||
'timestamp': int_or_none(aweme_detail.get('create_time')),
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': self.extract_subtitles(aweme_detail, aweme_id),
|
'subtitles': self.extract_subtitles(aweme_detail, aweme_id),
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
|
@ -328,37 +349,27 @@ class TikTokBaseIE(InfoExtractor):
|
||||||
'_format_sort_fields': ('quality', 'codec', 'size', 'br'),
|
'_format_sort_fields': ('quality', 'codec', 'size', 'br'),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _parse_aweme_video_web(self, aweme_detail, webpage_url):
|
def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id):
|
||||||
video_info = aweme_detail['video']
|
video_info = aweme_detail['video']
|
||||||
author_info = traverse_obj(aweme_detail, 'authorInfo', 'author', expected_type=dict, default={})
|
author_info = traverse_obj(aweme_detail, 'authorInfo', 'author', expected_type=dict, default={})
|
||||||
music_info = aweme_detail.get('music') or {}
|
music_info = aweme_detail.get('music') or {}
|
||||||
stats_info = aweme_detail.get('stats') or {}
|
stats_info = aweme_detail.get('stats') or {}
|
||||||
user_url = self._UPLOADER_URL_FORMAT % (traverse_obj(author_info,
|
channel_id = traverse_obj(author_info or aweme_detail, (('authorSecId', 'secUid'), {str}), get_all=False)
|
||||||
'secUid', 'id', 'uid', 'uniqueId',
|
user_url = self._UPLOADER_URL_FORMAT % channel_id if channel_id else None
|
||||||
expected_type=str_or_none, get_all=False)
|
|
||||||
or aweme_detail.get('authorSecId'))
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
play_url = video_info.get('playAddr')
|
width = int_or_none(video_info.get('width'))
|
||||||
width = video_info.get('width')
|
height = int_or_none(video_info.get('height'))
|
||||||
height = video_info.get('height')
|
|
||||||
if isinstance(play_url, str):
|
for play_url in traverse_obj(video_info, ('playAddr', ((..., 'src'), None), {url_or_none})):
|
||||||
formats = [{
|
formats.append({
|
||||||
'url': self._proto_relative_url(play_url),
|
'url': self._proto_relative_url(play_url),
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'width': width,
|
'width': width,
|
||||||
'height': height,
|
'height': height,
|
||||||
}]
|
})
|
||||||
elif isinstance(play_url, list):
|
|
||||||
formats = [{
|
|
||||||
'url': self._proto_relative_url(url),
|
|
||||||
'ext': 'mp4',
|
|
||||||
'width': width,
|
|
||||||
'height': height,
|
|
||||||
} for url in traverse_obj(play_url, (..., 'src'), expected_type=url_or_none) if url]
|
|
||||||
|
|
||||||
download_url = url_or_none(video_info.get('downloadAddr')) or traverse_obj(video_info, ('download', 'url'), expected_type=url_or_none)
|
for download_url in traverse_obj(video_info, (('downloadAddr', ('download', 'url')), {url_or_none})):
|
||||||
if download_url:
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': 'download',
|
'format_id': 'download',
|
||||||
'url': self._proto_relative_url(download_url),
|
'url': self._proto_relative_url(download_url),
|
||||||
|
@ -366,38 +377,48 @@ class TikTokBaseIE(InfoExtractor):
|
||||||
'width': width,
|
'width': width,
|
||||||
'height': height,
|
'height': height,
|
||||||
})
|
})
|
||||||
|
|
||||||
self._remove_duplicate_formats(formats)
|
self._remove_duplicate_formats(formats)
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
for thumbnail_name in ('thumbnail', 'cover', 'dynamicCover', 'originCover'):
|
for thumb_url in traverse_obj(aweme_detail, (
|
||||||
if aweme_detail.get(thumbnail_name):
|
(None, 'video'), ('thumbnail', 'cover', 'dynamicCover', 'originCover'), {url_or_none})):
|
||||||
thumbnails = [{
|
thumbnails.append({
|
||||||
'url': self._proto_relative_url(aweme_detail[thumbnail_name]),
|
'url': self._proto_relative_url(thumb_url),
|
||||||
'width': width,
|
'width': width,
|
||||||
'height': height
|
'height': height,
|
||||||
}]
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': traverse_obj(aweme_detail, 'id', 'awemeId', expected_type=str_or_none),
|
'id': video_id,
|
||||||
'title': aweme_detail.get('desc'),
|
**traverse_obj(aweme_detail, {
|
||||||
'duration': try_get(aweme_detail, lambda x: x['video']['duration'], int),
|
'title': ('desc', {str}),
|
||||||
'view_count': int_or_none(stats_info.get('playCount')),
|
'description': ('desc', {str}),
|
||||||
'like_count': int_or_none(stats_info.get('diggCount')),
|
'duration': ('video', 'duration', {int_or_none}),
|
||||||
'repost_count': int_or_none(stats_info.get('shareCount')),
|
'timestamp': ('createTime', {int_or_none}),
|
||||||
'comment_count': int_or_none(stats_info.get('commentCount')),
|
}),
|
||||||
'timestamp': int_or_none(aweme_detail.get('createTime')),
|
**traverse_obj(author_info or aweme_detail, {
|
||||||
'creator': str_or_none(author_info.get('nickname')),
|
'creator': ('nickname', {str}),
|
||||||
'uploader': str_or_none(author_info.get('uniqueId') or aweme_detail.get('author')),
|
'uploader': (('uniqueId', 'author'), {str}),
|
||||||
'uploader_id': str_or_none(traverse_obj(author_info, 'id', 'uid', 'authorId')),
|
'uploader_id': (('authorId', 'uid', 'id'), {str_or_none}),
|
||||||
|
}, get_all=False),
|
||||||
|
**traverse_obj(stats_info, {
|
||||||
|
'view_count': 'playCount',
|
||||||
|
'like_count': 'diggCount',
|
||||||
|
'repost_count': 'shareCount',
|
||||||
|
'comment_count': 'commentCount',
|
||||||
|
}, expected_type=int_or_none),
|
||||||
|
**traverse_obj(music_info, {
|
||||||
|
'track': 'title',
|
||||||
|
'album': ('album', {lambda x: x or None}),
|
||||||
|
'artist': 'authorName',
|
||||||
|
}, expected_type=str),
|
||||||
|
'channel_id': channel_id,
|
||||||
'uploader_url': user_url,
|
'uploader_url': user_url,
|
||||||
'track': str_or_none(music_info.get('title')),
|
|
||||||
'album': str_or_none(music_info.get('album')) or None,
|
|
||||||
'artist': str_or_none(music_info.get('authorName')),
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'description': str_or_none(aweme_detail.get('desc')),
|
|
||||||
'http_headers': {
|
'http_headers': {
|
||||||
'Referer': webpage_url
|
'Referer': webpage_url,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -431,7 +452,8 @@ class TikTokIE(TikTokBaseIE):
|
||||||
'artist': 'Ysrbeats',
|
'artist': 'Ysrbeats',
|
||||||
'album': 'Lehanga',
|
'album': 'Lehanga',
|
||||||
'track': 'Lehanga',
|
'track': 'Lehanga',
|
||||||
}
|
},
|
||||||
|
'skip': '404 Not Found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en',
|
'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en',
|
||||||
'md5': '6f3cf8cdd9b28cb8363fe0a9a160695b',
|
'md5': '6f3cf8cdd9b28cb8363fe0a9a160695b',
|
||||||
|
@ -446,6 +468,7 @@ class TikTokIE(TikTokBaseIE):
|
||||||
'uploader': 'patrox',
|
'uploader': 'patrox',
|
||||||
'uploader_id': '18702747',
|
'uploader_id': '18702747',
|
||||||
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
|
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
|
||||||
|
'channel_id': 'MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws',
|
||||||
'creator': 'patroX',
|
'creator': 'patroX',
|
||||||
'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
|
'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?',
|
||||||
'upload_date': '20190930',
|
'upload_date': '20190930',
|
||||||
|
@ -456,7 +479,7 @@ class TikTokIE(TikTokBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'artist': 'Evan Todd, Jessica Keenan Wynn, Alice Lee, Barrett Wilbert Weed & Jon Eidson',
|
'artist': 'Evan Todd, Jessica Keenan Wynn, Alice Lee, Barrett Wilbert Weed & Jon Eidson',
|
||||||
'track': 'Big Fun',
|
'track': 'Big Fun',
|
||||||
}
|
},
|
||||||
}, {
|
}, {
|
||||||
# Banned audio, only available on the app
|
# Banned audio, only available on the app
|
||||||
'url': 'https://www.tiktok.com/@barudakhb_/video/6984138651336838402',
|
'url': 'https://www.tiktok.com/@barudakhb_/video/6984138651336838402',
|
||||||
|
@ -469,6 +492,7 @@ class TikTokIE(TikTokBaseIE):
|
||||||
'creator': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6',
|
'creator': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6',
|
||||||
'uploader_id': '6974687867511718913',
|
'uploader_id': '6974687867511718913',
|
||||||
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
|
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
|
||||||
|
'channel_id': 'MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d',
|
||||||
'track': 'Boka Dance',
|
'track': 'Boka Dance',
|
||||||
'artist': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6',
|
'artist': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6',
|
||||||
'timestamp': 1626121503,
|
'timestamp': 1626121503,
|
||||||
|
@ -479,7 +503,7 @@ class TikTokIE(TikTokBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
}
|
},
|
||||||
}, {
|
}, {
|
||||||
# Sponsored video, only available with feed workaround
|
# Sponsored video, only available with feed workaround
|
||||||
'url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_/video/7042692929109986561',
|
'url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_/video/7042692929109986561',
|
||||||
|
@ -492,6 +516,7 @@ class TikTokIE(TikTokBaseIE):
|
||||||
'creator': 'Slap And Run',
|
'creator': 'Slap And Run',
|
||||||
'uploader_id': '7036055384943690754',
|
'uploader_id': '7036055384943690754',
|
||||||
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_',
|
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_',
|
||||||
|
'channel_id': 'MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_',
|
||||||
'track': 'Promoted Music',
|
'track': 'Promoted Music',
|
||||||
'timestamp': 1639754738,
|
'timestamp': 1639754738,
|
||||||
'duration': 30,
|
'duration': 30,
|
||||||
|
@ -502,7 +527,6 @@ class TikTokIE(TikTokBaseIE):
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
},
|
},
|
||||||
'expected_warnings': ['trying with webpage', 'Unable to find video in feed']
|
|
||||||
}, {
|
}, {
|
||||||
# Video without title and description
|
# Video without title and description
|
||||||
'url': 'https://www.tiktok.com/@pokemonlife22/video/7059698374567611694',
|
'url': 'https://www.tiktok.com/@pokemonlife22/video/7059698374567611694',
|
||||||
|
@ -515,6 +539,7 @@ class TikTokIE(TikTokBaseIE):
|
||||||
'creator': 'Pokemon',
|
'creator': 'Pokemon',
|
||||||
'uploader_id': '6820838815978423302',
|
'uploader_id': '6820838815978423302',
|
||||||
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
|
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
|
||||||
|
'channel_id': 'MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W',
|
||||||
'track': 'original sound',
|
'track': 'original sound',
|
||||||
'timestamp': 1643714123,
|
'timestamp': 1643714123,
|
||||||
'duration': 6,
|
'duration': 6,
|
||||||
|
@ -549,6 +574,56 @@ class TikTokIE(TikTokBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
},
|
},
|
||||||
'skip': 'This video is unavailable',
|
'skip': 'This video is unavailable',
|
||||||
|
}, {
|
||||||
|
# slideshow audio-only mp3 format
|
||||||
|
'url': 'https://www.tiktok.com/@_le_cannibale_/video/7139980461132074283',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '7139980461132074283',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'TikTok video #7139980461132074283',
|
||||||
|
'description': '',
|
||||||
|
'creator': 'Antaura',
|
||||||
|
'uploader': '_le_cannibale_',
|
||||||
|
'uploader_id': '6604511138619654149',
|
||||||
|
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP',
|
||||||
|
'channel_id': 'MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP',
|
||||||
|
'artist': 'nathan !',
|
||||||
|
'track': 'grahamscott canon',
|
||||||
|
'upload_date': '20220905',
|
||||||
|
'timestamp': 1662406249,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'thumbnail': r're:^https://.+\.webp',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# only available via web
|
||||||
|
'url': 'https://www.tiktok.com/@moxypatch/video/7206382937372134662',
|
||||||
|
'md5': '8d8c0be14127020cd9f5def4a2e6b411',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '7206382937372134662',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:1d95c0b96560ca0e8a231af4172b2c0a',
|
||||||
|
'description': 'md5:1d95c0b96560ca0e8a231af4172b2c0a',
|
||||||
|
'creator': 'MoxyPatch',
|
||||||
|
'uploader': 'moxypatch',
|
||||||
|
'uploader_id': '7039142049363379205',
|
||||||
|
'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
|
||||||
|
'channel_id': 'MS4wLjABAAAAFhqKnngMHJSsifL0w1vFOP5kn3Ndo1ODp0XuIBkNMBCkALTvwILdpu12g3pTtL4V',
|
||||||
|
'artist': 'your worst nightmare',
|
||||||
|
'track': 'original sound',
|
||||||
|
'upload_date': '20230303',
|
||||||
|
'timestamp': 1677866781,
|
||||||
|
'duration': 10,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'thumbnail': r're:^https://.+',
|
||||||
|
'thumbnails': 'count:3',
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Unable to find video in feed'],
|
||||||
}, {
|
}, {
|
||||||
# Auto-captions available
|
# Auto-captions available
|
||||||
'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758',
|
'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758',
|
||||||
|
@ -563,7 +638,7 @@ class TikTokIE(TikTokBaseIE):
|
||||||
self.report_warning(f'{e}; trying with webpage')
|
self.report_warning(f'{e}; trying with webpage')
|
||||||
|
|
||||||
url = self._create_url(user_id, video_id)
|
url = self._create_url(user_id, video_id)
|
||||||
webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'User-Agent:Mozilla/5.0'})
|
webpage = self._download_webpage(url, video_id, headers={'User-Agent': 'Mozilla/5.0'})
|
||||||
next_data = self._search_nextjs_data(webpage, video_id, default='{}')
|
next_data = self._search_nextjs_data(webpage, video_id, default='{}')
|
||||||
if next_data:
|
if next_data:
|
||||||
status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode'), expected_type=int) or 0
|
status = traverse_obj(next_data, ('props', 'pageProps', 'statusCode'), expected_type=int) or 0
|
||||||
|
@ -574,7 +649,7 @@ class TikTokIE(TikTokBaseIE):
|
||||||
video_data = traverse_obj(sigi_data, ('ItemModule', video_id), expected_type=dict)
|
video_data = traverse_obj(sigi_data, ('ItemModule', video_id), expected_type=dict)
|
||||||
|
|
||||||
if status == 0:
|
if status == 0:
|
||||||
return self._parse_aweme_video_web(video_data, url)
|
return self._parse_aweme_video_web(video_data, url, video_id)
|
||||||
elif status == 10216:
|
elif status == 10216:
|
||||||
raise ExtractorError('This video is private', expected=True)
|
raise ExtractorError('This video is private', expected=True)
|
||||||
raise ExtractorError('Video not available', video_id=video_id)
|
raise ExtractorError('Video not available', video_id=video_id)
|
||||||
|
@ -801,6 +876,7 @@ class DouyinIE(TikTokBaseIE):
|
||||||
'description': '#杨超越 小小水手带你去远航❤️',
|
'description': '#杨超越 小小水手带你去远航❤️',
|
||||||
'uploader_id': '110403406559',
|
'uploader_id': '110403406559',
|
||||||
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||||
|
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||||
'creator': '杨超越',
|
'creator': '杨超越',
|
||||||
'duration': 19782,
|
'duration': 19782,
|
||||||
'timestamp': 1620905839,
|
'timestamp': 1620905839,
|
||||||
|
@ -810,6 +886,7 @@ class DouyinIE(TikTokBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
|
'thumbnail': r're:https?://.+\.jpe?g',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.douyin.com/video/6982497745948921092',
|
'url': 'https://www.douyin.com/video/6982497745948921092',
|
||||||
|
@ -821,8 +898,9 @@ class DouyinIE(TikTokBaseIE):
|
||||||
'description': '这个夏日和小羊@杨超越 一起遇见白色幻想',
|
'description': '这个夏日和小羊@杨超越 一起遇见白色幻想',
|
||||||
'uploader_id': '408654318141572',
|
'uploader_id': '408654318141572',
|
||||||
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
|
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
|
||||||
|
'channel_id': 'MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA',
|
||||||
'creator': '杨超越工作室',
|
'creator': '杨超越工作室',
|
||||||
'duration': 42608,
|
'duration': 42479,
|
||||||
'timestamp': 1625739481,
|
'timestamp': 1625739481,
|
||||||
'upload_date': '20210708',
|
'upload_date': '20210708',
|
||||||
'track': '@杨超越工作室创作的原声',
|
'track': '@杨超越工作室创作的原声',
|
||||||
|
@ -830,6 +908,7 @@ class DouyinIE(TikTokBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
|
'thumbnail': r're:https?://.+\.jpe?g',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.douyin.com/video/6953975910773099811',
|
'url': 'https://www.douyin.com/video/6953975910773099811',
|
||||||
|
@ -841,8 +920,9 @@ class DouyinIE(TikTokBaseIE):
|
||||||
'description': '#一起看海 出现在你的夏日里',
|
'description': '#一起看海 出现在你的夏日里',
|
||||||
'uploader_id': '110403406559',
|
'uploader_id': '110403406559',
|
||||||
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||||
|
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||||
'creator': '杨超越',
|
'creator': '杨超越',
|
||||||
'duration': 17228,
|
'duration': 17343,
|
||||||
'timestamp': 1619098692,
|
'timestamp': 1619098692,
|
||||||
'upload_date': '20210422',
|
'upload_date': '20210422',
|
||||||
'track': '@杨超越创作的原声',
|
'track': '@杨超越创作的原声',
|
||||||
|
@ -850,6 +930,7 @@ class DouyinIE(TikTokBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
|
'thumbnail': r're:https?://.+\.jpe?g',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.douyin.com/video/6950251282489675042',
|
'url': 'https://www.douyin.com/video/6950251282489675042',
|
||||||
|
@ -878,6 +959,7 @@ class DouyinIE(TikTokBaseIE):
|
||||||
'description': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
|
'description': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
|
||||||
'uploader_id': '110403406559',
|
'uploader_id': '110403406559',
|
||||||
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||||
|
'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98',
|
||||||
'creator': '杨超越',
|
'creator': '杨超越',
|
||||||
'duration': 15115,
|
'duration': 15115,
|
||||||
'timestamp': 1621261163,
|
'timestamp': 1621261163,
|
||||||
|
@ -887,6 +969,7 @@ class DouyinIE(TikTokBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
|
'thumbnail': r're:https?://.+\.jpe?g',
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
_APP_VERSIONS = [('23.3.0', '230300')]
|
_APP_VERSIONS = [('23.3.0', '230300')]
|
||||||
|
@ -918,7 +1001,7 @@ class DouyinIE(TikTokBaseIE):
|
||||||
|
|
||||||
render_data = self._parse_json(
|
render_data = self._parse_json(
|
||||||
render_data_json, video_id, transform_source=compat_urllib_parse_unquote)
|
render_data_json, video_id, transform_source=compat_urllib_parse_unquote)
|
||||||
return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url)
|
return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url, video_id)
|
||||||
|
|
||||||
|
|
||||||
class TikTokVMIE(InfoExtractor):
|
class TikTokVMIE(InfoExtractor):
|
||||||
|
@ -983,40 +1066,173 @@ class TikTokVMIE(InfoExtractor):
|
||||||
return self.url_result(new_url)
|
return self.url_result(new_url)
|
||||||
|
|
||||||
|
|
||||||
class TikTokLiveIE(InfoExtractor):
|
class TikTokLiveIE(TikTokBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P<id>[\w\.-]+)/live'
|
_VALID_URL = r'''(?x)https?://(?:
|
||||||
|
(?:www\.)?tiktok\.com/@(?P<uploader>[\w.-]+)/live|
|
||||||
|
m\.tiktok\.com/share/live/(?P<id>\d+)
|
||||||
|
)'''
|
||||||
IE_NAME = 'tiktok:live'
|
IE_NAME = 'tiktok:live'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'https://www.tiktok.com/@weathernewslive/live',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '7210809319192726273',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': r're:ウェザーニュースLiVE[\d\s:-]*',
|
||||||
|
'creator': 'ウェザーニュースLiVE',
|
||||||
|
'uploader': 'weathernewslive',
|
||||||
|
'uploader_id': '6621496731283095554',
|
||||||
|
'uploader_url': 'https://www.tiktok.com/@weathernewslive',
|
||||||
|
'live_status': 'is_live',
|
||||||
|
'concurrent_view_count': int,
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.tiktok.com/@pilarmagenta/live',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '7209423610325322522',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': str,
|
||||||
|
'creator': 'Pilarmagenta',
|
||||||
|
'uploader': 'pilarmagenta',
|
||||||
|
'uploader_id': '6624846890674683909',
|
||||||
|
'uploader_url': 'https://www.tiktok.com/@pilarmagenta',
|
||||||
|
'live_status': 'is_live',
|
||||||
|
'concurrent_view_count': int,
|
||||||
|
},
|
||||||
|
'skip': 'Livestream',
|
||||||
|
}, {
|
||||||
|
'url': 'https://m.tiktok.com/share/live/7209423610325322522/?language=en',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
'url': 'https://www.tiktok.com/@iris04201/live',
|
'url': 'https://www.tiktok.com/@iris04201/live',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _call_api(self, url, param, room_id, uploader, key=None):
|
||||||
|
response = traverse_obj(self._download_json(
|
||||||
|
url, room_id, fatal=False, query={
|
||||||
|
'aid': '1988',
|
||||||
|
param: room_id,
|
||||||
|
}), (key, {dict}), default={})
|
||||||
|
|
||||||
|
# status == 2 if live else 4
|
||||||
|
if int_or_none(response.get('status')) == 2:
|
||||||
|
return response
|
||||||
|
# If room_id is obtained via mobile share URL and cannot be refreshed, do not wait for live
|
||||||
|
elif not uploader:
|
||||||
|
raise ExtractorError('This livestream has ended', expected=True)
|
||||||
|
raise UserNotLive(video_id=uploader)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
uploader = self._match_id(url)
|
uploader, room_id = self._match_valid_url(url).group('uploader', 'id')
|
||||||
webpage = self._download_webpage(url, uploader, headers={'User-Agent': 'User-Agent:Mozilla/5.0'})
|
webpage = self._download_webpage(
|
||||||
room_id = self._html_search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=None)
|
url, uploader or room_id, headers={'User-Agent': 'Mozilla/5.0'}, fatal=not room_id)
|
||||||
|
|
||||||
|
if webpage:
|
||||||
|
data = try_call(lambda: self._get_sigi_state(webpage, uploader or room_id))
|
||||||
|
room_id = (traverse_obj(data, ('UserModule', 'users', ..., 'roomId', {str_or_none}), get_all=False)
|
||||||
|
or self._search_regex(r'snssdk\d*://live\?room_id=(\d+)', webpage, 'room ID', default=None)
|
||||||
|
or room_id)
|
||||||
|
uploader = uploader or traverse_obj(
|
||||||
|
data, ('LiveRoom', 'liveRoomUserInfo', 'user', 'uniqueId'),
|
||||||
|
('UserModule', 'users', ..., 'uniqueId'), get_all=False, expected_type=str)
|
||||||
|
|
||||||
if not room_id:
|
if not room_id:
|
||||||
raise UserNotLive(video_id=uploader)
|
raise UserNotLive(video_id=uploader)
|
||||||
live_info = traverse_obj(self._download_json(
|
|
||||||
'https://www.tiktok.com/api/live/detail/', room_id, query={
|
|
||||||
'aid': '1988',
|
|
||||||
'roomID': room_id,
|
|
||||||
}), 'LiveRoomInfo', expected_type=dict, default={})
|
|
||||||
|
|
||||||
if 'status' not in live_info:
|
formats = []
|
||||||
raise ExtractorError('Unexpected response from TikTok API')
|
live_info = self._call_api(
|
||||||
# status = 2 if live else 4
|
'https://webcast.tiktok.com/webcast/room/info', 'room_id', room_id, uploader, key='data')
|
||||||
if not int_or_none(live_info['status']) == 2:
|
|
||||||
raise UserNotLive(video_id=uploader)
|
get_quality = qualities(('SD1', 'ld', 'SD2', 'sd', 'HD1', 'hd', 'FULL_HD1', 'uhd', 'ORIGION', 'origin'))
|
||||||
|
parse_inner = lambda x: self._parse_json(x, None)
|
||||||
|
|
||||||
|
for quality, stream in traverse_obj(live_info, (
|
||||||
|
'stream_url', 'live_core_sdk_data', 'pull_data', 'stream_data',
|
||||||
|
{parse_inner}, 'data', {dict}), default={}).items():
|
||||||
|
|
||||||
|
sdk_params = traverse_obj(stream, ('main', 'sdk_params', {parse_inner}, {
|
||||||
|
'vcodec': ('VCodec', {str}),
|
||||||
|
'tbr': ('vbitrate', {lambda x: int_or_none(x, 1000)}),
|
||||||
|
'resolution': ('resolution', {lambda x: re.match(r'(?i)\d+x\d+|\d+p', x).group().lower()}),
|
||||||
|
}))
|
||||||
|
|
||||||
|
flv_url = traverse_obj(stream, ('main', 'flv', {url_or_none}))
|
||||||
|
if flv_url:
|
||||||
|
formats.append({
|
||||||
|
'url': flv_url,
|
||||||
|
'ext': 'flv',
|
||||||
|
'format_id': f'flv-{quality}',
|
||||||
|
'quality': get_quality(quality),
|
||||||
|
**sdk_params,
|
||||||
|
})
|
||||||
|
|
||||||
|
hls_url = traverse_obj(stream, ('main', 'hls', {url_or_none}))
|
||||||
|
if hls_url:
|
||||||
|
formats.append({
|
||||||
|
'url': hls_url,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'protocol': 'm3u8_native',
|
||||||
|
'format_id': f'hls-{quality}',
|
||||||
|
'quality': get_quality(quality),
|
||||||
|
**sdk_params,
|
||||||
|
})
|
||||||
|
|
||||||
|
def get_vcodec(*keys):
|
||||||
|
return traverse_obj(live_info, (
|
||||||
|
'stream_url', *keys, {parse_inner}, 'VCodec', {str}))
|
||||||
|
|
||||||
|
for stream in ('hls', 'rtmp'):
|
||||||
|
stream_url = traverse_obj(live_info, ('stream_url', f'{stream}_pull_url', {url_or_none}))
|
||||||
|
if stream_url:
|
||||||
|
formats.append({
|
||||||
|
'url': stream_url,
|
||||||
|
'ext': 'mp4' if stream == 'hls' else 'flv',
|
||||||
|
'protocol': 'm3u8_native' if stream == 'hls' else 'https',
|
||||||
|
'format_id': f'{stream}-pull',
|
||||||
|
'vcodec': get_vcodec(f'{stream}_pull_url_params'),
|
||||||
|
'quality': get_quality('ORIGION'),
|
||||||
|
})
|
||||||
|
|
||||||
|
for f_id, f_url in traverse_obj(live_info, ('stream_url', 'flv_pull_url', {dict}), default={}).items():
|
||||||
|
if not url_or_none(f_url):
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'url': f_url,
|
||||||
|
'ext': 'flv',
|
||||||
|
'format_id': f'flv-{f_id}'.lower(),
|
||||||
|
'vcodec': get_vcodec('flv_pull_url_params', f_id),
|
||||||
|
'quality': get_quality(f_id),
|
||||||
|
})
|
||||||
|
|
||||||
|
# If uploader is a guest on another's livestream, primary endpoint will not have m3u8 URLs
|
||||||
|
if not traverse_obj(formats, lambda _, v: v['ext'] == 'mp4'):
|
||||||
|
live_info = merge_dicts(live_info, self._call_api(
|
||||||
|
'https://www.tiktok.com/api/live/detail/', 'roomID', room_id, uploader, key='LiveRoomInfo'))
|
||||||
|
if url_or_none(live_info.get('liveUrl')):
|
||||||
|
formats.append({
|
||||||
|
'url': live_info['liveUrl'],
|
||||||
|
'ext': 'mp4',
|
||||||
|
'protocol': 'm3u8_native',
|
||||||
|
'format_id': 'hls-fallback',
|
||||||
|
'vcodec': 'h264',
|
||||||
|
'quality': get_quality('origin'),
|
||||||
|
})
|
||||||
|
|
||||||
|
uploader = uploader or traverse_obj(live_info, ('ownerInfo', 'uniqueId'), ('owner', 'display_id'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': room_id,
|
'id': room_id,
|
||||||
'title': live_info.get('title') or self._html_search_meta(['og:title', 'twitter:title'], webpage, default=''),
|
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'uploader_id': traverse_obj(live_info, ('ownerInfo', 'id')),
|
'uploader_url': format_field(uploader, None, self._UPLOADER_URL_FORMAT) or None,
|
||||||
'creator': traverse_obj(live_info, ('ownerInfo', 'nickname')),
|
|
||||||
'concurrent_view_count': traverse_obj(live_info, ('liveRoomStats', 'userCount'), expected_type=int),
|
|
||||||
'formats': self._extract_m3u8_formats(live_info['liveUrl'], room_id, 'mp4', live=True),
|
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
|
'formats': formats,
|
||||||
|
'_format_sort_fields': ('quality', 'ext'),
|
||||||
|
**traverse_obj(live_info, {
|
||||||
|
'title': 'title',
|
||||||
|
'uploader_id': (('ownerInfo', 'owner'), 'id', {str_or_none}),
|
||||||
|
'creator': (('ownerInfo', 'owner'), 'nickname'),
|
||||||
|
'concurrent_view_count': (('user_count', ('liveRoomStats', 'userCount')), {int_or_none}),
|
||||||
|
}, get_all=False),
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,15 +1,21 @@
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
HEADRequest,
|
||||||
|
UnsupportedError,
|
||||||
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
parse_resolution,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
unified_strdate,
|
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
url_basename,
|
url_basename,
|
||||||
|
urljoin,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -22,25 +28,22 @@ class TrillerBaseIE(InfoExtractor):
|
||||||
if self._API_HEADERS.get('Authorization'):
|
if self._API_HEADERS.get('Authorization'):
|
||||||
return
|
return
|
||||||
|
|
||||||
user_check = self._download_json(
|
headers = {**self._API_HEADERS, 'Content-Type': 'application/json'}
|
||||||
|
user_check = traverse_obj(self._download_json(
|
||||||
f'{self._API_BASE_URL}/api/user/is-valid-username', None, note='Checking username',
|
f'{self._API_BASE_URL}/api/user/is-valid-username', None, note='Checking username',
|
||||||
fatal=False, expected_status=400, headers={
|
fatal=False, expected_status=400, headers=headers,
|
||||||
'Content-Type': 'application/json',
|
data=json.dumps({'username': username}, separators=(',', ':')).encode()), 'status')
|
||||||
'Origin': 'https://triller.co',
|
|
||||||
}, data=json.dumps({'username': username}, separators=(',', ':')).encode('utf-8'))
|
if user_check: # endpoint returns `"status":false` if username exists
|
||||||
if user_check.get('status'): # endpoint returns "status":false if username exists
|
|
||||||
raise ExtractorError('Unable to login: Invalid username', expected=True)
|
raise ExtractorError('Unable to login: Invalid username', expected=True)
|
||||||
|
|
||||||
credentials = {
|
|
||||||
'username': username,
|
|
||||||
'password': password,
|
|
||||||
}
|
|
||||||
login = self._download_json(
|
login = self._download_json(
|
||||||
f'{self._API_BASE_URL}/user/auth', None, note='Logging in',
|
f'{self._API_BASE_URL}/user/auth', None, note='Logging in', fatal=False,
|
||||||
fatal=False, expected_status=400, headers={
|
expected_status=400, headers=headers, data=json.dumps({
|
||||||
'Content-Type': 'application/json',
|
'username': username,
|
||||||
'Origin': 'https://triller.co',
|
'password': password,
|
||||||
}, data=json.dumps(credentials, separators=(',', ':')).encode('utf-8'))
|
}, separators=(',', ':')).encode()) or {}
|
||||||
|
|
||||||
if not login.get('auth_token'):
|
if not login.get('auth_token'):
|
||||||
if login.get('error') == 1008:
|
if login.get('error') == 1008:
|
||||||
raise ExtractorError('Unable to login: Incorrect password', expected=True)
|
raise ExtractorError('Unable to login: Incorrect password', expected=True)
|
||||||
|
@ -55,100 +58,100 @@ class TrillerBaseIE(InfoExtractor):
|
||||||
headers=self._API_HEADERS, query={'limit': limit}) or {}
|
headers=self._API_HEADERS, query={'limit': limit}) or {}
|
||||||
if not comment_info.get('comments'):
|
if not comment_info.get('comments'):
|
||||||
return
|
return
|
||||||
for comment_dict in comment_info['comments']:
|
yield from traverse_obj(comment_info, ('comments', ..., {
|
||||||
yield {
|
'id': ('id', {str_or_none}),
|
||||||
'author': traverse_obj(comment_dict, ('author', 'username')),
|
'text': 'body',
|
||||||
'author_id': traverse_obj(comment_dict, ('author', 'user_id')),
|
'author': ('author', 'username'),
|
||||||
'id': comment_dict.get('id'),
|
'author_id': ('author', 'user_id'),
|
||||||
'text': comment_dict.get('body'),
|
'timestamp': ('timestamp', {unified_timestamp}),
|
||||||
'timestamp': unified_timestamp(comment_dict.get('timestamp')),
|
}))
|
||||||
}
|
|
||||||
|
|
||||||
def _check_user_info(self, user_info):
|
def _check_user_info(self, user_info):
|
||||||
if not user_info:
|
if user_info.get('private') and not user_info.get('followed_by_me'):
|
||||||
self.report_warning('Unable to extract user info')
|
|
||||||
elif user_info.get('private') and not user_info.get('followed_by_me'):
|
|
||||||
raise ExtractorError('This video is private', expected=True)
|
raise ExtractorError('This video is private', expected=True)
|
||||||
elif traverse_obj(user_info, 'blocked_by_user', 'blocking_user'):
|
elif traverse_obj(user_info, 'blocked_by_user', 'blocking_user'):
|
||||||
raise ExtractorError('The author of the video is blocked', expected=True)
|
raise ExtractorError('The author of the video is blocked', expected=True)
|
||||||
return user_info
|
return user_info
|
||||||
|
|
||||||
def _parse_video_info(self, video_info, username, user_info=None):
|
def _parse_video_info(self, video_info, username, user_id, display_id=None):
|
||||||
video_uuid = video_info.get('video_uuid')
|
video_id = str(video_info['id'])
|
||||||
video_id = video_info.get('id')
|
display_id = display_id or video_info.get('video_uuid')
|
||||||
|
|
||||||
|
if traverse_obj(video_info, (
|
||||||
|
None, ('transcoded_url', 'video_url', 'stream_url', 'audio_url'),
|
||||||
|
{lambda x: re.search(r'/copyright/', x)}), get_all=False):
|
||||||
|
self.raise_no_formats('This video has been removed due to licensing restrictions', expected=True)
|
||||||
|
|
||||||
|
def format_info(url):
|
||||||
|
return {
|
||||||
|
'url': url,
|
||||||
|
'ext': determine_ext(url),
|
||||||
|
'format_id': url_basename(url).split('.')[0],
|
||||||
|
}
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
video_url = traverse_obj(video_info, 'video_url', 'stream_url')
|
|
||||||
if video_url:
|
if determine_ext(video_info.get('transcoded_url')) == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
video_info['transcoded_url'], video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||||
|
|
||||||
|
for video in traverse_obj(video_info, ('video_set', lambda _, v: url_or_none(v['url']))):
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
**format_info(video['url']),
|
||||||
'ext': 'mp4',
|
**parse_resolution(video.get('resolution')),
|
||||||
'vcodec': 'h264',
|
|
||||||
'width': video_info.get('width'),
|
|
||||||
'height': video_info.get('height'),
|
|
||||||
'format_id': url_basename(video_url).split('.')[0],
|
|
||||||
'filesize': video_info.get('filesize'),
|
|
||||||
})
|
|
||||||
video_set = video_info.get('video_set') or []
|
|
||||||
for video in video_set:
|
|
||||||
resolution = video.get('resolution') or ''
|
|
||||||
formats.append({
|
|
||||||
'url': video['url'],
|
|
||||||
'ext': 'mp4',
|
|
||||||
'vcodec': video.get('codec'),
|
'vcodec': video.get('codec'),
|
||||||
'vbr': int_or_none(video.get('bitrate'), 1000),
|
'vbr': int_or_none(video.get('bitrate'), 1000),
|
||||||
'width': int_or_none(resolution.split('x')[0]),
|
|
||||||
'height': int_or_none(resolution.split('x')[1]),
|
|
||||||
'format_id': url_basename(video['url']).split('.')[0],
|
|
||||||
})
|
})
|
||||||
audio_url = video_info.get('audio_url')
|
|
||||||
if audio_url:
|
video_url = traverse_obj(video_info, 'video_url', 'stream_url', expected_type=url_or_none)
|
||||||
|
if video_url:
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': audio_url,
|
**format_info(video_url),
|
||||||
'ext': 'm4a',
|
'vcodec': 'h264',
|
||||||
'format_id': url_basename(audio_url).split('.')[0],
|
**traverse_obj(video_info, {
|
||||||
|
'width': 'width',
|
||||||
|
'height': 'height',
|
||||||
|
'filesize': 'filesize',
|
||||||
|
}, expected_type=int_or_none),
|
||||||
})
|
})
|
||||||
|
|
||||||
manifest_url = video_info.get('transcoded_url')
|
audio_url = url_or_none(video_info.get('audio_url'))
|
||||||
if manifest_url:
|
if audio_url:
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.append(format_info(audio_url))
|
||||||
manifest_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
|
||||||
m3u8_id='hls', fatal=False))
|
|
||||||
|
|
||||||
comment_count = int_or_none(video_info.get('comment_count'))
|
comment_count = traverse_obj(video_info, ('comment_count', {int_or_none}))
|
||||||
|
|
||||||
user_info = user_info or traverse_obj(video_info, 'user', default={})
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': str_or_none(video_id) or video_uuid,
|
'id': video_id,
|
||||||
'title': video_info.get('description') or f'Video by {username}',
|
'display_id': display_id,
|
||||||
'thumbnail': video_info.get('thumbnail_url'),
|
'uploader': username,
|
||||||
'description': video_info.get('description'),
|
'uploader_id': user_id or traverse_obj(video_info, ('user', 'user_id', {str_or_none})),
|
||||||
'uploader': str_or_none(username),
|
'webpage_url': urljoin(f'https://triller.co/@{username}/video/', display_id),
|
||||||
'uploader_id': str_or_none(user_info.get('user_id')),
|
|
||||||
'creator': str_or_none(user_info.get('name')),
|
|
||||||
'timestamp': unified_timestamp(video_info.get('timestamp')),
|
|
||||||
'upload_date': unified_strdate(video_info.get('timestamp')),
|
|
||||||
'duration': int_or_none(video_info.get('duration')),
|
|
||||||
'view_count': int_or_none(video_info.get('play_count')),
|
|
||||||
'like_count': int_or_none(video_info.get('likes_count')),
|
|
||||||
'artist': str_or_none(video_info.get('song_artist')),
|
|
||||||
'track': str_or_none(video_info.get('song_title')),
|
|
||||||
'webpage_url': f'https://triller.co/@{username}/video/{video_uuid}',
|
|
||||||
'uploader_url': f'https://triller.co/@{username}',
|
'uploader_url': f'https://triller.co/@{username}',
|
||||||
'extractor_key': TrillerIE.ie_key(),
|
'extractor_key': TrillerIE.ie_key(),
|
||||||
'extractor': TrillerIE.IE_NAME,
|
'extractor': TrillerIE.IE_NAME,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'comment_count': comment_count,
|
'comment_count': comment_count,
|
||||||
'__post_extractor': self.extract_comments(video_id, comment_count),
|
'__post_extractor': self.extract_comments(video_id, comment_count),
|
||||||
|
**traverse_obj(video_info, {
|
||||||
|
'title': ('description', {lambda x: x.replace('\r\n', ' ')}),
|
||||||
|
'description': 'description',
|
||||||
|
'creator': ((('user'), ('users', lambda _, v: str(v['user_id']) == user_id)), 'name'),
|
||||||
|
'thumbnail': ('thumbnail_url', {url_or_none}),
|
||||||
|
'timestamp': ('timestamp', {unified_timestamp}),
|
||||||
|
'duration': ('duration', {int_or_none}),
|
||||||
|
'view_count': ('play_count', {int_or_none}),
|
||||||
|
'like_count': ('likes_count', {int_or_none}),
|
||||||
|
'artist': 'song_artist',
|
||||||
|
'track': 'song_title',
|
||||||
|
}, get_all=False),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class TrillerIE(TrillerBaseIE):
|
class TrillerIE(TrillerBaseIE):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://(?:www\.)?triller\.co/
|
https?://(?:www\.)?triller\.co/
|
||||||
@(?P<username>[\w\._]+)/video/
|
@(?P<username>[\w.]+)/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})
|
||||||
(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})
|
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://triller.co/@theestallion/video/2358fcd7-3df2-4c77-84c8-1d091610a6cf',
|
'url': 'https://triller.co/@theestallion/video/2358fcd7-3df2-4c77-84c8-1d091610a6cf',
|
||||||
|
@ -165,16 +168,14 @@ class TrillerIE(TrillerBaseIE):
|
||||||
'timestamp': 1660598222,
|
'timestamp': 1660598222,
|
||||||
'upload_date': '20220815',
|
'upload_date': '20220815',
|
||||||
'duration': 47,
|
'duration': 47,
|
||||||
'height': 3840,
|
|
||||||
'width': 2160,
|
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'artist': 'Megan Thee Stallion',
|
'artist': 'Megan Thee Stallion',
|
||||||
'track': 'Her',
|
'track': 'Her',
|
||||||
'webpage_url': 'https://triller.co/@theestallion/video/2358fcd7-3df2-4c77-84c8-1d091610a6cf',
|
|
||||||
'uploader_url': 'https://triller.co/@theestallion',
|
'uploader_url': 'https://triller.co/@theestallion',
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
}
|
},
|
||||||
|
'skip': 'This video has been removed due to licensing restrictions',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://triller.co/@charlidamelio/video/46c6fcfa-aa9e-4503-a50c-68444f44cddc',
|
'url': 'https://triller.co/@charlidamelio/video/46c6fcfa-aa9e-4503-a50c-68444f44cddc',
|
||||||
'md5': '874055f462af5b0699b9dbb527a505a0',
|
'md5': '874055f462af5b0699b9dbb527a505a0',
|
||||||
|
@ -182,6 +183,7 @@ class TrillerIE(TrillerBaseIE):
|
||||||
'id': '71621339',
|
'id': '71621339',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
|
'title': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
|
||||||
|
'display_id': '46c6fcfa-aa9e-4503-a50c-68444f44cddc',
|
||||||
'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
|
'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
|
||||||
'description': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
|
'description': 'md5:4c91ea82760fe0fffb71b8c3aa7295fc',
|
||||||
'uploader': 'charlidamelio',
|
'uploader': 'charlidamelio',
|
||||||
|
@ -190,59 +192,75 @@ class TrillerIE(TrillerBaseIE):
|
||||||
'timestamp': 1660773354,
|
'timestamp': 1660773354,
|
||||||
'upload_date': '20220817',
|
'upload_date': '20220817',
|
||||||
'duration': 16,
|
'duration': 16,
|
||||||
'height': 1920,
|
|
||||||
'width': 1080,
|
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'artist': 'Dixie',
|
'artist': 'Dixie',
|
||||||
'track': 'Someone to Blame',
|
'track': 'Someone to Blame',
|
||||||
'webpage_url': 'https://triller.co/@charlidamelio/video/46c6fcfa-aa9e-4503-a50c-68444f44cddc',
|
|
||||||
'uploader_url': 'https://triller.co/@charlidamelio',
|
'uploader_url': 'https://triller.co/@charlidamelio',
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
}
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://triller.co/@theestallion/video/07f35f38-1f51-48e2-8c5f-f7a8e829988f',
|
||||||
|
'md5': 'af7b3553e4b8bfca507636471ee2eb41',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '71837829',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'UNGRATEFUL VIDEO OUT NOW 👏🏾👏🏾👏🏾 💙💙 link my bio #womeninhiphop',
|
||||||
|
'display_id': '07f35f38-1f51-48e2-8c5f-f7a8e829988f',
|
||||||
|
'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
|
||||||
|
'description': 'UNGRATEFUL VIDEO OUT NOW 👏🏾👏🏾👏🏾 💙💙 link my bio\r\n #womeninhiphop',
|
||||||
|
'uploader': 'theestallion',
|
||||||
|
'uploader_id': '18992236',
|
||||||
|
'creator': 'Megan Thee Stallion',
|
||||||
|
'timestamp': 1662486178,
|
||||||
|
'upload_date': '20220906',
|
||||||
|
'duration': 30,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'artist': 'Unknown',
|
||||||
|
'track': 'Unknown',
|
||||||
|
'uploader_url': 'https://triller.co/@theestallion',
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
username, video_uuid = self._match_valid_url(url).group('username', 'id')
|
username, display_id = self._match_valid_url(url).group('username', 'id')
|
||||||
|
|
||||||
video_info = traverse_obj(self._download_json(
|
video_info = self._download_json(
|
||||||
f'{self._API_BASE_URL}/api/videos/{video_uuid}',
|
f'{self._API_BASE_URL}/api/videos/{display_id}', display_id,
|
||||||
video_uuid, note='Downloading video info API JSON',
|
headers=self._API_HEADERS)['videos'][0]
|
||||||
errnote='Unable to download video info API JSON',
|
|
||||||
headers=self._API_HEADERS), ('videos', 0))
|
|
||||||
if not video_info:
|
|
||||||
raise ExtractorError('No video info found in API response')
|
|
||||||
|
|
||||||
user_info = self._check_user_info(video_info.get('user') or {})
|
self._check_user_info(video_info.get('user') or {})
|
||||||
return self._parse_video_info(video_info, username, user_info)
|
|
||||||
|
return self._parse_video_info(video_info, username, None, display_id)
|
||||||
|
|
||||||
|
|
||||||
class TrillerUserIE(TrillerBaseIE):
|
class TrillerUserIE(TrillerBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?triller\.co/@(?P<id>[\w\._]+)/?(?:$|[#?])'
|
_VALID_URL = r'https?://(?:www\.)?triller\.co/@(?P<id>[\w.]+)/?(?:$|[#?])'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# first videos request only returns 2 videos
|
|
||||||
'url': 'https://triller.co/@theestallion',
|
'url': 'https://triller.co/@theestallion',
|
||||||
'playlist_mincount': 9,
|
'playlist_mincount': 12,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '18992236',
|
'id': '18992236',
|
||||||
'title': 'theestallion',
|
'title': 'theestallion',
|
||||||
'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
|
'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
|
||||||
}
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://triller.co/@charlidamelio',
|
'url': 'https://triller.co/@charlidamelio',
|
||||||
'playlist_mincount': 25,
|
'playlist_mincount': 150,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1875551',
|
'id': '1875551',
|
||||||
'title': 'charlidamelio',
|
'title': 'charlidamelio',
|
||||||
'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
|
'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
|
||||||
}
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
if not self._API_HEADERS.get('Authorization'):
|
if not self._API_HEADERS.get('Authorization'):
|
||||||
guest = self._download_json(
|
guest = self._download_json(
|
||||||
f'{self._API_BASE_URL}/user/create_guest',
|
f'{self._API_BASE_URL}/user/create_guest', None,
|
||||||
None, note='Creating guest session', data=b'', headers=self._API_HEADERS, query={
|
note='Creating guest session', data=b'', headers=self._API_HEADERS, query={
|
||||||
'platform': 'Web',
|
'platform': 'Web',
|
||||||
'app_version': '',
|
'app_version': '',
|
||||||
})
|
})
|
||||||
|
@ -251,44 +269,65 @@ class TrillerUserIE(TrillerBaseIE):
|
||||||
|
|
||||||
self._API_HEADERS['Authorization'] = f'Bearer {guest["auth_token"]}'
|
self._API_HEADERS['Authorization'] = f'Bearer {guest["auth_token"]}'
|
||||||
|
|
||||||
def _extract_video_list(self, username, user_id, limit=6):
|
def _entries(self, username, user_id, limit=6):
|
||||||
query = {
|
query = {'limit': limit}
|
||||||
'limit': limit,
|
|
||||||
}
|
|
||||||
for page in itertools.count(1):
|
for page in itertools.count(1):
|
||||||
for retry in self.RetryManager():
|
videos = self._download_json(
|
||||||
try:
|
f'{self._API_BASE_URL}/api/users/{user_id}/videos',
|
||||||
video_list = self._download_json(
|
username, note=f'Downloading user video list page {page}',
|
||||||
f'{self._API_BASE_URL}/api/users/{user_id}/videos',
|
headers=self._API_HEADERS, query=query)
|
||||||
username, note=f'Downloading user video list page {page}',
|
|
||||||
errnote='Unable to download user video list', headers=self._API_HEADERS,
|
for video in traverse_obj(videos, ('videos', ...)):
|
||||||
query=query)
|
yield self._parse_video_info(video, username, user_id)
|
||||||
except ExtractorError as e:
|
|
||||||
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
|
query['before_time'] = traverse_obj(videos, ('videos', -1, 'timestamp'))
|
||||||
retry.error = e
|
|
||||||
continue
|
|
||||||
raise
|
|
||||||
if not video_list.get('videos'):
|
|
||||||
break
|
|
||||||
yield from video_list['videos']
|
|
||||||
query['before_time'] = traverse_obj(video_list, ('videos', -1, 'timestamp'))
|
|
||||||
if not query['before_time']:
|
if not query['before_time']:
|
||||||
break
|
break
|
||||||
|
|
||||||
def _entries(self, videos, username, user_info):
|
|
||||||
for video in videos:
|
|
||||||
yield self._parse_video_info(video, username, user_info)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
username = self._match_id(url)
|
username = self._match_id(url)
|
||||||
|
|
||||||
user_info = self._check_user_info(self._download_json(
|
user_info = self._check_user_info(self._download_json(
|
||||||
f'{self._API_BASE_URL}/api/users/by_username/{username}',
|
f'{self._API_BASE_URL}/api/users/by_username/{username}',
|
||||||
username, note='Downloading user info',
|
username, note='Downloading user info', headers=self._API_HEADERS)['user'])
|
||||||
errnote='Failed to download user info', headers=self._API_HEADERS).get('user', {}))
|
|
||||||
|
|
||||||
user_id = str_or_none(user_info.get('user_id'))
|
user_id = str_or_none(user_info.get('user_id'))
|
||||||
videos = self._extract_video_list(username, user_id)
|
if not user_id:
|
||||||
thumbnail = user_info.get('avatar_url')
|
raise ExtractorError('Unable to extract user ID')
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
self._entries(videos, username, user_info), user_id, username, thumbnail=thumbnail)
|
self._entries(username, user_id), user_id, username, thumbnail=user_info.get('avatar_url'))
|
||||||
|
|
||||||
|
|
||||||
|
class TrillerShortIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://v\.triller\.co/(?P<id>\w+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://v.triller.co/WWZNWk',
|
||||||
|
'md5': '5eb8dc2c971bd8cd794ec9e8d5e9d101',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '66210052',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:2dfc89d154cd91a4a18cd9582ba03e16',
|
||||||
|
'display_id': 'f4480e1f-fb4e-45b9-a44c-9e6c679ce7eb',
|
||||||
|
'thumbnail': r're:^https://uploads\.cdn\.triller\.co/.+\.jpg$',
|
||||||
|
'description': 'md5:2dfc89d154cd91a4a18cd9582ba03e16',
|
||||||
|
'uploader': 'statefairent',
|
||||||
|
'uploader_id': '487545193',
|
||||||
|
'creator': 'Official Summer Fair of LA',
|
||||||
|
'timestamp': 1629655457,
|
||||||
|
'upload_date': '20210822',
|
||||||
|
'duration': 19,
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'artist': 'Unknown',
|
||||||
|
'track': 'Unknown',
|
||||||
|
'uploader_url': 'https://triller.co/@statefairent',
|
||||||
|
'comment_count': int,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
real_url = self._request_webpage(HEADRequest(url), self._match_id(url)).geturl()
|
||||||
|
if self.suitable(real_url): # Prevent infinite loop in case redirect fails
|
||||||
|
raise UnsupportedError(real_url)
|
||||||
|
return self.url_result(real_url)
|
||||||
|
|
|
@ -482,21 +482,34 @@ class TVPEmbedIE(InfoExtractor):
|
||||||
class TVPVODBaseIE(InfoExtractor):
|
class TVPVODBaseIE(InfoExtractor):
|
||||||
_API_BASE_URL = 'https://vod.tvp.pl/api/products'
|
_API_BASE_URL = 'https://vod.tvp.pl/api/products'
|
||||||
|
|
||||||
def _call_api(self, resource, video_id, **kwargs):
|
def _call_api(self, resource, video_id, query={}, **kwargs):
|
||||||
return self._download_json(
|
is_valid = lambda x: 200 <= x < 300
|
||||||
|
document, urlh = self._download_json_handle(
|
||||||
f'{self._API_BASE_URL}/{resource}', video_id,
|
f'{self._API_BASE_URL}/{resource}', video_id,
|
||||||
query={'lang': 'pl', 'platform': 'BROWSER'}, **kwargs)
|
query={'lang': 'pl', 'platform': 'BROWSER', **query},
|
||||||
|
expected_status=lambda x: is_valid(x) or 400 <= x < 500, **kwargs)
|
||||||
|
if is_valid(urlh.status):
|
||||||
|
return document
|
||||||
|
raise ExtractorError(f'Woronicza said: {document.get("code")} (HTTP {urlh.status})')
|
||||||
|
|
||||||
def _parse_video(self, video):
|
def _parse_video(self, video, with_url=True):
|
||||||
return {
|
info_dict = traverse_obj(video, {
|
||||||
'_type': 'url',
|
'id': ('id', {str_or_none}),
|
||||||
'url': 'tvp:' + video['externalUid'],
|
'title': 'title',
|
||||||
'ie_key': TVPEmbedIE.ie_key(),
|
'age_limit': ('rating', {int_or_none}),
|
||||||
'title': video.get('title'),
|
'duration': ('duration', {int_or_none}),
|
||||||
'description': traverse_obj(video, ('lead', 'description')),
|
'episode_number': ('number', {int_or_none}),
|
||||||
'age_limit': int_or_none(video.get('rating')),
|
'series': ('season', 'serial', 'title', {str_or_none}),
|
||||||
'duration': int_or_none(video.get('duration')),
|
'thumbnails': ('images', ..., ..., {'url': ('url', {url_or_none})}),
|
||||||
}
|
})
|
||||||
|
info_dict['description'] = clean_html(dict_get(video, ('lead', 'description')))
|
||||||
|
if with_url:
|
||||||
|
info_dict.update({
|
||||||
|
'_type': 'url',
|
||||||
|
'url': video['webUrl'],
|
||||||
|
'ie_key': TVPVODVideoIE.ie_key(),
|
||||||
|
})
|
||||||
|
return info_dict
|
||||||
|
|
||||||
|
|
||||||
class TVPVODVideoIE(TVPVODBaseIE):
|
class TVPVODVideoIE(TVPVODBaseIE):
|
||||||
|
@ -506,37 +519,70 @@ class TVPVODVideoIE(TVPVODBaseIE):
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357',
|
'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '60468609',
|
'id': '311357',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Laboratorium alchemika, Tusze termiczne. Jak zobaczyć niewidoczne. Odcinek 24',
|
'title': 'Tusze termiczne. Jak zobaczyć niewidoczne. Odcinek 24',
|
||||||
'description': 'md5:1d4098d3e537092ccbac1abf49b7cd4c',
|
'description': 'md5:1d4098d3e537092ccbac1abf49b7cd4c',
|
||||||
'duration': 300,
|
'duration': 300,
|
||||||
'episode_number': 24,
|
'episode_number': 24,
|
||||||
'episode': 'Episode 24',
|
'episode': 'Episode 24',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'series': 'Laboratorium alchemika',
|
'series': 'Laboratorium alchemika',
|
||||||
'thumbnail': 're:https://.+',
|
'thumbnail': 're:https?://.+',
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://vod.tvp.pl/filmy-dokumentalne,163/ukrainski-sluga-narodu,339667',
|
'url': 'https://vod.tvp.pl/filmy-dokumentalne,163/ukrainski-sluga-narodu,339667',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '51640077',
|
'id': '339667',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Ukraiński sługa narodu, Ukraiński sługa narodu',
|
'title': 'Ukraiński sługa narodu',
|
||||||
'series': 'Ukraiński sługa narodu',
|
|
||||||
'description': 'md5:b7940c0a8e439b0c81653a986f544ef3',
|
'description': 'md5:b7940c0a8e439b0c81653a986f544ef3',
|
||||||
'age_limit': 12,
|
'age_limit': 12,
|
||||||
'episode': 'Episode 0',
|
|
||||||
'episode_number': 0,
|
|
||||||
'duration': 3051,
|
'duration': 3051,
|
||||||
'thumbnail': 're:https://.+',
|
'thumbnail': 're:https?://.+',
|
||||||
|
'subtitles': 'count:2',
|
||||||
},
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
}, {
|
||||||
|
'note': 'embed fails with "payment required"',
|
||||||
|
'url': 'https://vod.tvp.pl/seriale,18/polowanie-na-cmy-odcinki,390116/odcinek-7,S01E07,398869',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '398869',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'odc. 7',
|
||||||
|
'description': 'md5:dd2bb33f023dc5c2fbaddfbe4cb5dba0',
|
||||||
|
'duration': 2750,
|
||||||
|
'age_limit': 16,
|
||||||
|
'series': 'Polowanie na ćmy',
|
||||||
|
'episode_number': 7,
|
||||||
|
'episode': 'Episode 7',
|
||||||
|
'thumbnail': 're:https?://.+',
|
||||||
|
},
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
return self._parse_video(self._call_api(f'vods/{video_id}', video_id))
|
info_dict = self._parse_video(self._call_api(f'vods/{video_id}', video_id), with_url=False)
|
||||||
|
|
||||||
|
playlist = self._call_api(f'{video_id}/videos/playlist', video_id, query={'videoType': 'MOVIE'})
|
||||||
|
|
||||||
|
info_dict['formats'] = []
|
||||||
|
for manifest_url in traverse_obj(playlist, ('sources', 'HLS', ..., 'src')):
|
||||||
|
info_dict['formats'].extend(self._extract_m3u8_formats(manifest_url, video_id, fatal=False))
|
||||||
|
for manifest_url in traverse_obj(playlist, ('sources', 'DASH', ..., 'src')):
|
||||||
|
info_dict['formats'].extend(self._extract_mpd_formats(manifest_url, video_id, fatal=False))
|
||||||
|
|
||||||
|
info_dict['subtitles'] = {}
|
||||||
|
for sub in playlist.get('subtitles') or []:
|
||||||
|
info_dict['subtitles'].setdefault(sub.get('language') or 'und', []).append({
|
||||||
|
'url': sub['url'],
|
||||||
|
'ext': 'ttml',
|
||||||
|
})
|
||||||
|
|
||||||
|
return info_dict
|
||||||
|
|
||||||
|
|
||||||
class TVPVODSeriesIE(TVPVODBaseIE):
|
class TVPVODSeriesIE(TVPVODBaseIE):
|
||||||
|
@ -551,7 +597,7 @@ class TVPVODSeriesIE(TVPVODBaseIE):
|
||||||
'age_limit': 12,
|
'age_limit': 12,
|
||||||
'categories': ['seriale'],
|
'categories': ['seriale'],
|
||||||
},
|
},
|
||||||
'playlist_count': 129,
|
'playlist_count': 130,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://vod.tvp.pl/programy,88/rolnik-szuka-zony-odcinki,284514',
|
'url': 'https://vod.tvp.pl/programy,88/rolnik-szuka-zony-odcinki,284514',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
|
|
@ -179,6 +179,14 @@ class TwitchBaseIE(InfoExtractor):
|
||||||
video_id, ops,
|
video_id, ops,
|
||||||
'Downloading %s access token GraphQL' % token_kind)['data'][method]
|
'Downloading %s access token GraphQL' % token_kind)['data'][method]
|
||||||
|
|
||||||
|
def _get_thumbnails(self, thumbnail):
|
||||||
|
return [{
|
||||||
|
'url': re.sub(r'\d+x\d+(\.\w+)($|(?=[?#]))', r'0x0\g<1>', thumbnail),
|
||||||
|
'preference': 1,
|
||||||
|
}, {
|
||||||
|
'url': thumbnail,
|
||||||
|
}] if thumbnail else None
|
||||||
|
|
||||||
|
|
||||||
class TwitchVodIE(TwitchBaseIE):
|
class TwitchVodIE(TwitchBaseIE):
|
||||||
IE_NAME = 'twitch:vod'
|
IE_NAME = 'twitch:vod'
|
||||||
|
@ -460,15 +468,13 @@ class TwitchVodIE(TwitchBaseIE):
|
||||||
is_live, thumbnail = True, None
|
is_live, thumbnail = True, None
|
||||||
else:
|
else:
|
||||||
is_live = False
|
is_live = False
|
||||||
for p in ('width', 'height'):
|
|
||||||
thumbnail = thumbnail.replace('{%s}' % p, '0')
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': vod_id,
|
'id': vod_id,
|
||||||
'title': info.get('title') or 'Untitled Broadcast',
|
'title': info.get('title') or 'Untitled Broadcast',
|
||||||
'description': info.get('description'),
|
'description': info.get('description'),
|
||||||
'duration': int_or_none(info.get('lengthSeconds')),
|
'duration': int_or_none(info.get('lengthSeconds')),
|
||||||
'thumbnail': thumbnail,
|
'thumbnails': self._get_thumbnails(thumbnail),
|
||||||
'uploader': try_get(info, lambda x: x['owner']['displayName'], compat_str),
|
'uploader': try_get(info, lambda x: x['owner']['displayName'], compat_str),
|
||||||
'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str),
|
'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str),
|
||||||
'timestamp': unified_timestamp(info.get('publishedAt')),
|
'timestamp': unified_timestamp(info.get('publishedAt')),
|
||||||
|
@ -1053,7 +1059,7 @@ class TwitchStreamIE(TwitchBaseIE):
|
||||||
'display_id': channel_name,
|
'display_id': channel_name,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnails': self._get_thumbnails(thumbnail),
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'uploader_id': channel_name,
|
'uploader_id': channel_name,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import urllib.error
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .periscope import PeriscopeBaseIE, PeriscopeIE
|
from .periscope import PeriscopeBaseIE, PeriscopeIE
|
||||||
|
@ -17,6 +16,7 @@ from ..utils import (
|
||||||
format_field,
|
format_field,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
make_archive_id,
|
make_archive_id,
|
||||||
|
remove_end,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
|
@ -32,11 +32,9 @@ from ..utils import (
|
||||||
class TwitterBaseIE(InfoExtractor):
|
class TwitterBaseIE(InfoExtractor):
|
||||||
_API_BASE = 'https://api.twitter.com/1.1/'
|
_API_BASE = 'https://api.twitter.com/1.1/'
|
||||||
_GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
|
_GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
|
||||||
_TOKENS = {
|
|
||||||
'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA': None,
|
|
||||||
'AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw': None,
|
|
||||||
}
|
|
||||||
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
|
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
|
||||||
|
_AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
|
||||||
|
_guest_token = None
|
||||||
|
|
||||||
def _extract_variant_formats(self, variant, video_id):
|
def _extract_variant_formats(self, variant, video_id):
|
||||||
variant_url = variant.get('url')
|
variant_url = variant.get('url')
|
||||||
|
@ -94,7 +92,7 @@ class TwitterBaseIE(InfoExtractor):
|
||||||
|
|
||||||
def _call_api(self, path, video_id, query={}, graphql=False):
|
def _call_api(self, path, video_id, query={}, graphql=False):
|
||||||
cookies = self._get_cookies(self._API_BASE)
|
cookies = self._get_cookies(self._API_BASE)
|
||||||
headers = {}
|
headers = self._AUTH.copy()
|
||||||
|
|
||||||
csrf_cookie = cookies.get('ct0')
|
csrf_cookie = cookies.get('ct0')
|
||||||
if csrf_cookie:
|
if csrf_cookie:
|
||||||
|
@ -107,54 +105,34 @@ class TwitterBaseIE(InfoExtractor):
|
||||||
'x-twitter-active-user': 'yes',
|
'x-twitter-active-user': 'yes',
|
||||||
})
|
})
|
||||||
|
|
||||||
last_error = None
|
for first_attempt in (True, False):
|
||||||
for bearer_token in self._TOKENS:
|
if not self.is_logged_in and not self._guest_token:
|
||||||
for first_attempt in (True, False):
|
headers.pop('x-guest-token', None)
|
||||||
headers['Authorization'] = f'Bearer {bearer_token}'
|
self._guest_token = traverse_obj(self._download_json(
|
||||||
|
f'{self._API_BASE}guest/activate.json', video_id,
|
||||||
|
'Downloading guest token', data=b'', headers=headers), 'guest_token')
|
||||||
|
if self._guest_token:
|
||||||
|
headers['x-guest-token'] = self._guest_token
|
||||||
|
elif not self.is_logged_in:
|
||||||
|
raise ExtractorError('Could not retrieve guest token')
|
||||||
|
|
||||||
if not self.is_logged_in:
|
allowed_status = {400, 401, 403, 404} if graphql else {403}
|
||||||
if not self._TOKENS[bearer_token]:
|
result = self._download_json(
|
||||||
headers.pop('x-guest-token', None)
|
(self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
|
||||||
guest_token_response = self._download_json(
|
video_id, headers=headers, query=query, expected_status=allowed_status,
|
||||||
self._API_BASE + 'guest/activate.json', video_id,
|
note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')
|
||||||
'Downloading guest token', data=b'', headers=headers)
|
|
||||||
|
|
||||||
self._TOKENS[bearer_token] = guest_token_response.get('guest_token')
|
if result.get('errors'):
|
||||||
if not self._TOKENS[bearer_token]:
|
errors = ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str}))))
|
||||||
raise ExtractorError('Could not retrieve guest token')
|
if not self.is_logged_in and first_attempt and 'bad guest token' in errors.lower():
|
||||||
|
self.to_screen('Guest token has expired. Refreshing guest token')
|
||||||
|
self._guest_token = None
|
||||||
|
continue
|
||||||
|
|
||||||
headers['x-guest-token'] = self._TOKENS[bearer_token]
|
raise ExtractorError(
|
||||||
|
f'Error(s) while querying API: {errors or "Unknown error"}', expected=True)
|
||||||
|
|
||||||
try:
|
return result
|
||||||
allowed_status = {400, 403, 404} if graphql else {403}
|
|
||||||
result = self._download_json(
|
|
||||||
(self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
|
|
||||||
video_id, headers=headers, query=query, expected_status=allowed_status)
|
|
||||||
|
|
||||||
except ExtractorError as e:
|
|
||||||
if last_error:
|
|
||||||
raise last_error
|
|
||||||
|
|
||||||
if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404:
|
|
||||||
raise
|
|
||||||
|
|
||||||
last_error = e
|
|
||||||
self.report_warning(
|
|
||||||
'Twitter API gave 404 response, retrying with deprecated auth token. '
|
|
||||||
'Only one media item can be extracted')
|
|
||||||
break # continue outer loop with next bearer_token
|
|
||||||
|
|
||||||
if result.get('errors'):
|
|
||||||
errors = traverse_obj(result, ('errors', ..., 'message'), expected_type=str)
|
|
||||||
if first_attempt and any('bad guest token' in error.lower() for error in errors):
|
|
||||||
self.to_screen('Guest token has expired. Refreshing guest token')
|
|
||||||
self._TOKENS[bearer_token] = None
|
|
||||||
continue
|
|
||||||
|
|
||||||
error_message = ', '.join(set(errors)) or 'Unknown error'
|
|
||||||
raise ExtractorError(f'Error(s) while querying API: {error_message}', expected=True)
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
def _build_graphql_query(self, media_id):
|
def _build_graphql_query(self, media_id):
|
||||||
raise NotImplementedError('Method must be implemented to support GraphQL')
|
raise NotImplementedError('Method must be implemented to support GraphQL')
|
||||||
|
@ -313,6 +291,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
'view_count': int,
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
|
@ -391,6 +370,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
'view_count': int,
|
||||||
'tags': ['Damndaniel'],
|
'tags': ['Damndaniel'],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
|
@ -431,6 +411,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
'view_count': int,
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
|
@ -480,6 +461,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
'view_count': int,
|
||||||
'tags': ['Maria'],
|
'tags': ['Maria'],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
|
@ -505,6 +487,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
'view_count': int,
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
|
@ -529,6 +512,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
'view_count': int,
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
|
@ -589,6 +573,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
'view_count': int,
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
|
@ -630,12 +615,12 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
'view_count': int,
|
||||||
'tags': ['HurricaneIan'],
|
'tags': ['HurricaneIan'],
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# Adult content, uses old token
|
# Adult content, fails if not logged in (GraphQL)
|
||||||
# Fails if not logged in (GraphQL)
|
|
||||||
'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
|
'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1575199163847000068',
|
'id': '1575199163847000068',
|
||||||
|
@ -655,9 +640,8 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
'tags': []
|
'tags': []
|
||||||
},
|
},
|
||||||
'expected_warnings': ['404'],
|
'skip': 'Requires authentication',
|
||||||
}, {
|
}, {
|
||||||
# Description is missing one https://t.co url (GraphQL)
|
|
||||||
'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
|
'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 2,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -669,14 +653,13 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'upload_date': '20210519',
|
'upload_date': '20210519',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw https://t.co/kbXZrozlY7',
|
'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
|
||||||
'uploader_id': 'Srirachachau',
|
'uploader_id': 'Srirachachau',
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'uploader_url': 'https://twitter.com/Srirachachau',
|
'uploader_url': 'https://twitter.com/Srirachachau',
|
||||||
'timestamp': 1621447860,
|
'timestamp': 1621447860,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# Description is missing one https://t.co url (GraphQL)
|
|
||||||
'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
|
'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 2,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -688,7 +671,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'uploader': str,
|
'uploader': str,
|
||||||
'timestamp': 1665143744,
|
'timestamp': 1665143744,
|
||||||
'uploader_url': 'https://twitter.com/DavidToons_',
|
'uploader_url': 'https://twitter.com/DavidToons_',
|
||||||
'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/glfQdgfFXH https://t.co/WgJauwIW1w',
|
'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
|
||||||
'tags': [],
|
'tags': [],
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'upload_date': '20221007',
|
'upload_date': '20221007',
|
||||||
|
@ -752,7 +735,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1600649511827013632',
|
'id': '1600649511827013632',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'md5:dac4f4d4c591fcc4e88a253eba472dc3',
|
'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
|
||||||
'thumbnail': r're:^https?://.+\.jpg',
|
'thumbnail': r're:^https?://.+\.jpg',
|
||||||
'timestamp': 1670459604.0,
|
'timestamp': 1670459604.0,
|
||||||
'uploader_id': 'CTVJLaidlaw',
|
'uploader_id': 'CTVJLaidlaw',
|
||||||
|
@ -764,6 +747,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'uploader_url': 'https://twitter.com/CTVJLaidlaw',
|
'uploader_url': 'https://twitter.com/CTVJLaidlaw',
|
||||||
'display_id': '1600649710662213632',
|
'display_id': '1600649710662213632',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
'view_count': int,
|
||||||
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
|
'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
|
||||||
'upload_date': '20221208',
|
'upload_date': '20221208',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
@ -791,6 +775,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
|
'view_count': int,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
|
'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
|
||||||
|
@ -806,6 +791,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'duration': 9.531,
|
'duration': 9.531,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
|
'view_count': int,
|
||||||
'upload_date': '20221203',
|
'upload_date': '20221203',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'timestamp': 1670092210.0,
|
'timestamp': 1670092210.0,
|
||||||
|
@ -815,7 +801,6 @@ class TwitterIE(TwitterBaseIE):
|
||||||
},
|
},
|
||||||
'params': {'noplaylist': True},
|
'params': {'noplaylist': True},
|
||||||
}, {
|
}, {
|
||||||
# Media view count is GraphQL only, force in test
|
|
||||||
'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
|
'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1600009362759733248',
|
'id': '1600009362759733248',
|
||||||
|
@ -826,10 +811,10 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
|
'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
'uploader': 'Mün The Shinobi | BlaqBoi\'s Therapist',
|
'uploader': 'Mün The Shinobi',
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'upload_date': '20221206',
|
'upload_date': '20221206',
|
||||||
'title': 'Mün The Shinobi | BlaqBoi\'s Therapist - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
|
'title': 'Mün The Shinobi - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'tags': [],
|
'tags': [],
|
||||||
|
@ -837,9 +822,8 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'duration': 139.987,
|
'duration': 139.987,
|
||||||
'timestamp': 1670306984.0,
|
'timestamp': 1670306984.0,
|
||||||
},
|
},
|
||||||
'params': {'extractor_args': {'twitter': {'force_graphql': ['']}}},
|
|
||||||
}, {
|
}, {
|
||||||
# url to retweet id
|
# url to retweet id, legacy API
|
||||||
'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
|
'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1623274794488659969',
|
'id': '1623274794488659969',
|
||||||
|
@ -860,6 +844,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
},
|
},
|
||||||
|
'params': {'extractor_args': {'twitter': {'legacy_api': ['']}}},
|
||||||
}, {
|
}, {
|
||||||
# onion route
|
# onion route
|
||||||
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
||||||
|
@ -905,11 +890,13 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'tweet_results', 'result', ('tweet', None),
|
'tweet_results', 'result', ('tweet', None),
|
||||||
), expected_type=dict, default={}, get_all=False)
|
), expected_type=dict, default={}, get_all=False)
|
||||||
|
|
||||||
if result.get('__typename') not in ('Tweet', None):
|
if result.get('__typename') not in ('Tweet', 'TweetTombstone', None):
|
||||||
self.report_warning(f'Unknown typename: {result.get("__typename")}', twid, only_once=True)
|
self.report_warning(f'Unknown typename: {result.get("__typename")}', twid, only_once=True)
|
||||||
|
|
||||||
if 'tombstone' in result:
|
if 'tombstone' in result:
|
||||||
cause = traverse_obj(result, ('tombstone', 'text', 'text'), expected_type=str)
|
cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
|
||||||
|
if cause and 'adult content' in cause:
|
||||||
|
self.raise_login_required(cause)
|
||||||
raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
|
raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
|
||||||
|
|
||||||
status = result.get('legacy', {})
|
status = result.get('legacy', {})
|
||||||
|
@ -922,7 +909,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
# extra transformation is needed since result does not match legacy format
|
# extra transformation is needed since result does not match legacy format
|
||||||
binding_values = {
|
binding_values = {
|
||||||
binding_value.get('key'): binding_value.get('value')
|
binding_value.get('key'): binding_value.get('value')
|
||||||
for binding_value in traverse_obj(status, ('card', 'binding_values', ...), expected_type=dict)
|
for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
|
||||||
}
|
}
|
||||||
if binding_values:
|
if binding_values:
|
||||||
status['card']['binding_values'] = binding_values
|
status['card']['binding_values'] = binding_values
|
||||||
|
@ -965,12 +952,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
twid, selected_index = self._match_valid_url(url).group('id', 'index')
|
twid, selected_index = self._match_valid_url(url).group('id', 'index')
|
||||||
if self.is_logged_in or self._configuration_arg('force_graphql'):
|
if self._configuration_arg('legacy_api') and not self.is_logged_in:
|
||||||
self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})')
|
|
||||||
result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
|
|
||||||
status = self._graphql_to_legacy(result, twid)
|
|
||||||
|
|
||||||
else:
|
|
||||||
status = traverse_obj(self._call_api(f'statuses/show/{twid}.json', twid, {
|
status = traverse_obj(self._call_api(f'statuses/show/{twid}.json', twid, {
|
||||||
'cards_platform': 'Web-12',
|
'cards_platform': 'Web-12',
|
||||||
'include_cards': 1,
|
'include_cards': 1,
|
||||||
|
@ -978,6 +960,9 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'include_user_entities': 0,
|
'include_user_entities': 0,
|
||||||
'tweet_mode': 'extended',
|
'tweet_mode': 'extended',
|
||||||
}), 'retweeted_status', None)
|
}), 'retweeted_status', None)
|
||||||
|
else:
|
||||||
|
result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
|
||||||
|
status = self._graphql_to_legacy(result, twid)
|
||||||
|
|
||||||
title = description = status['full_text'].replace('\n', ' ')
|
title = description = status['full_text'].replace('\n', ' ')
|
||||||
# strip 'https -_t.co_BJYgOjSeGA' junk from filenames
|
# strip 'https -_t.co_BJYgOjSeGA' junk from filenames
|
||||||
|
@ -1142,7 +1127,8 @@ class TwitterIE(TwitterBaseIE):
|
||||||
if not entries:
|
if not entries:
|
||||||
expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
|
expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
|
||||||
if not expanded_url or expanded_url == url:
|
if not expanded_url or expanded_url == url:
|
||||||
raise ExtractorError('No video could be found in this tweet', expected=True)
|
self.raise_no_formats('No video could be found in this tweet', expected=True)
|
||||||
|
return info
|
||||||
|
|
||||||
return self.url_result(expanded_url, display_id=twid, **info)
|
return self.url_result(expanded_url, display_id=twid, **info)
|
||||||
|
|
||||||
|
|
108
yt_dlp/extractor/wevidi.py
Normal file
108
yt_dlp/extractor/wevidi.py
Normal file
|
@ -0,0 +1,108 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import clean_html, float_or_none, get_element_by_class, js_to_json, traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class WeVidiIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?wevidi\.net/watch/(?P<id>[\w-]{11})'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://wevidi.net/watch/2th7UO5F4KV',
|
||||||
|
'md5': 'b913d1ff5bbad499e2c7ef4aa6d829d7',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2th7UO5F4KV',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'YouTube Alternative: WeVidi - customizable channels & more',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'description': 'md5:73a27d0a87d49fbcc5584566326ebeed',
|
||||||
|
'uploader': 'eclecRC',
|
||||||
|
'duration': 932.098,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://wevidi.net/watch/ievRuuQHbPS',
|
||||||
|
'md5': 'ce8a94989a959bff9003fa27ee572935',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ievRuuQHbPS',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'WeVidi Playlists',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'description': 'md5:32cdfca272687390d9bd9b0c9c6153ee',
|
||||||
|
'uploader': 'WeVidi',
|
||||||
|
'duration': 36.1999,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://wevidi.net/watch/PcMzDWaQSWb',
|
||||||
|
'md5': '55ee0d3434be5d9e5cc76b83f2bb57ec',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'PcMzDWaQSWb',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Cat blep',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'description': 'md5:e2c9e2b54b8bb424cc64937c8fdc068f',
|
||||||
|
'uploader': 'WeVidi',
|
||||||
|
'duration': 41.972,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://wevidi.net/watch/wJnRqDHNe_u',
|
||||||
|
'md5': 'c8f263dd47e66cc17546b3abf47b5a77',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'wJnRqDHNe_u',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Gissy Talks: YouTube Alternatives',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'description': 'md5:e65036f0d4af80e0af191bd11af5195e',
|
||||||
|
'uploader': 'GissyEva',
|
||||||
|
'duration': 630.451,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://wevidi.net/watch/4m1c4yJR_yc',
|
||||||
|
'md5': 'c63ce5ca6990dce86855fc02ca5bc1ed',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4m1c4yJR_yc',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Enough of that! - Awesome Exilez Podcast',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'description': 'md5:96af99dd63468b2dfab3020560e3e9b2',
|
||||||
|
'uploader': 'eclecRC',
|
||||||
|
'duration': 6.804,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _extract_formats(self, wvplayer_props):
|
||||||
|
# Taken from WeVidi player JS: https://wevidi.net/layouts/default/static/player.min.js
|
||||||
|
resolution_map = {
|
||||||
|
1: 144,
|
||||||
|
2: 240,
|
||||||
|
3: 360,
|
||||||
|
4: 480,
|
||||||
|
5: 720,
|
||||||
|
6: 1080
|
||||||
|
}
|
||||||
|
|
||||||
|
src_path = f'{wvplayer_props["srcVID"]}/{wvplayer_props["srcUID"]}/{wvplayer_props["srcNAME"]}'
|
||||||
|
for res in traverse_obj(wvplayer_props, ('resolutions', ..., {int}, {lambda x: x or None})):
|
||||||
|
format_id = str(-(res // -2) - 1)
|
||||||
|
yield {
|
||||||
|
'acodec': 'mp4a.40.2',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'format_id': format_id,
|
||||||
|
'height': resolution_map.get(res),
|
||||||
|
'url': f'https://www.wevidi.net/videoplayback/{src_path}/{format_id}',
|
||||||
|
'vcodec': 'avc1.42E01E',
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
wvplayer_props = self._search_json(
|
||||||
|
r'WVPlayer\(', webpage, 'player', video_id,
|
||||||
|
transform_source=lambda x: js_to_json(x.replace('||', '}')))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': clean_html(get_element_by_class('video_title', webpage)),
|
||||||
|
'description': clean_html(get_element_by_class('descr_long', webpage)),
|
||||||
|
'uploader': clean_html(get_element_by_class('username', webpage)),
|
||||||
|
'formats': list(self._extract_formats(wvplayer_props)),
|
||||||
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
'duration': float_or_none(wvplayer_props.get('duration')),
|
||||||
|
}
|
50
yt_dlp/extractor/whyp.py
Normal file
50
yt_dlp/extractor/whyp.py
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
str_or_none,
|
||||||
|
traverse_obj,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class WhypIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?whyp\.it/tracks/(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.whyp.it/tracks/18337/home-page-example-track-b4kq7',
|
||||||
|
'md5': 'c1187b42ebf8605284e3dc92aeb33d16',
|
||||||
|
'info_dict': {
|
||||||
|
'url': 'https://cdn.whyp.it/50eb17cc-e9ff-4e18-b89b-dc9206a95cb1.mp3',
|
||||||
|
'id': '18337',
|
||||||
|
'title': 'Home Page Example Track',
|
||||||
|
'description': 'md5:bd758000fb93f3159339c852b5b9133c',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'duration': 52.82,
|
||||||
|
'uploader': 'Brad',
|
||||||
|
'uploader_id': '1',
|
||||||
|
'thumbnail': 'https://cdn.whyp.it/a537bb36-3373-4c61-96c8-27fc1b2f427a.jpg',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.whyp.it/tracks/18337',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
unique_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, unique_id)
|
||||||
|
data = self._search_nuxt_data(webpage, unique_id)['rawTrack']
|
||||||
|
|
||||||
|
return {
|
||||||
|
'url': data['audio_url'],
|
||||||
|
'id': unique_id,
|
||||||
|
**traverse_obj(data, {
|
||||||
|
'title': 'title',
|
||||||
|
'description': 'description',
|
||||||
|
'duration': ('duration', {float_or_none}),
|
||||||
|
'uploader': ('user', 'username'),
|
||||||
|
'uploader_id': ('user', 'id', {str_or_none}),
|
||||||
|
'thumbnail': ('artwork_url', {url_or_none}),
|
||||||
|
}),
|
||||||
|
'ext': 'mp3',
|
||||||
|
'vcodec': 'none',
|
||||||
|
'http_headers': {'Referer': 'https://whyp.it/'},
|
||||||
|
}
|
|
@ -2,7 +2,6 @@ import hashlib
|
||||||
import itertools
|
import itertools
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from .brightcove import BrightcoveNewIE
|
|
||||||
from .common import InfoExtractor, SearchInfoExtractor
|
from .common import InfoExtractor, SearchInfoExtractor
|
||||||
from .youtube import YoutubeIE
|
from .youtube import YoutubeIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
@ -11,7 +10,6 @@ from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
smuggle_url,
|
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
try_get,
|
try_get,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
@ -337,121 +335,6 @@ class YahooSearchIE(SearchInfoExtractor):
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
class YahooGyaOPlayerIE(InfoExtractor):
|
|
||||||
IE_NAME = 'yahoo:gyao:player'
|
|
||||||
_VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:player|episode(?:/[^/]+)?)|streaming\.yahoo\.co\.jp/c/y)/(?P<id>\d+/v\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://gyao.yahoo.co.jp/player/00998/v00818/v0000000000000008564/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '5993125228001',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'フューリー 【字幕版】',
|
|
||||||
'description': 'md5:21e691c798a15330eda4db17a8fe45a5',
|
|
||||||
'uploader_id': '4235717419001',
|
|
||||||
'upload_date': '20190124',
|
|
||||||
'timestamp': 1548294365,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://streaming.yahoo.co.jp/c/y/01034/v00133/v0000000000000000706/',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://gyao.yahoo.co.jp/episode/%E3%81%8D%E3%81%AE%E3%81%86%E4%BD%95%E9%A3%9F%E3%81%B9%E3%81%9F%EF%BC%9F%20%E7%AC%AC2%E8%A9%B1%202019%2F4%2F12%E6%94%BE%E9%80%81%E5%88%86/5cb02352-b725-409e-9f8d-88f947a9f682',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://gyao.yahoo.co.jp/episode/5fa1226c-ef8d-4e93-af7a-fd92f4e30597',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
_GEO_BYPASS = False
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url).replace('/', ':')
|
|
||||||
headers = self.geo_verification_headers()
|
|
||||||
headers['Accept'] = 'application/json'
|
|
||||||
resp = self._download_json(
|
|
||||||
'https://gyao.yahoo.co.jp/apis/playback/graphql', video_id, query={
|
|
||||||
'appId': 'dj00aiZpPUNJeDh2cU1RazU3UCZzPWNvbnN1bWVyc2VjcmV0Jng9NTk-',
|
|
||||||
'query': '''{
|
|
||||||
content(parameter: {contentId: "%s", logicaAgent: PC_WEB}) {
|
|
||||||
video {
|
|
||||||
delivery {
|
|
||||||
id
|
|
||||||
}
|
|
||||||
title
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}''' % video_id,
|
|
||||||
}, headers=headers)
|
|
||||||
content = resp['data']['content']
|
|
||||||
if not content:
|
|
||||||
msg = resp['errors'][0]['message']
|
|
||||||
if msg == 'not in japan':
|
|
||||||
self.raise_geo_restricted(countries=['JP'])
|
|
||||||
raise ExtractorError(msg)
|
|
||||||
video = content['video']
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'id': video_id,
|
|
||||||
'title': video['title'],
|
|
||||||
'url': smuggle_url(
|
|
||||||
'http://players.brightcove.net/4235717419001/SyG5P0gjb_default/index.html?videoId=' + video['delivery']['id'],
|
|
||||||
{'geo_countries': ['JP']}),
|
|
||||||
'ie_key': BrightcoveNewIE.ie_key(),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class YahooGyaOIE(InfoExtractor):
|
|
||||||
IE_NAME = 'yahoo:gyao'
|
|
||||||
_VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:p|title(?:/[^/]+)?)|streaming\.yahoo\.co\.jp/p/y)/(?P<id>\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://gyao.yahoo.co.jp/title/%E3%82%BF%E3%82%A4%E3%83%A0%E3%83%9C%E3%82%AB%E3%83%B3%E3%82%B7%E3%83%AA%E3%83%BC%E3%82%BA%20%E3%83%A4%E3%83%83%E3%82%BF%E3%83%BC%E3%83%9E%E3%83%B3/5f60ceb3-6e5e-40ef-ba40-d68b598d067f',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '5f60ceb3-6e5e-40ef-ba40-d68b598d067f',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 80,
|
|
||||||
}, {
|
|
||||||
'url': 'https://gyao.yahoo.co.jp/p/00449/v03102/',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://streaming.yahoo.co.jp/p/y/01034/v00133/',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://gyao.yahoo.co.jp/title/%E3%81%97%E3%82%83%E3%81%B9%E3%81%8F%E3%82%8A007/5b025a49-b2e5-4dc7-945c-09c6634afacf',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://gyao.yahoo.co.jp/title/5b025a49-b2e5-4dc7-945c-09c6634afacf',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _entries(self, program_id):
|
|
||||||
page = 1
|
|
||||||
while True:
|
|
||||||
playlist = self._download_json(
|
|
||||||
f'https://gyao.yahoo.co.jp/api/programs/{program_id}/videos?page={page}&serviceId=gy', program_id,
|
|
||||||
note=f'Downloading JSON metadata page {page}')
|
|
||||||
if not playlist:
|
|
||||||
break
|
|
||||||
for video in playlist['videos']:
|
|
||||||
video_id = video.get('id')
|
|
||||||
if not video_id:
|
|
||||||
continue
|
|
||||||
if video.get('streamingAvailability') == 'notYet':
|
|
||||||
continue
|
|
||||||
yield self.url_result(
|
|
||||||
'https://gyao.yahoo.co.jp/player/%s/' % video_id.replace(':', '/'),
|
|
||||||
YahooGyaOPlayerIE.ie_key(), video_id)
|
|
||||||
if playlist.get('ended'):
|
|
||||||
break
|
|
||||||
page += 1
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
program_id = self._match_id(url).replace('/', ':')
|
|
||||||
return self.playlist_result(self._entries(program_id), program_id)
|
|
||||||
|
|
||||||
|
|
||||||
class YahooJapanNewsIE(InfoExtractor):
|
class YahooJapanNewsIE(InfoExtractor):
|
||||||
IE_NAME = 'yahoo:japannews'
|
IE_NAME = 'yahoo:japannews'
|
||||||
IE_DESC = 'Yahoo! Japan News'
|
IE_DESC = 'Yahoo! Japan News'
|
||||||
|
|
|
@ -6,6 +6,7 @@ import time
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
clean_html,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
@ -26,48 +27,8 @@ class YoukuIE(InfoExtractor):
|
||||||
'''
|
'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# MD5 is unstable
|
|
||||||
'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'XMTc1ODE5Njcy',
|
|
||||||
'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'duration': 74.73,
|
|
||||||
'thumbnail': r're:^https?://.*',
|
|
||||||
'uploader': '。躲猫猫、',
|
|
||||||
'uploader_id': '36017967',
|
|
||||||
'uploader_url': 'http://i.youku.com/u/UMTQ0MDcxODY4',
|
|
||||||
'tags': list,
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
|
'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
|
||||||
'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'XODgxNjg1Mzk2',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '武媚娘传奇 85',
|
|
||||||
'duration': 1999.61,
|
|
||||||
'thumbnail': r're:^https?://.*',
|
|
||||||
'uploader': '疯狂豆花',
|
|
||||||
'uploader_id': '62583473',
|
|
||||||
'uploader_url': 'http://i.youku.com/u/UMjUwMzMzODky',
|
|
||||||
'tags': list,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'XMTI1OTczNDM5Mg',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '花千骨 04',
|
|
||||||
'duration': 2363,
|
|
||||||
'thumbnail': r're:^https?://.*',
|
|
||||||
'uploader': '放剧场-花千骨',
|
|
||||||
'uploader_id': '772849359',
|
|
||||||
'uploader_url': 'http://i.youku.com/u/UMzA5MTM5NzQzNg==',
|
|
||||||
'tags': list,
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
|
'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
|
||||||
'note': 'Video protected with password',
|
'note': 'Video protected with password',
|
||||||
|
@ -81,6 +42,7 @@ class YoukuIE(InfoExtractor):
|
||||||
'uploader_id': '322014285',
|
'uploader_id': '322014285',
|
||||||
'uploader_url': 'http://i.youku.com/u/UMTI4ODA1NzE0MA==',
|
'uploader_url': 'http://i.youku.com/u/UMTI4ODA1NzE0MA==',
|
||||||
'tags': list,
|
'tags': list,
|
||||||
|
'skip': '404',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'videopassword': '100600',
|
'videopassword': '100600',
|
||||||
|
@ -192,7 +154,7 @@ class YoukuIE(InfoExtractor):
|
||||||
else:
|
else:
|
||||||
msg = 'Youku server reported error %i' % error.get('code')
|
msg = 'Youku server reported error %i' % error.get('code')
|
||||||
if error_note is not None:
|
if error_note is not None:
|
||||||
msg += ': ' + error_note
|
msg += ': ' + clean_html(error_note)
|
||||||
raise ExtractorError(msg)
|
raise ExtractorError(msg)
|
||||||
|
|
||||||
# get video title
|
# get video title
|
||||||
|
|
|
@ -6,6 +6,7 @@ from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
traverse_obj,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
@ -86,32 +87,31 @@ class YouPornIE(InfoExtractor):
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = self._match_valid_url(url)
|
video_id, display_id = self._match_valid_url(url).group('id', 'display_id')
|
||||||
video_id = mobj.group('id')
|
|
||||||
display_id = mobj.group('display_id') or video_id
|
|
||||||
|
|
||||||
definitions = self._download_json(
|
definitions = self._download_json(
|
||||||
'https://www.youporn.com/api/video/media_definitions/%s/' % video_id,
|
f'https://www.youporn.com/api/video/media_definitions/{video_id}/', display_id or video_id)
|
||||||
display_id)
|
|
||||||
|
def get_format_data(data, f):
|
||||||
|
return traverse_obj(data, lambda _, v: v['format'] == f and url_or_none(v['videoUrl']))
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for definition in definitions:
|
# Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master" m3u8s
|
||||||
if not isinstance(definition, dict):
|
for hls_url in traverse_obj(get_format_data(definitions, 'hls'), (
|
||||||
continue
|
lambda _, v: not isinstance(v['defaultQuality'], bool), 'videoUrl'), (..., 'videoUrl')):
|
||||||
video_url = url_or_none(definition.get('videoUrl'))
|
formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls'))
|
||||||
if not video_url:
|
|
||||||
continue
|
for definition in get_format_data(definitions, 'mp4'):
|
||||||
f = {
|
f = traverse_obj(definition, {
|
||||||
'url': video_url,
|
'url': 'videoUrl',
|
||||||
'filesize': int_or_none(definition.get('videoSize')),
|
'filesize': ('videoSize', {int_or_none})
|
||||||
}
|
})
|
||||||
height = int_or_none(definition.get('quality'))
|
height = int_or_none(definition.get('quality'))
|
||||||
# Video URL's path looks like this:
|
# Video URL's path looks like this:
|
||||||
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
||||||
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
||||||
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4
|
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4
|
||||||
# We will benefit from it by extracting some metadata
|
# We will benefit from it by extracting some metadata
|
||||||
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
|
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', definition['videoUrl'])
|
||||||
if mobj:
|
if mobj:
|
||||||
if not height:
|
if not height:
|
||||||
height = int(mobj.group('height'))
|
height = int(mobj.group('height'))
|
||||||
|
@ -179,6 +179,7 @@ class YouPornIE(InfoExtractor):
|
||||||
'tags')
|
'tags')
|
||||||
|
|
||||||
data = self._search_json_ld(webpage, video_id, expected_type='VideoObject', fatal=False)
|
data = self._search_json_ld(webpage, video_id, expected_type='VideoObject', fatal=False)
|
||||||
|
data.pop('url', None)
|
||||||
return merge_dicts(data, {
|
return merge_dicts(data, {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -5,6 +5,7 @@ from ..utils import (
|
||||||
str_or_none,
|
str_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
parse_filesize,
|
parse_filesize,
|
||||||
|
traverse_obj,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
@ -12,8 +13,8 @@ from ..utils import (
|
||||||
|
|
||||||
class ZoomIE(InfoExtractor):
|
class ZoomIE(InfoExtractor):
|
||||||
IE_NAME = 'zoom'
|
IE_NAME = 'zoom'
|
||||||
_VALID_URL = r'(?P<base_url>https?://(?:[^.]+\.)?zoom.us/)rec(?:ording)?/(?:play|share)/(?P<id>[A-Za-z0-9_.-]+)'
|
_VALID_URL = r'(?P<base_url>https?://(?:[^.]+\.)?zoom.us/)rec(?:ording)?/(?P<type>play|share)/(?P<id>[A-Za-z0-9_.-]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://economist.zoom.us/rec/play/dUk_CNBETmZ5VA2BwEl-jjakPpJ3M1pcfVYAPRsoIbEByGsLjUZtaa4yCATQuOL3der8BlTwxQePl_j0.EImBkXzTIaPvdZO5',
|
'url': 'https://economist.zoom.us/rec/play/dUk_CNBETmZ5VA2BwEl-jjakPpJ3M1pcfVYAPRsoIbEByGsLjUZtaa4yCATQuOL3der8BlTwxQePl_j0.EImBkXzTIaPvdZO5',
|
||||||
'md5': 'ab445e8c911fddc4f9adc842c2c5d434',
|
'md5': 'ab445e8c911fddc4f9adc842c2c5d434',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -22,36 +23,73 @@ class ZoomIE(InfoExtractor):
|
||||||
'title': 'China\'s "two sessions" and the new five-year plan',
|
'title': 'China\'s "two sessions" and the new five-year plan',
|
||||||
},
|
},
|
||||||
'skip': 'Recording requires email authentication to access',
|
'skip': 'Recording requires email authentication to access',
|
||||||
}
|
}, {
|
||||||
|
# play URL
|
||||||
|
'url': 'https://ffgolf.zoom.us/rec/play/qhEhXbrxq1Zoucx8CMtHzq1Z_2YZRPVCqWK_K-2FkEGRsSLDeOX8Tu4P6jtjZcRry8QhIbvKZdtr4UNo.QcPn2debFskI9whJ',
|
||||||
|
'md5': '2c4b1c4e5213ebf9db293e88d9385bee',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'qhEhXbrxq1Zoucx8CMtHzq1Z_2YZRPVCqWK_K-2FkEGRsSLDeOX8Tu4P6jtjZcRry8QhIbvKZdtr4UNo.QcPn2debFskI9whJ',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Prépa AF2023 - Séance 5 du 11 avril - R20/VM/GO',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# share URL
|
||||||
|
'url': 'https://us02web.zoom.us/rec/share/hkUk5Zxcga0nkyNGhVCRfzkA2gX_mzgS3LpTxEEWJz9Y_QpIQ4mZFOUx7KZRZDQA.9LGQBdqmDAYgiZ_8',
|
||||||
|
'md5': '90fdc7cfcaee5d52d1c817fc03c43c9b',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'hkUk5Zxcga0nkyNGhVCRfzkA2gX_mzgS3LpTxEEWJz9Y_QpIQ4mZFOUx7KZRZDQA.9LGQBdqmDAYgiZ_8',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Timea Andrea Lelik\'s Personal Meeting Room',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _get_page_data(self, webpage, video_id):
|
||||||
base_url, play_id = self._match_valid_url(url).groups()
|
return self._search_json(
|
||||||
webpage = self._download_webpage(url, play_id)
|
r'window\.__data__\s*=', webpage, 'data', video_id, transform_source=js_to_json)
|
||||||
|
|
||||||
|
def _get_real_webpage(self, url, base_url, video_id, url_type):
|
||||||
|
webpage = self._download_webpage(url, video_id, note=f'Downloading {url_type} webpage')
|
||||||
try:
|
try:
|
||||||
form = self._form_hidden_inputs('password_form', webpage)
|
form = self._form_hidden_inputs('password_form', webpage)
|
||||||
except ExtractorError:
|
except ExtractorError:
|
||||||
form = None
|
return webpage
|
||||||
if form:
|
|
||||||
password = self.get_param('videopassword')
|
|
||||||
if not password:
|
|
||||||
raise ExtractorError(
|
|
||||||
'This video is protected by a passcode, use the --video-password option', expected=True)
|
|
||||||
is_meeting = form.get('useWhichPasswd') == 'meeting'
|
|
||||||
validation = self._download_json(
|
|
||||||
base_url + 'rec/validate%s_passwd' % ('_meet' if is_meeting else ''),
|
|
||||||
play_id, 'Validating passcode', 'Wrong passcode', data=urlencode_postdata({
|
|
||||||
'id': form[('meet' if is_meeting else 'file') + 'Id'],
|
|
||||||
'passwd': password,
|
|
||||||
'action': form.get('action'),
|
|
||||||
}))
|
|
||||||
if not validation.get('status'):
|
|
||||||
raise ExtractorError(validation['errorMessage'], expected=True)
|
|
||||||
webpage = self._download_webpage(url, play_id)
|
|
||||||
|
|
||||||
data = self._parse_json(self._search_regex(
|
password = self.get_param('videopassword')
|
||||||
r'(?s)window\.__data__\s*=\s*({.+?});',
|
if not password:
|
||||||
webpage, 'data'), play_id, js_to_json)
|
raise ExtractorError(
|
||||||
|
'This video is protected by a passcode, use the --video-password option', expected=True)
|
||||||
|
is_meeting = form.get('useWhichPasswd') == 'meeting'
|
||||||
|
validation = self._download_json(
|
||||||
|
base_url + 'rec/validate%s_passwd' % ('_meet' if is_meeting else ''),
|
||||||
|
video_id, 'Validating passcode', 'Wrong passcode', data=urlencode_postdata({
|
||||||
|
'id': form[('meet' if is_meeting else 'file') + 'Id'],
|
||||||
|
'passwd': password,
|
||||||
|
'action': form.get('action'),
|
||||||
|
}))
|
||||||
|
if not validation.get('status'):
|
||||||
|
raise ExtractorError(validation['errorMessage'], expected=True)
|
||||||
|
return self._download_webpage(url, video_id, note=f'Re-downloading {url_type} webpage')
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
base_url, url_type, video_id = self._match_valid_url(url).group('base_url', 'type', 'id')
|
||||||
|
|
||||||
|
if url_type == 'share':
|
||||||
|
webpage = self._get_real_webpage(url, base_url, video_id, 'share')
|
||||||
|
meeting_id = self._get_page_data(webpage, video_id)['meetingId']
|
||||||
|
redirect_path = self._download_json(
|
||||||
|
f'{base_url}nws/recording/1.0/play/share-info/{meeting_id}',
|
||||||
|
video_id, note='Downloading share info JSON')['result']['redirectUrl']
|
||||||
|
url = urljoin(base_url, redirect_path)
|
||||||
|
|
||||||
|
webpage = self._get_real_webpage(url, base_url, video_id, 'play')
|
||||||
|
file_id = self._get_page_data(webpage, video_id)['fileId']
|
||||||
|
if not file_id:
|
||||||
|
# When things go wrong, file_id can be empty string
|
||||||
|
raise ExtractorError('Unable to extract file ID')
|
||||||
|
|
||||||
|
data = self._download_json(
|
||||||
|
f'{base_url}nws/recording/1.0/play/info/{file_id}', video_id,
|
||||||
|
note='Downloading play info JSON')['result']
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for _type in ('transcript', 'cc', 'chapter'):
|
for _type in ('transcript', 'cc', 'chapter'):
|
||||||
|
@ -67,11 +105,11 @@ class ZoomIE(InfoExtractor):
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_note': 'Camera stream',
|
'format_note': 'Camera stream',
|
||||||
'url': str_or_none(data.get('viewMp4Url')),
|
'url': str_or_none(data.get('viewMp4Url')),
|
||||||
'width': int_or_none(data.get('viewResolvtionsWidth')),
|
'width': int_or_none(traverse_obj(data, ('viewResolvtions', 0))),
|
||||||
'height': int_or_none(data.get('viewResolvtionsHeight')),
|
'height': int_or_none(traverse_obj(data, ('viewResolvtions', 1))),
|
||||||
'format_id': str_or_none(data.get('recordingId')),
|
'format_id': str_or_none(traverse_obj(data, ('recording', 'id'))),
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'filesize_approx': parse_filesize(data.get('fileSize')),
|
'filesize_approx': parse_filesize(str_or_none(traverse_obj(data, ('recording', 'fileSizeInMB')))),
|
||||||
'preference': 0
|
'preference': 0
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -79,16 +117,16 @@ class ZoomIE(InfoExtractor):
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_note': 'Screen share stream',
|
'format_note': 'Screen share stream',
|
||||||
'url': str_or_none(data.get('shareMp4Url')),
|
'url': str_or_none(data.get('shareMp4Url')),
|
||||||
'width': int_or_none(data.get('shareResolvtionsWidth')),
|
'width': int_or_none(traverse_obj(data, ('shareResolvtions', 0))),
|
||||||
'height': int_or_none(data.get('shareResolvtionsHeight')),
|
'height': int_or_none(traverse_obj(data, ('shareResolvtions', 1))),
|
||||||
'format_id': str_or_none(data.get('shareVideoId')),
|
'format_id': str_or_none(traverse_obj(data, ('shareVideo', 'id'))),
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'preference': -1
|
'preference': -1
|
||||||
})
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': play_id,
|
'id': video_id,
|
||||||
'title': data.get('topic'),
|
'title': str_or_none(traverse_obj(data, ('meet', 'topic'))),
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'http_headers': {
|
'http_headers': {
|
||||||
|
|
|
@ -243,7 +243,7 @@ class JSInterpreter:
|
||||||
return
|
return
|
||||||
counters = {k: 0 for k in _MATCHING_PARENS.values()}
|
counters = {k: 0 for k in _MATCHING_PARENS.values()}
|
||||||
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
|
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
|
||||||
in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
|
in_quote, escaping, after_op, in_regex_char_group, in_unary_op = None, False, True, False, False
|
||||||
for idx, char in enumerate(expr):
|
for idx, char in enumerate(expr):
|
||||||
if not in_quote and char in _MATCHING_PARENS:
|
if not in_quote and char in _MATCHING_PARENS:
|
||||||
counters[_MATCHING_PARENS[char]] += 1
|
counters[_MATCHING_PARENS[char]] += 1
|
||||||
|
@ -258,9 +258,11 @@ class JSInterpreter:
|
||||||
elif in_quote == '/' and char in '[]':
|
elif in_quote == '/' and char in '[]':
|
||||||
in_regex_char_group = char == '['
|
in_regex_char_group = char == '['
|
||||||
escaping = not escaping and in_quote and char == '\\'
|
escaping = not escaping and in_quote and char == '\\'
|
||||||
after_op = not in_quote and char in OP_CHARS or (char.isspace() and after_op)
|
in_unary_op = (not in_quote and not in_regex_char_group
|
||||||
|
and after_op not in (True, False) and char in '-+')
|
||||||
|
after_op = char if (not in_quote and char in OP_CHARS) else (char.isspace() and after_op)
|
||||||
|
|
||||||
if char != delim[pos] or any(counters.values()) or in_quote:
|
if char != delim[pos] or any(counters.values()) or in_quote or in_unary_op:
|
||||||
pos = 0
|
pos = 0
|
||||||
continue
|
continue
|
||||||
elif pos != delim_len:
|
elif pos != delim_len:
|
||||||
|
|
|
@ -243,7 +243,7 @@ def create_parser():
|
||||||
if multiple_keys:
|
if multiple_keys:
|
||||||
allowed_keys = fr'({allowed_keys})(,({allowed_keys}))*'
|
allowed_keys = fr'({allowed_keys})(,({allowed_keys}))*'
|
||||||
mobj = re.match(
|
mobj = re.match(
|
||||||
fr'(?i)(?P<keys>{allowed_keys}){delimiter}(?P<val>.*)$',
|
fr'(?is)(?P<keys>{allowed_keys}){delimiter}(?P<val>.*)$',
|
||||||
value[0] if multiple_args else value)
|
value[0] if multiple_args else value)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
keys, val = mobj.group('keys').split(','), mobj.group('val')
|
keys, val = mobj.group('keys').split(','), mobj.group('val')
|
||||||
|
@ -526,22 +526,27 @@ def create_parser():
|
||||||
'--cn-verification-proxy',
|
'--cn-verification-proxy',
|
||||||
dest='cn_verification_proxy', default=None, metavar='URL',
|
dest='cn_verification_proxy', default=None, metavar='URL',
|
||||||
help=optparse.SUPPRESS_HELP)
|
help=optparse.SUPPRESS_HELP)
|
||||||
|
geo.add_option(
|
||||||
|
'--xff', metavar='VALUE',
|
||||||
|
dest='geo_bypass', default="default",
|
||||||
|
help=(
|
||||||
|
'How to fake X-Forwarded-For HTTP header to try bypassing geographic restriction. '
|
||||||
|
'One of "default" (Only when known to be useful), "never", '
|
||||||
|
'a two-letter ISO 3166-2 country code, or an IP block in CIDR notation'))
|
||||||
geo.add_option(
|
geo.add_option(
|
||||||
'--geo-bypass',
|
'--geo-bypass',
|
||||||
action='store_true', dest='geo_bypass', default=True,
|
action='store_const', dest='geo_bypass', const='default',
|
||||||
help='Bypass geographic restriction via faking X-Forwarded-For HTTP header (default)')
|
help=optparse.SUPPRESS_HELP)
|
||||||
geo.add_option(
|
geo.add_option(
|
||||||
'--no-geo-bypass',
|
'--no-geo-bypass',
|
||||||
action='store_false', dest='geo_bypass',
|
action='store_const', dest='geo_bypass', const='never',
|
||||||
help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header')
|
help=optparse.SUPPRESS_HELP)
|
||||||
geo.add_option(
|
geo.add_option(
|
||||||
'--geo-bypass-country', metavar='CODE',
|
'--geo-bypass-country', metavar='CODE', dest='geo_bypass',
|
||||||
dest='geo_bypass_country', default=None,
|
help=optparse.SUPPRESS_HELP)
|
||||||
help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code')
|
|
||||||
geo.add_option(
|
geo.add_option(
|
||||||
'--geo-bypass-ip-block', metavar='IP_BLOCK',
|
'--geo-bypass-ip-block', metavar='IP_BLOCK', dest='geo_bypass',
|
||||||
dest='geo_bypass_ip_block', default=None,
|
help=optparse.SUPPRESS_HELP)
|
||||||
help='Force bypass geographic restriction with explicitly provided IP block in CIDR notation')
|
|
||||||
|
|
||||||
selection = optparse.OptionGroup(parser, 'Video Selection')
|
selection = optparse.OptionGroup(parser, 'Video Selection')
|
||||||
selection.add_option(
|
selection.add_option(
|
||||||
|
@ -1086,8 +1091,12 @@ def create_parser():
|
||||||
verbosity = optparse.OptionGroup(parser, 'Verbosity and Simulation Options')
|
verbosity = optparse.OptionGroup(parser, 'Verbosity and Simulation Options')
|
||||||
verbosity.add_option(
|
verbosity.add_option(
|
||||||
'-q', '--quiet',
|
'-q', '--quiet',
|
||||||
action='store_true', dest='quiet', default=False,
|
action='store_true', dest='quiet', default=None,
|
||||||
help='Activate quiet mode. If used with --verbose, print the log to stderr')
|
help='Activate quiet mode. If used with --verbose, print the log to stderr')
|
||||||
|
verbosity.add_option(
|
||||||
|
'--no-quiet',
|
||||||
|
action='store_false', dest='quiet',
|
||||||
|
help='Deactivate quiet mode. (Default)')
|
||||||
verbosity.add_option(
|
verbosity.add_option(
|
||||||
'--no-warnings',
|
'--no-warnings',
|
||||||
dest='no_warnings', action='store_true', default=False,
|
dest='no_warnings', action='store_true', default=False,
|
||||||
|
|
|
@ -107,7 +107,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
|
||||||
options.extend(['-map', '-0:%d' % old_stream])
|
options.extend(['-map', '-0:%d' % old_stream])
|
||||||
new_stream -= 1
|
new_stream -= 1
|
||||||
options.extend([
|
options.extend([
|
||||||
'-attach', thumbnail_filename,
|
'-attach', self._ffmpeg_filename_argument(thumbnail_filename),
|
||||||
'-metadata:s:%d' % new_stream, 'mimetype=%s' % mimetype,
|
'-metadata:s:%d' % new_stream, 'mimetype=%s' % mimetype,
|
||||||
'-metadata:s:%d' % new_stream, 'filename=cover.%s' % thumbnail_ext])
|
'-metadata:s:%d' % new_stream, 'filename=cover.%s' % thumbnail_ext])
|
||||||
|
|
||||||
|
|
|
@ -809,7 +809,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
|
||||||
new_stream -= 1
|
new_stream -= 1
|
||||||
|
|
||||||
yield (
|
yield (
|
||||||
'-attach', infofn,
|
'-attach', self._ffmpeg_filename_argument(infofn),
|
||||||
f'-metadata:s:{new_stream}', 'mimetype=application/json',
|
f'-metadata:s:{new_stream}', 'mimetype=application/json',
|
||||||
f'-metadata:s:{new_stream}', 'filename=info.json',
|
f'-metadata:s:{new_stream}', 'filename=info.json',
|
||||||
)
|
)
|
||||||
|
@ -898,8 +898,11 @@ class FFmpegFixupM3u8PP(FFmpegFixupPostProcessor):
|
||||||
@PostProcessor._restrict_to(images=False)
|
@PostProcessor._restrict_to(images=False)
|
||||||
def run(self, info):
|
def run(self, info):
|
||||||
if all(self._needs_fixup(info)):
|
if all(self._needs_fixup(info)):
|
||||||
|
args = ['-f', 'mp4']
|
||||||
|
if self.get_audio_codec(info['filepath']) == 'aac':
|
||||||
|
args.extend(['-bsf:a', 'aac_adtstoasc'])
|
||||||
self._fixup('Fixing MPEG-TS in MP4 container', info['filepath'], [
|
self._fixup('Fixing MPEG-TS in MP4 container', info['filepath'], [
|
||||||
*self.stream_copy_opts(), '-f', 'mp4', '-bsf:a', 'aac_adtstoasc'])
|
*self.stream_copy_opts(), *args])
|
||||||
return [], info
|
return [], info
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2187,10 +2187,11 @@ else:
|
||||||
fcntl.lockf(f, flags)
|
fcntl.lockf(f, flags)
|
||||||
|
|
||||||
def _unlock_file(f):
|
def _unlock_file(f):
|
||||||
try:
|
with contextlib.suppress(OSError):
|
||||||
fcntl.flock(f, fcntl.LOCK_UN)
|
return fcntl.flock(f, fcntl.LOCK_UN)
|
||||||
except OSError:
|
with contextlib.suppress(OSError):
|
||||||
fcntl.lockf(f, fcntl.LOCK_UN)
|
return fcntl.lockf(f, fcntl.LOCK_UN) # AOSP does not have flock()
|
||||||
|
return fcntl.flock(f, fcntl.LOCK_UN | fcntl.LOCK_NB) # virtiofs needs LOCK_NB on unlocking
|
||||||
|
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
|
||||||
|
@ -3278,8 +3279,14 @@ def multipart_encode(data, boundary=None):
|
||||||
return out, content_type
|
return out, content_type
|
||||||
|
|
||||||
|
|
||||||
def variadic(x, allowed_types=(str, bytes, dict)):
|
def is_iterable_like(x, allowed_types=collections.abc.Iterable, blocked_types=NO_DEFAULT):
|
||||||
return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
|
if blocked_types is NO_DEFAULT:
|
||||||
|
blocked_types = (str, bytes, collections.abc.Mapping)
|
||||||
|
return isinstance(x, allowed_types) and not isinstance(x, blocked_types)
|
||||||
|
|
||||||
|
|
||||||
|
def variadic(x, allowed_types=NO_DEFAULT):
|
||||||
|
return x if is_iterable_like(x, blocked_types=allowed_types) else (x,)
|
||||||
|
|
||||||
|
|
||||||
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
|
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
|
||||||
|
@ -3371,7 +3378,7 @@ def strip_jsonp(code):
|
||||||
|
|
||||||
def js_to_json(code, vars={}, *, strict=False):
|
def js_to_json(code, vars={}, *, strict=False):
|
||||||
# vars is a dict of var, val pairs to substitute
|
# vars is a dict of var, val pairs to substitute
|
||||||
STRING_QUOTES = '\'"'
|
STRING_QUOTES = '\'"`'
|
||||||
STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES)
|
STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES)
|
||||||
COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
|
COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
|
||||||
SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
|
SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
|
||||||
|
@ -3389,6 +3396,12 @@ def js_to_json(code, vars={}, *, strict=False):
|
||||||
else '' if escape == '\n'
|
else '' if escape == '\n'
|
||||||
else escape)
|
else escape)
|
||||||
|
|
||||||
|
def template_substitute(match):
|
||||||
|
evaluated = js_to_json(match.group(1), vars, strict=strict)
|
||||||
|
if evaluated[0] == '"':
|
||||||
|
return json.loads(evaluated)
|
||||||
|
return evaluated
|
||||||
|
|
||||||
def fix_kv(m):
|
def fix_kv(m):
|
||||||
v = m.group(0)
|
v = m.group(0)
|
||||||
if v in ('true', 'false', 'null'):
|
if v in ('true', 'false', 'null'):
|
||||||
|
@ -3399,7 +3412,8 @@ def js_to_json(code, vars={}, *, strict=False):
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
if v[0] in STRING_QUOTES:
|
if v[0] in STRING_QUOTES:
|
||||||
escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v[1:-1])
|
v = re.sub(r'(?s)\${([^}]+)}', template_substitute, v[1:-1]) if v[0] == '`' else v[1:-1]
|
||||||
|
escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v)
|
||||||
return f'"{escaped}"'
|
return f'"{escaped}"'
|
||||||
|
|
||||||
for regex, base in INTEGER_TABLE:
|
for regex, base in INTEGER_TABLE:
|
||||||
|
@ -4091,6 +4105,10 @@ def dfxp2srt(dfxp_data):
|
||||||
def close(self):
|
def close(self):
|
||||||
return self._out.strip()
|
return self._out.strip()
|
||||||
|
|
||||||
|
# Fix UTF-8 encoded file wrongly marked as UTF-16. See https://github.com/yt-dlp/yt-dlp/issues/6543#issuecomment-1477169870
|
||||||
|
# This will not trigger false positives since only UTF-8 text is being replaced
|
||||||
|
dfxp_data = dfxp_data.replace(b'encoding=\'UTF-16\'', b'encoding=\'UTF-8\'')
|
||||||
|
|
||||||
def parse_node(node):
|
def parse_node(node):
|
||||||
target = TTMLPElementParser()
|
target = TTMLPElementParser()
|
||||||
parser = xml.etree.ElementTree.XMLParser(target=target)
|
parser = xml.etree.ElementTree.XMLParser(target=target)
|
||||||
|
@ -5461,7 +5479,7 @@ def traverse_obj(
|
||||||
obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True,
|
obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True,
|
||||||
casesense=True, is_user_input=False, traverse_string=False):
|
casesense=True, is_user_input=False, traverse_string=False):
|
||||||
"""
|
"""
|
||||||
Safely traverse nested `dict`s and `Sequence`s
|
Safely traverse nested `dict`s and `Iterable`s
|
||||||
|
|
||||||
>>> obj = [{}, {"key": "value"}]
|
>>> obj = [{}, {"key": "value"}]
|
||||||
>>> traverse_obj(obj, (1, "key"))
|
>>> traverse_obj(obj, (1, "key"))
|
||||||
|
@ -5469,7 +5487,7 @@ def traverse_obj(
|
||||||
|
|
||||||
Each of the provided `paths` is tested and the first producing a valid result will be returned.
|
Each of the provided `paths` is tested and the first producing a valid result will be returned.
|
||||||
The next path will also be tested if the path branched but no results could be found.
|
The next path will also be tested if the path branched but no results could be found.
|
||||||
Supported values for traversal are `Mapping`, `Sequence` and `re.Match`.
|
Supported values for traversal are `Mapping`, `Iterable` and `re.Match`.
|
||||||
Unhelpful values (`{}`, `None`) are treated as the absence of a value and discarded.
|
Unhelpful values (`{}`, `None`) are treated as the absence of a value and discarded.
|
||||||
|
|
||||||
The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
|
The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.
|
||||||
|
@ -5486,7 +5504,7 @@ def traverse_obj(
|
||||||
Read as: `[traverse_obj(obj, branch) for branch in branches]`.
|
Read as: `[traverse_obj(obj, branch) for branch in branches]`.
|
||||||
- `function`: Branch out and return values filtered by the function.
|
- `function`: Branch out and return values filtered by the function.
|
||||||
Read as: `[value for key, value in obj if function(key, value)]`.
|
Read as: `[value for key, value in obj if function(key, value)]`.
|
||||||
For `Sequence`s, `key` is the index of the value.
|
For `Iterable`s, `key` is the index of the value.
|
||||||
For `re.Match`es, `key` is the group number (0 = full match)
|
For `re.Match`es, `key` is the group number (0 = full match)
|
||||||
as well as additionally any group names, if given.
|
as well as additionally any group names, if given.
|
||||||
- `dict` Transform the current object and return a matching dict.
|
- `dict` Transform the current object and return a matching dict.
|
||||||
|
@ -5522,7 +5540,6 @@ def traverse_obj(
|
||||||
If no `default` is given and the last path branches, a `list` of results
|
If no `default` is given and the last path branches, a `list` of results
|
||||||
is always returned. If a path ends on a `dict` that result will always be a `dict`.
|
is always returned. If a path ends on a `dict` that result will always be a `dict`.
|
||||||
"""
|
"""
|
||||||
is_sequence = lambda x: isinstance(x, collections.abc.Sequence) and not isinstance(x, (str, bytes))
|
|
||||||
casefold = lambda k: k.casefold() if isinstance(k, str) else k
|
casefold = lambda k: k.casefold() if isinstance(k, str) else k
|
||||||
|
|
||||||
if isinstance(expected_type, type):
|
if isinstance(expected_type, type):
|
||||||
|
@ -5535,7 +5552,9 @@ def traverse_obj(
|
||||||
result = None
|
result = None
|
||||||
|
|
||||||
if obj is None and traverse_string:
|
if obj is None and traverse_string:
|
||||||
pass
|
if key is ... or callable(key) or isinstance(key, slice):
|
||||||
|
branching = True
|
||||||
|
result = ()
|
||||||
|
|
||||||
elif key is None:
|
elif key is None:
|
||||||
result = obj
|
result = obj
|
||||||
|
@ -5558,7 +5577,7 @@ def traverse_obj(
|
||||||
branching = True
|
branching = True
|
||||||
if isinstance(obj, collections.abc.Mapping):
|
if isinstance(obj, collections.abc.Mapping):
|
||||||
result = obj.values()
|
result = obj.values()
|
||||||
elif is_sequence(obj):
|
elif is_iterable_like(obj):
|
||||||
result = obj
|
result = obj
|
||||||
elif isinstance(obj, re.Match):
|
elif isinstance(obj, re.Match):
|
||||||
result = obj.groups()
|
result = obj.groups()
|
||||||
|
@ -5572,7 +5591,7 @@ def traverse_obj(
|
||||||
branching = True
|
branching = True
|
||||||
if isinstance(obj, collections.abc.Mapping):
|
if isinstance(obj, collections.abc.Mapping):
|
||||||
iter_obj = obj.items()
|
iter_obj = obj.items()
|
||||||
elif is_sequence(obj):
|
elif is_iterable_like(obj):
|
||||||
iter_obj = enumerate(obj)
|
iter_obj = enumerate(obj)
|
||||||
elif isinstance(obj, re.Match):
|
elif isinstance(obj, re.Match):
|
||||||
iter_obj = itertools.chain(
|
iter_obj = itertools.chain(
|
||||||
|
@ -5596,7 +5615,7 @@ def traverse_obj(
|
||||||
} or None
|
} or None
|
||||||
|
|
||||||
elif isinstance(obj, collections.abc.Mapping):
|
elif isinstance(obj, collections.abc.Mapping):
|
||||||
result = (obj.get(key) if casesense or (key in obj) else
|
result = (try_call(obj.get, args=(key,)) if casesense or try_call(obj.__contains__, args=(key,)) else
|
||||||
next((v for k, v in obj.items() if casefold(k) == key), None))
|
next((v for k, v in obj.items() if casefold(k) == key), None))
|
||||||
|
|
||||||
elif isinstance(obj, re.Match):
|
elif isinstance(obj, re.Match):
|
||||||
|
@ -5608,7 +5627,7 @@ def traverse_obj(
|
||||||
result = next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)
|
result = next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)
|
||||||
|
|
||||||
elif isinstance(key, (int, slice)):
|
elif isinstance(key, (int, slice)):
|
||||||
if is_sequence(obj):
|
if is_iterable_like(obj, collections.abc.Sequence):
|
||||||
branching = isinstance(key, slice)
|
branching = isinstance(key, slice)
|
||||||
with contextlib.suppress(IndexError):
|
with contextlib.suppress(IndexError):
|
||||||
result = obj[key]
|
result = obj[key]
|
||||||
|
|
Loading…
Reference in a new issue