mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-01 06:21:09 +01:00
Merge branch 'yt-dlp:master' into pr/live-sections
This commit is contained in:
commit
c0be43d4d7
8 changed files with 278 additions and 71 deletions
13
Changelog.md
13
Changelog.md
|
@ -4,6 +4,19 @@
|
||||||
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
|
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
|
||||||
-->
|
-->
|
||||||
|
|
||||||
|
### 2024.07.25
|
||||||
|
|
||||||
|
#### Extractor changes
|
||||||
|
- **abematv**: [Adapt key retrieval to request handler framework](https://github.com/yt-dlp/yt-dlp/commit/a3bab4752a2b3d56e5a59b4e0411bb8f695c010b) ([#10491](https://github.com/yt-dlp/yt-dlp/issues/10491)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **facebook**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/1a34a802f44a1dab8f642c79c3cc810e21541d3b) ([#10531](https://github.com/yt-dlp/yt-dlp/issues/10531)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **mlbtv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f0993391e6052ec8f7aacc286609564f226943b9) ([#10515](https://github.com/yt-dlp/yt-dlp/issues/10515)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **tiktok**: [Fix and deprioritize JSON subtitles](https://github.com/yt-dlp/yt-dlp/commit/2f97779f335ac069ecccd9c7bf81abf4a83cfe7a) ([#10516](https://github.com/yt-dlp/yt-dlp/issues/10516)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **vimeo**: [Fix chapters extraction](https://github.com/yt-dlp/yt-dlp/commit/a0a1bc3d8d8e3bb9a48a06e835815a0460e90e77) ([#10544](https://github.com/yt-dlp/yt-dlp/issues/10544)) by [bashonly](https://github.com/bashonly)
|
||||||
|
- **youtube**: [Fix `n` function name extraction for player `3400486c`](https://github.com/yt-dlp/yt-dlp/commit/713b4cd18f00556771af8cfdd9cea6cc1a09e948) ([#10542](https://github.com/yt-dlp/yt-dlp/issues/10542)) by [bashonly](https://github.com/bashonly)
|
||||||
|
|
||||||
|
#### Misc. changes
|
||||||
|
- **build**: [Pin `setuptools` version](https://github.com/yt-dlp/yt-dlp/commit/e046db8a116b1c320d4785daadd48ea0b22a3987) ([#10493](https://github.com/yt-dlp/yt-dlp/issues/10493)) by [bashonly](https://github.com/bashonly)
|
||||||
|
|
||||||
### 2024.07.16
|
### 2024.07.16
|
||||||
|
|
||||||
#### Core changes
|
#### Core changes
|
||||||
|
|
|
@ -171,6 +171,10 @@ _NSIG_TESTS = [
|
||||||
'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js',
|
'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js',
|
||||||
'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A',
|
'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A',
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
'https://www.youtube.com/s/player/3400486c/player_ias.vflset/en_US/base.js',
|
||||||
|
'lL46g3XifCKUZn1Xfw', 'z767lhet6V2Skl',
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -571,16 +571,21 @@ class FacebookIE(InfoExtractor):
|
||||||
# Formats larger than ~500MB will return error 403 unless chunk size is regulated
|
# Formats larger than ~500MB will return error 403 unless chunk size is regulated
|
||||||
f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20
|
f.setdefault('downloader_options', {})['http_chunk_size'] = 250 << 20
|
||||||
|
|
||||||
def extract_relay_data(_filter):
|
def yield_all_relay_data(_filter):
|
||||||
return self._parse_json(self._search_regex(
|
for relay_data in re.findall(rf'data-sjs>({{.*?{_filter}.*?}})</script>', webpage):
|
||||||
rf'data-sjs>({{.*?{_filter}.*?}})</script>',
|
yield self._parse_json(relay_data, video_id, fatal=False) or {}
|
||||||
webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
|
|
||||||
|
|
||||||
def extract_relay_prefetched_data(_filter):
|
def extract_relay_data(_filter):
|
||||||
return traverse_obj(extract_relay_data(_filter), (
|
return next(filter(None, yield_all_relay_data(_filter)), {})
|
||||||
'require', (None, (..., ..., ..., '__bbox', 'require')),
|
|
||||||
|
def extract_relay_prefetched_data(_filter, target_keys=None):
|
||||||
|
path = 'data'
|
||||||
|
if target_keys is not None:
|
||||||
|
path = lambda k, v: k == 'data' and any(target in v for target in variadic(target_keys))
|
||||||
|
return traverse_obj(yield_all_relay_data(_filter), (
|
||||||
|
..., 'require', (None, (..., ..., ..., '__bbox', 'require')),
|
||||||
lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
|
lambda _, v: any(key.startswith('RelayPrefetchedStreamCache') for key in v),
|
||||||
..., ..., '__bbox', 'result', 'data', {dict}), get_all=False) or {}
|
..., ..., '__bbox', 'result', path, {dict}), get_all=False) or {}
|
||||||
|
|
||||||
if not video_data:
|
if not video_data:
|
||||||
server_js_data = self._parse_json(self._search_regex([
|
server_js_data = self._parse_json(self._search_regex([
|
||||||
|
@ -591,7 +596,8 @@ class FacebookIE(InfoExtractor):
|
||||||
|
|
||||||
if not video_data:
|
if not video_data:
|
||||||
data = extract_relay_prefetched_data(
|
data = extract_relay_prefetched_data(
|
||||||
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)')
|
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)',
|
||||||
|
target_keys=('video', 'event', 'nodes', 'node', 'mediaset'))
|
||||||
if data:
|
if data:
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
|
|
|
@ -1,16 +1,21 @@
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
import urllib.parse
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..networking.exceptions import HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
join_nonempty,
|
join_nonempty,
|
||||||
|
jwt_decode_hs256,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
try_get,
|
try_get,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
from ..utils.traversal import traverse_obj
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
@ -276,81 +281,213 @@ class MLBVideoIE(MLBBaseIE):
|
||||||
class MLBTVIE(InfoExtractor):
|
class MLBTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?mlb\.com/tv/g(?P<id>\d{6})'
|
_VALID_URL = r'https?://(?:www\.)?mlb\.com/tv/g(?P<id>\d{6})'
|
||||||
_NETRC_MACHINE = 'mlb'
|
_NETRC_MACHINE = 'mlb'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.mlb.com/tv/g661581/vee2eff5f-a7df-4c20-bdb4-7b926fa12638',
|
'url': 'https://www.mlb.com/tv/g661581/vee2eff5f-a7df-4c20-bdb4-7b926fa12638',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '661581',
|
'id': '661581',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '2022-07-02 - St. Louis Cardinals @ Philadelphia Phillies',
|
'title': '2022-07-02 - St. Louis Cardinals @ Philadelphia Phillies',
|
||||||
|
'release_date': '20220702',
|
||||||
|
'release_timestamp': 1656792300,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
_GRAPHQL_INIT_QUERY = '''\
|
||||||
|
mutation initSession($device: InitSessionInput!, $clientType: ClientType!, $experience: ExperienceTypeInput) {
|
||||||
|
initSession(device: $device, clientType: $clientType, experience: $experience) {
|
||||||
|
deviceId
|
||||||
|
sessionId
|
||||||
|
entitlements {
|
||||||
|
code
|
||||||
|
}
|
||||||
|
location {
|
||||||
|
countryCode
|
||||||
|
regionName
|
||||||
|
zipCode
|
||||||
|
latitude
|
||||||
|
longitude
|
||||||
|
}
|
||||||
|
clientExperience
|
||||||
|
features
|
||||||
|
}
|
||||||
|
}'''
|
||||||
|
_GRAPHQL_PLAYBACK_QUERY = '''\
|
||||||
|
mutation initPlaybackSession(
|
||||||
|
$adCapabilities: [AdExperienceType]
|
||||||
|
$mediaId: String!
|
||||||
|
$deviceId: String!
|
||||||
|
$sessionId: String!
|
||||||
|
$quality: PlaybackQuality
|
||||||
|
) {
|
||||||
|
initPlaybackSession(
|
||||||
|
adCapabilities: $adCapabilities
|
||||||
|
mediaId: $mediaId
|
||||||
|
deviceId: $deviceId
|
||||||
|
sessionId: $sessionId
|
||||||
|
quality: $quality
|
||||||
|
) {
|
||||||
|
playbackSessionId
|
||||||
|
playback {
|
||||||
|
url
|
||||||
|
token
|
||||||
|
expiration
|
||||||
|
cdn
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}'''
|
||||||
|
_APP_VERSION = '7.8.2'
|
||||||
|
_device_id = None
|
||||||
|
_session_id = None
|
||||||
_access_token = None
|
_access_token = None
|
||||||
|
_token_expiry = 0
|
||||||
|
|
||||||
|
@property
|
||||||
|
def _api_headers(self):
|
||||||
|
if (self._token_expiry - 120) <= time.time():
|
||||||
|
self.write_debug('Access token has expired; re-logging in')
|
||||||
|
self._perform_login(*self._get_login_info())
|
||||||
|
return {'Authorization': f'Bearer {self._access_token}'}
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
if not self._access_token:
|
if not self._access_token:
|
||||||
self.raise_login_required(
|
self.raise_login_required(
|
||||||
'All videos are only available to registered users', method='password')
|
'All videos are only available to registered users', method='password')
|
||||||
|
|
||||||
|
def _set_device_id(self, username):
|
||||||
|
if not self._device_id:
|
||||||
|
self._device_id = self.cache.load(
|
||||||
|
self._NETRC_MACHINE, 'device_ids', default={}).get(username)
|
||||||
|
if self._device_id:
|
||||||
|
return
|
||||||
|
self._device_id = str(uuid.uuid4())
|
||||||
|
self.cache.store(self._NETRC_MACHINE, 'device_ids', {username: self._device_id})
|
||||||
|
|
||||||
def _perform_login(self, username, password):
|
def _perform_login(self, username, password):
|
||||||
data = f'grant_type=password&username={urllib.parse.quote(username)}&password={urllib.parse.quote(password)}&scope=openid offline_access&client_id=0oa3e1nutA1HLzAKG356'
|
try:
|
||||||
access_token = self._download_json(
|
self._access_token = self._download_json(
|
||||||
'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None,
|
'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None,
|
||||||
headers={
|
'Logging in', 'Unable to log in', headers={
|
||||||
'User-Agent': 'okhttp/3.12.1',
|
'User-Agent': 'okhttp/3.12.1',
|
||||||
'Content-Type': 'application/x-www-form-urlencoded',
|
'Content-Type': 'application/x-www-form-urlencoded',
|
||||||
}, data=data.encode())['access_token']
|
}, data=urlencode_postdata({
|
||||||
|
'grant_type': 'password',
|
||||||
|
'username': username,
|
||||||
|
'password': password,
|
||||||
|
'scope': 'openid offline_access',
|
||||||
|
'client_id': '0oa3e1nutA1HLzAKG356',
|
||||||
|
}))['access_token']
|
||||||
|
except ExtractorError as error:
|
||||||
|
if isinstance(error.cause, HTTPError) and error.cause.status == 400:
|
||||||
|
raise ExtractorError('Invalid username or password', expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
entitlement = self._download_webpage(
|
self._token_expiry = traverse_obj(self._access_token, ({jwt_decode_hs256}, 'exp', {int})) or 0
|
||||||
f'https://media-entitlement.mlb.com/api/v3/jwt?os=Android&appname=AtBat&did={uuid.uuid4()}', None,
|
self._set_device_id(username)
|
||||||
headers={
|
|
||||||
'User-Agent': 'okhttp/3.12.1',
|
|
||||||
'Authorization': f'Bearer {access_token}',
|
|
||||||
})
|
|
||||||
|
|
||||||
data = f'grant_type=urn:ietf:params:oauth:grant-type:token-exchange&subject_token={entitlement}&subject_token_type=urn:ietf:params:oauth:token-type:jwt&platform=android-tv'
|
self._session_id = self._call_api({
|
||||||
self._access_token = self._download_json(
|
'operationName': 'initSession',
|
||||||
'https://us.edge.bamgrid.com/token', None,
|
'query': self._GRAPHQL_INIT_QUERY,
|
||||||
|
'variables': {
|
||||||
|
'device': {
|
||||||
|
'appVersion': self._APP_VERSION,
|
||||||
|
'deviceFamily': 'desktop',
|
||||||
|
'knownDeviceId': self._device_id,
|
||||||
|
'languagePreference': 'ENGLISH',
|
||||||
|
'manufacturer': '',
|
||||||
|
'model': '',
|
||||||
|
'os': '',
|
||||||
|
'osVersion': '',
|
||||||
|
},
|
||||||
|
'clientType': 'WEB',
|
||||||
|
},
|
||||||
|
}, None, 'session ID')['data']['initSession']['sessionId']
|
||||||
|
|
||||||
|
def _call_api(self, data, video_id, description='GraphQL JSON', fatal=True):
|
||||||
|
return self._download_json(
|
||||||
|
'https://media-gateway.mlb.com/graphql', video_id,
|
||||||
|
f'Downloading {description}', f'Unable to download {description}', fatal=fatal,
|
||||||
headers={
|
headers={
|
||||||
|
**self._api_headers,
|
||||||
'Accept': 'application/json',
|
'Accept': 'application/json',
|
||||||
'Authorization': 'Bearer bWxidHYmYW5kcm9pZCYxLjAuMA.6LZMbH2r--rbXcgEabaDdIslpo4RyZrlVfWZhsAgXIk',
|
'Content-Type': 'application/json',
|
||||||
'Content-Type': 'application/x-www-form-urlencoded',
|
'x-client-name': 'WEB',
|
||||||
}, data=data.encode())['access_token']
|
'x-client-version': self._APP_VERSION,
|
||||||
|
}, data=json.dumps(data, separators=(',', ':')).encode())
|
||||||
|
|
||||||
|
def _extract_formats_and_subtitles(self, broadcast, video_id):
|
||||||
|
feed = traverse_obj(broadcast, ('homeAway', {str.title}))
|
||||||
|
medium = traverse_obj(broadcast, ('type', {str}))
|
||||||
|
language = traverse_obj(broadcast, ('language', {str.lower}))
|
||||||
|
format_id = join_nonempty(feed, medium, language)
|
||||||
|
|
||||||
|
response = self._call_api({
|
||||||
|
'operationName': 'initPlaybackSession',
|
||||||
|
'query': self._GRAPHQL_PLAYBACK_QUERY,
|
||||||
|
'variables': {
|
||||||
|
'adCapabilities': ['GOOGLE_STANDALONE_AD_PODS'],
|
||||||
|
'deviceId': self._device_id,
|
||||||
|
'mediaId': broadcast['mediaId'],
|
||||||
|
'quality': 'PLACEHOLDER',
|
||||||
|
'sessionId': self._session_id,
|
||||||
|
},
|
||||||
|
}, video_id, f'{format_id} broadcast JSON', fatal=False)
|
||||||
|
|
||||||
|
playback = traverse_obj(response, ('data', 'initPlaybackSession', 'playback', {dict}))
|
||||||
|
m3u8_url = traverse_obj(playback, ('url', {url_or_none}))
|
||||||
|
token = traverse_obj(playback, ('token', {str}))
|
||||||
|
|
||||||
|
if not (m3u8_url and token):
|
||||||
|
errors = '; '.join(traverse_obj(response, ('errors', ..., 'message', {str})))
|
||||||
|
if 'not entitled' in errors:
|
||||||
|
raise ExtractorError(errors, expected=True)
|
||||||
|
elif errors: # Only warn when 'blacked out' since radio formats are available
|
||||||
|
self.report_warning(f'API returned errors for {format_id}: {errors}')
|
||||||
|
else:
|
||||||
|
self.report_warning(f'No formats available for {format_id} broadcast; skipping')
|
||||||
|
return [], {}
|
||||||
|
|
||||||
|
cdn_headers = {'x-cdn-token': token}
|
||||||
|
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
|
m3u8_url.replace(f'/{token}/', '/'), video_id, 'mp4',
|
||||||
|
m3u8_id=format_id, fatal=False, headers=cdn_headers)
|
||||||
|
for fmt in fmts:
|
||||||
|
fmt['http_headers'] = cdn_headers
|
||||||
|
fmt.setdefault('format_note', join_nonempty(feed, medium, delim=' '))
|
||||||
|
fmt.setdefault('language', language)
|
||||||
|
if fmt.get('vcodec') == 'none' and fmt['language'] == 'en':
|
||||||
|
fmt['source_preference'] = 10
|
||||||
|
|
||||||
|
return fmts, subs
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
airings = self._download_json(
|
metadata = traverse_obj(self._download_json(
|
||||||
f'https://search-api-mlbtv.mlb.com/svc/search/v2/graphql/persisted/query/core/Airings?variables=%7B%22partnerProgramIds%22%3A%5B%22{video_id}%22%5D%2C%22applyEsniMediaRightsLabels%22%3Atrue%7D',
|
'https://statsapi.mlb.com/api/v1/schedule', video_id, query={
|
||||||
video_id)['data']['Airings']
|
'gamePk': video_id,
|
||||||
|
'hydrate': 'broadcasts(all),statusFlags',
|
||||||
|
}), ('dates', ..., 'games', lambda _, v: str(v['gamePk']) == video_id and v['broadcasts'], any))
|
||||||
|
|
||||||
|
broadcasts = traverse_obj(metadata, (
|
||||||
|
'broadcasts', lambda _, v: v['mediaId'] and v['mediaState']['mediaStateCode'] != 'MEDIA_OFF'))
|
||||||
|
|
||||||
formats, subtitles = [], {}
|
formats, subtitles = [], {}
|
||||||
for airing in traverse_obj(airings, lambda _, v: v['playbackUrls'][0]['href']):
|
for broadcast in broadcasts:
|
||||||
format_id = join_nonempty('feedType', 'feedLanguage', from_dict=airing)
|
fmts, subs = self._extract_formats_and_subtitles(broadcast, video_id)
|
||||||
m3u8_url = traverse_obj(self._download_json(
|
formats.extend(fmts)
|
||||||
airing['playbackUrls'][0]['href'].format(scenario='browser~csai'), video_id,
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
note=f'Downloading {format_id} stream info JSON',
|
|
||||||
errnote=f'Failed to download {format_id} stream info, skipping',
|
|
||||||
fatal=False, headers={
|
|
||||||
'Authorization': self._access_token,
|
|
||||||
'Accept': 'application/vnd.media-service+json; version=2',
|
|
||||||
}), ('stream', 'complete', {url_or_none}))
|
|
||||||
if not m3u8_url:
|
|
||||||
continue
|
|
||||||
f, s = self._extract_m3u8_formats_and_subtitles(
|
|
||||||
m3u8_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
|
|
||||||
formats.extend(f)
|
|
||||||
self._merge_subtitles(s, target=subtitles)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': traverse_obj(airings, (..., 'titles', 0, 'episodeName'), get_all=False),
|
'title': join_nonempty(
|
||||||
'is_live': traverse_obj(airings, (..., 'mediaConfig', 'productType'), get_all=False) == 'LIVE',
|
traverse_obj(metadata, ('officialDate', {str})),
|
||||||
|
traverse_obj(metadata, ('teams', ('away', 'home'), 'team', 'name', {str}, all, {' @ '.join})),
|
||||||
|
delim=' - '),
|
||||||
|
'is_live': traverse_obj(broadcasts, (..., 'mediaState', 'mediaStateCode', {str}, any)) == 'MEDIA_ON',
|
||||||
|
'release_timestamp': traverse_obj(metadata, ('gameDate', {parse_iso8601})),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'http_headers': {'Authorization': f'Bearer {self._access_token}'},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -23,7 +23,6 @@ from ..utils import (
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_qs,
|
parse_qs,
|
||||||
qualities,
|
qualities,
|
||||||
remove_start,
|
|
||||||
srt_subtitles_timecode,
|
srt_subtitles_timecode,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
|
@ -254,7 +253,16 @@ class TikTokBaseIE(InfoExtractor):
|
||||||
|
|
||||||
def _get_subtitles(self, aweme_detail, aweme_id, user_name):
|
def _get_subtitles(self, aweme_detail, aweme_id, user_name):
|
||||||
# TODO: Extract text positioning info
|
# TODO: Extract text positioning info
|
||||||
|
|
||||||
|
EXT_MAP = { # From lowest to highest preference
|
||||||
|
'creator_caption': 'json',
|
||||||
|
'srt': 'srt',
|
||||||
|
'webvtt': 'vtt',
|
||||||
|
}
|
||||||
|
preference = qualities(tuple(EXT_MAP.values()))
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
|
|
||||||
# aweme/detail endpoint subs
|
# aweme/detail endpoint subs
|
||||||
captions_info = traverse_obj(
|
captions_info = traverse_obj(
|
||||||
aweme_detail, ('interaction_stickers', ..., 'auto_video_caption_info', 'auto_captions', ...), expected_type=dict)
|
aweme_detail, ('interaction_stickers', ..., 'auto_video_caption_info', 'auto_captions', ...), expected_type=dict)
|
||||||
|
@ -278,8 +286,8 @@ class TikTokBaseIE(InfoExtractor):
|
||||||
if not caption.get('url'):
|
if not caption.get('url'):
|
||||||
continue
|
continue
|
||||||
subtitles.setdefault(caption.get('lang') or 'en', []).append({
|
subtitles.setdefault(caption.get('lang') or 'en', []).append({
|
||||||
'ext': remove_start(caption.get('caption_format'), 'web'),
|
|
||||||
'url': caption['url'],
|
'url': caption['url'],
|
||||||
|
'ext': EXT_MAP.get(caption.get('Format')),
|
||||||
})
|
})
|
||||||
# webpage subs
|
# webpage subs
|
||||||
if not subtitles:
|
if not subtitles:
|
||||||
|
@ -288,9 +296,14 @@ class TikTokBaseIE(InfoExtractor):
|
||||||
self._create_url(user_name, aweme_id), aweme_id, fatal=False)
|
self._create_url(user_name, aweme_id), aweme_id, fatal=False)
|
||||||
for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])):
|
for caption in traverse_obj(aweme_detail, ('video', 'subtitleInfos', lambda _, v: v['Url'])):
|
||||||
subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({
|
subtitles.setdefault(caption.get('LanguageCodeName') or 'en', []).append({
|
||||||
'ext': remove_start(caption.get('Format'), 'web'),
|
|
||||||
'url': caption['Url'],
|
'url': caption['Url'],
|
||||||
|
'ext': EXT_MAP.get(caption.get('Format')),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Deprioritize creator_caption json since it can't be embedded or used by media players
|
||||||
|
for lang, subs_list in subtitles.items():
|
||||||
|
subtitles[lang] = sorted(subs_list, key=lambda x: preference(x['ext']))
|
||||||
|
|
||||||
return subtitles
|
return subtitles
|
||||||
|
|
||||||
def _parse_url_key(self, url_key):
|
def _parse_url_key(self, url_key):
|
||||||
|
|
|
@ -212,16 +212,6 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||||
owner = video_data.get('owner') or {}
|
owner = video_data.get('owner') or {}
|
||||||
video_uploader_url = owner.get('url')
|
video_uploader_url = owner.get('url')
|
||||||
|
|
||||||
duration = int_or_none(video_data.get('duration'))
|
|
||||||
chapter_data = try_get(config, lambda x: x['embed']['chapters']) or []
|
|
||||||
chapters = [{
|
|
||||||
'title': current_chapter.get('title'),
|
|
||||||
'start_time': current_chapter.get('timecode'),
|
|
||||||
'end_time': next_chapter.get('timecode'),
|
|
||||||
} for current_chapter, next_chapter in zip(chapter_data, chapter_data[1:] + [{'timecode': duration}])]
|
|
||||||
if chapters and chapters[0]['start_time']: # Chapters may not start from 0
|
|
||||||
chapters[:0] = [{'title': '<Untitled>', 'start_time': 0, 'end_time': chapters[0]['start_time']}]
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': str_or_none(video_data.get('id')) or video_id,
|
'id': str_or_none(video_data.get('id')) or video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
|
@ -229,8 +219,12 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||||
'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None,
|
'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None,
|
||||||
'uploader_url': video_uploader_url,
|
'uploader_url': video_uploader_url,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'duration': duration,
|
'duration': int_or_none(video_data.get('duration')),
|
||||||
'chapters': chapters or None,
|
'chapters': sorted(traverse_obj(config, (
|
||||||
|
'embed', 'chapters', lambda _, v: int(v['timecode']) is not None, {
|
||||||
|
'title': ('title', {str}),
|
||||||
|
'start_time': ('timecode', {int_or_none}),
|
||||||
|
})), key=lambda c: c['start_time']) or None,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'live_status': live_status,
|
'live_status': live_status,
|
||||||
|
@ -708,6 +702,39 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# chapters must be sorted, see: https://github.com/yt-dlp/yt-dlp/issues/5308
|
||||||
|
'url': 'https://player.vimeo.com/video/756714419',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '756714419',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Dr Arielle Schwartz - Therapeutic yoga for optimum sleep',
|
||||||
|
'uploader': 'Alex Howard',
|
||||||
|
'uploader_id': 'user54729178',
|
||||||
|
'uploader_url': 'https://vimeo.com/user54729178',
|
||||||
|
'thumbnail': r're:https://i\.vimeocdn\.com/video/1520099929-[\da-f]+-d_1280',
|
||||||
|
'duration': 2636,
|
||||||
|
'chapters': [
|
||||||
|
{'start_time': 0, 'end_time': 10, 'title': '<Untitled Chapter 1>'},
|
||||||
|
{'start_time': 10, 'end_time': 106, 'title': 'Welcoming Dr Arielle Schwartz'},
|
||||||
|
{'start_time': 106, 'end_time': 305, 'title': 'What is therapeutic yoga?'},
|
||||||
|
{'start_time': 305, 'end_time': 594, 'title': 'Vagal toning practices'},
|
||||||
|
{'start_time': 594, 'end_time': 888, 'title': 'Trauma and difficulty letting go'},
|
||||||
|
{'start_time': 888, 'end_time': 1059, 'title': "Dr Schwartz' insomnia experience"},
|
||||||
|
{'start_time': 1059, 'end_time': 1471, 'title': 'A strategy for helping sleep issues'},
|
||||||
|
{'start_time': 1471, 'end_time': 1667, 'title': 'Yoga nidra'},
|
||||||
|
{'start_time': 1667, 'end_time': 2121, 'title': 'Wisdom in stillness'},
|
||||||
|
{'start_time': 2121, 'end_time': 2386, 'title': 'What helps us be more able to let go?'},
|
||||||
|
{'start_time': 2386, 'end_time': 2510, 'title': 'Practical tips to help ourselves'},
|
||||||
|
{'start_time': 2510, 'end_time': 2636, 'title': 'Where to find out more'},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'http_headers': {'Referer': 'https://sleepsuperconference.com'},
|
||||||
|
'skip_download': 'm3u8',
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Failed to parse XML: not well-formed'],
|
||||||
|
},
|
||||||
{
|
{
|
||||||
# user playlist alias -> https://vimeo.com/258705797
|
# user playlist alias -> https://vimeo.com/258705797
|
||||||
'url': 'https://vimeo.com/user26785108/newspiritualguide',
|
'url': 'https://vimeo.com/user26785108/newspiritualguide',
|
||||||
|
|
|
@ -3160,7 +3160,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
|
|
||||||
def _extract_n_function_name(self, jscode):
|
def _extract_n_function_name(self, jscode):
|
||||||
funcname, idx = self._search_regex(
|
funcname, idx = self._search_regex(
|
||||||
r'''(?x)(?:\.get\("n"\)\)&&\(b=|b=String\.fromCharCode\(110\),c=a\.get\(b\)\)&&\(c=)
|
r'''(?x)
|
||||||
|
(?:
|
||||||
|
\.get\("n"\)\)&&\(b=|
|
||||||
|
(?:
|
||||||
|
b=String\.fromCharCode\(110\)|
|
||||||
|
([a-zA-Z0-9$.]+)&&\(b="nn"\[\+\1\]
|
||||||
|
),c=a\.get\(b\)\)&&\(c=
|
||||||
|
)
|
||||||
(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)''',
|
(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)''',
|
||||||
jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
|
jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
|
||||||
if not idx:
|
if not idx:
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
# Autogenerated by devscripts/update-version.py
|
# Autogenerated by devscripts/update-version.py
|
||||||
|
|
||||||
__version__ = '2024.07.16'
|
__version__ = '2024.07.25'
|
||||||
|
|
||||||
RELEASE_GIT_HEAD = '89a161e8c62569a662deda1c948664152efcb6b4'
|
RELEASE_GIT_HEAD = 'f0993391e6052ec8f7aacc286609564f226943b9'
|
||||||
|
|
||||||
VARIANT = None
|
VARIANT = None
|
||||||
|
|
||||||
|
@ -12,4 +12,4 @@ CHANNEL = 'stable'
|
||||||
|
|
||||||
ORIGIN = 'yt-dlp/yt-dlp'
|
ORIGIN = 'yt-dlp/yt-dlp'
|
||||||
|
|
||||||
_pkg_version = '2024.07.16'
|
_pkg_version = '2024.07.25'
|
||||||
|
|
Loading…
Reference in a new issue