mirror of
https://github.com/yt-dlp/yt-dlp
synced 2024-12-28 22:24:34 +01:00
Refactored ZenYandexIE to parse metadata from `var _params`
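Refactored ZenYandexIE to parse metadata from `var _params` instead of the old `serverState` blob. Stream URLs are now read from `ssrData.streamsResponse`, with MPD/M3U8 detection kept and a direct-URL fallback added for any other extension. Title, description, and thumbnail extraction, along with duration, view count, timestamp, and uploader, now use the new SSR `videoMetaResponse` fields.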
This commit is contained in:
parent
3905f64920
commit
4c5482d570
1 changed file with 21 additions and 13 deletions
|
@ -258,34 +258,42 @@ class ZenYandexIE(InfoExtractor):
|
||||||
video_id = self._match_id(redirect)
|
video_id = self._match_id(redirect)
|
||||||
webpage = self._download_webpage(redirect, video_id, note='Redirecting')
|
webpage = self._download_webpage(redirect, video_id, note='Redirecting')
|
||||||
data_json = self._search_json(
|
data_json = self._search_json(
|
||||||
r'("data"\s*:|data\s*=)', webpage, 'metadata', video_id, contains_pattern=r'{["\']_*serverState_*video.+}')
|
r'var _params=\(', webpage, 'metadata', video_id, contains_pattern=r'{.+streamsResponse.+}')
|
||||||
serverstate = self._search_regex(r'(_+serverState_+video-site_[^_]+_+)', webpage, 'server state')
|
|
||||||
uploader = self._search_regex(r'(<a\s*class=["\']card-channel-link[^"\']+["\'][^>]+>)',
|
uploader = self._search_regex(r'(<a\s*class=["\']card-channel-link[^"\']+["\'][^>]+>)',
|
||||||
webpage, 'uploader', default='<a>')
|
webpage, 'uploader', default='<a>')
|
||||||
uploader_name = extract_attributes(uploader).get('aria-label')
|
uploader_name = extract_attributes(uploader).get('aria-label')
|
||||||
item_id = traverse_obj(data_json, (serverstate, 'videoViewer', 'openedItemId', {str}))
|
video_meta = traverse_obj(data_json, ('ssrData', 'videoMetaResponse', {dict}))
|
||||||
video_json = traverse_obj(data_json, (serverstate, 'videoViewer', 'items', item_id, {dict})) or {}
|
video_meta2 = traverse_obj(video_meta, ('video', {dict}))
|
||||||
|
m3u8_url = traverse_obj(video_meta2, ('id', {str}))
|
||||||
|
streams = traverse_obj(data_json, ('ssrData', 'streamsResponse', 'SingleStream', 0, {dict}))
|
||||||
|
|
||||||
formats, subtitles = [], {}
|
formats, subtitles = [], {}
|
||||||
for s_url in traverse_obj(video_json, ('video', 'streams', ..., {url_or_none})):
|
for s_url in traverse_obj(streams, ('StreamInfo', ..., "OutputStream", {url_or_none})):
|
||||||
ext = determine_ext(s_url)
|
ext = determine_ext(s_url)
|
||||||
|
|
||||||
if ext == 'mpd':
|
if ext == 'mpd':
|
||||||
fmts, subs = self._extract_mpd_formats_and_subtitles(s_url, video_id, mpd_id='dash')
|
fmts, subs = self._extract_mpd_formats_and_subtitles(s_url, video_id, mpd_id='dash')
|
||||||
elif ext == 'm3u8':
|
elif ext == 'm3u8':
|
||||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(s_url, video_id, 'mp4')
|
fmts, subs = self._extract_m3u8_formats_and_subtitles(s_url, video_id, 'mp4')
|
||||||
|
else:
|
||||||
|
fmts = [{'url': s_url}]
|
||||||
|
subs = {}
|
||||||
|
|
||||||
formats.extend(fmts)
|
formats.extend(fmts)
|
||||||
subtitles = self._merge_subtitles(subtitles, subs)
|
subtitles = self._merge_subtitles(subtitles, subs)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_json.get('title') or self._og_search_title(webpage),
|
'title': video_meta.get('title') or self._og_search_title(webpage),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'duration': int_or_none(video_json.get('duration')),
|
'duration': int_or_none(video_meta2.get('duration')),
|
||||||
'view_count': int_or_none(video_json.get('views')),
|
'view_count': int_or_none(video_meta2.get('views')),
|
||||||
'timestamp': int_or_none(video_json.get('publicationDate')),
|
'timestamp': int_or_none(video_meta.get('publicationDate')),
|
||||||
'uploader': uploader_name or data_json.get('authorName') or try_get(data_json, lambda x: x['publisher']['name']),
|
'uploader': uploader_name or try_get(video_meta, lambda x: x['source']['title']),
|
||||||
'description': video_json.get('description') or self._og_search_description(webpage),
|
'description': video_meta.get('description') or self._og_search_description(webpage),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage) or try_get(data_json, lambda x: x['og']['imageUrl']),
|
'thumbnail': self._og_search_thumbnail(webpage) or video_meta.get('image'),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue