mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-13 20:01:57 +01:00
parent
cb96c5be70
commit
f5f15c9993
1 changed file with 45 additions and 22 deletions
|
@ -15,6 +15,7 @@ from ..compat import (
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
filter_dict,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
|
@ -755,15 +756,21 @@ class BiliIntlBaseIE(InfoExtractor):
|
||||||
for i, line in enumerate(json['body']) if line.get('content'))
|
for i, line in enumerate(json['body']) if line.get('content'))
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def _get_subtitles(self, ep_id):
|
def _get_subtitles(self, *, ep_id=None, aid=None):
|
||||||
sub_json = self._call_api(f'/web/v2/subtitle?episode_id={ep_id}&platform=web', ep_id)
|
sub_json = self._call_api(
|
||||||
|
'/web/v2/subtitle', ep_id or aid, note='Downloading subtitles list',
|
||||||
|
errnote='Unable to download subtitles list', query=filter_dict({
|
||||||
|
'platform': 'web',
|
||||||
|
'episode_id': ep_id,
|
||||||
|
'aid': aid,
|
||||||
|
}))
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for sub in sub_json.get('subtitles') or []:
|
for sub in sub_json.get('subtitles') or []:
|
||||||
sub_url = sub.get('url')
|
sub_url = sub.get('url')
|
||||||
if not sub_url:
|
if not sub_url:
|
||||||
continue
|
continue
|
||||||
sub_data = self._download_json(
|
sub_data = self._download_json(
|
||||||
sub_url, ep_id, errnote='Unable to download subtitles', fatal=False,
|
sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
|
||||||
note='Downloading subtitles%s' % f' for {sub["lang"]}' if sub.get('lang') else '')
|
note='Downloading subtitles%s' % f' for {sub["lang"]}' if sub.get('lang') else '')
|
||||||
if not sub_data:
|
if not sub_data:
|
||||||
continue
|
continue
|
||||||
|
@ -773,9 +780,14 @@ class BiliIntlBaseIE(InfoExtractor):
|
||||||
})
|
})
|
||||||
return subtitles
|
return subtitles
|
||||||
|
|
||||||
def _get_formats(self, ep_id):
|
def _get_formats(self, *, ep_id=None, aid=None):
|
||||||
video_json = self._call_api(f'/web/playurl?ep_id={ep_id}&platform=web', ep_id,
|
video_json = self._call_api(
|
||||||
note='Downloading video formats', errnote='Unable to download video formats')
|
'/web/playurl', ep_id or aid, note='Downloading video formats',
|
||||||
|
errnote='Unable to download video formats', query=filter_dict({
|
||||||
|
'platform': 'web',
|
||||||
|
'ep_id': ep_id,
|
||||||
|
'aid': aid,
|
||||||
|
}))
|
||||||
video_json = video_json['playurl']
|
video_json = video_json['playurl']
|
||||||
formats = []
|
formats = []
|
||||||
for vid in video_json.get('video') or []:
|
for vid in video_json.get('video') or []:
|
||||||
|
@ -809,15 +821,15 @@ class BiliIntlBaseIE(InfoExtractor):
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _extract_ep_info(self, episode_data, ep_id):
|
def _extract_video_info(self, video_data, *, ep_id=None, aid=None):
|
||||||
return {
|
return {
|
||||||
'id': ep_id,
|
'id': ep_id or aid,
|
||||||
'title': episode_data.get('title_display') or episode_data['title'],
|
'title': video_data.get('title_display') or video_data.get('title'),
|
||||||
'thumbnail': episode_data.get('cover'),
|
'thumbnail': video_data.get('cover'),
|
||||||
'episode_number': int_or_none(self._search_regex(
|
'episode_number': int_or_none(self._search_regex(
|
||||||
r'^E(\d+)(?:$| - )', episode_data.get('title_display'), 'episode number', default=None)),
|
r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
|
||||||
'formats': self._get_formats(ep_id),
|
'formats': self._get_formats(ep_id=ep_id, aid=aid),
|
||||||
'subtitles': self._get_subtitles(ep_id),
|
'subtitles': self._get_subtitles(ep_id=ep_id, aid=aid),
|
||||||
'extractor_key': BiliIntlIE.ie_key(),
|
'extractor_key': BiliIntlIE.ie_key(),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -854,7 +866,7 @@ class BiliIntlBaseIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class BiliIntlIE(BiliIntlBaseIE):
|
class BiliIntlIE(BiliIntlBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-z]{2}/)?play/(?P<season_id>\d+)/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# Bstation page
|
# Bstation page
|
||||||
'url': 'https://www.bilibili.tv/en/play/34613/341736',
|
'url': 'https://www.bilibili.tv/en/play/34613/341736',
|
||||||
|
@ -889,24 +901,35 @@ class BiliIntlIE(BiliIntlBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.biliintl.com/en/play/34613/341736',
|
'url': 'https://www.biliintl.com/en/play/34613/341736',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# User-generated content (as opposed to a series licensed from a studio)
|
||||||
|
'url': 'https://bilibili.tv/en/video/2019955076',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# No language in URL
|
||||||
|
'url': 'https://www.bilibili.tv/video/2019955076',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
season_id, video_id = self._match_valid_url(url).groups()
|
season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
|
||||||
|
video_id = ep_id or aid
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
# Bstation layout
|
# Bstation layout
|
||||||
initial_data = self._parse_json(self._search_regex(
|
initial_data = self._parse_json(self._search_regex(
|
||||||
r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
|
r'window\.__INITIAL_(?:DATA|STATE)__\s*=\s*({.+?});', webpage,
|
||||||
'preload state', default='{}'), video_id, fatal=False) or {}
|
'preload state', default='{}'), video_id, fatal=False) or {}
|
||||||
episode_data = traverse_obj(initial_data, ('OgvVideo', 'epDetail'), expected_type=dict)
|
video_data = (
|
||||||
|
traverse_obj(initial_data, ('OgvVideo', 'epDetail'), expected_type=dict)
|
||||||
|
or traverse_obj(initial_data, ('UgcVideo', 'videoData'), expected_type=dict) or {})
|
||||||
|
|
||||||
if not episode_data:
|
if season_id and not video_data:
|
||||||
# Non-Bstation layout, read through episode list
|
# Non-Bstation layout, read through episode list
|
||||||
season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
|
season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
|
||||||
episode_data = next(
|
video_data = next(
|
||||||
episode for episode in traverse_obj(season_json, ('sections', ..., 'episodes', ...), expected_type=dict)
|
episode for episode in traverse_obj(season_json, ('sections', ..., 'episodes', ...), expected_type=dict)
|
||||||
if str(episode.get('episode_id')) == video_id)
|
if str(episode.get('episode_id')) == ep_id)
|
||||||
return self._extract_ep_info(episode_data, video_id)
|
return self._extract_video_info(video_data, ep_id=ep_id, aid=aid)
|
||||||
|
|
||||||
|
|
||||||
class BiliIntlSeriesIE(BiliIntlBaseIE):
|
class BiliIntlSeriesIE(BiliIntlBaseIE):
|
||||||
|
@ -934,7 +957,7 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
|
||||||
series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
|
series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
|
||||||
for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict, default=[]):
|
for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict, default=[]):
|
||||||
episode_id = str(episode.get('episode_id'))
|
episode_id = str(episode.get('episode_id'))
|
||||||
yield self._extract_ep_info(episode, episode_id)
|
yield self._extract_video_info(episode, ep_id=episode_id)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
series_id = self._match_id(url)
|
series_id = self._match_id(url)
|
||||||
|
|
Loading…
Reference in a new issue