[funimation] Extract subtitles (#434)

Closes #420, https://github.com/ytdl-org/youtube-dl/issues/25645
Related: https://github.com/ytdl-org/youtube-dl/pull/24906

Authored by: Mevious
This commit is contained in:
Mevious 2021-06-22 18:57:53 -07:00 committed by GitHub
parent 9fc0de5796
commit 29f63c9672
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -10,8 +10,9 @@ from ..utils import (
determine_ext,
int_or_none,
js_to_json,
urlencode_postdata,
urljoin,
ExtractorError,
urlencode_postdata
)
@ -109,6 +110,7 @@ class FunimationIE(InfoExtractor):
if series:
title = '%s - %s' % (series, title)
description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True)
subtitles = self.extract_subtitles(url, video_id, display_id)
try:
headers = {}
@ -153,6 +155,24 @@ class FunimationIE(InfoExtractor):
'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')),
'episode_number': int_or_none(title_data.get('episodeNum')),
'episode': episode,
'subtitles': subtitles,
'season_id': title_data.get('seriesId'),
'formats': formats,
}
def _get_subtitles(self, url, video_id, display_id):
player_url = urljoin(url, '/player/' + video_id)
player_page = self._download_webpage(player_url, display_id)
text_tracks_json_string = self._search_regex(
r'"textTracks": (\[{.+?}\])',
player_page, 'subtitles data', default='')
text_tracks = self._parse_json(
text_tracks_json_string, display_id, js_to_json, fatal=False) or []
subtitles = {}
for text_track in text_tracks:
url_element = {'url': text_track.get('src')}
language = text_track.get('language')
if text_track.get('type') == 'CC':
language += '_CC'
subtitles.setdefault(language, []).append(url_element)
return subtitles