mirror of
https://github.com/yt-dlp/yt-dlp
synced 2024-12-26 21:59:08 +01:00
[ie/espn] Add subtitles extraction; fix clip regex in articles
No video can be extracted from any of the "only_matching" URLs in "ESPNArticleIE".
This commit is contained in:
parent
e8e6a982a1
commit
0743fbd6e9
1 changed file with 25 additions and 11 deletions
|
@ -113,6 +113,7 @@ class ESPNIE(OnceIE):
|
||||||
|
|
||||||
format_urls = set()
|
format_urls = set()
|
||||||
formats = []
|
formats = []
|
||||||
|
subtitles = {}
|
||||||
|
|
||||||
def traverse_source(source, base_source_id=None):
|
def traverse_source(source, base_source_id=None):
|
||||||
for src_id, src_item in source.items():
|
for src_id, src_item in source.items():
|
||||||
|
@ -140,9 +141,11 @@ class ESPNIE(OnceIE):
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
source_url, video_id, f4m_id=source_id, fatal=False))
|
source_url, video_id, f4m_id=source_id, fatal=False))
|
||||||
elif ext == 'm3u8':
|
elif ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
m3u8_frmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
m3u8_id=source_id, fatal=False))
|
m3u8_id=source_id, fatal=False)
|
||||||
|
formats.extend(m3u8_frmts)
|
||||||
|
self._merge_subtitles(m3u8_subs, target=subtitles)
|
||||||
else:
|
else:
|
||||||
f = {
|
f = {
|
||||||
'url': source_url,
|
'url': source_url,
|
||||||
|
@ -176,12 +179,26 @@ class ESPNIE(OnceIE):
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class ESPNArticleIE(InfoExtractor):
|
class ESPNArticleIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
_VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
'url': 'https://www.espn.com/college-football/game/_/gameId/401520427',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '401520427',
|
||||||
|
'title': 'Alabama 27-24 Auburn (Nov 25, 2023) Final Score - ESPN',
|
||||||
|
'description': 'Game summary of the Alabama Crimson Tide vs. Auburn Tigers NCAAF game, final score 27-24, from November 25, 2023 on ESPN.',
|
||||||
|
'entries': [{
|
||||||
|
'id': '38979520',
|
||||||
|
}, {
|
||||||
|
'id': '38981707',
|
||||||
|
}],
|
||||||
|
},
|
||||||
|
'playlist_count': 2,
|
||||||
|
}, {
|
||||||
'url': 'http://espn.go.com/nba/recap?gameId=400793786',
|
'url': 'http://espn.go.com/nba/recap?gameId=400793786',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
|
@ -200,16 +217,13 @@ class ESPNArticleIE(InfoExtractor):
|
||||||
return False if (ESPNIE.suitable(url) or WatchESPNIE.suitable(url)) else super().suitable(url)
|
return False if (ESPNIE.suitable(url) or WatchESPNIE.suitable(url)) else super().suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
return self.playlist_result(traverse_obj(re.finditer(
|
||||||
|
r'class=(["\']).*?Media.*?\1[^>]+data-videoid=["\'](?P<id>\d+)', webpage), (..., 'id', {
|
||||||
video_id = self._search_regex(
|
lambda x: self.url_result(f'http://espn.go.com/video/clip?id={x}', ESPNIE.ie_key(), x),
|
||||||
r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
|
})), playlist_id, self._html_extract_title(webpage), self._html_search_meta('description', webpage))
|
||||||
webpage, 'video id', group='id')
|
|
||||||
|
|
||||||
return self.url_result(
|
|
||||||
f'http://espn.go.com/video/clip?id={video_id}', ESPNIE.ie_key())
|
|
||||||
|
|
||||||
|
|
||||||
class FiveThirtyEightIE(InfoExtractor):
|
class FiveThirtyEightIE(InfoExtractor):
|
||||||
|
|
Loading…
Reference in a new issue