From 0743fbd6e9cdd0fa4269705e46baa09d52c40530 Mon Sep 17 00:00:00 2001
From: Mozi <29089388+pzhlkj6612@users.noreply.github.com>
Date: Thu, 5 Sep 2024 16:50:54 +0000
Subject: [PATCH] [ie/espn] Add subtitles extraction; fix clip regex in
 articles

None of the "only_matching" URLs in "ESPNArticleIE" has an extractable video.
---
 yt_dlp/extractor/espn.py | 36 +++++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 11 deletions(-)
diff --git a/yt_dlp/extractor/espn.py b/yt_dlp/extractor/espn.py
index 4e9b63524e..08e8e5be31 100644
--- a/yt_dlp/extractor/espn.py
+++ b/yt_dlp/extractor/espn.py
@@ -113,6 +113,7 @@ class ESPNIE(OnceIE):
 
         format_urls = set()
         formats = []
+        subtitles = {}
 
         def traverse_source(source, base_source_id=None):
             for src_id, src_item in source.items():
@@ -140,9 +141,11 @@ class ESPNIE(OnceIE):
                 formats.extend(self._extract_f4m_formats(
                     source_url, video_id, f4m_id=source_id, fatal=False))
             elif ext == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(
+                m3u8_frmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                     source_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id=source_id, fatal=False))
+                    m3u8_id=source_id, fatal=False)
+                formats.extend(m3u8_frmts)
+                self._merge_subtitles(m3u8_subs, target=subtitles)
             else:
                 f = {
                     'url': source_url,
@@ -176,12 +179,26 @@ class ESPNIE(OnceIE):
             'timestamp': timestamp,
             'duration': duration,
             'formats': formats,
+            'subtitles': subtitles,
         }
 
 
 class ESPNArticleIE(InfoExtractor):
     _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
     _TESTS = [{
+        'url': 'https://www.espn.com/college-football/game/_/gameId/401520427',
+        'info_dict': {
+            'id': '401520427',
+            'title': 'Alabama 27-24 Auburn (Nov 25, 2023) Final Score - ESPN',
+            'description': 'Game summary of the Alabama Crimson Tide vs. Auburn Tigers NCAAF game, final score 27-24, from November 25, 2023 on ESPN.',
+            'entries': [{
+                'id': '38979520',
+            }, {
+                'id': '38981707',
+            }],
+        },
+        'playlist_count': 2,
+    }, {
         'url': 'http://espn.go.com/nba/recap?gameId=400793786',
         'only_matching': True,
     }, {
@@ -200,16 +217,13 @@ class ESPNArticleIE(InfoExtractor):
         return False if (ESPNIE.suitable(url) or WatchESPNIE.suitable(url)) else super().suitable(url)
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
 
-        webpage = self._download_webpage(url, video_id)
-
-        video_id = self._search_regex(
-            r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
-            webpage, 'video id', group='id')
-
-        return self.url_result(
-            f'http://espn.go.com/video/clip?id={video_id}', ESPNIE.ie_key())
+        return self.playlist_result(traverse_obj(re.finditer(
+            r'class=(["\']).*?Media.*?\1[^>]+data-videoid=["\'](?P<id>\d+)', webpage), (..., 'id', {
+                lambda x: self.url_result(f'http://espn.go.com/video/clip?id={x}', ESPNIE.ie_key(), x),
+            })), playlist_id, self._html_extract_title(webpage), self._html_search_meta('description', webpage))
 
 
 class FiveThirtyEightIE(InfoExtractor):