Improve list extraction

2024-12-27 21:59:17 +01:00 · 2024-12-10 23:33:41 -05:00 · 2024-12-10 23:33:41 -05:00 · 1d77424989
commit 1d77424989
parent 941c50b699
1 changed file with 20 additions and 6 deletions
--- a/yt_dlp/extractor/niconicochannelplus.py
+++ b/yt_dlp/extractor/niconicochannelplus.py
@@ -23,12 +23,18 @@ class NiconicoChannelPlusBaseIE(InfoExtractor):
    _DOMAIN_SITE_ID = {}
    _CHANNEL_NAMES = {}
    _CHANNEL_AGE_LIMIT = {}
+    _SUITABLE_DOMAINS = set()

    def _get_settings(self, url, video_id=None):
        base_url = urljoin(url, '/')
        if base_url not in self._SITE_SETTINGS:
-            self._SITE_SETTINGS[base_url] = self._download_json(
+            site_settings = self._download_json(
                urljoin(base_url, '/site/settings.json'), video_id, note='Downloading site settings')
+            if 'api_base_url' not in site_settings or 'fanclub_site_id' not in site_settings:
+                raise ExtractorError('Unable to get site settings')
+            self._SITE_SETTINGS[base_url] = site_settings
+        self._SUITABLE_DOMAINS.add(urllib.parse.urlparse(url).netloc)
+
        if self._SITE_SETTINGS[base_url].get('platform_id') not in ['CHPL', 'SHTA', 'JOQR', 'TKFM']:
            self.report_warning(f'Unknown platform type: {self._SITE_SETTINGS[base_url].get("platform_id")}')
        return self._SITE_SETTINGS[base_url]
@@ -162,13 +168,19 @@ class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE):
        'skip': 'subscriber only',
    }]

+    @staticmethod
+    def _match_video_id(url):
+        return re.search(r'/(?:video|audio|live)/(?P<id>sm\w+)', urllib.parse.urlparse(url).path)
+
+    @classmethod
+    def suitable(cls, url):
+        return super().suitable(url) or (
+            urllib.parse.urlparse(url).netloc in cls._SUITABLE_DOMAINS and cls._match_video_id(url))
+
    def _extract_from_webpage(self, url, webpage):
        if self._match_video_id(url) and self._is_channel_plus_webpage(webpage):
            yield self._real_extract(url)

-    def _match_video_id(self, url):
-        return re.search(r'/(?:video|audio|live)/(?P<id>sm\w+)', urllib.parse.urlparse(url).path)
-
    def _real_extract(self, url):
        video_id = self._match_video_id(url).group('id')

@@ -295,9 +307,11 @@ class NiconicoChannelPlusChannelBaseIE(NiconicoChannelPlusBaseIE):
            note=f'Getting channel info (page {page + 1})',
            errnote=f'Unable to get channel info (page {page + 1})')

-        for content_code in traverse_obj(response, ('data', 'video_pages', 'list', ..., 'content_code')):
+        for entry in traverse_obj(response, ('data', 'video_pages', 'list', lambda _, v: v['content_code'])):
            # "video/{content_code}" works for both VOD and live, but "live/{content_code}" doesn't work for VOD
-            yield self.url_result(f'{self._get_channel_url(site_url)}/video/{content_code}')
+            yield self.url_result(
+                f'{self._get_channel_url(site_url)}/video/{entry["content_code"]}', ie=NiconicoChannelPlusIE,
+                video_id=entry['content_code'], video_title=entry.get('title'))


 class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE):