From 1d77424989f22d8369eccc019c4a32819fd63776 Mon Sep 17 00:00:00 2001 From: c-basalt <117849907+c-basalt@users.noreply.github.com> Date: Tue, 10 Dec 2024 23:33:41 -0500 Subject: [PATCH] improve list extract --- yt_dlp/extractor/niconicochannelplus.py | 26 +++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/niconicochannelplus.py b/yt_dlp/extractor/niconicochannelplus.py index e5041ec035..c8699eebac 100644 --- a/yt_dlp/extractor/niconicochannelplus.py +++ b/yt_dlp/extractor/niconicochannelplus.py @@ -23,12 +23,18 @@ class NiconicoChannelPlusBaseIE(InfoExtractor): _DOMAIN_SITE_ID = {} _CHANNEL_NAMES = {} _CHANNEL_AGE_LIMIT = {} + _SUITABLE_DOMAINS = set() def _get_settings(self, url, video_id=None): base_url = urljoin(url, '/') if base_url not in self._SITE_SETTINGS: - self._SITE_SETTINGS[base_url] = self._download_json( + site_settings = self._download_json( urljoin(base_url, '/site/settings.json'), video_id, note='Downloading site settings') + if 'api_base_url' not in site_settings or 'fanclub_site_id' not in site_settings: + raise ExtractorError('Unable to get site settings') + self._SITE_SETTINGS[base_url] = site_settings + self._SUITABLE_DOMAINS.add(urllib.parse.urlparse(url).netloc) + if self._SITE_SETTINGS[base_url].get('platform_id') not in ['CHPL', 'SHTA', 'JOQR', 'TKFM']: self.report_warning(f'Unknown platform type: {self._SITE_SETTINGS[base_url].get("platform_id")}') return self._SITE_SETTINGS[base_url] @@ -162,13 +168,19 @@ class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE): 'skip': 'subscriber only', }] + @staticmethod + def _match_video_id(url): + return re.search(r'/(?:video|audio|live)/(?P<id>sm\w+)', urllib.parse.urlparse(url).path) + + @classmethod + def suitable(cls, url): + return super().suitable(url) or ( + urllib.parse.urlparse(url).netloc in cls._SUITABLE_DOMAINS and cls._match_video_id(url)) + def _extract_from_webpage(self, url, webpage): if self._match_video_id(url) and
self._is_channel_plus_webpage(webpage): yield self._real_extract(url) - def _match_video_id(self, url): - return re.search(r'/(?:video|audio|live)/(?P<id>sm\w+)', urllib.parse.urlparse(url).path) - def _real_extract(self, url): video_id = self._match_video_id(url).group('id') @@ -295,9 +307,11 @@ class NiconicoChannelPlusChannelBaseIE(NiconicoChannelPlusBaseIE): note=f'Getting channel info (page {page + 1})', errnote=f'Unable to get channel info (page {page + 1})') - for content_code in traverse_obj(response, ('data', 'video_pages', 'list', ..., 'content_code')): + for entry in traverse_obj(response, ('data', 'video_pages', 'list', lambda _, v: v['content_code'])): # "video/{content_code}" works for both VOD and live, but "live/{content_code}" doesn't work for VOD - yield self.url_result(f'{self._get_channel_url(site_url)}/video/{content_code}') + yield self.url_result( + f'{self._get_channel_url(site_url)}/video/{entry["content_code"]}', ie=NiconicoChannelPlusIE, + video_id=entry['content_code'], video_title=entry.get('title')) class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE):