mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-01 06:21:09 +01:00
generic extraction
This commit is contained in:
parent
29a955d63d
commit
941c50b699
2 changed files with 23 additions and 10 deletions
|
@ -2393,7 +2393,7 @@ class GenericIE(InfoExtractor):
|
||||||
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
full_response = self._request_webpage(url, video_id, headers=filter_dict({
|
||||||
'Accept-Encoding': 'identity',
|
'Accept-Encoding': 'identity',
|
||||||
'Referer': smuggled_data.get('referer'),
|
'Referer': smuggled_data.get('referer'),
|
||||||
}), impersonate=impersonate)
|
}), impersonate=impersonate, expected_status=404)
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if not (isinstance(e.cause, HTTPError) and e.cause.status == 403
|
if not (isinstance(e.cause, HTTPError) and e.cause.status == 403
|
||||||
and e.cause.response.get_header('cf-mitigated') == 'challenge'
|
and e.cause.response.get_header('cf-mitigated') == 'challenge'
|
||||||
|
|
|
@ -93,6 +93,9 @@ class NiconicoChannelPlusBaseIE(InfoExtractor):
|
||||||
), ('data', 'fanclub_site', 'content_provider', 'age_limit', {int}))
|
), ('data', 'fanclub_site', 'content_provider', 'age_limit', {int}))
|
||||||
return self._CHANNEL_AGE_LIMIT[fanclub_site_id]
|
return self._CHANNEL_AGE_LIMIT[fanclub_site_id]
|
||||||
|
|
||||||
|
def _is_channel_plus_webpage(self, webpage):
|
||||||
|
return 'GTM-KXT7G5G' in webpage or 'NicoGoogleTagManagerDataLayer' in webpage
|
||||||
|
|
||||||
|
|
||||||
class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE):
|
class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE):
|
||||||
IE_NAME = 'NiconicoChannelPlus'
|
IE_NAME = 'NiconicoChannelPlus'
|
||||||
|
@ -159,22 +162,25 @@ class NiconicoChannelPlusIE(NiconicoChannelPlusBaseIE):
|
||||||
'skip': 'subscriber only',
|
'skip': 'subscriber only',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _parse_video_id(self, url):
|
def _extract_from_webpage(self, url, webpage):
|
||||||
parsed = urllib.parse.urlparse(url)
|
if self._match_video_id(url) and self._is_channel_plus_webpage(webpage):
|
||||||
return re.search(r'/(?:video|live)/(?P<id>\w+)', parsed.path)[1]
|
yield self._real_extract(url)
|
||||||
|
|
||||||
|
def _match_video_id(self, url):
|
||||||
|
return re.search(r'/(?:video|audio|live)/(?P<id>sm\w+)', urllib.parse.urlparse(url).path)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._parse_video_id(url)
|
video_id = self._match_video_id(url).group('id')
|
||||||
|
|
||||||
video_info = self._download_api_json(url, f'/video_pages/{video_id}', video_id,
|
video_info = self._download_api_json(url, f'/video_pages/{video_id}', video_id,
|
||||||
note='Downloading video info')['data']['video_page']
|
note='Downloading video info')['data']['video_page']
|
||||||
|
|
||||||
live_status, session_payload, timestamp = self._parse_live_status(video_id, video_info)
|
live_status, session_payload, timestamp = self._parse_live_status(video_id, video_info)
|
||||||
session_info = self._download_api_json(
|
session_id = self._download_api_json(
|
||||||
url, f'/video_pages/{video_id}/session_ids', video_id, data=json.dumps(session_payload).encode(),
|
url, f'/video_pages/{video_id}/session_ids', video_id, data=json.dumps(session_payload).encode(),
|
||||||
headers={'content-type': 'application/json'}, note='Downloading video session')['data']
|
headers={'content-type': 'application/json'}, note='Downloading video session')['data']['session_id']
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
video_info['video_stream']['authenticated_url'].format(**session_info), video_id)
|
video_info['video_stream']['authenticated_url'].format(session_id=session_id), video_id)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -291,8 +297,7 @@ class NiconicoChannelPlusChannelBaseIE(NiconicoChannelPlusBaseIE):
|
||||||
|
|
||||||
for content_code in traverse_obj(response, ('data', 'video_pages', 'list', ..., 'content_code')):
|
for content_code in traverse_obj(response, ('data', 'video_pages', 'list', ..., 'content_code')):
|
||||||
# "video/{content_code}" works for both VOD and live, but "live/{content_code}" doesn't work for VOD
|
# "video/{content_code}" works for both VOD and live, but "live/{content_code}" doesn't work for VOD
|
||||||
yield self.url_result(
|
yield self.url_result(f'{self._get_channel_url(site_url)}/video/{content_code}')
|
||||||
f'{self._get_channel_url(site_url)}/video/{content_code}', NiconicoChannelPlusIE)
|
|
||||||
|
|
||||||
|
|
||||||
class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE):
|
class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE):
|
||||||
|
@ -381,6 +386,10 @@ class NiconicoChannelPlusChannelVideosIE(NiconicoChannelPlusChannelBaseIE):
|
||||||
'playlist_mincount': 6,
|
'playlist_mincount': 6,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _extract_from_webpage(self, url, webpage):
|
||||||
|
if re.search(r'/videos/?(?:[\?#]|$)', url) and self._is_channel_plus_webpage(webpage):
|
||||||
|
yield self._real_extract(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
"""
|
"""
|
||||||
API parameters:
|
API parameters:
|
||||||
|
@ -444,6 +453,10 @@ class NiconicoChannelPlusChannelLivesIE(NiconicoChannelPlusChannelBaseIE):
|
||||||
'playlist_mincount': 6,
|
'playlist_mincount': 6,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _extract_from_webpage(self, url, webpage):
|
||||||
|
if re.search(r'/lives/?(?:[\?#]|$)', url) and self._is_channel_plus_webpage(webpage):
|
||||||
|
yield self._real_extract(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
"""
|
"""
|
||||||
API parameters:
|
API parameters:
|
||||||
|
|
Loading…
Reference in a new issue