mirror of
https://github.com/yt-dlp/yt-dlp
synced 2024-12-28 22:24:34 +01:00
[youtube:channel] Fix extraction (fixes #4435)
It now uses the same pagination system as playlists.
This commit is contained in:
parent
baa7081d68
commit
23d3608c6b
1 changed file with 13 additions and 8 deletions
|
@@ -1269,8 +1269,6 @@ class YoutubeTopListIE(YoutubePlaylistIE):
|
|||
class YoutubeChannelIE(InfoExtractor):
|
||||
IE_DESC = 'YouTube.com channels'
|
||||
_VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
|
||||
_MORE_PAGES_INDICATOR = 'yt-uix-load-more'
|
||||
_MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
|
||||
IE_NAME = 'youtube:channel'
|
||||
_TESTS = [{
|
||||
'note': 'paginated channel',
|
||||
|
@@ -1307,20 +1305,27 @@ class YoutubeChannelIE(InfoExtractor):
|
|||
return self.playlist_result(entries, channel_id)
|
||||
|
||||
def _entries():
|
||||
more_widget_html = content_html = channel_page
|
||||
for pagenum in itertools.count(1):
|
||||
url = self._MORE_PAGES_URL % (pagenum, channel_id)
|
||||
page = self._download_json(
|
||||
url, channel_id, note='Downloading page #%s' % pagenum,
|
||||
transform_source=uppercase_escape)
|
||||
|
||||
ids_in_page = self.extract_videos_from_page(page['content_html'])
|
||||
ids_in_page = self.extract_videos_from_page(content_html)
|
||||
for video_id in ids_in_page:
|
||||
yield self.url_result(
|
||||
video_id, 'Youtube', video_id=video_id)
|
||||
|
||||
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
|
||||
mobj = re.search(
|
||||
r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
|
||||
more_widget_html)
|
||||
if not mobj:
|
||||
break
|
||||
|
||||
more = self._download_json(
|
||||
'https://youtube.com/%s' % mobj.group('more'), channel_id,
|
||||
'Downloading page #%s' % (pagenum + 1),
|
||||
transform_source=uppercase_escape)
|
||||
content_html = more['content_html']
|
||||
more_widget_html = more['load_more_widget_html']
|
||||
|
||||
return self.playlist_result(_entries(), channel_id)
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue