[soundcloud] Make playlist extraction lazy

This commit is contained in:
pukkandan 2021-09-05 10:26:46 +05:30
parent d98b006b85
commit aa6c25309a

View file

@ -14,7 +14,6 @@ from ..compat import (
compat_HTTPError, compat_HTTPError,
compat_kwargs, compat_kwargs,
compat_str, compat_str,
compat_urlparse,
) )
from ..utils import ( from ..utils import (
error_to_compat_str, error_to_compat_str,
@ -24,6 +23,7 @@ from ..utils import (
int_or_none, int_or_none,
KNOWN_EXTENSIONS, KNOWN_EXTENSIONS,
mimetype2ext, mimetype2ext,
parse_qs,
str_or_none, str_or_none,
try_get, try_get,
unified_timestamp, unified_timestamp,
@ -49,8 +49,7 @@ class SoundcloudEmbedIE(InfoExtractor):
webpage)] webpage)]
def _real_extract(self, url): def _real_extract(self, url):
query = compat_urlparse.parse_qs( query = parse_qs(url)
compat_urlparse.urlparse(url).query)
api_url = query['url'][0] api_url = query['url'][0]
secret_token = query.get('secret_token') secret_token = query.get('secret_token')
if secret_token: if secret_token:
@ -656,64 +655,46 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
class SoundcloudPagedPlaylistBaseIE(SoundcloudIE): class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
def _extract_playlist(self, base_url, playlist_id, playlist_title): def _extract_playlist(self, base_url, playlist_id, playlist_title):
# Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200. return {
# https://developers.soundcloud.com/blog/offset-pagination-deprecated '_type': 'playlist',
COMMON_QUERY = { 'id': playlist_id,
'limit': 200, 'title': playlist_title,
'linked_partitioning': '1', 'entries': self._entries(base_url, playlist_id),
}
def _entries(self, base_url, playlist_id):
# Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200.
# https://developers.soundcloud.com/blog/offset-pagination-deprecated
query = {
'limit': 200,
'linked_partitioning': '1',
'offset': 0,
} }
query = COMMON_QUERY.copy()
query['offset'] = 0
next_href = base_url next_href = base_url
entries = []
for i in itertools.count(): for i in itertools.count():
response = self._download_json( response = self._download_json(
next_href, playlist_id, next_href, playlist_id,
'Downloading track page %s' % (i + 1), query=query, headers=self._HEADERS) 'Downloading track page %s' % (i + 1), query=query, headers=self._HEADERS)
collection = response['collection'] def resolve_entry(*candidates):
if not isinstance(collection, list):
collection = []
# Empty collection may be returned, in this case we proceed
# straight to next_href
def resolve_entry(candidates):
for cand in candidates: for cand in candidates:
if not isinstance(cand, dict): if not isinstance(cand, dict):
continue continue
permalink_url = url_or_none(cand.get('permalink_url')) permalink_url = url_or_none(cand.get('permalink_url'))
if not permalink_url: if permalink_url:
continue return self.url_result(
return self.url_result( permalink_url,
permalink_url, SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, str_or_none(cand.get('id')), cand.get('title'))
str_or_none(cand.get('id')), cand.get('title'))
for e in collection: for e in response['collection'] or []:
entry = resolve_entry((e, e.get('track'), e.get('playlist'))) yield resolve_entry(e, e.get('track'), e.get('playlist'))
if entry:
entries.append(entry)
next_href = response.get('next_href') next_href = response.get('next_href')
if not next_href: query.pop('offset', None)
break
next_href = response['next_href']
parsed_next_href = compat_urlparse.urlparse(next_href)
query = compat_urlparse.parse_qs(parsed_next_href.query)
query.update(COMMON_QUERY)
return {
'_type': 'playlist',
'id': playlist_id,
'title': playlist_title,
'entries': entries,
}
class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):