mirror of
https://github.com/yt-dlp/yt-dlp
synced 2024-12-26 21:59:08 +01:00
support lazy playlist & change geo restriction handling
This commit is contained in:
parent
48416c3718
commit
adb26de9b4
1 changed files with 100 additions and 108 deletions
|
@ -22,11 +22,6 @@ class ExtremeMusicBaseIE(InfoExtractor):
|
||||||
def _initialize(self, url, video_id, country=None):
|
def _initialize(self, url, video_id, country=None):
|
||||||
self._REQUIRE_VERSION = (self._configuration_arg('ver', ie_key='extrememusic')
|
self._REQUIRE_VERSION = (self._configuration_arg('ver', ie_key='extrememusic')
|
||||||
or self._configuration_arg('version', ie_key='extrememusic'))
|
or self._configuration_arg('version', ie_key='extrememusic'))
|
||||||
# This site serves different versions of the same playlist id due to geo-restriction
|
|
||||||
# use user's own country code if no code (geo_bypass_country or pre-defined country code) is provided
|
|
||||||
if not country:
|
|
||||||
country = self._download_webpage('https://ipapi.co/country_code', video_id)
|
|
||||||
self.to_screen(f'Set country code to {country}')
|
|
||||||
env = self._download_json('https://www.extrememusic.com/env', video_id)
|
env = self._download_json('https://www.extrememusic.com/env', video_id)
|
||||||
self._REQUEST_HEADERS = {
|
self._REQUEST_HEADERS = {
|
||||||
'Accept': 'application/json',
|
'Accept': 'application/json',
|
||||||
|
@ -53,11 +48,11 @@ class ExtremeMusicBaseIE(InfoExtractor):
|
||||||
def _extract_track(self, album_data, track_id=None, version_id=None):
|
def _extract_track(self, album_data, track_id=None, version_id=None):
|
||||||
if 'tracks' in album_data and 'track_sounds' in album_data:
|
if 'tracks' in album_data and 'track_sounds' in album_data:
|
||||||
if not track_id and version_id:
|
if not track_id and version_id:
|
||||||
track_id = traverse_obj(album_data['track_sounds'],
|
track_id = traverse_obj(album_data, (
|
||||||
(lambda _, v: v['id'] == int(version_id), 'track_id', {int}), get_all=False)
|
'track_sounds', lambda _, v: v['id'] == int(version_id), 'track_id', {int}), get_all=False)
|
||||||
if track := traverse_obj(album_data['tracks'],
|
if track := traverse_obj(album_data, (
|
||||||
(lambda _, v: v['id'] == int(track_id), {dict}), get_all=False):
|
'tracks', lambda _, v: v['id'] == int(track_id), {dict}), get_all=False):
|
||||||
info = {**traverse_obj(track, {
|
track_info = {**traverse_obj(track, {
|
||||||
'track': ('title', {str}),
|
'track': ('title', {str}),
|
||||||
'track_number': ('sort_order', {lambda v: v + 1}, {int}),
|
'track_number': ('sort_order', {lambda v: v + 1}, {int}),
|
||||||
'track_id': ('track_no', {str}),
|
'track_id': ('track_no', {str}),
|
||||||
|
@ -68,10 +63,10 @@ class ExtremeMusicBaseIE(InfoExtractor):
|
||||||
'genres': (('genre', 'subgenre'), ..., 'label'),
|
'genres': (('genre', 'subgenre'), ..., 'label'),
|
||||||
'tag': ('keywords', ..., 'label'),
|
'tag': ('keywords', ..., 'label'),
|
||||||
'album': ('album_title', {lambda v: str_or_none(v) or None}),
|
'album': ('album_title', {lambda v: str_or_none(v) or None}),
|
||||||
}), **traverse_obj(album_data, {
|
}), **traverse_obj(album_data, ('album', {
|
||||||
'album_artists': ('album', 'artist', {lambda v: [v] if v else None}),
|
'album_artists': ('artist', {lambda v: [v] if v else None}),
|
||||||
'upload_date': ('album', 'created', {unified_strdate}),
|
'upload_date': ('created', {unified_strdate}),
|
||||||
})}
|
}))}
|
||||||
entries, thumbnails = [], []
|
entries, thumbnails = [], []
|
||||||
for image in traverse_obj(track, ('images', 'default')):
|
for image in traverse_obj(track, ('images', 'default')):
|
||||||
thumbnails.append(traverse_obj(image, {
|
thumbnails.append(traverse_obj(image, {
|
||||||
|
@ -80,17 +75,18 @@ class ExtremeMusicBaseIE(InfoExtractor):
|
||||||
'height': ('height', {int_or_none}),
|
'height': ('height', {int_or_none}),
|
||||||
}))
|
}))
|
||||||
if not self._REQUIRE_VERSION:
|
if not self._REQUIRE_VERSION:
|
||||||
version_id = version_id or traverse_obj(track, 'default_track_sound_id', ('track_sound_ids', 0))
|
version_id = (version_id
|
||||||
|
or traverse_obj(track, 'default_track_sound_id', ('track_sound_ids', 0)))
|
||||||
for sound_id in [version_id] if version_id else track['track_sound_ids']:
|
for sound_id in [version_id] if version_id else track['track_sound_ids']:
|
||||||
if sound := traverse_obj(album_data['track_sounds'],
|
if sound := traverse_obj(album_data, (
|
||||||
(lambda _, v: v['id'] == int(sound_id) and v['track_id'] == int(track_id),
|
'track_sounds', lambda _, v: v['id'] == int(sound_id) and v['track_id'] == int(track_id),
|
||||||
{dict}), get_all=False):
|
{dict}), get_all=False):
|
||||||
if (version_id
|
if (version_id
|
||||||
or 'all' in self._REQUIRE_VERSION
|
or 'all' in self._REQUIRE_VERSION
|
||||||
or any(x in sound['version_type'].lower() for x in self._REQUIRE_VERSION)):
|
or any(x in sound['version_type'].lower() for x in self._REQUIRE_VERSION)):
|
||||||
formats = []
|
formats = []
|
||||||
for audio_url in traverse_obj(sound, ('assets', 'audio', ('preview_url',
|
for audio_url in traverse_obj(
|
||||||
'preview_url_hls'))):
|
sound, ('assets', 'audio', ('preview_url', 'preview_url_hls'))):
|
||||||
if determine_ext(audio_url) == 'm3u8':
|
if determine_ext(audio_url) == 'm3u8':
|
||||||
m3u8_url = re.sub(r'\.m3u8\?.*', '/HLS/128_v4.m3u8', audio_url)
|
m3u8_url = re.sub(r'\.m3u8\?.*', '/HLS/128_v4.m3u8', audio_url)
|
||||||
for f in self._extract_m3u8_formats(m3u8_url, sound_id, 'm4a', fatal=False):
|
for f in self._extract_m3u8_formats(m3u8_url, sound_id, 'm4a', fatal=False):
|
||||||
|
@ -108,25 +104,18 @@ class ExtremeMusicBaseIE(InfoExtractor):
|
||||||
'id': str(sound_id),
|
'id': str(sound_id),
|
||||||
'title': join_nonempty('title', 'version_type', from_dict=sound, delim=' - '),
|
'title': join_nonempty('title', 'version_type', from_dict=sound, delim=' - '),
|
||||||
'alt_title': sound['version_type'],
|
'alt_title': sound['version_type'],
|
||||||
**info,
|
**track_info,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'duration': sound.get('duration'),
|
'duration': sound.get('duration'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'webpage_url': f"https://www.extrememusic.com/albums/{track['album_id']}?item={track_id}&ver={sound_id}",
|
'webpage_url': f"https://www.extrememusic.com/albums/{track['album_id']}?item={track_id}&ver={sound_id}",
|
||||||
})
|
})
|
||||||
|
return [entries, {
|
||||||
if len(entries) > 1:
|
|
||||||
return {
|
|
||||||
'id': track_id,
|
'id': track_id,
|
||||||
**info,
|
'title': track_info['track'],
|
||||||
'entries': entries,
|
**track_info,
|
||||||
'_type': 'playlist',
|
}]
|
||||||
}
|
|
||||||
elif len(entries) == 1:
|
|
||||||
return entries[0]
|
|
||||||
else:
|
|
||||||
self.raise_no_formats('Track data not found', video_id=track_id)
|
self.raise_no_formats('Track data not found', video_id=track_id)
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
class ExtremeMusicIE(ExtremeMusicBaseIE):
|
class ExtremeMusicIE(ExtremeMusicBaseIE):
|
||||||
|
@ -214,9 +203,11 @@ class ExtremeMusicIE(ExtremeMusicBaseIE):
|
||||||
album_id, track_id, version_id = self._match_valid_url(url).group('album', 'id', 'ver')
|
album_id, track_id, version_id = self._match_valid_url(url).group('album', 'id', 'ver')
|
||||||
self._initialize(url, version_id or track_id, self.get_param('geo_bypass_country') or 'DE')
|
self._initialize(url, version_id or track_id, self.get_param('geo_bypass_country') or 'DE')
|
||||||
album_data = self._get_album_data(album_id, version_id or track_id)
|
album_data = self._get_album_data(album_id, version_id or track_id)
|
||||||
if result := self._extract_track(album_data, track_id, version_id):
|
if track := self._extract_track(album_data, track_id, version_id):
|
||||||
return result
|
if len(track[0]) > 1:
|
||||||
else:
|
return self.playlist_result(track[0], **track[1])
|
||||||
|
elif len(track[0]) == 1:
|
||||||
|
return track[0][0]
|
||||||
self.raise_no_formats('No formats were found')
|
self.raise_no_formats('No formats were found')
|
||||||
|
|
||||||
|
|
||||||
|
@ -247,32 +238,26 @@ class ExtremeMusicAIE(ExtremeMusicBaseIE):
|
||||||
album_id = self._match_id(url)
|
album_id = self._match_id(url)
|
||||||
self._initialize(url, album_id, self.get_param('geo_bypass_country') or 'DE')
|
self._initialize(url, album_id, self.get_param('geo_bypass_country') or 'DE')
|
||||||
album_data = self._get_album_data(album_id, album_id)
|
album_data = self._get_album_data(album_id, album_id)
|
||||||
|
|
||||||
entries = []
|
|
||||||
for track_id in traverse_obj(album_data, ('tracks', ..., 'id')):
|
|
||||||
if track := self._extract_track(album_data, track_id=track_id):
|
|
||||||
if track.get('entries'):
|
|
||||||
entries.extend(track['entries'])
|
|
||||||
else:
|
|
||||||
entries.append(track)
|
|
||||||
|
|
||||||
if entries:
|
|
||||||
subgenres = traverse_obj(album_data, ('album', 'subgenres', {str_or_none}))
|
subgenres = traverse_obj(album_data, ('album', 'subgenres', {str_or_none}))
|
||||||
return merge_dicts(traverse_obj(album_data.get('album'), {
|
album_info = merge_dicts(traverse_obj(album_data, ('album', {
|
||||||
'id': ('id', {lambda v: str(v)}),
|
'id': ('id', {lambda v: str(v)}),
|
||||||
'album': ('title', {str_or_none}),
|
'album': ('title', {str_or_none}),
|
||||||
'description': ('description', {lambda v: str_or_none(v) or None}),
|
'description': ('description', {lambda v: str_or_none(v) or None}),
|
||||||
'artists': ('artist', {lambda v: [v] if v else None}),
|
'artists': ('artist', {lambda v: [v] if v else None}),
|
||||||
'genres': ('genres', {str_or_none}, {lambda v: join_nonempty(v, subgenres, delim=', ')},
|
'genres': ('genres', {str_or_none},
|
||||||
|
{lambda v: join_nonempty(v, subgenres, delim=', ')},
|
||||||
{lambda v: v.split(', ') if v else None}),
|
{lambda v: v.split(', ') if v else None}),
|
||||||
'tag': ('keywords', {lambda v: v.split(', ') if v else None}),
|
'tag': ('keywords', {lambda v: v.split(', ') if v else None}),
|
||||||
}), {
|
})), {
|
||||||
'description': traverse_obj(album_data, ('bio', 'description', {lambda v: str_or_none(v) or None})),
|
'description': traverse_obj(
|
||||||
'entries': entries,
|
album_data, ('bio', 'description', {lambda v: str_or_none(v) or None})),
|
||||||
'_type': 'playlist',
|
|
||||||
})
|
})
|
||||||
else:
|
|
||||||
self.raise_no_formats('No formats were found')
|
return self.playlist_result(
|
||||||
|
(entry for sounds in (track[0] for track in (self._extract_track(album_data, track_id=track_id)
|
||||||
|
for track_id in traverse_obj(album_data, ('tracks', ..., 'id'))))
|
||||||
|
for entry in sounds),
|
||||||
|
**album_info)
|
||||||
|
|
||||||
|
|
||||||
class ExtremeMusicPIE(ExtremeMusicBaseIE):
|
class ExtremeMusicPIE(ExtremeMusicBaseIE):
|
||||||
|
@ -285,8 +270,7 @@ class ExtremeMusicPIE(ExtremeMusicBaseIE):
|
||||||
'title': 'NICE',
|
'title': 'NICE',
|
||||||
'thumbnail': 'https://d2oet5a29f64lj.cloudfront.net/img-data/w/2480/featureditem/square/thumbnail_PLAYLIST_Nice-square-(formerly ChristmasTraditional).jpg',
|
'thumbnail': 'https://d2oet5a29f64lj.cloudfront.net/img-data/w/2480/featureditem/square/thumbnail_PLAYLIST_Nice-square-(formerly ChristmasTraditional).jpg',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 29,
|
'playlist_count': 40,
|
||||||
'expected_warnings': ['This playlist has geo-restricted items. Try using --xff to specify a different country code, e.g. DE'],
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.extrememusic.com/playlists/fUKKU5KAfK61pAAKp4U4KpKUxsRk2ki_fU117KpUUAAUKAUfpA6UAfAKK8Ul5ji',
|
'url': 'https://www.extrememusic.com/playlists/fUKKU5KAfK61pAAKp4U4KpKUxsRk2ki_fU117KpUUAAUKAUfpA6UAfAKK8Ul5ji',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -299,60 +283,68 @@ class ExtremeMusicPIE(ExtremeMusicBaseIE):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
self._initialize(url, playlist_id, self.get_param('geo_bypass_country'))
|
self._initialize(url, playlist_id, self.get_param('geo_bypass_country') or 'DE')
|
||||||
|
|
||||||
def playlist_query(playlist_id, offset, limit):
|
def playlist_query(playlist_id, offset, limit, note=None):
|
||||||
# playlist api: https://snapi.extrememusic.com/playlists?id={playlist_id}&range={offset}%2C{limit}'
|
# playlist api: https://snapi.extrememusic.com/playlists?id={playlist_id}&range={offset}%2C{limit}'
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
'https://snapi.extrememusic.com/playlists', playlist_id,
|
f'{self._API_URL}/playlists', playlist_id, note=note, query={
|
||||||
note=f'Downloading item {offset + 1}-{offset + limit}', query={
|
|
||||||
'id': playlist_id,
|
'id': playlist_id,
|
||||||
'range': f'{offset},{limit}',
|
'range': f'{offset + 1},{limit}',
|
||||||
}, headers=self._REQUEST_HEADERS)
|
}, headers=self._REQUEST_HEADERS)
|
||||||
|
|
||||||
thumbnails, entries = [], []
|
def extract_playlist(playlist_id):
|
||||||
album_data, track_done, limit = {}, [], 50
|
albums, tracks_done, items_count, limit = {}, [], 0, 25
|
||||||
for i in itertools.count():
|
for i in itertools.count():
|
||||||
playlist = playlist_query(playlist_id, i * limit, limit)
|
try:
|
||||||
if len(playlist['playlist_items']) == 0:
|
# try to tackle geo restriction by shortening playlist id
|
||||||
|
playlist = playlist_query(playlist_id.split('_')[0], i * limit, limit,
|
||||||
|
note=f'Downloading item {i * limit + 1}-{i * limit + limit}')
|
||||||
|
except Exception:
|
||||||
|
playlist = playlist_query(playlist_id, i * limit, limit,
|
||||||
|
note=f'Downloading item {i * limit + 1}-{i * limit + limit}')
|
||||||
|
if playlist_items_count := traverse_obj(playlist, ('playlist', 'playlist_items_count')):
|
||||||
|
if len(playlist.get('tracks', [])) == 0:
|
||||||
|
if items_count < playlist_items_count:
|
||||||
|
self.report_warning('This playlist has geo-restricted items. Try using --xff to specify a different country code')
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
track_ids = traverse_obj(playlist, ('playlist_items', ..., 'track_id'))
|
track_ids = traverse_obj(playlist, ('tracks', ..., 'id'))
|
||||||
|
items_count += len(track_ids)
|
||||||
for track_id in list(dict.fromkeys(track_ids)):
|
for track_id in list(dict.fromkeys(track_ids)):
|
||||||
if track_id not in track_done:
|
if track_id not in tracks_done:
|
||||||
album_id = traverse_obj(playlist,
|
album_id = traverse_obj(
|
||||||
('tracks', lambda _, v: v['id'] == track_id, 'album_id', {int}), get_all=False)
|
playlist, ('tracks', lambda _, v: v['id'] == track_id,
|
||||||
if album_id not in album_data:
|
'album_id', {int}), get_all=False)
|
||||||
album_data[album_id] = self._get_album_data(album_id, track_id, fatal=False)
|
if album_id not in albums:
|
||||||
playlist['album'] = traverse_obj(album_data, (album_id, 'album', {dict}))
|
albums[album_id] = self._get_album_data(album_id, track_id, fatal=False)
|
||||||
|
playlist['album'] = traverse_obj(albums, (album_id, 'album', {dict}))
|
||||||
if track := self._extract_track(playlist, track_id=track_id):
|
if track := self._extract_track(playlist, track_id=track_id):
|
||||||
if track.get('entries'):
|
if len(track[0]) > 1:
|
||||||
entries.extend(track['entries'])
|
yield from track[0]
|
||||||
else:
|
elif len(track[0]) == 1:
|
||||||
entries.append(track)
|
yield track[0][0]
|
||||||
track_done.append(track_id)
|
tracks_done.append(track_id)
|
||||||
if len(track_done) >= playlist['playlist']['playlist_items_count']:
|
if items_count >= playlist_items_count:
|
||||||
break
|
break
|
||||||
|
else:
|
||||||
|
return []
|
||||||
|
|
||||||
if entries:
|
playlist_info, thumbnails = {}, []
|
||||||
if len(track_done) < playlist['playlist']['playlist_items_count']:
|
if playlist := playlist_query(playlist_id, 0, 1, note='Downloading JSON metadata'):
|
||||||
self.report_warning('This playlist has geo-restricted items. Try using --xff to specify a different country code, e.g. DE')
|
for image in traverse_obj(playlist, ('playlist', 'images', 'square')):
|
||||||
|
|
||||||
for image in traverse_obj(playlist['playlist'], ('images', 'square')):
|
|
||||||
thumbnails.append(traverse_obj(image, {
|
thumbnails.append(traverse_obj(image, {
|
||||||
'url': ('url', {url_or_none}),
|
'url': ('url', {url_or_none}),
|
||||||
'width': ('width', {int_or_none}),
|
'width': ('width', {int_or_none}),
|
||||||
'height': ('height', {int_or_none}),
|
'height': ('height', {int_or_none}),
|
||||||
}))
|
}))
|
||||||
|
playlist_info = {
|
||||||
return {k: v for k, v in {
|
**traverse_obj(playlist, ('playlist', {
|
||||||
'id': playlist['playlist']['id'],
|
'id': ('id', {str}),
|
||||||
'title': playlist['playlist']['title'],
|
'title': ('title', {str_or_none}),
|
||||||
|
'uploader': ('owner_name', {str_or_none}),
|
||||||
|
})),
|
||||||
'thumbnail': traverse_obj(thumbnails, (0, 'url', {url_or_none})),
|
'thumbnail': traverse_obj(thumbnails, (0, 'url', {url_or_none})),
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'uploader': playlist['playlist']['owner_name'],
|
}
|
||||||
'entries': entries,
|
return self.playlist_result(extract_playlist(playlist_id), **playlist_info)
|
||||||
'_type': 'playlist',
|
|
||||||
}.items() if v}
|
|
||||||
else:
|
|
||||||
self.raise_no_formats('No formats were found')
|
|
||||||
|
|
Loading…
Reference in a new issue