mirror of
https://github.com/yt-dlp/yt-dlp
synced 2024-12-27 21:59:17 +01:00
[RadioFrance] fix profile pagination detection
This commit is contained in:
parent
867bf965bb
commit
e01fab7041
1 changed files with 12 additions and 18 deletions
|
@ -392,7 +392,7 @@ class RadioFranceProfileIE(RadioFrancePlaylistBaseIE):
|
|||
_VALID_URL = rf'{RadioFranceBaseIE._VALID_URL_BASE}/personnes/(?P<id>[\w-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet?p=3',
|
||||
'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet',
|
||||
'info_dict': {
|
||||
'id': '86c62790-e481-11e2-9f7b-782bcb6744eb',
|
||||
'display_id': 'thomas-pesquet',
|
||||
|
@ -422,30 +422,24 @@ class RadioFranceProfileIE(RadioFrancePlaylistBaseIE):
|
|||
webpage = self._download_webpage(url, profile_id, note=f'Downloading {profile_id} page {cursor}')
|
||||
|
||||
resp = dict()
|
||||
|
||||
# On profile pages, the data is stored in a javascript array in the final <script>
|
||||
# Each episode is stored as
|
||||
# a[0] = { id: ... }; a[1] = [ id: ... ]; on page 2->
|
||||
# If a page had a thumbnail, the a variable contains image data,
|
||||
# and episode data is stored in b[0]...
|
||||
resp['items'] = []
|
||||
podcastindex = 0
|
||||
nextmatch = True
|
||||
while nextmatch:
|
||||
nextmatch = self._search_json(r'\w+\[' + str(podcastindex) + r'\]\s*=\s*', webpage, profile_id,
|
||||
profile_id, transform_source=js_to_json, fatal=False, default=None)
|
||||
podcastindex += 1
|
||||
if nextmatch is not None:
|
||||
resp['items'].append(nextmatch)
|
||||
|
||||
# There is more than one pagination key in the final <script>
|
||||
# We should pick the pagination object that is within a documents object
|
||||
# get episode data from page
|
||||
pagedata = self._search_json(r'documents\s*:\s*', webpage, profile_id, profile_id,
|
||||
transform_source=js_to_json)
|
||||
lastPage = traverse_obj(pagedata, ('pagination', 'lastPage'))
|
||||
|
||||
# get the page data
|
||||
pagekey = pagedata['pagination']
|
||||
hasMorePages = False
|
||||
lastPage = int(self._search_regex(pagekey+'\.lastPage=(\d+);', webpage, profile_id, '0'))
|
||||
hasMorePages = cursor < lastPage
|
||||
resp['next'] = cursor + 1 if hasMorePages else None
|
||||
|
||||
# get episode data; note that not all items will be A/V, so filter for the 'Expression' model
|
||||
for item in pagedata['items']:
|
||||
if item['model']=='Expression':
|
||||
resp['items'].append(item)
|
||||
|
||||
resp['metadata'] = self._search_json(r'content:\s*', webpage, profile_id, profile_id,
|
||||
transform_source=js_to_json)
|
||||
# If the image data is stored separately rather than in the main content area
|
||||
|
|
Loading…
Reference in a new issue