[RadioFrance] fix profile pagination detection

This commit is contained in:
lonm 2024-10-15 14:44:48 +01:00
parent 867bf965bb
commit e01fab7041

View file

@ -392,7 +392,7 @@ class RadioFranceProfileIE(RadioFrancePlaylistBaseIE):
_VALID_URL = rf'{RadioFranceBaseIE._VALID_URL_BASE}/personnes/(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet?p=3',
'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet',
'info_dict': {
'id': '86c62790-e481-11e2-9f7b-782bcb6744eb',
'display_id': 'thomas-pesquet',
@ -422,30 +422,24 @@ class RadioFranceProfileIE(RadioFrancePlaylistBaseIE):
webpage = self._download_webpage(url, profile_id, note=f'Downloading {profile_id} page {cursor}')
resp = dict()
# On profile pages, the data is stored in a javascript array in the final <script>
# Each episode is stored as
# a[0] = { id: ... }; a[1] = [ id: ... ]; on page 2->
# If a page had a thumbnail, the a variable contains image data,
# and episode data is stored in b[0]...
resp['items'] = []
podcastindex = 0
nextmatch = True
while nextmatch:
nextmatch = self._search_json(r'\w+\[' + str(podcastindex) + r'\]\s*=\s*', webpage, profile_id,
profile_id, transform_source=js_to_json, fatal=False, default=None)
podcastindex += 1
if nextmatch is not None:
resp['items'].append(nextmatch)
# There is more than one pagination key in the final <script>
# We should pick the pagination object that is nested within a documents object
# get episode data from page
pagedata = self._search_json(r'documents\s*:\s*', webpage, profile_id, profile_id,
transform_source=js_to_json)
lastPage = traverse_obj(pagedata, ('pagination', 'lastPage'))
# get the page data
pagekey = pagedata['pagination']
hasMorePages = False
lastPage = int(self._search_regex(pagekey+'\.lastPage=(\d+);', webpage, profile_id, '0'))
hasMorePages = cursor < lastPage
resp['next'] = cursor + 1 if hasMorePages else None
# get episode data; note that not all items will be A/V, so keep only those whose model is 'Expression'
for item in pagedata['items']:
if item['model']=='Expression':
resp['items'].append(item)
resp['metadata'] = self._search_json(r'content:\s*', webpage, profile_id, profile_id,
transform_source=js_to_json)
# If the image data is stored separately rather than in the main content area