[cbc] Improve playlist support (closes #11704)

This commit is contained in:
Sergey M․ 2017-01-14 08:30:00 +07:00
parent 5d4c7daa49
commit abe8cb763f
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@ -90,19 +90,21 @@ class CBCIE(InfoExtractor):
}, },
}], }],
'skip': 'Geo-restricted to Canada', 'skip': 'Geo-restricted to Canada',
}, {
# multiple CBC.APP.Caffeine.initInstance(...)
'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238',
'info_dict': {
'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',
'id': 'dog-indoor-exercise-winter-1.3928238',
},
'playlist_mincount': 6,
}] }]
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url) return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url)
def _real_extract(self, url): def _extract_player_init(self, player_init, display_id):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
player_init = self._search_regex(
r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init',
default=None)
if player_init:
player_info = self._parse_json(player_init, display_id, js_to_json) player_info = self._parse_json(player_init, display_id, js_to_json)
media_id = player_info.get('mediaId') media_id = player_info.get('mediaId')
if not media_id: if not media_id:
@ -117,9 +119,20 @@ class CBCIE(InfoExtractor):
'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id, 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
clip_id)['entries'][0]['id'].split('/')[-1] clip_id)['entries'][0]['id'].split('/')[-1]
return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
else:
entries = [self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)] def _real_extract(self, url):
return self.playlist_result(entries) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
entries = [
self._extract_player_init(player_init, display_id)
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
entries.extend([
self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
for media_id in re.findall(r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)])
return self.playlist_result(
entries, display_id,
self._og_search_title(webpage, fatal=False),
self._og_search_description(webpage))
class CBCPlayerIE(InfoExtractor): class CBCPlayerIE(InfoExtractor):