mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-16 03:40:50 +01:00
[bbc] improve extraction
- extract f4m and dash formats - improve format sorting and listing - improve extraction of articles with `otherSettings.playlist`
This commit is contained in:
parent
089a40955c
commit
a7e5f27412
1 changed files with 83 additions and 92 deletions
|
@ -229,51 +229,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||||
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
|
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
|
||||||
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
|
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
|
||||||
|
|
||||||
def _extract_connection(self, connection, programme_id):
|
|
||||||
formats = []
|
|
||||||
kind = connection.get('kind')
|
|
||||||
protocol = connection.get('protocol')
|
|
||||||
supplier = connection.get('supplier')
|
|
||||||
if protocol == 'http':
|
|
||||||
href = connection.get('href')
|
|
||||||
transfer_format = connection.get('transferFormat')
|
|
||||||
# ASX playlist
|
|
||||||
if supplier == 'asx':
|
|
||||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
|
||||||
formats.append({
|
|
||||||
'url': ref,
|
|
||||||
'format_id': 'ref%s_%s' % (i, supplier),
|
|
||||||
})
|
|
||||||
# Skip DASH until supported
|
|
||||||
elif transfer_format == 'dash':
|
|
||||||
pass
|
|
||||||
elif transfer_format == 'hls':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
|
||||||
m3u8_id=supplier, fatal=False))
|
|
||||||
# Direct link
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'url': href,
|
|
||||||
'format_id': supplier or kind or protocol,
|
|
||||||
})
|
|
||||||
elif protocol == 'rtmp':
|
|
||||||
application = connection.get('application', 'ondemand')
|
|
||||||
auth_string = connection.get('authString')
|
|
||||||
identifier = connection.get('identifier')
|
|
||||||
server = connection.get('server')
|
|
||||||
formats.append({
|
|
||||||
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
|
||||||
'play_path': identifier,
|
|
||||||
'app': '%s?%s' % (application, auth_string),
|
|
||||||
'page_url': 'http://www.bbc.co.uk',
|
|
||||||
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
|
||||||
'rtmp_live': False,
|
|
||||||
'ext': 'flv',
|
|
||||||
'format_id': supplier,
|
|
||||||
})
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _extract_items(self, playlist):
|
def _extract_items(self, playlist):
|
||||||
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
|
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
|
||||||
|
|
||||||
|
@ -294,46 +249,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||||
def _extract_connections(self, media):
|
def _extract_connections(self, media):
|
||||||
return self._findall_ns(media, './{%s}connection')
|
return self._findall_ns(media, './{%s}connection')
|
||||||
|
|
||||||
def _extract_video(self, media, programme_id):
|
|
||||||
formats = []
|
|
||||||
vbr = int_or_none(media.get('bitrate'))
|
|
||||||
vcodec = media.get('encoding')
|
|
||||||
service = media.get('service')
|
|
||||||
width = int_or_none(media.get('width'))
|
|
||||||
height = int_or_none(media.get('height'))
|
|
||||||
file_size = int_or_none(media.get('media_file_size'))
|
|
||||||
for connection in self._extract_connections(media):
|
|
||||||
conn_formats = self._extract_connection(connection, programme_id)
|
|
||||||
for format in conn_formats:
|
|
||||||
format.update({
|
|
||||||
'width': width,
|
|
||||||
'height': height,
|
|
||||||
'vbr': vbr,
|
|
||||||
'vcodec': vcodec,
|
|
||||||
'filesize': file_size,
|
|
||||||
})
|
|
||||||
if service:
|
|
||||||
format['format_id'] = '%s_%s' % (service, format['format_id'])
|
|
||||||
formats.extend(conn_formats)
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _extract_audio(self, media, programme_id):
|
|
||||||
formats = []
|
|
||||||
abr = int_or_none(media.get('bitrate'))
|
|
||||||
acodec = media.get('encoding')
|
|
||||||
service = media.get('service')
|
|
||||||
for connection in self._extract_connections(media):
|
|
||||||
conn_formats = self._extract_connection(connection, programme_id)
|
|
||||||
for format in conn_formats:
|
|
||||||
format.update({
|
|
||||||
'format_id': '%s_%s' % (service, format['format_id']),
|
|
||||||
'abr': abr,
|
|
||||||
'acodec': acodec,
|
|
||||||
'vcodec': 'none',
|
|
||||||
})
|
|
||||||
formats.extend(conn_formats)
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _get_subtitles(self, media, programme_id):
|
def _get_subtitles(self, media, programme_id):
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for connection in self._extract_connections(media):
|
for connection in self._extract_connections(media):
|
||||||
|
@ -382,10 +297,77 @@ class BBCCoUkIE(InfoExtractor):
|
||||||
|
|
||||||
for media in self._extract_medias(media_selection):
|
for media in self._extract_medias(media_selection):
|
||||||
kind = media.get('kind')
|
kind = media.get('kind')
|
||||||
if kind == 'audio':
|
if kind in ('video', 'audio'):
|
||||||
formats.extend(self._extract_audio(media, programme_id))
|
bitrate = int_or_none(media.get('bitrate'))
|
||||||
elif kind == 'video':
|
encoding = media.get('encoding')
|
||||||
formats.extend(self._extract_video(media, programme_id))
|
service = media.get('service')
|
||||||
|
width = int_or_none(media.get('width'))
|
||||||
|
height = int_or_none(media.get('height'))
|
||||||
|
file_size = int_or_none(media.get('media_file_size'))
|
||||||
|
for connection in self._extract_connections(media):
|
||||||
|
conn_kind = connection.get('kind')
|
||||||
|
protocol = connection.get('protocol')
|
||||||
|
supplier = connection.get('supplier')
|
||||||
|
href = connection.get('href')
|
||||||
|
transfer_format = connection.get('transferFormat')
|
||||||
|
format_id = supplier or conn_kind or protocol
|
||||||
|
if service:
|
||||||
|
format_id = '%s_%s' % (service, format_id)
|
||||||
|
# ASX playlist
|
||||||
|
if supplier == 'asx':
|
||||||
|
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||||
|
formats.append({
|
||||||
|
'url': ref,
|
||||||
|
'format_id': 'ref%s_%s' % (i, format_id),
|
||||||
|
})
|
||||||
|
elif transfer_format == 'dash':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
href, programme_id, mpd_id=format_id, fatal=False))
|
||||||
|
elif transfer_format == 'hls':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=format_id, fatal=False))
|
||||||
|
elif transfer_format == 'hds':
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
href, programme_id, f4m_id=format_id, fatal=False))
|
||||||
|
else:
|
||||||
|
fmt = {
|
||||||
|
'format_id': format_id,
|
||||||
|
'filesize': file_size,
|
||||||
|
}
|
||||||
|
if kind == 'video':
|
||||||
|
fmt.update({
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
'vbr': bitrate,
|
||||||
|
'vcodec': encoding,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
fmt.update({
|
||||||
|
'abr': bitrate,
|
||||||
|
'acodec': encoding,
|
||||||
|
'vcodec': 'none',
|
||||||
|
})
|
||||||
|
if protocol == 'http':
|
||||||
|
# Direct link
|
||||||
|
fmt.update({
|
||||||
|
'url': href,
|
||||||
|
})
|
||||||
|
elif protocol == 'rtmp':
|
||||||
|
application = connection.get('application', 'ondemand')
|
||||||
|
auth_string = connection.get('authString')
|
||||||
|
identifier = connection.get('identifier')
|
||||||
|
server = connection.get('server')
|
||||||
|
fmt.update({
|
||||||
|
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
||||||
|
'play_path': identifier,
|
||||||
|
'app': '%s?%s' % (application, auth_string),
|
||||||
|
'page_url': 'http://www.bbc.co.uk',
|
||||||
|
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
||||||
|
'rtmp_live': False,
|
||||||
|
'ext': 'flv',
|
||||||
|
})
|
||||||
|
formats.append(fmt)
|
||||||
elif kind == 'captions':
|
elif kind == 'captions':
|
||||||
subtitles = self.extract_subtitles(media, programme_id)
|
subtitles = self.extract_subtitles(media, programme_id)
|
||||||
return formats, subtitles
|
return formats, subtitles
|
||||||
|
@ -820,13 +802,19 @@ class BBCIE(BBCCoUkIE):
|
||||||
# http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
|
# http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
|
||||||
playlist = data_playable.get('otherSettings', {}).get('playlist', {})
|
playlist = data_playable.get('otherSettings', {}).get('playlist', {})
|
||||||
if playlist:
|
if playlist:
|
||||||
for key in ('progressiveDownload', 'streaming'):
|
entry = None
|
||||||
|
for key in ('streaming', 'progressiveDownload'):
|
||||||
playlist_url = playlist.get('%sUrl' % key)
|
playlist_url = playlist.get('%sUrl' % key)
|
||||||
if not playlist_url:
|
if not playlist_url:
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
entries.append(self._extract_from_playlist_sxml(
|
info = self._extract_from_playlist_sxml(
|
||||||
playlist_url, playlist_id, timestamp))
|
playlist_url, playlist_id, timestamp)
|
||||||
|
if not entry:
|
||||||
|
entry = info
|
||||||
|
else:
|
||||||
|
entry['title'] = info['title']
|
||||||
|
entry['formats'].extend(info['formats'])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Some playlist URL may fail with 500, at the same time
|
# Some playlist URL may fail with 500, at the same time
|
||||||
# the other one may work fine (e.g.
|
# the other one may work fine (e.g.
|
||||||
|
@ -834,6 +822,9 @@ class BBCIE(BBCCoUkIE):
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
|
||||||
continue
|
continue
|
||||||
raise
|
raise
|
||||||
|
if entry:
|
||||||
|
self._sort_formats(entry['formats'])
|
||||||
|
entries.append(entry)
|
||||||
|
|
||||||
if entries:
|
if entries:
|
||||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
Loading…
Reference in a new issue