[extractor] Detect more subtitle codecs in MPD manifests (#2174)

Authored by: fstirlitz
This commit is contained in:
Felix S 2021-12-31 20:06:45 +00:00 committed by GitHub
parent 11aa91a12f
commit 4afa3ec4b6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 13 additions and 5 deletions

View file

@@ -2712,11 +2712,15 @@ class InfoExtractor(object):
mime_type = representation_attrib['mimeType'] mime_type = representation_attrib['mimeType']
content_type = representation_attrib.get('contentType', mime_type.split('/')[0]) content_type = representation_attrib.get('contentType', mime_type.split('/')[0])
codecs = representation_attrib.get('codecs', '') codecs = parse_codecs(representation_attrib.get('codecs', ''))
if content_type not in ('video', 'audio', 'text'): if content_type not in ('video', 'audio', 'text'):
if mime_type == 'image/jpeg': if mime_type == 'image/jpeg':
content_type = mime_type content_type = mime_type
elif codecs.split('.')[0] == 'stpp': elif codecs['vcodec'] != 'none':
content_type = 'video'
elif codecs['acodec'] != 'none':
content_type = 'audio'
elif codecs.get('tcodec', 'none') != 'none':
content_type = 'text' content_type = 'text'
elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'): elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'):
content_type = 'text' content_type = 'text'
@@ -2762,8 +2766,8 @@ class InfoExtractor(object):
'format_note': 'DASH %s' % content_type, 'format_note': 'DASH %s' % content_type,
'filesize': filesize, 'filesize': filesize,
'container': mimetype2ext(mime_type) + '_dash', 'container': mimetype2ext(mime_type) + '_dash',
**codecs
} }
f.update(parse_codecs(codecs))
elif content_type == 'text': elif content_type == 'text':
f = { f = {
'ext': mimetype2ext(mime_type), 'ext': mimetype2ext(mime_type),

View file

@@ -3196,7 +3196,7 @@ def parse_codecs(codecs_str):
return {} return {}
split_codecs = list(filter(None, map( split_codecs = list(filter(None, map(
str.strip, codecs_str.strip().strip(',').split(',')))) str.strip, codecs_str.strip().strip(',').split(','))))
vcodec, acodec, hdr = None, None, None vcodec, acodec, tcodec, hdr = None, None, None, None
for full_codec in split_codecs: for full_codec in split_codecs:
parts = full_codec.split('.') parts = full_codec.split('.')
codec = parts[0].replace('0', '') codec = parts[0].replace('0', '')
@@ -3213,13 +3213,17 @@ def parse_codecs(codecs_str):
elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
if not acodec: if not acodec:
acodec = full_codec acodec = full_codec
elif codec in ('stpp', 'wvtt',):
if not tcodec:
tcodec = full_codec
else: else:
write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr) write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
if vcodec or acodec: if vcodec or acodec or tcodec:
return { return {
'vcodec': vcodec or 'none', 'vcodec': vcodec or 'none',
'acodec': acodec or 'none', 'acodec': acodec or 'none',
'dynamic_range': hdr, 'dynamic_range': hdr,
**({'tcodec': tcodec} if tcodec is not None else {}),
} }
elif len(split_codecs) == 2: elif len(split_codecs) == 2:
return { return {