[extractor] Detect more subtitle codecs in MPD manifests (#2174)

Authored by: fstirlitz
2024-12-28 22:24:34 +01:00 · 2021-12-31 20:06:45 +00:00 · 2021-12-31 20:06:45 +00:00 · 4afa3ec4b6
commit 4afa3ec4b6
parent 11aa91a12f
2 changed files with 13 additions and 5 deletions
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -2712,11 +2712,15 @@ class InfoExtractor(object):
                    mime_type = representation_attrib['mimeType']
                    content_type = representation_attrib.get('contentType', mime_type.split('/')[0])
-                    codecs = representation_attrib.get('codecs', '')
+                    codecs = parse_codecs(representation_attrib.get('codecs', ''))
                    if content_type not in ('video', 'audio', 'text'):
                        if mime_type == 'image/jpeg':
                            content_type = mime_type
-                        elif codecs.split('.')[0] == 'stpp':
+                        elif codecs['vcodec'] != 'none':
+                            content_type = 'video'
+                        elif codecs['acodec'] != 'none':
+                            content_type = 'audio'
+                        elif codecs.get('tcodec', 'none') != 'none':
                            content_type = 'text'
                        elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'):
                            content_type = 'text'
@@ -2762,8 +2766,8 @@ class InfoExtractor(object):
                            'format_note': 'DASH %s' % content_type,
                            'filesize': filesize,
                            'container': mimetype2ext(mime_type) + '_dash',
+                            **codecs
                        }
-                        f.update(parse_codecs(codecs))
                    elif content_type == 'text':
                        f = {
                            'ext': mimetype2ext(mime_type),
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -3196,7 +3196,7 @@ def parse_codecs(codecs_str):
        return {}
    split_codecs = list(filter(None, map(
        str.strip, codecs_str.strip().strip(',').split(','))))
-    vcodec, acodec, hdr = None, None, None
+    vcodec, acodec, tcodec, hdr = None, None, None, None
    for full_codec in split_codecs:
        parts = full_codec.split('.')
        codec = parts[0].replace('0', '')
@@ -3213,13 +3213,17 @@ def parse_codecs(codecs_str):
        elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
+        elif codec in ('stpp', 'wvtt',):
+            if not tcodec:
+                tcodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
-    if vcodec or acodec:
+    if vcodec or acodec or tcodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
            'dynamic_range': hdr,
+            **({'tcodec': tcodec} if tcodec is not None else {}),
        }
    elif len(split_codecs) == 2:
        return {