From 367429e23879ae127b82e7e8cacd62b878033e75 Mon Sep 17 00:00:00 2001 From: Elyse <26639800+elyse0@users.noreply.github.com> Date: Thu, 9 Mar 2023 12:08:20 -0600 Subject: [PATCH] [common] Extract start and end keys for Dash fragments --- yt_dlp/extractor/common.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 2091df7fa..815538248 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2566,7 +2566,7 @@ class InfoExtractor: r = int(s.get('r', 0)) ms_info['total_number'] += 1 + r ms_info['s'].append({ - 't': int(s.get('t', 0)), + 't': int_or_none(s.get('t')), # @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60]) 'd': int(s.attrib['d']), 'r': r, @@ -2608,9 +2608,16 @@ class InfoExtractor: return ms_info mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration')) + availability_start_time = unified_timestamp( + mpd_doc.get('availabilityStartTime'), with_milliseconds=True) or 0 formats, subtitles = [], {} stream_numbers = collections.defaultdict(int) for period in mpd_doc.findall(_add_ns('Period')): + # segmentIngestTime is completely out of spec, but YT Livestream do this + segment_ingest_time = period.get('{http://youtube.com/yt/2012/10/10}segmentIngestTime') + if segment_ingest_time: + availability_start_time = unified_timestamp(segment_ingest_time, with_milliseconds=True) + period_duration = parse_duration(period.get('duration')) or mpd_duration period_ms_info = extract_multisegment_info(period, { 'start_number': 1, @@ -2784,13 +2791,17 @@ class InfoExtractor: 'Bandwidth': bandwidth, 'Number': segment_number, } + duration = float_or_none(segment_d, representation_ms_info['timescale']) + start = float_or_none(segment_time, representation_ms_info['timescale']) representation_ms_info['fragments'].append({ media_location_key: segment_url, - 'duration': float_or_none(segment_d, representation_ms_info['timescale']), + 'duration': duration, + 'start': availability_start_time + start, + 'end': availability_start_time + start + duration, }) for num, s in enumerate(representation_ms_info['s']): - segment_time = s.get('t') or segment_time + segment_time = s['t'] if s.get('t') is not None else segment_time segment_d = s['d'] add_segment_url() segment_number += 1 @@ -2806,6 +2817,7 @@ class InfoExtractor: fragments = [] segment_index = 0 timescale = representation_ms_info['timescale'] + start = 0 for s in representation_ms_info['s']: duration = float_or_none(s['d'], timescale) for r in range(s.get('r', 0) + 1): @@ -2813,8 +2825,11 @@ class InfoExtractor: fragments.append({ location_key(segment_uri): segment_uri, 'duration': duration, + 'start': availability_start_time + start, + 'end': availability_start_time + start + duration, }) segment_index += 1 + start += duration representation_ms_info['fragments'] = fragments elif 'segment_urls' in representation_ms_info: # Segment URLs with no SegmentTimeline