mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-16 03:40:50 +01:00
Improve chapter sanitization
This commit is contained in:
parent
385f7f3895
commit
a3976e0760
2 changed files with 10 additions and 7 deletions
|
@@ -2377,13 +2377,18 @@ class YoutubeDL:
|
||||||
self.report_warning('"duration" field is negative, there is an error in extractor')
|
self.report_warning('"duration" field is negative, there is an error in extractor')
|
||||||
|
|
||||||
chapters = info_dict.get('chapters') or []
|
chapters = info_dict.get('chapters') or []
|
||||||
|
if chapters and chapters[0].get('start_time'):
|
||||||
|
chapters.insert(0, {'start_time': 0})
|
||||||
|
|
||||||
dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
|
dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
|
||||||
for prev, current, next_ in zip(
|
for idx, (prev, current, next_) in enumerate(zip(
|
||||||
(dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)):
|
(dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
|
||||||
if current.get('start_time') is None:
|
if current.get('start_time') is None:
|
||||||
current['start_time'] = prev.get('end_time')
|
current['start_time'] = prev.get('end_time')
|
||||||
if not current.get('end_time'):
|
if not current.get('end_time'):
|
||||||
current['end_time'] = next_.get('start_time')
|
current['end_time'] = next_.get('start_time')
|
||||||
|
if not current.get('title'):
|
||||||
|
current['title'] = f'<Untitled Chapter {idx}>'
|
||||||
|
|
||||||
if 'playlist' not in info_dict:
|
if 'playlist' not in info_dict:
|
||||||
# It isn't part of a playlist
|
# It isn't part of a playlist
|
||||||
|
|
|
@@ -2764,17 +2764,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
if not strict:
|
if not strict:
|
||||||
chapter_list.sort(key=lambda c: c['start_time'] or 0)
|
chapter_list.sort(key=lambda c: c['start_time'] or 0)
|
||||||
|
|
||||||
chapters = [{'start_time': 0, 'title': '<Untitled>'}]
|
chapters = [{'start_time': 0}]
|
||||||
for idx, chapter in enumerate(chapter_list):
|
for idx, chapter in enumerate(chapter_list):
|
||||||
if chapter['start_time'] is None or not chapter['title']:
|
if chapter['start_time'] is None:
|
||||||
self.report_warning(f'Incomplete chapter {idx}')
|
self.report_warning(f'Incomplete chapter {idx}')
|
||||||
elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
|
elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
|
||||||
chapters[-1]['end_time'] = chapter['start_time']
|
|
||||||
chapters.append(chapter)
|
chapters.append(chapter)
|
||||||
else:
|
else:
|
||||||
self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
|
self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
|
||||||
chapters[-1]['end_time'] = duration
|
return chapters[1:]
|
||||||
return chapters if len(chapters) > 1 and chapters[1]['start_time'] else chapters[1:]
|
|
||||||
|
|
||||||
def _extract_comment(self, comment_renderer, parent=None):
|
def _extract_comment(self, comment_renderer, parent=None):
|
||||||
comment_id = comment_renderer.get('commentId')
|
comment_id = comment_renderer.get('commentId')
|
||||||
|
|
Loading…
Reference in a new issue