From 62cba8a1bedbfc0ddde7267ae57b72bf5f7ea7b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elan=20Ruusam=C3=A4e?= Date: Mon, 2 Dec 2024 00:33:11 +0200 Subject: [PATCH] [ie/duoplay] Fix extractor (#11588) Authored by: glensc, bashonly Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> --- yt_dlp/extractor/duoplay.py | 60 +++++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 13 deletions(-) diff --git a/yt_dlp/extractor/duoplay.py b/yt_dlp/extractor/duoplay.py index 18642fea39..75650c3a64 100644 --- a/yt_dlp/extractor/duoplay.py +++ b/yt_dlp/extractor/duoplay.py @@ -5,15 +5,16 @@ from ..utils import ( get_element_text_and_html_by_tag, int_or_none, join_nonempty, + parse_qs, str_or_none, try_call, unified_timestamp, ) -from ..utils.traversal import traverse_obj +from ..utils.traversal import traverse_obj, value class DuoplayIE(InfoExtractor): - _VALID_URL = r'https?://duoplay\.ee/(?P\d+)/[\w-]+/?(?:\?(?:[^#]+&)?ep=(?P\d+))?' + _VALID_URL = r'https?://duoplay\.ee/(?P\d+)(?:[/?#]|$)' _TESTS = [{ 'note': 'Siberi võmm S02E12', 'url': 'https://duoplay.ee/4312/siberi-vomm?ep=24', @@ -34,15 +35,16 @@ class DuoplayIE(InfoExtractor): 'episode_number': 12, 'episode_id': '24', }, + 'skip': 'No video found', }, { 'note': 'Empty title', 'url': 'https://duoplay.ee/17/uhikarotid?ep=14', - 'md5': '6aca68be71112314738dd17cced7f8bf', + 'md5': 'cba9f5dabf2582b224d80ac44fb80e47', 'info_dict': { 'id': '17_14', 'ext': 'mp4', - 'title': 'Ühikarotid', - 'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$', + 'title': 'Episode 14', + 'thumbnail': r're:https?://.+\.jpg', 'description': 'md5:4719b418e058c209def41d48b601276e', 'upload_date': '20100916', 'timestamp': 1284661800, @@ -52,6 +54,8 @@ class DuoplayIE(InfoExtractor): 'season_number': 2, 'episode_id': '14', 'release_year': 2010, + 'episode': 'Episode 14', + 'episode_number': 14, }, }, { 'note': 'Movie without expiry', @@ -68,10 +72,32 @@ class DuoplayIE(InfoExtractor): 'timestamp': 1671054000, 'release_year': 2018, }, + 'skip': 'No video found', + }, { + 'note': 'Episode url without show name', + 'url': 'https://duoplay.ee/9644?ep=185', + 'md5': '63f324b4fe2dbd8194dca16a6d52184a', + 'info_dict': { + 'id': '9644_185', + 'ext': 'mp4', + 'title': 'Episode 185', + 'thumbnail': r're:https?://.+\.jpg', + 'description': 'md5:ed25ba4e9e5d54bc291a4a0cdd241467', + 'upload_date': '20241120', + 'timestamp': 1732077000, + 'episode': 'Episode 63', + 'episode_id': '185', + 'episode_number': 63, + 'season': 'Season 2', + 'season_number': 2, + 'series': 'Telehommik', + 'series_id': '9644', + }, }] def _real_extract(self, url): - telecast_id, episode = self._match_valid_url(url).group('id', 'ep') + telecast_id = self._match_id(url) + episode = traverse_obj(parse_qs(url), ('ep', 0, {int_or_none}, {str_or_none})) video_id = join_nonempty(telecast_id, episode, delim='_') webpage = self._download_webpage(url, video_id) video_player = try_call(lambda: extract_attributes( @@ -79,25 +105,33 @@ class DuoplayIE(InfoExtractor): if not video_player or not video_player.get('manifest-url'): raise ExtractorError('No video found', expected=True) + manifest_url = video_player['manifest-url'] + session_token = self._download_json( + 'https://sts.postimees.ee/session/register', video_id, 'Registering session', + 'Unable to register session', headers={ + 'Accept': 'application/json', + 'X-Original-URI': manifest_url, + })['session'] + episode_attr = self._parse_json(video_player.get(':episode') or '', video_id, fatal=False) or {} return { 'id': video_id, - 'formats': self._extract_m3u8_formats(video_player['manifest-url'], video_id, 'mp4'), + 'formats': self._extract_m3u8_formats(manifest_url, video_id, 'mp4', query={'s': session_token}), **traverse_obj(episode_attr, { - 'title': 'title', - 'description': 'synopsis', + 'title': ('title', {str}), + 'description': ('synopsis', {str}), 'thumbnail': ('images', 'original'), 'timestamp': ('airtime', {lambda x: unified_timestamp(x + ' +0200')}), - 'cast': ('cast', {lambda x: x.split(', ')}), + 'cast': ('cast', filter, {lambda x: x.split(', ')}), 'release_year': ('year', {int_or_none}), }), **(traverse_obj(episode_attr, { - 'title': (None, ('subtitle', ('episode_nr', {lambda x: f'Episode {x}' if x else None}))), - 'series': 'title', + 'title': (None, (('subtitle', {str}, filter), {value(f'Episode {episode}' if episode else None)})), + 'series': ('title', {str}), 'series_id': ('telecast_id', {str_or_none}), 'season_number': ('season_id', {int_or_none}), - 'episode': 'subtitle', + 'episode': ('subtitle', {str}, filter), 'episode_number': ('episode_nr', {int_or_none}), 'episode_id': ('episode_id', {str_or_none}), }, get_all=False) if episode_attr.get('category') != 'movies' else {}),