# Reconstructed from a whitespace-collapsed `git format-patch` email:
#
#   From 8125680192ec104dbfb6b1cc07ccc7e3c189ddc5 Mon Sep 17 00:00:00 2001
#   From: subrat-lima
#   Date: Wed, 18 Sep 2024 09:21:58 +0530
#   Subject: [PATCH] [ie/afl] added OmnyFMShow extractor
#
# The same patch also edits yt_dlp/extractor/_extractors.py:
#
#   -from .afl import AFLVideoIE
#   +from .afl import (
#   +    AFLPodcastsIE,
#   +    AFLVideoIE,
#   +)
#   ...
#    from .olympics import OlympicsReplayIE
#   +from .omnyfm import OmnyFMShowIE
#    from .on24 import On24IE
#
# Everything below is the new file yt_dlp/extractor/omnyfm.py.

import functools
import json
import math

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    InAdvancePagedList,
    clean_html,
    float_or_none,
    get_element_by_id,
    int_or_none,
    parse_iso8601,
    str_or_none,
    traverse_obj,
    unified_strdate,
    url_or_none,
)


class OmnyFMShowIE(InfoExtractor):
    """Extract the clips of an omny.fm show page as a paged playlist."""

    IE_NAME = 'omnyfm:show'
    # The capture group must be named `id`, because `_match_id` reads that group.
    _VALID_URL = r'https?://omny\.fm/shows/(?P<id>[\w-]+)'
    # Number of clips requested per API page; also the page size handed to
    # InAdvancePagedList.
    _PAGE_SIZE = 10
    _TESTS = [{
        'url': 'https://omny.fm/shows/league-leaders',
        'info_dict': {
            'id': 'bbe146d4-9bee-4763-b785-ad830009a23f',
            'title': 'League Leaders with Nicole Livingstone',
        },
        'playlist_mincount': 15,
    }, {
        'url': 'https://omny.fm/shows/afl-daily',
        'only_matching': True,
    }]

    def _fetch_page(self, org_id, playlist_id, page):
        """Download one page of clips for a program from the Omny API.

        `page` is passed to the API as the `cursor` query parameter.
        Returns the decoded JSON response.
        """
        return self._download_json(
            f'https://api.omny.fm/orgs/{org_id}/programs/{playlist_id}/clips',
            playlist_id, note=f'Downloading clips page {page}',
            query={'cursor': page, 'pageSize': self._PAGE_SIZE})

    def _entries(self, org_id, playlist_id, first_page_data, page):
        """Yield one info dict per clip of the zero-based playlist page `page`.

        Page 0 is served from `first_page_data` (the clips embedded in the
        show web page) when that is available; every other page, and page 0
        when nothing was embedded, is fetched from the API.
        """
        if page == 0 and first_page_data:
            data = first_page_data
        else:
            # The API is queried with cursor = page + 1.
            # NOTE(review): this presumes cursors are 1-based page numbers - confirm.
            data = self._fetch_page(org_id, playlist_id, page + 1)

        # Branching over `Clips` tolerates a missing or null list and skips
        # any element that is not a dict.
        for clip in traverse_obj(data, ('Clips', ..., {dict})):
            yield traverse_obj(clip, {
                'id': ('Id', {str_or_none}),
                'title': ('Title', {str_or_none}),
                'description': ('Description', {clean_html}),
                'thumbnail': (('ImageUrl', 'ArtworkUrl'), {url_or_none}, any),
                'duration': ('DurationSeconds', {float_or_none}),
                'url': ('AudioUrl', {url_or_none}),
                'season_number': ('Season', {int_or_none}),
                'episode_number': ('Episode', {int_or_none}),
                # `timestamp` must be a Unix timestamp; a YYYYMMDD string
                # belongs in `upload_date` instead.
                # NOTE(review): presumably PublishedUtc is ISO 8601 - confirm.
                'timestamp': ('PublishedUtc', {parse_iso8601}),
                'upload_date': ('PublishedUtc', {unified_strdate}),
                'filesize': ('PublishedAudioSizeInBytes', {int_or_none}),
            })

    def _real_extract(self, url):
        """Build the playlist result for the show at `url`.

        Raises ExtractorError when the page carries no `__NEXT_DATA__`
        element or when the organization/program ids cannot be read from it.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        next_data = get_element_by_id('__NEXT_DATA__', webpage)
        if not next_data:
            raise ExtractorError(
                'Unable to find __NEXT_DATA__ in the show page', video_id=display_id)
        page_props = traverse_obj(
            json.loads(next_data), ('props', 'pageProps', {dict})) or {}
        program = traverse_obj(page_props, ('program', {dict})) or {}

        org_id = traverse_obj(program, ('OrganizationId', {str_or_none}))
        playlist_id = traverse_obj(program, ('Id', {str_or_none}))
        if not org_id or not playlist_id:
            # Both ids are interpolated into the API URL, so there is no
            # point in continuing without them.
            raise ExtractorError(
                'Unable to extract the organization/program id', video_id=display_id)

        playlist_count = traverse_obj(
            program, ('DefaultPlaylist', 'NumberOfClips', {int_or_none}))
        title = traverse_obj(program, ('Name', {str_or_none}))
        first_page_data = traverse_obj(page_props, ('clips', {dict}))
        # When the clip count is unknown (or zero), still offer a single page
        # so that whatever the first page holds is returned.
        # NOTE(review): assumes the embedded first page holds _PAGE_SIZE clips - confirm.
        total_pages = math.ceil(playlist_count / self._PAGE_SIZE) if playlist_count else 1

        return self.playlist_result(
            InAdvancePagedList(
                functools.partial(self._entries, org_id, playlist_id, first_page_data),
                total_pages, self._PAGE_SIZE),
            playlist_id, title)