From 5fea24bda2b1a23648f6067d690116d619fa76a3 Mon Sep 17 00:00:00 2001 From: subrat-lima Date: Wed, 18 Sep 2024 13:23:50 +0530 Subject: [PATCH] [ie/afl][ie/omnyfm] added AFLPodcastIE and updated OmnyFMShowIE 1. AFLPodcastIE: Added extractor for AFL podcasts 2. OmnyFMShowIE: Updated code to adjust url before download page to support various url patterns --- yt_dlp/extractor/_extractors.py | 2 +- yt_dlp/extractor/afl.py | 26 ++++++++++++++++++++++++++ yt_dlp/extractor/omnyfm.py | 4 +++- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 652d409d7c..46a993058f 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -76,7 +76,7 @@ from .aenetworks import ( ) from .aeonco import AeonCoIE from .afl import ( - AFLPodcastsIE, + AFLPodcastIE, AFLVideoIE, ) from .afreecatv import ( diff --git a/yt_dlp/extractor/afl.py b/yt_dlp/extractor/afl.py index 44b2ee2907..506a25808a 100644 --- a/yt_dlp/extractor/afl.py +++ b/yt_dlp/extractor/afl.py @@ -1,12 +1,14 @@ from .brightcove import BrightcoveNewIE from .common import InfoExtractor +from .omnyfm import OmnyFMShowIE from ..utils import ( extract_attributes, get_element_by_class, smuggle_url, str_or_none, traverse_obj, + url_or_none, ) @@ -52,3 +54,27 @@ class AFLVideoIE(InfoExtractor): video_url = f'https://players.brightcove.net/{account_id}/{player_id}/index.html?videoId={video_id}' video_url = smuggle_url(video_url, {'referrer': url}) return self.url_result(video_url, BrightcoveNewIE) + + +class AFLPodcastIE(InfoExtractor): + IE_NAME = 'afl:podcast' + _VALID_URL = r'https?://(?:www\.)?afl\.com.au/(?:aflw/)?podcasts/(?P<id>[\w-]+)' + _TESTS = [{ + 'url': 'https://www.afl.com.au/podcasts/between-us', + 'md5': '7000431c2bd3f96eddb5f63273aea83e', + 'info_dict': { + 'id': 'e0ab8454-f818-483f-bed1-b156002c021f', + 'title': 'Between Us', + }, + 'playlist_mincount': 7, + }, { + 'url': 
'https://www.afl.com.au/podcasts/afl-daily', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + element = get_element_by_class('omny-embed', webpage) + podcast_url = traverse_obj(extract_attributes(element), ('src', {url_or_none})) + return self.url_result(podcast_url, OmnyFMShowIE) diff --git a/yt_dlp/extractor/omnyfm.py b/yt_dlp/extractor/omnyfm.py index f01fa35828..0f69d59542 100644 --- a/yt_dlp/extractor/omnyfm.py +++ b/yt_dlp/extractor/omnyfm.py @@ -19,6 +19,7 @@ from ..utils import ( class OmnyFMShowIE(InfoExtractor): IE_NAME = 'omnyfm:show' _VALID_URL = r'https?://omny\.fm/shows/(?P<id>[\w-]+)' + _EMBED_REGEX = [r'<iframe[^>]+?src=(?:["\'])(?P<url>https?://omny\.fm/shows/.+?)\1'] _PAGE_SIZE = 10 _TESTS = [{ 'url': 'https://omny.fm/shows/league-leaders', @@ -53,7 +54,8 @@ class OmnyFMShowIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) + page_url = 'https://omny.fm/shows/' + display_id + webpage = self._download_webpage(page_url, display_id) data = json.loads(get_element_by_id('__NEXT_DATA__', webpage)) org_id = traverse_obj(data, ('props', 'pageProps', 'program', 'OrganizationId', {str_or_none}))