From db8b4edc7d0bd27da462f6fe82ff6e13e3d68a04 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Tue, 19 Dec 2023 22:21:47 +0800 Subject: [PATCH] [ie/JoqrAg] Add extractor (#8384) Authored by: pzhlkj6612 --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/joqrag.py | 112 ++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 yt_dlp/extractor/joqrag.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 572d79fba2..d5f030c6b0 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -865,6 +865,7 @@ from .jiosaavn import ( ) from .jove import JoveIE from .joj import JojIE +from .joqrag import JoqrAgIE from .jstream import JStreamIE from .jtbc import ( JTBCIE, diff --git a/yt_dlp/extractor/joqrag.py b/yt_dlp/extractor/joqrag.py new file mode 100644 index 0000000000..3bb28af94e --- /dev/null +++ b/yt_dlp/extractor/joqrag.py @@ -0,0 +1,112 @@ +import datetime +import urllib.parse + +from .common import InfoExtractor +from ..utils import ( + clean_html, + datetime_from_str, + unified_timestamp, + urljoin, +) + + +class JoqrAgIE(InfoExtractor): + IE_DESC = '超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)' + _VALID_URL = [r'https?://www\.uniqueradio\.jp/agplayer5/(?:player|inc-player-hls)\.php', + r'https?://(?:www\.)?joqr\.co\.jp/ag/', + r'https?://(?:www\.)?joqr\.co\.jp/qr/ag(?:daily|regular)program/?(?:$|[#?])'] + _TESTS = [{ + 'url': 'https://www.uniqueradio.jp/agplayer5/player.php', + 'info_dict': { + 'id': 'live', + 'title': str, + 'channel': '超!A&G+', + 'description': str, + 'live_status': 'is_live', + 'release_timestamp': int, + }, + 'params': { + 'skip_download': True, + 'ignore_no_formats_error': True, + }, + }, { + 'url': 'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php', + 'only_matching': True, + }, { + 'url': 'https://www.joqr.co.jp/ag/article/103760/', + 'only_matching': True, + }, { + 'url': 'http://www.joqr.co.jp/qr/agdailyprogram/', + 'only_matching': True, + }, { + 'url': 'http://www.joqr.co.jp/qr/agregularprogram/', + 'only_matching': True, + }] + + def _extract_metadata(self, variable, html): + return clean_html(urllib.parse.unquote_plus(self._search_regex( + rf'var\s+{variable}\s*=\s*(["\'])(?P(?:(?!\1).)+)\1', + html, 'metadata', group='value', default=''))) or None + + def _extract_start_timestamp(self, video_id, is_live): + def extract_start_time_from(date_str): + dt = datetime_from_str(date_str) + datetime.timedelta(hours=9) + date = dt.strftime('%Y%m%d') + start_time = self._search_regex( + r']+\bclass="dailyProgram-itemHeaderTime"[^>]*>[\s\d:]+–\s*(\d{1,2}:\d{1,2})', + self._download_webpage( + f'https://www.joqr.co.jp/qr/agdailyprogram/?date={date}', video_id, + note=f'Downloading program list of {date}', fatal=False, + errnote=f'Failed to download program list of {date}') or '', + 'start time', default=None) + if start_time: + return unified_timestamp(f'{dt.strftime("%Y/%m/%d")} {start_time} +09:00') + return None + + start_timestamp = extract_start_time_from('today') + if not start_timestamp: + return None + + if not is_live or start_timestamp < datetime_from_str('now').timestamp(): + return start_timestamp + else: + return extract_start_time_from('yesterday') + + def _real_extract(self, url): + video_id = 'live' + + metadata = self._download_webpage( + 'https://www.uniqueradio.jp/aandg', video_id, + note='Downloading metadata', errnote='Failed to download metadata') + title = self._extract_metadata('Program_name', metadata) + + if title == '放送休止': + formats = [] + live_status = 'is_upcoming' + release_timestamp = self._extract_start_timestamp(video_id, False) + msg = 'This stream is not currently live' + if release_timestamp: + msg += (' and will start at ' + + datetime.datetime.fromtimestamp(release_timestamp).strftime('%Y-%m-%d %H:%M:%S')) + self.raise_no_formats(msg, expected=True) + else: + m3u8_path = self._search_regex( + r']*\bsrc="([^"]+)"', + self._download_webpage( + 'https://www.uniqueradio.jp/agplayer5/inc-player-hls.php', video_id, + note='Downloading player data', errnote='Failed to download player data'), + 'm3u8 url') + formats = self._extract_m3u8_formats( + urljoin('https://www.uniqueradio.jp/', m3u8_path), video_id) + live_status = 'is_live' + release_timestamp = self._extract_start_timestamp(video_id, True) + + return { + 'id': video_id, + 'title': title, + 'channel': '超!A&G+', + 'description': self._extract_metadata('Program_text', metadata), + 'formats': formats, + 'live_status': live_status, + 'release_timestamp': release_timestamp, + }