From 693ec74401fa8d42b0cfd5f1ef24aabade5cc275 Mon Sep 17 00:00:00 2001
From: Damiano Amatruda
Date: Mon, 18 Oct 2021 03:32:46 +0200
Subject: [PATCH] [on24] Add extractor (#1200)

Authored by: damianoamatruda
---
 yt_dlp/extractor/extractors.py |   1 +
 yt_dlp/extractor/on24.py       | 113 ++++++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+)
 create mode 100644 yt_dlp/extractor/on24.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 4c89c5a18..03d4a67f5 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -983,6 +983,7 @@ from .odatv import OdaTVIE
 from .odnoklassniki import OdnoklassnikiIE
 from .oktoberfesttv import OktoberfestTVIE
 from .olympics import OlympicsReplayIE
+from .on24 import On24IE
 from .ondemandkorea import OnDemandKoreaIE
 from .onet import (
     OnetIE,
diff --git a/yt_dlp/extractor/on24.py b/yt_dlp/extractor/on24.py
new file mode 100644
index 000000000..d4d824430
--- /dev/null
+++ b/yt_dlp/extractor/on24.py
@@ -0,0 +1,113 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    strip_or_none,
+    try_get,
+    urljoin,
+)
+
+
+class On24IE(InfoExtractor):
+    """Extractor for events hosted on event.on24.com."""
+
+    IE_NAME = 'on24'
+    IE_DESC = 'ON24'
+
+    # Two URL shapes are accepted. Each captures the 7-digit event id and the
+    # 32-character event key under its own pair of group names (id_1/key_1 or
+    # id_2/key_2); _real_extract() reads whichever pair matched.
+    _VALID_URL = r'''(?x)
+                    https?://event\.on24\.com/(?:
+                        wcc/r/(?P<id_1>\d{7})/(?P<key_1>[0-9A-F]{32})|
+                        eventRegistration/(?:console/EventConsoleApollo|EventLobbyServlet\?target=lobby30)
+                            \.jsp\?(?:[^/#?]*&)?eventid=(?P<id_2>\d{7})[^/#?]*&key=(?P<key_2>[0-9A-F]{32})
+                    )'''
+
+    _TESTS = [{
+        'url': 'https://event.on24.com/eventRegistration/console/EventConsoleApollo.jsp?uimode=nextgeneration&eventid=2197467&sessionid=1&key=5DF57BE53237F36A43B478DD36277A84&contenttype=A&eventuserid=305999&playerwidth=1000&playerheight=650&caller=previewLobby&text_language_id=en&format=fhaudio&newConsole=false',
+        'info_dict': {
+            'id': '2197467',
+            'ext': 'wav',
+            'title': 'Pearson Test of English General/Pearson English International Certificate Teacher Training Guide',
+            'upload_date': '20200219',
+            'timestamp': 1582149600.0,
+            'view_count': int,
+        }
+    }, {
+        'url': 'https://event.on24.com/wcc/r/2639291/82829018E813065A122363877975752E?mode=login&email=johnsmith@gmail.com',
+        'only_matching': True,
+    }, {
+        'url': 'https://event.on24.com/eventRegistration/console/EventConsoleApollo.jsp?&eventid=2639291&sessionid=1&username=&partnerref=&format=fhvideo1&mobile=&flashsupportedmobiledevice=&helpcenter=&key=82829018E813065A122363877975752E&newConsole=true&nxChe=true&newTabCon=true&text_language_id=en&playerwidth=748&playerheight=526&eventuserid=338788762&contenttype=A&mediametricsessionid=384764716&mediametricid=3558192&usercd=369267058&mode=launch',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict for the ON24 event referenced by `url`.
+
+        The event id and key come from the URL; the event metadata is fetched
+        as JSON from EventConsoleCachedServlet, and one format is emitted for
+        each media entry whose code is 'fhvideo1' or 'audio'.
+        """
+        mobj = self._match_valid_url(url)
+        event_id = mobj.group('id_1') or mobj.group('id_2')
+        event_key = mobj.group('key_1') or mobj.group('key_2')
+
+        event_data = self._download_json(
+            'https://event.on24.com/apic/utilApp/EventConsoleCachedServlet',
+            event_id, query={
+                'eventId': event_id,
+                'displayProfile': 'player',
+                'key': event_key,
+                'contentType': 'A'
+            })
+        # Prefer the id reported by the metadata, but keep the one from the URL
+        # when it is missing: str(None) is the truthy string 'None', so the
+        # value must be checked before it is converted.
+        api_event_id = try_get(event_data, lambda x: x['presentationLogInfo']['eventid'])
+        if api_event_id not in (None, ''):
+            event_id = str(api_event_id)
+        language = event_data.get('localelanguagecode')
+
+        formats = []
+        for media in event_data.get('mediaUrlInfo') or []:
+            # Check the path before stringifying it; a missing value would
+            # otherwise turn into the literal path 'None'.
+            media_path = media.get('url')
+            if not media_path:
+                continue
+            media_url = urljoin('https://event.on24.com/media/news/corporatevideo/events/', str(media_path))
+            if not media_url:
+                continue
+            media_type = media.get('code')
+            if media_type == 'fhvideo1':
+                formats.append({
+                    'format_id': 'video',
+                    'url': media_url,
+                    'language': language,
+                    'ext': 'mp4',
+                    'vcodec': 'avc1.640020',
+                    'acodec': 'mp4a.40.2',
+                })
+            elif media_type == 'audio':
+                formats.append({
+                    'format_id': 'audio',
+                    'url': media_url,
+                    'language': language,
+                    'ext': 'wav',
+                    'vcodec': 'none',
+                    'acodec': 'wav'
+                })
+        self._sort_formats(formats)
+
+        return {
+            'id': event_id,
+            'title': strip_or_none(event_data.get('description')),
+            # startdate is scaled down by 1000 (presumably ms -> s; confirm).
+            'timestamp': int_or_none(try_get(event_data, lambda x: x['session']['startdate']), 1000),
+            'webpage_url': f'https://event.on24.com/wcc/r/{event_id}/{event_key}',
+            'view_count': event_data.get('registrantcount'),
+            'formats': formats,
+        }