mirror of
https://github.com/yt-dlp/yt-dlp
synced 2024-12-26 21:59:08 +01:00
[ie/escribe] Add extractor
This commit is contained in:
parent
f919729538
commit
b1bb2fc3a3
2 changed files with 116 additions and 0 deletions
|
@ -610,6 +610,7 @@ from .ertgr import (
|
|||
ERTFlixIE,
|
||||
ERTWebtvEmbedIE,
|
||||
)
|
||||
from .escribe import EscribeIE
|
||||
from .espn import (
|
||||
ESPNIE,
|
||||
ESPNArticleIE,
|
||||
|
|
115
yt_dlp/extractor/escribe.py
Normal file
115
yt_dlp/extractor/escribe.py
Normal file
|
@ -0,0 +1,115 @@
|
|||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import extract_attributes, get_element_html_by_id, get_element_text_and_html_by_tag
|
||||
|
||||
|
||||
class EscribeIE(InfoExtractor):
|
||||
_VALID_URL = r'https://[^.]+\.escribemeetings\.com/(?:Players/ISIStandAlonePlayer|Meeting)\.aspx'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'https://pub-guelph.escribemeetings.com/Players/ISIStandAlonePlayer.aspx?Id=3ac80dd1-d45a-45e8-8be0-cfe526e5b829',
|
||||
'md5': '06748d4cccc36d12dbd967af92078ef8',
|
||||
'info_dict': {
|
||||
'id': '3ac80dd1-d45a-45e8-8be0-cfe526e5b829',
|
||||
'ext': 'mp4',
|
||||
'title': 'Council Planning - October 08, 2024',
|
||||
'url': 'https://video.isilive.ca/guelph/Council%20Encoder_CPM_2024-10-08-03-55.mp4',
|
||||
'uploader': 'guelph',
|
||||
'upload_date': '20241008',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://pub-guelph.escribemeetings.com/Players/ISIStandAlonePlayer.aspx?Id=4a0da857-5283-48ff-9675-6e41a6608b52',
|
||||
'md5': 'd498884762a777a503502871696bd985',
|
||||
'info_dict': {
|
||||
'id': '4a0da857-5283-48ff-9675-6e41a6608b52',
|
||||
'ext': 'mp4',
|
||||
'title': 'Council Planning - September 10, 2024',
|
||||
'url': 'https://video.isilive.ca/guelph/Council%20Encoder_CPM_2024-09-10-05-56.mp4',
|
||||
'uploader': 'guelph',
|
||||
'upload_date': '20240910',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://pub-guelph.escribemeetings.com/Players/ISIStandAlonePlayer.aspx?Id=99dad340-87ab-46cb-a53b-326b8e57b9af',
|
||||
'md5': '81e0de48da05e378c14584078c2dffa8',
|
||||
'info_dict': {
|
||||
'id': '99dad340-87ab-46cb-a53b-326b8e57b9af',
|
||||
'ext': 'mp4',
|
||||
'title': 'Committee of the Whole - November 05, 2024',
|
||||
'url': 'https://video.isilive.ca/guelph/Council%20Encoder_Committee%20of%20the%20Whole_2024-11-05-01-28.mp4',
|
||||
'uploader': 'guelph',
|
||||
'upload_date': '20241105',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://pub-guelph.escribemeetings.com/Meeting.aspx?Id=4fd7316d-12ae-4f06-90d7-7b5c9989a5bf&Agenda=PostMinutes&lang=English',
|
||||
'md5': '900f850c3a31dd7e1600c529dd5e82b7',
|
||||
'info_dict': {
|
||||
'id': '4fd7316d-12ae-4f06-90d7-7b5c9989a5bf',
|
||||
'ext': 'mp4',
|
||||
'title': 'Heritage Guelph - November 04, 2024',
|
||||
'url': 'https://video.isilive.ca/guelph/November%204%2C%202024%20-%20Heritage%20Guelph%20Meeting.mp4',
|
||||
'uploader': 'guelph',
|
||||
'upload_date': '20241104',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://pub-cutlerbay-fl.escribemeetings.com/Meeting.aspx?Id=8a38be89-e595-45d5-bda4-c4258704b494&Agenda=Agenda&lang=English',
|
||||
'md5': 'c8d644d7ceaf125858dc446343faa057',
|
||||
'info_dict': {
|
||||
'id': '8a38be89-e595-45d5-bda4-c4258704b494',
|
||||
'ext': 'mp4',
|
||||
'title': 'Town Council Zoning Workshop - With Virtual - June 13, 2024',
|
||||
'url': 'https://video.isilive.ca/cutlerbay/06-13-2024%20TCZW_Recording.mp4',
|
||||
'uploader': 'cutlerbay',
|
||||
'upload_date': '20240613',
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
query_args = urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
|
||||
video_id = query_args['Id'][0]
|
||||
|
||||
# Potentially switch from the player page to the detailed meeting page.
|
||||
url = url.replace('/Players/ISIStandAlonePlayer.aspx', '/Meeting.aspx', 1)
|
||||
|
||||
# Extract the element replaced by the JavaScript ISI player.
|
||||
html = self._download_webpage(url, video_id)
|
||||
player_target = get_element_html_by_id('isi_player', html)
|
||||
player_attrs = extract_attributes(player_target)
|
||||
|
||||
file_name = player_attrs['data-stream_name']
|
||||
client_id = player_attrs['data-client_id']
|
||||
|
||||
quoted_file_name = urllib.parse.quote(file_name)
|
||||
quoted_client_id = urllib.parse.quote(client_id)
|
||||
video_url = f'https://video.isilive.ca/{quoted_client_id}/{quoted_file_name}'
|
||||
|
||||
title, _ = get_element_text_and_html_by_tag('title', html)
|
||||
title = title.strip()
|
||||
|
||||
info = {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'uploader': client_id,
|
||||
'title': title,
|
||||
'webpage_url': url,
|
||||
}
|
||||
|
||||
# No point breaking if there's ever a file without an extension.
|
||||
if '.' in file_name:
|
||||
_, ext = file_name.rsplit('.', maxsplit=1)
|
||||
info['ext'] = ext
|
||||
|
||||
# Use the date of the meeting as the upload date, which is not necessarily
|
||||
# the same but it's both what is available and likely desired by users.
|
||||
# Using regex as the parser seems to fail on this chunk of the HTML.
|
||||
meeting_date = self._html_search_regex(
|
||||
r'datetime=["\'](\d{4}-\d{2}-\d{2})', html, 'upload_date', fatal=False)
|
||||
if meeting_date is not None:
|
||||
info['upload_date'] = meeting_date.replace('-', '')
|
||||
|
||||
return info
|
Loading…
Reference in a new issue