mirror of
https://github.com/yt-dlp/yt-dlp
synced 2024-12-27 21:59:17 +01:00
[ie/BunnyCDN] Add extractor
This commit is contained in:
parent
f919729538
commit
b3fc89d98e
3 changed files with 200 additions and 30 deletions
|
@ -311,6 +311,7 @@ from .brilliantpala import (
|
||||||
)
|
)
|
||||||
from .bundesliga import BundesligaIE
|
from .bundesliga import BundesligaIE
|
||||||
from .bundestag import BundestagIE
|
from .bundestag import BundestagIE
|
||||||
|
from .bunnycdn import BunnyCDNIE
|
||||||
from .businessinsider import BusinessInsiderIE
|
from .businessinsider import BusinessInsiderIE
|
||||||
from .buzzfeed import BuzzFeedIE
|
from .buzzfeed import BuzzFeedIE
|
||||||
from .byutv import BYUtvIE
|
from .byutv import BYUtvIE
|
||||||
|
|
184
yt_dlp/extractor/bunnycdn.py
Normal file
184
yt_dlp/extractor/bunnycdn.py
Normal file
|
@ -0,0 +1,184 @@
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import random
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..networking import HEADRequest
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
extract_attributes,
|
||||||
|
int_or_none,
|
||||||
|
parse_qs,
|
||||||
|
smuggle_url,
|
||||||
|
unsmuggle_url,
|
||||||
|
url_or_none,
|
||||||
|
urlhandle_detect_ext,
|
||||||
|
)
|
||||||
|
from ..utils.traversal import find_element, traverse_obj
|
||||||
|
|
||||||
|
|
||||||
|
class BunnyCDNIE(InfoExtractor):
    """Extractor for embed/play pages on iframe.mediadelivery.net and video.bunnycdn.com.

    Embedded players found on third-party pages are matched through
    ``_EMBED_REGEX``; the embedding page URL is carried along as a smuggled
    ``Referer`` (see ``_extract_embed_urls``).
    """

    _VALID_URL = r'https?://(?:iframe\.mediadelivery\.net|video\.bunnycdn\.com)/(?:embed|play)/(?P<library_id>\d+)/(?P<id>[\da-f-]+)'
    _EMBED_REGEX = [rf'<iframe[^>]+src=[\'"](?P<url>{_VALID_URL}[^\'"]*)[\'"]']
    _TESTS = [{
        'url': 'https://iframe.mediadelivery.net/embed/113933/e73edec1-e381-4c8b-ae73-717a140e0924',
        'info_dict': {
            'id': 'e73edec1-e381-4c8b-ae73-717a140e0924',
            'ext': 'mp4',
            'title': 'mistress morgana (3).mp4',
            'description': '',
            'timestamp': 1693251673,
            'thumbnail': r're:^https?://.*\.b-cdn\.net/e73edec1-e381-4c8b-ae73-717a140e0924/thumbnail\.jpg',
            'duration': 7.0,
            'upload_date': '20230828',
        },
    }, {
        'url': 'https://iframe.mediadelivery.net/play/136145/32e34c4b-0d72-437c-9abb-05e67657da34',
        'info_dict': {
            'id': '32e34c4b-0d72-437c-9abb-05e67657da34',
            'ext': 'mp4',
            'timestamp': 1691145748,
            'thumbnail': r're:^https?://.*\.b-cdn\.net/32e34c4b-0d72-437c-9abb-05e67657da34/thumbnail_9172dc16\.jpg',
            'duration': 106.0,
            'description': 'md5:981a3e899a5c78352b21ed8b2f1efd81',
            'upload_date': '20230804',
            'title': 'Sanela ist Teil der #arbeitsmarktkraft',
        },
    }, {
        # Stream requires activation and pings
        'url': 'https://iframe.mediadelivery.net/embed/200867/2e8545ec-509d-4571-b855-4cf0235ccd75',
        'info_dict': {
            'id': '2e8545ec-509d-4571-b855-4cf0235ccd75',
            'ext': 'mp4',
            'timestamp': 1708497752,
            'title': 'netflix part 1',
            'duration': 3959.0,
            'description': '',
            'upload_date': '20240221',
            'thumbnail': r're:^https?://.*\.b-cdn\.net/2e8545ec-509d-4571-b855-4cf0235ccd75/thumbnail\.jpg',
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.queisser.de/unternehmen/neue-firmenzentrale',
        'info_dict': {
            'id': 'd3e06f96-9972-45a0-a261-1e565bf72778',
            'ext': 'mp4',
            'description': '',
            'thumbnail': r're:^https?://.*\.b-cdn\.net/d3e06f96-9972-45a0-a261-1e565bf72778/thumbnail_512bb53f\.jpg',
            'upload_date': '20221214',
            'duration': 134.0,
            'timestamp': 1671016982,
            'title': 'Zeitraffer Abriss 1080p',
        },
    }, {
        # Stream requires Referer
        'url': 'https://conword.io/',
        'info_dict': {
            'id': '3a5d863e-9cd6-447e-b6ef-e289af50b349',
            'ext': 'mp4',
            'title': 'Conword bei der Stadt Köln und Stadt Dortmund',
            'description': '',
            'upload_date': '20231031',
            'duration': 31.0,
            'thumbnail': 'https://video.watchuh.com/3a5d863e-9cd6-447e-b6ef-e289af50b349/thumbnail.jpg',
            'timestamp': 1698783879,
        },
    }, {
        # URL requires token and expires
        'url': 'https://www.stockphotos.com/video/moscow-subway-the-train-is-arriving-at-the-park-kultury-station-10017830',
        'info_dict': {
            'id': '0b02fa20-4e8c-4140-8f87-f64d820a3386',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.b-cdn\.net//0b02fa20-4e8c-4140-8f87-f64d820a3386/thumbnail\.jpg',
            'title': 'Moscow subway. The train is arriving at the Park Kultury station.',
            'upload_date': '20240531',
            'duration': 18.0,
            'timestamp': 1717152269,
            'description': '',
        },
    }]

    def _send_ping(self, ping_url, video_id, headers, secret, context_id, time, paused='false'):
        """Request ``ping_url`` once with a hashed playback-state query.

        The query carries an MD5 hex digest of
        ``{secret}_{context_id}_{time}_{paused}_{resolution}`` plus the
        ``time``, ``paused`` and ``resolution`` values that went into it.
        The response body is discarded.
        """
        # Hard coded, since it doesn't seem to matter
        res = 1080
        md5_hash = hashlib.md5(f'{secret}_{context_id}_{time}_{paused}_{res}'.encode()).hexdigest()
        self._download_webpage(
            ping_url, video_id, note=f'Sending ping at {time}',
            query={'hash': md5_hash, 'time': time, 'paused': paused, 'resolution': res},
            headers=headers)

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield each embed URL found by the parent implementation, with the
        embedding page ``url`` smuggled along as ``Referer``."""
        for embed_url in super()._extract_embed_urls(url, webpage):
            yield smuggle_url(embed_url, {'Referer': url})

    def _real_extract(self, url):
        """Extract formats, subtitles and metadata for a single video.

        Formats come from up to three places: HTML5 media entries in the embed
        page, the ``originalUrl`` found in the page (added as ``source`` when a
        HEAD request returns 200), and an HLS manifest for streams that need
        activation and pings first.

        Raises ExtractorError (expected) when the embed page title is ``403``
        or ``404``.
        """
        url, smuggled_data = unsmuggle_url(url, {})

        video_id, library_id = self._match_valid_url(url).group('id', 'library_id')
        webpage = self._download_webpage(
            f'https://iframe.mediadelivery.net/embed/{library_id}/{video_id}', video_id,
            headers=traverse_obj(smuggled_data, {'Referer': 'Referer'}),
            query=traverse_obj(parse_qs(url), {'token': 'token', 'expires': 'expires'}))

        # Bind the title before comparing: `x := f() == '403'` would store the
        # boolean comparison result in x, making the '404' check unreachable.
        html_title = self._html_extract_title(webpage, default=None)
        if html_title == '403':
            raise ExtractorError('This video is inaccessible. Consider passing a Referer', expected=True)
        elif html_title == '404':
            raise ExtractorError('This video does not exist', expected=True)

        headers = {'Referer': url}

        info = traverse_obj(self._parse_html5_media_entries(url, webpage, video_id, _headers=headers), 0) or {}
        formats = info.get('formats') or []
        subtitles = info.get('subtitles') or {}

        original_url = self._search_regex(
            r'(?:var|const|let)\s+originalUrl\s*=\s*["\']([^"\']+)["\']', webpage, 'original url', default=None)
        if url_or_none(original_url):
            # Only offer the original file when the server actually serves it
            urlh = self._request_webpage(
                HEADRequest(original_url), video_id=video_id, note='Checking original',
                headers=headers, fatal=False, expected_status=(403, 404))
            if urlh and urlh.status == 200:
                formats.append({
                    'url': original_url,
                    'format_id': 'source',
                    'quality': 1,
                    'http_headers': headers,
                    'ext': urlhandle_detect_ext(urlh, default='mp4'),
                    'filesize': int_or_none(urlh.headers.get('Content-Length')),
                })

        # MediaCage Streams require activation and pings
        src_url = self._search_regex(
            r'\.setAttribute\([\'"]src[\'"],\s*[\'"]([^\'"]+)[\'"]\)', webpage, 'src url', default=None)
        activation_url = self._search_regex(
            r'loadUrl\([\'"]([^\'"]+/activate)[\'"]', webpage, 'activation url', default=None)
        ping_url = self._search_regex(
            r'loadUrl\([\'"]([^\'"]+/ping)[\'"]', webpage, 'ping url', default=None)
        # Parse the query string once, and only when a source URL was found
        src_query = parse_qs(src_url) if src_url else {}
        secret = traverse_obj(src_query, ('secret', 0))
        context_id = traverse_obj(src_query, ('contextId', 0))
        if src_url and activation_url and ping_url and secret and context_id:
            self._send_ping(ping_url, video_id, headers, secret, context_id, 0, 'true')
            self._download_webpage(
                activation_url, video_id, headers=headers, note='Downloading activation data')
            # Sending first couple pings ahead of time seems to be enough
            for i in range(0, 30, 4):
                self._send_ping(ping_url, video_id, headers, secret, context_id, i + round(random.random(), 6))

            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                src_url, video_id, 'mp4', headers=headers, m3u8_id='hls', fatal=False)
            for fmt in fmts:
                fmt['http_headers'] = headers
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **traverse_obj(webpage, ({find_element(id='main-video', html=True)}, {extract_attributes}, {
                'title': ('data-plyr-config', {json.loads}, 'title', {str}),
                'thumbnail': ('data-poster', {url_or_none}),
            })),
            **self._search_json_ld(webpage, video_id, fatal=False),
        }
|
|
@ -1,5 +1,6 @@
|
||||||
|
from .bunnycdn import BunnyCDNIE
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import try_get, unified_timestamp
|
from ..utils import make_archive_id, try_get, unified_timestamp
|
||||||
|
|
||||||
|
|
||||||
class SovietsClosetBaseIE(InfoExtractor):
|
class SovietsClosetBaseIE(InfoExtractor):
|
||||||
|
@ -43,7 +44,7 @@ class SovietsClosetIE(SovietsClosetBaseIE):
|
||||||
'url': 'https://sovietscloset.com/video/1337',
|
'url': 'https://sovietscloset.com/video/1337',
|
||||||
'md5': 'bd012b04b261725510ca5383074cdd55',
|
'md5': 'bd012b04b261725510ca5383074cdd55',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1337',
|
'id': '2f0cfbf4-3588-43a9-a7d6-7c9ea3755e67',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The Witcher #13',
|
'title': 'The Witcher #13',
|
||||||
'thumbnail': r're:^https?://.*\.b-cdn\.net/2f0cfbf4-3588-43a9-a7d6-7c9ea3755e67/thumbnail\.jpg$',
|
'thumbnail': r're:^https?://.*\.b-cdn\.net/2f0cfbf4-3588-43a9-a7d6-7c9ea3755e67/thumbnail\.jpg$',
|
||||||
|
@ -55,20 +56,23 @@ class SovietsClosetIE(SovietsClosetBaseIE):
|
||||||
'upload_date': '20170413',
|
'upload_date': '20170413',
|
||||||
'uploader_id': 'SovietWomble',
|
'uploader_id': 'SovietWomble',
|
||||||
'uploader_url': 'https://www.twitch.tv/SovietWomble',
|
'uploader_url': 'https://www.twitch.tv/SovietWomble',
|
||||||
'duration': 7007,
|
'duration': 7008,
|
||||||
'was_live': True,
|
'was_live': True,
|
||||||
'availability': 'public',
|
'availability': 'public',
|
||||||
'series': 'The Witcher',
|
'series': 'The Witcher',
|
||||||
'season': 'Misc',
|
'season': 'Misc',
|
||||||
'episode_number': 13,
|
'episode_number': 13,
|
||||||
'episode': 'Episode 13',
|
'episode': 'Episode 13',
|
||||||
|
'creators': ['SovietWomble'],
|
||||||
|
'description': '',
|
||||||
|
'_old_archive_ids': ['sovietscloset 1337'],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'https://sovietscloset.com/video/1105',
|
'url': 'https://sovietscloset.com/video/1105',
|
||||||
'md5': '89fa928f183893cb65a0b7be846d8a90',
|
'md5': '89fa928f183893cb65a0b7be846d8a90',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1105',
|
'id': 'c0e5e76f-3a93-40b4-bf01-12343c2eec5d',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Arma 3 - Zeus Games #5',
|
'title': 'Arma 3 - Zeus Games #5',
|
||||||
'uploader': 'SovietWomble',
|
'uploader': 'SovietWomble',
|
||||||
|
@ -80,39 +84,20 @@ class SovietsClosetIE(SovietsClosetBaseIE):
|
||||||
'upload_date': '20160420',
|
'upload_date': '20160420',
|
||||||
'uploader_id': 'SovietWomble',
|
'uploader_id': 'SovietWomble',
|
||||||
'uploader_url': 'https://www.twitch.tv/SovietWomble',
|
'uploader_url': 'https://www.twitch.tv/SovietWomble',
|
||||||
'duration': 8804,
|
'duration': 8805,
|
||||||
'was_live': True,
|
'was_live': True,
|
||||||
'availability': 'public',
|
'availability': 'public',
|
||||||
'series': 'Arma 3',
|
'series': 'Arma 3',
|
||||||
'season': 'Zeus Games',
|
'season': 'Zeus Games',
|
||||||
'episode_number': 5,
|
'episode_number': 5,
|
||||||
'episode': 'Episode 5',
|
'episode': 'Episode 5',
|
||||||
|
'creators': ['SovietWomble'],
|
||||||
|
'description': '',
|
||||||
|
'_old_archive_ids': ['sovietscloset 1105'],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _extract_bunnycdn_iframe(self, video_id, bunnycdn_id):
|
|
||||||
iframe = self._download_webpage(
|
|
||||||
f'https://iframe.mediadelivery.net/embed/5105/{bunnycdn_id}',
|
|
||||||
video_id, note='Downloading BunnyCDN iframe', headers=self.MEDIADELIVERY_REFERER)
|
|
||||||
|
|
||||||
m3u8_url = self._search_regex(r'(https?://.*?\.m3u8)', iframe, 'm3u8 url')
|
|
||||||
thumbnail_url = self._search_regex(r'(https?://.*?thumbnail\.jpg)', iframe, 'thumbnail url')
|
|
||||||
|
|
||||||
m3u8_formats = self._extract_m3u8_formats(m3u8_url, video_id, headers=self.MEDIADELIVERY_REFERER)
|
|
||||||
|
|
||||||
if not m3u8_formats:
|
|
||||||
duration = None
|
|
||||||
else:
|
|
||||||
duration = self._extract_m3u8_vod_duration(
|
|
||||||
m3u8_formats[0]['url'], video_id, headers=self.MEDIADELIVERY_REFERER)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'formats': m3u8_formats,
|
|
||||||
'thumbnail': thumbnail_url,
|
|
||||||
'duration': duration,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
@ -122,13 +107,13 @@ class SovietsClosetIE(SovietsClosetBaseIE):
|
||||||
|
|
||||||
stream = self.parse_nuxt_jsonp(f'{static_assets_base}/video/{video_id}/payload.js', video_id, 'video')['stream']
|
stream = self.parse_nuxt_jsonp(f'{static_assets_base}/video/{video_id}/payload.js', video_id, 'video')['stream']
|
||||||
|
|
||||||
return {
|
return self.url_result(
|
||||||
|
f'https://iframe.mediadelivery.net/embed/5105/{stream["bunnyId"]}', ie=BunnyCDNIE, url_transparent=True,
|
||||||
**self.video_meta(
|
**self.video_meta(
|
||||||
video_id=video_id, game_name=stream['game']['name'],
|
video_id=video_id, game_name=stream['game']['name'],
|
||||||
category_name=try_get(stream, lambda x: x['subcategory']['name'], str),
|
category_name=try_get(stream, lambda x: x['subcategory']['name'], str),
|
||||||
episode_number=stream.get('number'), stream_date=stream.get('date')),
|
episode_number=stream.get('number'), stream_date=stream.get('date')),
|
||||||
**self._extract_bunnycdn_iframe(video_id, stream['bunnyId']),
|
_old_archive_ids=[make_archive_id(self, video_id)])
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class SovietsClosetPlaylistIE(SovietsClosetBaseIE):
|
class SovietsClosetPlaylistIE(SovietsClosetBaseIE):
|
||||||
|
|
Loading…
Reference in a new issue