mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-15 03:41:33 +01:00
Update to ytdl-commit-4fb25ff
[maoritv] Add new extractor4fb25ff5a3
Except: [vimeo] improve extraction3ae9c0f410
[youtube:tab] Pass innertube context...1b0a13f33c
This commit is contained in:
parent
68379de561
commit
f7ad71607d
12 changed files with 441 additions and 127 deletions
|
@ -78,6 +78,15 @@ try:
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
import Cookie as compat_cookies
|
import Cookie as compat_cookies
|
||||||
|
|
||||||
|
if sys.version_info[0] == 2:
|
||||||
|
class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
|
||||||
|
def load(self, rawdata):
|
||||||
|
if isinstance(rawdata, compat_str):
|
||||||
|
rawdata = str(rawdata)
|
||||||
|
return super(compat_cookies_SimpleCookie, self).load(rawdata)
|
||||||
|
else:
|
||||||
|
compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import html.entities as compat_html_entities
|
import html.entities as compat_html_entities
|
||||||
except ImportError: # Python 2
|
except ImportError: # Python 2
|
||||||
|
@ -3020,6 +3029,7 @@ __all__ = [
|
||||||
'compat_cookiejar',
|
'compat_cookiejar',
|
||||||
'compat_cookiejar_Cookie',
|
'compat_cookiejar_Cookie',
|
||||||
'compat_cookies',
|
'compat_cookies',
|
||||||
|
'compat_cookies_SimpleCookie',
|
||||||
'compat_ctypes_WINFUNCTYPE',
|
'compat_ctypes_WINFUNCTYPE',
|
||||||
'compat_etree_Element',
|
'compat_etree_Element',
|
||||||
'compat_etree_fromstring',
|
'compat_etree_fromstring',
|
||||||
|
|
|
@ -1,38 +1,113 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .cbs import CBSBaseIE
|
import re
|
||||||
|
|
||||||
|
# from .cbs import CBSBaseIE
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class CBSSportsIE(CBSBaseIE):
|
# class CBSSportsEmbedIE(CBSBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/(?:video|news)/(?P<id>[^/?#&]+)'
|
class CBSSportsEmbedIE(InfoExtractor):
|
||||||
|
IE_NAME = 'cbssports:embed'
|
||||||
|
_VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+?
|
||||||
|
(?:
|
||||||
|
ids%3D(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})|
|
||||||
|
pcid%3D(?P<pcid>\d+)
|
||||||
|
)'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.cbssports.com/nba/video/donovan-mitchell-flashes-star-potential-in-game-2-victory-over-thunder/',
|
'url': 'https://www.cbssports.com/player/embed/?args=player_id%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26ids%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26resizable%3D1%26autoplay%3Dtrue%26domain%3Dcbssports.com%26comp_ads_enabled%3Dfalse%26watchAndRead%3D0%26startTime%3D0%26env%3Dprod',
|
||||||
'info_dict': {
|
'only_matching': True,
|
||||||
'id': '1214315075735',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Donovan Mitchell flashes star potential in Game 2 victory over Thunder',
|
|
||||||
'description': 'md5:df6f48622612c2d6bd2e295ddef58def',
|
|
||||||
'timestamp': 1524111457,
|
|
||||||
'upload_date': '20180419',
|
|
||||||
'uploader': 'CBSI-NEW',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.cbssports.com/nba/news/nba-playoffs-2018-watch-76ers-vs-heat-game-3-series-schedule-tv-channel-online-stream/',
|
'url': 'https://embed.247sports.com/player/embed/?args=%3fplayer_id%3d1827823171591%26channel%3dcollege-football-recruiting%26pcid%3d1827823171591%26width%3d640%26height%3d360%26autoplay%3dTrue%26comp_ads_enabled%3dFalse%26uvpc%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_v4%2526partner%253d247%26uvpc_m%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_m_v4%2526partner_m%253d247_mobile%26utag%3d247sportssite%26resizable%3dTrue',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_video_info(self, filter_query, video_id):
|
# def _extract_video_info(self, filter_query, video_id):
|
||||||
return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
|
# return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
uuid, pcid = re.match(self._VALID_URL, url).groups()
|
||||||
|
query = {'id': uuid} if uuid else {'pcid': pcid}
|
||||||
|
video = self._download_json(
|
||||||
|
'https://www.cbssports.com/api/content/video/',
|
||||||
|
uuid or pcid, query=query)[0]
|
||||||
|
video_id = video['id']
|
||||||
|
title = video['title']
|
||||||
|
metadata = video.get('metaData') or {}
|
||||||
|
# return self._extract_video_info('byId=%d' % metadata['mpxOutletId'], video_id)
|
||||||
|
# return self._extract_video_info('byGuid=' + metadata['mpxRefId'], video_id)
|
||||||
|
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
metadata['files'][0]['url'], video_id, 'mp4',
|
||||||
|
'm3u8_native', m3u8_id='hls', fatal=False)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
image = video.get('image')
|
||||||
|
thumbnails = None
|
||||||
|
if image:
|
||||||
|
image_path = image.get('path')
|
||||||
|
if image_path:
|
||||||
|
thumbnails = [{
|
||||||
|
'url': image_path,
|
||||||
|
'width': int_or_none(image.get('width')),
|
||||||
|
'height': int_or_none(image.get('height')),
|
||||||
|
'filesize': int_or_none(image.get('size')),
|
||||||
|
}]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'description': video.get('description'),
|
||||||
|
'timestamp': int_or_none(try_get(video, lambda x: x['dateCreated']['epoch'])),
|
||||||
|
'duration': int_or_none(metadata.get('duration')),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class CBSSportsBaseIE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
video_id = self._search_regex(
|
iframe_url = self._search_regex(
|
||||||
[r'(?:=|%26)pcid%3D(\d+)', r'embedVideo(?:Container)?_(\d+)'],
|
r'<iframe[^>]+(?:data-)?src="(https?://[^/]+/player/embed[^"]+)"',
|
||||||
webpage, 'video id')
|
webpage, 'embed url')
|
||||||
return self._extract_video_info('byId=%s' % video_id, video_id)
|
return self.url_result(iframe_url, CBSSportsEmbedIE.ie_key())
|
||||||
|
|
||||||
|
|
||||||
|
class CBSSportsIE(CBSSportsBaseIE):
|
||||||
|
IE_NAME = 'cbssports'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/video/(?P<id>[^/?#&]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.cbssports.com/college-football/video/cover-3-stanford-spring-gleaning/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b56c03a6-231a-4bbe-9c55-af3c8a8e9636',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Cover 3: Stanford Spring Gleaning',
|
||||||
|
'description': 'The Cover 3 crew break down everything you need to know about the Stanford Cardinal this spring.',
|
||||||
|
'timestamp': 1617218398,
|
||||||
|
'upload_date': '20210331',
|
||||||
|
'duration': 502,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
|
||||||
|
class TwentyFourSevenSportsIE(CBSSportsBaseIE):
|
||||||
|
IE_NAME = '247sports'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?247sports\.com/Video/(?:[^/?#&]+-)?(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://247sports.com/Video/2021-QB-Jake-Garcia-senior-highlights-through-five-games-10084854/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4f1265cb-c3b5-44a8-bb1d-1914119a0ccc',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '2021 QB Jake Garcia senior highlights through five games',
|
||||||
|
'description': 'md5:8cb67ebed48e2e6adac1701e0ff6e45b',
|
||||||
|
'timestamp': 1607114223,
|
||||||
|
'upload_date': '20201204',
|
||||||
|
'duration': 208,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
|
@ -17,7 +17,7 @@ import math
|
||||||
|
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_cookiejar_Cookie,
|
compat_cookiejar_Cookie,
|
||||||
compat_cookies,
|
compat_cookies_SimpleCookie,
|
||||||
compat_etree_Element,
|
compat_etree_Element,
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
compat_getpass,
|
compat_getpass,
|
||||||
|
@ -1308,6 +1308,7 @@ class InfoExtractor(object):
|
||||||
|
|
||||||
def extract_video_object(e):
|
def extract_video_object(e):
|
||||||
assert e['@type'] == 'VideoObject'
|
assert e['@type'] == 'VideoObject'
|
||||||
|
author = e.get('author')
|
||||||
info.update({
|
info.update({
|
||||||
'url': url_or_none(e.get('contentUrl')),
|
'url': url_or_none(e.get('contentUrl')),
|
||||||
'title': unescapeHTML(e.get('name')),
|
'title': unescapeHTML(e.get('name')),
|
||||||
|
@ -1315,7 +1316,11 @@ class InfoExtractor(object):
|
||||||
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
|
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
|
||||||
'duration': parse_duration(e.get('duration')),
|
'duration': parse_duration(e.get('duration')),
|
||||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||||
'uploader': str_or_none(e.get('author')),
|
# author can be an instance of 'Organization' or 'Person' types.
|
||||||
|
# both types can have 'name' property(inherited from 'Thing' type). [1]
|
||||||
|
# however some websites are using 'Text' type instead.
|
||||||
|
# 1. https://schema.org/VideoObject
|
||||||
|
'uploader': author.get('name') if isinstance(author, dict) else author if isinstance(author, compat_str) else None,
|
||||||
'filesize': float_or_none(e.get('contentSize')),
|
'filesize': float_or_none(e.get('contentSize')),
|
||||||
'tbr': int_or_none(e.get('bitrate')),
|
'tbr': int_or_none(e.get('bitrate')),
|
||||||
'width': int_or_none(e.get('width')),
|
'width': int_or_none(e.get('width')),
|
||||||
|
@ -3219,13 +3224,10 @@ class InfoExtractor(object):
|
||||||
self._downloader.cookiejar.set_cookie(cookie)
|
self._downloader.cookiejar.set_cookie(cookie)
|
||||||
|
|
||||||
def _get_cookies(self, url):
|
def _get_cookies(self, url):
|
||||||
""" Return a compat_cookies.SimpleCookie with the cookies for the url """
|
""" Return a compat_cookies_SimpleCookie with the cookies for the url """
|
||||||
req = sanitized_Request(url)
|
req = sanitized_Request(url)
|
||||||
self._downloader.cookiejar.add_cookie_header(req)
|
self._downloader.cookiejar.add_cookie_header(req)
|
||||||
cookie = req.get_header('Cookie')
|
return compat_cookies_SimpleCookie(req.get_header('Cookie'))
|
||||||
if cookie and sys.version_info[0] == 2:
|
|
||||||
cookie = str(cookie)
|
|
||||||
return compat_cookies.SimpleCookie(cookie)
|
|
||||||
|
|
||||||
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -25,12 +25,12 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||||
|
|
||||||
def _call_api(self, path, video_id):
|
def _call_api(self, path, video_id, query=None):
|
||||||
headers = {}
|
headers = {}
|
||||||
if self._auth_token:
|
if self._auth_token:
|
||||||
headers['X-Auth-Token'] = self._auth_token
|
headers['X-Auth-Token'] = self._auth_token
|
||||||
result = self._download_json(
|
result = self._download_json(
|
||||||
self._API_BASE_URL + path, video_id, headers=headers)
|
self._API_BASE_URL + path, video_id, headers=headers, query=query)
|
||||||
self._handle_errors(result)
|
self._handle_errors(result)
|
||||||
return result['data']
|
return result['data']
|
||||||
|
|
||||||
|
@ -52,27 +52,38 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://app.curiositystream.com/video/2',
|
'url': 'https://app.curiositystream.com/video/2',
|
||||||
'md5': '262bb2f257ff301115f1973540de8983',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2',
|
'id': '2',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'How Did You Develop The Internet?',
|
'title': 'How Did You Develop The Internet?',
|
||||||
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
|
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
media = self._call_api('media/' + video_id, video_id)
|
|
||||||
title = media['title']
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
for encoding_format in ('m3u8', 'mpd'):
|
||||||
|
media = self._call_api('media/' + video_id, video_id, query={
|
||||||
|
'encodingsNew': 'true',
|
||||||
|
'encodingsFormat': encoding_format,
|
||||||
|
})
|
||||||
for encoding in media.get('encodings', []):
|
for encoding in media.get('encodings', []):
|
||||||
m3u8_url = encoding.get('master_playlist_url')
|
playlist_url = encoding.get('master_playlist_url')
|
||||||
if m3u8_url:
|
if encoding_format == 'm3u8':
|
||||||
|
# use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
playlist_url, video_id, 'mp4',
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False))
|
||||||
|
elif encoding_format == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
playlist_url, video_id, mpd_id='dash', fatal=False))
|
||||||
encoding_url = encoding.get('url')
|
encoding_url = encoding.get('url')
|
||||||
file_url = encoding.get('file_url')
|
file_url = encoding.get('file_url')
|
||||||
if not encoding_url and not file_url:
|
if not encoding_url and not file_url:
|
||||||
|
@ -108,6 +119,8 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||||
formats.append(fmt)
|
formats.append(fmt)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = media['title']
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for closed_caption in media.get('closed_captions', []):
|
for closed_caption in media.get('closed_captions', []):
|
||||||
sub_url = closed_caption.get('file')
|
sub_url = closed_caption.get('file')
|
||||||
|
@ -140,7 +153,7 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||||
'title': 'Curious Minds: The Internet',
|
'title': 'Curious Minds: The Internet',
|
||||||
'description': 'How is the internet shaping our lives in the 21st Century?',
|
'description': 'How is the internet shaping our lives in the 21st Century?',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 17,
|
'playlist_mincount': 16,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://curiositystream.com/series/2',
|
'url': 'https://curiositystream.com/series/2',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
|
|
@ -210,7 +210,11 @@ from .cbsnews import (
|
||||||
CBSNewsIE,
|
CBSNewsIE,
|
||||||
CBSNewsLiveVideoIE,
|
CBSNewsLiveVideoIE,
|
||||||
)
|
)
|
||||||
from .cbssports import CBSSportsIE
|
from .cbssports import (
|
||||||
|
CBSSportsEmbedIE,
|
||||||
|
CBSSportsIE,
|
||||||
|
TwentyFourSevenSportsIE,
|
||||||
|
)
|
||||||
from .ccc import (
|
from .ccc import (
|
||||||
CCCIE,
|
CCCIE,
|
||||||
CCCPlaylistIE,
|
CCCPlaylistIE,
|
||||||
|
@ -633,7 +637,11 @@ from .limelight import (
|
||||||
LimelightChannelIE,
|
LimelightChannelIE,
|
||||||
LimelightChannelListIE,
|
LimelightChannelListIE,
|
||||||
)
|
)
|
||||||
from .line import LineTVIE
|
from .line import (
|
||||||
|
LineTVIE,
|
||||||
|
LineLiveIE,
|
||||||
|
LineLiveChannelIE,
|
||||||
|
)
|
||||||
from .linkedin import (
|
from .linkedin import (
|
||||||
LinkedInLearningIE,
|
LinkedInLearningIE,
|
||||||
LinkedInLearningCourseIE,
|
LinkedInLearningCourseIE,
|
||||||
|
@ -671,6 +679,7 @@ from .mangomolo import (
|
||||||
MangomoloLiveIE,
|
MangomoloLiveIE,
|
||||||
)
|
)
|
||||||
from .manyvids import ManyVidsIE
|
from .manyvids import ManyVidsIE
|
||||||
|
from .maoritv import MaoriTVIE
|
||||||
from .markiza import (
|
from .markiza import (
|
||||||
MarkizaIE,
|
MarkizaIE,
|
||||||
MarkizaPageIE,
|
MarkizaPageIE,
|
||||||
|
|
|
@ -29,34 +29,51 @@ class JamendoIE(InfoExtractor):
|
||||||
'id': '196219',
|
'id': '196219',
|
||||||
'display_id': 'stories-from-emona-i',
|
'display_id': 'stories-from-emona-i',
|
||||||
'ext': 'flac',
|
'ext': 'flac',
|
||||||
'title': 'Maya Filipič - Stories from Emona I',
|
# 'title': 'Maya Filipič - Stories from Emona I',
|
||||||
'artist': 'Maya Filipič',
|
'title': 'Stories from Emona I',
|
||||||
|
# 'artist': 'Maya Filipič',
|
||||||
'track': 'Stories from Emona I',
|
'track': 'Stories from Emona I',
|
||||||
'duration': 210,
|
'duration': 210,
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'timestamp': 1217438117,
|
'timestamp': 1217438117,
|
||||||
'upload_date': '20080730',
|
'upload_date': '20080730',
|
||||||
|
'license': 'by-nc-nd',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'average_rating': int,
|
||||||
|
'tags': ['piano', 'peaceful', 'newage', 'strings', 'upbeat'],
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
|
'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _call_api(self, resource, resource_id):
|
||||||
|
path = '/api/%ss' % resource
|
||||||
|
rand = compat_str(random.random())
|
||||||
|
return self._download_json(
|
||||||
|
'https://www.jamendo.com' + path, resource_id, query={
|
||||||
|
'id[]': resource_id,
|
||||||
|
}, headers={
|
||||||
|
'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
|
||||||
|
})[0]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
track_id, display_id = self._VALID_URL_RE.match(url).groups()
|
track_id, display_id = self._VALID_URL_RE.match(url).groups()
|
||||||
webpage = self._download_webpage(
|
# webpage = self._download_webpage(
|
||||||
'https://www.jamendo.com/track/' + track_id, track_id)
|
# 'https://www.jamendo.com/track/' + track_id, track_id)
|
||||||
models = self._parse_json(self._html_search_regex(
|
# models = self._parse_json(self._html_search_regex(
|
||||||
r"data-bundled-models='([^']+)",
|
# r"data-bundled-models='([^']+)",
|
||||||
webpage, 'bundled models'), track_id)
|
# webpage, 'bundled models'), track_id)
|
||||||
track = models['track']['models'][0]
|
# track = models['track']['models'][0]
|
||||||
|
track = self._call_api('track', track_id)
|
||||||
title = track_name = track['name']
|
title = track_name = track['name']
|
||||||
get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
|
# get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
|
||||||
artist = get_model('artist')
|
# artist = get_model('artist')
|
||||||
artist_name = artist.get('name')
|
# artist_name = artist.get('name')
|
||||||
if artist_name:
|
# if artist_name:
|
||||||
title = '%s - %s' % (artist_name, title)
|
# title = '%s - %s' % (artist_name, title)
|
||||||
album = get_model('album')
|
# album = get_model('album')
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
|
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
|
||||||
|
@ -74,7 +91,7 @@ class JamendoIE(InfoExtractor):
|
||||||
|
|
||||||
urls = []
|
urls = []
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
for _, covers in track.get('cover', {}).items():
|
for covers in (track.get('cover') or {}).values():
|
||||||
for cover_id, cover_url in covers.items():
|
for cover_id, cover_url in covers.items():
|
||||||
if not cover_url or cover_url in urls:
|
if not cover_url or cover_url in urls:
|
||||||
continue
|
continue
|
||||||
|
@ -88,13 +105,14 @@ class JamendoIE(InfoExtractor):
|
||||||
})
|
})
|
||||||
|
|
||||||
tags = []
|
tags = []
|
||||||
for tag in track.get('tags', []):
|
for tag in (track.get('tags') or []):
|
||||||
tag_name = tag.get('name')
|
tag_name = tag.get('name')
|
||||||
if not tag_name:
|
if not tag_name:
|
||||||
continue
|
continue
|
||||||
tags.append(tag_name)
|
tags.append(tag_name)
|
||||||
|
|
||||||
stats = track.get('stats') or {}
|
stats = track.get('stats') or {}
|
||||||
|
license = track.get('licenseCC') or []
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': track_id,
|
'id': track_id,
|
||||||
|
@ -103,11 +121,11 @@ class JamendoIE(InfoExtractor):
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': track.get('description'),
|
'description': track.get('description'),
|
||||||
'duration': int_or_none(track.get('duration')),
|
'duration': int_or_none(track.get('duration')),
|
||||||
'artist': artist_name,
|
# 'artist': artist_name,
|
||||||
'track': track_name,
|
'track': track_name,
|
||||||
'album': album.get('name'),
|
# 'album': album.get('name'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'license': '-'.join(track.get('licenseCC', [])) or None,
|
'license': '-'.join(license) if license else None,
|
||||||
'timestamp': int_or_none(track.get('dateCreated')),
|
'timestamp': int_or_none(track.get('dateCreated')),
|
||||||
'view_count': int_or_none(stats.get('listenedAll')),
|
'view_count': int_or_none(stats.get('listenedAll')),
|
||||||
'like_count': int_or_none(stats.get('favorited')),
|
'like_count': int_or_none(stats.get('favorited')),
|
||||||
|
@ -116,9 +134,9 @@ class JamendoIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class JamendoAlbumIE(InfoExtractor):
|
class JamendoAlbumIE(JamendoIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
|
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '121486',
|
'id': '121486',
|
||||||
|
@ -151,17 +169,7 @@ class JamendoAlbumIE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'playlistend': 2
|
'playlistend': 2
|
||||||
}
|
}
|
||||||
}
|
}]
|
||||||
|
|
||||||
def _call_api(self, resource, resource_id):
|
|
||||||
path = '/api/%ss' % resource
|
|
||||||
rand = compat_str(random.random())
|
|
||||||
return self._download_json(
|
|
||||||
'https://www.jamendo.com' + path, resource_id, query={
|
|
||||||
'id[]': resource_id,
|
|
||||||
}, headers={
|
|
||||||
'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
|
|
||||||
})[0]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
album_id = self._match_id(url)
|
album_id = self._match_id(url)
|
||||||
|
@ -169,7 +177,7 @@ class JamendoAlbumIE(InfoExtractor):
|
||||||
album_name = album.get('name')
|
album_name = album.get('name')
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for track in album.get('tracks', []):
|
for track in (album.get('tracks') or []):
|
||||||
track_id = track.get('id')
|
track_id = track.get('id')
|
||||||
if not track_id:
|
if not track_id:
|
||||||
continue
|
continue
|
||||||
|
|
|
@ -4,7 +4,13 @@ from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import js_to_json
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
str_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class LineTVIE(InfoExtractor):
|
class LineTVIE(InfoExtractor):
|
||||||
|
@ -88,3 +94,137 @@ class LineTVIE(InfoExtractor):
|
||||||
for thumbnail in video_info.get('thumbnails', {}).get('list', [])],
|
for thumbnail in video_info.get('thumbnails', {}).get('list', [])],
|
||||||
'view_count': video_info.get('meta', {}).get('count'),
|
'view_count': video_info.get('meta', {}).get('count'),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class LineLiveBaseIE(InfoExtractor):
|
||||||
|
_API_BASE_URL = 'https://live-api.line-apps.com/web/v4.0/channel/'
|
||||||
|
|
||||||
|
def _parse_broadcast_item(self, item):
|
||||||
|
broadcast_id = compat_str(item['id'])
|
||||||
|
title = item['title']
|
||||||
|
is_live = item.get('isBroadcastingNow')
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
for thumbnail_id, thumbnail_url in (item.get('thumbnailURLs') or {}).items():
|
||||||
|
if not thumbnail_url:
|
||||||
|
continue
|
||||||
|
thumbnails.append({
|
||||||
|
'id': thumbnail_id,
|
||||||
|
'url': thumbnail_url,
|
||||||
|
})
|
||||||
|
|
||||||
|
channel = item.get('channel') or {}
|
||||||
|
channel_id = str_or_none(channel.get('id'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': broadcast_id,
|
||||||
|
'title': self._live_title(title) if is_live else title,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'timestamp': int_or_none(item.get('createdAt')),
|
||||||
|
'channel': channel.get('name'),
|
||||||
|
'channel_id': channel_id,
|
||||||
|
'channel_url': 'https://live.line.me/channels/' + channel_id if channel_id else None,
|
||||||
|
'duration': int_or_none(item.get('archiveDuration')),
|
||||||
|
'view_count': int_or_none(item.get('viewerCount')),
|
||||||
|
'comment_count': int_or_none(item.get('chatCount')),
|
||||||
|
'is_live': is_live,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class LineLiveIE(LineLiveBaseIE):
|
||||||
|
_VALID_URL = r'https?://live\.line\.me/channels/(?P<channel_id>\d+)/broadcast/(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://live.line.me/channels/4867368/broadcast/16331360',
|
||||||
|
'md5': 'bc931f26bf1d4f971e3b0982b3fab4a3',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '16331360',
|
||||||
|
'title': '振りコピ講座😙😙😙',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'timestamp': 1617095132,
|
||||||
|
'upload_date': '20210330',
|
||||||
|
'channel': '白川ゆめか',
|
||||||
|
'channel_id': '4867368',
|
||||||
|
'view_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'is_live': False,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# archiveStatus == 'DELETED'
|
||||||
|
'url': 'https://live.line.me/channels/4778159/broadcast/16378488',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
channel_id, broadcast_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
broadcast = self._download_json(
|
||||||
|
self._API_BASE_URL + '%s/broadcast/%s' % (channel_id, broadcast_id),
|
||||||
|
broadcast_id)
|
||||||
|
item = broadcast['item']
|
||||||
|
info = self._parse_broadcast_item(item)
|
||||||
|
protocol = 'm3u8' if info['is_live'] else 'm3u8_native'
|
||||||
|
formats = []
|
||||||
|
for k, v in (broadcast.get(('live' if info['is_live'] else 'archived') + 'HLSURLs') or {}).items():
|
||||||
|
if not v:
|
||||||
|
continue
|
||||||
|
if k == 'abr':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
v, broadcast_id, 'mp4', protocol,
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
continue
|
||||||
|
f = {
|
||||||
|
'ext': 'mp4',
|
||||||
|
'format_id': 'hls-' + k,
|
||||||
|
'protocol': protocol,
|
||||||
|
'url': v,
|
||||||
|
}
|
||||||
|
if not k.isdigit():
|
||||||
|
f['vcodec'] = 'none'
|
||||||
|
formats.append(f)
|
||||||
|
if not formats:
|
||||||
|
archive_status = item.get('archiveStatus')
|
||||||
|
if archive_status != 'ARCHIVED':
|
||||||
|
raise ExtractorError('this video has been ' + archive_status.lower(), expected=True)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
info['formats'] = formats
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
|
class LineLiveChannelIE(LineLiveBaseIE):
|
||||||
|
_VALID_URL = r'https?://live\.line\.me/channels/(?P<id>\d+)(?!/broadcast/\d+)(?:[/?&#]|$)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://live.line.me/channels/5893542',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5893542',
|
||||||
|
'title': 'いくらちゃん',
|
||||||
|
'description': 'md5:c3a4af801f43b2fac0b02294976580be',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 29
|
||||||
|
}
|
||||||
|
|
||||||
|
def _archived_broadcasts_entries(self, archived_broadcasts, channel_id):
|
||||||
|
while True:
|
||||||
|
for row in (archived_broadcasts.get('rows') or []):
|
||||||
|
share_url = str_or_none(row.get('shareURL'))
|
||||||
|
if not share_url:
|
||||||
|
continue
|
||||||
|
info = self._parse_broadcast_item(row)
|
||||||
|
info.update({
|
||||||
|
'_type': 'url',
|
||||||
|
'url': share_url,
|
||||||
|
'ie_key': LineLiveIE.ie_key(),
|
||||||
|
})
|
||||||
|
yield info
|
||||||
|
if not archived_broadcasts.get('hasNextPage'):
|
||||||
|
return
|
||||||
|
archived_broadcasts = self._download_json(
|
||||||
|
self._API_BASE_URL + channel_id + '/archived_broadcasts',
|
||||||
|
channel_id, query={
|
||||||
|
'lastId': info['id'],
|
||||||
|
})
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
channel_id = self._match_id(url)
|
||||||
|
channel = self._download_json(self._API_BASE_URL + channel_id, channel_id)
|
||||||
|
return self.playlist_result(
|
||||||
|
self._archived_broadcasts_entries(channel.get('archivedBroadcasts') or {}, channel_id),
|
||||||
|
channel_id, channel.get('title'), channel.get('information'))
|
||||||
|
|
31
yt_dlp/extractor/maoritv.py
Normal file
31
yt_dlp/extractor/maoritv.py
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class MaoriTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?maoritelevision\.com/shows/(?:[^/]+/)+(?P<id>[^/?&#]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.maoritelevision.com/shows/korero-mai/S01E054/korero-mai-series-1-episode-54',
|
||||||
|
'md5': '5ade8ef53851b6a132c051b1cd858899',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '4774724855001',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Kōrero Mai, Series 1 Episode 54',
|
||||||
|
'upload_date': '20160226',
|
||||||
|
'timestamp': 1456455018,
|
||||||
|
'description': 'md5:59bde32fd066d637a1a55794c56d8dcb',
|
||||||
|
'uploader_id': '1614493167001',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1614493167001/HJlhIQhQf_default/index.html?videoId=%s'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
brightcove_id = self._search_regex(
|
||||||
|
r'data-main-video-id=["\'](\d+)', webpage, 'brightcove id')
|
||||||
|
return self.url_result(
|
||||||
|
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
|
||||||
|
'BrightcoveNew', brightcove_id)
|
|
@ -275,7 +275,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_child_with_type(parent, t):
|
def _extract_child_with_type(parent, t):
|
||||||
return next(c for c in parent['children'] if c.get('type') == t)
|
for c in parent['children']:
|
||||||
|
if c.get('type') == t:
|
||||||
|
return c
|
||||||
|
|
||||||
def _extract_mgid(self, webpage):
|
def _extract_mgid(self, webpage):
|
||||||
try:
|
try:
|
||||||
|
@ -306,7 +308,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||||
data = self._parse_json(self._search_regex(
|
data = self._parse_json(self._search_regex(
|
||||||
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
|
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
|
||||||
main_container = self._extract_child_with_type(data, 'MainContainer')
|
main_container = self._extract_child_with_type(data, 'MainContainer')
|
||||||
video_player = self._extract_child_with_type(main_container, 'VideoPlayer')
|
ab_testing = self._extract_child_with_type(main_container, 'ABTesting')
|
||||||
|
video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer')
|
||||||
mgid = video_player['props']['media']['video']['config']['uri']
|
mgid = video_player['props']['media']['video']['config']['uri']
|
||||||
|
|
||||||
return mgid
|
return mgid
|
||||||
|
|
|
@ -398,6 +398,16 @@ class PornHubIE(PornHubBaseIE):
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
def add_format(format_url, height=None):
|
def add_format(format_url, height=None):
|
||||||
|
ext = determine_ext(format_url)
|
||||||
|
if ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
format_url, video_id, mpd_id='dash', fatal=False))
|
||||||
|
return
|
||||||
|
if ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
return
|
||||||
tbr = None
|
tbr = None
|
||||||
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
|
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
|
||||||
if mobj:
|
if mobj:
|
||||||
|
@ -417,16 +427,6 @@ class PornHubIE(PornHubBaseIE):
|
||||||
r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
|
r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
|
||||||
if upload_date:
|
if upload_date:
|
||||||
upload_date = upload_date.replace('/', '')
|
upload_date = upload_date.replace('/', '')
|
||||||
ext = determine_ext(video_url)
|
|
||||||
if ext == 'mpd':
|
|
||||||
formats.extend(self._extract_mpd_formats(
|
|
||||||
video_url, video_id, mpd_id='dash', fatal=False))
|
|
||||||
continue
|
|
||||||
elif ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
|
||||||
m3u8_id='hls', fatal=False))
|
|
||||||
continue
|
|
||||||
if '/video/get_media' in video_url:
|
if '/video/get_media' in video_url:
|
||||||
medias = self._download_json(video_url, video_id, fatal=False)
|
medias = self._download_json(video_url, video_id, fatal=False)
|
||||||
if isinstance(medias, list):
|
if isinstance(medias, list):
|
||||||
|
|
|
@ -154,7 +154,7 @@ class YoukuIE(InfoExtractor):
|
||||||
# request basic data
|
# request basic data
|
||||||
basic_data_params = {
|
basic_data_params = {
|
||||||
'vid': video_id,
|
'vid': video_id,
|
||||||
'ccode': '0590',
|
'ccode': '0532',
|
||||||
'client_ip': '192.168.1.1',
|
'client_ip': '192.168.1.1',
|
||||||
'utid': cna,
|
'utid': cna,
|
||||||
'client_ts': time.time() / 1000,
|
'client_ts': time.time() / 1000,
|
||||||
|
|
|
@ -1248,6 +1248,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
|
'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
|
||||||
|
'url': 'cBvYw8_A0vQ',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'cBvYw8_A0vQ',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
|
||||||
|
'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
|
||||||
|
'upload_date': '20201120',
|
||||||
|
'uploader': 'Walk around Japan',
|
||||||
|
'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
|
||||||
|
'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
|
@ -1817,7 +1834,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
def get_text(x):
|
def get_text(x):
|
||||||
if not x:
|
if not x:
|
||||||
return
|
return
|
||||||
return x.get('simpleText') or ''.join([r['text'] for r in x['runs']])
|
text = x.get('simpleText')
|
||||||
|
if text and isinstance(text, compat_str):
|
||||||
|
return text
|
||||||
|
runs = x.get('runs')
|
||||||
|
if not isinstance(runs, list):
|
||||||
|
return
|
||||||
|
return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
|
||||||
|
|
||||||
search_meta = (
|
search_meta = (
|
||||||
lambda x: self._html_search_meta(x, webpage, default=None)) \
|
lambda x: self._html_search_meta(x, webpage, default=None)) \
|
||||||
|
|
Loading…
Reference in a new issue