mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-15 03:41:33 +01:00
Update to ytdl-commit-de39d128
[extractor/ceskatelevize] Back-port extractor from yt-dlp
de39d1281c
Closes #5361, Closes #4634, Closes #5210
This commit is contained in:
parent
a349d4d641
commit
db4678e448
12 changed files with 385 additions and 201 deletions
|
@ -11,7 +11,6 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
import base64
|
import base64
|
||||||
|
|
||||||
from yt_dlp.aes import (
|
from yt_dlp.aes import (
|
||||||
BLOCK_SIZE_BYTES,
|
|
||||||
aes_cbc_decrypt,
|
aes_cbc_decrypt,
|
||||||
aes_cbc_decrypt_bytes,
|
aes_cbc_decrypt_bytes,
|
||||||
aes_cbc_encrypt,
|
aes_cbc_encrypt,
|
||||||
|
@ -103,8 +102,7 @@ class TestAES(unittest.TestCase):
|
||||||
|
|
||||||
def test_ecb_encrypt(self):
|
def test_ecb_encrypt(self):
|
||||||
data = bytes_to_intlist(self.secret_msg)
|
data = bytes_to_intlist(self.secret_msg)
|
||||||
data += [0x08] * (BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES)
|
encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key))
|
||||||
encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key, self.iv))
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
encrypted,
|
encrypted,
|
||||||
b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')
|
b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:')
|
||||||
|
|
|
@ -28,11 +28,23 @@ def aes_cbc_encrypt_bytes(data, key, iv, **kwargs):
|
||||||
return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs))
|
return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs))
|
||||||
|
|
||||||
|
|
||||||
|
BLOCK_SIZE_BYTES = 16
|
||||||
|
|
||||||
|
|
||||||
def unpad_pkcs7(data):
|
def unpad_pkcs7(data):
|
||||||
return data[:-compat_ord(data[-1])]
|
return data[:-compat_ord(data[-1])]
|
||||||
|
|
||||||
|
|
||||||
BLOCK_SIZE_BYTES = 16
|
def pkcs7_padding(data):
    """
    Append PKCS#7 padding to the given data

    The pad length is always between 1 and BLOCK_SIZE_BYTES, so data that
    is already block-aligned receives one full extra block of padding.

    @param {int[]} data        cleartext
    @returns {int[]}           cleartext followed by the padding values
    """

    pad_size = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES
    # every padding element carries the pad length itself
    padding = [pad_size for _ in range(pad_size)]
    return data + padding
|
||||||
|
|
||||||
|
|
||||||
def pad_block(block, padding_mode):
|
def pad_block(block, padding_mode):
|
||||||
|
@ -64,7 +76,7 @@ def pad_block(block, padding_mode):
|
||||||
|
|
||||||
def aes_ecb_encrypt(data, key, iv=None):
|
def aes_ecb_encrypt(data, key, iv=None):
|
||||||
"""
|
"""
|
||||||
Encrypt with aes in ECB mode
|
Encrypt with aes in ECB mode. Using PKCS#7 padding
|
||||||
|
|
||||||
@param {int[]} data cleartext
|
@param {int[]} data cleartext
|
||||||
@param {int[]} key 16/24/32-Byte cipher key
|
@param {int[]} key 16/24/32-Byte cipher key
|
||||||
|
@ -77,8 +89,7 @@ def aes_ecb_encrypt(data, key, iv=None):
|
||||||
encrypted_data = []
|
encrypted_data = []
|
||||||
for i in range(block_count):
|
for i in range(block_count):
|
||||||
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
|
block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]
|
||||||
encrypted_data += aes_encrypt(block, expanded_key)
|
encrypted_data += aes_encrypt(pkcs7_padding(block), expanded_key)
|
||||||
encrypted_data = encrypted_data[:len(data)]
|
|
||||||
|
|
||||||
return encrypted_data
|
return encrypted_data
|
||||||
|
|
||||||
|
@ -551,5 +562,6 @@ __all__ = [
|
||||||
|
|
||||||
'key_expansion',
|
'key_expansion',
|
||||||
'pad_block',
|
'pad_block',
|
||||||
|
'pkcs7_padding',
|
||||||
'unpad_pkcs7',
|
'unpad_pkcs7',
|
||||||
]
|
]
|
||||||
|
|
|
@ -48,6 +48,7 @@ def compat_setenv(key, value, env=os.environ):
|
||||||
|
|
||||||
|
|
||||||
compat_basestring = str
|
compat_basestring = str
|
||||||
|
compat_casefold = str.casefold
|
||||||
compat_chr = chr
|
compat_chr = chr
|
||||||
compat_collections_abc = collections.abc
|
compat_collections_abc = collections.abc
|
||||||
compat_cookiejar = http.cookiejar
|
compat_cookiejar = http.cookiejar
|
||||||
|
|
|
@ -28,30 +28,34 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class ADNIE(InfoExtractor):
|
class ADNIE(InfoExtractor):
|
||||||
IE_DESC = 'Anime Digital Network'
|
IE_DESC = 'Animation Digital Network'
|
||||||
_VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:animation|anime)digitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
'url': 'https://animationdigitalnetwork.fr/video/fruits-basket/9841-episode-1-a-ce-soir',
|
||||||
'md5': '0319c99885ff5547565cacb4f3f9348d',
|
'md5': '1c9ef066ceb302c86f80c2b371615261',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '7778',
|
'id': '9841',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
|
'title': 'Fruits Basket - Episode 1',
|
||||||
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
|
'description': 'md5:14be2f72c3c96809b0ca424b0097d336',
|
||||||
'series': 'Blue Exorcist - Kyôto Saga',
|
'series': 'Fruits Basket',
|
||||||
'duration': 1467,
|
'duration': 1437,
|
||||||
'release_date': '20170106',
|
'release_date': '20190405',
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'average_rating': float,
|
'average_rating': float,
|
||||||
'season_number': 2,
|
'season_number': 1,
|
||||||
'episode': 'Début des hostilités',
|
'episode': 'À ce soir !',
|
||||||
'episode_number': 1,
|
'episode_number': 1,
|
||||||
}
|
},
|
||||||
}
|
'skip': 'Only available in region (FR, ...)',
|
||||||
|
}, {
|
||||||
|
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
_NETRC_MACHINE = 'animedigitalnetwork'
|
_NETRC_MACHINE = 'animationdigitalnetwork'
|
||||||
_BASE_URL = 'http://animedigitalnetwork.fr'
|
_BASE = 'animationdigitalnetwork.fr'
|
||||||
_API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
|
_API_BASE_URL = 'https://gw.api.' + _BASE + '/'
|
||||||
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
|
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
|
||||||
_HEADERS = {}
|
_HEADERS = {}
|
||||||
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
||||||
|
@ -75,11 +79,11 @@ class ADNIE(InfoExtractor):
|
||||||
if subtitle_location:
|
if subtitle_location:
|
||||||
enc_subtitles = self._download_webpage(
|
enc_subtitles = self._download_webpage(
|
||||||
subtitle_location, video_id, 'Downloading subtitles data',
|
subtitle_location, video_id, 'Downloading subtitles data',
|
||||||
fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
|
fatal=False, headers={'Origin': 'https://' + self._BASE})
|
||||||
if not enc_subtitles:
|
if not enc_subtitles:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
# http://animationdigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||||
dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes(
|
dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes(
|
||||||
compat_b64decode(enc_subtitles[24:]),
|
compat_b64decode(enc_subtitles[24:]),
|
||||||
binascii.unhexlify(self._K + '7fac1178830cfe0c'),
|
binascii.unhexlify(self._K + '7fac1178830cfe0c'),
|
||||||
|
|
|
@ -9,6 +9,7 @@ from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
|
str_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
USER_AGENTS,
|
USER_AGENTS,
|
||||||
|
@ -16,13 +17,13 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class CeskaTelevizeIE(InfoExtractor):
|
class CeskaTelevizeIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
|
_VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
|
'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '61924494877028507',
|
'id': '61924494877028507',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Hyde Park Civilizace: Bonus 01 - En',
|
'title': 'Bonus 01 - En - Hyde Park Civilizace',
|
||||||
'description': 'English Subtittles',
|
'description': 'English Subtittles',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'duration': 81.3,
|
'duration': 81.3,
|
||||||
|
@ -33,18 +34,29 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# live stream
|
# live stream
|
||||||
'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
|
'url': 'http://www.ceskatelevize.cz/zive/ct1/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 402,
|
'id': '102',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
'title': r'ČT1 - živé vysílání online',
|
||||||
|
'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.',
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'Georestricted to Czech Republic',
|
}, {
|
||||||
|
# another
|
||||||
|
'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
|
||||||
|
'only_matching': True,
|
||||||
|
'info_dict': {
|
||||||
|
'id': 402,
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||||
|
'is_live': True,
|
||||||
|
},
|
||||||
|
# 'skip': 'Georestricted to Czech Republic',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
|
'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -53,21 +65,21 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||||
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
|
'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '215562210900007-bogotart',
|
'id': '215562210900007-bogotart',
|
||||||
'title': 'Queer: Bogotart',
|
'title': 'Bogotart - Queer',
|
||||||
'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti. Připravil Peter Serge Butko',
|
'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti',
|
||||||
},
|
},
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '61924494877311053',
|
'id': '61924494877311053',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Queer: Bogotart (Varování 18+)',
|
'title': 'Bogotart - Queer (Varování 18+)',
|
||||||
'duration': 11.9,
|
'duration': 11.9,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '61924494877068022',
|
'id': '61924494877068022',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Queer: Bogotart (Queer)',
|
'title': 'Bogotart - Queer (Queer)',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'duration': 1558.3,
|
'duration': 1558.3,
|
||||||
},
|
},
|
||||||
|
@ -84,28 +96,42 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
parsed_url = compat_urllib_parse_urlparse(url)
|
webpage, urlh = self._download_webpage_handle(url, playlist_id)
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
parsed_url = compat_urllib_parse_urlparse(urlh.geturl())
|
||||||
site_name = self._og_search_property('site_name', webpage, fatal=False, default=None)
|
site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize')
|
||||||
playlist_title = self._og_search_title(webpage, default=None)
|
playlist_title = self._og_search_title(webpage, default=None)
|
||||||
if site_name and playlist_title:
|
if site_name and playlist_title:
|
||||||
playlist_title = playlist_title.replace(f' — {site_name}', '', 1)
|
playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0]
|
||||||
playlist_description = self._og_search_description(webpage, default=None)
|
playlist_description = self._og_search_description(webpage, default=None)
|
||||||
if playlist_description:
|
if playlist_description:
|
||||||
playlist_description = playlist_description.replace('\xa0', ' ')
|
playlist_description = playlist_description.replace('\xa0', ' ')
|
||||||
|
|
||||||
if parsed_url.path.startswith('/porady/'):
|
type_ = 'IDEC'
|
||||||
|
if re.search(r'(^/porady|/zive)/', parsed_url.path):
|
||||||
next_data = self._search_nextjs_data(webpage, playlist_id)
|
next_data = self._search_nextjs_data(webpage, playlist_id)
|
||||||
|
if '/zive/' in parsed_url.path:
|
||||||
|
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False)
|
||||||
|
else:
|
||||||
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
|
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
|
||||||
|
if not idec:
|
||||||
|
idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False)
|
||||||
|
if idec:
|
||||||
|
type_ = 'bonus'
|
||||||
if not idec:
|
if not idec:
|
||||||
raise ExtractorError('Failed to find IDEC id')
|
raise ExtractorError('Failed to find IDEC id')
|
||||||
iframe_hash = self._download_webpage('https://www.ceskatelevize.cz/v-api/iframe-hash/', playlist_id)
|
iframe_hash = self._download_webpage(
|
||||||
webpage = self._download_webpage('https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', playlist_id,
|
'https://www.ceskatelevize.cz/v-api/iframe-hash/',
|
||||||
query={'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', 'IDEC': idec})
|
playlist_id, note='Getting IFRAME hash')
|
||||||
|
query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, }
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php',
|
||||||
|
playlist_id, note='Downloading player', query=query)
|
||||||
|
|
||||||
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
|
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
|
||||||
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
|
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
|
||||||
raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
|
self.raise_geo_restricted(NOT_AVAILABLE_STRING)
|
||||||
|
if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )):
|
||||||
|
raise ExtractorError('no video with IDEC available', video_id=idec, expected=True)
|
||||||
|
|
||||||
type_ = None
|
type_ = None
|
||||||
episode_id = None
|
episode_id = None
|
||||||
|
@ -174,7 +200,6 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||||
is_live = item.get('type') == 'LIVE'
|
is_live = item.get('type') == 'LIVE'
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, stream_url in item.get('streamUrls', {}).items():
|
for format_id, stream_url in item.get('streamUrls', {}).items():
|
||||||
stream_url = stream_url.replace('https://', 'http://')
|
|
||||||
if 'playerType=flash' in stream_url:
|
if 'playerType=flash' in stream_url:
|
||||||
stream_formats = self._extract_m3u8_formats(
|
stream_formats = self._extract_m3u8_formats(
|
||||||
stream_url, playlist_id, 'mp4', 'm3u8_native',
|
stream_url, playlist_id, 'mp4', 'm3u8_native',
|
||||||
|
@ -196,7 +221,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||||
entries[num]['formats'].extend(formats)
|
entries[num]['formats'].extend(formats)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
item_id = item.get('id') or item['assetId']
|
item_id = str_or_none(item.get('id') or item['assetId'])
|
||||||
title = item['title']
|
title = item['title']
|
||||||
|
|
||||||
duration = float_or_none(item.get('duration'))
|
duration = float_or_none(item.get('duration'))
|
||||||
|
@ -227,6 +252,8 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||||
for e in entries:
|
for e in entries:
|
||||||
self._sort_formats(e['formats'])
|
self._sort_formats(e['formats'])
|
||||||
|
|
||||||
|
if len(entries) == 1:
|
||||||
|
return entries[0]
|
||||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
def _get_subtitles(self, episode_id, subs):
|
def _get_subtitles(self, episode_id, subs):
|
||||||
|
|
|
@ -1,8 +1,12 @@
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
extract_attributes,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -17,17 +21,20 @@ class ManyVidsIE(InfoExtractor):
|
||||||
'id': '133957',
|
'id': '133957',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'everthing about me (Preview)',
|
'title': 'everthing about me (Preview)',
|
||||||
|
'uploader': 'ellyxxix',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# full video
|
# full video
|
||||||
'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
|
'url': 'https://www.manyvids.com/Video/935718/MY-FACE-REVEAL/',
|
||||||
'md5': 'f3e8f7086409e9b470e2643edb96bdcc',
|
'md5': 'bb47bab0e0802c2a60c24ef079dfe60f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '935718',
|
'id': '935718',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'MY FACE REVEAL',
|
'title': 'MY FACE REVEAL',
|
||||||
|
'description': 'md5:ec5901d41808b3746fed90face161612',
|
||||||
|
'uploader': 'Sarah Calanthe',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
},
|
},
|
||||||
|
@ -36,17 +43,50 @@ class ManyVidsIE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
real_url = 'https://www.manyvids.com/video/%s/gtm.js' % (video_id, )
|
||||||
|
try:
|
||||||
|
webpage = self._download_webpage(real_url, video_id)
|
||||||
|
except Exception:
|
||||||
|
# probably useless fallback
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
video_url = self._search_regex(
|
info = self._search_regex(
|
||||||
r'data-(?:video-filepath|meta-video)\s*=s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
r'''(<div\b[^>]*\bid\s*=\s*(['"])pageMetaDetails\2[^>]*>)''',
|
||||||
webpage, 'video URL', group='url')
|
webpage, 'meta details', default='')
|
||||||
|
info = extract_attributes(info)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
player = self._search_regex(
|
||||||
|
r'''(<div\b[^>]*\bid\s*=\s*(['"])rmpPlayerStream\2[^>]*>)''',
|
||||||
|
webpage, 'player details', default='')
|
||||||
|
player = extract_attributes(player)
|
||||||
|
|
||||||
|
video_urls_and_ids = (
|
||||||
|
(info.get('data-meta-video'), 'video'),
|
||||||
|
(player.get('data-video-transcoded'), 'transcoded'),
|
||||||
|
(player.get('data-video-filepath'), 'filepath'),
|
||||||
|
(self._og_search_video_url(webpage, secure=False, default=None), 'og_video'),
|
||||||
|
)
|
||||||
|
|
||||||
|
def txt_or_none(s, default=None):
    """Return `s` stripped of surrounding whitespace, or `default`.

    `default` is returned when `s` is not a string, or when nothing is
    left after stripping.
    """
    # Use the builtin text type: `compat_str` does not appear in the
    # import block visible for this module, so referencing it here
    # would raise NameError at call time.
    return (s.strip() or default) if isinstance(s, str) else default
|
||||||
|
|
||||||
|
uploader = txt_or_none(info.get('data-meta-author'))
|
||||||
|
|
||||||
|
def mung_title(s):
    """Strip a leading '<uploader> |' or '<uploader> -' prefix from a title.

    Returns the cleaned, stripped title via txt_or_none(), i.e. None when
    `s` is not a string or nothing is left.
    """
    # `s` comes from a dict .get() and may be None; re.sub() raises
    # TypeError for non-string input, so only rewrite real strings.
    if uploader and isinstance(s, str):
        s = re.sub(r'^\s*%s\s+[|-]' % (re.escape(uploader), ), '', s)
    return txt_or_none(s)
|
||||||
|
|
||||||
|
title = (
|
||||||
|
mung_title(info.get('data-meta-title'))
|
||||||
|
or self._html_search_regex(
|
||||||
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
|
(r'<span[^>]+class=["\']item-title[^>]+>([^<]+)',
|
||||||
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
|
r'<h2[^>]+class=["\']h2 m-0["\'][^>]*>([^<]+)'),
|
||||||
webpage, 'title', default=None) or self._html_search_meta(
|
webpage, 'title', default=None)
|
||||||
'twitter:title', webpage, 'title', fatal=True)
|
or self._html_search_meta(
|
||||||
|
'twitter:title', webpage, 'title', fatal=True))
|
||||||
|
|
||||||
|
title = re.sub(r'\s*[|-]\s+ManyVids\s*$', '', title) or title
|
||||||
|
|
||||||
if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
|
if any(p in webpage for p in ('preview_videos', '_preview.mp4')):
|
||||||
title += ' (Preview)'
|
title += ' (Preview)'
|
||||||
|
@ -59,7 +99,8 @@ class ManyVidsIE(InfoExtractor):
|
||||||
# Sets some cookies
|
# Sets some cookies
|
||||||
self._download_webpage(
|
self._download_webpage(
|
||||||
'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
|
'https://www.manyvids.com/includes/ajax_repository/you_had_me_at_hello.php',
|
||||||
video_id, fatal=False, data=urlencode_postdata({
|
video_id, note='Setting format cookies', fatal=False,
|
||||||
|
data=urlencode_postdata({
|
||||||
'mvtoken': mv_token,
|
'mvtoken': mv_token,
|
||||||
'vid': video_id,
|
'vid': video_id,
|
||||||
}), headers={
|
}), headers={
|
||||||
|
@ -67,24 +108,56 @@ class ManyVidsIE(InfoExtractor):
|
||||||
'X-Requested-With': 'XMLHttpRequest'
|
'X-Requested-With': 'XMLHttpRequest'
|
||||||
})
|
})
|
||||||
|
|
||||||
if determine_ext(video_url) == 'm3u8':
|
formats = []
|
||||||
formats = self._extract_m3u8_formats(
|
for v_url, fmt in video_urls_and_ids:
|
||||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
v_url = url_or_none(v_url)
|
||||||
m3u8_id='hls')
|
if not v_url:
|
||||||
|
continue
|
||||||
|
if determine_ext(v_url) == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
v_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls'))
|
||||||
else:
|
else:
|
||||||
formats = [{'url': video_url}]
|
formats.append({
|
||||||
|
'url': v_url,
|
||||||
|
'format_id': fmt,
|
||||||
|
})
|
||||||
|
|
||||||
like_count = int_or_none(self._search_regex(
|
self._remove_duplicate_formats(formats)
|
||||||
r'data-likes=["\'](\d+)', webpage, 'like count', default=None))
|
|
||||||
view_count = str_to_int(self._html_search_regex(
|
for f in formats:
|
||||||
r'(?s)<span[^>]+class="views-wrapper"[^>]*>(.+?)</span', webpage,
|
if f.get('height') is None:
|
||||||
'view count', default=None))
|
f['height'] = int_or_none(
|
||||||
|
self._search_regex(r'_(\d{2,3}[02468])_', f['url'], 'video height', default=None))
|
||||||
|
if '/preview/' in f['url']:
|
||||||
|
f['format_id'] = '_'.join(filter(None, (f.get('format_id'), 'preview')))
|
||||||
|
f['preference'] = -10
|
||||||
|
if 'transcoded' in f['format_id']:
|
||||||
|
f['preference'] = f.get('preference', -1) - 1
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
def get_likes():
    """Return the `data-likes` value of the <a> tag whose `data-id` is the video id, as an int (or None)."""
    anchor_re = r'''(<a\b[^>]*\bdata-id\s*=\s*(['"])%s\2[^>]*>)''' % (video_id, )
    # default='' keeps the lookup non-fatal when no such tag is present
    anchor_tag = self._search_regex(anchor_re, webpage, 'likes', default='')
    attrs = extract_attributes(anchor_tag)
    return int_or_none(attrs.get('data-likes'))
|
||||||
|
|
||||||
|
def get_views():
    """Return the view count found inside the `views-wrapper` span, converted with str_to_int()."""
    views_re = r'''(?s)<span\b[^>]*\bclass\s*=["']views-wrapper\b[^>]+>.+?<span\b[^>]+>\s*(\d[\d,.]*)\s*</span>'''
    # default=None: a missing counter is not an extraction error
    raw_views = self._html_search_regex(
        views_re, webpage, 'view count', default=None)
    return str_to_int(raw_views)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'view_count': view_count,
|
|
||||||
'like_count': like_count,
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'uploader': self._html_search_regex(r'<meta[^>]+name="author"[^>]*>([^<]+)', webpage, 'uploader'),
|
'description': txt_or_none(info.get('data-meta-description')),
|
||||||
|
'uploader': txt_or_none(info.get('data-meta-author')),
|
||||||
|
'thumbnail': (
|
||||||
|
url_or_none(info.get('data-meta-image'))
|
||||||
|
or url_or_none(player.get('data-video-screenshot'))),
|
||||||
|
'view_count': get_views(),
|
||||||
|
'like_count': get_likes(),
|
||||||
}
|
}
|
||||||
|
|
|
@ -69,7 +69,7 @@ class MotherlessIE(InfoExtractor):
|
||||||
'title': 'a/ Hot Teens',
|
'title': 'a/ Hot Teens',
|
||||||
'categories': list,
|
'categories': list,
|
||||||
'upload_date': '20210104',
|
'upload_date': '20210104',
|
||||||
'uploader_id': 'yonbiw',
|
'uploader_id': 'anonymous',
|
||||||
'thumbnail': r're:https?://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
|
@ -123,11 +123,12 @@ class MotherlessIE(InfoExtractor):
|
||||||
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
|
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
|
||||||
upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
|
upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
|
||||||
|
|
||||||
comment_count = webpage.count('class="media-comment-contents"')
|
comment_count = len(re.findall(r'''class\s*=\s*['"]media-comment-contents\b''', webpage))
|
||||||
uploader_id = self._html_search_regex(
|
uploader_id = self._html_search_regex(
|
||||||
(r'"media-meta-member">\s+<a href="/m/([^"]+)"',
|
(r'''<span\b[^>]+\bclass\s*=\s*["']username\b[^>]*>([^<]+)</span>''',
|
||||||
r'<span\b[^>]+\bclass="username">([^<]+)</span>'),
|
r'''(?s)['"](?:media-meta-member|thumb-member-username)\b[^>]+>\s*<a\b[^>]+\bhref\s*=\s*['"]/m/([^"']+)'''),
|
||||||
webpage, 'uploader_id', fatal=False)
|
webpage, 'uploader_id', fatal=False)
|
||||||
|
|
||||||
categories = self._html_search_meta('keywords', webpage, default=None)
|
categories = self._html_search_meta('keywords', webpage, default=None)
|
||||||
if categories:
|
if categories:
|
||||||
categories = [cat.strip() for cat in categories.split(',')]
|
categories = [cat.strip() for cat in categories.split(',')]
|
||||||
|
@ -217,19 +218,19 @@ class MotherlessGroupIE(InfoExtractor):
|
||||||
r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
|
r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
|
||||||
description = self._html_search_meta(
|
description = self._html_search_meta(
|
||||||
'description', webpage, fatal=False)
|
'description', webpage, fatal=False)
|
||||||
page_count = self._int(self._search_regex(
|
page_count = str_to_int(self._search_regex(
|
||||||
r'(\d+)</(?:a|span)><(?:a|span)[^>]+rel="next">',
|
r'(\d+)\s*</(?:a|span)>\s*<(?:a|span)[^>]+(?:>\s*NEXT|\brel\s*=\s*["\']?next)\b',
|
||||||
webpage, 'page_count', default=0), 'page_count')
|
webpage, 'page_count', default=0))
|
||||||
if not page_count:
|
if not page_count:
|
||||||
message = self._search_regex(
|
message = self._search_regex(
|
||||||
r'class="error-page"[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*',
|
r'''class\s*=\s*['"]error-page\b[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*''',
|
||||||
webpage, 'error_msg', default=None) or 'This group has no videos.'
|
webpage, 'error_msg', default=None) or 'This group has no videos.'
|
||||||
self.report_warning(message, group_id)
|
self.report_warning(message, group_id)
|
||||||
|
page_count = 1
|
||||||
PAGE_SIZE = 80
|
PAGE_SIZE = 80
|
||||||
|
|
||||||
def _get_page(idx):
|
def _get_page(idx):
|
||||||
if not page_count:
|
if idx > 0:
|
||||||
return
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
page_url, group_id, query={'page': idx + 1},
|
page_url, group_id, query={'page': idx + 1},
|
||||||
note='Downloading page %d/%d' % (idx + 1, page_count)
|
note='Downloading page %d/%d' % (idx + 1, page_count)
|
||||||
|
|
|
@ -1,12 +1,25 @@
|
||||||
import itertools
|
import json
|
||||||
import re
|
import re
|
||||||
|
import time
|
||||||
from base64 import b64encode
|
from base64 import b64encode
|
||||||
|
from binascii import hexlify
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from hashlib import md5
|
from hashlib import md5
|
||||||
|
from random import randint
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str, compat_urllib_parse_urlencode
|
from ..aes import aes_ecb_encrypt, pkcs7_padding
|
||||||
from ..utils import float_or_none, sanitized_Request
|
from ..compat import compat_urllib_parse_urlencode
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
bytes_to_intlist,
|
||||||
|
error_to_compat_str,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
intlist_to_bytes,
|
||||||
|
sanitized_Request,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class NetEaseMusicBaseIE(InfoExtractor):
|
class NetEaseMusicBaseIE(InfoExtractor):
|
||||||
|
@ -17,7 +30,7 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
||||||
@classmethod
|
@classmethod
|
||||||
def _encrypt(cls, dfsid):
|
def _encrypt(cls, dfsid):
|
||||||
salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
|
salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
|
||||||
string_bytes = bytearray(compat_str(dfsid).encode('ascii'))
|
string_bytes = bytearray(str(dfsid).encode('ascii'))
|
||||||
salt_len = len(salt_bytes)
|
salt_len = len(salt_bytes)
|
||||||
for i in range(len(string_bytes)):
|
for i in range(len(string_bytes)):
|
||||||
string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len]
|
string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len]
|
||||||
|
@ -26,32 +39,106 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
||||||
result = b64encode(m.digest()).decode('ascii')
|
result = b64encode(m.digest()).decode('ascii')
|
||||||
return result.replace('/', '_').replace('+', '-')
|
return result.replace('/', '_').replace('+', '-')
|
||||||
|
|
||||||
|
@classmethod
def make_player_api_request_data_and_headers(cls, song_id, bitrate):
    """
    Build the encrypted POST body and the headers for the player URL API

    The request JSON is wrapped together with an MD5 digest, PKCS#7
    padded, AES-ECB encrypted with a fixed key and upper-case hex encoded.

    @param song_id      song id, sent as '[<song_id>]' in the 'ids' field
    @param bitrate      value sent in the 'br' field
    @returns {tuple}    ('params=<HEX>', headers dict)
    """
    # This is a classmethod, so no instance (`self`) exists to query for
    # the configured headers; the previous `self.extractor...` lookup
    # raised NameError on every call.
    # NOTE(review): assumes `std_headers` is exported by ..utils - confirm
    from ..utils import std_headers

    KEY = b'e82ckenh8dichen8'
    URL = '/api/song/enhance/player/url'
    now = int(time.time() * 1000)
    rand = randint(0, 1000)
    cookie = {
        'osver': None,
        'deviceId': None,
        'appver': '8.0.0',
        'versioncode': '140',
        'mobilename': None,
        'buildver': '1623435496',
        'resolution': '1920x1080',
        '__csrf': '',
        'os': 'pc',
        'channel': None,
        'requestId': '{0}_{1:04}'.format(now, rand),
    }
    # compact separators: the serialized text is part of the digest below
    request_text = json.dumps(
        {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie},
        separators=(',', ':'))
    message = 'nobody{0}use{1}md5forencrypt'.format(
        URL, request_text).encode('latin1')
    msg_digest = md5(message).hexdigest()

    data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
        URL, request_text, msg_digest)
    data = pkcs7_padding(bytes_to_intlist(data))
    encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY)))
    encrypted_params = hexlify(encrypted).decode('ascii').upper()

    # unset cookie fields are sent as the literal string 'undefined'
    cookie_header = '; '.join(
        '{0}={1}'.format(k, v if v is not None else 'undefined')
        for k, v in cookie.items())

    headers = {
        'User-Agent': std_headers['User-Agent'],
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': 'https://music.163.com',
        'Cookie': cookie_header,
    }
    return ('params={0}'.format(encrypted_params), headers)
|
||||||
|
|
||||||
|
def _call_player_api(self, song_id, bitrate):
|
||||||
|
url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
|
||||||
|
data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
|
||||||
|
try:
|
||||||
|
msg = 'empty result'
|
||||||
|
result = self._download_json(
|
||||||
|
url, song_id, data=data.encode('ascii'), headers=headers)
|
||||||
|
if result:
|
||||||
|
return result
|
||||||
|
except ExtractorError as e:
|
||||||
|
if type(e.cause) in (ValueError, TypeError):
|
||||||
|
# JSON load failure
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
msg = error_to_compat_str(e)
|
||||||
|
self.report_warning('%s API call (%s) failed: %s' % (
|
||||||
|
song_id, bitrate, msg))
|
||||||
|
return {}
|
||||||
|
|
||||||
def extract_formats(self, info):
|
def extract_formats(self, info):
|
||||||
|
err = 0
|
||||||
formats = []
|
formats = []
|
||||||
|
song_id = info['id']
|
||||||
for song_format in self._FORMATS:
|
for song_format in self._FORMATS:
|
||||||
details = info.get(song_format)
|
details = info.get(song_format)
|
||||||
if not details:
|
if not details:
|
||||||
continue
|
continue
|
||||||
song_file_path = '/%s/%s.%s' % (
|
|
||||||
self._encrypt(details['dfsId']), details['dfsId'], details['extension'])
|
|
||||||
|
|
||||||
# 203.130.59.9, 124.40.233.182, 115.231.74.139, etc is a reverse proxy-like feature
|
bitrate = int_or_none(details.get('bitrate')) or 999000
|
||||||
# from NetEase's CDN provider that can be used if m5.music.126.net does not
|
data = self._call_player_api(song_id, bitrate)
|
||||||
# work, especially for users outside of Mainland China
|
for song in try_get(data, lambda x: x['data'], list) or []:
|
||||||
# via: https://github.com/JixunMoe/unblock-163/issues/3#issuecomment-163115880
|
song_url = try_get(song, lambda x: x['url'])
|
||||||
for host in ('http://m5.music.126.net', 'http://115.231.74.139/m1.music.126.net',
|
if not song_url:
|
||||||
'http://124.40.233.182/m1.music.126.net', 'http://203.130.59.9/m1.music.126.net'):
|
continue
|
||||||
song_url = host + song_file_path
|
|
||||||
if self._is_valid_url(song_url, info['id'], 'song'):
|
if self._is_valid_url(song_url, info['id'], 'song'):
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': song_url,
|
'url': song_url,
|
||||||
'ext': details.get('extension'),
|
'ext': details.get('extension'),
|
||||||
'abr': float_or_none(details.get('bitrate'), scale=1000),
|
'abr': float_or_none(song.get('br'), scale=1000),
|
||||||
'format_id': song_format,
|
'format_id': song_format,
|
||||||
'filesize': details.get('size'),
|
'filesize': int_or_none(song.get('size')),
|
||||||
'asr': details.get('sr')
|
'asr': int_or_none(details.get('sr')),
|
||||||
})
|
})
|
||||||
break
|
elif err == 0:
|
||||||
|
err = try_get(song, lambda x: x['code'], int)
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
msg = 'No media links found'
|
||||||
|
if err != 0 and (err < 200 or err >= 400):
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s (site code %d)' % (msg, err, ), expected=True)
|
||||||
|
else:
|
||||||
|
self.raise_geo_restricted(
|
||||||
|
msg + ': probably this video is not available from your location due to geo restriction.',
|
||||||
|
countries=['CN'])
|
||||||
|
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -67,33 +154,19 @@ class NetEaseMusicBaseIE(InfoExtractor):
|
||||||
class NetEaseMusicIE(NetEaseMusicBaseIE):
|
class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||||
IE_NAME = 'netease:song'
|
IE_NAME = 'netease:song'
|
||||||
IE_DESC = '网易云音乐'
|
IE_DESC = '网易云音乐'
|
||||||
_VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://music.163.com/#/song?id=32102397',
|
'url': 'http://music.163.com/#/song?id=32102397',
|
||||||
'md5': 'f2e97280e6345c74ba9d5677dd5dcb45',
|
'md5': '3e909614ce09b1ccef4a3eb205441190',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '32102397',
|
'id': '32102397',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Bad Blood (feat. Kendrick Lamar)',
|
'title': 'Bad Blood',
|
||||||
'creator': 'Taylor Swift / Kendrick Lamar',
|
'creator': 'Taylor Swift / Kendrick Lamar',
|
||||||
'upload_date': '20150517',
|
'upload_date': '20150516',
|
||||||
'timestamp': 1431878400,
|
'timestamp': 1431792000,
|
||||||
'description': 'md5:a10a54589c2860300d02e1de821eb2ef',
|
'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
|
||||||
},
|
},
|
||||||
'skip': 'Blocked outside Mainland China',
|
|
||||||
}, {
|
|
||||||
'note': 'No lyrics translation.',
|
|
||||||
'url': 'http://music.163.com/#/song?id=29822014',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '29822014',
|
|
||||||
'ext': 'mp3',
|
|
||||||
'title': '听见下雨的声音',
|
|
||||||
'creator': '周杰伦',
|
|
||||||
'upload_date': '20141225',
|
|
||||||
'timestamp': 1419523200,
|
|
||||||
'description': 'md5:a4d8d89f44656af206b7b2555c0bce6c',
|
|
||||||
},
|
|
||||||
'skip': 'Blocked outside Mainland China',
|
|
||||||
}, {
|
}, {
|
||||||
'note': 'No lyrics.',
|
'note': 'No lyrics.',
|
||||||
'url': 'http://music.163.com/song?id=17241424',
|
'url': 'http://music.163.com/song?id=17241424',
|
||||||
|
@ -103,9 +176,9 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||||
'title': 'Opus 28',
|
'title': 'Opus 28',
|
||||||
'creator': 'Dustin O\'Halloran',
|
'creator': 'Dustin O\'Halloran',
|
||||||
'upload_date': '20080211',
|
'upload_date': '20080211',
|
||||||
|
'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
|
||||||
'timestamp': 1202745600,
|
'timestamp': 1202745600,
|
||||||
},
|
},
|
||||||
'skip': 'Blocked outside Mainland China',
|
|
||||||
}, {
|
}, {
|
||||||
'note': 'Has translated name.',
|
'note': 'Has translated name.',
|
||||||
'url': 'http://music.163.com/#/song?id=22735043',
|
'url': 'http://music.163.com/#/song?id=22735043',
|
||||||
|
@ -119,7 +192,18 @@ class NetEaseMusicIE(NetEaseMusicBaseIE):
|
||||||
'timestamp': 1264608000,
|
'timestamp': 1264608000,
|
||||||
'alt_title': '说出愿望吧(Genie)',
|
'alt_title': '说出愿望吧(Genie)',
|
||||||
},
|
},
|
||||||
'skip': 'Blocked outside Mainland China',
|
}, {
|
||||||
|
'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
|
||||||
|
'md5': '95826c73ea50b1c288b22180ec9e754d',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '95670',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': '国际歌',
|
||||||
|
'creator': '马备',
|
||||||
|
'upload_date': '19911130',
|
||||||
|
'timestamp': 691516800,
|
||||||
|
'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _process_lyrics(self, lyrics_info):
|
def _process_lyrics(self, lyrics_info):
|
||||||
|
|
|
@ -58,8 +58,7 @@ class NRKBaseIE(InfoExtractor):
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
urljoin('https://psapi.nrk.no/', path),
|
urljoin('https://psapi.nrk.no/', path),
|
||||||
video_id, note or 'Downloading %s JSON' % item,
|
video_id, note or 'Downloading %s JSON' % item,
|
||||||
fatal=fatal, query=query,
|
fatal=fatal, query=query)
|
||||||
headers={'Accept-Encoding': 'gzip, deflate, br'})
|
|
||||||
|
|
||||||
|
|
||||||
class NRKIE(NRKBaseIE):
|
class NRKIE(NRKBaseIE):
|
||||||
|
|
|
@ -870,7 +870,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||||
|
|
||||||
if '://player.vimeo.com/video/' in url:
|
if '://player.vimeo.com/video/' in url:
|
||||||
config = self._parse_json(self._search_regex(
|
config = self._parse_json(self._search_regex(
|
||||||
r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
|
r'\b(?:playerC|c)onfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
|
||||||
if config.get('view') == 4:
|
if config.get('view') == 4:
|
||||||
config = self._verify_player_video_password(
|
config = self._verify_player_video_password(
|
||||||
redirect_url, video_id, headers)
|
redirect_url, video_id, headers)
|
||||||
|
|
|
@ -3,13 +3,14 @@ import re
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
NO_DEFAULT,
|
||||||
|
ExtractorError,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
|
extract_attributes,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
join_nonempty,
|
join_nonempty,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
NO_DEFAULT,
|
|
||||||
orderedSet,
|
|
||||||
parse_codecs,
|
parse_codecs,
|
||||||
qualities,
|
qualities,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
|
@ -188,7 +189,7 @@ class ZDFIE(ZDFBaseIE):
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
|
'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
|
||||||
'md5': '57af4423db0455a3975d2dc4578536bc',
|
'md5': '1b93bdec7d02fc0b703c5e7687461628',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'id': 'video_funk_1770473',
|
'id': 'video_funk_1770473',
|
||||||
|
@ -250,17 +251,15 @@ class ZDFIE(ZDFBaseIE):
|
||||||
title = content.get('title') or content['teaserHeadline']
|
title = content.get('title') or content['teaserHeadline']
|
||||||
|
|
||||||
t = content['mainVideoContent']['http://zdf.de/rels/target']
|
t = content['mainVideoContent']['http://zdf.de/rels/target']
|
||||||
|
ptmd_path = traverse_obj(t, (
|
||||||
ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
|
(('streams', 'default'), None),
|
||||||
|
('http://zdf.de/rels/streams/ptmd', 'http://zdf.de/rels/streams/ptmd-template')
|
||||||
|
), get_all=False)
|
||||||
if not ptmd_path:
|
if not ptmd_path:
|
||||||
ptmd_path = traverse_obj(
|
raise ExtractorError('Could not extract ptmd_path')
|
||||||
t, ('streams', 'default', 'http://zdf.de/rels/streams/ptmd-template'),
|
|
||||||
'http://zdf.de/rels/streams/ptmd-template').replace(
|
|
||||||
'{playerId}', 'ngplayer_2_4')
|
|
||||||
|
|
||||||
info = self._extract_ptmd(
|
info = self._extract_ptmd(
|
||||||
urljoin(url, ptmd_path), video_id, player['apiToken'], url)
|
urljoin(url, ptmd_path.replace('{playerId}', 'ngplayer_2_4')), video_id, player['apiToken'], url)
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
layouts = try_get(
|
layouts = try_get(
|
||||||
|
@ -309,14 +308,15 @@ class ZDFIE(ZDFBaseIE):
|
||||||
'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
|
'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
|
||||||
video_id)
|
video_id)
|
||||||
|
|
||||||
document = video['document']
|
formats = []
|
||||||
|
formitaeten = try_get(video, lambda x: x['document']['formitaeten'], list)
|
||||||
|
document = formitaeten and video['document']
|
||||||
|
if formitaeten:
|
||||||
title = document['titel']
|
title = document['titel']
|
||||||
content_id = document['basename']
|
content_id = document['basename']
|
||||||
|
|
||||||
formats = []
|
|
||||||
format_urls = set()
|
format_urls = set()
|
||||||
for f in document['formitaeten']:
|
for f in formitaeten or []:
|
||||||
self._extract_format(content_id, formats, format_urls, f)
|
self._extract_format(content_id, formats, format_urls, f)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
@ -364,9 +364,9 @@ class ZDFChannelIE(ZDFBaseIE):
|
||||||
'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
|
'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'das-aktuelle-sportstudio',
|
'id': 'das-aktuelle-sportstudio',
|
||||||
'title': 'das aktuelle sportstudio | ZDF',
|
'title': 'das aktuelle sportstudio',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 23,
|
'playlist_mincount': 18,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.zdf.de/dokumentation/planet-e',
|
'url': 'https://www.zdf.de/dokumentation/planet-e',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -374,6 +374,14 @@ class ZDFChannelIE(ZDFBaseIE):
|
||||||
'title': 'planet e.',
|
'title': 'planet e.',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 50,
|
'playlist_mincount': 50,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.zdf.de/gesellschaft/aktenzeichen-xy-ungeloest',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'aktenzeichen-xy-ungeloest',
|
||||||
|
'title': 'Aktenzeichen XY... ungelöst',
|
||||||
|
'entries': "lambda x: not any('xy580-fall1-kindermoerder-gesucht-100' in e['url'] for e in x)",
|
||||||
|
},
|
||||||
|
'playlist_mincount': 2,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.zdf.de/filme/taunuskrimi/',
|
'url': 'https://www.zdf.de/filme/taunuskrimi/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -383,60 +391,36 @@ class ZDFChannelIE(ZDFBaseIE):
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url)
|
return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url)
|
||||||
|
|
||||||
|
def _og_search_title(self, webpage, fatal=False):
|
||||||
|
title = super(ZDFChannelIE, self)._og_search_title(webpage, fatal=fatal)
|
||||||
|
return re.split(r'\s+[-|]\s+ZDF(?:mediathek)?$', title or '')[0] or None
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
channel_id = self._match_id(url)
|
channel_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, channel_id)
|
webpage = self._download_webpage(url, channel_id)
|
||||||
|
|
||||||
entries = [
|
matches = re.finditer(
|
||||||
self.url_result(item_url, ie=ZDFIE.ie_key())
|
r'''<div\b[^>]*?\sdata-plusbar-id\s*=\s*(["'])(?P<p_id>[\w-]+)\1[^>]*?\sdata-plusbar-url=\1(?P<url>%s)\1''' % ZDFIE._VALID_URL,
|
||||||
for item_url in orderedSet(re.findall(
|
webpage)
|
||||||
r'data-plusbar-url=["\'](http.+?\.html)', webpage))]
|
|
||||||
|
|
||||||
return self.playlist_result(
|
if self._downloader.params.get('noplaylist', False):
|
||||||
entries, channel_id, self._og_search_title(webpage, fatal=False))
|
entry = next(
|
||||||
|
(self.url_result(m.group('url'), ie=ZDFIE.ie_key()) for m in matches),
|
||||||
|
None)
|
||||||
|
self.to_screen('Downloading just the main video because of --no-playlist')
|
||||||
|
if entry:
|
||||||
|
return entry
|
||||||
|
else:
|
||||||
|
self.to_screen('Downloading playlist %s - add --no-playlist to download just the main video' % (channel_id, ))
|
||||||
|
|
||||||
r"""
|
def check_video(m):
|
||||||
player = self._extract_player(webpage, channel_id)
|
v_ref = self._search_regex(
|
||||||
|
r'''(<a\b[^>]*?\shref\s*=[^>]+?\sdata-target-id\s*=\s*(["'])%s\2[^>]*>)''' % (m.group('p_id'), ),
|
||||||
|
webpage, 'check id', default='')
|
||||||
|
v_ref = extract_attributes(v_ref)
|
||||||
|
return v_ref.get('data-target-video-type') != 'novideo'
|
||||||
|
|
||||||
channel_id = self._search_regex(
|
return self.playlist_from_matches(
|
||||||
r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage,
|
(m.group('url') for m in matches if check_video(m)),
|
||||||
'channel id', group='id')
|
channel_id, self._og_search_title(webpage, fatal=False))
|
||||||
|
|
||||||
channel = self._call_api(
|
|
||||||
'https://api.zdf.de/content/documents/%s.json' % channel_id,
|
|
||||||
player, url, channel_id)
|
|
||||||
|
|
||||||
items = []
|
|
||||||
for module in channel['module']:
|
|
||||||
for teaser in try_get(module, lambda x: x['teaser'], list) or []:
|
|
||||||
t = try_get(
|
|
||||||
teaser, lambda x: x['http://zdf.de/rels/target'], dict)
|
|
||||||
if not t:
|
|
||||||
continue
|
|
||||||
items.extend(try_get(
|
|
||||||
t,
|
|
||||||
lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'],
|
|
||||||
list) or [])
|
|
||||||
items.extend(try_get(
|
|
||||||
module,
|
|
||||||
lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'],
|
|
||||||
list) or [])
|
|
||||||
|
|
||||||
entries = []
|
|
||||||
entry_urls = set()
|
|
||||||
for item in items:
|
|
||||||
t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict)
|
|
||||||
if not t:
|
|
||||||
continue
|
|
||||||
sharing_url = t.get('http://zdf.de/rels/sharing-url')
|
|
||||||
if not sharing_url or not isinstance(sharing_url, compat_str):
|
|
||||||
continue
|
|
||||||
if sharing_url in entry_urls:
|
|
||||||
continue
|
|
||||||
entry_urls.add(sharing_url)
|
|
||||||
entries.append(self.url_result(
|
|
||||||
sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id')))
|
|
||||||
|
|
||||||
return self.playlist_result(entries, channel_id, channel.get('title'))
|
|
||||||
"""
|
|
||||||
|
|
|
@ -685,7 +685,8 @@ def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
|
||||||
return '\0_'
|
return '\0_'
|
||||||
return char
|
return char
|
||||||
|
|
||||||
if restricted and is_id is NO_DEFAULT:
|
# Replace look-alike Unicode glyphs
|
||||||
|
if restricted and (is_id is NO_DEFAULT or not is_id):
|
||||||
s = unicodedata.normalize('NFKC', s)
|
s = unicodedata.normalize('NFKC', s)
|
||||||
s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps
|
s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps
|
||||||
result = ''.join(map(replace_insane, s))
|
result = ''.join(map(replace_insane, s))
|
||||||
|
|
Loading…
Reference in a new issue