Mirror of https://github.com/yt-dlp/yt-dlp (synced 2024-12-25 21:59:04 +01:00)
[ie/vk] Fix extractors (#11715)
Closes #5832, Closes #11471, Closes #11646, Closes #11670. Authored by: bashonly
This commit is contained in:
parent
a13a336aa6
commit
c038a7b187
1 changed file with 64 additions and 32 deletions
|
@ -17,10 +17,10 @@ from ..utils import (
|
||||||
get_element_html_by_id,
|
get_element_html_by_id,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
join_nonempty,
|
join_nonempty,
|
||||||
|
parse_qs,
|
||||||
parse_resolution,
|
parse_resolution,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
traverse_obj,
|
|
||||||
try_call,
|
try_call,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
|
@ -29,6 +29,7 @@ from ..utils import (
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import require, traverse_obj
|
||||||
|
|
||||||
|
|
||||||
class VKBaseIE(InfoExtractor):
|
class VKBaseIE(InfoExtractor):
|
||||||
|
@ -91,17 +92,17 @@ class VKBaseIE(InfoExtractor):
|
||||||
class VKIE(VKBaseIE):
|
class VKIE(VKBaseIE):
|
||||||
IE_NAME = 'vk'
|
IE_NAME = 'vk'
|
||||||
IE_DESC = 'VK'
|
IE_DESC = 'VK'
|
||||||
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1']
|
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk(?:(?:video)?\.ru|\.com)/video_ext\.php.+?)\1']
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:
|
(?:
|
||||||
(?:(?:m|new)\.)?vk\.com/video_|
|
(?:(?:m|new)\.)?vk(?:(?:video)?\.ru|\.com)/video_|
|
||||||
(?:www\.)?daxab\.com/
|
(?:www\.)?daxab\.com/
|
||||||
)
|
)
|
||||||
ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
|
ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
|
||||||
(?:
|
(?:
|
||||||
(?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?(?:video|clip)|
|
(?:(?:m|new)\.)?vk(?:(?:video)?\.ru|\.com)/(?:.+?\?.*?z=)?(?:video|clip)|
|
||||||
(?:www\.)?daxab\.com/embed/
|
(?:www\.)?daxab\.com/embed/
|
||||||
)
|
)
|
||||||
(?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))?
|
(?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))?
|
||||||
|
@ -110,7 +111,7 @@ class VKIE(VKBaseIE):
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
|
'url': 'https://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '-77521_162222515',
|
'id': '-77521_162222515',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -127,7 +128,7 @@ class VKIE(VKBaseIE):
|
||||||
'params': {'skip_download': 'm3u8'},
|
'params': {'skip_download': 'm3u8'},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'http://vk.com/video205387401_165548505',
|
'url': 'https://vk.com/video205387401_165548505',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '205387401_165548505',
|
'id': '205387401_165548505',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -182,10 +183,10 @@ class VKIE(VKBaseIE):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
|
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
|
||||||
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
|
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
|
||||||
'duration': 178,
|
'duration': 179,
|
||||||
'upload_date': '20130117',
|
'upload_date': '20130117',
|
||||||
'uploader': "Children's Joy Foundation Inc.",
|
'uploader': "Children's Joy Foundation Inc.",
|
||||||
'uploader_id': 'thecjf',
|
'uploader_id': '@CJFIofficial',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'channel_id': 'UCgzCNQ11TmR9V97ECnhi3gw',
|
'channel_id': 'UCgzCNQ11TmR9V97ECnhi3gw',
|
||||||
'availability': 'public',
|
'availability': 'public',
|
||||||
|
@ -193,7 +194,7 @@ class VKIE(VKBaseIE):
|
||||||
'live_status': 'not_live',
|
'live_status': 'not_live',
|
||||||
'playable_in_embed': True,
|
'playable_in_embed': True,
|
||||||
'channel': 'Children\'s Joy Foundation Inc.',
|
'channel': 'Children\'s Joy Foundation Inc.',
|
||||||
'uploader_url': 'http://www.youtube.com/user/thecjf',
|
'uploader_url': 'https://www.youtube.com/@CJFIofficial',
|
||||||
'thumbnail': r're:https?://.+\.jpg$',
|
'thumbnail': r're:https?://.+\.jpg$',
|
||||||
'tags': 'count:27',
|
'tags': 'count:27',
|
||||||
'start_time': 0.0,
|
'start_time': 0.0,
|
||||||
|
@ -201,6 +202,7 @@ class VKIE(VKBaseIE):
|
||||||
'channel_url': 'https://www.youtube.com/channel/UCgzCNQ11TmR9V97ECnhi3gw',
|
'channel_url': 'https://www.youtube.com/channel/UCgzCNQ11TmR9V97ECnhi3gw',
|
||||||
'channel_follower_count': int,
|
'channel_follower_count': int,
|
||||||
'age_limit': 0,
|
'age_limit': 0,
|
||||||
|
'timestamp': 1358394935,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -222,6 +224,7 @@ class VKIE(VKBaseIE):
|
||||||
'thumbnail': r're:https?://.+x1080$',
|
'thumbnail': r're:https?://.+x1080$',
|
||||||
'tags': list,
|
'tags': list,
|
||||||
},
|
},
|
||||||
|
'skip': 'This video has been deleted and is no longer available.',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'https://vk.com/clips-74006511?z=clip-74006511_456247211',
|
'url': 'https://vk.com/clips-74006511?z=clip-74006511_456247211',
|
||||||
|
@ -235,13 +238,13 @@ class VKIE(VKBaseIE):
|
||||||
'timestamp': 1664995597,
|
'timestamp': 1664995597,
|
||||||
'title': 'Clip by @madempress',
|
'title': 'Clip by @madempress',
|
||||||
'upload_date': '20221005',
|
'upload_date': '20221005',
|
||||||
'uploader': 'Шальная императрица',
|
'uploader': 'Шальная Императрица',
|
||||||
'uploader_id': '-74006511',
|
'uploader_id': '-74006511',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# video key is extra_data not url\d+
|
# video key is extra_data not url\d+
|
||||||
'url': 'http://vk.com/video-110305615_171782105',
|
'url': 'https://vk.com/video-110305615_171782105',
|
||||||
'md5': 'e13fcda136f99764872e739d13fac1d1',
|
'md5': 'e13fcda136f99764872e739d13fac1d1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '-110305615_171782105',
|
'id': '-110305615_171782105',
|
||||||
|
@ -273,6 +276,7 @@ class VKIE(VKBaseIE):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'No formats found',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# live stream, hls and rtmp links, most likely already finished live
|
# live stream, hls and rtmp links, most likely already finished live
|
||||||
|
@ -312,7 +316,16 @@ class VKIE(VKBaseIE):
|
||||||
{
|
{
|
||||||
'url': 'https://vk.com/clip30014565_456240946',
|
'url': 'https://vk.com/clip30014565_456240946',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://vkvideo.ru/video-127553155_456242961',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://vk.ru/video-220754053_456242564',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = self._match_valid_url(url)
|
mobj = self._match_valid_url(url)
|
||||||
|
@ -338,7 +351,7 @@ class VKIE(VKBaseIE):
|
||||||
video_id = '{}_{}'.format(mobj.group('oid'), mobj.group('id'))
|
video_id = '{}_{}'.format(mobj.group('oid'), mobj.group('id'))
|
||||||
|
|
||||||
info_page = self._download_webpage(
|
info_page = self._download_webpage(
|
||||||
'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)
|
'https://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)
|
||||||
|
|
||||||
error_message = self._html_search_regex(
|
error_message = self._html_search_regex(
|
||||||
[r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
|
[r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
|
||||||
|
@ -432,7 +445,7 @@ class VKIE(VKBaseIE):
|
||||||
if m_opts_url:
|
if m_opts_url:
|
||||||
opts_url = m_opts_url.group(1)
|
opts_url = m_opts_url.group(1)
|
||||||
if opts_url.startswith('//'):
|
if opts_url.startswith('//'):
|
||||||
opts_url = 'http:' + opts_url
|
opts_url = 'https:' + opts_url
|
||||||
return self.url_result(opts_url)
|
return self.url_result(opts_url)
|
||||||
|
|
||||||
data = player['params'][0]
|
data = player['params'][0]
|
||||||
|
@ -512,8 +525,11 @@ class VKIE(VKBaseIE):
|
||||||
class VKUserVideosIE(VKBaseIE):
|
class VKUserVideosIE(VKBaseIE):
|
||||||
IE_NAME = 'vk:uservideos'
|
IE_NAME = 'vk:uservideos'
|
||||||
IE_DESC = "VK - User's Videos"
|
IE_DESC = "VK - User's Videos"
|
||||||
_VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/video/(?:playlist/)?(?P<id>[^?$#/&]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)'
|
_BASE_URL_RE = r'https?://(?:(?:m|new)\.)?vk(?:video\.ru|\.com/video)'
|
||||||
_TEMPLATE_URL = 'https://vk.com/videos'
|
_VALID_URL = [
|
||||||
|
rf'{_BASE_URL_RE}/playlist/(?P<id>-?\d+_\d+)',
|
||||||
|
rf'{_BASE_URL_RE}/(?P<id>@[^/?#]+)(?:/all)?/?(?!\?.*\bz=video)(?:[?#]|$)',
|
||||||
|
]
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://vk.com/video/@mobidevices',
|
'url': 'https://vk.com/video/@mobidevices',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -527,12 +543,20 @@ class VKUserVideosIE(VKBaseIE):
|
||||||
},
|
},
|
||||||
'playlist_mincount': 182,
|
'playlist_mincount': 182,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://vk.com/video/playlist/-174476437_2',
|
'url': 'https://vkvideo.ru/playlist/-204353299_426',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '-174476437_playlist_2',
|
'id': '-204353299_playlist_426',
|
||||||
'title': 'Анонсы',
|
|
||||||
},
|
},
|
||||||
'playlist_mincount': 108,
|
'playlist_mincount': 33,
|
||||||
|
}, {
|
||||||
|
'url': 'https://vk.com/video/@gorkyfilmstudio/all',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://vkvideo.ru/@mobidevices',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://vk.com/video/playlist/-174476437_2',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])
|
_VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])
|
||||||
|
|
||||||
|
@ -552,7 +576,7 @@ class VKUserVideosIE(VKBaseIE):
|
||||||
v = self._VIDEO._make(video[:2])
|
v = self._VIDEO._make(video[:2])
|
||||||
video_id = '%d_%d' % (v.owner_id, v.id)
|
video_id = '%d_%d' % (v.owner_id, v.id)
|
||||||
yield self.url_result(
|
yield self.url_result(
|
||||||
'http://vk.com/video' + video_id, VKIE.ie_key(), video_id)
|
'https://vk.com/video' + video_id, VKIE.ie_key(), video_id)
|
||||||
if count >= total:
|
if count >= total:
|
||||||
break
|
break
|
||||||
video_list_json = self._download_payload('al_video', page_id, {
|
video_list_json = self._download_payload('al_video', page_id, {
|
||||||
|
@ -561,23 +585,25 @@ class VKUserVideosIE(VKBaseIE):
|
||||||
'oid': page_id,
|
'oid': page_id,
|
||||||
'section': section,
|
'section': section,
|
||||||
})[0][section]
|
})[0][section]
|
||||||
count += video_list_json['count']
|
new_count = video_list_json['count']
|
||||||
|
if not new_count:
|
||||||
|
self.to_screen(f'{page_id}: Skipping {total - count} unavailable videos')
|
||||||
|
break
|
||||||
|
count += new_count
|
||||||
video_list = video_list_json['list']
|
video_list = video_list_json['list']
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
u_id, section = self._match_valid_url(url).groups()
|
u_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, u_id)
|
webpage = self._download_webpage(url, u_id)
|
||||||
|
|
||||||
if u_id.startswith('@'):
|
if u_id.startswith('@'):
|
||||||
page_id = self._search_regex(r'data-owner-id\s?=\s?"([^"]+)"', webpage, 'page_id')
|
page_id = traverse_obj(
|
||||||
elif '_' in u_id:
|
self._search_json(r'\bvar newCur\s*=', webpage, 'cursor data', u_id),
|
||||||
page_id, section = u_id.split('_', 1)
|
('oid', {int}, {str_or_none}, {require('page id')}))
|
||||||
section = f'playlist_{section}'
|
section = traverse_obj(parse_qs(url), ('section', 0)) or 'all'
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Invalid URL', expected=True)
|
page_id, _, section = u_id.partition('_')
|
||||||
|
section = f'playlist_{section}'
|
||||||
if not section:
|
|
||||||
section = 'all'
|
|
||||||
|
|
||||||
playlist_title = clean_html(get_element_by_class('VideoInfoPanel__title', webpage))
|
playlist_title = clean_html(get_element_by_class('VideoInfoPanel__title', webpage))
|
||||||
return self.playlist_result(self._entries(page_id, section), f'{page_id}_{section}', playlist_title)
|
return self.playlist_result(self._entries(page_id, section), f'{page_id}_{section}', playlist_title)
|
||||||
|
@ -717,7 +743,7 @@ class VKWallPostIE(VKBaseIE):
|
||||||
|
|
||||||
|
|
||||||
class VKPlayBaseIE(InfoExtractor):
|
class VKPlayBaseIE(InfoExtractor):
|
||||||
_BASE_URL_RE = r'https?://(?:vkplay\.live|live\.vkplay\.ru)/'
|
_BASE_URL_RE = r'https?://(?:vkplay\.live|live\.vk(?:play|video)\.ru)/'
|
||||||
_RESOLUTIONS = {
|
_RESOLUTIONS = {
|
||||||
'tiny': '256x144',
|
'tiny': '256x144',
|
||||||
'lowest': '426x240',
|
'lowest': '426x240',
|
||||||
|
@ -797,6 +823,9 @@ class VKPlayIE(VKPlayBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://live.vkplay.ru/lebwa/record/33a4e4ce-e3ef-49db-bb14-f006cc6fabc9/records',
|
'url': 'https://live.vkplay.ru/lebwa/record/33a4e4ce-e3ef-49db-bb14-f006cc6fabc9/records',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://live.vkvideo.ru/lebwa/record/33a4e4ce-e3ef-49db-bb14-f006cc6fabc9/records',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -839,6 +868,9 @@ class VKPlayLiveIE(VKPlayBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://live.vkplay.ru/lebwa',
|
'url': 'https://live.vkplay.ru/lebwa',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://live.vkvideo.ru/panterka',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
Loading…
Reference in a new issue