[ie/vk] Fix extractors (#11715)

Closes #5832, Closes #11471, Closes #11646, Closes #11670
Authored by: bashonly
This commit is contained in:
bashonly 2024-12-03 14:28:43 +00:00 committed by GitHub
parent a13a336aa6
commit c038a7b187
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -17,10 +17,10 @@ from ..utils import (
get_element_html_by_id, get_element_html_by_id,
int_or_none, int_or_none,
join_nonempty, join_nonempty,
parse_qs,
parse_resolution, parse_resolution,
str_or_none, str_or_none,
str_to_int, str_to_int,
traverse_obj,
try_call, try_call,
unescapeHTML, unescapeHTML,
unified_timestamp, unified_timestamp,
@ -29,6 +29,7 @@ from ..utils import (
urlencode_postdata, urlencode_postdata,
urljoin, urljoin,
) )
from ..utils.traversal import require, traverse_obj
class VKBaseIE(InfoExtractor): class VKBaseIE(InfoExtractor):
@ -91,17 +92,17 @@ class VKBaseIE(InfoExtractor):
class VKIE(VKBaseIE): class VKIE(VKBaseIE):
IE_NAME = 'vk' IE_NAME = 'vk'
IE_DESC = 'VK' IE_DESC = 'VK'
_EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1'] _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk(?:(?:video)?\.ru|\.com)/video_ext\.php.+?)\1']
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:
(?: (?:
(?:(?:m|new)\.)?vk\.com/video_| (?:(?:m|new)\.)?vk(?:(?:video)?\.ru|\.com)/video_|
(?:www\.)?daxab\.com/ (?:www\.)?daxab\.com/
) )
ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)| ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
(?: (?:
(?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?(?:video|clip)| (?:(?:m|new)\.)?vk(?:(?:video)?\.ru|\.com)/(?:.+?\?.*?z=)?(?:video|clip)|
(?:www\.)?daxab\.com/embed/ (?:www\.)?daxab\.com/embed/
) )
(?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))? (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>([\da-f]+)|(ln-[\da-zA-Z]+)))?
@ -110,7 +111,7 @@ class VKIE(VKBaseIE):
_TESTS = [ _TESTS = [
{ {
'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', 'url': 'https://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
'info_dict': { 'info_dict': {
'id': '-77521_162222515', 'id': '-77521_162222515',
'ext': 'mp4', 'ext': 'mp4',
@ -127,7 +128,7 @@ class VKIE(VKBaseIE):
'params': {'skip_download': 'm3u8'}, 'params': {'skip_download': 'm3u8'},
}, },
{ {
'url': 'http://vk.com/video205387401_165548505', 'url': 'https://vk.com/video205387401_165548505',
'info_dict': { 'info_dict': {
'id': '205387401_165548505', 'id': '205387401_165548505',
'ext': 'mp4', 'ext': 'mp4',
@ -182,10 +183,10 @@ class VKIE(VKBaseIE):
'ext': 'mp4', 'ext': 'mp4',
'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate", 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
'description': 'md5:bf9c26cfa4acdfb146362682edd3827a', 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
'duration': 178, 'duration': 179,
'upload_date': '20130117', 'upload_date': '20130117',
'uploader': "Children's Joy Foundation Inc.", 'uploader': "Children's Joy Foundation Inc.",
'uploader_id': 'thecjf', 'uploader_id': '@CJFIofficial',
'view_count': int, 'view_count': int,
'channel_id': 'UCgzCNQ11TmR9V97ECnhi3gw', 'channel_id': 'UCgzCNQ11TmR9V97ECnhi3gw',
'availability': 'public', 'availability': 'public',
@ -193,7 +194,7 @@ class VKIE(VKBaseIE):
'live_status': 'not_live', 'live_status': 'not_live',
'playable_in_embed': True, 'playable_in_embed': True,
'channel': 'Children\'s Joy Foundation Inc.', 'channel': 'Children\'s Joy Foundation Inc.',
'uploader_url': 'http://www.youtube.com/user/thecjf', 'uploader_url': 'https://www.youtube.com/@CJFIofficial',
'thumbnail': r're:https?://.+\.jpg$', 'thumbnail': r're:https?://.+\.jpg$',
'tags': 'count:27', 'tags': 'count:27',
'start_time': 0.0, 'start_time': 0.0,
@ -201,6 +202,7 @@ class VKIE(VKBaseIE):
'channel_url': 'https://www.youtube.com/channel/UCgzCNQ11TmR9V97ECnhi3gw', 'channel_url': 'https://www.youtube.com/channel/UCgzCNQ11TmR9V97ECnhi3gw',
'channel_follower_count': int, 'channel_follower_count': int,
'age_limit': 0, 'age_limit': 0,
'timestamp': 1358394935,
}, },
}, },
{ {
@ -222,6 +224,7 @@ class VKIE(VKBaseIE):
'thumbnail': r're:https?://.+x1080$', 'thumbnail': r're:https?://.+x1080$',
'tags': list, 'tags': list,
}, },
'skip': 'This video has been deleted and is no longer available.',
}, },
{ {
'url': 'https://vk.com/clips-74006511?z=clip-74006511_456247211', 'url': 'https://vk.com/clips-74006511?z=clip-74006511_456247211',
@ -235,13 +238,13 @@ class VKIE(VKBaseIE):
'timestamp': 1664995597, 'timestamp': 1664995597,
'title': 'Clip by @madempress', 'title': 'Clip by @madempress',
'upload_date': '20221005', 'upload_date': '20221005',
'uploader': 'Шальная императрица', 'uploader': 'Шальная Императрица',
'uploader_id': '-74006511', 'uploader_id': '-74006511',
}, },
}, },
{ {
# video key is extra_data not url\d+ # video key is extra_data not url\d+
'url': 'http://vk.com/video-110305615_171782105', 'url': 'https://vk.com/video-110305615_171782105',
'md5': 'e13fcda136f99764872e739d13fac1d1', 'md5': 'e13fcda136f99764872e739d13fac1d1',
'info_dict': { 'info_dict': {
'id': '-110305615_171782105', 'id': '-110305615_171782105',
@ -273,6 +276,7 @@ class VKIE(VKBaseIE):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
'skip': 'No formats found',
}, },
{ {
# live stream, hls and rtmp links, most likely already finished live # live stream, hls and rtmp links, most likely already finished live
@ -312,7 +316,16 @@ class VKIE(VKBaseIE):
{ {
'url': 'https://vk.com/clip30014565_456240946', 'url': 'https://vk.com/clip30014565_456240946',
'only_matching': True, 'only_matching': True,
}] },
{
'url': 'https://vkvideo.ru/video-127553155_456242961',
'only_matching': True,
},
{
'url': 'https://vk.ru/video-220754053_456242564',
'only_matching': True,
},
]
def _real_extract(self, url): def _real_extract(self, url):
mobj = self._match_valid_url(url) mobj = self._match_valid_url(url)
@ -338,7 +351,7 @@ class VKIE(VKBaseIE):
video_id = '{}_{}'.format(mobj.group('oid'), mobj.group('id')) video_id = '{}_{}'.format(mobj.group('oid'), mobj.group('id'))
info_page = self._download_webpage( info_page = self._download_webpage(
'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id) 'https://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)
error_message = self._html_search_regex( error_message = self._html_search_regex(
[r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>', [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
@ -432,7 +445,7 @@ class VKIE(VKBaseIE):
if m_opts_url: if m_opts_url:
opts_url = m_opts_url.group(1) opts_url = m_opts_url.group(1)
if opts_url.startswith('//'): if opts_url.startswith('//'):
opts_url = 'http:' + opts_url opts_url = 'https:' + opts_url
return self.url_result(opts_url) return self.url_result(opts_url)
data = player['params'][0] data = player['params'][0]
@ -512,8 +525,11 @@ class VKIE(VKBaseIE):
class VKUserVideosIE(VKBaseIE): class VKUserVideosIE(VKBaseIE):
IE_NAME = 'vk:uservideos' IE_NAME = 'vk:uservideos'
IE_DESC = "VK - User's Videos" IE_DESC = "VK - User's Videos"
_VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/video/(?:playlist/)?(?P<id>[^?$#/&]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)' _BASE_URL_RE = r'https?://(?:(?:m|new)\.)?vk(?:video\.ru|\.com/video)'
_TEMPLATE_URL = 'https://vk.com/videos' _VALID_URL = [
rf'{_BASE_URL_RE}/playlist/(?P<id>-?\d+_\d+)',
rf'{_BASE_URL_RE}/(?P<id>@[^/?#]+)(?:/all)?/?(?!\?.*\bz=video)(?:[?#]|$)',
]
_TESTS = [{ _TESTS = [{
'url': 'https://vk.com/video/@mobidevices', 'url': 'https://vk.com/video/@mobidevices',
'info_dict': { 'info_dict': {
@ -527,12 +543,20 @@ class VKUserVideosIE(VKBaseIE):
}, },
'playlist_mincount': 182, 'playlist_mincount': 182,
}, { }, {
'url': 'https://vk.com/video/playlist/-174476437_2', 'url': 'https://vkvideo.ru/playlist/-204353299_426',
'info_dict': { 'info_dict': {
'id': '-174476437_playlist_2', 'id': '-204353299_playlist_426',
'title': 'Анонсы',
}, },
'playlist_mincount': 108, 'playlist_mincount': 33,
}, {
'url': 'https://vk.com/video/@gorkyfilmstudio/all',
'only_matching': True,
}, {
'url': 'https://vkvideo.ru/@mobidevices',
'only_matching': True,
}, {
'url': 'https://vk.com/video/playlist/-174476437_2',
'only_matching': True,
}] }]
_VIDEO = collections.namedtuple('Video', ['owner_id', 'id']) _VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])
@ -552,7 +576,7 @@ class VKUserVideosIE(VKBaseIE):
v = self._VIDEO._make(video[:2]) v = self._VIDEO._make(video[:2])
video_id = '%d_%d' % (v.owner_id, v.id) video_id = '%d_%d' % (v.owner_id, v.id)
yield self.url_result( yield self.url_result(
'http://vk.com/video' + video_id, VKIE.ie_key(), video_id) 'https://vk.com/video' + video_id, VKIE.ie_key(), video_id)
if count >= total: if count >= total:
break break
video_list_json = self._download_payload('al_video', page_id, { video_list_json = self._download_payload('al_video', page_id, {
@ -561,23 +585,25 @@ class VKUserVideosIE(VKBaseIE):
'oid': page_id, 'oid': page_id,
'section': section, 'section': section,
})[0][section] })[0][section]
count += video_list_json['count'] new_count = video_list_json['count']
if not new_count:
self.to_screen(f'{page_id}: Skipping {total - count} unavailable videos')
break
count += new_count
video_list = video_list_json['list'] video_list = video_list_json['list']
def _real_extract(self, url): def _real_extract(self, url):
u_id, section = self._match_valid_url(url).groups() u_id = self._match_id(url)
webpage = self._download_webpage(url, u_id) webpage = self._download_webpage(url, u_id)
if u_id.startswith('@'): if u_id.startswith('@'):
page_id = self._search_regex(r'data-owner-id\s?=\s?"([^"]+)"', webpage, 'page_id') page_id = traverse_obj(
elif '_' in u_id: self._search_json(r'\bvar newCur\s*=', webpage, 'cursor data', u_id),
page_id, section = u_id.split('_', 1) ('oid', {int}, {str_or_none}, {require('page id')}))
section = f'playlist_{section}' section = traverse_obj(parse_qs(url), ('section', 0)) or 'all'
else: else:
raise ExtractorError('Invalid URL', expected=True) page_id, _, section = u_id.partition('_')
section = f'playlist_{section}'
if not section:
section = 'all'
playlist_title = clean_html(get_element_by_class('VideoInfoPanel__title', webpage)) playlist_title = clean_html(get_element_by_class('VideoInfoPanel__title', webpage))
return self.playlist_result(self._entries(page_id, section), f'{page_id}_{section}', playlist_title) return self.playlist_result(self._entries(page_id, section), f'{page_id}_{section}', playlist_title)
@ -717,7 +743,7 @@ class VKWallPostIE(VKBaseIE):
class VKPlayBaseIE(InfoExtractor): class VKPlayBaseIE(InfoExtractor):
_BASE_URL_RE = r'https?://(?:vkplay\.live|live\.vkplay\.ru)/' _BASE_URL_RE = r'https?://(?:vkplay\.live|live\.vk(?:play|video)\.ru)/'
_RESOLUTIONS = { _RESOLUTIONS = {
'tiny': '256x144', 'tiny': '256x144',
'lowest': '426x240', 'lowest': '426x240',
@ -797,6 +823,9 @@ class VKPlayIE(VKPlayBaseIE):
}, { }, {
'url': 'https://live.vkplay.ru/lebwa/record/33a4e4ce-e3ef-49db-bb14-f006cc6fabc9/records', 'url': 'https://live.vkplay.ru/lebwa/record/33a4e4ce-e3ef-49db-bb14-f006cc6fabc9/records',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://live.vkvideo.ru/lebwa/record/33a4e4ce-e3ef-49db-bb14-f006cc6fabc9/records',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -839,6 +868,9 @@ class VKPlayLiveIE(VKPlayBaseIE):
}, { }, {
'url': 'https://live.vkplay.ru/lebwa', 'url': 'https://live.vkplay.ru/lebwa',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://live.vkvideo.ru/panterka',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):