[ie/youtube] Fix uploader_id extraction (#11818)

Closes #11816
Authored by: bashonly
This commit is contained in:
bashonly 2024-12-15 20:07:18 +00:00 committed by GitHub
parent b91c3925c2
commit 1a8851b689
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -518,11 +518,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None) return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None)
def handle_or_none(self, handle): def handle_or_none(self, handle):
return self._search_regex(rf'^({self._YT_HANDLE_RE})$', handle, '@-handle', default=None) return self._search_regex(rf'^({self._YT_HANDLE_RE})$', urllib.parse.unquote(handle or ''),
'@-handle', default=None)
def handle_from_url(self, url): def handle_from_url(self, url):
return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})', return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})',
url, 'channel handle', default=None) urllib.parse.unquote(url or ''), 'channel handle', default=None)
def ucid_from_url(self, url): def ucid_from_url(self, url):
return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})', return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})',
@@ -2801,6 +2802,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}}, 'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}},
}, },
}, },
{
# uploader_id has non-ASCII characters that are percent-encoded in YT's JSON
'url': 'https://www.youtube.com/shorts/18NGQq7p3LY',
'info_dict': {
'id': '18NGQq7p3LY',
'ext': 'mp4',
'title': '아이브 이서 장원영 리즈 삐끼삐끼 챌린지',
'description': '',
'uploader': 'ㅇㅇ',
'uploader_id': '@으아-v1k',
'uploader_url': 'https://www.youtube.com/@으아-v1k',
'channel': 'ㅇㅇ',
'channel_id': 'UCC25oTm2J7ZVoi5TngOHg9g',
'channel_url': 'https://www.youtube.com/channel/UCC25oTm2J7ZVoi5TngOHg9g',
'thumbnail': r're:https?://.+/.+\.jpg',
'playable_in_embed': True,
'age_limit': 0,
'duration': 3,
'timestamp': 1724306170,
'upload_date': '20240822',
'availability': 'public',
'live_status': 'not_live',
'view_count': int,
'like_count': int,
'channel_follower_count': int,
'categories': ['People & Blogs'],
'tags': [],
},
},
] ]
_WEBPAGE_TESTS = [ _WEBPAGE_TESTS = [