diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index fd9c7107c7..e12f728ea3 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -518,11 +518,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return self._search_regex(rf'^({self._YT_CHANNEL_UCID_RE})$', ucid, 'UC-id', default=None) def handle_or_none(self, handle): - return self._search_regex(rf'^({self._YT_HANDLE_RE})$', handle, '@-handle', default=None) + return self._search_regex(rf'^({self._YT_HANDLE_RE})$', urllib.parse.unquote(handle or ''), + '@-handle', default=None) def handle_from_url(self, url): return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_HANDLE_RE})', - url, 'channel handle', default=None) + urllib.parse.unquote(url or ''), 'channel handle', default=None) def ucid_from_url(self, url): return self._search_regex(rf'^(?:https?://(?:www\.)?youtube\.com)?/({self._YT_CHANNEL_UCID_RE})', @@ -2801,6 +2802,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}}, }, }, + { + # uploader_id has non-ASCII characters that are percent-encoded in YT's JSON + 'url': 'https://www.youtube.com/shorts/18NGQq7p3LY', + 'info_dict': { + 'id': '18NGQq7p3LY', + 'ext': 'mp4', + 'title': '아이브 이서 장원영 리즈 삐끼삐끼 챌린지', + 'description': '', + 'uploader': 'ㅇㅇ', + 'uploader_id': '@으아-v1k', + 'uploader_url': 'https://www.youtube.com/@으아-v1k', + 'channel': 'ㅇㅇ', + 'channel_id': 'UCC25oTm2J7ZVoi5TngOHg9g', + 'channel_url': 'https://www.youtube.com/channel/UCC25oTm2J7ZVoi5TngOHg9g', + 'thumbnail': r're:https?://.+/.+\.jpg', + 'playable_in_embed': True, + 'age_limit': 0, + 'duration': 3, + 'timestamp': 1724306170, + 'upload_date': '20240822', + 'availability': 'public', + 'live_status': 'not_live', + 'view_count': int, + 'like_count': int, + 'channel_follower_count': int, + 'categories': ['People & Blogs'], + 'tags': [], + }, + }, ] _WEBPAGE_TESTS = [