From 145dc6f6563e80d2da1b3e9aea2ffa795b71622c Mon Sep 17 00:00:00 2001 From: Rasmus Antons Date: Wed, 8 May 2024 22:16:32 +0200 Subject: [PATCH 001/397] [ie/boosty] Add cookies support (#9522) Closes #9401 Authored by: RasmusAntons --- yt_dlp/extractor/boosty.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/boosty.py b/yt_dlp/extractor/boosty.py index fb14ca1467..d3aab7a1a8 100644 --- a/yt_dlp/extractor/boosty.py +++ b/yt_dlp/extractor/boosty.py @@ -1,7 +1,11 @@ +import json +import urllib.parse + from .common import InfoExtractor from .youtube import YoutubeIE from ..utils import ( ExtractorError, + bug_reports_message, int_or_none, qualities, str_or_none, @@ -162,9 +166,19 @@ class BoostyIE(InfoExtractor): def _real_extract(self, url): user, post_id = self._match_valid_url(url).group('user', 'post_id') + + auth_headers = {} + auth_cookie = self._get_cookies('https://boosty.to/').get('auth') + if auth_cookie is not None: + try: + auth_data = json.loads(urllib.parse.unquote(auth_cookie.value)) + auth_headers['Authorization'] = f'Bearer {auth_data["accessToken"]}' + except (json.JSONDecodeError, KeyError): + self.report_warning(f'Failed to extract token from auth cookie{bug_reports_message()}') + post = self._download_json( f'https://api.boosty.to/v1/blog/{user}/post/{post_id}', post_id, - note='Downloading post data', errnote='Unable to download post data') + note='Downloading post data', errnote='Unable to download post data', headers=auth_headers) post_title = post.get('title') if not post_title: @@ -202,7 +216,9 @@ class BoostyIE(InfoExtractor): 'thumbnail': (('previewUrl', 'defaultPreview'), {url_or_none}), }, get_all=False)}) - if not entries: + if not entries and not post.get('hasAccess'): + self.raise_login_required('This post requires a subscription', metadata_available=True) + elif not entries: raise ExtractorError('No videos found', expected=True) if len(entries) == 1: return entries[0] From b38018b781b062d5169d104ab430489aef8e7f1e Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Wed, 8 May 2024 20:51:16 +0000 Subject: [PATCH 002/397] [ie/mixch] Extract comments (#9860) Authored by: pzhlkj6612 --- yt_dlp/extractor/mixch.py | 41 +++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/mixch.py b/yt_dlp/extractor/mixch.py index b980fd01a8..58c4a23018 100644 --- a/yt_dlp/extractor/mixch.py +++ b/yt_dlp/extractor/mixch.py @@ -1,6 +1,12 @@ from .common import InfoExtractor from ..networking.exceptions import HTTPError -from ..utils import ExtractorError, UserNotLive, int_or_none, url_or_none +from ..utils import ( + ExtractorError, + UserNotLive, + int_or_none, + str_or_none, + url_or_none, +) from ..utils.traversal import traverse_obj @@ -9,17 +15,20 @@ class MixchIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P\d+)' _TESTS = [{ - 'url': 'https://mixch.tv/u/16236849/live', + 'url': 'https://mixch.tv/u/16943797/live', 'skip': 'don\'t know if this live persists', 'info_dict': { - 'id': '16236849', - 'title': '24配信シェア⭕️投票🙏💦', - 'comment_count': 13145, - 'view_count': 28348, - 'timestamp': 1636189377, - 'uploader': '🦥伊咲👶🏻#フレアワ', - 'uploader_id': '16236849', - } + 'id': '16943797', + 'ext': 'mp4', + 'title': '#EntView #カリナ #セブチ 2024-05-05 06:58', + 'comment_count': int, + 'view_count': int, + 'timestamp': 1714726805, + 'uploader': 'Ent.View K-news🎶💕', + 'uploader_id': '16943797', + 'live_status': 'is_live', + 'upload_date': '20240503', + }, }, { 'url': 'https://mixch.tv/u/16137876/live', 'only_matching': True, @@ -48,8 +57,20 @@ class MixchIE(InfoExtractor): 'protocol': 'm3u8', }], 'is_live': True, + '__post_extractor': self.extract_comments(video_id), } + def _get_comments(self, video_id): + yield from traverse_obj(self._download_json( + f'https://mixch.tv/api-web/lives/{video_id}/messages', video_id, + note='Downloading comments', errnote='Failed to download comments'), (..., { + 'author': ('name', {str}), + 'author_id': ('user_id', {str_or_none}), + 'id': ('message_id', {str}, {lambda x: x or None}), + 'text': ('body', {str}), + 'timestamp': ('created', {int}), + })) + class MixchArchiveIE(InfoExtractor): IE_NAME = 'mixch:archive' From df5c9e733aaba703cf285c0372b6d61629330c82 Mon Sep 17 00:00:00 2001 From: sepro <4618135+seproDev@users.noreply.github.com> Date: Wed, 8 May 2024 23:02:22 +0200 Subject: [PATCH 003/397] [ie/vk] Improve format extraction (#9885) Closes #5675 Authored by: seproDev --- yt_dlp/extractor/vk.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index 7e3a3a9a98..28d5026850 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -451,6 +451,7 @@ class VKIE(VKBaseIE): info_page, 'view count', default=None)) formats = [] + subtitles = {} for format_id, format_url in data.items(): format_url = url_or_none(format_url) if not format_url or not format_url.startswith(('http', '//', 'rtmp')): @@ -462,12 +463,21 @@ class VKIE(VKBaseIE): formats.append({ 'format_id': format_id, 'url': format_url, + 'ext': 'mp4', + 'source_preference': 1, 'height': height, }) elif format_id == 'hls': - formats.extend(self._extract_m3u8_formats( + fmts, subs = self._extract_m3u8_formats_and_subtitles( format_url, video_id, 'mp4', 'm3u8_native', - m3u8_id=format_id, fatal=False, live=is_live)) + m3u8_id=format_id, fatal=False, live=is_live) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + elif format_id.startswith('dash_'): + fmts, subs = self._extract_mpd_formats_and_subtitles( + format_url, video_id, mpd_id=format_id, fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) elif format_id == 'rtmp': formats.append({ 'format_id': format_id, @@ -475,7 +485,6 @@ class VKIE(VKBaseIE): 'ext': 'flv', }) - subtitles = {} for sub in data.get('subs') or {}: subtitles.setdefault(sub.get('lang', 'en'), []).append({ 'ext': sub.get('title', '.srt').split('.')[-1], @@ -496,6 +505,7 @@ class VKIE(VKBaseIE): 'comment_count': int_or_none(mv_data.get('commcount')), 'is_live': is_live, 'subtitles': subtitles, + '_format_sort_fields': ('res', 'source'), } From 06d52c87314e0bbc16c43c405090843885577b88 Mon Sep 17 00:00:00 2001 From: fireattack Date: Thu, 9 May 2024 05:09:38 +0800 Subject: [PATCH 004/397] [ie/BilibiliSpaceVideo] Better error message (#9839) Closes #9528 Authored by: fireattack --- yt_dlp/extractor/bilibili.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index fee4b29940..6221e9a51e 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -1049,9 +1049,10 @@ class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE): raise ExtractorError( 'Request is blocked by server (412), please add cookies, wait and try later.', expected=True) raise - if response['code'] == -401: + if response['code'] in (-352, -401): raise ExtractorError( - 'Request is blocked by server (401), please add cookies, wait and try later.', expected=True) + f'Request is blocked by server ({-response["code"]}), ' + 'please add cookies, wait and try later.', expected=True) return response['data'] def get_metadata(page_data): From 2338827072dacab0f15348b70aec8685feefc8d1 Mon Sep 17 00:00:00 2001 From: fireattack Date: Thu, 9 May 2024 05:24:44 +0800 Subject: [PATCH 005/397] [ie/bilibili] Fix `--geo-verification-proxy` support (#9817) Closes #9797 Authored by: fireattack --- yt_dlp/extractor/bilibili.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 6221e9a51e..df34700033 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -93,11 +93,11 @@ class BilibiliBaseIE(InfoExtractor): return formats - def _download_playinfo(self, video_id, cid): + def _download_playinfo(self, video_id, cid, headers=None): return self._download_json( 'https://api.bilibili.com/x/player/playurl', video_id, query={'bvid': video_id, 'cid': cid, 'fnval': 4048}, - note=f'Downloading video formats for cid {cid}')['data'] + note=f'Downloading video formats for cid {cid}', headers=headers)['data'] def json2srt(self, json_data): srt_data = '' @@ -493,7 +493,8 @@ class BiliBiliIE(BilibiliBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - webpage, urlh = self._download_webpage_handle(url, video_id) + headers = self.geo_verification_headers() + webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers) if not self._match_valid_url(urlh.url): return self.url_result(urlh.url) @@ -531,7 +532,7 @@ class BiliBiliIE(BilibiliBaseIE): self._download_json( 'https://api.bilibili.com/x/player/pagelist', video_id, fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'}, - note='Extracting videos in anthology'), + note='Extracting videos in anthology', headers=headers), 'data', expected_type=list) or [] is_anthology = len(page_list_json) > 1 @@ -552,7 +553,7 @@ class BiliBiliIE(BilibiliBaseIE): festival_info = {} if is_festival: - play_info = self._download_playinfo(video_id, cid) + play_info = self._download_playinfo(video_id, cid, headers=headers) festival_info = traverse_obj(initial_state, { 'uploader': ('videoInfo', 'upName'), @@ -666,14 +667,15 @@ class BiliBiliBangumiIE(BilibiliBaseIE): def _real_extract(self, url): episode_id = self._match_id(url) - webpage = self._download_webpage(url, episode_id) + headers = self.geo_verification_headers() + webpage = self._download_webpage(url, episode_id, headers=headers) if '您所在的地区无法观看本片' in webpage: raise GeoRestrictedError('This video is restricted') elif '正在观看预览,大会员免费看全片' in webpage: self.raise_login_required('This video is for premium members only') - headers = {'Referer': url, **self.geo_verification_headers()} + headers['Referer'] = url play_info = self._download_json( 'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id, 'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id}, @@ -724,7 +726,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE): 'duration': float_or_none(play_info.get('timelength'), scale=1000), 'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid), '__post_extractor': self.extract_comments(aid), - 'http_headers': headers, + 'http_headers': {'Referer': url}, } From c4b87dd885ee5391e5f481e7c8bd550a7c543623 Mon Sep 17 00:00:00 2001 From: src-tinkerer <149616646+src-tinkerer@users.noreply.github.com> Date: Wed, 8 May 2024 21:27:30 +0000 Subject: [PATCH 006/397] [ie/ZenYandex] Fix extractor (#9813) Closes #9803 Authored by: src-tinkerer --- yt_dlp/extractor/yandexvideo.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/yandexvideo.py b/yt_dlp/extractor/yandexvideo.py index 4382a5684a..95a9446e30 100644 --- a/yt_dlp/extractor/yandexvideo.py +++ b/yt_dlp/extractor/yandexvideo.py @@ -259,15 +259,15 @@ class ZenYandexIE(InfoExtractor): webpage = self._download_webpage(redirect, video_id, note='Redirecting') data_json = self._search_json( r'("data"\s*:|data\s*=)', webpage, 'metadata', video_id, contains_pattern=r'{["\']_*serverState_*video.+}') - serverstate = self._search_regex(r'(_+serverState_+video-site_[^_]+_+)', - webpage, 'server state').replace('State', 'Settings') + serverstate = self._search_regex(r'(_+serverState_+video-site_[^_]+_+)', webpage, 'server state') uploader = self._search_regex(r'(]+>)', webpage, 'uploader', default='') uploader_name = extract_attributes(uploader).get('aria-label') - video_json = try_get(data_json, lambda x: x[serverstate]['exportData']['video'], dict) - stream_urls = try_get(video_json, lambda x: x['video']['streams']) + item_id = traverse_obj(data_json, (serverstate, 'videoViewer', 'openedItemId', {str})) + video_json = traverse_obj(data_json, (serverstate, 'videoViewer', 'items', item_id, {dict})) or {} + formats, subtitles = [], {} - for s_url in stream_urls: + for s_url in traverse_obj(video_json, ('video', 'streams', ..., {url_or_none})): ext = determine_ext(s_url) if ext == 'mpd': fmts, subs = self._extract_mpd_formats_and_subtitles(s_url, video_id, mpd_id='dash') From 6b54cccdcb892bca3e55993480d8b86f1c7e6da6 Mon Sep 17 00:00:00 2001 From: Alexandre Huot Date: Wed, 8 May 2024 18:10:06 -0400 Subject: [PATCH 007/397] [ie/Qub] Fix extractor (#7019) Closes #4989 Authored by: alexhuot1, dirkf --- yt_dlp/extractor/tva.py | 44 +++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/yt_dlp/extractor/tva.py b/yt_dlp/extractor/tva.py index 9afe233284..e3e10557c2 100644 --- a/yt_dlp/extractor/tva.py +++ b/yt_dlp/extractor/tva.py @@ -1,10 +1,9 @@ +import functools +import re + from .common import InfoExtractor -from ..utils import ( - float_or_none, - int_or_none, - smuggle_url, - strip_or_none, -) +from ..utils import float_or_none, int_or_none, smuggle_url, strip_or_none +from ..utils.traversal import traverse_obj class TVAIE(InfoExtractor): @@ -49,11 +48,20 @@ class QubIE(InfoExtractor): 'info_dict': { 'id': '6084352463001', 'ext': 'mp4', - 'title': 'Épisode 01', + 'title': 'Ép 01. Mon dernier jour', 'uploader_id': '5481942443001', 'upload_date': '20190907', 'timestamp': 1567899756, 'description': 'md5:9c0d7fbb90939420c651fd977df90145', + 'thumbnail': r're:https://.+\.jpg', + 'episode': 'Ép 01. Mon dernier jour', + 'episode_number': 1, + 'tags': ['alerte amber', 'alerte amber saison 1', 'surdemande'], + 'duration': 2625.963, + 'season': 'Season 1', + 'season_number': 1, + 'series': 'Alerte Amber', + 'channel': 'TVA', }, }, { 'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943', @@ -64,22 +72,24 @@ class QubIE(InfoExtractor): def _real_extract(self, url): entity_id = self._match_id(url) - entity = self._download_json( - 'https://www.qub.ca/proxy/pfu/content-delivery-service/v1/entities', - entity_id, query={'id': entity_id}) + webpage = self._download_webpage(url, entity_id) + entity = self._search_nextjs_data(webpage, entity_id)['props']['initialProps']['pageProps']['fallbackData'] video_id = entity['videoId'] episode = strip_or_none(entity.get('name')) return { '_type': 'url_transparent', + 'url': f'https://videos.tva.ca/details/_{video_id}', + 'ie_key': TVAIE.ie_key(), 'id': video_id, 'title': episode, - # 'url': self.BRIGHTCOVE_URL_TEMPLATE % entity['referenceId'], - 'url': 'https://videos.tva.ca/details/_' + video_id, - 'description': entity.get('longDescription'), - 'duration': float_or_none(entity.get('durationMillis'), 1000), 'episode': episode, - 'episode_number': int_or_none(entity.get('episodeNumber')), - # 'ie_key': 'BrightcoveNew', - 'ie_key': TVAIE.ie_key(), + **traverse_obj(entity, { + 'description': ('longDescription', {str}), + 'duration': ('durationMillis', {functools.partial(float_or_none, scale=1000)}), + 'channel': ('knownEntities', 'channel', 'name', {str}), + 'series': ('knownEntities', 'videoShow', 'name', {str}), + 'season_number': ('slug', {lambda x: re.search(r'/s(?:ai|ea)son-(\d+)/', x)}, 1, {int_or_none}), + 'episode_number': ('episodeNumber', {int_or_none}), + }), } From 73f12119b52d98281804b0c072b2ed6aa841ec88 Mon Sep 17 00:00:00 2001 From: Mozi <29089388+pzhlkj6612@users.noreply.github.com> Date: Fri, 10 May 2024 17:13:35 +0000 Subject: [PATCH 008/397] [ie/netease:program] Improve `--no-playlist` message (#9488) Authored by: pzhlkj6612 --- yt_dlp/extractor/neteasemusic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py index 73b33a9f94..b54c12e1e2 100644 --- a/yt_dlp/extractor/neteasemusic.py +++ b/yt_dlp/extractor/neteasemusic.py @@ -561,7 +561,8 @@ class NetEaseMusicProgramIE(NetEaseMusicBaseIE): 'timestamp': ('createTime', {self.kilo_or_none}), }) - if not self._yes_playlist(info['songs'] and program_id, info['mainSong']['id']): + if not self._yes_playlist( + info['songs'] and program_id, info['mainSong']['id'], playlist_label='program', video_label='song'): formats = self.extract_formats(info['mainSong']) return { From 00a9f2e1f7fa69499221f2e8dd73a08efeef79bc Mon Sep 17 00:00:00 2001 From: kclauhk <78251477+kclauhk@users.noreply.github.com> Date: Sat, 11 May 2024 01:19:57 +0800 Subject: [PATCH 009/397] [ie/canalalpha] Fix extractor (#9675) Authored by: kclauhk --- yt_dlp/extractor/canalalpha.py | 35 +++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/yt_dlp/extractor/canalalpha.py b/yt_dlp/extractor/canalalpha.py index df5ca58187..745e6954c7 100644 --- a/yt_dlp/extractor/canalalpha.py +++ b/yt_dlp/extractor/canalalpha.py @@ -40,7 +40,7 @@ class CanalAlphaIE(InfoExtractor): 'id': '24484', 'ext': 'mp4', 'title': 'Ces innovations qui veulent rendre l’agriculture plus durable', - 'description': 'md5:3de3f151180684621e85be7c10e4e613', + 'description': 'md5:85d594a3b5dc6ccfc4a85aba6e73b129', 'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_10236.jpg', 'upload_date': '20211026', 'duration': 360, @@ -58,14 +58,25 @@ class CanalAlphaIE(InfoExtractor): 'duration': 360, }, 'params': {'skip_download': True} + }, { + 'url': 'https://www.canalalpha.ch/play/le-journal/topic/33500/encore-des-mesures-deconomie-dans-le-jura', + 'info_dict': { + 'id': '33500', + 'ext': 'mp4', + 'title': 'Encore des mesures d\'économie dans le Jura', + 'description': 'md5:938b5b556592f2d1b9ab150268082a80', + 'thumbnail': 'https://static.canalalpha.ch/poster/news/news_46665.jpg', + 'upload_date': '20240411', + 'duration': 105, + }, }] def _real_extract(self, url): - id = self._match_id(url) - webpage = self._download_webpage(url, id) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) data_json = self._parse_json(self._search_regex( r'window\.__SERVER_STATE__\s?=\s?({(?:(?!};)[^"]|"([^"]|\\")*")+})\s?;', - webpage, 'data_json'), id)['1']['data']['data'] + webpage, 'data_json'), video_id)['1']['data']['data'] manifests = try_get(data_json, lambda x: x['video']['manifests'], expected_type=dict) or {} subtitles = {} formats = [{ @@ -75,15 +86,17 @@ class CanalAlphaIE(InfoExtractor): 'height': try_get(video, lambda x: x['res']['height'], expected_type=int), } for video in try_get(data_json, lambda x: x['video']['mp4'], expected_type=list) or [] if video.get('$url')] if manifests.get('hls'): - m3u8_frmts, m3u8_subs = self._parse_m3u8_formats_and_subtitles(manifests['hls'], video_id=id) - formats.extend(m3u8_frmts) - subtitles = self._merge_subtitles(subtitles, m3u8_subs) + fmts, subs = self._extract_m3u8_formats_and_subtitles( + manifests['hls'], video_id, m3u8_id='hls', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) if manifests.get('dash'): - dash_frmts, dash_subs = self._parse_mpd_formats_and_subtitles(manifests['dash']) - formats.extend(dash_frmts) - subtitles = self._merge_subtitles(subtitles, dash_subs) + fmts, subs = self._extract_mpd_formats_and_subtitles( + manifests['dash'], video_id, mpd_id='dash', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) return { - 'id': id, + 'id': video_id, 'title': data_json.get('title').strip(), 'description': clean_html(dict_get(data_json, ('longDesc', 'shortDesc'))), 'thumbnail': data_json.get('poster'), From 98d71d8c5e5dab08b561ee6f137e968d2a004262 Mon Sep 17 00:00:00 2001 From: sepro <4618135+seproDev@users.noreply.github.com> Date: Fri, 10 May 2024 19:20:55 +0200 Subject: [PATCH 010/397] [ie/commonmistakes] Raise error on blob URLs (#9897) Authored by: seproDev --- yt_dlp/extractor/_extractors.py | 6 +++++- yt_dlp/extractor/commonmistakes.py | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 42034275b9..1f095c932a 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -387,7 +387,11 @@ from .comedycentral import ( ComedyCentralIE, ComedyCentralTVIE, ) -from .commonmistakes import CommonMistakesIE, UnicodeBOMIE +from .commonmistakes import ( + BlobIE, + CommonMistakesIE, + UnicodeBOMIE, +) from .commonprotocols import ( MmsIE, RtmpIE, diff --git a/yt_dlp/extractor/commonmistakes.py b/yt_dlp/extractor/commonmistakes.py index 1d3b61c732..4514424e8e 100644 --- a/yt_dlp/extractor/commonmistakes.py +++ b/yt_dlp/extractor/commonmistakes.py @@ -40,3 +40,19 @@ class UnicodeBOMIE(InfoExtractor): 'Your URL starts with a Byte Order Mark (BOM). ' 'Removing the BOM and looking for "%s" ...' % real_url) return self.url_result(real_url) + + +class BlobIE(InfoExtractor): + IE_DESC = False + _VALID_URL = r'blob:' + + _TESTS = [{ + 'url': 'blob:https://www.youtube.com/4eb3d090-a761-46e6-8083-c32016a36e3b', + 'only_matching': True, + }] + + def _real_extract(self, url): + raise ExtractorError( + 'You\'ve asked yt-dlp to download a blob URL. ' + 'A blob URL exists only locally in your browser. ' + 'It is not possible for yt-dlp to access it.', expected=True) From 3c7a287e281d9f9a353dce8902ff78a84c24a040 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sat, 11 May 2024 10:06:58 +1200 Subject: [PATCH 011/397] [test] Add HTTP proxy tests (#9578) Also fixes HTTPS proxies for curl_cffi Authored by: coletdjnz --- test/conftest.py | 50 ++++- test/helper.py | 5 + test/test_http_proxy.py | 379 +++++++++++++++++++++++++++++++++ test/test_networking.py | 271 ++++++++++------------- test/test_websockets.py | 55 +++-- yt_dlp/networking/_curlcffi.py | 14 +- 6 files changed, 595 insertions(+), 179 deletions(-) create mode 100644 test/test_http_proxy.py diff --git a/test/conftest.py b/test/conftest.py index 2fbc269e1f..decd2c85c8 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,4 +1,3 @@ -import functools import inspect import pytest @@ -10,7 +9,9 @@ from yt_dlp.utils._utils import _YDLLogger as FakeLogger @pytest.fixture def handler(request): - RH_KEY = request.param + RH_KEY = getattr(request, 'param', None) + if not RH_KEY: + return if inspect.isclass(RH_KEY) and issubclass(RH_KEY, RequestHandler): handler = RH_KEY elif RH_KEY in _REQUEST_HANDLERS: @@ -18,9 +19,46 @@ def handler(request): else: pytest.skip(f'{RH_KEY} request handler is not available') - return functools.partial(handler, logger=FakeLogger) + class HandlerWrapper(handler): + RH_KEY = handler.RH_KEY + + def __init__(self, *args, **kwargs): + super().__init__(logger=FakeLogger, *args, **kwargs) + + return HandlerWrapper -def validate_and_send(rh, req): - rh.validate(req) - return rh.send(req) +@pytest.fixture(autouse=True) +def skip_handler(request, handler): + """usage: pytest.mark.skip_handler('my_handler', 'reason')""" + for marker in request.node.iter_markers('skip_handler'): + if marker.args[0] == handler.RH_KEY: + pytest.skip(marker.args[1] if len(marker.args) > 1 else '') + + +@pytest.fixture(autouse=True) +def skip_handler_if(request, handler): + """usage: pytest.mark.skip_handler_if('my_handler', lambda request: True, 'reason')""" + for marker in request.node.iter_markers('skip_handler_if'): + if marker.args[0] == handler.RH_KEY and marker.args[1](request): + pytest.skip(marker.args[2] if len(marker.args) > 2 else '') + + +@pytest.fixture(autouse=True) +def skip_handlers_if(request, handler): + """usage: pytest.mark.skip_handlers_if(lambda request, handler: True, 'reason')""" + for marker in request.node.iter_markers('skip_handlers_if'): + if handler and marker.args[0](request, handler): + pytest.skip(marker.args[1] if len(marker.args) > 1 else '') + + +def pytest_configure(config): + config.addinivalue_line( + "markers", "skip_handler(handler): skip test for the given handler", + ) + config.addinivalue_line( + "markers", "skip_handler_if(handler): skip test for the given handler if condition is true" + ) + config.addinivalue_line( + "markers", "skip_handlers_if(handler): skip test for handlers when the condition is true" + ) diff --git a/test/helper.py b/test/helper.py index 7760fd8d7f..e7473120d1 100644 --- a/test/helper.py +++ b/test/helper.py @@ -338,3 +338,8 @@ def http_server_port(httpd): def verify_address_availability(address): if find_available_port(address) is None: pytest.skip(f'Unable to bind to source address {address} (address may not exist)') + + +def validate_and_send(rh, req): + rh.validate(req) + return rh.send(req) diff --git a/test/test_http_proxy.py b/test/test_http_proxy.py new file mode 100644 index 0000000000..c1d7c53f51 --- /dev/null +++ b/test/test_http_proxy.py @@ -0,0 +1,379 @@ +import abc +import base64 +import contextlib +import functools +import json +import os +import random +import ssl +import threading +from http.server import BaseHTTPRequestHandler +from socketserver import ThreadingTCPServer + +import pytest + +from test.helper import http_server_port, verify_address_availability +from test.test_networking import TEST_DIR +from test.test_socks import IPv6ThreadingTCPServer +from yt_dlp.dependencies import urllib3 +from yt_dlp.networking import Request +from yt_dlp.networking.exceptions import HTTPError, ProxyError, SSLError + + +class HTTPProxyAuthMixin: + + def proxy_auth_error(self): + self.send_response(407) + self.send_header('Proxy-Authenticate', 'Basic realm="test http proxy"') + self.end_headers() + return False + + def do_proxy_auth(self, username, password): + if username is None and password is None: + return True + + proxy_auth_header = self.headers.get('Proxy-Authorization', None) + if proxy_auth_header is None: + return self.proxy_auth_error() + + if not proxy_auth_header.startswith('Basic '): + return self.proxy_auth_error() + + auth = proxy_auth_header[6:] + + try: + auth_username, auth_password = base64.b64decode(auth).decode().split(':', 1) + except Exception: + return self.proxy_auth_error() + + if auth_username != (username or '') or auth_password != (password or ''): + return self.proxy_auth_error() + return True + + +class HTTPProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin): + def __init__(self, *args, proxy_info=None, username=None, password=None, request_handler=None, **kwargs): + self.username = username + self.password = password + self.proxy_info = proxy_info + super().__init__(*args, **kwargs) + + def do_GET(self): + if not self.do_proxy_auth(self.username, self.password): + self.server.close_request(self.request) + return + if self.path.endswith('/proxy_info'): + payload = json.dumps(self.proxy_info or { + 'client_address': self.client_address, + 'connect': False, + 'connect_host': None, + 'connect_port': None, + 'headers': dict(self.headers), + 'path': self.path, + 'proxy': ':'.join(str(y) for y in self.connection.getsockname()), + }) + self.send_response(200) + self.send_header('Content-Type', 'application/json; charset=utf-8') + self.send_header('Content-Length', str(len(payload))) + self.end_headers() + self.wfile.write(payload.encode()) + else: + self.send_response(404) + self.end_headers() + + self.server.close_request(self.request) + + +if urllib3: + import urllib3.util.ssltransport + + class SSLTransport(urllib3.util.ssltransport.SSLTransport): + """ + Modified version of urllib3 SSLTransport to support server side SSL + + This allows us to chain multiple TLS connections. + """ + def __init__(self, socket, ssl_context, server_hostname=None, suppress_ragged_eofs=True, server_side=False): + self.incoming = ssl.MemoryBIO() + self.outgoing = ssl.MemoryBIO() + + self.suppress_ragged_eofs = suppress_ragged_eofs + self.socket = socket + + self.sslobj = ssl_context.wrap_bio( + self.incoming, + self.outgoing, + server_hostname=server_hostname, + server_side=server_side + ) + self._ssl_io_loop(self.sslobj.do_handshake) + + @property + def _io_refs(self): + return self.socket._io_refs + + @_io_refs.setter + def _io_refs(self, value): + self.socket._io_refs = value + + def shutdown(self, *args, **kwargs): + self.socket.shutdown(*args, **kwargs) +else: + SSLTransport = None + + +class HTTPSProxyHandler(HTTPProxyHandler): + def __init__(self, request, *args, **kwargs): + certfn = os.path.join(TEST_DIR, 'testcert.pem') + sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + sslctx.load_cert_chain(certfn, None) + if isinstance(request, ssl.SSLSocket): + request = SSLTransport(request, ssl_context=sslctx, server_side=True) + else: + request = sslctx.wrap_socket(request, server_side=True) + super().__init__(request, *args, **kwargs) + + +class HTTPConnectProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin): + protocol_version = 'HTTP/1.1' + default_request_version = 'HTTP/1.1' + + def __init__(self, *args, username=None, password=None, request_handler=None, **kwargs): + self.username = username + self.password = password + self.request_handler = request_handler + super().__init__(*args, **kwargs) + + def do_CONNECT(self): + if not self.do_proxy_auth(self.username, self.password): + self.server.close_request(self.request) + return + self.send_response(200) + self.end_headers() + proxy_info = { + 'client_address': self.client_address, + 'connect': True, + 'connect_host': self.path.split(':')[0], + 'connect_port': int(self.path.split(':')[1]), + 'headers': dict(self.headers), + 'path': self.path, + 'proxy': ':'.join(str(y) for y in self.connection.getsockname()), + } + self.request_handler(self.request, self.client_address, self.server, proxy_info=proxy_info) + self.server.close_request(self.request) + + +class HTTPSConnectProxyHandler(HTTPConnectProxyHandler): + def __init__(self, request, *args, **kwargs): + certfn = os.path.join(TEST_DIR, 'testcert.pem') + sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + sslctx.load_cert_chain(certfn, None) + request = sslctx.wrap_socket(request, server_side=True) + self._original_request = request + super().__init__(request, *args, **kwargs) + + def do_CONNECT(self): + super().do_CONNECT() + self.server.close_request(self._original_request) + + +@contextlib.contextmanager +def proxy_server(proxy_server_class, request_handler, bind_ip=None, **proxy_server_kwargs): + server = server_thread = None + try: + bind_address = bind_ip or '127.0.0.1' + server_type = ThreadingTCPServer if '.' in bind_address else IPv6ThreadingTCPServer + server = server_type( + (bind_address, 0), functools.partial(proxy_server_class, request_handler=request_handler, **proxy_server_kwargs)) + server_port = http_server_port(server) + server_thread = threading.Thread(target=server.serve_forever) + server_thread.daemon = True + server_thread.start() + if '.' not in bind_address: + yield f'[{bind_address}]:{server_port}' + else: + yield f'{bind_address}:{server_port}' + finally: + server.shutdown() + server.server_close() + server_thread.join(2.0) + + +class HTTPProxyTestContext(abc.ABC): + REQUEST_HANDLER_CLASS = None + REQUEST_PROTO = None + + def http_server(self, server_class, *args, **kwargs): + return proxy_server(server_class, self.REQUEST_HANDLER_CLASS, *args, **kwargs) + + @abc.abstractmethod + def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs) -> dict: + """return a dict of proxy_info""" + + +class HTTPProxyHTTPTestContext(HTTPProxyTestContext): + # Standard HTTP Proxy for http requests + REQUEST_HANDLER_CLASS = HTTPProxyHandler + REQUEST_PROTO = 'http' + + def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs): + request = Request(f'http://{target_domain or "127.0.0.1"}:{target_port or "40000"}/proxy_info', **req_kwargs) + handler.validate(request) + return json.loads(handler.send(request).read().decode()) + + +class HTTPProxyHTTPSTestContext(HTTPProxyTestContext): + # HTTP Connect proxy, for https requests + REQUEST_HANDLER_CLASS = HTTPSProxyHandler + REQUEST_PROTO = 'https' + + def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs): + request = Request(f'https://{target_domain or "127.0.0.1"}:{target_port or "40000"}/proxy_info', **req_kwargs) + handler.validate(request) + return json.loads(handler.send(request).read().decode()) + + +CTX_MAP = { + 'http': HTTPProxyHTTPTestContext, + 'https': HTTPProxyHTTPSTestContext, +} + + +@pytest.fixture(scope='module') +def ctx(request): + return CTX_MAP[request.param]() + + +@pytest.mark.parametrize( + 'handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) +@pytest.mark.parametrize('ctx', ['http'], indirect=True) # pure http proxy can only support http +class TestHTTPProxy: + def test_http_no_auth(self, handler, ctx): + with ctx.http_server(HTTPProxyHandler) as server_address: + with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['proxy'] == server_address + assert proxy_info['connect'] is False + assert 'Proxy-Authorization' not in proxy_info['headers'] + + def test_http_auth(self, handler, ctx): + with ctx.http_server(HTTPProxyHandler, username='test', password='test') as server_address: + with handler(proxies={ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['proxy'] == server_address + assert 'Proxy-Authorization' in proxy_info['headers'] + + def test_http_bad_auth(self, handler, ctx): + with ctx.http_server(HTTPProxyHandler, username='test', password='test') as server_address: + with handler(proxies={ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}) as rh: + with pytest.raises(HTTPError) as exc_info: + ctx.proxy_info_request(rh) + assert exc_info.value.response.status == 407 + exc_info.value.response.close() + + def test_http_source_address(self, handler, ctx): + with ctx.http_server(HTTPProxyHandler) as server_address: + source_address = f'127.0.0.{random.randint(5, 255)}' + verify_address_availability(source_address) + with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}, + source_address=source_address) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['proxy'] == server_address + assert proxy_info['client_address'][0] == source_address + + @pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies') + def test_https(self, handler, ctx): + with ctx.http_server(HTTPSProxyHandler) as server_address: + with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['proxy'] == server_address + assert proxy_info['connect'] is False + assert 'Proxy-Authorization' not in proxy_info['headers'] + + @pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies') + def test_https_verify_failed(self, handler, ctx): + with ctx.http_server(HTTPSProxyHandler) as server_address: + with handler(verify=True, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh: + # Accept SSLError as may not be feasible to tell if it is proxy or request error. + # note: if request proto also does ssl verification, this may also be the error of the request. + # Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases. + with pytest.raises((ProxyError, SSLError)): + ctx.proxy_info_request(rh) + + def test_http_with_idn(self, handler, ctx): + with ctx.http_server(HTTPProxyHandler) as server_address: + with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh, target_domain='中文.tw') + assert proxy_info['proxy'] == server_address + assert proxy_info['path'].startswith('http://xn--fiq228c.tw') + assert proxy_info['headers']['Host'].split(':', 1)[0] == 'xn--fiq228c.tw' + + +@pytest.mark.parametrize( + 'handler,ctx', [ + ('Requests', 'https'), + ('CurlCFFI', 'https'), + ], indirect=True) +class TestHTTPConnectProxy: + def test_http_connect_no_auth(self, handler, ctx): + with ctx.http_server(HTTPConnectProxyHandler) as server_address: + with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['proxy'] == server_address + assert proxy_info['connect'] is True + assert 'Proxy-Authorization' not in proxy_info['headers'] + + def test_http_connect_auth(self, handler, ctx): + with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address: + with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['proxy'] == server_address + assert 'Proxy-Authorization' in proxy_info['headers'] + + @pytest.mark.skip_handler( + 'Requests', + 'bug in urllib3 causes unclosed socket: https://github.com/urllib3/urllib3/issues/3374' + ) + def test_http_connect_bad_auth(self, handler, ctx): + with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address: + with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}) as rh: + with pytest.raises(ProxyError): + ctx.proxy_info_request(rh) + + def test_http_connect_source_address(self, handler, ctx): + with ctx.http_server(HTTPConnectProxyHandler) as server_address: + source_address = f'127.0.0.{random.randint(5, 255)}' + verify_address_availability(source_address) + with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}, + source_address=source_address, + verify=False) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['proxy'] == server_address + assert proxy_info['client_address'][0] == source_address + + @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test') + def test_https_connect_proxy(self, handler, ctx): + with ctx.http_server(HTTPSConnectProxyHandler) as server_address: + with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['proxy'] == server_address + assert proxy_info['connect'] is True + assert 'Proxy-Authorization' not in proxy_info['headers'] + + @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test') + def test_https_connect_verify_failed(self, handler, ctx): + with ctx.http_server(HTTPSConnectProxyHandler) as server_address: + with handler(verify=True, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh: + # Accept SSLError as may not be feasible to tell if it is proxy or request error. + # note: if request proto also does ssl verification, this may also be the error of the request. + # Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases. + with pytest.raises((ProxyError, SSLError)): + ctx.proxy_info_request(rh) + + @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test') + def test_https_connect_proxy_auth(self, handler, ctx): + with ctx.http_server(HTTPSConnectProxyHandler, username='test', password='test') as server_address: + with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://test:test@{server_address}'}) as rh: + proxy_info = ctx.proxy_info_request(rh) + assert proxy_info['proxy'] == server_address + assert 'Proxy-Authorization' in proxy_info['headers'] diff --git a/test/test_networking.py b/test/test_networking.py index d613cb5681..994467014d 100644 --- a/test/test_networking.py +++ b/test/test_networking.py @@ -6,6 +6,8 @@ import sys import pytest +from yt_dlp.networking.common import Features + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import gzip @@ -27,8 +29,12 @@ import zlib from email.message import Message from http.cookiejar import CookieJar -from test.conftest import validate_and_send -from test.helper import FakeYDL, http_server_port, verify_address_availability +from test.helper import ( + FakeYDL, + http_server_port, + validate_and_send, + verify_address_availability, +) from yt_dlp.cookies import YoutubeDLCookieJar from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3 from yt_dlp.networking import ( @@ -62,21 +68,6 @@ from yt_dlp.utils.networking import HTTPHeaderDict, std_headers TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -def _build_proxy_handler(name): - class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): - proxy_name = name - - def log_message(self, format, *args): - pass - - def do_GET(self): - self.send_response(200) - self.send_header('Content-Type', 'text/plain; charset=utf-8') - self.end_headers() - self.wfile.write(f'{self.proxy_name}: {self.path}'.encode()) - return HTTPTestRequestHandler - - class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): protocol_version = 'HTTP/1.1' default_request_version = 'HTTP/1.1' @@ -317,8 +308,9 @@ class TestRequestHandlerBase: cls.https_server_thread.start() +@pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) class TestHTTPRequestHandler(TestRequestHandlerBase): - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) + def test_verify_cert(self, handler): with handler() as rh: with pytest.raises(CertificateVerifyError): @@ -329,7 +321,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert r.status == 200 r.close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_ssl_error(self, handler): # HTTPS server with too old TLS version # XXX: is there a better way to test this than to create a new server? @@ -347,7 +338,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers')) assert not issubclass(exc_info.type, CertificateVerifyError) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_percent_encode(self, handler): with handler() as rh: # Unicode characters should be encoded with uppercase percent-encoding @@ -359,7 +349,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res.status == 200 res.close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) @pytest.mark.parametrize('path', [ '/a/b/./../../headers', '/redirect_dotsegments', @@ -375,15 +364,13 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res.url == f'http://127.0.0.1:{self.http_port}/headers' res.close() - # Not supported by CurlCFFI (non-standard) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi (non-standard)') def test_unicode_path_redirection(self, handler): with handler() as rh: r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect')) assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html' r.close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_raise_http_error(self, handler): with handler() as rh: for bad_status in (400, 500, 599, 302): @@ -393,7 +380,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): # Should not raise an error validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_response_url(self, handler): with handler() as rh: # Response url should be that of the last url in redirect chain @@ -405,7 +391,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): res2.close() # Covers some basic cases we expect some level of consistency between request handlers for - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) @pytest.mark.parametrize('redirect_status,method,expected', [ # A 303 must either use GET or HEAD for subsequent request (303, 'POST', ('', 'GET', False)), @@ -447,7 +432,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert expected[1] == res.headers.get('method') assert expected[2] == ('content-length' in headers.decode().lower()) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_request_cookie_header(self, handler): # We should accept a Cookie header being passed as in normal headers and handle it appropriately. with handler() as rh: @@ -480,19 +464,16 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert b'cookie: test=ytdlp' not in data.lower() assert b'cookie: test=test3' in data.lower() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_redirect_loop(self, handler): with handler() as rh: with pytest.raises(HTTPError, match='redirect loop'): validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop')) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_incompleteread(self, handler): with handler(timeout=2) as rh: with pytest.raises(IncompleteRead, match='13 bytes read, 234221 more expected'): validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_cookies(self, handler): cookiejar = YoutubeDLCookieJar() cookiejar.set_cookie(http.cookiejar.Cookie( @@ -509,7 +490,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read() assert b'cookie: test=ytdlp' in data.lower() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_headers(self, handler): with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh: @@ -525,7 +505,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert b'test2: test2' not in data assert b'test3: test3' in data - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_read_timeout(self, handler): with handler() as rh: # Default timeout is 20 seconds, so this should go through @@ -541,7 +520,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4})) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_connect_timeout(self, handler): # nothing should be listening on this port connect_timeout_url = 'http://10.255.255.255' @@ -560,7 +538,6 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): rh, Request(connect_timeout_url, extensions={'timeout': 0.01})) assert 0.01 <= time.time() - now < 20 - @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_source_address(self, handler): source_address = f'127.0.0.{random.randint(5, 255)}' # on some systems these loopback addresses we need for testing may not be available @@ -572,13 +549,13 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert source_address == data # Not supported by CurlCFFI - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi') def test_gzip_trailing_garbage(self, handler): with handler() as rh: data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode() assert data == '