From 91410c9bfa9fd8f01fb817474bcc7b0db5cabf95 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 3 Mar 2015 00:03:06 +0100 Subject: [PATCH 1/2] [letv] Add --cn-verification-proxy (Closes #5077) --- youtube_dl/YoutubeDL.py | 5 ++++- youtube_dl/__init__.py | 1 + youtube_dl/extractor/letv.py | 34 ++++++++++++++++++++++++++++------ youtube_dl/options.py | 6 ++++++ youtube_dl/utils.py | 10 ++++++++++ 5 files changed, 49 insertions(+), 7 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e0baa98727..915963d96a 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -54,6 +54,7 @@ from .utils import ( MaxDownloadsReached, PagedList, parse_filesize, + PerRequestProxyHandler, PostProcessingError, platform_name, preferredencoding, @@ -183,6 +184,8 @@ class YoutubeDL(object): prefer_insecure: Use HTTP instead of HTTPS to retrieve information. At the moment, this is only supported by YouTube. proxy: URL of the proxy server to use + cn_verification_proxy: URL of the proxy to use for IP address verification + on Chinese sites. (Experimental) socket_timeout: Time to wait for unresponsive hosts, in seconds bidi_workaround: Work around buggy terminals without bidirectional text support, using fridibi @@ -1762,7 +1765,7 @@ class YoutubeDL(object): # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805) if 'http' in proxies and 'https' not in proxies: proxies['https'] = proxies['http'] - proxy_handler = compat_urllib_request.ProxyHandler(proxies) + proxy_handler = PerRequestProxyHandler(proxies) debuglevel = 1 if self.params.get('debug_printtraffic') else 0 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 6056da1be8..a08ddd6709 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -364,6 +364,7 @@ def _real_main(argv=None): 'ffmpeg_location': opts.ffmpeg_location, 'hls_prefer_native': opts.hls_prefer_native, 'external_downloader_args': external_downloader_args, + 'cn_verification_proxy': opts.cn_verification_proxy, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py index 583ce35b90..fd5fd260e9 100644 --- a/youtube_dl/extractor/letv.py +++ b/youtube_dl/extractor/letv.py @@ -7,8 +7,9 @@ import time from .common import InfoExtractor from ..compat import ( - compat_urlparse, compat_urllib_parse, + compat_urllib_request, + compat_urlparse, ) from ..utils import ( determine_ext, @@ -42,9 +43,23 @@ class LetvIE(InfoExtractor): 'expected_warnings': [ 'publish time' ] + }, { + 'note': 'This video is available only in Mainland China, thus a proxy is needed', + 'url': 'http://www.letv.com/ptv/vplay/1118082.html', + 'md5': 'f80936fbe20fb2f58648e81386ff7927', + 'info_dict': { + 'id': '1118082', + 'ext': 'mp4', + 'title': '与龙共舞 完整版', + 'description': 'md5:7506a5eeb1722bb9d4068f85024e3986', + }, + 'expected_warnings': [ + 'publish time' + ], + 'params': { + 'cn_verification_proxy': 'proxy.uku.im:8888' + }, }] - # http://www.letv.com/ptv/vplay/1118082.html - # This video is available only in Mainland China @staticmethod def urshift(val, n): @@ -76,8 +91,14 @@ class LetvIE(InfoExtractor): 'tkey': self.calc_time_key(int(time.time())), 'domain': 'www.letv.com' } + play_json_req = compat_urllib_request.Request( + 'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params) + ) + play_json_req.add_header( + 'Ytdl-Request-Proxy', + self._downloader.params.get('cn_verification_proxy')) play_json = self._download_json( - 'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params), + play_json_req, media_id, 'playJson data') # Check for errors @@ -114,7 +135,8 @@ class LetvIE(InfoExtractor): url_info_dict = { 'url': media_url, - 'ext': determine_ext(dispatch[format_id][1]) + 'ext': determine_ext(dispatch[format_id][1]), + 'format_id': format_id, } if format_id[-1:] == 'p': @@ -123,7 +145,7 @@ class LetvIE(InfoExtractor): urls.append(url_info_dict) publish_time = parse_iso8601(self._html_search_regex( - r'发布时间 ([^<>]+) ', page, 'publish time', fatal=False), + r'发布时间 ([^<>]+) ', page, 'publish time', default=None), delimiter=' ', timezone=datetime.timedelta(hours=8)) description = self._html_search_meta('description', page, fatal=False) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index df2be7b74f..a4ca8adc42 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -195,6 +195,12 @@ def parseOpts(overrideArguments=None): action='store_const', const='::', dest='source_address', help='Make all connections via IPv6 (experimental)', ) + network.add_option( + '--cn-verification-proxy', + dest='cn_verification_proxy', default=None, metavar='URL', + help='Use this proxy to verify the IP address for some Chinese sites. ' + 'The default proxy specified by --proxy (or none, if the options is not present) is used for the actual downloading. (experimental)' + ) selection = optparse.OptionGroup(parser, 'Video Selection') selection.add_option( diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1d3401bc2d..b568288faa 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1768,3 +1768,13 @@ def match_filter_func(filter_str): video_title = info_dict.get('title', info_dict.get('id', 'video')) return '%s does not pass filter %s, skipping ..' % (video_title, filter_str) return _match_func + + +class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): + def proxy_open(self, req, proxy, type): + req_proxy = req.headers.get('Ytdl-Request-Proxy') + if req_proxy is not None: + proxy = req_proxy + del req.headers['Ytdl-Request-Proxy'] + return compat_urllib_request.ProxyHandler.proxy_open( + self, req, proxy, type) From 2461f79d2ad9eee44644f6187e366125a29aa70f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 3 Mar 2015 13:56:06 +0100 Subject: [PATCH 2/2] [utils] Correct per-request proxy handling --- youtube_dl/YoutubeDL.py | 3 ++- youtube_dl/extractor/letv.py | 10 ++-------- youtube_dl/utils.py | 15 +++++++++++++-- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 915963d96a..df2aebb59f 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1771,7 +1771,8 @@ class YoutubeDL(object): https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) opener = compat_urllib_request.build_opener( - https_handler, proxy_handler, cookie_processor, ydlh) + proxy_handler, https_handler, cookie_processor, ydlh) + # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play # (See https://github.com/rg3/youtube-dl/issues/1309 for details) diff --git a/youtube_dl/extractor/letv.py b/youtube_dl/extractor/letv.py index fd5fd260e9..85eee141b1 100644 --- a/youtube_dl/extractor/letv.py +++ b/youtube_dl/extractor/letv.py @@ -40,9 +40,6 @@ class LetvIE(InfoExtractor): 'title': '美人天下01', 'description': 'md5:f88573d9d7225ada1359eaf0dbf8bcda', }, - 'expected_warnings': [ - 'publish time' - ] }, { 'note': 'This video is available only in Mainland China, thus a proxy is needed', 'url': 'http://www.letv.com/ptv/vplay/1118082.html', @@ -53,11 +50,8 @@ class LetvIE(InfoExtractor): 'title': '与龙共舞 完整版', 'description': 'md5:7506a5eeb1722bb9d4068f85024e3986', }, - 'expected_warnings': [ - 'publish time' - ], 'params': { - 'cn_verification_proxy': 'proxy.uku.im:8888' + 'cn_verification_proxy': 'http://proxy.uku.im:8888' }, }] @@ -95,7 +89,7 @@ class LetvIE(InfoExtractor): 'http://api.letv.com/mms/out/video/playJson?' + compat_urllib_parse.urlencode(params) ) play_json_req.add_header( - 'Ytdl-Request-Proxy', + 'Ytdl-request-proxy', self._downloader.params.get('cn_verification_proxy')) play_json = self._download_json( play_json_req, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b568288faa..7426e2a1ff 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1771,10 +1771,21 @@ def match_filter_func(filter_str): class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): + def __init__(self, proxies=None): + # Set default handlers + for type in ('http', 'https'): + setattr(self, '%s_open' % type, + lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open: + meth(r, proxy, type)) + return compat_urllib_request.ProxyHandler.__init__(self, proxies) + def proxy_open(self, req, proxy, type): - req_proxy = req.headers.get('Ytdl-Request-Proxy') + req_proxy = req.headers.get('Ytdl-request-proxy') if req_proxy is not None: proxy = req_proxy - del req.headers['Ytdl-Request-Proxy'] + del req.headers['Ytdl-request-proxy'] + + if proxy == '__noproxy__': + return None # No Proxy return compat_urllib_request.ProxyHandler.proxy_open( self, req, proxy, type)