2021-06-03 11:43:42 +02:00
#!/usr/bin/env python3
2016-10-02 13:39:18 +02:00
# coding: utf-8
2012-03-25 03:07:37 +02:00
2014-11-02 11:37:49 +01:00
from __future__ import unicode_literals
2015-07-22 14:03:05 +02:00
import base64
2016-02-16 23:01:44 +01:00
import binascii
2014-03-24 01:40:09 +01:00
import calendar
2014-04-04 23:00:51 +02:00
import codecs
2020-05-04 23:19:33 +02:00
import collections
2014-02-25 01:43:17 +01:00
import contextlib
2013-12-16 05:04:12 +01:00
import ctypes
2013-08-28 12:57:10 +02:00
import datetime
import email . utils
2017-05-01 17:09:18 +02:00
import email . header
2013-05-13 09:20:08 +02:00
import errno
2015-01-10 19:55:36 +01:00
import functools
2012-03-25 03:07:37 +02:00
import gzip
2021-01-24 14:40:02 +01:00
import imp
2012-11-28 00:09:17 +01:00
import io
2016-05-02 05:21:39 +02:00
import itertools
2012-12-20 13:13:24 +01:00
import json
2012-03-25 03:07:37 +02:00
import locale
2013-11-25 03:12:26 +01:00
import math
2015-02-10 03:32:21 +01:00
import operator
2012-03-25 03:07:37 +02:00
import os
2013-08-28 12:57:10 +02:00
import platform
2017-02-04 12:49:58 +01:00
import random
2012-03-25 03:07:37 +02:00
import re
2013-08-28 12:57:10 +02:00
import socket
2016-05-02 05:21:39 +02:00
import ssl
2013-12-09 18:29:07 +01:00
import subprocess
2012-03-25 03:07:37 +02:00
import sys
2014-08-21 13:01:13 +02:00
import tempfile
2020-05-04 23:19:33 +02:00
import time
2013-01-03 15:39:55 +01:00
import traceback
2014-03-10 17:31:32 +01:00
import xml . etree . ElementTree
2012-03-25 03:07:37 +02:00
import zlib
2014-11-02 11:23:40 +01:00
from . compat import (
2017-06-11 20:52:24 +02:00
compat_HTMLParseError ,
2016-01-02 20:49:59 +01:00
compat_HTMLParser ,
2021-04-17 05:02:33 +02:00
compat_HTTPError ,
2015-02-01 11:30:56 +01:00
compat_basestring ,
2014-11-02 11:23:40 +01:00
compat_chr ,
2018-12-09 00:00:32 +01:00
compat_cookiejar ,
2014-12-12 04:01:08 +01:00
compat_ctypes_WINFUNCTYPE ,
2015-10-25 20:04:55 +01:00
compat_etree_fromstring ,
2017-03-25 20:30:10 +01:00
compat_expanduser ,
2014-11-02 11:23:40 +01:00
compat_html_entities ,
2016-06-10 09:11:55 +02:00
compat_html_entities_html5 ,
2015-01-10 19:55:36 +01:00
compat_http_client ,
2019-12-15 17:15:24 +01:00
compat_integer_types ,
2021-02-02 22:15:00 +01:00
compat_numeric_types ,
2015-04-27 16:00:18 +02:00
compat_kwargs ,
2016-09-29 18:28:32 +02:00
compat_os_name ,
2014-11-02 11:23:40 +01:00
compat_parse_qs ,
2016-05-10 09:58:25 +02:00
compat_shlex_quote ,
2014-11-02 11:23:40 +01:00
compat_str ,
2016-05-03 10:50:16 +02:00
compat_struct_pack ,
2016-08-06 20:42:58 +02:00
compat_struct_unpack ,
2014-11-02 11:23:40 +01:00
compat_urllib_error ,
compat_urllib_parse ,
2016-03-25 20:46:57 +01:00
compat_urllib_parse_urlencode ,
2014-11-02 11:23:40 +01:00
compat_urllib_parse_urlparse ,
2020-10-27 11:37:21 +01:00
compat_urllib_parse_urlunparse ,
compat_urllib_parse_quote ,
compat_urllib_parse_quote_plus ,
2016-05-12 12:57:53 +02:00
compat_urllib_parse_unquote_plus ,
2014-11-02 11:23:40 +01:00
compat_urllib_request ,
compat_urlparse ,
2016-03-17 21:52:23 +01:00
compat_xpath ,
2014-11-02 11:23:40 +01:00
)
2014-09-30 17:27:53 +02:00
2016-04-23 15:30:06 +02:00
from . socks import (
ProxyType ,
sockssocket ,
)
2014-09-30 17:27:53 +02:00
2016-05-03 09:15:32 +02:00
def register_socks_protocols():
    """"Register" the SOCKS URL schemes with ``compat_urlparse.uses_netloc``.

    In Python < 2.6.5, urlsplit() suffers from bug
    https://bugs.python.org/issue7904: URLs with protocols not in
    urlparse.uses_netloc are not handled correctly.

    A scheme that is already listed is left alone, so calling this function
    more than once does not append duplicates.
    """
    # Scheme names must be exact (no surrounding whitespace); a padded name
    # could never be equal to the scheme of a parsed URL.
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)
2013-06-06 14:35:08 +02:00
# The class of compiled pattern objects is not clearly defined otherwise,
# so take it from a throwaway compiled pattern.
compiled_regex_type = re.compile(' ').__class__
2019-06-28 19:32:43 +02:00
def random_user_agent():
    """Return a Chrome User-Agent string with a randomly chosen version.

    One entry is drawn with ``random.choice`` from the fixed pool of Chrome
    version numbers below and substituted into a Windows 10 / x64 Chrome
    User-Agent template, so separate calls may return different strings.
    """
    # The template and the version strings carry no padding whitespace:
    # the version has to follow "Chrome/" directly and be separated from
    # "Safari/..." by exactly one space.
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
        '74.0.3729.129',
        '76.0.3780.3',
        '76.0.3780.2',
        '74.0.3729.128',
        '76.0.3780.1',
        '76.0.3780.0',
        '75.0.3770.15',
        '74.0.3729.127',
        '74.0.3729.126',
        '76.0.3779.1',
        '76.0.3779.0',
        '75.0.3770.14',
        '74.0.3729.125',
        '76.0.3778.1',
        '76.0.3778.0',
        '75.0.3770.13',
        '74.0.3729.124',
        '74.0.3729.123',
        '73.0.3683.121',
        '76.0.3777.1',
        '76.0.3777.0',
        '75.0.3770.12',
        '74.0.3729.122',
        '76.0.3776.4',
        '75.0.3770.11',
        '74.0.3729.121',
        '76.0.3776.3',
        '76.0.3776.2',
        '73.0.3683.120',
        '74.0.3729.120',
        '74.0.3729.119',
        '74.0.3729.118',
        '76.0.3776.1',
        '76.0.3776.0',
        '76.0.3775.5',
        '75.0.3770.10',
        '74.0.3729.117',
        '76.0.3775.4',
        '76.0.3775.3',
        '74.0.3729.116',
        '75.0.3770.9',
        '76.0.3775.2',
        '76.0.3775.1',
        '76.0.3775.0',
        '75.0.3770.8',
        '74.0.3729.115',
        '74.0.3729.114',
        '76.0.3774.1',
        '76.0.3774.0',
        '75.0.3770.7',
        '74.0.3729.113',
        '74.0.3729.112',
        '74.0.3729.111',
        '76.0.3773.1',
        '76.0.3773.0',
        '75.0.3770.6',
        '74.0.3729.110',
        '74.0.3729.109',
        '76.0.3772.1',
        '76.0.3772.0',
        '75.0.3770.5',
        '74.0.3729.108',
        '74.0.3729.107',
        '76.0.3771.1',
        '76.0.3771.0',
        '75.0.3770.4',
        '74.0.3729.106',
        '74.0.3729.105',
        '75.0.3770.3',
        '74.0.3729.104',
        '74.0.3729.103',
        '74.0.3729.102',
        '75.0.3770.2',
        '74.0.3729.101',
        '75.0.3770.1',
        '75.0.3770.0',
        '74.0.3729.100',
        '75.0.3769.5',
        '75.0.3769.4',
        '74.0.3729.99',
        '75.0.3769.3',
        '75.0.3769.2',
        '75.0.3768.6',
        '74.0.3729.98',
        '75.0.3769.1',
        '75.0.3769.0',
        '74.0.3729.97',
        '73.0.3683.119',
        '73.0.3683.118',
        '74.0.3729.96',
        '75.0.3768.5',
        '75.0.3768.4',
        '75.0.3768.3',
        '75.0.3768.2',
        '74.0.3729.95',
        '74.0.3729.94',
        '75.0.3768.1',
        '75.0.3768.0',
        '74.0.3729.93',
        '74.0.3729.92',
        '73.0.3683.117',
        '74.0.3729.91',
        '75.0.3766.3',
        '74.0.3729.90',
        '75.0.3767.2',
        '75.0.3767.1',
        '75.0.3767.0',
        '74.0.3729.89',
        '73.0.3683.116',
        '75.0.3766.2',
        '74.0.3729.88',
        '75.0.3766.1',
        '75.0.3766.0',
        '74.0.3729.87',
        '73.0.3683.115',
        '74.0.3729.86',
        '75.0.3765.1',
        '75.0.3765.0',
        '74.0.3729.85',
        '73.0.3683.114',
        '74.0.3729.84',
        '75.0.3764.1',
        '75.0.3764.0',
        '74.0.3729.83',
        '73.0.3683.113',
        '75.0.3763.2',
        '75.0.3761.4',
        '74.0.3729.82',
        '75.0.3763.1',
        '75.0.3763.0',
        '74.0.3729.81',
        '73.0.3683.112',
        '75.0.3762.1',
        '75.0.3762.0',
        '74.0.3729.80',
        '75.0.3761.3',
        '74.0.3729.79',
        '73.0.3683.111',
        '75.0.3761.2',
        '74.0.3729.78',
        '74.0.3729.77',
        '75.0.3761.1',
        '75.0.3761.0',
        '73.0.3683.110',
        '74.0.3729.76',
        '74.0.3729.75',
        '75.0.3760.0',
        '74.0.3729.74',
        '75.0.3759.8',
        '75.0.3759.7',
        '75.0.3759.6',
        '74.0.3729.73',
        '75.0.3759.5',
        '74.0.3729.72',
        '73.0.3683.109',
        '75.0.3759.4',
        '75.0.3759.3',
        '74.0.3729.71',
        '75.0.3759.2',
        '74.0.3729.70',
        '73.0.3683.108',
        '74.0.3729.69',
        '75.0.3759.1',
        '75.0.3759.0',
        '74.0.3729.68',
        '73.0.3683.107',
        '74.0.3729.67',
        '75.0.3758.1',
        '75.0.3758.0',
        '74.0.3729.66',
        '73.0.3683.106',
        '74.0.3729.65',
        '75.0.3757.1',
        '75.0.3757.0',
        '74.0.3729.64',
        '73.0.3683.105',
        '74.0.3729.63',
        '75.0.3756.1',
        '75.0.3756.0',
        '74.0.3729.62',
        '73.0.3683.104',
        '75.0.3755.3',
        '75.0.3755.2',
        '73.0.3683.103',
        '75.0.3755.1',
        '75.0.3755.0',
        '74.0.3729.61',
        '73.0.3683.102',
        '74.0.3729.60',
        '75.0.3754.2',
        '74.0.3729.59',
        '75.0.3753.4',
        '74.0.3729.58',
        '75.0.3754.1',
        '75.0.3754.0',
        '74.0.3729.57',
        '73.0.3683.101',
        '75.0.3753.3',
        '75.0.3752.2',
        '75.0.3753.2',
        '74.0.3729.56',
        '75.0.3753.1',
        '75.0.3753.0',
        '74.0.3729.55',
        '73.0.3683.100',
        '74.0.3729.54',
        '75.0.3752.1',
        '75.0.3752.0',
        '74.0.3729.53',
        '73.0.3683.99',
        '74.0.3729.52',
        '75.0.3751.1',
        '75.0.3751.0',
        '74.0.3729.51',
        '73.0.3683.98',
        '74.0.3729.50',
        '75.0.3750.0',
        '74.0.3729.49',
        '74.0.3729.48',
        '74.0.3729.47',
        '75.0.3749.3',
        '74.0.3729.46',
        '73.0.3683.97',
        '75.0.3749.2',
        '74.0.3729.45',
        '75.0.3749.1',
        '75.0.3749.0',
        '74.0.3729.44',
        '73.0.3683.96',
        '74.0.3729.43',
        '74.0.3729.42',
        '75.0.3748.1',
        '75.0.3748.0',
        '74.0.3729.41',
        '75.0.3747.1',
        '73.0.3683.95',
        '75.0.3746.4',
        '74.0.3729.40',
        '74.0.3729.39',
        '75.0.3747.0',
        '75.0.3746.3',
        '75.0.3746.2',
        '74.0.3729.38',
        '75.0.3746.1',
        '75.0.3746.0',
        '74.0.3729.37',
        '73.0.3683.94',
        '75.0.3745.5',
        '75.0.3745.4',
        '75.0.3745.3',
        '75.0.3745.2',
        '74.0.3729.36',
        '75.0.3745.1',
        '75.0.3745.0',
        '75.0.3744.2',
        '74.0.3729.35',
        '73.0.3683.93',
        '74.0.3729.34',
        '75.0.3744.1',
        '75.0.3744.0',
        '74.0.3729.33',
        '73.0.3683.92',
        '74.0.3729.32',
        '74.0.3729.31',
        '73.0.3683.91',
        '75.0.3741.2',
        '75.0.3740.5',
        '74.0.3729.30',
        '75.0.3741.1',
        '75.0.3741.0',
        '74.0.3729.29',
        '75.0.3740.4',
        '73.0.3683.90',
        '74.0.3729.28',
        '75.0.3740.3',
        '73.0.3683.89',
        '75.0.3740.2',
        '74.0.3729.27',
        '75.0.3740.1',
        '75.0.3740.0',
        '74.0.3729.26',
        '73.0.3683.88',
        '73.0.3683.87',
        '74.0.3729.25',
        '75.0.3739.1',
        '75.0.3739.0',
        '73.0.3683.86',
        '74.0.3729.24',
        '73.0.3683.85',
        '75.0.3738.4',
        '75.0.3738.3',
        '75.0.3738.2',
        '75.0.3738.1',
        '75.0.3738.0',
        '74.0.3729.23',
        '73.0.3683.84',
        '74.0.3729.22',
        '74.0.3729.21',
        '75.0.3737.1',
        '75.0.3737.0',
        '74.0.3729.20',
        '73.0.3683.83',
        '74.0.3729.19',
        '75.0.3736.1',
        '75.0.3736.0',
        '74.0.3729.18',
        '73.0.3683.82',
        '74.0.3729.17',
        '75.0.3735.1',
        '75.0.3735.0',
        '74.0.3729.16',
        '73.0.3683.81',
        '75.0.3734.1',
        '75.0.3734.0',
        '74.0.3729.15',
        '73.0.3683.80',
        '74.0.3729.14',
        '75.0.3733.1',
        '75.0.3733.0',
        '75.0.3732.1',
        '74.0.3729.13',
        '74.0.3729.12',
        '73.0.3683.79',
        '74.0.3729.11',
        '75.0.3732.0',
        '74.0.3729.10',
        '73.0.3683.78',
        '74.0.3729.9',
        '74.0.3729.8',
        '74.0.3729.7',
        '75.0.3731.3',
        '75.0.3731.2',
        '75.0.3731.0',
        '74.0.3729.6',
        '73.0.3683.77',
        '73.0.3683.76',
        '75.0.3730.5',
        '75.0.3730.4',
        '73.0.3683.75',
        '74.0.3729.5',
        '73.0.3683.74',
        '75.0.3730.3',
        '75.0.3730.2',
        '74.0.3729.4',
        '73.0.3683.73',
        '73.0.3683.72',
        '75.0.3730.1',
        '75.0.3730.0',
        '74.0.3729.3',
        '73.0.3683.71',
        '74.0.3729.2',
        '73.0.3683.70',
        '74.0.3729.1',
        '74.0.3729.0',
        '74.0.3726.4',
        '73.0.3683.69',
        '74.0.3726.3',
        '74.0.3728.0',
        '74.0.3726.2',
        '73.0.3683.68',
        '74.0.3726.1',
        '74.0.3726.0',
        '74.0.3725.4',
        '73.0.3683.67',
        '73.0.3683.66',
        '74.0.3725.3',
        '74.0.3725.2',
        '74.0.3725.1',
        '74.0.3724.8',
        '74.0.3725.0',
        '73.0.3683.65',
        '74.0.3724.7',
        '74.0.3724.6',
        '74.0.3724.5',
        '74.0.3724.4',
        '74.0.3724.3',
        '74.0.3724.2',
        '74.0.3724.1',
        '74.0.3724.0',
        '73.0.3683.64',
        '74.0.3723.1',
        '74.0.3723.0',
        '73.0.3683.63',
        '74.0.3722.1',
        '74.0.3722.0',
        '73.0.3683.62',
        '74.0.3718.9',
        '74.0.3702.3',
        '74.0.3721.3',
        '74.0.3721.2',
        '74.0.3721.1',
        '74.0.3721.0',
        '74.0.3720.6',
        '73.0.3683.61',
        '72.0.3626.122',
        '73.0.3683.60',
        '74.0.3720.5',
        '72.0.3626.121',
        '74.0.3718.8',
        '74.0.3720.4',
        '74.0.3720.3',
        '74.0.3718.7',
        '74.0.3720.2',
        '74.0.3720.1',
        '74.0.3720.0',
        '74.0.3718.6',
        '74.0.3719.5',
        '73.0.3683.59',
        '74.0.3718.5',
        '74.0.3718.4',
        '74.0.3719.4',
        '74.0.3719.3',
        '74.0.3719.2',
        '74.0.3719.1',
        '73.0.3683.58',
        '74.0.3719.0',
        '73.0.3683.57',
        '73.0.3683.56',
        '74.0.3718.3',
        '73.0.3683.55',
        '74.0.3718.2',
        '74.0.3718.1',
        '74.0.3718.0',
        '73.0.3683.54',
        '74.0.3717.2',
        '73.0.3683.53',
        '74.0.3717.1',
        '74.0.3717.0',
        '73.0.3683.52',
        '74.0.3716.1',
        '74.0.3716.0',
        '73.0.3683.51',
        '74.0.3715.1',
        '74.0.3715.0',
        '73.0.3683.50',
        '74.0.3711.2',
        '74.0.3714.2',
        '74.0.3713.3',
        '74.0.3714.1',
        '74.0.3714.0',
        '73.0.3683.49',
        '74.0.3713.1',
        '74.0.3713.0',
        '72.0.3626.120',
        '73.0.3683.48',
        '74.0.3712.2',
        '74.0.3712.1',
        '74.0.3712.0',
        '73.0.3683.47',
        '72.0.3626.119',
        '73.0.3683.46',
        '74.0.3710.2',
        '72.0.3626.118',
        '74.0.3711.1',
        '74.0.3711.0',
        '73.0.3683.45',
        '72.0.3626.117',
        '74.0.3710.1',
        '74.0.3710.0',
        '73.0.3683.44',
        '72.0.3626.116',
        '74.0.3709.1',
        '74.0.3709.0',
        '74.0.3704.9',
        '73.0.3683.43',
        '72.0.3626.115',
        '74.0.3704.8',
        '74.0.3704.7',
        '74.0.3708.0',
        '74.0.3706.7',
        '74.0.3704.6',
        '73.0.3683.42',
        '72.0.3626.114',
        '74.0.3706.6',
        '72.0.3626.113',
        '74.0.3704.5',
        '74.0.3706.5',
        '74.0.3706.4',
        '74.0.3706.3',
        '74.0.3706.2',
        '74.0.3706.1',
        '74.0.3706.0',
        '73.0.3683.41',
        '72.0.3626.112',
        '74.0.3705.1',
        '74.0.3705.0',
        '73.0.3683.40',
        '72.0.3626.111',
        '73.0.3683.39',
        '74.0.3704.4',
        '73.0.3683.38',
        '74.0.3704.3',
        '74.0.3704.2',
        '74.0.3704.1',
        '74.0.3704.0',
        '73.0.3683.37',
        '72.0.3626.110',
        '72.0.3626.109',
        '74.0.3703.3',
        '74.0.3703.2',
        '73.0.3683.36',
        '74.0.3703.1',
        '74.0.3703.0',
        '73.0.3683.35',
        '72.0.3626.108',
        '74.0.3702.2',
        '74.0.3699.3',
        '74.0.3702.1',
        '74.0.3702.0',
        '73.0.3683.34',
        '72.0.3626.107',
        '73.0.3683.33',
        '74.0.3701.1',
        '74.0.3701.0',
        '73.0.3683.32',
        '73.0.3683.31',
        '72.0.3626.105',
        '74.0.3700.1',
        '74.0.3700.0',
        '73.0.3683.29',
        '72.0.3626.103',
        '74.0.3699.2',
        '74.0.3699.1',
        '74.0.3699.0',
        '73.0.3683.28',
        '72.0.3626.102',
        '73.0.3683.27',
        '73.0.3683.26',
        '74.0.3698.0',
        '74.0.3696.2',
        '72.0.3626.101',
        '73.0.3683.25',
        '74.0.3696.1',
        '74.0.3696.0',
        '74.0.3694.8',
        '72.0.3626.100',
        '74.0.3694.7',
        '74.0.3694.6',
        '74.0.3694.5',
        '74.0.3694.4',
        '72.0.3626.99',
        '72.0.3626.98',
        '74.0.3694.3',
        '73.0.3683.24',
        '72.0.3626.97',
        '72.0.3626.96',
        '72.0.3626.95',
        '73.0.3683.23',
        '72.0.3626.94',
        '73.0.3683.22',
        '73.0.3683.21',
        '72.0.3626.93',
        '74.0.3694.2',
        '72.0.3626.92',
        '74.0.3694.1',
        '74.0.3694.0',
        '74.0.3693.6',
        '73.0.3683.20',
        '72.0.3626.91',
        '74.0.3693.5',
        '74.0.3693.4',
        '74.0.3693.3',
        '74.0.3693.2',
        '73.0.3683.19',
        '74.0.3693.1',
        '74.0.3693.0',
        '73.0.3683.18',
        '72.0.3626.90',
        '74.0.3692.1',
        '74.0.3692.0',
        '73.0.3683.17',
        '72.0.3626.89',
        '74.0.3687.3',
        '74.0.3691.1',
        '74.0.3691.0',
        '73.0.3683.16',
        '72.0.3626.88',
        '72.0.3626.87',
        '73.0.3683.15',
        '74.0.3690.1',
        '74.0.3690.0',
        '73.0.3683.14',
        '72.0.3626.86',
        '73.0.3683.13',
        '73.0.3683.12',
        '74.0.3689.1',
        '74.0.3689.0',
        '73.0.3683.11',
        '72.0.3626.85',
        '73.0.3683.10',
        '72.0.3626.84',
        '73.0.3683.9',
        '74.0.3688.1',
        '74.0.3688.0',
        '73.0.3683.8',
        '72.0.3626.83',
        '74.0.3687.2',
        '74.0.3687.1',
        '74.0.3687.0',
        '73.0.3683.7',
        '72.0.3626.82',
        '74.0.3686.4',
        '72.0.3626.81',
        '74.0.3686.3',
        '74.0.3686.2',
        '74.0.3686.1',
        '74.0.3686.0',
        '73.0.3683.6',
        '72.0.3626.80',
        '74.0.3685.1',
        '74.0.3685.0',
        '73.0.3683.5',
        '72.0.3626.79',
        '74.0.3684.1',
        '74.0.3684.0',
        '73.0.3683.4',
        '72.0.3626.78',
        '72.0.3626.77',
        '73.0.3683.3',
        '73.0.3683.2',
        '72.0.3626.76',
        '73.0.3683.1',
        '73.0.3683.0',
        '72.0.3626.75',
        '71.0.3578.141',
        '73.0.3682.1',
        '73.0.3682.0',
        '72.0.3626.74',
        '71.0.3578.140',
        '73.0.3681.4',
        '73.0.3681.3',
        '73.0.3681.2',
        '73.0.3681.1',
        '73.0.3681.0',
        '72.0.3626.73',
        '71.0.3578.139',
        '72.0.3626.72',
        '72.0.3626.71',
        '73.0.3680.1',
        '73.0.3680.0',
        '72.0.3626.70',
        '71.0.3578.138',
        '73.0.3678.2',
        '73.0.3679.1',
        '73.0.3679.0',
        '72.0.3626.69',
        '71.0.3578.137',
        '73.0.3678.1',
        '73.0.3678.0',
        '71.0.3578.136',
        '73.0.3677.1',
        '73.0.3677.0',
        '72.0.3626.68',
        '72.0.3626.67',
        '71.0.3578.135',
        '73.0.3676.1',
        '73.0.3676.0',
        '73.0.3674.2',
        '72.0.3626.66',
        '71.0.3578.134',
        '73.0.3674.1',
        '73.0.3674.0',
        '72.0.3626.65',
        '71.0.3578.133',
        '73.0.3673.2',
        '73.0.3673.1',
        '73.0.3673.0',
        '72.0.3626.64',
        '71.0.3578.132',
        '72.0.3626.63',
        '72.0.3626.62',
        '72.0.3626.61',
        '72.0.3626.60',
        '73.0.3672.1',
        '73.0.3672.0',
        '72.0.3626.59',
        '71.0.3578.131',
        '73.0.3671.3',
        '73.0.3671.2',
        '73.0.3671.1',
        '73.0.3671.0',
        '72.0.3626.58',
        '71.0.3578.130',
        '73.0.3670.1',
        '73.0.3670.0',
        '72.0.3626.57',
        '71.0.3578.129',
        '73.0.3669.1',
        '73.0.3669.0',
        '72.0.3626.56',
        '71.0.3578.128',
        '73.0.3668.2',
        '73.0.3668.1',
        '73.0.3668.0',
        '72.0.3626.55',
        '71.0.3578.127',
        '73.0.3667.2',
        '73.0.3667.1',
        '73.0.3667.0',
        '72.0.3626.54',
        '71.0.3578.126',
        '73.0.3666.1',
        '73.0.3666.0',
        '72.0.3626.53',
        '71.0.3578.125',
        '73.0.3665.4',
        '73.0.3665.3',
        '72.0.3626.52',
        '73.0.3665.2',
        '73.0.3664.4',
        '73.0.3665.1',
        '73.0.3665.0',
        '72.0.3626.51',
        '71.0.3578.124',
        '72.0.3626.50',
        '73.0.3664.3',
        '73.0.3664.2',
        '73.0.3664.1',
        '73.0.3664.0',
        '73.0.3663.2',
        '72.0.3626.49',
        '71.0.3578.123',
        '73.0.3663.1',
        '73.0.3663.0',
        '72.0.3626.48',
        '71.0.3578.122',
        '73.0.3662.1',
        '73.0.3662.0',
        '72.0.3626.47',
        '71.0.3578.121',
        '73.0.3661.1',
        '72.0.3626.46',
        '73.0.3661.0',
        '72.0.3626.45',
        '71.0.3578.120',
        '73.0.3660.2',
        '73.0.3660.1',
        '73.0.3660.0',
        '72.0.3626.44',
        '71.0.3578.119',
        '73.0.3659.1',
        '73.0.3659.0',
        '72.0.3626.43',
        '71.0.3578.118',
        '73.0.3658.1',
        '73.0.3658.0',
        '72.0.3626.42',
        '71.0.3578.117',
        '73.0.3657.1',
        '73.0.3657.0',
        '72.0.3626.41',
        '71.0.3578.116',
        '73.0.3656.1',
        '73.0.3656.0',
        '72.0.3626.40',
        '71.0.3578.115',
        '73.0.3655.1',
        '73.0.3655.0',
        '72.0.3626.39',
        '71.0.3578.114',
        '73.0.3654.1',
        '73.0.3654.0',
        '72.0.3626.38',
        '71.0.3578.113',
        '73.0.3653.1',
        '73.0.3653.0',
        '72.0.3626.37',
        '71.0.3578.112',
        '73.0.3652.1',
        '73.0.3652.0',
        '72.0.3626.36',
        '71.0.3578.111',
        '73.0.3651.1',
        '73.0.3651.0',
        '72.0.3626.35',
        '71.0.3578.110',
        '73.0.3650.1',
        '73.0.3650.0',
        '72.0.3626.34',
        '71.0.3578.109',
        '73.0.3649.1',
        '73.0.3649.0',
        '72.0.3626.33',
        '71.0.3578.108',
        '73.0.3648.2',
        '73.0.3648.1',
        '73.0.3648.0',
        '72.0.3626.32',
        '71.0.3578.107',
        '73.0.3647.2',
        '73.0.3647.1',
        '73.0.3647.0',
        '72.0.3626.31',
        '71.0.3578.106',
        '73.0.3635.3',
        '73.0.3646.2',
        '73.0.3646.1',
        '73.0.3646.0',
        '72.0.3626.30',
        '71.0.3578.105',
        '72.0.3626.29',
        '73.0.3645.2',
        '73.0.3645.1',
        '73.0.3645.0',
        '72.0.3626.28',
        '71.0.3578.104',
        '72.0.3626.27',
        '72.0.3626.26',
        '72.0.3626.25',
        '72.0.3626.24',
        '73.0.3644.0',
        '73.0.3643.2',
        '72.0.3626.23',
        '71.0.3578.103',
        '73.0.3643.1',
        '73.0.3643.0',
        '72.0.3626.22',
        '71.0.3578.102',
        '73.0.3642.1',
        '73.0.3642.0',
        '72.0.3626.21',
        '71.0.3578.101',
        '73.0.3641.1',
        '73.0.3641.0',
        '72.0.3626.20',
        '71.0.3578.100',
        '72.0.3626.19',
        '73.0.3640.1',
        '73.0.3640.0',
        '72.0.3626.18',
        '73.0.3639.1',
        '71.0.3578.99',
        '73.0.3639.0',
        '72.0.3626.17',
        '73.0.3638.2',
        '72.0.3626.16',
        '73.0.3638.1',
        '73.0.3638.0',
        '72.0.3626.15',
        '71.0.3578.98',
        '73.0.3635.2',
        '71.0.3578.97',
        '73.0.3637.1',
        '73.0.3637.0',
        '72.0.3626.14',
        '71.0.3578.96',
        '71.0.3578.95',
        '72.0.3626.13',
        '71.0.3578.94',
        '73.0.3636.2',
        '71.0.3578.93',
        '73.0.3636.1',
        '73.0.3636.0',
        '72.0.3626.12',
        '71.0.3578.92',
        '73.0.3635.1',
        '73.0.3635.0',
        '72.0.3626.11',
        '71.0.3578.91',
        '73.0.3634.2',
        '73.0.3634.1',
        '73.0.3634.0',
        '72.0.3626.10',
        '71.0.3578.90',
        '71.0.3578.89',
        '73.0.3633.2',
        '73.0.3633.1',
        '73.0.3633.0',
        '72.0.3610.4',
        '72.0.3626.9',
        '71.0.3578.88',
        '73.0.3632.5',
        '73.0.3632.4',
        '73.0.3632.3',
        '73.0.3632.2',
        '73.0.3632.1',
        '73.0.3632.0',
        '72.0.3626.8',
        '71.0.3578.87',
        '73.0.3631.2',
        '73.0.3631.1',
        '73.0.3631.0',
        '72.0.3626.7',
        '71.0.3578.86',
        '72.0.3626.6',
        '73.0.3630.1',
        '73.0.3630.0',
        '72.0.3626.5',
        '71.0.3578.85',
        '72.0.3626.4',
        '73.0.3628.3',
        '73.0.3628.2',
        '73.0.3629.1',
        '73.0.3629.0',
        '72.0.3626.3',
        '71.0.3578.84',
        '73.0.3628.1',
        '73.0.3628.0',
        '71.0.3578.83',
        '73.0.3627.1',
        '73.0.3627.0',
        '72.0.3626.2',
        '71.0.3578.82',
        '71.0.3578.81',
        '71.0.3578.80',
        '72.0.3626.1',
        '72.0.3626.0',
        '71.0.3578.79',
        '70.0.3538.124',
        '71.0.3578.78',
        '72.0.3623.4',
        '72.0.3625.2',
        '72.0.3625.1',
        '72.0.3625.0',
        '71.0.3578.77',
        '70.0.3538.123',
        '72.0.3624.4',
        '72.0.3624.3',
        '72.0.3624.2',
        '71.0.3578.76',
        '72.0.3624.1',
        '72.0.3624.0',
        '72.0.3623.3',
        '71.0.3578.75',
        '70.0.3538.122',
        '71.0.3578.74',
        '72.0.3623.2',
        '72.0.3610.3',
        '72.0.3623.1',
        '72.0.3623.0',
        '72.0.3622.3',
        '72.0.3622.2',
        '71.0.3578.73',
        '70.0.3538.121',
        '72.0.3622.1',
        '72.0.3622.0',
        '71.0.3578.72',
        '70.0.3538.120',
        '72.0.3621.1',
        '72.0.3621.0',
        '71.0.3578.71',
        '70.0.3538.119',
        '72.0.3620.1',
        '72.0.3620.0',
        '71.0.3578.70',
        '70.0.3538.118',
        '71.0.3578.69',
        '72.0.3619.1',
        '72.0.3619.0',
        '71.0.3578.68',
        '70.0.3538.117',
        '71.0.3578.67',
        '72.0.3618.1',
        '72.0.3618.0',
        '71.0.3578.66',
        '70.0.3538.116',
        '72.0.3617.1',
        '72.0.3617.0',
        '71.0.3578.65',
        '70.0.3538.115',
        '72.0.3602.3',
        '71.0.3578.64',
        '72.0.3616.1',
        '72.0.3616.0',
        '71.0.3578.63',
        '70.0.3538.114',
        '71.0.3578.62',
        '72.0.3615.1',
        '72.0.3615.0',
        '71.0.3578.61',
        '70.0.3538.113',
        '72.0.3614.1',
        '72.0.3614.0',
        '71.0.3578.60',
        '70.0.3538.112',
        '72.0.3613.1',
        '72.0.3613.0',
        '71.0.3578.59',
        '70.0.3538.111',
        '72.0.3612.2',
        '72.0.3612.1',
        '72.0.3612.0',
        '70.0.3538.110',
        '71.0.3578.58',
        '70.0.3538.109',
        '72.0.3611.2',
        '72.0.3611.1',
        '72.0.3611.0',
        '71.0.3578.57',
        '70.0.3538.108',
        '72.0.3610.2',
        '71.0.3578.56',
        '71.0.3578.55',
        '72.0.3610.1',
        '72.0.3610.0',
        '71.0.3578.54',
        '70.0.3538.107',
        '71.0.3578.53',
        '72.0.3609.3',
        '71.0.3578.52',
        '72.0.3609.2',
        '71.0.3578.51',
        '72.0.3608.5',
        '72.0.3609.1',
        '72.0.3609.0',
        '71.0.3578.50',
        '70.0.3538.106',
        '72.0.3608.4',
        '72.0.3608.3',
        '72.0.3608.2',
        '71.0.3578.49',
        '72.0.3608.1',
        '72.0.3608.0',
        '70.0.3538.105',
        '71.0.3578.48',
        '72.0.3607.1',
        '72.0.3607.0',
        '71.0.3578.47',
        '70.0.3538.104',
        '72.0.3606.2',
        '72.0.3606.1',
        '72.0.3606.0',
        '71.0.3578.46',
        '70.0.3538.103',
        '70.0.3538.102',
        '72.0.3605.3',
        '72.0.3605.2',
        '72.0.3605.1',
        '72.0.3605.0',
        '71.0.3578.45',
        '70.0.3538.101',
        '71.0.3578.44',
        '71.0.3578.43',
        '70.0.3538.100',
        '70.0.3538.99',
        '71.0.3578.42',
        '72.0.3604.1',
        '72.0.3604.0',
        '71.0.3578.41',
        '70.0.3538.98',
        '71.0.3578.40',
        '72.0.3603.2',
        '72.0.3603.1',
        '72.0.3603.0',
        '71.0.3578.39',
        '70.0.3538.97',
        '72.0.3602.2',
        '71.0.3578.38',
        '71.0.3578.37',
        '72.0.3602.1',
        '72.0.3602.0',
        '71.0.3578.36',
        '70.0.3538.96',
        '72.0.3601.1',
        '72.0.3601.0',
        '71.0.3578.35',
        '70.0.3538.95',
        '72.0.3600.1',
        '72.0.3600.0',
        '71.0.3578.34',
        '70.0.3538.94',
        '72.0.3599.3',
        '72.0.3599.2',
        '72.0.3599.1',
        '72.0.3599.0',
        '71.0.3578.33',
        '70.0.3538.93',
        '72.0.3598.1',
        '72.0.3598.0',
        '71.0.3578.32',
        '70.0.3538.87',
        '72.0.3597.1',
        '72.0.3597.0',
        '72.0.3596.2',
        '71.0.3578.31',
        '70.0.3538.86',
        '71.0.3578.30',
        '71.0.3578.29',
        '72.0.3596.1',
        '72.0.3596.0',
        '71.0.3578.28',
        '70.0.3538.85',
        '72.0.3595.2',
        '72.0.3591.3',
        '72.0.3595.1',
        '72.0.3595.0',
        '71.0.3578.27',
        '70.0.3538.84',
        '72.0.3594.1',
        '72.0.3594.0',
        '71.0.3578.26',
        '70.0.3538.83',
        '72.0.3593.2',
        '72.0.3593.1',
        '72.0.3593.0',
        '71.0.3578.25',
        '70.0.3538.82',
        '72.0.3589.3',
        '72.0.3592.2',
        '72.0.3592.1',
        '72.0.3592.0',
        '71.0.3578.24',
        '72.0.3589.2',
        '70.0.3538.81',
        '70.0.3538.80',
        '72.0.3591.2',
        '72.0.3591.1',
        '72.0.3591.0',
        '71.0.3578.23',
        '70.0.3538.79',
        '71.0.3578.22',
        '72.0.3590.1',
        '72.0.3590.0',
        '71.0.3578.21',
        '70.0.3538.78',
        '70.0.3538.77',
        '72.0.3589.1',
        '72.0.3589.0',
        '71.0.3578.20',
        '70.0.3538.76',
        '71.0.3578.19',
        '70.0.3538.75',
        '72.0.3588.1',
        '72.0.3588.0',
        '71.0.3578.18',
        '70.0.3538.74',
        '72.0.3586.2',
        '72.0.3587.0',
        '71.0.3578.17',
        '70.0.3538.73',
        '72.0.3586.1',
        '72.0.3586.0',
        '71.0.3578.16',
        '70.0.3538.72',
        '72.0.3585.1',
        '72.0.3585.0',
        '71.0.3578.15',
        '70.0.3538.71',
        '71.0.3578.14',
        '72.0.3584.1',
        '72.0.3584.0',
        '71.0.3578.13',
        '70.0.3538.70',
        '72.0.3583.2',
        '71.0.3578.12',
        '72.0.3583.1',
        '72.0.3583.0',
        '71.0.3578.11',
        '70.0.3538.69',
        '71.0.3578.10',
        '72.0.3582.0',
        '72.0.3581.4',
        '71.0.3578.9',
        '70.0.3538.67',
        '72.0.3581.3',
        '72.0.3581.2',
        '72.0.3581.1',
        '72.0.3581.0',
        '71.0.3578.8',
        '70.0.3538.66',
        '72.0.3580.1',
        '72.0.3580.0',
        '71.0.3578.7',
        '70.0.3538.65',
        '71.0.3578.6',
        '72.0.3579.1',
        '72.0.3579.0',
        '71.0.3578.5',
        '70.0.3538.64',
        '71.0.3578.4',
        '71.0.3578.3',
        '71.0.3578.2',
        '71.0.3578.1',
        '71.0.3578.0',
        '70.0.3538.63',
        '69.0.3497.128',
        '70.0.3538.62',
        '70.0.3538.61',
        '70.0.3538.60',
        '70.0.3538.59',
        '71.0.3577.1',
        '71.0.3577.0',
        '70.0.3538.58',
        '69.0.3497.127',
        '71.0.3576.2',
        '71.0.3576.1',
        '71.0.3576.0',
        '70.0.3538.57',
        '70.0.3538.56',
        '71.0.3575.2',
        '70.0.3538.55',
        '69.0.3497.126',
        '70.0.3538.54',
        '71.0.3575.1',
        '71.0.3575.0',
        '71.0.3574.1',
        '71.0.3574.0',
        '70.0.3538.53',
        '69.0.3497.125',
        '70.0.3538.52',
        '71.0.3573.1',
        '71.0.3573.0',
        '70.0.3538.51',
        '69.0.3497.124',
        '71.0.3572.1',
        '71.0.3572.0',
        '70.0.3538.50',
        '69.0.3497.123',
        '71.0.3571.2',
        '70.0.3538.49',
        '69.0.3497.122',
        '71.0.3571.1',
        '71.0.3571.0',
        '70.0.3538.48',
        '69.0.3497.121',
        '71.0.3570.1',
        '71.0.3570.0',
        '70.0.3538.47',
        '69.0.3497.120',
        '71.0.3568.2',
        '71.0.3569.1',
        '71.0.3569.0',
        '70.0.3538.46',
        '69.0.3497.119',
        '70.0.3538.45',
        '71.0.3568.1',
        '71.0.3568.0',
        '70.0.3538.44',
        '69.0.3497.118',
        '70.0.3538.43',
        '70.0.3538.42',
        '71.0.3567.1',
        '71.0.3567.0',
        '70.0.3538.41',
        '69.0.3497.117',
        '71.0.3566.1',
        '71.0.3566.0',
        '70.0.3538.40',
        '69.0.3497.116',
        '71.0.3565.1',
        '71.0.3565.0',
        '70.0.3538.39',
        '69.0.3497.115',
        '71.0.3564.1',
        '71.0.3564.0',
        '70.0.3538.38',
        '69.0.3497.114',
        '71.0.3563.0',
        '71.0.3562.2',
        '70.0.3538.37',
        '69.0.3497.113',
        '70.0.3538.36',
        '70.0.3538.35',
        '71.0.3562.1',
        '71.0.3562.0',
        '70.0.3538.34',
        '69.0.3497.112',
        '70.0.3538.33',
        '71.0.3561.1',
        '71.0.3561.0',
        '70.0.3538.32',
        '69.0.3497.111',
        '71.0.3559.6',
        '71.0.3560.1',
        '71.0.3560.0',
        '71.0.3559.5',
        '71.0.3559.4',
        '70.0.3538.31',
        '69.0.3497.110',
        '71.0.3559.3',
        '70.0.3538.30',
        '69.0.3497.109',
        '71.0.3559.2',
        '71.0.3559.1',
        '71.0.3559.0',
        '70.0.3538.29',
        '69.0.3497.108',
        '71.0.3558.2',
        '71.0.3558.1',
        '71.0.3558.0',
        '70.0.3538.28',
        '69.0.3497.107',
        '71.0.3557.2',
        '71.0.3557.1',
        '71.0.3557.0',
        '70.0.3538.27',
        '69.0.3497.106',
        '71.0.3554.4',
        '70.0.3538.26',
        '71.0.3556.1',
        '71.0.3556.0',
        '70.0.3538.25',
        '71.0.3554.3',
        '69.0.3497.105',
        '71.0.3554.2',
        '70.0.3538.24',
        '69.0.3497.104',
        '71.0.3555.2',
        '70.0.3538.23',
        '71.0.3555.1',
        '71.0.3555.0',
        '70.0.3538.22',
        '69.0.3497.103',
        '71.0.3554.1',
        '71.0.3554.0',
        '70.0.3538.21',
        '69.0.3497.102',
        '71.0.3553.3',
        '70.0.3538.20',
        '69.0.3497.101',
        '71.0.3553.2',
        '69.0.3497.100',
        '71.0.3553.1',
        '71.0.3553.0',
        '70.0.3538.19',
        '69.0.3497.99',
        '69.0.3497.98',
        '69.0.3497.97',
        '71.0.3552.6',
        '71.0.3552.5',
        '71.0.3552.4',
        '71.0.3552.3',
        '71.0.3552.2',
        '71.0.3552.1',
        '71.0.3552.0',
        '70.0.3538.18',
        '69.0.3497.96',
        '71.0.3551.3',
        '71.0.3551.2',
        '71.0.3551.1',
        '71.0.3551.0',
        '70.0.3538.17',
        '69.0.3497.95',
        '71.0.3550.3',
        '71.0.3550.2',
        '71.0.3550.1',
        '71.0.3550.0',
        '70.0.3538.16',
        '69.0.3497.94',
        '71.0.3549.1',
        '71.0.3549.0',
        '70.0.3538.15',
        '69.0.3497.93',
        '69.0.3497.92',
        '71.0.3548.1',
        '71.0.3548.0',
        '70.0.3538.14',
        '69.0.3497.91',
        '71.0.3547.1',
        '71.0.3547.0',
        '70.0.3538.13',
        '69.0.3497.90',
        '71.0.3546.2',
        '69.0.3497.89',
        '71.0.3546.1',
        '71.0.3546.0',
        '70.0.3538.12',
        '69.0.3497.88',
        '71.0.3545.4',
        '71.0.3545.3',
        '71.0.3545.2',
        '71.0.3545.1',
        '71.0.3545.0',
        '70.0.3538.11',
        '69.0.3497.87',
        '71.0.3544.5',
        '71.0.3544.4',
        '71.0.3544.3',
        '71.0.3544.2',
        '71.0.3544.1',
        '71.0.3544.0',
        '69.0.3497.86',
        '70.0.3538.10',
        '69.0.3497.85',
        '70.0.3538.9',
        '69.0.3497.84',
        '71.0.3543.4',
        '70.0.3538.8',
        '71.0.3543.3',
        '71.0.3543.2',
        '71.0.3543.1',
        '71.0.3543.0',
        '70.0.3538.7',
        '69.0.3497.83',
        '71.0.3542.2',
        '71.0.3542.1',
        '71.0.3542.0',
        '70.0.3538.6',
        '69.0.3497.82',
        '69.0.3497.81',
        '71.0.3541.1',
        '71.0.3541.0',
        '70.0.3538.5',
        '69.0.3497.80',
        '71.0.3540.1',
        '71.0.3540.0',
        '70.0.3538.4',
        '69.0.3497.79',
        '70.0.3538.3',
        '71.0.3539.1',
        '71.0.3539.0',
        '69.0.3497.78',
        '68.0.3440.134',
        '69.0.3497.77',
        '70.0.3538.2',
        '70.0.3538.1',
        '70.0.3538.0',
        '69.0.3497.76',
        '68.0.3440.133',
        '69.0.3497.75',
        '70.0.3537.2',
        '70.0.3537.1',
        '70.0.3537.0',
        '69.0.3497.74',
        '68.0.3440.132',
        '70.0.3536.0',
        '70.0.3535.5',
        '70.0.3535.4',
        '70.0.3535.3',
        '69.0.3497.73',
        '68.0.3440.131',
        '70.0.3532.8',
        '70.0.3532.7',
        '69.0.3497.72',
        '69.0.3497.71',
        '70.0.3535.2',
        '70.0.3535.1',
        '70.0.3535.0',
        '69.0.3497.70',
        '68.0.3440.130',
        '69.0.3497.69',
        '68.0.3440.129',
        '70.0.3534.4',
        '70.0.3534.3',
        '70.0.3534.2',
        '70.0.3534.1',
        '70.0.3534.0',
        '69.0.3497.68',
        '68.0.3440.128',
        '70.0.3533.2',
        '70.0.3533.1',
        '70.0.3533.0',
        '69.0.3497.67',
        '68.0.3440.127',
        '70.0.3532.6',
        '70.0.3532.5',
        '70.0.3532.4',
        '69.0.3497.66',
        '68.0.3440.126',
        '70.0.3532.3',
        '70.0.3532.2',
        '70.0.3532.1',
        '69.0.3497.60',
        '69.0.3497.65',
        '69.0.3497.64',
        '70.0.3532.0',
        '70.0.3531.0',
        '70.0.3530.4',
        '70.0.3530.3',
        '70.0.3530.2',
        '69.0.3497.58',
        '68.0.3440.125',
        '69.0.3497.57',
        '69.0.3497.56',
        '69.0.3497.55',
        '69.0.3497.54',
        '70.0.3530.1',
        '70.0.3530.0',
        '69.0.3497.53',
        '68.0.3440.124',
        '69.0.3497.52',
        '70.0.3529.3',
        '70.0.3529.2',
        '70.0.3529.1',
        '70.0.3529.0',
        '69.0.3497.51',
        '70.0.3528.4',
        '68.0.3440.123',
        '70.0.3528.3',
        '70.0.3528.2',
        '70.0.3528.1',
        '70.0.3528.0',
        '69.0.3497.50',
        '68.0.3440.122',
        '70.0.3527.1',
        '70.0.3527.0',
        '69.0.3497.49',
        '68.0.3440.121',
        '70.0.3526.1',
        '70.0.3526.0',
        '68.0.3440.120',
        '69.0.3497.48',
        '69.0.3497.47',
        '68.0.3440.119',
        '68.0.3440.118',
        '70.0.3525.5',
        '70.0.3525.4',
        '70.0.3525.3',
        '68.0.3440.117',
        '69.0.3497.46',
        '70.0.3525.2',
        '70.0.3525.1',
        '70.0.3525.0',
        '69.0.3497.45',
        '68.0.3440.116',
        '70.0.3524.4',
        '70.0.3524.3',
        '69.0.3497.44',
        '70.0.3524.2',
        '70.0.3524.1',
        '70.0.3524.0',
        '70.0.3523.2',
        '69.0.3497.43',
        '68.0.3440.115',
        '70.0.3505.9',
        '69.0.3497.42',
        '70.0.3505.8',
        '70.0.3523.1',
        '70.0.3523.0',
        '69.0.3497.41',
        '68.0.3440.114',
        '70.0.3505.7',
        '69.0.3497.40',
        '70.0.3522.1',
        '70.0.3522.0',
        '70.0.3521.2',
        '69.0.3497.39',
        '68.0.3440.113',
        '70.0.3505.6',
        '70.0.3521.1',
        '70.0.3521.0',
        '69.0.3497.38',
        '68.0.3440.112',
        '70.0.3520.1',
        '70.0.3520.0',
        '69.0.3497.37',
        '68.0.3440.111',
        '70.0.3519.3',
        '70.0.3519.2',
        '70.0.3519.1',
        '70.0.3519.0',
        '69.0.3497.36',
        '68.0.3440.110',
        '70.0.3518.1',
        '70.0.3518.0',
        '69.0.3497.35',
        '69.0.3497.34',
        '68.0.3440.109',
        '70.0.3517.1',
        '70.0.3517.0',
        '69.0.3497.33',
        '68.0.3440.108',
        '69.0.3497.32',
        '70.0.3516.3',
        '70.0.3516.2',
        '70.0.3516.1',
        '70.0.3516.0',
        '69.0.3497.31',
        '68.0.3440.107',
        '70.0.3515.4',
        '68.0.3440.106',
        '70.0.3515.3',
        '70.0.3515.2',
        '70.0.3515.1',
        '70.0.3515.0',
        '69.0.3497.30',
        '68.0.3440.105',
        '68.0.3440.104',
        '70.0.3514.2',
        '70.0.3514.1',
        '70.0.3514.0',
        '69.0.3497.29',
        '68.0.3440.103',
        '70.0.3513.1',
        '70.0.3513.0',
        '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
2012-11-28 00:02:55 +01:00
# Default HTTP request headers. The User-Agent is chosen once, when this
# module is imported, by calling random_user_agent().
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
2012-12-30 18:22:36 +01:00
2014-11-23 20:41:03 +01:00
2016-12-11 18:49:07 +01:00
# Alternative User-Agent strings, keyed by browser name.
USER_AGENTS = {
    'Safari': (
        'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 '
        '(KHTML, like Gecko) Version/5.0.4 Safari/533.20.27'),
}

# Unique sentinel used as a "no default supplied" marker, so that None can
# still be passed as a real default value.
NO_DEFAULT = object()
2015-02-13 08:14:23 +01:00
# English month names, January first.
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April',
    'May', 'June', 'July', 'August',
    'September', 'October', 'November', 'December',
]

# Month names per language code; each list is in calendar order.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril',
        'mai', 'juin', 'juillet', 'août',
        'septembre', 'octobre', 'novembre', 'décembre',
    ],
}
2016-09-02 18:31:52 +02:00
2016-01-03 20:08:34 +01:00
# File extensions (without the leading dot) recognised as media or manifest
# files, grouped by container family.
KNOWN_EXTENSIONS = (
    # MPEG-4 family
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    # Flash
    'flv', 'f4v', 'f4a', 'f4b',
    # WebM / Ogg
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    # Matroska
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    # Fragment / manifest formats
    'f4f', 'f4m', 'm3u8', 'smil',
)
2016-05-03 02:40:30 +02:00
# Maps each accented character to its ASCII replacement; needed for
# sanitizing filenames in restricted mode. The replacement sequence is built
# with itertools.chain because a few characters expand to two letters.
ACCENT_CHARS = dict(zip(
    'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
    itertools.chain(
        'AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y',
    ),
))
2016-05-03 02:40:30 +02:00
2016-06-25 17:30:35 +02:00
# strptime() patterns that are unambiguous about day/month order.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# The common patterns followed by numeric patterns read as day-before-month.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) + [
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
]

# The common patterns followed by numeric patterns read as month-before-day.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) + [
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
]
2016-10-19 18:28:49 +02:00
# Matches the argument list at the tail of a packed-JavaScript call:
# }('<payload>',<number>,<number>,'<words>'.split('|')
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"

# Matches a <script type="application/ld+json"> element; the script body is
# captured in the named group "json_ld".
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
2016-10-19 18:28:49 +02:00
2015-02-13 08:14:23 +01:00
2012-03-25 03:07:37 +02:00
def preferredencoding():
    """Get preferred encoding.

    Returns the encoding reported by locale.getpreferredencoding(), or
    'UTF-8' when that lookup fails or the reported encoding cannot be used
    to encode a simple test string.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Make sure the codec actually exists and works
        'TEST'.encode(encoding)
    except Exception:
        return 'UTF-8'
    return encoding
2012-03-25 03:07:37 +02:00
2012-12-20 13:13:24 +01:00
2014-08-21 13:01:13 +02:00
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first written to a temporary file created next to fn, which
    is then renamed over fn. Values json cannot serialize are written as
    their repr(). If anything fails, the temporary file is removed and the
    exception is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()

        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        def path_basename(f):
            return os.path.basename(f).decode(encoding)

        # the same for os.path.dirname
        def path_dirname(f):
            return os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf, default=repr)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # Read the current umask (os.umask both sets and returns it, so
            # it is restored right away) and give the temporary file the
            # permissions a normally created file would get.
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temporary file, then propagate
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val]

        With val=None the first element matching xpath that has the
        attribute key at all is returned. Returns None when nothing matches.
        """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] by filtering the matches manually """
        for candidate in node.findall(compat_xpath(xpath)):
            if key in candidate.attrib and (
                    val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
2013-10-12 21:34:04 +02:00
# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
2014-11-23 20:41:03 +01:00
2013-10-12 21:34:04 +02:00
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of path into '{namespace-uri}tag' form.

    ns_map maps each prefix to its namespace URI. Steps without a prefix are
    left unchanged.
    """
    def _expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join([_expand(step) for step in path.split('/')])
2012-03-25 03:07:37 +02:00
2015-09-04 19:56:45 +02:00
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element of node matching xpath.

    xpath is either a single XPath string or an iterable of XPath strings
    that are tried in order; the first match wins.

    When nothing matches: default is returned if one was given, otherwise
    ExtractorError is raised if fatal is set, otherwise None is returned.
    name is only used in the error message (falls back to xpath).
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from "not found" so an empty iterable of paths is handled
        # like any other miss instead of leaving n unbound
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element found by xpath_element().

    A missing element, or an element without text, yields default if one was
    given, raises ExtractorError if fatal is set, and yields None otherwise.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # Either nothing was found or xpath_element already handed back default
    if element is None or element == default:
        return element
    if element.text is not None:
        return element.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = xpath if name is None else name
        raise ExtractorError('Could not find XML element\'s text %s' % name)
    return None
2015-09-04 19:56:45 +02:00
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it.

    When no such element exists: default is returned if one was given,
    ExtractorError is raised if fatal is set, None is returned otherwise.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
2014-09-13 09:09:55 +02:00
2012-04-11 00:22:51 +02:00
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # An ID lookup is an attribute lookup on "id"
    return get_element_by_attribute('id', id, html)
2012-12-19 15:21:14 +01:00
2014-11-04 23:20:39 +01:00
2016-07-06 14:02:52 +02:00
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag whose attribute matches value, or None"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # The class attribute may list several names, so match class_name as a
    # whole word anywhere inside the attribute value
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute(
        'class', class_pattern, html, escape_value=False)
2017-02-11 10:16:54 +01:00
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list

    value is matched literally unless escape_value is False, in which case
    it is inserted into the search pattern as a regular expression.
    """
    if escape_value:
        value = re.escape(value)

    element_re = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    contents = []
    for match in re.finditer(element_re, html):
        content = match.group('content')
        # Drop one leading and one trailing character when the content
        # starts with a quote
        if content.startswith('"') or content.startswith("'"):
            content = content[1:-1]
        contents.append(unescapeHTML(content))
    return contents
2013-09-13 22:05:29 +02:00
2016-03-16 16:50:04 +01:00
2016-01-02 20:49:59 +01:00
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Attributes of the most recently seen start tag; empty until one
        # has been parsed
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # attrs arrives as (name, value) pairs; keep them as a dict
        self.attrs = dict(attrs)
2016-03-16 16:50:04 +01:00
2016-01-02 20:49:59 +01:00
def extract_attributes(html_element):
    """Decode the attributes of a single HTML element into a dictionary.

    Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&#98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    the result is
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # whatever was collected before the error is still returned
        pass
    return attr_parser.attrs
2012-04-11 00:22:51 +02:00
2016-03-16 16:50:04 +01:00
2012-04-11 00:22:51 +02:00
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    # Convenience for sanitizing descriptions etc.
    if html is None:
        return html

    # Source newlines carry no meaning; <br> and </p><p> become the real ones
    text = html.replace('\n', ' ')
    text = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    # Strip the remaining tags, then decode the entities
    text = re.sub('<.*?>', '', text)
    return unescapeHTML(text).strip()
2012-04-11 00:22:51 +02:00
2012-03-25 03:07:37 +02:00
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    The name '-' stands for standard output. If opening filename fails for a
    reason other than a permission error, the name is passed through
    sanitize_path() and opened again; an error from that second attempt is
    left for the caller to handle.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            # Use the underlying binary stream where there is one
            out = sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout
            return (out, filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
2012-03-25 03:07:37 +02:00
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when timestr cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
2012-11-26 23:58:46 +01:00
2014-11-23 20:41:03 +01:00
2012-12-03 15:36:24 +01:00
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.

    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be
    kept if possible: the cleanup of underscores, leading dashes and leading
    dots is then skipped.
    """
    def _clean_char(ch):
        if restricted and ch in ACCENT_CHARS:
            return ACCENT_CHARS[ch]
        codepoint = ord(ch)
        # Question marks and control characters are dropped
        if ch == '?' or codepoint < 32 or codepoint == 127:
            return ''
        if ch == '"':
            return '' if restricted else '\''
        if ch == ':':
            return '_-' if restricted else ' -'
        if ch in '\\/|*<>':
            return '_'
        if restricted and (
                ch in '!&\'()[]{}$;`^,#' or ch.isspace() or codepoint > 127):
            return '_'
        return ch

    if s == '':
        return ''

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join([_clean_char(ch) for ch in s])
    if is_id:
        return result

    # Collapse runs of underscores and trim them from both ends
    while '__' in result:
        result = result.replace('__', '_')
    result = result.strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[len('-'):]
    result = result.lstrip('.')
    return result if result else '_'
2012-03-25 03:07:37 +02:00
2014-11-23 20:41:03 +01:00
2021-02-17 20:09:38 +01:00
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows

    On other platforms s is returned unchanged unless force is set, in which
    case the same sanitization is applied without any drive/UNC handling.
    Characters that are invalid in a path component, and a trailing space or
    dot of a component, are replaced with '#'.
    """
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s.startswith(os.path.sep):
        # startswith() rather than s[0], so that an empty path does not
        # raise IndexError. Keeps an absolute path absolute.
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2016-03-26 14:33:57 +01:00
def sanitize_url(url):
    """Repair a few common defects at the start of url; return it unchanged otherwise."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for typo_re, replacement in COMMON_TYPOS:
        # Every pattern is anchored at the start, so at most one
        # substitution happens; only the first typo that applies is fixed
        fixed, hits = re.subn(typo_re, replacement, url)
        if hits:
            return fixed
    return url
2016-03-26 14:33:57 +01:00
2021-04-19 14:07:45 +02:00
def extract_basic_auth(url):
    """Split credentials embedded in url off into a Basic auth header value.

    Returns (url, None) when url carries no username. Otherwise returns the
    URL rebuilt with only host (and port) as its network location, together
    with the string 'Basic <base64 of username:password>'.
    """
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None

    if parts.port is None:
        netloc = parts.hostname
    else:
        netloc = '%s:%d' % (parts.hostname, parts.port)
    url = compat_urlparse.urlunsplit(parts._replace(netloc=netloc))

    # A missing password is sent as an empty one
    credentials = '%s:%s' % (parts.username, parts.password or '')
    auth_payload = base64.b64encode(credentials.encode('utf-8'))
    return url, 'Basic ' + auth_payload.decode('utf-8')
2015-11-20 15:33:49 +01:00
def sanitized_Request(url, *args, **kwargs):
    """Build a Request for url after sanitizing and escaping it.

    Credentials embedded in the URL are removed from it and passed as an
    Authorization header instead.
    """
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        # The headers are the second positional argument after url; when
        # they were not passed positionally, use (or create) the keyword one
        if len(args) >= 2:
            headers = args[1]
        else:
            headers = kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
2015-11-20 15:33:49 +01:00
2017-03-25 20:30:10 +01:00
def expand_path(s):
    """Expand shell variables and ~"""
    # The home directory is expanded first, then the environment variables
    home_expanded = compat_expanduser(s)
    return os.path.expandvars(home_expanded)
2012-03-25 03:07:37 +02:00
def orderedSet(iterable):
    """Return a list of the items of iterable without duplicates, in first-seen order"""
    unique = []
    for item in iterable:
        # Compared by equality, so unhashable items work too
        if item in unique:
            continue
        unique.append(item)
    return unique
2012-03-25 03:07:37 +02:00
2014-03-24 01:40:09 +01:00
2016-06-10 09:11:55 +02:00
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    The argument is the entity without its leading '&' but including the
    trailing ';'. Entities that cannot be resolved are returned in their
    literal '&...;' form.
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    codepoints = compat_html_entities.name2codepoint
    if entity in codepoints:
        return compat_chr(codepoints[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    # Numeric entity, decimal (#123) or hexadecimal (#x7b)
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base, numstr = 16, '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2014-08-27 19:11:45 +02:00
2012-03-25 03:07:37 +02:00
def unescapeHTML(s):
    """Replace the HTML entities in s with the characters they stand for.

    None is passed through; any other value must be a text string.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode(match):
        # Group 1 is the entity without '&' but with its ';'
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _decode, s)
2012-03-25 03:07:37 +02:00
2014-01-05 03:07:55 +01:00
2021-05-23 18:34:49 +02:00
def escapeHTML(text):
    """Escape the characters that are special in HTML.

    '&', '<', '>', '"' and "'" are replaced with the corresponding entity.
    The ampersand is handled first so that the ampersands introduced by the
    other replacements are not escaped a second time.
    """
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )
2021-01-09 13:26:12 +01:00
def process_communicate_or_kill(p, *args, **kwargs):
    """Return p.communicate(*args, **kwargs); kill p if that is interrupted.

    On any exception the process is killed and waited for before the
    exception is re-raised.
    """
    try:
        result = p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
    return result
2015-04-26 00:29:41 +02:00
def get_subprocess_encoding():
    """Return the encoding to use for subprocess arguments and file names."""
    on_modern_windows = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    if on_modern_windows:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    encoding = sys.getfilesystemencoding()
    # Fall back to UTF-8 when the file system encoding is unknown
    return 'utf-8' if encoding is None else encoding
2014-01-05 03:07:55 +01:00
def encodeFilename(s, for_subprocess=False):
    """
    Encode a file name for use with the file system or subprocess APIs.

    @param s The name of the file

    The name is returned unchanged wherever text strings are accepted
    directly; elsewhere it is encoded with get_subprocess_encoding(),
    dropping characters that cannot be encoded.
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    unicode_windows_api = (
        not for_subprocess
        and sys.platform == 'win32'
        and sys.getwindowsversion()[0] >= 5)
    if unicode_windows_api:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
def decodeFilename(b, for_subprocess=False):
    """Decode a byte file name to text; the counterpart of encodeFilename().

    On Python 3, and for anything that is not a bytes object, b is returned
    unchanged. for_subprocess is accepted but not used.
    """
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2014-01-05 03:07:55 +01:00
2014-05-16 15:47:54 +02:00
def encodeArgument(s):
    """Encode a command line argument for a subprocess call."""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
2015-04-26 00:29:41 +02:00
def decodeArgument(b):
    """Decode a subprocess argument; the counterpart of encodeArgument()."""
    return decodeFilename(b, for_subprocess=True)
2013-02-21 17:09:39 +01:00
def decodeOption(optval):
    """Return an option value as text, decoding bytes with the preferred encoding.

    None is passed through unchanged.
    """
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
2013-01-01 20:27:53 +01:00
2014-11-23 20:41:03 +01:00
2021-05-23 18:34:49 +02:00
def formatSeconds(secs, delim=':', msec=False):
    """Format a duration in seconds as [H:]MM:SS, M:SS or plain seconds.

    delim separates the fields. With msec set, the fractional part of secs
    is appended as '.mmm' (milliseconds, three digits).
    """
    if secs > 3600:
        ret = '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs > 60:
        ret = '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        ret = '%d' % secs
    if not msec:
        return ret
    # Scale the fraction to milliseconds; formatting the bare fraction with
    # %d would always print 000. Capped so that rounding cannot produce a
    # fourth digit.
    millis = min(int(round((secs % 1) * 1000)), 999)
    return '%s.%03d' % (ret, millis)
2013-05-04 12:02:18 +02:00
2013-12-29 15:28:32 +01:00
2015-01-10 19:55:36 +01:00
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler set up according to params.

    Certificate verification is switched off when params['nocheckcertificate']
    is true. How the SSL context is built depends on what the running
    Python's ssl module offers.
    """
    skip_verification = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            # check_hostname has to be cleared before verify_mode
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2013-05-04 12:19:02 +02:00
2014-11-20 12:14:28 +01:00
2021-04-22 21:16:29 +02:00
def bug_reports_message(before=';'):
    """Return the standard request to report an issue.

    before is the text the request gets appended to (trailing whitespace is
    ignored). When it is empty or ends a sentence, the request starts with a
    capital letter.
    """
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'

    msg = (
        'please report this issue on https://github.com/yt-dlp/yt-dlp .'
        + ' Make sure you are using the latest version; %s.' % update_cmd
        + ' Be sure to call yt-dlp with the --verbose flag and include its complete output.')

    before = before.rstrip()
    starts_sentence = not before or before.endswith(('.', '!', '?'))
    if starts_sentence:
        msg = msg[0].title() + msg[1:]

    prefix = before + ' ' if before else ''
    return prefix + msg
2015-04-17 14:55:24 +02:00
2016-10-17 13:38:37 +02:00
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
2021-05-04 19:06:18 +02:00
# Exception classes that indicate a network problem, as a tuple.
network_exceptions = (
    compat_urllib_error.URLError,
    compat_http_client.HTTPException,
    socket.error,
)
if hasattr(ssl, 'CertificateError'):
    network_exceptions += (ssl.CertificateError,)
2016-10-17 13:38:37 +02:00
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.

        video_id, if given, is prepended to the message; cause, if given, is
        appended to it. Unexpected errors additionally get the request to
        file a bug report appended.
        """
        # An error raised while one of the known network exception classes
        # is being handled counts as expected
        current_exc_type = sys.exc_info()[0]
        if current_exc_type in network_exceptions:
            expected = True

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None if there is none."""
        if self.traceback is not None:
            return ''.join(traceback.format_tb(self.traceback))
        return None
2013-01-03 15:39:55 +01:00
2013-01-01 20:27:53 +01:00
2014-12-30 19:35:35 +01:00
class UnsupportedError(ExtractorError):
    """Expected extractor error reporting an unsupported URL."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        # Keep the offending URL available to callers
        self.url = url
2013-10-23 14:38:03 +02:00
class RegexNotFoundError(ExtractorError):
    """Extractor error signalling that a regular expression did not match."""
2017-02-04 12:49:58 +01:00
class GeoRestrictedError(ExtractorError):
    """Geographic restriction error.

    May be raised when a video is not available from the user's
    geographic location because of restrictions imposed by a website.
    Always reported as an expected error.
    """

    def __init__(self, msg, countries=None):
        """Store the plain message and the optional `countries` value."""
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg, self.countries = msg, countries
2016-10-17 13:38:37 +02:00
class DownloadError(YoutubeDLError):
    """Download failure.

    FileDownloader objects may raise this when they are not configured to
    continue on errors. The instance carries the appropriate error message.
    """

    def __init__(self, msg, exc_info=None):
        """Create the error.

        exc_info, if given, is the original exception that caused the
        trouble, in the form returned by sys.exc_info().
        """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
2012-03-25 03:07:37 +02:00
2021-03-23 20:45:53 +01:00
class EntryNotInPlaylist(YoutubeDLError):
    """A requested playlist entry does not exist.

    Raised by YoutubeDL when a requested entry is not found in the
    playlist info_dict.
    """
2016-10-17 13:38:37 +02:00
class SameFileError(YoutubeDLError):
    """Several downloads would end up in the same file.

    Raised by FileDownloader objects when they detect that multiple files
    would have to be downloaded to the same file on disk.
    """
2012-03-25 03:07:37 +02:00
2016-10-17 13:38:37 +02:00
class PostProcessingError(YoutubeDLError):
    """Postprocessing failure.

    May be raised by a PostProcessor's .run() method to indicate an error
    in the postprocessing task.
    """

    def __init__(self, msg):
        """Create the error; the text is also kept as `self.msg`."""
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg
2012-03-25 03:07:37 +02:00
2014-11-23 20:41:03 +01:00
2021-01-13 02:01:01 +01:00
class ExistingVideoReached(YoutubeDLError):
    """--break-on-existing triggered: a video already in the archive was encountered."""
    # The previous docstring was a copy of the --max-downloads text, which
    # describes MaxDownloadsReached, not this class.
    pass
class RejectedVideoReached(YoutubeDLError):
    """--break-on-reject triggered: a video that was filtered out was encountered."""
    # The previous docstring was a copy of the --max-downloads text, which
    # describes MaxDownloadsReached, not this class.
    pass
2021-06-23 01:11:09 +02:00
class ThrottledDownload(YoutubeDLError):
    """The download speed is below --throttled-rate."""
2016-10-17 13:38:37 +02:00
class MaxDownloadsReached(YoutubeDLError):
    """The --max-downloads limit has been reached."""
2012-03-25 03:07:37 +02:00
2016-10-17 13:38:37 +02:00
class UnavailableVideoError(YoutubeDLError):
    """Requested format is unavailable.

    Raised when a video is requested in a format that is not available
    for that video.
    """
2012-03-25 03:07:37 +02:00
2016-10-17 13:38:37 +02:00
class ContentTooShortError(YoutubeDLError):
    """Downloaded content is shorter than announced.

    May be raised by FileDownloader objects when a file they download is
    too small for what the server announced first, indicating that the
    connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        """Record both sizes (in bytes) and build the message from them."""
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        self.downloaded, self.expected = downloaded, expected
2012-03-25 03:07:37 +02:00
2014-11-23 20:41:03 +01:00
2016-10-17 13:38:37 +02:00
class XAttrMetadataError(YoutubeDLError):
    """Extended-attribute metadata error, classified into a coarse `reason`.

    `reason` is 'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED', derived
    from the errno-style `code` and from well-known fragments of `msg`.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Classify by error code first, then by message text
        out_of_space = (
            code in (errno.ENOSPC, errno.EDQUOT)
            or 'No space left' in msg
            or 'Disk quota exceeded' in msg)
        if out_of_space:
            reason = 'NO_SPACE'
        elif code == errno.E2BIG or 'Argument list too long' in msg:
            reason = 'VALUE_TOO_LONG'
        else:
            reason = 'NOT_SUPPORTED'
        self.reason = reason
2016-10-17 13:38:37 +02:00
class XAttrUnavailableError(YoutubeDLError):
    # NOTE(review): presumably raised when xattr support cannot be used at
    # all; the raise sites are outside this section - confirm.
    pass
2015-01-10 20:05:28 +01:00
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class and optionally pin it to a source address.

    ydl_handler  handler object; its `_params` dict is read for
                 'source_address'
    http_class   connection class, called with *args and **kwargs
    is_https     whether the Python 2.6 fallback has to wrap the socket
                 with TLS itself

    Returns the connection object. Without a configured source address the
    object is returned as created.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            # NOTE: this parameter shadows the outer `source_address`; here it
            # is indexed and passed to bind(), i.e. used as an address tuple.
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source address selects IPv4, anything else IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try every candidate of the matching family; the first socket
            # that connects is returned, the last error is kept otherwise.
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                # Only reached when there was no candidate address at all
                raise socket.error('getaddrinfo returns an empty list')
        # Only replace the connection factory on objects that expose one
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address support: replace connect() with a version
            # that builds the bound socket itself.
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2015-11-29 05:42:50 +01:00
def handle_youtubedl_headers(headers):
    """Apply the internal 'Youtubedl-no-compression' marker to `headers`.

    Without the marker the mapping is returned unchanged (same object).
    With it, a new dict is returned that contains neither the marker nor
    any 'Accept-Encoding' header (compared case-insensitively); the input
    mapping is not modified.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    # NB: plain loop instead of a dict comprehension (python 2.6 compatible)
    remaining = {}
    for name, value in headers.items():
        if name.lower() != 'accept-encoding':
            remaining[name] = value
    del remaining['Youtubedl-no-compression']
    return remaining
2015-11-29 05:42:50 +01:00
2013-08-27 23:15:01 +02:00
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """Keep `params` as `self._params`; other arguments go to HTTPHandler."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open `req`, using a SOCKS connection class when the request asks for one."""
        conn_class = compat_http_client.HTTPConnection

        # 'Ytdl-socks-proxy' is an internal marker header: it selects the
        # connection class and is removed before the request is sent.
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate `data`; empty input is returned unchanged.

        Decompression with negative wbits is tried first; on zlib.error the
        data is decompressed with zlib's defaults instead.
        """
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Prepare an outgoing request: escape its URL and complete its headers."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Decompress gzip/deflate bodies and percent-encode redirect locations.

        Returns the (possibly replaced) response object.
        """
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes cut off; if none of the
                # attempts works, the first error is re-raised.
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            # Remove Content-encoding after decompression: http_response() can
            # be called twice for one response (e.g. once from
            # PerRequestProxyHandler.proxy_open() and once from
            # YoutubeDL.urlopen()), and a second pass must not try to
            # decompress the already plain data.
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            # Same reason as for gzip above
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS requests and responses get the same treatment
    https_request = http_request
    https_response = http_response
2013-04-27 15:14:20 +02:00
2014-02-06 11:29:46 +01:00
2016-04-23 15:30:06 +02:00
def make_socks_conn_class(base_class, socks_proxy):
    """Return a subclass of `base_class` that connects through a SOCKS proxy.

    base_class   HTTPConnection or HTTPSConnection (sub)class
    socks_proxy  proxy URL; scheme 'socks5', 'socks'/'socks4' or 'socks4a'
                 (case-insensitive), optional credentials, port defaulting
                 to 1080
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    parsed_proxy = compat_urlparse.urlparse(socks_proxy)
    scheme = parsed_proxy.scheme.lower()
    # NOTE(review): any other scheme leaves socks_type unbound, so building
    # proxy_args below fails with UnboundLocalError.
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        # Empty/missing credentials are passed through untouched
        return compat_urllib_parse_unquote_plus(s) if s else s

    proxy_args = (
        socks_type,
        parsed_proxy.hostname, parsed_proxy.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(parsed_proxy.username),
        unquote_if_non_empty(parsed_proxy.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            # Only plain numeric timeouts are applied to the socket
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if not isinstance(self, compat_http_client.HTTPSConnection):
                return
            # HTTPS: wrap the proxied socket with TLS
            if hasattr(self, '_context'):  # Python > 2.6
                self.sock = self._context.wrap_socket(
                    self.sock, server_hostname=self.host)
            else:
                self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2015-01-10 19:55:36 +01:00
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that creates its connections via _create_http_connection."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        """Keep `params` and the connection class; the rest goes to HTTPSHandler."""
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        if https_conn_class:
            self._https_conn_class = https_conn_class
        else:
            self._https_conn_class = compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open `req`, using a SOCKS connection class when the request asks for one."""
        # Forward only the attributes this Python version's handler has:
        # `_context` (python > 2.6) and `_check_hostname` (python 3.x)
        open_kwargs = {}
        for attr, key in (('_context', 'context'), ('_check_hostname', 'check_hostname')):
            if hasattr(self, attr):
                open_kwargs[key] = getattr(self, attr)

        conn_class = self._https_conn_class
        # 'Ytdl-socks-proxy' is an internal marker header, removed before sending
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, True)
        return self.do_open(connection_factory, req, **open_kwargs)
2015-01-10 19:55:36 +01:00
2018-12-09 00:00:32 +01:00
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    Cookie jar reading and writing Netscape-format cookie files as UTF-8.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix stripped from lines on load, so such entries parse as regular cookies
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields of a cookie line
    _ENTRY_LEN = 7
    # Written at the top of every saved cookie file
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp.  Do not edit.

'''
    # Field names of one cookie line, in file order
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.

        filename falls back to self.filename; ValueError is raised when
        neither is set. Cookies marked as discard, or expired, are skipped
        unless ignore_discard / ignore_expires is set.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        # NOTE: this modifies the cookies held by the jar, not just the output
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                # One cookie per line, seven tab-separated fields
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        filename falls back to self.filename; ValueError is raised when
        neither is set. Malformed cookie lines are skipped with a warning
        written to stderr instead of aborting the load.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Returns the line to hand to the parser, or raises LoadError
            # for an entry that has to be skipped.
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Build a cleaned in-memory copy of the file and parse that instead
        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2015-09-06 02:20:48 +02:00
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that applies the HTTP cookie handling to HTTPS as well.

    http_response currently only delegates to HTTPCookieProcessor; the
    Set-Cookie escaping workaround below is disabled (commented out).
    """

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # HTTPS requests use the base class' request hook, HTTPS responses the
    # method defined above
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2020-02-29 13:08:44 +01:00
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # Every redirect status, including 308, is handled like a 302
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            # get_full_url() exists on both the python 2 and python 3 Request
            # classes, whereas the `full_url` attribute is python 3 only
            raise compat_HTTPError(req.get_full_url(), code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # The redirected request carries no body, so the headers describing
        # one are dropped
        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
2020-02-29 13:08:44 +01:00
2016-06-25 17:30:35 +02:00
def extract_timezone(date_str):
    """Split a trailing UTC offset off `date_str`.

    Recognized suffixes (after at least 8 leading characters) are 'Z' and
    '+HHMM' / '-HH:MM' style offsets, optionally preceded by one space.

    Returns a tuple (offset as datetime.timedelta, date_str without the
    suffix). Without a recognized suffix the offset is zero and the string
    is returned unchanged.
    """
    match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not match:
        return datetime.timedelta(), date_str

    remainder = date_str[:-len(match.group('tz'))]
    sign_char = match.group('sign')
    if not sign_char:
        # Plain 'Z' suffix: UTC
        return datetime.timedelta(), remainder

    factor = 1 if sign_char == '+' else -1
    offset = datetime.timedelta(
        hours=factor * int(match.group('hours')),
        minutes=factor * int(match.group('minutes')))
    return offset, remainder
2015-02-12 08:55:06 +01:00
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """Return a UNIX timestamp from the given date.

    date_str   date in 'YYYY-MM-DD<delimiter>HH:MM:SS' form, optionally with
               fractional seconds (which are discarded) and a UTC offset
    delimiter  separator between the date and the time part
    timezone   offset (timedelta) to subtract; when None it is taken from
               date_str itself via extract_timezone()

    Returns None when date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    # Drop fractional seconds
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        parsed = datetime.datetime.strptime(
            date_str, '%Y-%m-%d{0}%H:%M:%S'.format(delimiter))
        return calendar.timegm((parsed - timezone).timetuple())
    except ValueError:
        return None
2014-03-24 01:40:09 +01:00
2016-06-25 17:30:35 +02:00
def date_formats(day_first=True):
    """Return the day-first date formats, or the month-first ones if day_first is false."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2014-12-12 02:57:36 +01:00
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    Returns None when date_str is None or no known format matches.
    """
    if date_str is None:
        return None

    # Replace commas
    cleaned = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    result = None
    # Deliberately no early exit: when several formats parse, the last wins
    for fmt in date_formats(day_first):
        try:
            result = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if result is None:
        # Fall back to the e-mail date parser
        parsed = email.utils.parsedate_tz(cleaned)
        if parsed:
            try:
                result = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if result is None:
        return None
    return compat_str(result)
2013-04-27 15:14:20 +02:00
2014-11-23 20:41:03 +01:00
2016-06-25 17:30:35 +02:00
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string.

    The string is tried against each format returned by
    date_formats(day_first) and finally against the e-mail date parser.
    A trailing numeric UTC offset (as recognised by extract_timezone) is
    honoured on both paths. Returns None when date_str is None or nothing
    parses.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # A "PM" marker anywhere in the string shifts the result by 12 hours
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass

    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        # The numeric offset was already stripped from date_str above, so it
        # has to be applied here explicitly, as in the strptime path.
        return calendar.timegm(timetuple) + pm_delta * 3600 - int(timezone.total_seconds())
    return None
2016-06-25 17:30:35 +02:00
2014-11-17 07:16:12 +01:00
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from a URL.

    Takes the text after the last '.' in the part of the URL before the
    first '?'. Returns it when it is purely alphanumeric, or (with trailing
    slashes stripped) when it is listed in KNOWN_EXTENSIONS; otherwise
    returns default_ext.
    """
    if url is None or '.' not in url:
        return default_ext

    before_query = url.partition('?')[0]
    candidate = before_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
2013-07-08 01:13:55 +02:00
2014-11-23 20:41:03 +01:00
2019-10-17 23:03:53 +02:00
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return filename with its extension replaced by '<sub_lang>.<sub_format>'."""
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
2013-07-20 12:48:57 +02:00
2014-11-23 20:41:03 +01:00
2021-04-06 08:45:15 +02:00
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    precision: round the time portion of a datetime object.
                auto|microsecond|second|minute|hour|day.
                auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = precision == 'auto'
    if auto_precision:
        precision = 'microsecond'

    today = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    relative = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if relative is None:
        # Not a relative expression: parse as an absolute date
        return datetime_round(datetime.datetime.strptime(date_str, format), precision)

    base = datetime_from_str(relative.group('start'), precision, format)
    amount = int(relative.group('time'))
    if relative.group('sign') == '-':
        amount = -amount
    unit = relative.group('unit')

    if unit in ('month', 'year'):
        # Months and years are calendar-aware; timedelta has no such units
        shifted = datetime_add_months(base, amount * 12 if unit == 'year' else amount)
        unit = 'day'
    else:
        if unit == 'week':
            unit = 'day'
            amount *= 7
        shifted = base + datetime.timedelta(**{unit + 's': amount})

    if auto_precision:
        return datetime_round(shifted, unit)
    return shifted
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    """
    parsed = datetime_from_str(date_str, precision='microsecond', format=format)
    return parsed.date()
def datetime_add_months(dt, months):
    """Increment/Decrement a datetime object by months.

    The day of month is clamped to the last day of the target month.
    """
    year_shift, month_index = divmod(dt.month + months - 1, 12)
    target_year = dt.year + year_shift
    target_month = month_index + 1
    last_day = calendar.monthrange(target_year, target_month)[1]
    return dt.replace(target_year, target_month, min(dt.day, last_day))
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision

    precision is one of microsecond|second|minute|hour|day; 'microsecond'
    returns dt unchanged, anything else rounds to the nearest multiple of
    that unit.
    """
    if precision == 'microsecond':
        return dt

    seconds_per_unit = {'day': 86400, 'hour': 3600, 'minute': 60, 'second': 1}
    timestamp = calendar.timegm(dt.timetuple())
    step = seconds_per_unit[precision]
    # Adding half a step before flooring rounds to the nearest multiple
    rounded = ((timestamp + step / 2) // step) * step
    return datetime.datetime.utcfromtimestamp(rounded)
2014-11-23 20:41:03 +01:00
2014-01-02 13:47:28 +01:00
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that are not exactly eight digits are returned unchanged.
    """
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
2014-11-23 20:41:03 +01:00
2013-04-27 14:01:55 +02:00
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date

        A missing bound defaults to the earliest/latest representable date.
        Raises ValueError when start is later than end.
        """
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            # Anything that is not already a date is parsed as a date string
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
2013-08-28 12:57:10 +02:00
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Decode a byte string result using the preferred encoding
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
2013-08-28 18:22:28 +02:00
2014-04-07 22:48:13 +02:00
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    s is the text to write and out the target stream. Only streams whose
    file descriptor is 1 or 2 and whose handle is a real console are
    written here (through WriteConsoleW); for anything else False is
    returned so the caller can fall back to a regular write.
    Raises OSError if WriteConsoleW reports a failure.
    """
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Maps file descriptors 1 and 2 to the ids passed to GetStdHandle
    # (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE in the Win32 API)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True for a missing/invalid handle, a handle that is not a character
        # device, or one for which GetConsoleMode fails
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 characters at a time, stopping before the next
        # non-BMP character
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written on its own with a length of 2
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            # Continue after whatever part was actually written
            s = s[written.value:]
    return True
2014-04-07 19:57:42 +02:00
def write_string(s, out=None, encoding=None):
    """Write the text s to out (sys.stderr by default) and flush it.

    On Windows, when no encoding is given, the console writer is tried
    first. Otherwise s is encoded (ignoring unencodable characters) for
    binary streams and for streams exposing a .buffer, and written as-is
    to anything else.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        stream_encoding = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(stream_encoding, 'ignore'))
    else:
        out.write(s)
    out.flush()
2013-08-28 14:28:55 +02:00
def bytes_to_intlist(bs):
    """Return the items of bs as a list of integers ([] for empty input)."""
    if not bs:
        return []
    # Items are already ints on Python 3; otherwise convert each with ord()
    if not isinstance(bs[0], int):
        return [ord(ch) for ch in bs]
    return list(bs)
2013-08-28 18:22:28 +02:00
2013-08-28 15:59:07 +02:00
def intlist_to_bytes(xs):
    """Pack a sequence of byte values into a byte string (b'' for empty input)."""
    if not xs:
        return b''
    # One unsigned-char field per element
    pack_format = '%dB' % len(xs)
    return compat_struct_pack(pack_format, *xs)
2013-10-02 08:41:03 +02:00
2013-10-06 04:27:09 +02:00
# Cross-platform file locking
#
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the current
# platform: LockFileEx/UnlockFileEx on Windows, fcntl.flock elsewhere, and
# stubs that raise IOError where fcntl cannot be imported.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # ctypes layout of the structure passed to LockFileEx/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high halves of the byte count handed to LockFileEx/UnlockFileEx
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the open file f; raises OSError if LockFileEx fails."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object so _unlock_file can pass the same pointer
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags 0x2 is LOCKFILE_EXCLUSIVE_LOCK; 0x0 requests a shared lock
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file; raises OSError on failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            """Take an exclusive or shared flock on f."""
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            """Release the flock held on f."""
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            """Always raises IOError: locking is unavailable here."""
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            """Always raises IOError: locking is unavailable here."""
            raise IOError(UNSUPPORTED_MSG)
2013-10-06 04:27:09 +02:00
class locked_file(object):
    """File wrapper that holds a lock on the file while used as a context manager.

    Mode 'r' takes a shared lock; 'a' and 'w' take an exclusive one.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        wants_exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, wants_exclusive)
        except IOError:
            # Do not leak the handle when the lock cannot be taken
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        # The file is closed even if unlocking fails
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
2013-10-12 13:49:27 +02:00
2014-09-30 17:27:53 +02:00
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' when that is None."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
2013-10-12 13:49:27 +02:00
def shell_quote(args):
    """Return the arguments shell-quoted and joined by single spaces."""
    fs_encoding = get_filesystem_encoding()
    # We may get a filename encoded with 'encodeFilename'
    decoded = (
        arg.decode(fs_encoding) if isinstance(arg, bytes) else arg
        for arg in args)
    return ' '.join(compat_shlex_quote(arg) for arg in decoded)
2013-10-15 12:05:13 +02:00
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    data is JSON-encoded into a '#__youtubedl_smuggle=...' fragment appended
    to url. Data already smuggled in url is merged in and takes precedence
    over the new values. The caller's data dict is left unmodified.
    """
    url, idata = unsmuggle_url(url, {})
    # Merge into a copy so the dict passed in by the caller is not mutated
    payload = dict(data)
    payload.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(payload)})
    return url + '#' + sdata
2013-10-15 12:05:13 +02:00
2014-01-07 05:34:14 +01:00
def unsmuggle_url(smug_url, default=None):
    """Split a smuggled URL into (url, data).

    Returns (smug_url, default) when the URL carries no smuggled data.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default

    plain_url, _, fragment = smug_url.rpartition('#')
    encoded = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
    return plain_url, json.loads(encoded)
2013-11-25 03:12:26 +01:00
def format_bytes(bytes):
    """Format a byte count with a binary suffix, e.g. 1536 -> '1.50KiB'.

    Returns 'N/A' for None. A str argument is converted with float().
    Values below 1 are reported in bytes, values beyond the largest suffix
    in YiB, and negative values keep their sign.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)

    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    magnitude = abs(bytes)
    if magnitude == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(magnitude, 1024.0))
        # Keep the index inside the suffix table: fractions would give a
        # negative exponent, huge values one past the end
        exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
2013-12-06 13:36:36 +01:00
2013-12-09 18:29:07 +01:00
2016-03-13 11:27:20 +01:00
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' at the start of s and scale it by the unit.

    unit_table maps unit strings to multipliers. The number may use ',' or
    '.' as decimal separator. Returns the scaled value truncated to int, or
    None when s does not start with a number followed by a known unit.
    """
    alternatives = '|'.join(map(re.escape, unit_table))
    found = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives, s)
    if found is None:
        return None
    number = found.group('num').replace(',', '.')
    multiplier = unit_table[found.group('unit')]
    return int(float(number) * multiplier)
2014-11-25 09:54:54 +01:00
def parse_filesize(s):
    """Parse a human-readable file size such as '5 MiB' into a byte count.

    Returns None when s is None or is not recognised.
    """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {'B': 1, 'b': 1, 'bytes': 1}
    prefixes = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )
    for power, (letter, decimal_name, binary_name) in enumerate(prefixes, 1):
        decimal, binary = 1000 ** power, 1024 ** power
        lower = letter.lower()
        _UNIT_TABLE[letter + 'iB'] = binary
        _UNIT_TABLE[letter + 'B'] = decimal
        _UNIT_TABLE[lower + 'B'] = binary
        _UNIT_TABLE[letter + 'b'] = decimal
        _UNIT_TABLE[lower + 'b'] = decimal
        _UNIT_TABLE[decimal_name + 'bytes'] = decimal
        _UNIT_TABLE[binary_name + 'bytes'] = binary

    return lookup_unit_table(_UNIT_TABLE, s)
def parse_count(s):
    """Parse a count such as '1,234' or '1.5M' into an integer.

    Returns None when s is None or is not recognised.
    """
    if s is None:
        return None

    text = s.strip()

    # Plain digits with optional separators need no unit handling
    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    thousand, million = 1000, 1000 ** 2
    _UNIT_TABLE = {
        'k': thousand,
        'K': thousand,
        'm': million,
        'M': million,
        'kk': million,
        'KK': million,
    }

    return lookup_unit_table(_UNIT_TABLE, text)
2014-11-25 09:54:54 +01:00
2016-03-13 12:23:08 +01:00
2018-03-02 17:39:04 +01:00
def parse_resolution(s):
    """Extract video dimensions from a string.

    Recognises 'WxH' (x, X or ×), '<height>p' / '<height>i' and the '4k' /
    '8k' shorthands. Returns a dict with 'width' and/or 'height', or an
    empty dict when s is None or nothing matches.
    """
    if s is None:
        return {}

    dims = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dims:
        return {
            'width': int(dims.group('w')),
            'height': int(dims.group('h')),
        }

    lines = re.search(r'\b(\d+)[pPiI]\b', s)
    if lines:
        return {'height': int(lines.group(1))}

    shorthand = re.search(r'\b([48])[kK]\b', s)
    if shorthand:
        # 4k -> 2160, 8k -> 4320
        return {'height': int(shorthand.group(1)) * 540}

    return {}
2019-03-17 03:07:47 +01:00
def parse_bitrate(s):
    """Return the integer preceding 'kbps' in s, or None if s is not a string or has none."""
    if not isinstance(s, compat_str):
        return None
    found = re.search(r'\b(\d+)\s*kbps', s)
    if not found:
        return None
    return int(found.group(1))
2016-09-02 18:31:52 +02:00
def month_by_name(name, lang='en'):
    """ Return the number of a month by its name in the given language

    Falls back to the English names when lang has no entry in MONTH_NAMES.
    Returns None when the name is not found.
    """
    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    try:
        position = month_names.index(name)
    except ValueError:
        return None
    return position + 1
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations; None when the abbreviation is not found """
    abbreviations = [month[:3] for month in ENGLISH_MONTH_NAMES]
    try:
        position = abbreviations.index(abbrev)
    except ValueError:
        return None
    return position + 1
2013-12-10 21:03:53 +01:00
2014-01-20 22:11:34 +01:00
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML

    Ampersands that already start one of the predefined entities (amp, lt,
    gt, apos, quot) or a numeric character reference are left untouched.
    """
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;',
        xml_str)
2013-12-16 05:04:12 +01:00
def setproctitle(title):
    """Set the process name through libc's prctl(), where available.

    Does nothing on Jython, when libc.so.6 cannot be loaded, or when the
    loaded libc has no prctl.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes makes it one item larger than
    # the title, so the name handed to prctl is always NUL-terminated.
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 == PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
2013-12-16 13:56:13 +01:00
def remove_start(s, start):
    """Return s without the leading start, or s unchanged (including None) if it does not begin with it."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
2013-12-17 04:13:36 +01:00
2014-08-22 18:40:26 +02:00
def remove_end(s, end):
    """Return s without the trailing end, or s unchanged (including None) if it does not end with it."""
    # An empty suffix must be excluded explicitly: s[:-0] is the empty string
    if s is not None and s.endswith(end) and end:
        return s[:-len(end)]
    return s
2014-08-22 18:40:26 +02:00
2015-12-14 16:30:58 +01:00
def remove_quotes(s):
    """Strip one pair of matching single or double quotes surrounding s, if present."""
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
2020-10-09 07:06:49 +02:00
def get_domain(url):
    """Return the host part of url without scheme or leading 'www.', or None if no dotted host is found."""
    found = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if not found:
        return None
    return found.group('domain')
2013-12-17 04:13:36 +01:00
def url_basename(url):
    """Return the last segment of the URL's path, ignoring surrounding slashes."""
    url_path = compat_urlparse.urlparse(url).path
    segments = url_path.strip('/').split('/')
    return segments[-1]
2013-12-20 17:05:28 +01:00
2016-11-01 20:14:01 +01:00
def base_url(url):
    """Return the http(s) URL up to and including the last '/' before any '?', '#' or '&'."""
    found = re.match(r'https?://[^?#&]+/', url)
    return found.group()
2016-12-12 20:23:49 +01:00
def urljoin(base, path):
    """Join path onto base, tolerating bad input.

    bytes arguments are decoded as UTF-8. Returns None when path is empty
    or not a string, or when base is not an http(s) or protocol-relative
    URL. A path that is already absolute or protocol-relative is returned
    as-is.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not (isinstance(path, compat_str) and path):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path

    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str):
        return None
    if not re.match(r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
2013-12-20 17:05:28 +01:00
class HEADRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always reported as HEAD."""

    def get_method(self):
        return 'HEAD'
2013-12-25 15:18:40 +01:00
2016-07-02 21:21:32 +02:00
class PUTRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always reported as PUT."""

    def get_method(self):
        return 'PUT'
2014-07-21 12:02:44 +02:00
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to int, returning default when that is not possible.

    get_attr: when set, the attribute of that name is read from v first.
    The result is int(v) * invscale // scale. None, the empty string and
    anything int() rejects (including infinities and NaN) yield default.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int() of an infinite float
        return default
2014-07-21 12:02:44 +02:00
2014-08-10 13:04:45 +02:00
2014-08-10 11:00:14 +02:00
def str_or_none(v, default=None):
    """Return v converted to compat_str, or default when v is None."""
    if v is None:
        return default
    return compat_str(v)
2014-07-21 12:02:44 +02:00
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    Integers are returned unchanged; strings have ',', '.' and '+'
    removed before conversion. Any other type yields None.
    """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if not isinstance(int_str, compat_str):
        return None
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(digits)
2013-12-26 13:49:44 +01:00
2014-07-21 12:02:44 +02:00
def float_or_none(v, scale=1, invscale=1, default=None):
    """Return float(v) * invscale / scale, or default when v is None or not convertible."""
    if v is None:
        return default
    try:
        scaled = float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
    return scaled
2014-03-28 23:06:34 +01:00
2017-09-10 14:08:39 +02:00
def bool_or_none(v, default=None):
    """Return v if it is a bool, otherwise default."""
    if isinstance(v, bool):
        return v
    return default
2019-05-23 18:58:35 +02:00
def strip_or_none(v, default=None):
    """Return v with surrounding whitespace stripped if it is a string, otherwise default."""
    if not isinstance(v, compat_str):
        return default
    return v.strip()
2016-06-25 17:32:02 +02:00
2018-07-21 13:01:06 +02:00
def url_or_none(url):
    """Return url stripped of surrounding whitespace if it looks like a URL, else None.

    Accepted are protocol-relative URLs and the http(s), rtmp/rtsp family,
    mms and ftp(s) schemes.
    """
    if not url or not isinstance(url, compat_str):
        return None
    candidate = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', candidate):
        return candidate
    return None
2018-07-21 13:01:06 +02:00
2021-02-02 22:15:00 +01:00
def strftime_or_none(timestamp, date_format, default=None):
    """Format a timestamp with date_format, returning default on failure.

    timestamp is either a number (UNIX timestamp, interpreted as UTC) or a
    string in YYYYMMDD form. Any other type, an unparsable string or an
    out-of-range timestamp yields default.
    """
    datetime_object = None
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
        # datetime_object is still None for unsupported types; the resulting
        # AttributeError is handled below
        return datetime_object.strftime(date_format)
    except (ValueError, TypeError, AttributeError, OverflowError, OSError):
        # OverflowError/OSError: timestamp outside the range the platform
        # can convert
        return default
2013-12-26 13:49:44 +01:00
def parse_duration(s):
    """Parse a duration string into a number of seconds.

    Three notations are tried in order:
      1. colon separated, [[[days:]hours:]mins:]secs[.fraction]
      2. ISO 8601 like / spelled-out units, e.g. 'PT1H2M3S' or
         '1 hour 2 mins 3 secs'; year, month and week components are
         accepted by the pattern but not captured, so they do not
         contribute to the result
      3. a single decimal amount of hours or minutes, e.g. '1.5 hours'
    Each may be followed by an optional 'Z'.

    Returns None when s is not a string or matches none of the notations.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # NOTE(review): every component of this pattern is optional, so an
        # empty string appears to match and produce 0 - confirm that this
        # is intended
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Last resort: a lone, possibly fractional, hour or minute count
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    # Sum up whichever components were captured, in seconds
    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        # ms includes its leading '.', so float() gives the fraction of a second
        duration += float(ms)
    return duration
2014-01-03 12:52:27 +01:00
2015-05-02 19:06:01 +02:00
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext before the real extension of filename.

    When expected_real_ext is given and differs from the file's actual
    extension, ext is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
2014-01-07 06:23:41 +01:00
2015-05-02 19:23:06 +02:00
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of filename with ext.

    When expected_real_ext is given and differs from the file's actual
    extension, ext is appended to the whole filename instead.
    """
    stem, real_ext = os.path.splitext(filename)
    keep_whole = expected_real_ext and real_ext[1:] != expected_real_ext
    return '{0}.{1}'.format(filename if keep_whole else stem, ext)
2014-01-07 06:23:41 +01:00
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    command = [exe] + args
    try:
        # Only the ability to spawn the process matters; its output is discarded
        process_communicate_or_kill(subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
    except OSError:
        return False
    return exe
2014-01-20 11:36:47 +01:00
2014-11-02 10:50:30 +01:00
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output, _ = process_communicate_or_kill(proc)
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from the textual output of an executable.

    Returns group 1 of the first match of ``version_re`` (a generic
    "version X" pattern when None), or ``unrecognized`` if nothing matches.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
2021-05-28 18:49:13 +02:00
import collections.abc


class LazyList(collections.abc.Sequence):
    ''' Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList'''

    def __init__(self, iterable):
        self.__iterable = iter(iterable)
        # Items already pulled from the iterable, always in forward order
        self.__cache = []
        self.__reversed = False

    def __iter__(self):
        if self.__reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        yield from self.__cache
        for item in self.__iterable:
            self.__cache.append(item)
            yield item

    def __exhaust(self):
        """Pull every remaining item into the cache and return the cache (forward order)"""
        self.__cache.extend(self.__iterable)
        return self.__cache

    def exhaust(self):
        ''' Evaluate the entire iterable '''
        return self.__exhaust()[::-1 if self.__reversed else 1]

    @staticmethod
    def __reverse_index(x):
        # Map an index of the reversed view onto the forward cache.
        # None (an omitted slice bound) must stay None so that open-ended
        # slices remain open-ended after the mapping.
        return None if x is None else -(x + 1)

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self.__reversed:
                idx = slice(
                    self.__reverse_index(idx.start),
                    self.__reverse_index(idx.stop),
                    -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self.__reversed:
                idx = self.__reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            return self.__exhaust()[idx]

        # Only pull as many items as are needed to satisfy the request
        n = max(start or 0, stop or 0) - len(self.__cache) + 1
        if n > 0:
            self.__cache.extend(itertools.islice(self.__iterable, n))
        return self.__cache[idx]

    def __bool__(self):
        try:
            # Fetching the first underlying item is enough to know
            self[-1] if self.__reversed else self[0]
        except IndexError:
            return False
        return True

    def __len__(self):
        return len(self.__exhaust())

    def reverse(self):
        """Flip the iteration/indexing direction in place and return self"""
        self.__reversed = not self.__reversed
        return self

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
2021-05-28 18:49:13 +02:00
2014-01-20 11:36:47 +01:00
class PagedList(object):
    """Base class for lists whose entries are fetched page by page.

    Subclasses implement getslice(start, end).
    """

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def getslice(self, start=0, end=None):
        # The defaults mirror the subclasses, so that __len__ on the bare
        # base class raises NotImplementedError rather than TypeError
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        """Return the entry at non-negative index idx, or None if out of range"""
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        return entries[0] if entries else None
2014-09-29 00:36:06 +02:00
class OnDemandPagedList(PagedList):
    """PagedList that fetches pages through pagefunc(pagenum) only when needed.

    With use_cache enabled, every fetched page is kept in a dict keyed by
    page number.
    """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Return the entries in [start, end) as a list"""
        collected = []
        for pagenum in itertools.count(start // self._pagesize):
            page_first = pagenum * self._pagesize
            page_limit = pagenum * self._pagesize + self._pagesize
            if start >= page_limit:
                continue

            entries = self._cache.get(pagenum) if self._use_cache else None
            if entries is None:
                entries = list(self._pagefunc(pagenum))
                if self._use_cache:
                    self._cache[pagenum] = entries

            # Offsets inside this page that the requested range covers
            lo = start % self._pagesize if page_first <= start < page_limit else 0
            if end is not None and page_first <= end <= page_limit:
                hi = ((end - 1) % self._pagesize) + 1
            else:
                hi = None

            if lo != 0 or hi is not None:
                entries = entries[lo:hi]
            collected.extend(entries)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(entries) + lo < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == page_limit:
                break
        return collected
2014-02-09 17:56:10 +01:00
2014-09-29 00:36:06 +02:00
class InAdvancePagedList(PagedList):
    """PagedList for sources whose total page count is known up front."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries in [start, end) as a list"""
        res = []
        start_page = start // self._pagesize
        # Never ask pagefunc for a page beyond the known page count
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                # Only the first fetched page needs its head trimmed
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
2014-02-09 17:56:10 +01:00
def uppercase_escape(s):
    """Decode every literal ``\\UXXXXXXXX`` escape sequence found in ``s``"""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _expand, s)
2015-05-04 15:53:05 +02:00
def lowercase_escape(s):
    """Decode every literal ``\\uXXXX`` escape sequence found in ``s``"""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _expand, s)
2014-02-15 16:24:43 +01:00
2014-09-13 15:59:16 +02:00
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    needs_encoding = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if needs_encoding:
        s = s.encode('utf-8')
    # The second argument lists the characters that are left unquoted
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
2014-09-13 15:59:16 +02:00
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The host is IDNA-encoded; every other component is percent-escaped
    escaped = parts._replace(
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment),
    )
    return escaped.geturl()
2014-02-25 01:43:17 +01:00
def read_batch_urls(batch_fd):
    """Read the URLs listed in a batch file and close the file.

    Blank lines and lines starting with '#', ';' or ']' are skipped,
    a leading BOM is removed and trailing " #..." comments are stripped.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
        for bom in BOM_UTF8:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, maxsplit=1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [cleaned for cleaned in map(fixup, fd) if cleaned]
2014-03-07 15:25:33 +01:00
def urlencode_postdata(*args, **kargs):
    """URL-encode the given data and return it as ASCII bytes"""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
2014-03-10 17:31:32 +01:00
2016-03-03 18:34:52 +01:00
def update_url_query(url, query):
    """Return ``url`` with the entries of ``query`` merged into its query string.

    Existing parameters with the same name are replaced.
    """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parts.query)
    params.update(query)
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
2015-09-06 03:22:20 +02:00
2015-12-20 01:26:26 +01:00
2016-03-31 18:55:01 +02:00
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Build a new request from ``req`` with the given overrides applied.

    ``headers`` are merged into the existing ones, ``query`` is merged into
    the URL, and ``url``/``data`` replace the originals when truthy.
    The HEAD/PUT method of the original request is preserved.
    """
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    payload = data or req.data
    target_url = update_url_query(url or req.get_full_url(), query)

    method = req.get_method()
    if method == 'HEAD':
        request_cls = HEADRequest
    elif method == 'PUT':
        request_cls = PUTRequest
    else:
        request_cls = compat_urllib_request.Request

    new_req = request_cls(
        target_url, data=payload, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
2017-05-06 13:06:18 +02:00
def _multipart_encode_impl(data, boundary):
    """Encode ``data`` as multipart/form-data using the given boundary.

    Returns (body_bytes, content_type). Raises ValueError if the boundary
    occurs inside any of the encoded fields.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    chunks = []
    for k, v in data.items():
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        chunks.append(b'--' + boundary_bytes + b'\r\n' + content)

    chunks.append(b'--' + boundary_bytes + b'--\r\n')
    return b''.join(chunks), content_type
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # A caller-supplied boundary is used as is; a clash is the caller's error
        return _multipart_encode_impl(data, boundary)

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            # The random boundary clashed with the data; draw another one
            continue
2016-02-07 03:13:04 +01:00
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable key out of a list/tuple of keys.

    With several keys, a value is skipped when it is missing, None, or
    (if skip_false_values) falsy. A single key is a plain d.get().
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None or (skip_false_values and not value):
            continue
        return value
    return default
2016-06-12 01:05:34 +02:00
def try_get(src, getter, expected_type=None):
    """Apply each getter to ``src`` and return the first acceptable result.

    Getters raising AttributeError, KeyError, TypeError or IndexError are
    skipped, as are results that are not of ``expected_type`` (when given).
    Returns None if no getter produces an acceptable value.
    """
    for get in variadic(getter):
        try:
            value = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None
2016-06-12 01:05:34 +02:00
2018-04-27 21:47:17 +02:00
def merge_dicts(*dicts):
    """Merge dicts, with earlier dicts taking precedence.

    None values are ignored. An already merged empty string is replaced by
    a later non-empty string.
    """
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if k in merged:
                existing = merged[k]
                fills_empty_string = (
                    isinstance(v, compat_str) and v
                    and isinstance(existing, compat_str)
                    and not existing)
                if not fills_empty_string:
                    continue
            merged[k] = v
    return merged
2015-12-20 01:26:26 +01:00
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return ``string`` as compat_str, decoding it with ``encoding`` if needed"""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
2015-09-06 03:22:20 +02:00
2014-03-21 00:59:51 +01:00
# Age limit associated with each US movie rating
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
2014-03-24 23:21:20 +01:00
2016-08-07 15:45:18 +02:00
# Age limit associated with each TV Parental Guidelines rating
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
2014-10-03 14:37:25 +02:00
def parse_age_limit(s):
    """Convert an age rating into a numeric age limit.

    Accepts ints in 0..21, strings such as "18" or "18+", US movie ratings
    and TV Parental Guidelines ratings. Returns None for anything else.
    """
    if type(s) is int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None

    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))

    s = s.upper()
    if s in US_RATINGS:
        return US_RATINGS[s]

    # Accept "TV-14", "TV_14" and "TV14" alike
    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_rating = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, s)
    if tv_rating:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)]
    return None
2014-10-03 14:37:25 +02:00
2014-03-24 23:21:20 +01:00
def strip_jsonp(code):
    """Strip a JSONP wrapper such as ``callback(...);`` and return the payload.

    Input that does not look like JSONP is returned unchanged.
    """
    jsonp_re = re.compile(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''')
    return jsonp_re.sub(r'\g<callback_data>', code)
2014-04-21 07:12:02 +02:00
2021-01-19 20:05:50 +01:00
def js_to_json(code, vars={}):
    """Convert a JavaScript object/array literal into a JSON string.

    vars is a dict of var, val pairs to substitute: a bare identifier found
    in vars is replaced by the associated value verbatim.
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )
    # Rewrites applied inside JS string literals to make them valid JSON
    STRING_FIXES = {
        '"': '\\"',
        "\\'": "'",
        '\\\n': '',
        '\\x': '\\u00',
    }

    def fix_string_part(m):
        part = m.group(0)
        return STRING_FIXES.get(part, part)

    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        if v.startswith(('/*', '//', '!')) or v == ',':
            # Comments, negations and trailing commas are dropped
            return ""

        if v[0] in ("'", '"'):
            return '"%s"' % re.sub(r'(?s)\\.|"', fix_string_part, v[1:-1])

        # Hexadecimal and octal integers, possibly used as object keys
        for regex, base in INTEGER_TABLE:
            im = re.match(regex, v)
            if im:
                i = int(im.group(1), base)
                return '"%d":' % i if v.endswith(':') else '%d' % i

        if v in vars:
            return vars[v]

        return '"%s"' % v

    token_re = r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE)
    return re.sub(token_re, fix_kv, code)
2014-08-22 02:33:29 +02:00
2014-04-21 07:12:02 +02:00
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # The position in quality_ids is the quality; unknown ids rank lowest
        try:
            position = quality_ids.index(qid)
        except ValueError:
            position = -1
        return position
    return q
2014-04-30 10:02:03 +02:00
2021-02-03 14:36:09 +01:00
# Default output filename templates, keyed by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Known output template types, each mapped to a string or None
# NOTE(review): the strings look like filename suffixes for that type - confirm against callers
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
2014-05-16 12:03:59 +02:00
2021-03-24 23:02:15 +01:00
# As of [1] format syntax is:
#  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
# The {0} placeholder must be filled (via str.format) with the regex
# that matches the mapping key
STR_FORMAT_RE = r'''(?x)
    (?<!%)
    %
    (?P<has_key>\((?P<key>{0})\))?  # mapping key
    (?P<format>
        (?:[#0\-+ ]+)?  # conversion flags (optional)
        (?:\d+)?  # minimum field width (optional)
        (?:\.\d+)?  # precision (optional)
        [hlL]?  # length modifier (optional)
        [diouxXeEfFgGcrs]  # conversion type
    )
'''
2014-09-15 15:10:24 +02:00
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    # Keep room for the ellipsis so the result is exactly ``length`` long
    return s[:length - len(ELLIPSES)] + ELLIPSES
2014-10-26 16:46:34 +01:00
def version_tuple(v):
    """Split a version string on '.' and '-' into a tuple of ints"""
    return tuple(map(int, re.split(r'[-.]', v)))
2014-10-26 16:46:34 +01:00
def is_outdated_version(version, limit, assume_new=True):
    """Tell whether ``version`` is older than ``limit``.

    A missing or unparsable version counts as outdated only when
    ``assume_new`` is false.
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
    return outdated
2014-11-20 12:14:28 +01:00
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    # Always False here: the former detection of zipimport/frozen builds sat
    # after this unconditional return and could never run, so it was removed
    return False
2014-11-23 10:49:19 +01:00
def args_to_str(args):
    """Get a short string representation for a subprocess command"""
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
2015-01-04 02:20:45 +01:00
2015-12-20 02:00:39 +01:00
def error_to_compat_str(err):
    """Return the message of ``err`` as a text string"""
    err_str = str(err)
    if sys.version_info[0] >= 3:
        return err_str
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return err_str.decode(preferredencoding())
2015-02-19 00:31:01 +01:00
def mimetype2ext(mt):
    """Map a MIME type to a file extension.

    Full MIME types are looked up first; otherwise the subtype (lowercased,
    parameters removed) is translated, falling back to the subtype itself.
    Returns None for None.
    """
    if mt is None:
        return None

    FULL_TYPE_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }
    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }

    known = FULL_TYPE_MAP.get(mt)
    if known is not None:
        return known

    subtype = mt.rpartition('/')[2]
    subtype = subtype.split(';')[0].strip().lower()
    return SUBTYPE_MAP.get(subtype, subtype)
2016-03-16 18:48:06 +01:00
def parse_codecs(codecs_str):
    """Split a codecs string into its video and audio codec.

    Returns a dict with 'vcodec' and 'acodec' ('none' when absent), or {}
    when nothing can be determined. Unknown codecs produce a warning on stderr.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_CODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')

    stripped = (part.strip() for part in codecs_str.strip().strip(',').split(','))
    split_codecs = [part for part in stripped if part]

    vcodec = acodec = None
    for full_codec in split_codecs:
        codec = full_codec.split('.')[0]
        if codec in VIDEO_CODECS:
            # Only the first codec of each kind is kept
            vcodec = vcodec or full_codec
        elif codec in AUDIO_CODECS:
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(split_codecs) == 2:
        # Nothing recognized: assume the "video, audio" ordering
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
2015-01-04 02:20:45 +01:00
def urlhandle_detect_ext(url_handle):
    """Guess the file extension from the headers of a URL handle.

    The filename of a Content-Disposition attachment wins; otherwise the
    Content-Type header is mapped to an extension.
    """
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    if disposition:
        attachment = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if attachment:
            ext = determine_ext(attachment.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
2015-01-07 07:20:20 +01:00
2015-07-22 14:03:05 +02:00
def encode_data_uri(data, mime_type):
    """Build a base64 ``data:`` URI from bytes and a MIME type"""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
2015-01-07 07:20:20 +01:00
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # No limit set by the user, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
2015-01-23 01:21:30 +01:00
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """

    # Longer BOMs must come before the shorter ones they start with
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    text = next(
        (first_bytes[len(bom):].decode(enc, 'replace')
         for bom, enc in BOMS if first_bytes.startswith(bom)),
        None)
    if text is None:
        # No BOM present: assume UTF-8
        text = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', text)
2015-01-23 23:50:31 +01:00
def determine_protocol(info_dict):
    """Return the download protocol for an info dict.

    An explicit 'protocol' entry wins; otherwise the protocol is derived
    from the URL prefix, then the file extension, then the URL scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
2015-01-25 02:38:47 +01:00
2020-12-13 15:29:09 +01:00
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values """

    def column_widths(rows):
        return [max(len(compat_str(cell)) for cell in column) for column in zip(*rows)]

    def keep_columns(row, keep_flags):
        return [cell for (keep, cell) in zip(keep_flags, row) if keep]

    if hideEmpty:
        # A column whose cells are all empty has width 0, i.e. a falsy flag
        data_widths = column_widths(data)
        header_row = keep_columns(header_row, data_widths)
        data = [keep_columns(row, data_widths) for row in data]

    rows = [header_row] + data
    widths = column_widths(rows)
    if delim:
        separator = ['-' * width for width in widths]
        rows = [header_row] + [separator] + data

    # Every column but the last is left-aligned and padded to its width
    padded = ['%-' + compat_str(width + extraGap) + 's' for width in widths[:-1]]
    format_str = ' '.join(padded) + ' %s'
    return '\n'.join(format_str % tuple(row) for row in rows)
2015-02-10 03:32:21 +01:00
def _match_one(filter_part, dct):
    """Evaluate a single filter clause against dct.

    Two syntaxes are tried in order: a comparison (``key OP value``, with an
    optional ``?`` after the operator) and a unary test (``key`` / ``!key``).

    @param filter_part  One clause of a filter string
    @param dct          Dictionary the clause is evaluated against
    @returns            The comparison/unary result. For a comparison whose
                        key is missing (None) in dct, the matched ``?`` text
                        or None is returned rather than a bool
    @raises ValueError  If the clause cannot be parsed, a string value is
                        used with an ordering operator, or the numeric value
                        is invalid
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    # Value alternatives: number with optional size suffix, quoted string
    # (backslash escapes allowed), or a bare alphanumeric word.
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
            or m.group('strval') is not None
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082).
            or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            # String comparison: only equality operators make sense
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape the quote character inside a quoted value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            # Numeric comparison: plain integer first, then a file size,
            # then a file size with an implied trailing 'B'
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing field: passes only if '?' followed the operator
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        # 'key': booleans must be True, anything else must be present
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        # '!key': booleans must be False, anything else must be absent
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
def match_str(filter_str, dct):
    """ Check a dictionary against a filter string made of '&'-joined clauses.

    Returns True when every clause passes, False as soon as one does not.
    """
    for clause in filter_str.split('&'):
        if not _match_one(clause, dct):
            return False
    return True
def match_filter_func(filter_str):
    """Build a match-filter callable for filter_str.

    The returned function takes an info dict and returns None when the dict
    passes the filter, or a human-readable skip message when it does not.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        # Name the video by title, falling back to its id, then 'video'
        fallback_name = info_dict.get('id', 'video')
        video_title = info_dict.get('title', fallback_name)
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
2015-03-03 00:03:06 +01:00
2015-04-25 17:15:05 +02:00
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP/TTML time expression to seconds.

    Supported forms:
      * offset time: a decimal number optionally followed by one of the
        metrics ``h``, ``m``, ``s`` or ``ms`` (no metric means seconds)
      * clock time: ``H+:MM:SS`` with an optional fraction introduced by
        ``.`` or ``:``

    @param time_expr  The expression string (may be None or empty)
    @returns          Seconds as a number, or None if time_expr is empty or
                      in an unsupported form (e.g. frame/tick metrics)
    """
    if not time_expr:
        return

    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)(?P<metric>h|ms|m|s)?$', time_expr)
    if mobj:
        offset = float(mobj.group('time_offset'))
        metric = mobj.group('metric')
        if metric == 'h':
            return offset * 3600
        if metric == 'm':
            return offset * 60
        if metric == 'ms':
            # Divide rather than multiply by 0.001 to avoid float noise
            return offset / 1000
        return offset

    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        # A ':'-separated last component is read as a decimal fraction
        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
2015-04-25 17:15:05 +02:00
2015-05-12 07:04:54 +02:00
def srt_subtitles_timecode(seconds):
    """Format a number of seconds as an SRT timecode ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond first; formatting the
    float parts directly with %d truncates, so e.g. 4.35 would come out
    as ``,349``.
    """
    total_ms = int(round(seconds * 1000))
    total_secs, millis = divmod(total_ms, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, millis)
2015-04-25 17:15:05 +02:00
def dfxp2srt(dfxp_data):
    '''
    Convert a DFXP/TTML subtitle document to SRT.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no paragraph elements
    '''
    # Legacy namespace URIs are rewritten to the current ones before parsing
    # so that a single set of XPath expressions matches all variants
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}  # style id -> {property: value}, inheritance already applied
    default_style = {}  # style inherited from <body>/<div>

    class TTMLPElementParser(object):
        # Parser target that turns the events of one paragraph into text
        # with <b>/<i>/<u>/<font> markup.
        # NOTE(review): the two lists below are class attributes and are
        # mutated in place, so they are shared by every parser instance
        # created during one dfxp2srt() call. end() only pops
        # _applied_styles when the element emitted tags, so an entry pushed
        # for a tag-less element carries over to later paragraphs. Left
        # as-is because it influences which tags are emitted.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                # Effective style: defaults, then referenced style, then
                # inline tts:* attributes
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect from an ancestor
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialize the paragraph and replay it through the target above
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance. A style may reference a parent that is
    # defined later in the document, so passes are repeated until every
    # parent is known. The loop stops repeating once a pass registers no new
    # style: the remaining parents are undefined or cyclic, and one final
    # pass then treats them as missing so the loop always terminates.
    style_nodes = dfxp.findall(_x('.//ttml:style'))
    ignore_missing_parents = False
    while True:
        unresolved = False
        known_styles = len(styles)
        for style in style_nodes:
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id and parent_style_id in styles:
                styles[style_id] = styles[parent_style_id].copy()
            elif parent_style_id and not ignore_missing_parents:
                unresolved = True
                continue
            else:
                # Register the style even if it carries no supported
                # property, so that styles inheriting from it can resolve
                styles.setdefault(style_id, {})
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if not unresolved:
            break
        if len(styles) == known_styles:
            ignore_missing_parents = True

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            # No usable end: derive it from the duration, else skip the cue
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
2015-09-04 23:05:11 +02:00
def cli_option(params, command_option, param):
    """Build ``[command_option, value]`` from params[param].

    Returns an empty list when the parameter is absent (None). Truthy values
    are converted to text; falsy non-None values are passed through as-is.
    """
    value = params.get(param)
    if value is None:
        return []
    if value:
        value = compat_str(value)
    return [command_option, value]
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build command-line arguments for a boolean parameter.

    Returns [] when params[param] is None/absent. Otherwise the option is
    followed by true_value or false_value, either as a separate list item
    or, when separator is given, joined into a single item.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] if params[param] equals expected_value, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
2021-03-09 03:17:21 +01:00
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Pick the extra command-line arguments configured for the given keys.

    @param argdict     Dict mapping lower-case keys to argument lists; a
                       plain list/tuple is the legacy format
    @param keys        List/tuple of key groups, tried in order; the first
                       group with at least one configured key wins
    @param default     Returned as-is when nothing matches (callers should
                       not mutate the result)
    @param use_compat  Whether a legacy list/tuple argdict is returned
                       unchanged instead of being ignored
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        return argdict if use_compat else default
    if argdict is None:
        return default
    assert isinstance(argdict, dict)
    assert isinstance(keys, (list, tuple))

    for key_list in keys:
        configured = (argdict.get(key.lower()) for key in variadic(key_list))
        arg_list = [args for args in configured if args is not None]
        if arg_list:
            # Concatenate the argument lists of every key in the group
            combined = []
            for args in arg_list:
                combined.extend(args)
            return combined
    return default
2015-09-04 23:05:11 +02:00
2015-06-21 12:53:17 +02:00
class ISO639Utils(object):
    """Conversions between two-letter and three-letter language codes."""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of code are looked up; returns None
        for unknown codes.
        """
        prefix = code[:2]
        return cls._lang_map.get(prefix)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        Returns the first two-letter code mapped to code, or None.
        """
        return next(
            (short_name for short_name, long_name in cls._lang_map.items()
             if long_name == code),
            None)
2015-06-27 07:13:57 +02:00
class ISO3166Utils(object):
    """Lookup of country names by two-letter country code."""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert a two-letter (ISO 3166-1 alpha-2) country code to the corresponding full name

        The lookup is case-insensitive; returns None for unknown codes.
        """
        normalized = code.upper()
        return cls._country_map.get(normalized)
2017-02-04 12:49:58 +01:00
class GeoUtils ( object ) :
# Major IPv4 address blocks per country
_country_ip_map = {
2019-10-29 00:10:20 +01:00
' AD ' : ' 46.172.224.0/19 ' ,
2017-02-04 12:49:58 +01:00
' AE ' : ' 94.200.0.0/13 ' ,
' AF ' : ' 149.54.0.0/17 ' ,
' AG ' : ' 209.59.64.0/18 ' ,
' AI ' : ' 204.14.248.0/21 ' ,
' AL ' : ' 46.99.0.0/16 ' ,
' AM ' : ' 46.70.0.0/15 ' ,
' AO ' : ' 105.168.0.0/13 ' ,
2019-10-29 00:10:20 +01:00
' AP ' : ' 182.50.184.0/21 ' ,
' AQ ' : ' 23.154.160.0/24 ' ,
2017-02-04 12:49:58 +01:00
' AR ' : ' 181.0.0.0/12 ' ,
' AS ' : ' 202.70.112.0/20 ' ,
2019-10-29 00:10:20 +01:00
' AT ' : ' 77.116.0.0/14 ' ,
2017-02-04 12:49:58 +01:00
' AU ' : ' 1.128.0.0/11 ' ,
' AW ' : ' 181.41.0.0/18 ' ,
2019-10-29 00:10:20 +01:00
' AX ' : ' 185.217.4.0/22 ' ,
' AZ ' : ' 5.197.0.0/16 ' ,
2017-02-04 12:49:58 +01:00
' BA ' : ' 31.176.128.0/17 ' ,
' BB ' : ' 65.48.128.0/17 ' ,
' BD ' : ' 114.130.0.0/16 ' ,
' BE ' : ' 57.0.0.0/8 ' ,
2019-10-29 00:10:20 +01:00
' BF ' : ' 102.178.0.0/15 ' ,
2017-02-04 12:49:58 +01:00
' BG ' : ' 95.42.0.0/15 ' ,
' BH ' : ' 37.131.0.0/17 ' ,
' BI ' : ' 154.117.192.0/18 ' ,
' BJ ' : ' 137.255.0.0/16 ' ,
2019-10-29 00:10:20 +01:00
' BL ' : ' 185.212.72.0/23 ' ,
2017-02-04 12:49:58 +01:00
' BM ' : ' 196.12.64.0/18 ' ,
' BN ' : ' 156.31.0.0/16 ' ,
' BO ' : ' 161.56.0.0/16 ' ,
' BQ ' : ' 161.0.80.0/20 ' ,
2019-10-29 00:10:20 +01:00
' BR ' : ' 191.128.0.0/12 ' ,
2017-02-04 12:49:58 +01:00
' BS ' : ' 24.51.64.0/18 ' ,
' BT ' : ' 119.2.96.0/19 ' ,
' BW ' : ' 168.167.0.0/16 ' ,
' BY ' : ' 178.120.0.0/13 ' ,
' BZ ' : ' 179.42.192.0/18 ' ,
' CA ' : ' 99.224.0.0/11 ' ,
' CD ' : ' 41.243.0.0/16 ' ,
2019-10-29 00:10:20 +01:00
' CF ' : ' 197.242.176.0/21 ' ,
' CG ' : ' 160.113.0.0/16 ' ,
2017-02-04 12:49:58 +01:00
' CH ' : ' 85.0.0.0/13 ' ,
2019-10-29 00:10:20 +01:00
' CI ' : ' 102.136.0.0/14 ' ,
2017-02-04 12:49:58 +01:00
' CK ' : ' 202.65.32.0/19 ' ,
' CL ' : ' 152.172.0.0/14 ' ,
2019-10-29 00:10:20 +01:00
' CM ' : ' 102.244.0.0/14 ' ,
2017-02-04 12:49:58 +01:00
' CN ' : ' 36.128.0.0/10 ' ,
' CO ' : ' 181.240.0.0/12 ' ,
' CR ' : ' 201.192.0.0/12 ' ,
' CU ' : ' 152.206.0.0/15 ' ,
' CV ' : ' 165.90.96.0/19 ' ,
' CW ' : ' 190.88.128.0/17 ' ,
2019-10-29 00:10:20 +01:00
' CY ' : ' 31.153.0.0/16 ' ,
2017-02-04 12:49:58 +01:00
' CZ ' : ' 88.100.0.0/14 ' ,
' DE ' : ' 53.0.0.0/8 ' ,
' DJ ' : ' 197.241.0.0/17 ' ,
' DK ' : ' 87.48.0.0/12 ' ,
' DM ' : ' 192.243.48.0/20 ' ,
' DO ' : ' 152.166.0.0/15 ' ,
' DZ ' : ' 41.96.0.0/12 ' ,
' EC ' : ' 186.68.0.0/15 ' ,
' EE ' : ' 90.190.0.0/15 ' ,
' EG ' : ' 156.160.0.0/11 ' ,
' ER ' : ' 196.200.96.0/20 ' ,
' ES ' : ' 88.0.0.0/11 ' ,
' ET ' : ' 196.188.0.0/14 ' ,
' EU ' : ' 2.16.0.0/13 ' ,
' FI ' : ' 91.152.0.0/13 ' ,
' FJ ' : ' 144.120.0.0/16 ' ,
2019-10-29 00:10:20 +01:00
' FK ' : ' 80.73.208.0/21 ' ,
2017-02-04 12:49:58 +01:00
' FM ' : ' 119.252.112.0/20 ' ,
' FO ' : ' 88.85.32.0/19 ' ,
' FR ' : ' 90.0.0.0/9 ' ,
' GA ' : ' 41.158.0.0/15 ' ,
' GB ' : ' 25.0.0.0/8 ' ,
' GD ' : ' 74.122.88.0/21 ' ,
' GE ' : ' 31.146.0.0/16 ' ,
' GF ' : ' 161.22.64.0/18 ' ,
' GG ' : ' 62.68.160.0/19 ' ,
2019-10-29 00:10:20 +01:00
' GH ' : ' 154.160.0.0/12 ' ,
' GI ' : ' 95.164.0.0/16 ' ,
2017-02-04 12:49:58 +01:00
' GL ' : ' 88.83.0.0/19 ' ,
' GM ' : ' 160.182.0.0/15 ' ,
' GN ' : ' 197.149.192.0/18 ' ,
' GP ' : ' 104.250.0.0/19 ' ,
' GQ ' : ' 105.235.224.0/20 ' ,
' GR ' : ' 94.64.0.0/13 ' ,
' GT ' : ' 168.234.0.0/16 ' ,
' GU ' : ' 168.123.0.0/16 ' ,
' GW ' : ' 197.214.80.0/20 ' ,
' GY ' : ' 181.41.64.0/18 ' ,
' HK ' : ' 113.252.0.0/14 ' ,
' HN ' : ' 181.210.0.0/16 ' ,
' HR ' : ' 93.136.0.0/13 ' ,
' HT ' : ' 148.102.128.0/17 ' ,
' HU ' : ' 84.0.0.0/14 ' ,
' ID ' : ' 39.192.0.0/10 ' ,
' IE ' : ' 87.32.0.0/12 ' ,
' IL ' : ' 79.176.0.0/13 ' ,
' IM ' : ' 5.62.80.0/20 ' ,
' IN ' : ' 117.192.0.0/10 ' ,
' IO ' : ' 203.83.48.0/21 ' ,
' IQ ' : ' 37.236.0.0/14 ' ,
' IR ' : ' 2.176.0.0/12 ' ,
' IS ' : ' 82.221.0.0/16 ' ,
' IT ' : ' 79.0.0.0/10 ' ,
' JE ' : ' 87.244.64.0/18 ' ,
' JM ' : ' 72.27.0.0/17 ' ,
' JO ' : ' 176.29.0.0/16 ' ,
2019-10-29 00:10:20 +01:00
' JP ' : ' 133.0.0.0/8 ' ,
2017-02-04 12:49:58 +01:00
' KE ' : ' 105.48.0.0/12 ' ,
' KG ' : ' 158.181.128.0/17 ' ,
' KH ' : ' 36.37.128.0/17 ' ,
' KI ' : ' 103.25.140.0/22 ' ,
' KM ' : ' 197.255.224.0/20 ' ,
2019-10-29 00:10:20 +01:00
' KN ' : ' 198.167.192.0/19 ' ,
2017-02-04 12:49:58 +01:00
' KP ' : ' 175.45.176.0/22 ' ,
' KR ' : ' 175.192.0.0/10 ' ,
' KW ' : ' 37.36.0.0/14 ' ,
' KY ' : ' 64.96.0.0/15 ' ,
' KZ ' : ' 2.72.0.0/13 ' ,
' LA ' : ' 115.84.64.0/18 ' ,
' LB ' : ' 178.135.0.0/16 ' ,
2019-10-29 00:10:20 +01:00
' LC ' : ' 24.92.144.0/20 ' ,
2017-02-04 12:49:58 +01:00
' LI ' : ' 82.117.0.0/19 ' ,
' LK ' : ' 112.134.0.0/15 ' ,
2019-10-29 00:10:20 +01:00
' LR ' : ' 102.183.0.0/16 ' ,
2017-02-04 12:49:58 +01:00
' LS ' : ' 129.232.0.0/17 ' ,
' LT ' : ' 78.56.0.0/13 ' ,
' LU ' : ' 188.42.0.0/16 ' ,
' LV ' : ' 46.109.0.0/16 ' ,
' LY ' : ' 41.252.0.0/14 ' ,
' MA ' : ' 105.128.0.0/11 ' ,
' MC ' : ' 88.209.64.0/18 ' ,
' MD ' : ' 37.246.0.0/16 ' ,
' ME ' : ' 178.175.0.0/17 ' ,
' MF ' : ' 74.112.232.0/21 ' ,
' MG ' : ' 154.126.0.0/17 ' ,
' MH ' : ' 117.103.88.0/21 ' ,
' MK ' : ' 77.28.0.0/15 ' ,
' ML ' : ' 154.118.128.0/18 ' ,
' MM ' : ' 37.111.0.0/17 ' ,
' MN ' : ' 49.0.128.0/17 ' ,
' MO ' : ' 60.246.0.0/16 ' ,
' MP ' : ' 202.88.64.0/20 ' ,
' MQ ' : ' 109.203.224.0/19 ' ,
' MR ' : ' 41.188.64.0/18 ' ,
' MS ' : ' 208.90.112.0/22 ' ,
' MT ' : ' 46.11.0.0/16 ' ,
' MU ' : ' 105.16.0.0/12 ' ,
' MV ' : ' 27.114.128.0/18 ' ,
2019-10-29 00:10:20 +01:00
' MW ' : ' 102.70.0.0/15 ' ,
2017-02-04 12:49:58 +01:00
' MX ' : ' 187.192.0.0/11 ' ,
' MY ' : ' 175.136.0.0/13 ' ,
' MZ ' : ' 197.218.0.0/15 ' ,
' NA ' : ' 41.182.0.0/16 ' ,
' NC ' : ' 101.101.0.0/18 ' ,
' NE ' : ' 197.214.0.0/18 ' ,
' NF ' : ' 203.17.240.0/22 ' ,
' NG ' : ' 105.112.0.0/12 ' ,
' NI ' : ' 186.76.0.0/15 ' ,
' NL ' : ' 145.96.0.0/11 ' ,
' NO ' : ' 84.208.0.0/13 ' ,
' NP ' : ' 36.252.0.0/15 ' ,
' NR ' : ' 203.98.224.0/19 ' ,
' NU ' : ' 49.156.48.0/22 ' ,
' NZ ' : ' 49.224.0.0/14 ' ,
' OM ' : ' 5.36.0.0/15 ' ,
' PA ' : ' 186.72.0.0/15 ' ,
' PE ' : ' 186.160.0.0/14 ' ,
' PF ' : ' 123.50.64.0/18 ' ,
' PG ' : ' 124.240.192.0/19 ' ,
' PH ' : ' 49.144.0.0/13 ' ,
' PK ' : ' 39.32.0.0/11 ' ,
' PL ' : ' 83.0.0.0/11 ' ,
' PM ' : ' 70.36.0.0/20 ' ,
' PR ' : ' 66.50.0.0/16 ' ,
' PS ' : ' 188.161.0.0/16 ' ,
' PT ' : ' 85.240.0.0/13 ' ,
' PW ' : ' 202.124.224.0/20 ' ,
' PY ' : ' 181.120.0.0/14 ' ,
' QA ' : ' 37.210.0.0/15 ' ,
2019-10-29 00:10:20 +01:00
' RE ' : ' 102.35.0.0/16 ' ,
2017-02-04 12:49:58 +01:00
' RO ' : ' 79.112.0.0/13 ' ,
2019-10-29 00:10:20 +01:00
' RS ' : ' 93.86.0.0/15 ' ,
2017-02-04 12:49:58 +01:00
' RU ' : ' 5.136.0.0/13 ' ,
2019-10-29 00:10:20 +01:00
' RW ' : ' 41.186.0.0/16 ' ,
2017-02-04 12:49:58 +01:00
' SA ' : ' 188.48.0.0/13 ' ,
' SB ' : ' 202.1.160.0/19 ' ,
' SC ' : ' 154.192.0.0/11 ' ,
2019-10-29 00:10:20 +01:00
' SD ' : ' 102.120.0.0/13 ' ,
2017-02-04 12:49:58 +01:00
' SE ' : ' 78.64.0.0/12 ' ,
2019-10-29 00:10:20 +01:00
' SG ' : ' 8.128.0.0/10 ' ,
2017-02-04 12:49:58 +01:00
' SI ' : ' 188.196.0.0/14 ' ,
' SK ' : ' 78.98.0.0/15 ' ,
2019-10-29 00:10:20 +01:00
' SL ' : ' 102.143.0.0/17 ' ,
2017-02-04 12:49:58 +01:00
' SM ' : ' 89.186.32.0/19 ' ,
' SN ' : ' 41.82.0.0/15 ' ,
2019-10-29 00:10:20 +01:00
' SO ' : ' 154.115.192.0/18 ' ,
2017-02-04 12:49:58 +01:00
' SR ' : ' 186.179.128.0/17 ' ,
' SS ' : ' 105.235.208.0/21 ' ,
' ST ' : ' 197.159.160.0/19 ' ,
' SV ' : ' 168.243.0.0/16 ' ,
' SX ' : ' 190.102.0.0/20 ' ,
' SY ' : ' 5.0.0.0/16 ' ,
' SZ ' : ' 41.84.224.0/19 ' ,
' TC ' : ' 65.255.48.0/20 ' ,
' TD ' : ' 154.68.128.0/19 ' ,
' TG ' : ' 196.168.0.0/14 ' ,
' TH ' : ' 171.96.0.0/13 ' ,
' TJ ' : ' 85.9.128.0/18 ' ,
' TK ' : ' 27.96.24.0/21 ' ,
' TL ' : ' 180.189.160.0/20 ' ,
' TM ' : ' 95.85.96.0/19 ' ,
' TN ' : ' 197.0.0.0/11 ' ,
' TO ' : ' 175.176.144.0/21 ' ,
' TR ' : ' 78.160.0.0/11 ' ,
' TT ' : ' 186.44.0.0/15 ' ,
' TV ' : ' 202.2.96.0/19 ' ,
' TW ' : ' 120.96.0.0/11 ' ,
' TZ ' : ' 156.156.0.0/14 ' ,
2019-10-29 00:10:20 +01:00
' UA ' : ' 37.52.0.0/14 ' ,
' UG ' : ' 102.80.0.0/13 ' ,
' US ' : ' 6.0.0.0/8 ' ,
2017-02-04 12:49:58 +01:00
' UY ' : ' 167.56.0.0/13 ' ,
2019-10-29 00:10:20 +01:00
' UZ ' : ' 84.54.64.0/18 ' ,
2017-02-04 12:49:58 +01:00
' VA ' : ' 212.77.0.0/19 ' ,
2019-10-29 00:10:20 +01:00
' VC ' : ' 207.191.240.0/21 ' ,
2017-02-04 12:49:58 +01:00
' VE ' : ' 186.88.0.0/13 ' ,
2019-10-29 00:10:20 +01:00
' VG ' : ' 66.81.192.0/20 ' ,
2017-02-04 12:49:58 +01:00
' VI ' : ' 146.226.0.0/16 ' ,
' VN ' : ' 14.160.0.0/11 ' ,
' VU ' : ' 202.80.32.0/20 ' ,
' WF ' : ' 117.20.32.0/21 ' ,
' WS ' : ' 202.4.32.0/19 ' ,
' YE ' : ' 134.35.0.0/16 ' ,
' YT ' : ' 41.242.116.0/22 ' ,
' ZA ' : ' 41.0.0.0/11 ' ,
2019-10-29 00:10:20 +01:00
' ZM ' : ' 102.144.0.0/13 ' ,
' ZW ' : ' 102.177.192.0/18 ' ,
2017-02-04 12:49:58 +01:00
}
@classmethod
def random_ipv4(cls, code_or_block):
    """Pick a random IPv4 address from a country's block or from a CIDR block.

    @param code_or_block    Either a two-letter country code, which is
                            upper-cased and looked up in cls._country_ip_map,
                            or a CIDR block in 'address/prefix-length' form
    @returns                The address as a dotted-quad string, or None when
                            the country code has no entry in the map
    """
    if len(code_or_block) != 2:
        cidr = code_or_block
    else:
        cidr = cls._country_ip_map.get(code_or_block.upper())
        if not cidr:
            return None

    network, prefix_length = cidr.split('/')
    lowest = compat_struct_unpack('!L', socket.inet_aton(network))[0]
    # Setting every host bit gives the last address of the block
    highest = lowest | (0xffffffff >> int(prefix_length))
    chosen = random.randint(lowest, highest)
    return compat_str(socket.inet_ntoa(compat_struct_pack('!L', chosen)))
2017-02-04 12:49:58 +01:00
2015-03-03 00:03:06 +01:00
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that lets a single request choose its own proxy.

    A request can carry a 'Ytdl-request-proxy' header whose value replaces the
    proxy configured for the handler. The header is removed before the request
    is passed on. The value '__noproxy__' makes proxy_open() return None.
    """

    def __init__(self, proxies=None):
        # Set default handlers
        for scheme in ('http', 'https'):
            default_open = (
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, default_open)
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        per_request_proxy = req.headers.get('Ytdl-request-proxy')
        if per_request_proxy is not None:
            proxy = per_request_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers take care of wrapping the socket with socks
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
2016-02-16 23:01:44 +01:00
2017-02-28 12:16:55 +01:00
# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
# released into Public Domain
# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    Zero and negative values yield a single zero byte.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    if n > 0:
        # Minimal big-endian encoding, i.e. without leading zero bytes
        s = n.to_bytes((n.bit_length() + 7) // 8, 'big')
    else:
        s = b'\000'
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes(). An empty byte string
    converts to 0.
    """
    return int.from_bytes(s, 'big')
2016-02-16 23:01:44 +01:00
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Encrypt a single block with OHDave's RSA scheme, see http://www.ohdave.com/rsa/

    The bytes of `data` are reversed before being read as a hexadecimal
    number, which is then raised to `exponent` modulo `modulus`.

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    reversed_hex = binascii.hexlify(data[::-1])
    plain_number = int(reversed_hex, 16)
    cipher_number = pow(plain_number, exponent, modulus)
    return '%x' % cipher_number
2016-02-24 15:08:40 +01:00
2017-02-27 11:50:19 +01:00
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result is `[0, 2] + padding + [0] + data`, where the padding consists
    of random non-zero octets.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data is longer than length - 11
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # The padding octets must be non-zero: the first zero octet after the
    # 0x00 0x02 header is what marks the start of the message when unpadding
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
2016-02-26 20:19:50 +01:00
def encode_base_n(num, n, table=None):
    """Convert a non-negative integer to its representation in base n

    @param num      The integer to encode
    @param n        The base
    @param table    Digits to use, indexed by digit value. Defaults to the
                    first n characters of 0-9, a-z, A-Z
    @returns        The encoded string; zero encodes to table[0]
    @raises ValueError  if n exceeds the table length, if num is negative,
                        or if a non-zero num is given with a base below 2
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Floor division never brings a negative number (or any number in base 1)
    # down to zero, so the digit loop below would not terminate
    if num < 0 or n < 2:
        raise ValueError('cannot encode %r in base %r' % (num, n))

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
2016-02-26 07:58:29 +01:00
def decode_packed_codes(code):
    """Expand packed code by substituting its symbol table back in

    `code` is searched with PACKED_CODES_RE (defined elsewhere in this module),
    whose four groups are used as the obfuscated payload, the numeric base, the
    number of symbols and the '|'-separated symbols. Every word of the payload
    is replaced by the symbol whose index, written in that base, equals the
    word. An empty symbol stands for the word itself.
    """
    packed = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = packed.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    symbol_table = {}
    for index in reversed(range(count)):
        encoded_index = encode_base_n(index, base)
        symbol_table[encoded_index] = symbols[index] or encoded_index

    def _lookup(word_match):
        return symbol_table[word_match.group(0)]

    return re.sub(r'\b(\w+)\b', _lookup, obfuscated_code)
2016-01-10 20:09:53 +01:00
2019-11-26 20:26:42 +01:00
def caesar(s, alphabet, shift):
    """Shift every character of s that occurs in alphabet by shift positions

    The shift wraps around the end of the alphabet. Characters that are not
    part of the alphabet are copied unchanged.
    """
    if shift == 0:
        return s
    size = len(alphabet)

    def _rotate(char):
        if char not in alphabet:
            return char
        return alphabet[(alphabet.index(char) + shift) % size]

    return ''.join(map(_rotate, s))
def rot47(s):
    """Rotate the characters '!' to '~' of s by 47 positions using caesar()"""
    rot47_alphabet = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, rot47_alphabet, 47)
2016-01-10 20:09:53 +01:00
def parse_m3u8_attributes(attrib):
    """Parse a comma-separated KEY=VALUE attribute list into a dict

    Values may be enclosed in double quotes, in which case they can contain
    commas; the quotes are not part of the returned value.
    """
    attribute_re = r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)'
    info = {}
    for attribute in re.finditer(attribute_re, attrib):
        value = attribute.group('val')
        info[attribute.group('key')] = value[1:-1] if value.startswith('"') else value
    return info
2016-06-26 09:16:49 +02:00
def urshift(val, n):
    """Shift val right by n bits, adding 2**32 to negative values first"""
    if val < 0:
        val += 0x100000000
    return val >> n
2016-08-06 20:42:58 +02:00
# Based on png2str() written by @gdkchan and improved by @yokrysty
2019-03-09 13:14:41 +01:00
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
2016-08-06 20:42:58 +02:00
def decode_png(png_data):
    """Decode a PNG image into rows of unfiltered colour bytes

    The scanlines are processed with 3 bytes per pixel; bit depth, colour type
    and interlacing of the image are not inspected.

    @param png_data     The complete PNG file as bytes
    @returns            (width, height, pixels), where pixels is a list of
                        rows and each row a flat list of width * 3 integers
    @raises IOError     if the signature or the leading IHDR chunk is missing,
                        or if the file contains no IDAT data
    """
    # Reference: https://www.w3.org/TR/PNG/
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or png_data[12:16] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    def unpack_integer(x):
        # PNG stores its integers in big-endian byte order
        return int.from_bytes(x, 'big')

    chunks = []

    # Walk the chunks by offset rather than re-slicing the remaining data
    position, end = 8, len(png_data)
    while position < end:
        length = unpack_integer(png_data[position:position + 4])
        chunk_type = png_data[position + 4:position + 8]
        chunk_data = png_data[position + 8:position + 8 + length]
        position += 8 + length + 4  # length, type and data, then skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''.join(chunk['data'] for chunk in chunks if chunk['type'] == b'IDAT')

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Every scanline is preceded by one byte naming its filter type
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        # Appended before it is filled so that _get_pixel can read the bytes
        # already decoded in the current row
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
2016-09-29 18:28:32 +02:00
def write_xattr(path, key, value):
    """Store value as the extended attribute key of the file at path

    The first available backend is used: the `xattr` Python module (either
    pyxattr or xattr), NTFS Alternate Data Streams when compat_os_name is
    'nt', or the `setfattr` / `xattr` command line tools.

    @param path     Path of the file to tag
    @param key      Name of the attribute
    @param value    Attribute value as bytes (it is decoded as UTF-8 before
                    being passed to the command line tools)
    @raises XAttrMetadataError      if setting the attribute fails
    @raises XAttrUnavailableError   if pyxattr is too old or no tool is found
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if not hasattr(xattr, 'set'):  # xattr
            setxattr = xattr.setxattr
        else:  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))
            setxattr = xattr.set

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            alternate_stream = path + ':' + key
            try:
                with open(alternate_stream, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
            return

        have_setfattr = check_executable('setfattr', ['--version'])
        have_xattr = check_executable('xattr', ['-h'])

        if not (have_setfattr or have_xattr):
            # On Unix, and can't find pyxattr, setfattr, or xattr.
            if sys.platform.startswith('linux'):
                raise XAttrUnavailableError(
                    "Couldn't find a tool to set the xattrs. "
                    "Install either the python 'pyxattr' or 'xattr' "
                    "modules, or the GNU 'attr' package "
                    "(which contains the 'setfattr' tool).")
            raise XAttrUnavailableError(
                "Couldn't find a tool to set the xattrs. "
                "Install either the python 'xattr' module, "
                "or the 'xattr' binary.")

        value = value.decode('utf-8')
        if have_setfattr:
            executable, opts = 'setfattr', ['-n', key, '-v', value]
        else:
            executable, opts = 'xattr', ['-w', key, value]

        cmd = ([encodeFilename(executable, True)]
               + [encodeArgument(o) for o in opts]
               + [encodeFilename(path, True)])

        try:
            p = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        _, stderr = process_communicate_or_kill(p)
        stderr = stderr.decode('utf-8', 'replace')
        if p.returncode != 0:
            raise XAttrMetadataError(p.returncode, stderr)
2017-05-01 17:09:18 +02:00
def random_birthday(year_field, month_field, day_field):
    """Generate a random date of birth between 1950-01-01 and 1995-12-31

    @returns    A dict that maps the three given field names to the year,
                month and day of the date, each as a string
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_in_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_in_days))
    year, month, day = birthday.year, birthday.month, birthday.day
    return {
        year_field: str(year),
        month_field: str(month),
        day_field: str(day),
    }
2020-10-27 11:37:21 +01:00
2021-01-07 07:41:05 +01:00
2020-10-27 11:37:21 +01:00
# Templates for internet shortcut files, which are plain text files.
# Windows internet shortcut (.url); expects the key `url`
DOT_URL_LINK_TEMPLATE = (
    '[InternetShortcut]\n'
    'URL=%(url)s\n'
)

# Apple property-list shortcut (.webloc); expects the key `url`
DOT_WEBLOC_LINK_TEMPLATE = (
    '<?xml version="1.0" encoding="UTF-8"?>\n'
    '<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">\n'
    '<plist version="1.0">\n'
    '<dict>\n'
    '\t<key>URL</key>\n'
    '\t<string>%(url)s</string>\n'
    '</dict>\n'
    '</plist>\n'
)

# Desktop entry shortcut (.desktop); expects the keys `filename` and `url`
DOT_DESKTOP_LINK_TEMPLATE = (
    '[Desktop Entry]\n'
    'Encoding=UTF-8\n'
    'Name=%(filename)s\n'
    'Type=Link\n'
    'URL=%(url)s\n'
    'Icon=text-html\n'
)
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    An explicit port is dropped only when it is port 80 on an `http` IRI. An IRI without a host (such as a relative reference) is converted with an empty network location.

    @raises ValueError  if the network location contains a `[` (IPv6 literal)
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # `hostname` is None when the IRI has no host, e.g. for a relative reference
    if iri_parts.hostname:
        net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
        # The 'idna' encoding produces ASCII text.

    # Port 80 is redundant for http only; dropping it for another scheme
    # (e.g. https://host:80) would make the URI point to a different port
    if iri_parts.port is not None and (iri_parts.port != 80 or iri_parts.scheme != 'http'):
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
# Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
def to_high_limit_path(path):
    """Return path in a form that is not subject to MAX_PATH on Windows

    On 'win32' and 'cygwin' the absolute path is returned with the `\\\\?\\`
    prefix; on any other platform the path is returned unchanged.
    """
    if sys.platform not in ['win32', 'cygwin']:
        return path

    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    long_path_prefix = r'\\?\ '.rstrip()
    return long_path_prefix + os.path.abspath(path)
2020-12-13 15:29:09 +01:00
2021-01-07 07:41:05 +01:00
2020-12-13 15:29:09 +01:00
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """Format obj[field] with template, or return default for ignored values

    @param obj          Mapping to read the field from (using .get)
    @param field        Key of the field; default is used if it is missing
    @param template     %-format string applied to the value
    @param ignore       Values for which default is returned instead
    @param default      Fallback value
    @param func         Optional function applied to a non-ignored value
                        before formatting; its result is checked against
                        ignore as well
    """
    val = obj.get(field, default)
    if val in ignore:
        return default
    if func:
        val = func(val)
        if val in ignore:
            return default
    return template % val
2021-01-08 17:14:50 +01:00
def clean_podcast_url(url):
    """Strip the known podcast tracking/redirect prefixes out of url"""
    tracking_prefix_re = re.compile(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/''')
    return tracking_prefix_re.sub('', url)
2021-01-22 14:43:30 +01:00
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Generate a random version 4 UUID string in its lowercase hyphenated form"""
    def _random_digit(placeholder):
        if placeholder.group(0) == 'y':
            # The variant digit of an RFC 4122 UUID has its two high bits set
            # to 10, i.e. it is one of 8, 9, a or b
            return _HEX_TABLE[random.randint(8, 11)]
        return _HEX_TABLE[random.randint(0, 15)]

    return re.sub(r'[xy]', _random_digit, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
2021-01-23 13:18:12 +01:00
def make_dir(path, to_screen=None):
    """Create the parent directory of path if it does not exist yet

    @param path         Path of a file whose directory should exist
    @param to_screen    Optional callable that receives an error message when
                        the directory cannot be created
    @returns            True on success (including when nothing had to be
                        created), False if creating the directory failed
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # Only report the failure when a reporter was actually supplied
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
2021-01-24 14:40:02 +01:00
def get_executable_path():
    """Return the absolute path of the directory the program is run from

    This is the directory of sys.executable when sys has a `frozen`
    attribute, otherwise a directory relative to this module's file: two
    levels up when the module was loaded by a zipimporter, one level up
    otherwise.
    """
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        return os.path.abspath(os.path.dirname(sys.executable))

    running_from_zip = isinstance(globals().get('__loader__'), zipimporter)
    # Running from ZIP needs one directory level more
    relative_root = '../..' if running_from_zip else '..'
    return os.path.abspath(os.path.join(os.path.dirname(__file__), relative_root))
2021-05-08 17:15:14 +02:00
def load_plugins(name, suffix, namespace):
    """Load the plugin module `name` and register its classes in namespace

    The module is searched for in the 'ytdlp_plugins' directory below
    get_executable_path(). Every attribute of the module whose name ends with
    suffix and is not yet a key of namespace is added to namespace.

    @returns    The list of newly registered objects; it is empty when the
                module cannot be imported
    """
    plugin_info = [None]
    classes = []
    try:
        plugin_dir = os.path.join(get_executable_path(), 'ytdlp_plugins')
        plugin_info = imp.find_module(name, [plugin_dir])
        plugins = imp.load_module(name, *plugin_info)
        for attr_name in dir(plugins):
            if attr_name in namespace or not attr_name.endswith(suffix):
                continue
            klass = getattr(plugins, attr_name)
            classes.append(klass)
            namespace[attr_name] = klass
    except ImportError:
        pass
    finally:
        # The first item returned by find_module is an open file, if any
        module_file = plugin_info[0]
        if module_file is not None:
            module_file.close()

    return classes
2021-01-27 16:02:51 +01:00
2021-07-11 00:14:39 +02:00
def traverse_obj(
        obj, *path_list, default=None, expected_type=None,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param obj              The object to traverse
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a string,
                            a tuple of strings or "...". When a tuple is given,
                            all the keys given in the tuple are traversed, and
                            "..." traverses all the keys in the object
    @param default          Default value to return
    @param expected_type    Only accept final value of this type
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    @returns                The value found by the first path that yields an
                            accepted result, otherwise default. For a path that
                            branches (tuple key or "..."), the result is a flat
                            list of the accepted values and is only returned
                            when it is not empty
    # TODO: Write tests
    '''
    if not casesense:
        # Lower-case every string key of every path; other keys are kept as they are
        _lower = lambda k: k.lower() if isinstance(k, str) else k
        path_list = (map(_lower, variadic(path)) for path in path_list)

    def _traverse_obj(obj, path, _current_depth=0):
        # `depth` belongs to the enclosing function and records the deepest
        # level of branching seen while following the current path
        nonlocal depth
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            if isinstance(key, (list, tuple)):
                # Several alternative keys: collect the result of each one and
                # continue below as if "..." had been given for that list
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                # Branch into every value of a dict, every item of a sequence,
                # or (with traverse_string) every character of str(obj)
                obj = (obj.values() if isinstance(obj, dict)
                       else obj if isinstance(obj, (list, tuple, LazyList))
                       else str(obj) if traverse_string else [])
                _current_depth += 1
                depth = max(depth, _current_depth)
                # The rest of the path is applied to each branch separately
                return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
            elif isinstance(obj, dict):
                # Exact lookup first; with casesense=False fall back to the
                # first entry whose lower-cased key equals the key
                obj = (obj.get(key) if casesense or (key in obj)
                       else next((v for k, v in obj.items() if _lower(k) == key), None))
            else:
                if is_user_input:
                    # NOTE(review): `':' not in key` presumably expects key to
                    # be a string here - confirm against callers
                    key = (int_or_none(key) if ':' not in key
                           else slice(*map(int_or_none, key.split(':'))))
                    if key == slice(None):
                        # A bare ":" selects everything, so handle it like "..."
                        # NOTE(review): _current_depth is not passed on in this
                        # call and therefore restarts at its default of 0
                        return _traverse_obj(obj, (..., *path[i + 1:]))
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    for path in path_list:
        depth = 0
        val = _traverse_obj(obj, path)
        if val is not None:
            if depth:
                # Each level of branching added one level of list nesting;
                # flatten all but one of them, skipping None entries
                for _ in range(depth - 1):
                    val = itertools.chain.from_iterable(v for v in val if v is not None)
                val = ([v for v in val if v is not None] if expected_type is None
                       else [v for v in val if isinstance(v, expected_type)])
                # An empty result means this path found nothing; try the next one
                if val:
                    return val
            elif expected_type is None or isinstance(val, expected_type):
                return val
    return default
2021-06-08 10:53:56 +02:00
def traverse_dict(dictn, keys, casesense=True):
    ''' For backward compatibility. Do not use '''
    # Forwards to traverse_obj with user-input keys and string traversal enabled
    legacy_options = {'is_user_input': True, 'traverse_string': True}
    return traverse_obj(dictn, keys, casesense=casesense, **legacy_options)
2021-07-10 23:59:44 +02:00
2021-07-19 22:51:55 +02:00
def variadic(x, allowed_types=(str, bytes)):
    """Return x if it is an iterable, otherwise wrap it in a one-item tuple

    @param x                The value to normalise
    @param allowed_types    Iterable types that are nevertheless treated as a
                            single value and wrapped
    """
    # The `collections.Iterable` alias was removed in Python 3.10
    from collections.abc import Iterable
    if isinstance(x, Iterable) and not isinstance(x, allowed_types):
        return x
    return (x, )