mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-21 19:27:37 +01:00
[xminus] Simplify and extend (#4302)
This commit is contained in:
parent
c3e74731c2
commit
be64b5b098
3 changed files with 95 additions and 20 deletions
|
@ -47,6 +47,7 @@ from youtube_dl.utils import (
|
|||
js_to_json,
|
||||
intlist_to_bytes,
|
||||
args_to_str,
|
||||
parse_filesize,
|
||||
)
|
||||
|
||||
|
||||
|
@ -367,5 +368,14 @@ class TestUtil(unittest.TestCase):
|
|||
'foo ba/r -baz \'2 be\' \'\''
|
||||
)
|
||||
|
||||
def test_parse_filesize(self):
    # Pairs of (input, expected result); an expected value of None means
    # parse_filesize is expected to reject the input.
    expectations = (
        (None, None),
        ('', None),
        ('91 B', 91),
        ('foobar', None),
        ('2 MiB', 2097152),
        ('5 GB', 5000000000),
        ('1.2Tb', 1200000000000),
    )
    for raw, expected in expectations:
        self.assertEqual(parse_filesize(raw), expected)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -2,7 +2,14 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..compat import (
|
||||
compat_chr,
|
||||
compat_ord,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_filesize,
|
||||
)
|
||||
|
||||
|
||||
class XMinusIE(InfoExtractor):
|
||||
|
@ -15,39 +22,46 @@ class XMinusIE(InfoExtractor):
|
|||
'ext': 'mp3',
|
||||
'title': 'Леонид Агутин-Песенка шофера',
|
||||
'duration': 156,
|
||||
'tbr': 320,
|
||||
'filesize_approx': 5900000,
|
||||
'view_count': int,
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
    """Build the info dict for a single track page.

    The page at ``url`` is downloaded and scraped with regular
    expressions.  Artist, title and the encoded download token are
    required (their lookups use the default, fatal behaviour of
    ``_html_search_regex``); duration, approximate file size, bitrate
    and view count are looked up with ``fatal=False`` and passed through
    ``int_or_none`` / ``parse_filesize``.

    Returns a dict with the keys 'id', 'title', 'url', 'duration',
    'filesize_approx', 'tbr' and 'view_count'.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)

    # The dots are escaped so that only the literal "minus_track.<field>"
    # assignments in the page match.
    artist = self._html_search_regex(
        r'minus_track\.artist="(.+?)"', webpage, 'artist')
    title = artist + '-' + self._html_search_regex(
        r'minus_track\.title="(.+?)"', webpage, 'title')
    duration = int_or_none(self._html_search_regex(
        r'minus_track\.dur_sec=\'([0-9]*?)\'',
        webpage, 'duration', fatal=False))
    filesize_approx = parse_filesize(self._html_search_regex(
        r'<div class="filesize[^"]*"></div>\s*([0-9.]+\s*[a-zA-Z][bB])',
        webpage, 'approximate filesize', fatal=False))
    tbr = int_or_none(self._html_search_regex(
        r'<div class="quality[^"]*"></div>\s*([0-9]+)\s*kbps',
        webpage, 'bitrate', fatal=False))
    view_count = int_or_none(self._html_search_regex(
        r'<div class="quality.*?► ([0-9]+)',
        webpage, 'view count', fatal=False))

    enc_token = self._html_search_regex(
        r'data-mt="(.*?)"', webpage, 'enc_token')
    # Decode the token: walk the encoded string backwards and lower every
    # character by one code point, except the one at position 3 of the
    # reversed string, which is copied unchanged.
    token = ''.join(
        c if pos == 3 else compat_chr(compat_ord(c) - 1)
        for pos, c in enumerate(reversed(enc_token)))
    video_url = 'http://x-minus.org/dwlf/%s/%s.mp3' % (video_id, token)

    return {
        'id': video_id,
        'title': title,
        'url': video_url,
        'duration': duration,
        'filesize_approx': filesize_approx,
        'tbr': tbr,
        'view_count': view_count,
    }
|
||||
|
||||
def _decode_token(self, enc_token):
|
||||
token = ''
|
||||
pos = 0
|
||||
for c in reversed(enc_token):
|
||||
if pos != 3:
|
||||
token += chr(ord(c) - 1)
|
||||
else:
|
||||
token += c
|
||||
pos += 1
|
||||
return token
|
||||
|
|
|
@ -1046,6 +1046,57 @@ def format_bytes(bytes):
|
|||
return '%.2f%s' % (converted, suffix)
|
||||
|
||||
|
||||
def parse_filesize(s):
    """Parse a human-readable file size into a number of bytes.

    ``s`` is expected to start (after optional whitespace) with a decimal
    number, followed by optional whitespace and a unit, for example
    '91 B', '2 MiB', '5 GB', '1.2Tb' or '1,24 KB'.  Either '.' or ','
    may be used as the decimal separator.  Anything after the unit is
    ignored.

    Returns the size as an int (truncated towards zero), or None when
    ``s`` is None or does not start with a recognized size.
    """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
    }
    for exponent, prefix in enumerate('KMGTPEZY', 1):
        lower = prefix.lower()
        _UNIT_TABLE.update({
            prefix + 'iB': 1024 ** exponent,
            prefix + 'B': 1000 ** exponent,
            lower + 'B': 1024 ** exponent,
            prefix + 'b': 1000 ** exponent,
            lower + 'b': 1000 ** exponent,
        })

    # Try longer units first so a short unit can never shadow a longer
    # one that starts with the same characters.
    units_re = '|'.join(
        re.escape(u) for u in sorted(_UNIT_TABLE, key=len, reverse=True))
    m = re.match(
        r'\s*(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
    if not m:
        return None

    # A comma is treated as a decimal separator, not a thousands separator
    num = float(m.group('num').replace(',', '.'))
    return int(num * _UNIT_TABLE[m.group('unit')])
|
||||
|
||||
|
||||
def get_term_width():
|
||||
columns = compat_getenv('COLUMNS', None)
|
||||
if columns:
|
||||
|
|
Loading…
Reference in a new issue