mirror of
https://github.com/yt-dlp/yt-dlp
synced 2024-11-16 07:48:01 +01:00
[openload] Improve ext extraction
This commit is contained in:
parent
926d97fc6b
commit
85750f8972
3 changed files with 7 additions and 3 deletions
|
@ -361,6 +361,7 @@ class TestUtil(unittest.TestCase):
|
||||||
self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
|
self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
|
||||||
self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
|
self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
|
||||||
self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
|
self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
|
||||||
|
self.assertEqual(determine_ext('foobar', None), None)
|
||||||
|
|
||||||
def test_find_xpath_attr(self):
|
def test_find_xpath_attr(self):
|
||||||
testxml = '''<root>
|
testxml = '''<root>
|
||||||
|
|
|
@ -307,6 +307,10 @@ class OpenloadIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://oload.download/f/kUEfGclsU9o',
|
'url': 'https://oload.download/f/kUEfGclsU9o',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# The title lacks the file extension, but the URL includes it
|
||||||
|
'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
||||||
|
@ -368,8 +372,7 @@ class OpenloadIE(InfoExtractor):
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
|
'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
# Seems all videos have extensions in their titles
|
'ext': determine_ext(title, None) or determine_ext(url, 'mp4'),
|
||||||
'ext': determine_ext(title, 'mp4'),
|
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'http_headers': headers,
|
'http_headers': headers,
|
||||||
}
|
}
|
||||||
|
|
|
@ -1228,7 +1228,7 @@ def unified_timestamp(date_str, day_first=True):
|
||||||
|
|
||||||
|
|
||||||
def determine_ext(url, default_ext='unknown_video'):
|
def determine_ext(url, default_ext='unknown_video'):
|
||||||
if url is None:
|
if url is None or '.' not in url:
|
||||||
return default_ext
|
return default_ext
|
||||||
guess = url.partition('?')[0].rpartition('.')[2]
|
guess = url.partition('?')[0].rpartition('.')[2]
|
||||||
if re.match(r'^[A-Za-z0-9]+$', guess):
|
if re.match(r'^[A-Za-z0-9]+$', guess):
|
||||||
|
|
Loading…
Reference in a new issue