mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-04 11:01:34 +01:00
Merge branch 'master' of github.com:rg3/youtube-dl
This commit is contained in:
commit
a27a2470cd
6 changed files with 92 additions and 15 deletions
|
@ -21,14 +21,15 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||||
self.assertEqual(self.matching_ies(url), ie_list)
|
self.assertEqual(self.matching_ies(url), ie_list)
|
||||||
|
|
||||||
def test_youtube_playlist_matching(self):
|
def test_youtube_playlist_matching(self):
|
||||||
self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
|
assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist'])
|
||||||
self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585
|
assertPlaylist(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
self.assertTrue(YoutubePlaylistIE.suitable(u'PL63F0C78739B09958'))
|
assertPlaylist(u'UUBABnxM4Ar9ten8Mdjj1j0Q') #585
|
||||||
self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q'))
|
assertPlaylist(u'PL63F0C78739B09958')
|
||||||
self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8'))
|
assertPlaylist(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
|
||||||
self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC'))
|
assertPlaylist(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
|
||||||
self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
|
assertPlaylist(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
|
||||||
self.assertFalse(YoutubePlaylistIE.suitable(u'PLtS2H6bU1M'))
|
assertPlaylist(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') #668
|
||||||
|
self.assertFalse('youtube:playlist' in self.matching_ies(u'PLtS2H6bU1M'))
|
||||||
|
|
||||||
def test_youtube_matching(self):
|
def test_youtube_matching(self):
|
||||||
self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
|
self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M'))
|
||||||
|
@ -37,13 +38,23 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||||
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
|
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
|
||||||
|
|
||||||
def test_youtube_channel_matching(self):
|
def test_youtube_channel_matching(self):
|
||||||
self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM'))
|
assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
|
||||||
self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec'))
|
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM')
|
||||||
self.assertTrue(YoutubeChannelIE.suitable('https://www.youtube.com/channel/HCtnHdj3df7iM/videos'))
|
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
|
||||||
|
assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
|
||||||
|
|
||||||
def test_youtube_user_matching(self):
|
def test_youtube_user_matching(self):
|
||||||
self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
|
self.assertMatch('www.youtube.com/NASAgovVideo/videos', ['youtube:user'])
|
||||||
|
|
||||||
|
def test_youtube_feeds(self):
|
||||||
|
self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watch_later'])
|
||||||
|
self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions'])
|
||||||
|
self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended'])
|
||||||
|
self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites'])
|
||||||
|
|
||||||
|
def test_youtube_show_matching(self):
|
||||||
|
self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show'])
|
||||||
|
|
||||||
def test_justin_tv_channelid_matching(self):
|
def test_justin_tv_channelid_matching(self):
|
||||||
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
|
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
|
||||||
self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
|
self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
|
||||||
|
|
|
@ -52,6 +52,7 @@ from .keek import KeekIE
|
||||||
from .liveleak import LiveLeakIE
|
from .liveleak import LiveLeakIE
|
||||||
from .livestream import LivestreamIE
|
from .livestream import LivestreamIE
|
||||||
from .metacafe import MetacafeIE
|
from .metacafe import MetacafeIE
|
||||||
|
from .metacritic import MetacriticIE
|
||||||
from .mit import TechTVMITIE, MITIE
|
from .mit import TechTVMITIE, MITIE
|
||||||
from .mixcloud import MixcloudIE
|
from .mixcloud import MixcloudIE
|
||||||
from .mtv import MTVIE
|
from .mtv import MTVIE
|
||||||
|
|
|
@ -14,7 +14,7 @@ from ..utils import (
|
||||||
class DailymotionIE(InfoExtractor):
|
class DailymotionIE(InfoExtractor):
|
||||||
"""Information Extractor for Dailymotion"""
|
"""Information Extractor for Dailymotion"""
|
||||||
|
|
||||||
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
|
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
|
||||||
IE_NAME = u'dailymotion'
|
IE_NAME = u'dailymotion'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
|
u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
|
||||||
|
@ -33,6 +33,7 @@ class DailymotionIE(InfoExtractor):
|
||||||
video_id = mobj.group(1).split('_')[0].split('?')[0]
|
video_id = mobj.group(1).split('_')[0].split('?')[0]
|
||||||
|
|
||||||
video_extension = 'mp4'
|
video_extension = 'mp4'
|
||||||
|
url = 'http://www.dailymotion.com/video/%s' % video_id
|
||||||
|
|
||||||
# Retrieve video webpage to extract further information
|
# Retrieve video webpage to extract further information
|
||||||
request = compat_urllib_request.Request(url)
|
request = compat_urllib_request.Request(url)
|
||||||
|
|
|
@ -109,6 +109,11 @@ class GenericIE(InfoExtractor):
|
||||||
return new_url
|
return new_url
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
parsed_url = compat_urlparse.urlparse(url)
|
||||||
|
if not parsed_url.scheme:
|
||||||
|
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
|
||||||
|
return self.url_result('http://' + url)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
new_url = self._test_redirect(url)
|
new_url = self._test_redirect(url)
|
||||||
if new_url:
|
if new_url:
|
||||||
|
|
55
youtube_dl/extractor/metacritic.py
Normal file
55
youtube_dl/extractor/metacritic.py
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
import operator
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class MetacriticIE(InfoExtractor):
    """Information extractor for trailer pages on www.metacritic.com."""

    _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
        u'file': u'3698222.mp4',
        u'info_dict': {
            u'title': u'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
            u'description': u'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
            u'duration': 221,
        },
    }

    # Matches an '&' that does not already start a predefined XML entity or
    # a numeric character reference, i.e. one that must be escaped before
    # the document can be handed to an XML parser.
    _BARE_AMPERSAND_RE = r'&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#[xX][0-9a-fA-F]+);)'

    def _real_extract(self, url):
        """Extract the trailer referenced by ``url``.

        Downloads the trailer page and the ``video_data`` XML document for
        the video id found in the URL, then builds one format entry per
        ``httpURI/videoFile`` element of the matching clip.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``description`` and ``duration``, updated with the fields of the
        format that has the highest ``rate``.

        Raises StopIteration when no clip in the XML carries the requested
        id, and IndexError when the clip lists no video files.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        # The xml is not well formatted, there are raw '&'. Escape them so
        # the parser accepts the document, but leave ampersands that already
        # introduce an entity alone to avoid double escaping.
        raw_info_xml = self._download_webpage(
            'http://www.metacritic.com/video_data?video=' + video_id,
            video_id, u'Downloading info xml')
        info_xml = re.sub(self._BARE_AMPERSAND_RE, '&amp;', raw_info_xml)
        info_doc = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))

        # The playlist may hold several clips; pick the one for this id.
        clip = next(
            c for c in info_doc.findall('playList/clip')
            if c.find('id').text == video_id)

        formats = []
        for video_file in clip.findall('httpURI/videoFile'):
            rate_str = video_file.find('rate').text
            formats.append({
                'url': video_file.find('filePath').text,
                'ext': 'mp4',
                'format_id': rate_str,
                'rate': int(rate_str),
            })
        # Ascending by rate, so the last entry is the highest-rate format.
        formats.sort(key=operator.itemgetter('rate'))

        description = self._html_search_regex(
            r'<b>Description:</b>(.*?)</p>',
            webpage, u'description', flags=re.DOTALL)

        result = {
            'id': video_id,
            'title': clip.find('title').text,
            'formats': formats,
            'description': description,
            'duration': int(clip.find('duration').text),
        }
        # TODO: Remove when #980 has been merged
        result.update(formats[-1])
        return result
|
|
@ -386,7 +386,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
"""Receives a URL and returns True if suitable for this IE."""
|
"""Receives a URL and returns True if suitable for this IE."""
|
||||||
if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
|
if YoutubePlaylistIE.suitable(url): return False
|
||||||
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
|
||||||
|
|
||||||
def report_video_webpage_download(self, video_id):
|
def report_video_webpage_download(self, video_id):
|
||||||
|
@ -1015,14 +1015,18 @@ class YoutubeChannelIE(InfoExtractor):
|
||||||
|
|
||||||
class YoutubeUserIE(InfoExtractor):
|
class YoutubeUserIE(InfoExtractor):
|
||||||
IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
|
IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
|
||||||
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)([A-Za-z0-9_-]+)'
|
_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?)|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
|
||||||
_TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
|
_TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
|
||||||
_GDATA_PAGE_SIZE = 50
|
_GDATA_PAGE_SIZE = 50
|
||||||
_GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
|
_GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
|
||||||
IE_NAME = u'youtube:user'
|
IE_NAME = u'youtube:user'
|
||||||
|
|
||||||
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
if YoutubeIE.suitable(url): return False
|
# Don't return True if the url can be extracted with other youtube
|
||||||
|
# extractor, the regex is too permissive and it would match.
|
||||||
|
other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls)
|
||||||
|
if any(ie.suitable(url) for ie in other_ies): return False
|
||||||
else: return super(YoutubeUserIE, cls).suitable(url)
|
else: return super(YoutubeUserIE, cls).suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
Loading…
Reference in a new issue