mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-16 03:40:50 +01:00
Merge branch 'master' into subtitles_rework
This commit is contained in:
commit
cf1dd0c59e
12 changed files with 158 additions and 20 deletions
|
@ -3,7 +3,8 @@
|
||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
import hashlib
|
import hashlib
|
||||||
import urllib.request
|
import os.path
|
||||||
|
|
||||||
|
|
||||||
if len(sys.argv) <= 1:
|
if len(sys.argv) <= 1:
|
||||||
print('Specify the version number as parameter')
|
print('Specify the version number as parameter')
|
||||||
|
@ -25,6 +26,7 @@ filenames = {
|
||||||
'tar': 'youtube-dl-%s.tar.gz' % version}
|
'tar': 'youtube-dl-%s.tar.gz' % version}
|
||||||
build_dir = os.path.join('..', '..', 'build', version)
|
build_dir = os.path.join('..', '..', 'build', version)
|
||||||
for key, filename in filenames.items():
|
for key, filename in filenames.items():
|
||||||
|
url = 'https://yt-dl.org/downloads/%s/%s' % (version, filename)
|
||||||
fn = os.path.join(build_dir, filename)
|
fn = os.path.join(build_dir, filename)
|
||||||
with open(fn, 'rb') as f:
|
with open(fn, 'rb') as f:
|
||||||
data = f.read()
|
data = f.read()
|
||||||
|
|
|
@ -14,7 +14,7 @@ def main():
|
||||||
template = tmplf.read()
|
template = tmplf.read()
|
||||||
|
|
||||||
ie_htmls = []
|
ie_htmls = []
|
||||||
for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME):
|
for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
|
||||||
ie_html = '<b>{}</b>'.format(ie.IE_NAME)
|
ie_html = '<b>{}</b>'.format(ie.IE_NAME)
|
||||||
try:
|
try:
|
||||||
ie_html += ': {}'.format(ie.IE_DESC)
|
ie_html += ': {}'.format(ie.IE_DESC)
|
||||||
|
|
|
@ -23,9 +23,9 @@ tests = [
|
||||||
# 86 - vfluy6kdb 2013/09/06
|
# 86 - vfluy6kdb 2013/09/06
|
||||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
|
||||||
"yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),
|
"yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),
|
||||||
# 85
|
# 85 - vflkuzxcs 2013/09/11
|
||||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
|
||||||
".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
|
"T>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOvUY.REWQ0987654321mnbqcxzasdfghjklpoiuytr"),
|
||||||
# 84 - vflg0g8PQ 2013/08/29 (sporadic)
|
# 84 - vflg0g8PQ 2013/08/29 (sporadic)
|
||||||
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
|
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
|
||||||
">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
|
">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
|
||||||
|
|
|
@ -72,10 +72,13 @@ class TestAllURLsMatching(unittest.TestCase):
|
||||||
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
|
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
|
||||||
|
|
||||||
def test_youtube_extract(self):
|
def test_youtube_extract(self):
|
||||||
self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
|
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id)
|
||||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
|
assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
|
assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc')
|
assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
|
assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
|
||||||
|
assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
|
||||||
|
assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
|
||||||
|
|
||||||
def test_no_duplicates(self):
|
def test_no_duplicates(self):
|
||||||
ies = gen_extractors()
|
ies = gen_extractors()
|
||||||
|
|
|
@ -66,7 +66,7 @@ class FileDownloader(object):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def format_seconds(seconds):
|
def format_seconds(seconds):
|
||||||
(mins, secs) = divmod(seconds, 60)
|
(mins, secs) = divmod(seconds, 60)
|
||||||
(hours, eta_mins) = divmod(mins, 60)
|
(hours, mins) = divmod(mins, 60)
|
||||||
if hours > 99:
|
if hours > 99:
|
||||||
return '--:--:--'
|
return '--:--:--'
|
||||||
if hours == 0:
|
if hours == 0:
|
||||||
|
|
|
@ -29,6 +29,10 @@ from .escapist import EscapistIE
|
||||||
from .exfm import ExfmIE
|
from .exfm import ExfmIE
|
||||||
from .facebook import FacebookIE
|
from .facebook import FacebookIE
|
||||||
from .flickr import FlickrIE
|
from .flickr import FlickrIE
|
||||||
|
from .francetv import (
|
||||||
|
PluzzIE,
|
||||||
|
FranceTvInfoIE,
|
||||||
|
)
|
||||||
from .freesound import FreesoundIE
|
from .freesound import FreesoundIE
|
||||||
from .funnyordie import FunnyOrDieIE
|
from .funnyordie import FunnyOrDieIE
|
||||||
from .gamespot import GameSpotIE
|
from .gamespot import GameSpotIE
|
||||||
|
@ -75,6 +79,7 @@ from .roxwel import RoxwelIE
|
||||||
from .rtlnow import RTLnowIE
|
from .rtlnow import RTLnowIE
|
||||||
from .sina import SinaIE
|
from .sina import SinaIE
|
||||||
from .slashdot import SlashdotIE
|
from .slashdot import SlashdotIE
|
||||||
|
from .slideshare import SlideshareIE
|
||||||
from .sohu import SohuIE
|
from .sohu import SohuIE
|
||||||
from .soundcloud import SoundcloudIE, SoundcloudSetIE
|
from .soundcloud import SoundcloudIE, SoundcloudSetIE
|
||||||
from .spiegel import SpiegelIE
|
from .spiegel import SpiegelIE
|
||||||
|
|
|
@ -5,7 +5,7 @@ from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class Canalc2IE(InfoExtractor):
|
class Canalc2IE(InfoExtractor):
|
||||||
_IE_NAME = 'canalc2.tv'
|
IE_NAME = 'canalc2.tv'
|
||||||
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'
|
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
|
|
77
youtube_dl/extractor/francetv.py
Normal file
77
youtube_dl/extractor/francetv.py
Normal file
|
@ -0,0 +1,77 @@
|
||||||
|
# encoding: utf-8
|
||||||
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class FranceTVBaseInfoExtractor(InfoExtractor):
    """Common base for the France Televisions extractors of this module."""

    def _extract_video(self, video_id):
        """Return the info dict for video_id.

        The metadata is read from the XML document served by the
        getInfosOeuvre.php web service for the given id.
        """
        config_url = (
            'http://www.francetvinfo.fr/appftv/webservices/video/'
            'getInfosOeuvre.php?id-diffusion=' + video_id)
        raw_xml = self._download_webpage(
            config_url, video_id, 'Downloading XML config')
        root = xml.etree.ElementTree.fromstring(raw_xml.encode('utf-8'))

        # The service exposes a manifest.f4m URL; rewrite it to the
        # index_2_av.m3u8 variant under /i/ instead of /z/.
        stream_url = (
            root.find('videos/video/url').text
            .replace('manifest.f4m', 'index_2_av.m3u8')
            .replace('/z/', '/i/'))
        # The image path is resolved against the pluzz host.
        thumbnail_url = compat_urlparse.urljoin(
            'http://pluzz.francetv.fr', root.find('image').text)

        return {
            'id': video_id,
            'ext': 'mp4',
            'url': stream_url,
            'title': root.find('titre').text,
            'thumbnail': thumbnail_url,
            'description': root.find('synopsis').text,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class PluzzIE(FranceTVBaseInfoExtractor):
    """Extractor for video pages on pluzz.francetv.fr."""

    IE_NAME = u'pluzz.francetv.fr'
    _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'

    _TEST = {
        u'url': u'http://pluzz.francetv.fr/videos/allo_rufo_saison5_,88439064.html',
        u'file': u'88439064.mp4',
        u'info_dict': {
            u'title': u'Allô Rufo',
            u'description': u'md5:d909f1ebdf963814b65772aea250400e',
        },
        u'params': {
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Read the data-diffusion id from the page and extract that video."""
        # The part of the URL between /videos/ and .html is passed as the
        # second argument of the page download.
        page_slug = re.match(self._VALID_URL, url).group(1)
        html = self._download_webpage(url, page_slug)
        diffusion_id = self._search_regex(
            r'data-diffusion="(\d+)"', html, 'ID')
        return self._extract_video(diffusion_id)
|
||||||
|
|
||||||
|
|
||||||
|
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
    """Extractor for replay pages on www.francetvinfo.fr."""

    IE_NAME = u'francetvinfo.fr'
    # The dot before "html" is escaped so that only a literal ".html"
    # matches, not any character followed by "html".
    _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+)\.html'

    _TEST = {
        u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
        u'file': u'84981923.mp4',
        u'info_dict': {
            u'title': u'Soir 3',
        },
        u'params': {
            u'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Read the id-video number from the page and extract that video.

        The page is downloaded with the URL's ``title`` group as second
        argument; the id is taken from an ``id-video=<digits>"`` match in
        the page and handed to ``_extract_video``.
        """
        mobj = re.match(self._VALID_URL, url)
        page_title = mobj.group('title')
        webpage = self._download_webpage(url, page_title)
        video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
        return self._extract_video(video_id)
|
47
youtube_dl/extractor/slideshare.py
Normal file
47
youtube_dl/extractor/slideshare.py
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urlparse,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class SlideshareIE(InfoExtractor):
    """Extractor for video slideshows on www.slideshare.net."""

    _VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)'

    _TEST = {
        u'url': u'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
        u'file': u'25665706.mp4',
        u'info_dict': {
            u'title': u'Managing Scale and Complexity',
            u'description': u'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix',
        },
    }

    def _real_extract(self, url):
        """Build the info dict from the page's embedded slideshare_object.

        Raises ExtractorError (marked as expected) when the slideshow type
        found in that object is not u'video'.
        """
        page_title = re.match(self._VALID_URL, url).group('title')
        webpage = self._download_webpage(url, page_title)

        # The page embeds its metadata as a JSON object literal assigned to
        # a JavaScript variable.
        raw_object = self._search_regex(
            r'var slideshare_object = ({.*?}); var user_info =',
            webpage, u'slideshare object')
        info = json.loads(raw_object)

        slideshow = info['slideshow']
        if slideshow['type'] != u'video':
            raise ExtractorError(u'Webpage type is "%s": only video extraction is supported for Slideshare' % slideshow['type'], expected=True)

        doc = info['doc']
        player = info['jsplayer']
        bucket = player['video_bucket']
        ext = player['video_extension']
        # The media file is named "<doc>-SD.<ext>" relative to the bucket URL.
        video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)

        return {
            '_type': 'video',
            'id': slideshow['id'],
            'title': slideshow['title'],
            'ext': ext,
            'url': video_url,
            'thumbnail': slideshow['pin_image_url'],
            'description': self._og_search_description(webpage),
        }
|
|
@ -8,7 +8,7 @@ from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
class SohuIE(InfoExtractor):
|
class SohuIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://tv\.sohu\.com/\d+?/n(?P<id>\d+)\.shtml.*?'
|
_VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
|
u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
|
||||||
|
@ -21,8 +21,11 @@ class SohuIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
||||||
def _fetch_data(vid_id):
|
def _fetch_data(vid_id, mytv=False):
|
||||||
base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
|
if mytv:
|
||||||
|
base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
|
||||||
|
else:
|
||||||
|
base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
|
||||||
data_url = base_data_url + str(vid_id)
|
data_url = base_data_url + str(vid_id)
|
||||||
data_json = self._download_webpage(
|
data_json = self._download_webpage(
|
||||||
data_url, video_id,
|
data_url, video_id,
|
||||||
|
@ -31,15 +34,16 @@ class SohuIE(InfoExtractor):
|
||||||
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
mytv = mobj.group('mytv') is not None
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
|
raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>',
|
||||||
webpage, u'video title')
|
webpage, u'video title')
|
||||||
title = raw_title.partition('-')[0].strip()
|
title = raw_title.partition('-')[0].strip()
|
||||||
|
|
||||||
vid = self._html_search_regex(r'var vid="(\d+)"', webpage,
|
vid = self._html_search_regex(r'var vid ?= ?["\'](\d+)["\']', webpage,
|
||||||
u'video path')
|
u'video path')
|
||||||
data = _fetch_data(vid)
|
data = _fetch_data(vid, mytv)
|
||||||
|
|
||||||
QUALITIES = ('ori', 'super', 'high', 'nor')
|
QUALITIES = ('ori', 'super', 'high', 'nor')
|
||||||
vid_ids = [data['data'][q + 'Vid']
|
vid_ids = [data['data'][q + 'Vid']
|
||||||
|
@ -51,7 +55,7 @@ class SohuIE(InfoExtractor):
|
||||||
# For now, we just pick the highest available quality
|
# For now, we just pick the highest available quality
|
||||||
vid_id = vid_ids[-1]
|
vid_id = vid_ids[-1]
|
||||||
|
|
||||||
format_data = data if vid == vid_id else _fetch_data(vid_id)
|
format_data = data if vid == vid_id else _fetch_data(vid_id, mytv)
|
||||||
part_count = format_data['data']['totalBlocks']
|
part_count = format_data['data']['totalBlocks']
|
||||||
allot = format_data['allot']
|
allot = format_data['allot']
|
||||||
prot = format_data['prot']
|
prot = format_data['prot']
|
||||||
|
|
|
@ -209,7 +209,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
|
||||||
|youtu\.be/ # just youtu.be/xxxx
|
|youtu\.be/ # just youtu.be/xxxx
|
||||||
)
|
)
|
||||||
)? # all until now is optional -> you can pass the naked ID
|
)? # all until now is optional -> you can pass the naked ID
|
||||||
([0-9A-Za-z_-]+) # here is it! the YouTube video ID
|
([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
|
||||||
(?(1).+)? # if we found the ID, everything can follow
|
(?(1).+)? # if we found the ID, everything can follow
|
||||||
$"""
|
$"""
|
||||||
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
|
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
|
||||||
|
@ -484,7 +484,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
|
||||||
elif len(s) == 86:
|
elif len(s) == 86:
|
||||||
return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
|
return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
|
||||||
elif len(s) == 85:
|
elif len(s) == 85:
|
||||||
return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
|
return s[40] + s[82:43:-1] + s[22] + s[42:40:-1] + s[83] + s[39:22:-1] + s[0] + s[21:2:-1]
|
||||||
elif len(s) == 84:
|
elif len(s) == 84:
|
||||||
return s[81:36:-1] + s[0] + s[35:2:-1]
|
return s[81:36:-1] + s[0] + s[35:2:-1]
|
||||||
elif len(s) == 83:
|
elif len(s) == 83:
|
||||||
|
@ -583,7 +583,7 @@ class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
|
||||||
manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
|
manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
|
||||||
formats_urls = _get_urls(manifest)
|
formats_urls = _get_urls(manifest)
|
||||||
for format_url in formats_urls:
|
for format_url in formats_urls:
|
||||||
itag = self._search_regex(r'itag%3D(\d+?)/', format_url, 'itag')
|
itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
|
||||||
url_map[itag] = format_url
|
url_map[itag] = format_url
|
||||||
return url_map
|
return url_map
|
||||||
|
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
|
|
||||||
__version__ = '2013.09.06.1'
|
__version__ = '2013.11.09'
|
||||||
|
|
Loading…
Reference in a new issue