[onionstudios] Add extractor

This commit is contained in:
Sergey M․ 2015-06-24 23:12:13 +06:00
parent 03b9c94437
commit f843300fe5
2 changed files with 68 additions and 0 deletions

View file

@ -388,6 +388,7 @@ from .nytimes import (
from .nuvid import NuvidIE from .nuvid import NuvidIE
from .odnoklassniki import OdnoklassnikiIE from .odnoklassniki import OdnoklassnikiIE
from .oktoberfesttv import OktoberfestTVIE from .oktoberfesttv import OktoberfestTVIE
from .onionstudios import OnionStudiosIE
from .ooyala import ( from .ooyala import (
OoyalaIE, OoyalaIE,
OoyalaExternalIE, OoyalaExternalIE,

View file

@ -0,0 +1,67 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import determine_ext
class OnionStudiosIE(InfoExtractor):
    """Information extractor for videos hosted on onionstudios.com.

    Two URL shapes are matched by ``_VALID_URL``: video pages of the form
    ``/videos/<slug>-<id>`` and embed URLs of the form ``/embed?...id=<id>``.
    In both cases the numeric id is captured and the metadata is scraped
    from the embed page for that id.
    """

    _VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:videos/[^/]+-|embed\?.*\bid=)(?P<id>\d+)(?!-)'
    _TESTS = [{
        'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
        'md5': 'd4851405d31adfadf71cd7a487b765bb',
        'info_dict': {
            'id': '2937',
            'ext': 'mp4',
            'title': 'Hannibal charges forward, stops for a cocktail',
            'description': 'md5:545299bda6abf87e5ec666548c6a9448',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'The A.V. Club',
            'uploader_id': 'TheAVClub',
        },
    }, {
        'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        The video id is taken from *url*, the corresponding embed page is
        downloaded, and formats plus metadata are pulled out of its markup
        with regular expressions.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``uploader``, ``uploader_id`` and ``formats``.
        Optional fields that use a ``None`` default (description, thumbnail,
        uploader) are ``None`` when their pattern is not found.
        """
        video_id = self._match_id(url)

        # The embed page is fetched regardless of which URL form was given.
        webpage = self._download_webpage(
            'http://www.onionstudios.com/embed?id=%s' % video_id, video_id)

        # Collect every <source> URL except HLS manifests.
        formats = [
            {'url': src}
            for src in re.findall(r'<source[^>]+src="([^"]+)"', webpage)
            if determine_ext(src) != 'm3u8'  # m3u8 always results in 403
        ]
        self._sort_formats(formats)

        # Title is looked up without a fallback value.
        title = self._search_regex(
            r'share_title\s*=\s*"([^"]+)"', webpage, 'title')
        description = self._search_regex(
            r'share_description\s*=\s*"([^"]+)"', webpage,
            'description', default=None)
        # default=None (not False) so a missing value is not reported as a
        # boolean in the resulting info dict; matches `description` above.
        thumbnail = self._search_regex(
            r'poster="([^"]+)"', webpage, 'thumbnail', default=None)

        uploader_id = self._search_regex(
            r'twitter_handle\s*=\s*"([^"]+)"',
            webpage, 'uploader id', fatal=False)
        uploader = self._search_regex(
            r'window\.channelName\s*=\s*"Embedded:([^"]+)"',
            webpage, 'uploader', default=None)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'formats': formats,
        }