From 5d49d879cc59beb417d04db17b2f18bb438b52e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 9 Mar 2018 23:27:44 +0700 Subject: [PATCH] [raywenderlich] Add extractor (#15251) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/raywenderlich.py | 103 ++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 youtube_dl/extractor/raywenderlich.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6011fd41fb..bef3b82eea 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -875,6 +875,7 @@ from .rai import ( RaiPlayPlaylistIE, RaiIE, ) +from .raywenderlich import RayWenderlichIE from .rbmaradio import RBMARadioIE from .rds import RDSIE from .redbulltv import RedBullTVIE diff --git a/youtube_dl/extractor/raywenderlich.py b/youtube_dl/extractor/raywenderlich.py new file mode 100644 index 0000000000..0f061c4b25 --- /dev/null +++ b/youtube_dl/extractor/raywenderlich.py @@ -0,0 +1,103 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .vimeo import VimeoIE +from ..utils import ( + extract_attributes, + ExtractorError, + orderedSet, + smuggle_url, + unsmuggle_url, + urljoin, +) + + +class RayWenderlichIE(InfoExtractor): + _VALID_URL = r'https?://videos\.raywenderlich\.com/courses/(?P[^/]+)/lessons/(?P\d+)' + + _TESTS = [{ + 'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1', + 'info_dict': { + 'id': '248377018', + 'ext': 'mp4', + 'title': 'Testing In iOS Episode 1: Introduction', + 'duration': 133, + 'uploader': 'Ray Wenderlich', + 'uploader_id': 'user3304672', + }, + 'params': { + 'noplaylist': True, + 'skip_download': True, + }, + 'add_ie': [VimeoIE.ie_key()], + 'expected_warnings': ['HTTP Error 403: Forbidden'], + }, { + 'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1', + 'info_dict': { + 'title': 'Testing in iOS', + 'id': '105-testing-in-ios', + }, + 'params': { + 'noplaylist': False, + }, + 'playlist_count': 29, + }] + + def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + + mobj = re.match(self._VALID_URL, url) + course_id, lesson_id = mobj.group('course_id', 'id') + video_id = '%s/%s' % (course_id, lesson_id) + + webpage = self._download_webpage(url, video_id) + + no_playlist = self._downloader.params.get('noplaylist') + if no_playlist or smuggled_data.get('force_video', False): + if no_playlist: + self.to_screen( + 'Downloading just video %s because of --no-playlist' + % video_id) + if '>Subscribe to unlock' in webpage: + raise ExtractorError( + 'This content is only available for subscribers', + expected=True) + vimeo_id = self._search_regex( + r'data-vimeo-id=["\'](\d+)', webpage, 'video id') + return self.url_result( + VimeoIE._smuggle_referrer( + 'https://player.vimeo.com/video/%s' % vimeo_id, url), + ie=VimeoIE.ie_key(), video_id=vimeo_id) + + self.to_screen( + 'Downloading playlist %s - add --no-playlist to just download video' + % course_id) + + lesson_ids = [lesson_id] + for lesson in re.findall( + r'(]+\bclass=["\']lesson-link[^>]+>)', webpage): + attrs = extract_attributes(lesson) + if not attrs: + continue + lesson_url = attrs.get('href') + if not lesson_url: + continue + lesson_id = self._search_regex( + r'/lessons/(\d+)', lesson_url, 'lesson id', default=None) + if not lesson_id: + continue + lesson_ids.append(lesson_id) + + entries = [] + for lesson_id in orderedSet(lesson_ids): + entries.append(self.url_result( + smuggle_url(urljoin(url, lesson_id), {'force_video': True}), + ie=RayWenderlichIE.ie_key())) + + title = self._search_regex( + r'class=["\']course-title[^>]+>([^<]+)', webpage, 'course title', + default=None) + + return self.playlist_result(entries, course_id, title)