import re
import urllib.parse

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    merge_dicts,
)


class EroProfileIE(InfoExtractor):
    """Extractor for single video pages under eroprofile.com/m/videos/view/."""

    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
    _LOGIN_URL = 'http://www.eroprofile.com/auth/auth.php?'
    _NETRC_MACHINE = 'eroprofile'
    _TESTS = [{
        'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
        'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
        'info_dict': {
            'id': '3733775',
            'display_id': 'sexy-babe-softcore',
            'ext': 'm4v',
            'title': 'sexy babe softcore',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
        },
        'skip': 'Video not found',
    }, {
        'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
        'md5': '1baa9602ede46ce904c431f5418d8916',
        'info_dict': {
            'id': '1133519',
            'ext': 'm4v',
            'title': 'Try It On Pee_cut_2.wmv - 4shared.com - file sharing - download movie file',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
        },
        'skip': 'Requires login',
    }]

    def _perform_login(self, username, password):
        """Log in by requesting the auth URL with the credentials in the query string.

        Raises ExtractorError (expected) when the response page contains the
        site's "incorrect username or password" message.
        """
        query = urllib.parse.urlencode({
            'username': username,
            'password': password,
            'url': 'http://www.eroprofile.com/',
        })
        login_url = self._LOGIN_URL + query
        login_page = self._download_webpage(login_url, None, False)

        if re.search(r'Your username or password was incorrect\.', login_page):
            raise ExtractorError(
                'Wrong username and/or password.', expected=True)

        self.report_login()
        # The login response references a script URL; it is fetched as the
        # final step of the login flow.
        redirect_url = self._search_regex(
            r'<script[^>]+?src="([^"]+)"', login_page, 'login redirect url')
        self._download_webpage(redirect_url, None, False)

    def _real_extract(self, url):
        """Build the info dict for one video page.

        The URL slug becomes ``display_id``; the numeric video id is searched
        for in the page and is None when neither pattern matches.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        if re.search(r'You must be logged in to view this video\.', webpage):
            self.raise_login_required('This video requires login')

        video_id = self._search_regex(
            [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
            webpage, 'video id', default=None)

        title = self._html_search_regex(
            (r'Title:</th><td>([^<]+)</td>', r'<h1[^>]*>(.+?)</h1>'),
            webpage, 'title')

        entries = self._parse_html5_media_entries(url, webpage, video_id)
        if not entries:
            # Report a readable extractor error instead of a bare IndexError
            # when the page has no HTML5 media element.
            raise ExtractorError('No video found on the page')
        info = entries[0]

        return merge_dicts(info, {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'age_limit': 18,
        })


class EroProfileAlbumIE(InfoExtractor):
    """Extractor for video albums under eroprofile.com/m/videos/album/."""

    _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/album/(?P<id>[^/]+)'
    IE_NAME = 'EroProfile:album'

    _TESTS = [{
        'url': 'https://www.eroprofile.com/m/videos/album/BBW-2-893',
        'info_dict': {
            'id': 'BBW-2-893',
            'title': 'BBW 2',
        },
        'playlist_mincount': 486,
    },
    ]

    def _extract_from_page(self, page):
        """Yield a url_result for every video-view link found in ``page``."""
        for url in re.findall(r'href=".*?(/m/videos/view/[^"]+)"', page):
            yield self.url_result(f'https://www.eroprofile.com{url}', EroProfileIE.ie_key())

    def _entries(self, playlist_id, first_page):
        """Yield the album's entries: first page, then pages 2..max.

        The highest page number is read from the ``?pnum=N`` links on the
        first page.
        """
        yield from self._extract_from_page(first_page)

        # playlist_id comes from the URL, so escape it before embedding it in
        # the pattern.
        page_numbers = re.findall(
            rf'href=".*?/m/videos/album/{re.escape(playlist_id)}\?pnum=(\d+)"', first_page)
        # A single-page album has no pagination links; default to 1 so that
        # max() does not raise ValueError on an empty sequence.
        max_page = max(map(int, page_numbers), default=1)

        for n in range(2, max_page + 1):
            url = f'https://www.eroprofile.com/m/videos/album/{playlist_id}?pnum={n}'
            yield from self._extract_from_page(
                self._download_webpage(url, playlist_id,
                                       note=f'Downloading playlist page {n - 1}'))

    def _real_extract(self, url):
        """Download the album's first page and return a playlist result."""
        playlist_id = self._match_id(url)
        first_page = self._download_webpage(url, playlist_id, note='Downloading playlist')
        playlist_title = self._search_regex(
            r'<title>Album: (.*) - EroProfile</title>', first_page, 'playlist_title')

        return self.playlist_result(self._entries(playlist_id, first_page), playlist_id, playlist_title)