Update to ytdl-2021.01.24.1

This commit is contained in:
Remita Amine 2021-01-16 18:12:05 +01:00 committed by pukkandan
parent f74980cbae
commit a820dc722e
23 changed files with 987 additions and 412 deletions

View file

@ -814,7 +814,7 @@ Available for the media that is a track or a part of a music album:
- `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
- `release_year` (numeric): Year (YYYY) when the album was released
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`.
Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default).
For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dlc test video` and id `BaW_jenozKcj`, this will result in a `youtube-dlc test video-BaW_jenozKcj.mp4` file created in the current directory.

View file

@ -637,13 +637,20 @@ class TestYoutubeDL(unittest.TestCase):
'title2': '%PATH%',
}
def fname(templ):
ydl = YoutubeDL({'outtmpl': templ})
def fname(templ, na_placeholder='NA'):
params = {'outtmpl': templ}
if na_placeholder != 'NA':
params['outtmpl_na_placeholder'] = na_placeholder
ydl = YoutubeDL(params)
return ydl.prepare_filename(info)
self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4')
self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
# Replace missing fields with 'NA'
self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4')
NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(id)s.%(ext)s'
# Replace missing fields with 'NA' by default
self.assertEqual(fname(NA_TEST_OUTTMPL), 'NA-NA-1234.mp4')
# Or by provided placeholder
self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder='none'), 'none-none-1234.mp4')
self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder=''), '--1234.mp4')
self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
self.assertEqual(fname('%(height)6d.%(ext)s'), ' 1080.mp4')
self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080 .mp4')

View file

@ -181,9 +181,12 @@ class YoutubeDL(object):
allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
outtmpl: Template for output names.
restrictfilenames: Do not allow "&" and spaces in file names.
trim_file_name: Limit length of filename (extension excluded).
ignoreerrors: Do not stop on download errors. (Default True when running youtube-dlc, but False when directly accessing YoutubeDL class)
outtmpl_na_placeholder: Placeholder for unavailable meta fields.
restrictfilenames: Do not allow "&" and spaces in file names
trim_file_name: Limit length of filename (extension excluded)
ignoreerrors: Do not stop on download errors
(Default True when running youtube-dlc,
but False when directly accessing YoutubeDL class)
force_generic_extractor: Force downloader to use the generic extractor
overwrites: Overwrite all video and metadata files if True,
overwrite only non-video files if None
@ -741,7 +744,7 @@ class YoutubeDL(object):
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
for k, v in template_dict.items()
if v is not None and not isinstance(v, (list, tuple, dict)))
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
@ -761,8 +764,8 @@ class YoutubeDL(object):
# Missing numeric fields used together with integer presentation types
# in format specification will break the argument substitution since
# string 'NA' is returned for missing fields. We will patch output
# template for missing fields to meet string presentation type.
# string NA placeholder is returned for missing fields. We will patch
# output template for missing fields to meet string presentation type.
for numeric_field in self._NUMERIC_FIELDS:
if numeric_field not in template_dict:
# As of [1] format syntax is:

View file

@ -373,6 +373,7 @@ def _real_main(argv=None):
'listformats': opts.listformats,
'listformats_table': opts.listformats_table,
'outtmpl': outtmpl,
'outtmpl_na_placeholder': opts.outtmpl_na_placeholder,
'paths': opts.paths,
'autonumber_size': opts.autonumber_size,
'autonumber_start': opts.autonumber_start,

View file

@ -256,7 +256,7 @@ class AENetworksShowIE(AENetworksListBaseIE):
'title': 'Ancient Aliens',
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
},
'playlist_mincount': 168,
'playlist_mincount': 150,
}]
_RESOURCE = 'series'
_ITEMS_KEY = 'episodes'

View file

@ -1,13 +1,16 @@
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
class AlJazeeraIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?P<type>program/[^/]+|(?:feature|video)s)/\d{4}/\d{1,2}/\d{1,2}/(?P<id>[^/?&#]+)'
_TESTS = [{
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
'url': 'https://www.aljazeera.com/program/episode/2014/9/19/deliverance',
'info_dict': {
'id': '3792260579001',
'ext': 'mp4',
@ -20,14 +23,34 @@ class AlJazeeraIE(InfoExtractor):
'add_ie': ['BrightcoveNew'],
'skip': 'Not accessible from Travis CI server',
}, {
'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
'url': 'https://www.aljazeera.com/videos/2017/5/11/sierra-leone-709-carat-diamond-to-be-auctioned-off',
'only_matching': True,
}, {
'url': 'https://www.aljazeera.com/features/2017/8/21/transforming-pakistans-buses-into-art',
'only_matching': True,
}]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
def _real_extract(self, url):
program_name = self._match_id(url)
webpage = self._download_webpage(url, program_name)
brightcove_id = self._search_regex(
r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
post_type, name = re.match(self._VALID_URL, url).groups()
post_type = {
'features': 'post',
'program': 'episode',
'videos': 'video',
}[post_type.split('/')[0]]
video = self._download_json(
'https://www.aljazeera.com/graphql', name, query={
'operationName': 'SingleArticleQuery',
'variables': json.dumps({
'name': name,
'postType': post_type,
}),
}, headers={
'wp-site': 'aje',
})['data']['article']['video']
video_id = video['id']
account_id = video.get('accountId') or '665003303001'
player_id = video.get('playerId') or 'BkeSH5BDb'
return self.url_result(
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
'BrightcoveNew', video_id)

View file

@ -1,13 +1,16 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
clean_html,
int_or_none,
try_get,
unified_strdate,
unified_timestamp,
)
@ -22,8 +25,8 @@ class AmericasTestKitchenIE(InfoExtractor):
'ext': 'mp4',
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
'thumbnail': r're:^https?://',
'timestamp': 1523664000,
'upload_date': '20180414',
'timestamp': 1523318400,
'upload_date': '20180410',
'release_date': '20180410',
'series': "America's Test Kitchen",
'season_number': 18,
@ -33,6 +36,27 @@ class AmericasTestKitchenIE(InfoExtractor):
'params': {
'skip_download': True,
},
}, {
# Metadata parsing behaves differently for newer episodes (705) as opposed to older episodes (582 above)
'url': 'https://www.americastestkitchen.com/episode/705-simple-chicken-dinner',
'md5': '06451608c57651e985a498e69cec17e5',
'info_dict': {
'id': '5fbe8c61bda2010001c6763b',
'title': 'Simple Chicken Dinner',
'ext': 'mp4',
'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
'thumbnail': r're:^https?://',
'timestamp': 1610755200,
'upload_date': '20210116',
'release_date': '20210116',
'series': "America's Test Kitchen",
'season_number': 21,
'episode': 'Simple Chicken Dinner',
'episode_number': 3,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
'only_matching': True,
@ -60,7 +84,76 @@ class AmericasTestKitchenIE(InfoExtractor):
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
'ie_key': 'Zype',
'description': clean_html(video.get('description')),
'timestamp': unified_timestamp(video.get('publishDate')),
'release_date': unified_strdate(video.get('publishDate')),
'episode_number': int_or_none(episode.get('number')),
'season_number': int_or_none(episode.get('season')),
'series': try_get(episode, lambda x: x['show']['title']),
'episode': episode.get('title'),
}
class AmericasTestKitchenSeasonIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
_TESTS = [{
# ATK Season
'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
'info_dict': {
'id': 'season_1',
'title': 'Season 1',
},
'playlist_count': 13,
}, {
# Cooks Country Season
'url': 'https://www.cookscountry.com/episodes/browse/season_12',
'info_dict': {
'id': 'season_12',
'title': 'Season 12',
},
'playlist_count': 13,
}]
def _real_extract(self, url):
show_name, season_number = re.match(self._VALID_URL, url).groups()
season_number = int(season_number)
slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
season = 'Season %d' % season_number
season_search = self._download_json(
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
season, headers={
'Origin': 'https://www.%s.com' % show_name,
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
'X-Algolia-Application-Id': 'Y1FNZXUI30',
}, query={
'facetFilters': json.dumps([
'search_season_list:' + season,
'search_document_klass:episode',
'search_show_slug:' + slug,
]),
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
'attributesToHighlight': '',
'hitsPerPage': 1000,
})
def entries():
for episode in (season_search.get('hits') or []):
search_url = episode.get('search_url')
if not search_url:
continue
yield {
'_type': 'url',
'url': 'https://www.%s.com%s' % (show_name, search_url),
'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
'title': episode.get('title'),
'description': episode.get('description'),
'timestamp': unified_timestamp(episode.get('search_document_date')),
'season_number': season_number,
'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)),
'ie_key': AmericasTestKitchenIE.ie_key(),
}
return self.playlist_result(
entries(), 'season_%d' % season_number, season)

View file

@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .yahoo import YahooIE
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
@ -15,9 +15,9 @@ from ..utils import (
)
class AolIE(InfoExtractor):
class AolIE(YahooIE):
IE_NAME = 'aol.com'
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>[0-9a-f]+)'
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
_TESTS = [{
# video with 5min ID
@ -76,10 +76,16 @@ class AolIE(InfoExtractor):
}, {
'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
'only_matching': True,
}, {
# Yahoo video
'url': 'https://www.aol.com/video/play/991e6700-ac02-11ea-99ff-357400036f61/24bbc846-3e30-3c46-915e-fe8ccd7fcc46/',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
if '-' in video_id:
return self._extract_yahoo_video(video_id, 'us')
response = self._download_json(
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,

View file

@ -226,13 +226,13 @@ class ARDMediathekIE(ARDMediathekBaseIE):
if doc.tag == 'rss':
return GenericIE()._extract_rss(url, video_id, doc)
title = self._html_search_regex(
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
r'<meta name="dcterms\.title" content="(.*?)"/>',
r'<h4 class="headline">(.*?)</h4>',
r'<title[^>]*>(.*?)</title>'],
webpage, 'title')
description = self._html_search_meta(
description = self._og_search_description(webpage, default=None) or self._html_search_meta(
'dcterms.abstract', webpage, 'description', default=None)
if description is None:
description = self._html_search_meta(
@ -289,18 +289,18 @@ class ARDMediathekIE(ARDMediathekBaseIE):
class ARDIE(InfoExtractor):
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?:video-?)?(?P<id>[0-9]+))\.html'
_TESTS = [{
# available till 14.02.2019
'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
# available till 7.01.2022
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
'info_dict': {
'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
'id': '102',
'display_id': 'maischberger-die-woche',
'id': '100',
'ext': 'mp4',
'duration': 4435.0,
'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
'upload_date': '20180214',
'duration': 3687.0,
'title': 'maischberger. die woche vom 7. Januar 2021',
'upload_date': '20210107',
'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
@ -355,17 +355,17 @@ class ARDIE(InfoExtractor):
class ARDBetaMediathekIE(ARDMediathekBaseIE):
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?P<mode>player|live|video|sendung|sammlung)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
_TESTS = [{
'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
'info_dict': {
'display_id': 'die-robuste-roswita',
'id': '70153354',
'id': '78566716',
'title': 'Die robuste Roswita',
'description': r're:^Der Mord.*trüber ist als die Ilm.',
'description': r're:^Der Mord.*totgeglaubte Ehefrau Roswita',
'duration': 5316,
'thumbnail': 'https://img.ardmediathek.de/standard/00/70/15/33/90/-1852531467/16x9/960?mandant=ard',
'timestamp': 1577047500,
'upload_date': '20191222',
'thumbnail': 'https://img.ardmediathek.de/standard/00/78/56/67/84/575672121/16x9/960?mandant=ard',
'timestamp': 1596658200,
'upload_date': '20200805',
'ext': 'mp4',
},
}, {

View file

@ -1,142 +1,51 @@
from __future__ import unicode_literals
from .mtv import MTVServicesInfoExtractor
from .common import InfoExtractor
class ComedyCentralIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
(video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes)))
/(?P<title>.*)'''
_VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?)/(?P<id>[0-9a-z]{6})'
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
_TESTS = [{
'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
'url': 'http://www.cc.com/video-clips/5ke9v2/the-daily-show-with-trevor-noah-doc-rivers-and-steve-ballmer---the-nba-player-strike',
'md5': 'b8acb347177c680ff18a292aa2166f80',
'info_dict': {
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
'id': '89ccc86e-1b02-4f83-b0c9-1d9592ecd025',
'ext': 'mp4',
'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
'description': 'After a certain point, breastfeeding becomes c**kblocking.',
'timestamp': 1376798400,
'upload_date': '20130818',
'title': 'The Daily Show with Trevor Noah|August 28, 2020|25|25149|Doc Rivers and Steve Ballmer - The NBA Player Strike',
'description': 'md5:5334307c433892b85f4f5e5ac9ef7498',
'timestamp': 1598670000,
'upload_date': '20200829',
},
}, {
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
'url': 'http://www.cc.com/episodes/pnzzci/drawn-together--american-idol--parody-clip-show-season-3-ep-314',
'only_matching': True,
}]
class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
(?:full-episodes|shows(?=/[^/]+/full-episodes))
/(?P<id>[^?]+)'''
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
_TESTS = [{
'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028',
'info_dict': {
'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."',
'title': 'November 28, 2016 - Ryan Speedo Green',
},
'playlist_count': 4,
}, {
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
'only_matching': True,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
mgid = self._extract_mgid(webpage, url, data_zone='t2_lc_promo1')
videos_info = self._get_videos_info(mgid)
return videos_info
class ToshIE(MTVServicesInfoExtractor):
IE_DESC = 'Tosh.0'
_VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
_FEED_URL = 'http://tosh.cc.com/feeds/mrss'
_TESTS = [{
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
'info_dict': {
'description': 'Tosh asked fans to share their summer plans.',
'title': 'Twitter Users Share Summer Plans',
},
'playlist': [{
'md5': 'f269e88114c1805bb6d7653fecea9e06',
'info_dict': {
'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
'ext': 'mp4',
'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
'description': 'Tosh asked fans to share their summer plans.',
'thumbnail': r're:^https?://.*\.jpg',
# It's really reported to be published on year 2077
'upload_date': '20770610',
'timestamp': 3390510600,
'subtitles': {
'en': 'mincount:3',
},
},
}]
}, {
'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate',
'only_matching': True,
}]
class ComedyCentralTVIE(MTVServicesInfoExtractor):
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/folgen/(?P<id>[0-9a-z]{6})'
_TESTS = [{
'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4',
'url': 'https://www.comedycentral.tv/folgen/pxdpec/josh-investigates-klimawandel-staffel-1-ep-1',
'info_dict': {
'id': 'local_playlist-f99b626bdfe13568579a',
'ext': 'flv',
'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1',
'id': '15907dc3-ec3c-11e8-a442-0e40cf2fc285',
'ext': 'mp4',
'title': 'Josh Investigates',
'description': 'Steht uns das Ende der Welt bevor?',
},
'params': {
# rtmp download
'skip_download': True,
},
}, {
'url': 'http://www.comedycentral.tv/shows/1074-workaholics',
'only_matching': True,
}, {
'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus',
'only_matching': True,
}]
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
_GEO_COUNTRIES = ['DE']
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
mrss_url = self._search_regex(
r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage, 'mrss url', group='url')
return self._get_videos_info_from_url(mrss_url, video_id)
class ComedyCentralShortnameIE(InfoExtractor):
_VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
_TESTS = [{
'url': ':tds',
'only_matching': True,
}, {
'url': ':thedailyshow',
'only_matching': True,
}, {
'url': ':theopposition',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
shortcut_map = {
'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
def _get_feed_query(self, uri):
return {
'accountOverride': 'intl.mtvi.com',
'arcEp': 'web.cc.tv',
'ep': 'b9032c3a',
'imageEp': 'web.cc.tv',
'mgid': uri,
}
return self.url_result(shortcut_map[video_id])

View file

@ -50,7 +50,10 @@ from .animelab import (
AnimeLabIE,
AnimeLabShowsIE,
)
from .americastestkitchen import AmericasTestKitchenIE
from .americastestkitchen import (
AmericasTestKitchenIE,
AmericasTestKitchenSeasonIE,
)
from .animeondemand import AnimeOnDemandIE
from .anvato import AnvatoIE
from .aol import AolIE
@ -244,11 +247,8 @@ from .cnn import (
)
from .coub import CoubIE
from .comedycentral import (
ComedyCentralFullEpisodesIE,
ComedyCentralIE,
ComedyCentralShortnameIE,
ComedyCentralTVIE,
ToshIE,
)
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
from .commonprotocols import (
@ -682,6 +682,11 @@ from .mildom import (
MildomVodIE,
MildomUserVodIE,
)
from .minds import (
MindsIE,
MindsChannelIE,
MindsGroupIE,
)
from .ministrygrid import MinistryGridIE
from .minoto import MinotoIE
from .miomio import MioMioIE
@ -1162,6 +1167,10 @@ from .stitcher import StitcherIE
from .sport5 import Sport5IE
from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE
from .spotify import (
SpotifyIE,
SpotifyShowIE,
)
from .spreaker import (
SpreakerIE,
SpreakerPageIE,
@ -1270,7 +1279,10 @@ from .toutv import TouTvIE
from .toypics import ToypicsUserIE, ToypicsIE
from .traileraddict import TrailerAddictIE
from .trilulilu import TriluliluIE
from .trovolive import TrovoLiveIE
from .trovo import (
TrovoIE,
TrovoVodIE,
)
from .trunews import TruNewsIE
from .trutv import TruTVIE
from .tube8 import Tube8IE

View file

@ -11,7 +11,7 @@ from ..utils import (
class FranceCultureIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
_TEST = {
_TESTS = [{
'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
'info_dict': {
'id': 'rendez-vous-au-pays-des-geeks',
@ -20,10 +20,14 @@ class FranceCultureIE(InfoExtractor):
'title': 'Rendez-vous au pays des geeks',
'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20140301',
'timestamp': 1393642916,
'timestamp': 1393700400,
'vcodec': 'none',
}
}
}, {
# no thumbnail
'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
@ -36,19 +40,19 @@ class FranceCultureIE(InfoExtractor):
</h1>|
<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
).*?
(<button[^>]+data-asset-source="[^"]+"[^>]+>)
(<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
''',
webpage, 'video data'))
video_url = video_data['data-asset-source']
title = video_data.get('data-asset-title') or self._og_search_title(webpage)
video_url = video_data.get('data-url') or video_data['data-asset-source']
title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage)
description = self._html_search_regex(
r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
webpage, 'description', default=None)
thumbnail = self._search_regex(
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
webpage, 'thumbnail', fatal=False)
webpage, 'thumbnail', default=None)
uploader = self._html_search_regex(
r'(?s)<span class="author">(.*?)</span>',
webpage, 'uploader', default=None)
@ -64,6 +68,6 @@ class FranceCultureIE(InfoExtractor):
'ext': ext,
'vcodec': 'none' if ext == 'mp3' else None,
'uploader': uploader,
'timestamp': int_or_none(video_data.get('data-asset-created-date')),
'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')),
'duration': int_or_none(video_data.get('data-duration')),
}

View file

@ -5,7 +5,10 @@ import functools
import json
from .common import InfoExtractor
from ..compat import compat_str
from ..compat import (
compat_str,
compat_urllib_parse_unquote,
)
from ..utils import (
determine_ext,
ExtractorError,
@ -131,6 +134,9 @@ class LBRYIE(LBRYBaseIE):
}, {
'url': 'https://lbry.tv/$/download/Episode-1/e7d93d772bd87e2b62d5ab993c1c3ced86ebb396',
'only_matching': True,
}, {
'url': 'https://lbry.tv/@lacajadepandora:a/TRUMP-EST%C3%81-BIEN-PUESTO-con-Pilar-Baselga,-Carlos-Senra,-Luis-Palacios-(720p_30fps_H264-192kbit_AAC):1',
'only_matching': True,
}]
def _real_extract(self, url):
@ -139,6 +145,7 @@ class LBRYIE(LBRYBaseIE):
display_id = display_id.split('/', 2)[-1].replace('/', ':')
else:
display_id = display_id.replace(':', '#')
display_id = compat_urllib_parse_unquote(display_id)
uri = 'lbry://' + display_id
result = self._resolve_url(uri, display_id, 'stream')
result_value = result['value']

View file

@ -0,0 +1,196 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
clean_html,
int_or_none,
str_or_none,
strip_or_none,
)
class MindsBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?minds\.com/'
def _call_api(self, path, video_id, resource, query=None):
api_url = 'https://www.minds.com/api/' + path
token = self._get_cookies(api_url).get('XSRF-TOKEN')
return self._download_json(
api_url, video_id, 'Downloading %s JSON metadata' % resource, headers={
'Referer': 'https://www.minds.com/',
'X-XSRF-TOKEN': token.value if token else '',
}, query=query)
class MindsIE(MindsBaseIE):
IE_NAME = 'minds'
_VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?:media|newsfeed|archive/view)/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://www.minds.com/media/100000000000086822',
'md5': '215a658184a419764852239d4970b045',
'info_dict': {
'id': '100000000000086822',
'ext': 'mp4',
'title': 'Minds intro sequence',
'thumbnail': r're:https?://.+\.png',
'uploader_id': 'ottman',
'upload_date': '20130524',
'timestamp': 1369404826,
'uploader': 'Bill Ottman',
'view_count': int,
'like_count': int,
'dislike_count': int,
'tags': ['animation'],
'comment_count': int,
'license': 'attribution-cc',
},
}, {
# entity.type == 'activity' and empty title
'url': 'https://www.minds.com/newsfeed/798025111988506624',
'md5': 'b2733a74af78d7fd3f541c4cbbaa5950',
'info_dict': {
'id': '798022190320226304',
'ext': 'mp4',
'title': '798022190320226304',
'uploader': 'ColinFlaherty',
'upload_date': '20180111',
'timestamp': 1515639316,
'uploader_id': 'ColinFlaherty',
},
}, {
'url': 'https://www.minds.com/archive/view/715172106794442752',
'only_matching': True,
}, {
# youtube perma_url
'url': 'https://www.minds.com/newsfeed/1197131838022602752',
'only_matching': True,
}]
def _real_extract(self, url):
entity_id = self._match_id(url)
entity = self._call_api(
'v1/entities/entity/' + entity_id, entity_id, 'entity')['entity']
if entity.get('type') == 'activity':
if entity.get('custom_type') == 'video':
video_id = entity['entity_guid']
else:
return self.url_result(entity['perma_url'])
else:
assert(entity['subtype'] == 'video')
video_id = entity_id
# 1080p and webm formats available only on the sources array
video = self._call_api(
'v2/media/video/' + video_id, video_id, 'video')
formats = []
for source in (video.get('sources') or []):
src = source.get('src')
if not src:
continue
formats.append({
'format_id': source.get('label'),
'height': int_or_none(source.get('size')),
'url': src,
})
self._sort_formats(formats)
entity = video.get('entity') or entity
owner = entity.get('ownerObj') or {}
uploader_id = owner.get('username')
tags = entity.get('tags')
if tags and isinstance(tags, compat_str):
tags = [tags]
thumbnail = None
poster = video.get('poster') or entity.get('thumbnail_src')
if poster:
urlh = self._request_webpage(poster, video_id, fatal=False)
if urlh:
thumbnail = urlh.geturl()
return {
'id': video_id,
'title': entity.get('title') or video_id,
'formats': formats,
'description': clean_html(entity.get('description')) or None,
'license': str_or_none(entity.get('license')),
'timestamp': int_or_none(entity.get('time_created')),
'uploader': strip_or_none(owner.get('name')),
'uploader_id': uploader_id,
'uploader_url': 'https://www.minds.com/' + uploader_id if uploader_id else None,
'view_count': int_or_none(entity.get('play:count')),
'like_count': int_or_none(entity.get('thumbs:up:count')),
'dislike_count': int_or_none(entity.get('thumbs:down:count')),
'tags': tags,
'comment_count': int_or_none(entity.get('comments:count')),
'thumbnail': thumbnail,
}
class MindsFeedBaseIE(MindsBaseIE):
_PAGE_SIZE = 150
def _entries(self, feed_id):
query = {'limit': self._PAGE_SIZE, 'sync': 1}
i = 1
while True:
data = self._call_api(
'v2/feeds/container/%s/videos' % feed_id,
feed_id, 'page %s' % i, query)
entities = data.get('entities') or []
for entity in entities:
guid = entity.get('guid')
if not guid:
continue
yield self.url_result(
'https://www.minds.com/newsfeed/' + guid,
MindsIE.ie_key(), guid)
query['from_timestamp'] = data['load-next']
if not (query['from_timestamp'] and len(entities) == self._PAGE_SIZE):
break
i += 1
def _real_extract(self, url):
feed_id = self._match_id(url)
feed = self._call_api(
'v1/%s/%s' % (self._FEED_PATH, feed_id),
feed_id, self._FEED_TYPE)[self._FEED_TYPE]
return self.playlist_result(
self._entries(feed['guid']), feed_id,
strip_or_none(feed.get('name')),
feed.get('briefdescription'))
class MindsChannelIE(MindsFeedBaseIE):
_FEED_TYPE = 'channel'
IE_NAME = 'minds:' + _FEED_TYPE
_VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?!(?:newsfeed|media|api|archive|groups)/)(?P<id>[^/?&#]+)'
_FEED_PATH = 'channel'
_TEST = {
'url': 'https://www.minds.com/ottman',
'info_dict': {
'id': 'ottman',
'title': 'Bill Ottman',
'description': 'Co-creator & CEO @minds',
},
'playlist_mincount': 54,
}
class MindsGroupIE(MindsFeedBaseIE):
_FEED_TYPE = 'group'
IE_NAME = 'minds:' + _FEED_TYPE
_VALID_URL = MindsBaseIE._VALID_URL_BASE + r'groups/profile/(?P<id>[0-9]+)'
_FEED_PATH = 'groups/group'
_TEST = {
'url': 'https://www.minds.com/groups/profile/785582576369672204/feed/videos',
'info_dict': {
'id': '785582576369672204',
'title': 'Cooking Videos',
},
'playlist_mincount': 1,
}

View file

@ -255,6 +255,10 @@ class MTVServicesInfoExtractor(InfoExtractor):
return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
@staticmethod
def _extract_child_with_type(parent, t):
return next(c for c in parent['children'] if c.get('type') == t)
def _extract_new_triforce_mgid(self, webpage, url='', video_id=None):
if url == '':
return
@ -332,6 +336,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
if not mgid:
mgid = self._extract_triforce_mgid(webpage, data_zone)
if not mgid:
data = self._parse_json(self._search_regex(
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
main_container = self._extract_child_with_type(data, 'MainContainer')
video_player = self._extract_child_with_type(main_container, 'VideoPlayer')
mgid = video_player['props']['media']['video']['config']['uri']
return mgid
def _real_extract(self, url):
@ -403,18 +414,6 @@ class MTVIE(MTVServicesInfoExtractor):
'only_matching': True,
}]
@staticmethod
def extract_child_with_type(parent, t):
children = parent['children']
return next(c for c in children if c.get('type') == t)
def _extract_mgid(self, webpage):
data = self._parse_json(self._search_regex(
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
main_container = self.extract_child_with_type(data, 'MainContainer')
video_player = self.extract_child_with_type(main_container, 'VideoPlayer')
return video_player['props']['media']['video']['config']['uri']
class MTVJapanIE(MTVServicesInfoExtractor):
IE_NAME = 'mtvjapan'

View file

@ -1,104 +1,125 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import str_to_int
from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
try_get,
url_or_none,
)
class NineGagIE(InfoExtractor):
IE_NAME = '9gag'
_VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/(?:p|embed)/(?P<id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^?#/]+))?'
_VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
_TESTS = [{
'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
_TEST = {
'url': 'https://9gag.com/gag/ae5Ag7B',
'info_dict': {
'id': 'kXzwOKyGlSA',
'id': 'ae5Ag7B',
'ext': 'mp4',
'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA',
'uploader': 'CompilationChannel',
'upload_date': '20131110',
'view_count': int,
},
'add_ie': ['Youtube'],
}, {
'url': 'http://9gag.com/tv/p/aKolP3',
'info_dict': {
'id': 'aKolP3',
'ext': 'mp4',
'title': 'This Guy Travelled 11 countries In 44 days Just To Make This Amazing Video',
'description': "I just saw more in 1 minute than I've seen in 1 year. This guy's video is epic!!",
'uploader_id': 'rickmereki',
'uploader': 'Rick Mereki',
'upload_date': '20110803',
'view_count': int,
},
'add_ie': ['Vimeo'],
}, {
'url': 'http://9gag.com/tv/p/KklwM',
'only_matching': True,
}, {
'url': 'http://9gag.tv/p/Kk2X5',
'only_matching': True,
}, {
'url': 'http://9gag.com/tv/embed/a5Dmvl',
'only_matching': True,
}]
_EXTERNAL_VIDEO_PROVIDER = {
'1': {
'url': '%s',
'ie_key': 'Youtube',
},
'2': {
'url': 'http://player.vimeo.com/video/%s',
'ie_key': 'Vimeo',
},
'3': {
'url': 'http://instagram.com/p/%s',
'ie_key': 'Instagram',
},
'4': {
'url': 'http://vine.co/v/%s',
'ie_key': 'Vine',
},
'title': 'Capybara Agility Training',
'upload_date': '20191108',
'timestamp': 1573237208,
'categories': ['Awesome'],
'tags': ['Weimaraner', 'American Pit Bull Terrier'],
'duration': 44,
'like_count': int,
'dislike_count': int,
'comment_count': int,
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
post_id = self._match_id(url)
post = self._download_json(
'https://9gag.com/v1/post', post_id, query={
'id': post_id
})['data']['post']
webpage = self._download_webpage(url, display_id)
if post.get('type') != 'Animated':
raise ExtractorError(
'The given url does not contain a video',
expected=True)
post_view = self._parse_json(
self._search_regex(
r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost',
webpage, 'post view'),
display_id)
title = post['title']
ie_key = None
source_url = post_view.get('sourceUrl')
if not source_url:
external_video_id = post_view['videoExternalId']
external_video_provider = post_view['videoExternalProvider']
source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id
ie_key = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key']
title = post_view['title']
description = post_view.get('description')
view_count = str_to_int(post_view.get('externalView'))
thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
duration = None
formats = []
thumbnails = []
for key, image in (post.get('images') or {}).items():
image_url = url_or_none(image.get('url'))
if not image_url:
continue
ext = determine_ext(image_url)
image_id = key.strip('image')
common = {
'url': image_url,
'width': int_or_none(image.get('width')),
'height': int_or_none(image.get('height')),
}
if ext in ('jpg', 'png'):
webp_url = image.get('webpUrl')
if webp_url:
t = common.copy()
t.update({
'id': image_id + '-webp',
'url': webp_url,
})
thumbnails.append(t)
common.update({
'id': image_id,
'ext': ext,
})
thumbnails.append(common)
elif ext in ('webm', 'mp4'):
if not duration:
duration = int_or_none(image.get('duration'))
common['acodec'] = 'none' if image.get('hasAudio') == 0 else None
for vcodec in ('vp8', 'vp9', 'h265'):
c_url = image.get(vcodec + 'Url')
if not c_url:
continue
c_f = common.copy()
c_f.update({
'format_id': image_id + '-' + vcodec,
'url': c_url,
'vcodec': vcodec,
})
formats.append(c_f)
common.update({
'ext': ext,
'format_id': image_id,
})
formats.append(common)
self._sort_formats(formats)
section = try_get(post, lambda x: x['postSection']['name'])
tags = None
post_tags = post.get('tags')
if post_tags:
tags = []
for tag in post_tags:
tag_key = tag.get('key')
if not tag_key:
continue
tags.append(tag_key)
get_count = lambda x: int_or_none(post.get(x + 'Count'))
return {
'_type': 'url_transparent',
'url': source_url,
'ie_key': ie_key,
'id': video_id,
'display_id': display_id,
'id': post_id,
'title': title,
'description': description,
'view_count': view_count,
'thumbnail': thumbnail,
'timestamp': int_or_none(post.get('creationTs')),
'duration': duration,
'formats': formats,
'thumbnails': thumbnails,
'like_count': get_count('upVote'),
'dislike_count': get_count('downVote'),
'comment_count': get_count('comments'),
'age_limit': 18 if post.get('nsfw') == 1 else None,
'categories': [section] if section else None,
'tags': tags,
}

View file

@ -6,30 +6,40 @@ import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
extract_attributes,
get_element_by_class,
urlencode_postdata,
)
class NJPWWorldIE(InfoExtractor):
_VALID_URL = r'https?://njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
_VALID_URL = r'https?://(front\.)?njpwworld\.com/p/(?P<id>[a-z0-9_]+)'
IE_DESC = '新日本プロレスワールド'
_NETRC_MACHINE = 'njpwworld'
_TEST = {
_TESTS = [{
'url': 'http://njpwworld.com/p/s_series_00155_1_9/',
'info_dict': {
'id': 's_series_00155_1_9',
'ext': 'mp4',
'title': '第9試合 ランディ・サベージ vs リック・スタイナー',
'title': '闘強導夢2000 2000年1月4日 東京ドーム 第9試合 ランディ・サベージ VS リック・スタイナー',
'tags': list,
},
'params': {
'skip_download': True, # AES-encrypted m3u8
},
'skip': 'Requires login',
}
}, {
'url': 'https://front.njpwworld.com/p/s_series_00563_16_bs',
'info_dict': {
'id': 's_series_00563_16_bs',
'ext': 'mp4',
'title': 'WORLD TAG LEAGUE 2020 & BEST OF THE SUPER Jr.27 2020年12月6日 福岡・福岡国際センター バックステージコメント(字幕あり)',
'tags': ["福岡・福岡国際センター", "バックステージコメント", "2020", "20年代"],
},
'params': {
'skip_download': True,
},
}]
_LOGIN_URL = 'https://front.njpwworld.com/auth/login'
@ -64,35 +74,27 @@ class NJPWWorldIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
formats = []
for mobj in re.finditer(r'<a[^>]+\bhref=(["\'])/player.+?[^>]*>', webpage):
player = extract_attributes(mobj.group(0))
player_path = player.get('href')
if not player_path:
continue
kind = self._search_regex(
r'(low|high)$', player.get('class') or '', 'kind',
default='low')
for kind, vid in re.findall(r'if\s+\(\s*imageQualityType\s*==\s*\'([^\']+)\'\s*\)\s*{\s*video_id\s*=\s*"(\d+)"', webpage):
player_path = '/intent?id=%s&type=url' % vid
player_url = compat_urlparse.urljoin(url, player_path)
player_page = self._download_webpage(
player_url, video_id, note='Downloading player page')
entries = self._parse_html5_media_entries(
player_url, player_page, video_id, m3u8_id='hls-%s' % kind,
m3u8_entry_protocol='m3u8_native')
kind_formats = entries[0]['formats']
for f in kind_formats:
f['quality'] = 2 if kind == 'high' else 1
formats.extend(kind_formats)
formats.append({
'url': player_url,
'format_id': kind,
'ext': 'mp4',
'protocol': 'm3u8',
'quality': 2 if kind == 'high' else 1,
})
self._sort_formats(formats)
post_content = get_element_by_class('post-content', webpage)
tag_block = get_element_by_class('tag-block', webpage)
tags = re.findall(
r'<li[^>]+class="tag-[^"]+"><a[^>]*>([^<]+)</a></li>', post_content
) if post_content else None
r'<a[^>]+class="tag-[^"]+"[^>]*>([^<]+)</a>', tag_block
) if tag_block else None
return {
'id': video_id,
'title': self._og_search_title(webpage),
'title': get_element_by_class('article-title', webpage) or self._og_search_title(webpage),
'formats': formats,
'tags': tags,
}

View file

@ -20,19 +20,6 @@ class BellatorIE(MTVServicesInfoExtractor):
_FEED_URL = 'http://www.bellator.com/feeds/mrss/'
_GEO_COUNTRIES = ['US']
def _extract_mgid(self, webpage, url):
mgid = None
if not mgid:
mgid = self._extract_triforce_mgid(webpage)
if not mgid:
mgid = self._extract_new_triforce_mgid(webpage, url)
return mgid
# TODO Remove - Reason: Outdated Site
class ParamountNetworkIE(MTVServicesInfoExtractor):
_VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
@ -56,16 +43,6 @@ class ParamountNetworkIE(MTVServicesInfoExtractor):
def _get_feed_query(self, uri):
return {
'arcEp': 'paramountnetwork.com',
'imageEp': 'paramountnetwork.com',
'mgid': uri,
}
def _extract_mgid(self, webpage, url):
root_data = self._parse_json(self._search_regex(
r'window\.__DATA__\s*=\s*({.+})',
webpage, 'data'), None)
def find_sub_data(data, data_type):
return next(c for c in data['children'] if c.get('type') == data_type)
c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer')
return c['props']['media']['video']['config']['uri']

View file

@ -0,0 +1,156 @@
# coding: utf-8
from __future__ import unicode_literals
import json
import re
from .common import InfoExtractor
from ..utils import (
clean_podcast_url,
float_or_none,
int_or_none,
strip_or_none,
try_get,
unified_strdate,
)
class SpotifyBaseIE(InfoExtractor):
_ACCESS_TOKEN = None
_OPERATION_HASHES = {
'Episode': '8276d4423d709ae9b68ec1b74cc047ba0f7479059a37820be730f125189ac2bf',
'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0',
'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d',
}
_VALID_URL_TEMPL = r'https?://open\.spotify\.com/%s/(?P<id>[^/?&#]+)'
def _real_initialize(self):
self._ACCESS_TOKEN = self._download_json(
'https://open.spotify.com/get_access_token', None)['accessToken']
def _call_api(self, operation, video_id, variables):
return self._download_json(
'https://api-partner.spotify.com/pathfinder/v1/query', video_id, query={
'operationName': 'query' + operation,
'variables': json.dumps(variables),
'extensions': json.dumps({
'persistedQuery': {
'sha256Hash': self._OPERATION_HASHES[operation],
},
})
}, headers={'authorization': 'Bearer ' + self._ACCESS_TOKEN})['data']
def _extract_episode(self, episode, series):
episode_id = episode['id']
title = episode['name'].strip()
formats = []
audio_preview = episode.get('audioPreview') or {}
audio_preview_url = audio_preview.get('url')
if audio_preview_url:
f = {
'url': audio_preview_url.replace('://p.scdn.co/mp3-preview/', '://anon-podcast.scdn.co/'),
'vcodec': 'none',
}
audio_preview_format = audio_preview.get('format')
if audio_preview_format:
f['format_id'] = audio_preview_format
mobj = re.match(r'([0-9A-Z]{3})_(?:[A-Z]+_)?(\d+)', audio_preview_format)
if mobj:
f.update({
'abr': int(mobj.group(2)),
'ext': mobj.group(1).lower(),
})
formats.append(f)
for item in (try_get(episode, lambda x: x['audio']['items']) or []):
item_url = item.get('url')
if not (item_url and item.get('externallyHosted')):
continue
formats.append({
'url': clean_podcast_url(item_url),
'vcodec': 'none',
})
thumbnails = []
for source in (try_get(episode, lambda x: x['coverArt']['sources']) or []):
source_url = source.get('url')
if not source_url:
continue
thumbnails.append({
'url': source_url,
'width': int_or_none(source.get('width')),
'height': int_or_none(source.get('height')),
})
return {
'id': episode_id,
'title': title,
'formats': formats,
'thumbnails': thumbnails,
'description': strip_or_none(episode.get('description')),
'duration': float_or_none(try_get(
episode, lambda x: x['duration']['totalMilliseconds']), 1000),
'release_date': unified_strdate(try_get(
episode, lambda x: x['releaseDate']['isoString'])),
'series': series,
}
class SpotifyIE(SpotifyBaseIE):
IE_NAME = 'spotify'
_VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode'
_TEST = {
'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo',
'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b',
'info_dict': {
'id': '4Z7GAJ50bgctf6uclHlWKo',
'ext': 'mp3',
'title': 'From the archive: Why time management is ruining our lives',
'description': 'md5:b120d9c4ff4135b42aa9b6d9cde86935',
'duration': 2083.605,
'release_date': '20201217',
'series': "The Guardian's Audio Long Reads",
}
}
def _real_extract(self, url):
episode_id = self._match_id(url)
episode = self._call_api('Episode', episode_id, {
'uri': 'spotify:episode:' + episode_id
})['episode']
return self._extract_episode(
episode, try_get(episode, lambda x: x['podcast']['name']))
class SpotifyShowIE(SpotifyBaseIE):
IE_NAME = 'spotify:show'
_VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show'
_TEST = {
'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M',
'info_dict': {
'id': '4PM9Ke6l66IRNpottHKV9M',
'title': 'The Story from the Guardian',
'description': 'The Story podcast is dedicated to our finest audio documentaries, investigations and long form stories',
},
'playlist_mincount': 36,
}
def _real_extract(self, url):
show_id = self._match_id(url)
podcast = self._call_api('ShowEpisodes', show_id, {
'limit': 1000000000,
'offset': 0,
'uri': 'spotify:show:' + show_id,
})['podcast']
podcast_name = podcast.get('name')
entries = []
for item in (try_get(podcast, lambda x: x['episodes']['items']) or []):
episode = item.get('episode')
if not episode:
continue
entries.append(self._extract_episode(episode, podcast_name))
return self.playlist_result(
entries, show_id, podcast_name, podcast.get('description'))

View file

@ -0,0 +1,193 @@
# coding: utf-8
from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
int_or_none,
str_or_none,
try_get,
)
class TrovoBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/'
def _extract_streamer_info(self, data):
streamer_info = data.get('streamerInfo') or {}
username = streamer_info.get('userName')
return {
'uploader': streamer_info.get('nickName'),
'uploader_id': str_or_none(streamer_info.get('uid')),
'uploader_url': 'https://trovo.live/' + username if username else None,
}
class TrovoIE(TrovoBaseIE):
_VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?!(?:clip|video)/)(?P<id>[^/?&#]+)'
def _real_extract(self, url):
username = self._match_id(url)
live_info = self._download_json(
'https://gql.trovo.live/', username, query={
'query': '''{
getLiveInfo(params: {userName: "%s"}) {
isLive
programInfo {
coverUrl
id
streamInfo {
desc
playUrl
}
title
}
streamerInfo {
nickName
uid
userName
}
}
}''' % username,
})['data']['getLiveInfo']
if live_info.get('isLive') == 0:
raise ExtractorError('%s is offline' % username, expected=True)
program_info = live_info['programInfo']
program_id = program_info['id']
title = self._live_title(program_info['title'])
formats = []
for stream_info in (program_info.get('streamInfo') or []):
play_url = stream_info.get('playUrl')
if not play_url:
continue
format_id = stream_info.get('desc')
formats.append({
'format_id': format_id,
'height': int_or_none(format_id[:-1]) if format_id else None,
'url': play_url,
})
self._sort_formats(formats)
info = {
'id': program_id,
'title': title,
'formats': formats,
'thumbnail': program_info.get('coverUrl'),
'is_live': True,
}
info.update(self._extract_streamer_info(live_info))
return info
class TrovoVodIE(TrovoBaseIE):
_VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)'
_TESTS = [{
'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
'info_dict': {
'id': 'ltv-100095501_100095501_1609596043',
'ext': 'mp4',
'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!',
'uploader': 'Exsl',
'timestamp': 1609640305,
'upload_date': '20210103',
'uploader_id': '100095501',
'duration': 43977,
'view_count': int,
'like_count': int,
'comment_count': int,
'comments': 'mincount:8',
'categories': ['Grand Theft Auto V'],
},
}, {
'url': 'https://trovo.live/clip/lc-5285890810184026005',
'only_matching': True,
}]
def _real_extract(self, url):
vid = self._match_id(url)
resp = self._download_json(
'https://gql.trovo.live/', vid, data=json.dumps([{
'query': '''{
batchGetVodDetailInfo(params: {vids: ["%s"]}) {
VodDetailInfos
}
}''' % vid,
}, {
'query': '''{
getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) {
commentList {
author {
nickName
uid
}
commentID
content
createdAt
parentID
}
}
}''' % vid,
}]).encode(), headers={
'Content-Type': 'application/json',
})
vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid]
vod_info = vod_detail_info['vodInfo']
title = vod_info['title']
language = vod_info.get('languageName')
formats = []
for play_info in (vod_info.get('playInfos') or []):
play_url = play_info.get('playUrl')
if not play_url:
continue
format_id = play_info.get('desc')
formats.append({
'ext': 'mp4',
'filesize': int_or_none(play_info.get('fileSize')),
'format_id': format_id,
'height': int_or_none(format_id[:-1]) if format_id else None,
'language': language,
'protocol': 'm3u8_native',
'tbr': int_or_none(play_info.get('bitrate')),
'url': play_url,
})
self._sort_formats(formats)
category = vod_info.get('categoryName')
get_count = lambda x: int_or_none(vod_info.get(x + 'Num'))
comment_list = try_get(resp, lambda x: x[1]['data']['getCommentList']['commentList'], list) or []
comments = []
for comment in comment_list:
content = comment.get('content')
if not content:
continue
author = comment.get('author') or {}
parent = comment.get('parentID')
comments.append({
'author': author.get('nickName'),
'author_id': str_or_none(author.get('uid')),
'id': str_or_none(comment.get('commentID')),
'text': content,
'timestamp': int_or_none(comment.get('createdAt')),
'parent': 'root' if parent == 0 else str_or_none(parent),
})
info = {
'id': vid,
'title': title,
'formats': formats,
'thumbnail': vod_info.get('coverUrl'),
'timestamp': int_or_none(vod_info.get('publishTs')),
'duration': int_or_none(vod_info.get('duration')),
'view_count': get_count('watch'),
'like_count': get_count('like'),
'comment_count': get_count('comment'),
'comments': comments,
'categories': [category] if category else None,
}
info.update(self._extract_streamer_info(vod_detail_info))
return info

View file

@ -1,12 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
unified_strdate,
HEADRequest,
int_or_none,
@ -46,15 +43,6 @@ class WatIE(InfoExtractor):
},
]
_FORMATS = (
(200, 416, 234),
(400, 480, 270),
(600, 640, 360),
(1200, 640, 360),
(1800, 960, 540),
(2500, 1280, 720),
)
def _real_extract(self, url):
video_id = self._match_id(url)
video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
@ -97,46 +85,20 @@ class WatIE(InfoExtractor):
return red_url
return None
def remove_bitrate_limit(manifest_url):
return re.sub(r'(?:max|min)_bitrate=\d+&?', '', manifest_url)
formats = []
try:
alt_urls = lambda manifest_url: [re.sub(r'(?:wdv|ssm)?\.ism/', repl + '.ism/', manifest_url) for repl in ('', 'ssm')]
manifest_urls = self._download_json(
'http://www.wat.tv/get/webhtml/' + video_id, video_id)
m3u8_url = manifest_urls.get('hls')
if m3u8_url:
m3u8_url = remove_bitrate_limit(m3u8_url)
for m3u8_alt_url in alt_urls(m3u8_url):
formats.extend(self._extract_m3u8_formats(
m3u8_alt_url, video_id, 'mp4',
m3u8_url, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False))
formats.extend(self._extract_f4m_formats(
m3u8_alt_url.replace('ios', 'web').replace('.m3u8', '.f4m'),
video_id, f4m_id='hds', fatal=False))
mpd_url = manifest_urls.get('mpd')
if mpd_url:
mpd_url = remove_bitrate_limit(mpd_url)
for mpd_alt_url in alt_urls(mpd_url):
formats.extend(self._extract_mpd_formats(
mpd_alt_url, video_id, mpd_id='dash', fatal=False))
mpd_url.replace('://das-q1.tf1.fr/', '://das-q1-ssl.tf1.fr/'),
video_id, mpd_id='dash', fatal=False))
self._sort_formats(formats)
except ExtractorError:
abr = 64
for vbr, width, height in self._FORMATS:
tbr = vbr + abr
format_id = 'http-%s' % tbr
fmt_url = 'http://dnl.adv.tf1.fr/2/USP-0x0/%s/%s/%s/ssm/%s-%s-64k.mp4' % (video_id[-4:-2], video_id[-2:], video_id, video_id, vbr)
if self._is_valid_url(fmt_url, video_id, format_id):
formats.append({
'format_id': format_id,
'url': fmt_url,
'vbr': vbr,
'abr': abr,
'width': width,
'height': height,
})
date_diffusion = first_chapter.get('date_diffusion') or video_data.get('configv4', {}).get('estatS4')
upload_date = unified_strdate(date_diffusion) if date_diffusion else None

View file

@ -177,46 +177,9 @@ class YahooIE(InfoExtractor):
'only_matching': True,
}]
def _real_extract(self, url):
url, country, display_id = re.match(self._VALID_URL, url).groups()
if not country:
country = 'us'
else:
country = country.split('-')[0]
api_base = 'https://%s.yahoo.com/_td/api/resource/' % country
for i, uuid in enumerate(['url=' + url, 'ymedia-alias=' + display_id]):
content = self._download_json(
api_base + 'content;getDetailView=true;uuids=["%s"]' % uuid,
display_id, 'Downloading content JSON metadata', fatal=i == 1)
if content:
item = content['items'][0]
break
if item.get('type') != 'video':
entries = []
cover = item.get('cover') or {}
if cover.get('type') == 'yvideo':
cover_url = cover.get('url')
if cover_url:
entries.append(self.url_result(
cover_url, 'Yahoo', cover.get('uuid')))
for e in item.get('body', []):
if e.get('type') == 'videoIframe':
iframe_url = e.get('url')
if not iframe_url:
continue
entries.append(self.url_result(iframe_url))
return self.playlist_result(
entries, item.get('uuid'),
item.get('title'), item.get('summary'))
video_id = item['uuid']
def _extract_yahoo_video(self, video_id, country):
video = self._download_json(
api_base + 'VideoService.videos;view=full;video_ids=["%s"]' % video_id,
'https://%s.yahoo.com/_td/api/resource/VideoService.videos;view=full;video_ids=["%s"]' % (country, video_id),
video_id, 'Downloading video JSON metadata')[0]
title = video['title']
@ -298,7 +261,6 @@ class YahooIE(InfoExtractor):
'id': video_id,
'title': self._live_title(title) if is_live else title,
'formats': formats,
'display_id': display_id,
'thumbnails': thumbnails,
'description': clean_html(video.get('description')),
'timestamp': parse_iso8601(video.get('publish_time')),
@ -311,6 +273,44 @@ class YahooIE(InfoExtractor):
'episode_number': int_or_none(series_info.get('episode_number')),
}
def _real_extract(self, url):
url, country, display_id = re.match(self._VALID_URL, url).groups()
if not country:
country = 'us'
else:
country = country.split('-')[0]
item = self._download_json(
'https://%s.yahoo.com/caas/content/article' % country, display_id,
'Downloading content JSON metadata', query={
'url': url
})['items'][0]['data']['partnerData']
if item.get('type') != 'video':
entries = []
cover = item.get('cover') or {}
if cover.get('type') == 'yvideo':
cover_url = cover.get('url')
if cover_url:
entries.append(self.url_result(
cover_url, 'Yahoo', cover.get('uuid')))
for e in (item.get('body') or []):
if e.get('type') == 'videoIframe':
iframe_url = e.get('url')
if not iframe_url:
continue
entries.append(self.url_result(iframe_url))
return self.playlist_result(
entries, item.get('uuid'),
item.get('title'), item.get('summary'))
info = self._extract_yahoo_video(item['uuid'], country)
info['display_id'] = display_id
return info
class YahooSearchIE(SearchInfoExtractor):
IE_DESC = 'Yahoo screen search'

View file

@ -842,6 +842,10 @@ def parseOpts(overrideArguments=None):
'-o', '--output',
dest='outtmpl', metavar='TEMPLATE',
help='Output filename template, see "OUTPUT TEMPLATE" for details')
filesystem.add_option(
'--output-na-placeholder',
dest='outtmpl_na_placeholder', metavar='PLACEHOLDER', default='NA',
help=('Placeholder value for unavailable meta fields in output filename template (default is "%default")'))
filesystem.add_option(
'--autonumber-size',
dest='autonumber_size', metavar='NUMBER', type=int,
@ -997,7 +1001,7 @@ def parseOpts(overrideArguments=None):
postproc.add_option(
'-x', '--extract-audio',
action='store_true', dest='extractaudio', default=False,
help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
help='Convert video files to audio-only files (requires ffmpeg/avconv and ffprobe/avprobe)')
postproc.add_option(
'--audio-format', metavar='FORMAT', dest='audioformat', default='best',
help='Specify audio format: "best", "aac", "flac", "mp3", "m4a", "opus", "vorbis", or "wav"; "%default" by default; No effect without -x')