From 0bb1bc1b107b9c3d68ea0c887bd09cad75d7714d Mon Sep 17 00:00:00 2001
From: coletdjnz <colethedj@protonmail.com>
Date: Tue, 24 Aug 2021 15:52:40 +1200
Subject: [PATCH 001/641] [youtube] Remove annotations and deprecate
 `--write-annotations` (#765)

Closes #692
Authored by: coletdjnz
---
 README.md                                     |  2 ++
 ....py => test_write_annotations.py.disabled} |  0
 yt_dlp/extractor/youtube.py                   | 36 +------------------
 yt_dlp/options.py                             |  4 +--
 4 files changed, 5 insertions(+), 37 deletions(-)
 rename test/{test_write_annotations.py => test_write_annotations.py.disabled} (100%)

diff --git a/README.md b/README.md
index 248b7e688c..917350bdad 100644
--- a/README.md
+++ b/README.md
@@ -1500,6 +1500,8 @@ These options may no longer work as intended
     --no-call-home                   Default
     --include-ads                    No longer supported
     --no-include-ads                 Default
+    --write-annotations              No supported site has annotations now
+    --no-write-annotations           Default

 #### Removed
 These options were deprecated since 2014 and have now been entirely removed
diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py.disabled
similarity index 100%
rename from test/test_write_annotations.py
rename to test/test_write_annotations.py.disabled
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 375eca8f84..9ca81e6cb7 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -59,7 +59,6 @@ from ..utils import (
     unsmuggle_url,
     update_url_query,
     url_or_none,
-    urlencode_postdata,
     urljoin,
     variadic,
 )
@@ -3168,40 +3167,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             needs_auth=info['age_limit'] >= 18,
             is_unlisted=None if is_private is None else is_unlisted)

-        # get xsrf for annotations or comments
-        get_annotations = self.get_param('writeannotations', False)
-        get_comments = self.get_param('getcomments', False)
-        if get_annotations or get_comments:
-            xsrf_token = None
-            if master_ytcfg:
-                xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
-            if not xsrf_token:
-                xsrf_token = self._search_regex(
-                    r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
-                    webpage, 'xsrf token', group='xsrf_token', fatal=False)
-
-        # annotations
-        if get_annotations:
-            invideo_url = get_first(
-                player_responses,
-                ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
-                expected_type=str)
-            if xsrf_token and invideo_url:
-                xsrf_field_name = None
-                if master_ytcfg:
-                    xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
-                if not xsrf_field_name:
-                    xsrf_field_name = self._search_regex(
-                        r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
-                        webpage, 'xsrf field name',
-                        group='xsrf_field_name', default='session_token')
-                info['annotations'] = self._download_webpage(
-                    self._proto_relative_url(invideo_url),
-                    video_id, note='Downloading annotations',
-                    errnote='Unable to download video annotations', fatal=False,
-                    data=urlencode_postdata({xsrf_field_name: xsrf_token}))
-
-        if get_comments:
+        if self.get_param('getcomments', False):
             info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)

         self.mark_watched(video_id, player_responses)
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 6bad37d198..86aad33939 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -1070,11 +1070,11 @@ def parseOpts(overrideArguments=None):
     filesystem.add_option(
         '--write-annotations',
         action='store_true', dest='writeannotations', default=False,
-        help='Write video annotations to a .annotations.xml file')
+        help=optparse.SUPPRESS_HELP)
     filesystem.add_option(
        '--no-write-annotations',
         action='store_false', dest='writeannotations',
-        help='Do not write video annotations (default)')
+        help=optparse.SUPPRESS_HELP)
     filesystem.add_option(
         '--write-playlist-metafiles',
         action='store_true', dest='allow_playlist_files', default=None,

From 63b1ad0f05aa2cbb408b0e9112124b049a664377 Mon Sep 17 00:00:00 2001
From: i6t <62123048+i6t@users.noreply.github.com>
Date: Wed, 25 Aug 2021 06:36:15 +0900
Subject: [PATCH 002/641] [iwara] Add thumbnail (#781)

Authored by: i6t
---
 yt_dlp/extractor/iwara.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py
index 907d5fc8bb..dae3da32cd 100644
--- a/yt_dlp/extractor/iwara.py
+++ b/yt_dlp/extractor/iwara.py
@@ -72,6 +72,10 @@ class IwaraIE(InfoExtractor):
         title = remove_end(self._html_search_regex(
             r'<title>([^<]+)</title>', webpage, 'title'), ' | Iwara')

+        thumbnail = self._html_search_regex(
+            r'<video[^>]+id=[\'"]video-player[\'"][^>]+poster=[\'"]([^\'"]+)',
+            webpage, 'thumbnail', default=None)
+
         formats = []
         for a_format in video_data:
             format_uri = url_or_none(a_format.get('uri'))
@@ -96,4 +100,5 @@ class IwaraIE(InfoExtractor):
             'title': title,
             'age_limit': age_limit,
             'formats': formats,
+            'thumbnail': self._proto_relative_url(thumbnail, 'https:'),
         }

From 1931a55ee8412ee385357f33128996cc3d07560e Mon Sep 17 00:00:00 2001
From: The Hatsune Daishi
Date: Wed, 25 Aug 2021 13:48:27 +0900
Subject: [PATCH 003/641] [radiko] Add extractors (#731)

https://github.com/ytdl-org/youtube-dl/issues/29840
Authored by: nao20010128nao
---
 yt_dlp/extractor/extractors.py |   1 +
 yt_dlp/extractor/radiko.py     | 234 +++++++++++++++++++++++++++++++++
 yt_dlp/utils.py                |   2 +
 3 files changed, 237 insertions(+)
 create mode 100644 yt_dlp/extractor/radiko.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index a58a5001cf..99deebbcc0 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1079,6 +1079,7 @@ from .r7 import (
     R7IE,
     R7ArticleIE,
 )
+from .radiko import RadikoIE, RadikoRadioIE
 from .radiocanada import (
     RadioCanadaIE,
     RadioCanadaAudioVideoIE,
diff --git a/yt_dlp/extractor/radiko.py b/yt_dlp/extractor/radiko.py
new file mode 100644
index 0000000000..1e60de1539
--- /dev/null
+++ b/yt_dlp/extractor/radiko.py
@@ -0,0 +1,234 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import base64
+import calendar
+import datetime
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    update_url_query,
+    clean_html,
+    unified_timestamp,
+)
+from ..compat import compat_urllib_parse
+
+
+class RadikoBaseIE(InfoExtractor):
+    _FULL_KEY = None
+
+    def _auth_client(self):
+        auth_cache = self._downloader.cache.load('radiko', 'auth_data')
+        if auth_cache:
+            return auth_cache
+
+        _, auth1_handle = self._download_webpage_handle(
+            'https://radiko.jp/v2/api/auth1', None, 'Downloading authentication page',
+            headers={
+                'x-radiko-app': 'pc_html5',
+                'x-radiko-app-version': '0.0.1',
+                'x-radiko-device': 'pc',
+                'x-radiko-user': 'dummy_user',
+            })
+        auth1_header = auth1_handle.info()
+
+        auth_token = auth1_header['X-Radiko-AuthToken']
+        kl = int(auth1_header['X-Radiko-KeyLength'])
+        ko = int(auth1_header['X-Radiko-KeyOffset'])
+        raw_partial_key = self._extract_full_key()[ko:ko + kl]
+        partial_key = base64.b64encode(raw_partial_key).decode()
+
+        area_id = self._download_webpage(
+            'https://radiko.jp/v2/api/auth2', None, 'Authenticating',
+            headers={
+                'x-radiko-device': 'pc',
+                'x-radiko-user': 'dummy_user',
+                'x-radiko-authtoken': auth_token,
+                'x-radiko-partialkey': partial_key,
+            }).split(',')[0]
+
+        auth_data = (auth_token, area_id)
+        self._downloader.cache.store('radiko', 'auth_data', auth_data)
+        return auth_data
+
+    def _extract_full_key(self):
+        if self._FULL_KEY:
+            return self._FULL_KEY
+
+        jscode = self._download_webpage(
+            'https://radiko.jp/apps/js/playerCommon.js', None,
+            note='Downloading player js code')
+        full_key = self._search_regex(
+            (r"RadikoJSPlayer\([^,]*,\s*(['\"])pc_html5\1,\s*(['\"])(?P<fullkey>[0-9a-f]+)\2,\s*{"),
+            jscode, 'full key', fatal=False, group='fullkey')
+
+        if full_key:
+            full_key = full_key.encode()
+        else:  # use full key ever known
+            full_key = b'bcd151073c03b352e1ef2fd66c32209da9ca0afa'
+
+        self._FULL_KEY = full_key
+        return full_key
+
+    def _find_program(self, video_id, station, cursor):
+        station_program = self._download_xml(
+            'https://radiko.jp/v3/program/station/weekly/%s.xml' % station, video_id,
+            note='Downloading radio program for %s station' % station)
+
+        prog = None
+        for p in station_program.findall('.//prog'):
+            ft_str, to_str = p.attrib['ft'], p.attrib['to']
+            ft = unified_timestamp(ft_str, False)
+            to = unified_timestamp(to_str, False)
+            if ft <= cursor and cursor < to:
+                prog = p
+                break
+        if not prog:
+            raise ExtractorError('Cannot identify radio program to download!')
+        assert ft, to
+        return prog, station_program, ft, ft_str, to_str
+
+    def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token, area_id, query):
+        m3u8_playlist_data = self._download_xml(
+            'https://radiko.jp/v3/station/stream/pc_html5/%s.xml' % station, video_id,
+            note='Downloading m3u8 information')
+        m3u8_urls = m3u8_playlist_data.findall('.//url')
+
+        formats = []
+        found = set()
+        for url_tag in m3u8_urls:
+            pcu = url_tag.find('playlist_create_url')
+            url_attrib = url_tag.attrib
+            playlist_url = update_url_query(pcu.text, {
+                'station_id': station,
+                **query,
+                'l': '15',
+                'lsid': '77d0678df93a1034659c14d6fc89f018',
+                'type': 'b',
+            })
+            if playlist_url in found:
+                continue
+            else:
+                found.add(playlist_url)
+
+            time_to_skip = None if is_onair else cursor - ft
+
+            subformats = self._extract_m3u8_formats(
+                playlist_url, video_id, ext='m4a',
+                live=True, fatal=False, m3u8_id=None,
+                headers={
+                    'X-Radiko-AreaId': area_id,
+                    'X-Radiko-AuthToken': auth_token,
+                })
+            for sf in subformats:
+                domain = sf['format_id'] = compat_urllib_parse.urlparse(sf['url']).netloc
+                if re.match(r'^[cf]-radiko\.smartstream\.ne\.jp$', domain):
+                    # Prioritize live radio vs playback based on extractor
+                    sf['preference'] = 100 if is_onair else -100
+                if not is_onair and url_attrib['timefree'] == '1' and time_to_skip:
+                    sf['_ffmpeg_args'] = ['-ss', time_to_skip]
+            formats.extend(subformats)
+
+        self._sort_formats(formats)
+        return formats
+
+
+class RadikoIE(RadikoBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
+
+    _TESTS = [{
+        # QRR (文化放送) station provides <desc>
+        'url': 'https://radiko.jp/#!/ts/QRR/20210425101300',
+        'only_matching': True,
+    }, {
+        # FMT (TOKYO FM) station does not provide <desc>
+        'url': 'https://radiko.jp/#!/ts/FMT/20210810150000',
+        'only_matching': True,
+    }, {
+        'url': 'https://radiko.jp/#!/ts/JOAK-FM/20210509090000',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        station, video_id = self._match_valid_url(url).groups()
+        vid_int = unified_timestamp(video_id, False)
+
+        auth_token, area_id = self._auth_client()
+
+        prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int)
+
+        title = prog.find('title').text
+        description = clean_html(prog.find('info').text)
+        station_name = station_program.find('.//name').text
+
+        formats = self._extract_formats(
+            video_id=video_id, station=station, is_onair=False,
+            ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id,
+            query={
+                'start_at': radio_begin,
+                'ft': radio_begin,
+                'end_at': radio_end,
+                'to': radio_end,
+                'seek': video_id,
+            })
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'uploader': station_name,
+            'uploader_id': station,
+            'timestamp': vid_int,
+            'formats': formats,
+            'is_live': True,
+        }
+
+
+class RadikoRadioIE(RadikoBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/live/(?P<id>[A-Z0-9-]+)'
+
+    _TESTS = [{
+        # QRR (文化放送) station provides <desc>
+        'url': 'https://radiko.jp/#!/live/QRR',
+        'only_matching': True,
+    }, {
+        # FMT (TOKYO FM) station does not provide <desc>
+        'url': 'https://radiko.jp/#!/live/FMT',
+        'only_matching': True,
+    }, {
+        'url': 'https://radiko.jp/#!/live/JOAK-FM',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        station = self._match_id(url)
+        self.report_warning('Downloader will not stop at the end of the program! Press Ctrl+C to stop')
+
+        auth_token, area_id = self._auth_client()
+        # get current time in JST (GMT+9:00 w/o DST)
+        vid_now = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=9)))
+        vid_now = calendar.timegm(vid_now.timetuple())
+
+        prog, station_program, ft, _, _ = self._find_program(station, station, vid_now)
+
+        title = prog.find('title').text
+        description = clean_html(prog.find('info').text)
+        station_name = station_program.find('.//name').text
+
+        formats = self._extract_formats(
+            video_id=station, station=station, is_onair=True,
+            ft=ft, cursor=vid_now, auth_token=auth_token, area_id=area_id,
+            query={})
+
+        return {
+            'id': station,
+            'title': title,
+            'description': description,
+            'uploader': station_name,
+            'uploader_id': station,
+            'timestamp': ft,
+            'formats': formats,
+            'is_live': True,
+        }
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index c5fbc1c5ce..fa9c509b2d 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -1743,6 +1743,8 @@ DATE_FORMATS = (
     '%Y/%m/%d',
     '%Y/%m/%d %H:%M',
     '%Y/%m/%d %H:%M:%S',
+    '%Y%m%d%H%M',
+    '%Y%m%d%H%M%S',
     '%Y-%m-%d %H:%M',
     '%Y-%m-%d %H:%M:%S',
     '%Y-%m-%d %H:%M:%S.%f',

From e0493e90fc4183a3fee114c2d87d3e2463495984 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Wed, 25 Aug 2021 02:18:05 +0530
Subject: [PATCH 004/641] fix bug in 88acdbc2698169e22cdbf358e44765150434c69e

---
 yt_dlp/YoutubeDL.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 422b26ffe9..7da25a7ba2 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -2169,9 +2169,9 @@ class YoutubeDL(object):
         else:
             formats = info_dict['formats']

+        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
         if not self.params.get('allow_unplayable_formats'):
             formats = [f for f in formats if not f.get('has_drm')]
-        info_dict['__has_drm'] = len(info_dict.get('formats') or ['']) > len(formats)

         if not formats:
             self.raise_no_formats(info_dict)

From 1c36c1f3205e514717b974355f84b7fc823194d8 Mon Sep 17 00:00:00 2001
From: CeruleanSky
Date: Wed, 25 Aug 2021 07:49:05 -0400
Subject: [PATCH 005/641] Fix `--no-prefer-free-formats` (#787)

Authored by: CeruleanSky
---
 yt_dlp/options.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/options.py b/yt_dlp/options.py
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -548,7 +548,7 @@ def parseOpts(overrideArguments=None):
             'Use with "-S ext" to strictly prefer free containers irrespective of quality'))
     video_format.add_option(
         '--no-prefer-free-formats',
-        action='store_true', dest='prefer_free_formats', default=False,
+        action='store_false', dest='prefer_free_formats', default=False,
         help="Don't give any special preference to free containers (default)")
     video_format.add_option(
         '--check-formats',

From 7a45a1590b2c75904830c994b68c71275fdf4ba0 Mon Sep 17 00:00:00 2001
From: Ashish <39122144+Ashish0804@users.noreply.github.com>
Date: Wed, 25 Aug 2021 19:33:32 +0530
Subject: [PATCH 006/641] [Epicon] Add extractors (#789)

Authored by: Ashish0804
---
 yt_dlp/extractor/epicon.py     | 119 +++++++++++++++++++++++++++++++++
 yt_dlp/extractor/extractors.py |   4 ++
 2 files changed, 123 insertions(+)
 create mode 100644 yt_dlp/extractor/epicon.py

diff --git a/yt_dlp/extractor/epicon.py b/yt_dlp/extractor/epicon.py
new file mode 100644
index 0000000000..b4e544d4f6
--- /dev/null
+++ b/yt_dlp/extractor/epicon.py
@@ -0,0 +1,119 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class EpiconIE(InfoExtractor):
+    _VALID_URL = r'(?:https?://)(?:www\.)?epicon\.in/(?:documentaries|movies|tv-shows/[^/?#]+/[^/?#]+)/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://www.epicon.in/documentaries/air-battle-of-srinagar',
+        'info_dict': {
+            'id': 'air-battle-of-srinagar',
+            'ext': 'mp4',
+            'title': 'Air Battle of Srinagar',
+            'description': 'md5:c4de2013af9bc05ae4392e4115d518d7',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }, {
+        'url': 'https://www.epicon.in/movies/krit',
+        'info_dict': {
+            'id': 'krit',
+            'ext': 'mp4',
+            'title': 'Krit',
+            'description': 'md5:c12b35dad915d48ccff7f013c79bab4a',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }, {
+        'url': 'https://www.epicon.in/tv-shows/paapnaashini-ganga/season-1/vardaan',
+        'info_dict': {
+            'id': 'vardaan',
+            'ext': 'mp4',
+            'title': 'Paapnaashini Ganga - Season 1 - Ep 1 - VARDAAN',
+            'description': 'md5:f517058c3d0402398eefa6242f4dd6ae',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }, {
+        'url': 'https://www.epicon.in/movies/jayadev',
+        'info_dict': {
+            'id': 'jayadev',
+            'ext': 'mp4',
+            'title': 'Jayadev',
+            'description': 'md5:09e349eecd8e585a3b6466904f19df6c',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }]
+
+    def _real_extract(self, url):
+        id = self._match_id(url)
+        webpage = self._download_webpage(url, id)
+        cid = self._search_regex(r'class=\"mylist-icon\ iconclick\"\ id=\"(\d+)', webpage, 'cid')
+        headers = {'content-type': 'application/x-www-form-urlencoded; charset=UTF-8'}
+        data = f'cid={cid}&action=st&type=video'.encode()
+        data_json = self._parse_json(self._download_json('https://www.epicon.in/ajaxplayer/', id, headers=headers, data=data), id)
+
+        if not data_json['success']:
+            raise ExtractorError(data_json['message'], expected=True)
+
+        title = self._search_regex(r'setplaytitle=\"([^\"]+)', webpage, 'title')
+        description = self._og_search_description(webpage) or None
+        thumbnail = self._og_search_thumbnail(webpage) or None
+        formats = self._extract_m3u8_formats(data_json['url']['video_url'], id)
+        self._sort_formats(formats)
+
+        subtitles = {}
+        for subtitle in data_json.get('subtitles', []):
+            sub_url = subtitle.get('file')
+            if not sub_url:
+                continue
+            subtitles.setdefault(subtitle.get('lang', 'English'), []).append({
+                'url': self._proto_relative_url(sub_url),
+            })
+
+        return {
+            'id': id,
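+            # 'formats' and 'subtitles' above come from the ajaxplayer API response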
+            'formats': formats,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'subtitles': subtitles,
+        }
+
+
+class EpiconSeriesIE(InfoExtractor):
+    _VALID_URL = r'(?!.*season)(?:https?://)(?:www\.)?epicon\.in/tv-shows/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://www.epicon.in/tv-shows/1-of-something',
+        'playlist_mincount': 5,
+        'info_dict': {
+            'id': '1-of-something',
+        },
+    }, {
+        'url': 'https://www.epicon.in/tv-shows/eco-india-english',
+        'playlist_mincount': 76,
+        'info_dict': {
+            'id': 'eco-india-english',
+        },
+    }, {
+        'url': 'https://www.epicon.in/tv-shows/s/',
+        'playlist_mincount': 25,
+        'info_dict': {
+            'id': 's',
+        },
+    }, {
+        'url': 'https://www.epicon.in/tv-shows/ekaant',
+        'playlist_mincount': 38,
+        'info_dict': {
+            'id': 'ekaant',
+        },
+    }]
+
+    def _real_extract(self, url):
+        id = self._match_id(url)
+        webpage = self._download_webpage(url, id)
+        episodes = re.findall(r'ct-tray-url=\"(tv-shows/%s/[^\"]+)' % id, webpage)
+        entries = [self.url_result('https://www.epicon.in/%s' % episode, ie=EpiconIE.ie_key()) for episode in episodes]
+        return self.playlist_result(entries, playlist_id=id)
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 99deebbcc0..a1be9bdfc2 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -390,6 +390,10 @@ from .elonet import ElonetIE
 from .elpais import ElPaisIE
 from .embedly import EmbedlyIE
 from .engadget import EngadgetIE
+from .epicon import (
+    EpiconIE,
+    EpiconSeriesIE,
+)
 from .eporner import EpornerIE
 from .eroprofile import (
     EroProfileIE,

From 85a0021fb387959b83ac2c25e46f07d507d5ad75 Mon Sep 17 00:00:00 2001
From: Ashish <39122144+Ashish0804@users.noreply.github.com>
Date: Wed, 25 Aug 2021 20:17:58 +0530
Subject: [PATCH 007/641] [ProjectVeritas] Add extractor (#790)

https://github.com/ytdl-org/youtube-dl/issues/26749
Authored by: Ashish0804
---
 yt_dlp/extractor/extractors.py     |  1 +
 yt_dlp/extractor/projectveritas.py | 55 ++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 yt_dlp/extractor/projectveritas.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index a1be9bdfc2..6427577fa9 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1069,6 +1069,7 @@ from .puhutv import (
     PuhuTVSerieIE,
 )
 from .presstv import PressTVIE
+from .projectveritas import ProjectVeritasIE
 from .prosiebensat1 import ProSiebenSat1IE
 from .puls4 import Puls4IE
 from .pyvideo import PyvideoIE
diff --git a/yt_dlp/extractor/projectveritas.py b/yt_dlp/extractor/projectveritas.py
new file mode 100644
index 0000000000..1d832a6796
--- /dev/null
+++ b/yt_dlp/extractor/projectveritas.py
@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    traverse_obj,
+    unified_strdate,
+)
+
+
+class ProjectVeritasIE(InfoExtractor):
+    _VALID_URL = r'(?:https?://)(?:www\.)?projectveritas\.com/(?P<type>news|video)/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://www.projectveritas.com/news/exclusive-inside-the-new-york-and-new-jersey-hospitals-battling-coronavirus/',
+        'info_dict': {
+            'id': '51910aab-365a-5cf1-88f2-8eb1ca5fd3c6',
+            'ext': 'mp4',
+            'title': 'Exclusive: Inside The New York and New Jersey Hospitals Battling Coronavirus',
+            'upload_date': '20200327',
+            'thumbnail': 'md5:6076477fe50b03eb8708be9415e18e1c',
+        }
+    }, {
+        'url':
'https://www.projectveritas.com/video/ilhan-omar-connected-ballot-harvester-in-cash-for-ballots-scheme-car-is-full/', + 'info_dict': { + 'id': 'c5aab304-a56b-54b1-9f0b-03b77bc5f2f6', + 'ext': 'mp4', + 'title': 'Ilhan Omar connected Ballot Harvester in cash-for-ballots scheme: "Car is full" of absentee ballots', + 'upload_date': '20200927', + 'thumbnail': 'md5:194b8edf0e2ba64f25500ff4378369a4', + } + }] + + def _real_extract(self, url): + id, type = self._match_valid_url(url).group('id', 'type') + api_url = f'https://www.projectveritas.com/page-data/{type}/{id}/page-data.json' + data_json = self._download_json(api_url, id)['result']['data'] + main_data = traverse_obj(data_json, 'video', 'post') + video_id = main_data['id'] + thumbnail = traverse_obj(main_data, ('image', 'ogImage', 'src')) + mux_asset = traverse_obj(main_data, + 'muxAsset', ('body', 'json', 'content', ..., 'data', 'target', 'fields', 'muxAsset'), + get_all=False, expected_type=dict) + if not mux_asset: + raise ExtractorError('No video on the provided url.', expected=True) + playback_id = traverse_obj(mux_asset, 'playbackId', ('en-US', 'playbackId')) + formats = self._extract_m3u8_formats(f'https://stream.mux.com/{playback_id}.m3u8', video_id) + self._sort_formats(formats) + return { + 'id': video_id, + 'title': main_data['title'], + 'upload_date': unified_strdate(main_data.get('date')), + 'thumbnail': thumbnail.replace('//', ''), + 'formats': formats, + } From 61bfacb233ab4c45256083637c4526be4b3f1944 Mon Sep 17 00:00:00 2001 From: Robin Date: Wed, 25 Aug 2021 17:01:43 +0200 Subject: [PATCH 008/641] [facebook] Update onion URL (#788) Authored by: Derkades --- yt_dlp/extractor/facebook.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index e5bdb335a8..2991a9f35d 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -35,7 +35,7 @@ class FacebookIE(InfoExtractor): _VALID_URL = r'''(?x) (?: https?:// - (?:[\w-]+\.)?(?:facebook\.com|facebookcorewwwi\.onion)/ + (?:[\w-]+\.)?(?:facebook\.com|facebookwkhpilnemxj7asaniu7vnjjbiltxjqhye3mhbshg7kx5tfyd\.onion)/ (?:[^#]*?\#!/)? (?: (?: @@ -226,7 +226,7 @@ class FacebookIE(InfoExtractor): 'only_matching': True, }, { # data.video - 'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670', + 'url': 'https://www.facebookwkhpilnemxj7asaniu7vnjjbiltxjqhye3mhbshg7kx5tfyd.onion/video.php?v=274175099429670', 'only_matching': True, }, { # no title From 198e3a04c9b147a5d63e8e5bfdb2bac5a356ae18 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 26 Aug 2021 07:34:08 +0530 Subject: [PATCH 009/641] [FormatSort] Remove priority of `lang` --- README.md | 2 +- yt_dlp/extractor/common.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 917350bdad..9e93eea4b2 100644 --- a/README.md +++ b/README.md @@ -1203,7 +1203,7 @@ You can change the criteria for being considered the `best` by using `-S` (`--fo Note that any other **numerical** field made available by the extractor can also be used. All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. Eg: `+res` prefers format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. Eg: `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. 
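
For example, suffixed and plain fields can be combined in one sort string:

```
# Cap video at 720p, then prefer the higher frame rate among the remaining formats
$ yt-dlp -S "res:720,fps"
```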
For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. Eg: `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. Eg: `filesize~1G` prefers the format with filesize closest to 1 GiB. -The fields `hasvid`, `ie_pref`, `lang` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--force-format-sort`. Apart from these, the default order used is: `quality,res,fps,codec:vp9.2,size,br,asr,proto,ext,hasaud,source,id`. Note that the extractors may override this default order, but they cannot override the user-provided order. +The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--force-format-sort`. Apart from these, the default order used is: `lang,quality,res,fps,codec:vp9.2,size,br,asr,proto,ext,hasaud,source,id`. Note that the extractors may override this default order, but they cannot override the user-provided order. If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`. diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index b7a55177f9..54a9dc2631 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1502,7 +1502,7 @@ class InfoExtractor(object): default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality', 'res', 'fps', 'codec:vp9.2', 'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'format_id') # These must not be aliases - ytdl_default = ('hasaud', 'quality', 'tbr', 'filesize', 'vbr', + ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr', 'height', 'width', 'proto', 'vext', 'abr', 'aext', 'fps', 'fs_approx', 'source', 'format_id') @@ -1526,7 +1526,7 @@ class InfoExtractor(object): 'ie_pref': {'priority': True, 'type': 'extractor'}, 'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)}, 'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)}, - 'lang': {'priority': True, 'convert': 'ignore', 'field': 'language_preference'}, + 'lang': {'convert': 'ignore', 'field': 'language_preference'}, 'quality': {'convert': 'float_none', 'default': -1}, 'filesize': {'convert': 'bytes'}, 'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'}, From 26e8e04454d28b623c16f34496e31752086ff457 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 26 Aug 2021 07:40:02 +0530 Subject: [PATCH 010/641] [youtube] Prefer audio stream that YouTube considers default Fixes: https://github.com/ytdl-org/youtube-dl/issues/29864 Related: https://github.com/clsid2/mpc-hc/issues/1268 --- yt_dlp/extractor/youtube.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 9ca81e6cb7..80c3cc05ec 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2622,7 +2622,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 
                'filesize': int_or_none(fmt.get('contentLength')),
                 'format_id': itag,
                 'format_note': ', '.join(filter(None, (
-                    audio_track.get('displayName'),
+                    '%s%s' % (audio_track.get('displayName') or '',
+                              ' (default)' if audio_track.get('audioIsDefault') else ''),
                     fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
                 'fps': int_or_none(fmt.get('fps')),
                 'height': height,
@@ -2631,6 +2632,7 @@
                 'url': fmt_url,
                 'width': int_or_none(fmt.get('width')),
                 'language': audio_track.get('id', '').split('.')[0],
+                'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
             }
             mime_mobj = re.match(
                 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
@@ -2817,7 +2819,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

         # Source is given priority since formats that throttle are given lower source_preference
         # When throttling issue is fully fixed, remove this
-        self._sort_formats(formats, ('quality', 'height', 'fps', 'source'))
+        self._sort_formats(formats, ('quality', 'res', 'fps', 'source', 'codec:vp9.2', 'size', 'br', 'lang'))

         keywords = get_first(video_details, 'keywords', expected_type=list) or []
         if not keywords and webpage:

From c311988d19abaa35e935617df3bcfc42ac3aeb61 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Thu, 26 Aug 2021 08:25:56 +0530
Subject: [PATCH 011/641] [youtube] Improve 26e8e04454d28b623c16f34496e31752086ff457

The streams of the same itag may have slightly different size/bitrate
---
 yt_dlp/extractor/youtube.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 80c3cc05ec..de7ff32589 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2819,7 +2819,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

         # Source is given priority since formats that throttle are given lower source_preference
         # When throttling issue is fully fixed, remove this
-        self._sort_formats(formats, ('quality', 'res', 'fps', 'source', 'codec:vp9.2', 'size', 'br', 'lang'))
+        self._sort_formats(formats, ('quality', 'res', 'fps', 'source', 'codec:vp9.2', 'lang'))

         keywords = get_first(video_details, 'keywords', expected_type=list) or []
         if not keywords and webpage:

From 691d5823d6ff72b813eb34ede8009b70bebd73da Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Fri, 27 Aug 2021 00:59:36 +0530
Subject: [PATCH 012/641] [aria2c] Obey `--rate-limit`

---
 yt_dlp/downloader/external.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
index fdfabb38da..3dddedb14f 100644
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@@ -288,6 +288,7 @@ class Aria2cFD(ExternalFD):
         if info_dict.get('http_headers') is not None:
             for key, val in info_dict['http_headers'].items():
                 cmd += ['--header', '%s: %s' % (key, val)]
+        cmd += self._option('--max-overall-download-limit', 'ratelimit')
         cmd += self._option('--interface', 'source_address')
         cmd += self._option('--all-proxy', 'proxy')
         cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')

From d75201a873a413d73f12748e5710f000e9f727da Mon Sep 17 00:00:00 2001
From: Paul Wrubel
Date: Thu, 26 Aug 2021 21:27:20 -0500
Subject: [PATCH 013/641] Use `os.replace` where applicable (#793)

When using

```py
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
```

the `os.remove` and the immediately following `os.rename` are two separate,
non-atomic steps: the sequence can be interrupted between the calls, briefly
leaving no file at the destination. It is better to use the atomic
`os.replace` instead

Authored by: paulwrubel
---
 .gitignore                             |  2 ++
 yt_dlp/downloader/common.py            |  7 ++-----
 yt_dlp/postprocessor/embedthumbnail.py |  3 +--
 yt_dlp/postprocessor/ffmpeg.py         | 13 ++++---------
 yt_dlp/postprocessor/sponskrub.py      |  3 +--
 5 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/.gitignore b/.gitignore
index 7ed34448a1..619d6ba98a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,6 +19,8 @@ cookies.txt
 *.wav
 *.ape
 *.mkv
+*.flac
+*.avi
 *.swf
 *.part
 *.part-*
diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py
index f5f6393a62..ce914bd4a2 100644
--- a/yt_dlp/downloader/common.py
+++ b/yt_dlp/downloader/common.py
@@ -207,12 +207,9 @@ class FileDownloader(object):
         if old_filename == new_filename:
             return
         try:
-            if self.params.get('overwrites', False):
-                if os.path.isfile(encodeFilename(new_filename)):
-                    os.remove(encodeFilename(new_filename))
-            os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
+            os.replace(old_filename, new_filename)
         except (IOError, OSError) as err:
-            self.report_error('unable to rename file: %s' % error_to_compat_str(err))
+            self.report_error(f'unable to rename file: {err}')

     def try_utime(self, filename, last_modified_hdr):
         """Try to set the last-modified time of the given file."""
diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py
index 7008f4d4db..3139a63388 100644
--- a/yt_dlp/postprocessor/embedthumbnail.py
+++ b/yt_dlp/postprocessor/embedthumbnail.py
@@ -222,8 +222,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor):
             raise EmbedThumbnailPPError('Supported filetypes for thumbnail embedding are: mp3, mkv/mka, ogg/opus/flac, m4a/mp4/mov')

         if success and temp_filename != filename:
-            os.remove(encodeFilename(filename))
-            os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+            os.replace(temp_filename, filename)

         self.try_utime(filename, mtime, mtime)

diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index be6cc9f096..b66a0b4452 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -520,8 +520,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         temp_filename = prepend_extension(filename, 'temp')
         self.to_screen('Embedding subtitles in "%s"' % filename)
         self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
-        os.remove(encodeFilename(filename))
-        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+        os.replace(temp_filename, filename)

         files_to_delete = [] if self._already_have_subtitle else sub_filenames
         return files_to_delete, information
@@ -628,8 +627,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor):
             itertools.chain(self._options(info['ext']), *options))
         if chapters:
             os.remove(metadata_filename)
-        os.remove(encodeFilename(filename))
-        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+        os.replace(temp_filename, filename)
         return [], info
@@ -673,8 +671,7 @@ class FFmpegFixupPostProcessor(FFmpegPostProcessor):
             self.to_screen(f'{msg} of "{filename}"')
             self.run_ffmpeg(filename, temp_filename, options)
-            os.remove(encodeFilename(filename))
-            os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+            os.replace(temp_filename, filename)

 class FFmpegFixupStretchedPP(FFmpegFixupPostProcessor):
@@ -866,9 +863,7 @@ class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor):
             if thumbnail_ext != 'webp' and self.is_webp(thumbnail_filename):
                 self.to_screen('Correcting thumbnail "%s" extension to webp' % thumbnail_filename)
                 webp_filename
= replace_extension(thumbnail_filename, 'webp') - if os.path.exists(webp_filename): - os.remove(webp_filename) - os.rename(encodeFilename(thumbnail_filename), encodeFilename(webp_filename)) + os.replace(thumbnail_filename, webp_filename) info['thumbnails'][idx]['filepath'] = webp_filename info['__files_to_move'][webp_filename] = replace_extension( info['__files_to_move'].pop(thumbnail_filename), 'webp') diff --git a/yt_dlp/postprocessor/sponskrub.py b/yt_dlp/postprocessor/sponskrub.py index 73b6b4a20e..588f0ae125 100644 --- a/yt_dlp/postprocessor/sponskrub.py +++ b/yt_dlp/postprocessor/sponskrub.py @@ -84,8 +84,7 @@ class SponSkrubPP(PostProcessor): stdout = process_communicate_or_kill(p)[0] if p.returncode == 0: - os.remove(encodeFilename(filename)) - os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + os.replace(temp_filename, filename) self.to_screen('Sponsor sections have been %s' % ('removed' if self.cutout else 'marked')) elif p.returncode == 3: self.to_screen('No segments in the SponsorBlock database') From bc36bc36a10fb3bfe6b835f12b6be2e53f69916e Mon Sep 17 00:00:00 2001 From: Ashish <39122144+Ashish0804@users.noreply.github.com> Date: Fri, 27 Aug 2021 20:39:13 +0530 Subject: [PATCH 014/641] [ShemarooMe] Fix extractor (#798) Closes #797 Authored by: Ashish0804 --- yt_dlp/extractor/shemaroome.py | 54 ++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/yt_dlp/extractor/shemaroome.py b/yt_dlp/extractor/shemaroome.py index fb010180ca..142d5dc3a0 100644 --- a/yt_dlp/extractor/shemaroome.py +++ b/yt_dlp/extractor/shemaroome.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..aes import aes_cbc_decrypt from ..compat import ( @@ -11,9 +9,9 @@ from ..compat import ( ) from ..utils import ( bytes_to_intlist, + ExtractorError, intlist_to_bytes, unified_strdate, - url_or_none, ) @@ -26,7 +24,7 @@ class ShemarooMeIE(InfoExtractor): 'ext': 'mp4', 'title': 'Dil Hai Tumhaara', 'release_date': '20020906', - 'thumbnail': 'https://daex9l847wg3n.cloudfront.net/shemoutputimages/Dil-Hai-Tumhaara/60599346a609d2faa3000020/large_16_9_1616436538.jpg?1616483693', + 'thumbnail': r're:^https?://.*\.jpg$', 'description': 'md5:2782c4127807103cf5a6ae2ca33645ce', }, 'params': { @@ -39,8 +37,23 @@ class ShemarooMeIE(InfoExtractor): 'ext': 'mp4', 'title': 'Laalach', 'description': 'md5:92b79c2dcb539b0ab53f9fa5a048f53c', + 'thumbnail': r're:^https?://.*\.jpg$', 'release_date': '20210507', }, + 'params': { + 'skip_download': True + }, + 'skip': 'Premium videos cannot be downloaded yet.' 
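+        # the user_all_lists API returns status=False for premium titles,
+        # which makes _real_extract raise the ExtractorError below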
+ }, { + 'url': 'https://www.shemaroome.com/shows/jai-jai-jai-bajrang-bali/jai-jai-jai-bajrang-bali-episode-99', + 'info_dict': { + 'id': 'jai-jai-jai-bajrang-bali_jai-jai-jai-bajrang-bali-episode-99', + 'ext': 'mp4', + 'title': 'Jai Jai Jai Bajrang Bali Episode 99', + 'description': 'md5:850d127a18ee3f9529d7fbde2f49910d', + 'thumbnail': r're:^https?://.*\.jpg$', + 'release_date': '20110101', + }, 'params': { 'skip_download': True } @@ -49,28 +62,43 @@ class ShemarooMeIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url).replace('/', '_') webpage = self._download_webpage(url, video_id) - m = re.search( - r'params_for_player\s*=\s*"(?P[^|]+)\|key=(?P[^|]+)\|image=(?P[^|]+)\|title=(?P[^|]+)', - webpage) - data = bytes_to_intlist(compat_b64decode(m.group('data'))) - key = bytes_to_intlist(compat_b64decode(m.group('key'))) + title = self._search_regex(r'id=\"ma_title\" value=\"([^\"]+)', webpage, 'title') + thumbnail = self._og_search_thumbnail(webpage) + content_def = self._search_regex(r'id=\"content_definition\" value=\"([^\"]+)', webpage, 'content_def') + catalog_id = self._search_regex(r'id=\"catalog_id\" value=\"([^\"]+)', webpage, 'catalog_id') + item_category = self._search_regex(r'id=\"item_category\" value=\"([^\"]+)', webpage, 'item_category') + content_id = self._search_regex(r'id=\"content_id\" value=\"([^\"]+)', webpage, 'content_id') + + data = f'catalog_id={catalog_id}&content_id={content_id}&category={item_category}&content_def={content_def}' + data_json = self._download_json('https://www.shemaroome.com/users/user_all_lists', video_id, data=data.encode()) + if not data_json.get('status'): + raise ExtractorError('Premium videos cannot be downloaded yet.', expected=True) + url_data = bytes_to_intlist(compat_b64decode(data_json['new_play_url'])) + key = bytes_to_intlist(compat_b64decode(data_json['key'])) iv = [0] * 16 - m3u8_url = intlist_to_bytes(aes_cbc_decrypt(data, key, iv)) + m3u8_url = intlist_to_bytes(aes_cbc_decrypt(url_data, key, iv)) m3u8_url = m3u8_url[:-compat_ord((m3u8_url[-1]))].decode('ascii') - formats = self._extract_m3u8_formats(m3u8_url, video_id, fatal=False) + formats = self._extract_m3u8_formats(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']}) self._sort_formats(formats) release_date = self._html_search_regex( (r'itemprop="uploadDate">\s*([\d-]+)', r'id="release_date" value="([\d-]+)'), webpage, 'release date', fatal=False) + subtitles = {} + sub_url = data_json.get('subtitle') + if sub_url: + subtitles.setdefault('EN', []).append({ + 'url': self._proto_relative_url(sub_url), + }) description = self._html_search_regex(r'(?s)>Synopsis(</.+?)</', webpage, 'description', fatal=False) return { 'id': video_id, 'formats': formats, - 'title': m.group('title'), - 'thumbnail': url_or_none(m.group('thumbnail')), + 'title': title, + 'thumbnail': thumbnail, 'release_date': unified_strdate(release_date), 'description': description, + 'subtitles': subtitles, } From 2e7781a93c702fffacf0076ccd498d79e03258d4 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 28 Aug 2021 02:20:18 +0530 Subject: [PATCH 015/641] [docs] Fix some typos Closes #677, #774 --- .github/ISSUE_TEMPLATE/6_question.md | 2 +- .github/PULL_REQUEST_TEMPLATE.md | 2 +- README.md | 8 +++++--- yt_dlp/options.py | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/6_question.md b/.github/ISSUE_TEMPLATE/6_question.md index dd2857c092..9f052090a1 100644 --- 
a/.github/ISSUE_TEMPLATE/6_question.md
+++ b/.github/ISSUE_TEMPLATE/6_question.md
@@ -1,6 +1,6 @@
 ---
 name: Ask question
-about: Ask youtube-dl related question
+about: Ask yt-dlp related question
 title: "[Question]"
 labels: question
 assignees: ''
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index f711701cb6..1bcac69dad 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -11,7 +11,7 @@
 - [ ] [Searched](https://github.com/yt-dlp/yt-dlp/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests
 - [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8)

-### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options:
+### In order to be accepted and merged into yt-dlp each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check one of the following options:
 - [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/)
 - [ ] I am not the original author of this code but it is in public domain or released under [Unlicense](http://unlicense.org/) (provide reliable evidence)
diff --git a/README.md b/README.md
index 9e93eea4b2..b0b34506d7 100644
--- a/README.md
+++ b/README.md
@@ -351,7 +351,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
                                      filters can be checked with "&". Use a "\"
                                      to escape "&" or quotes if needed. Eg:
                                      --match-filter "!is_live & like_count>?100
-                                     & description~=\'(?i)\bcats \& dogs\b\'"
+                                     & description~='(?i)\bcats \& dogs\b'"
                                      matches only videos that are not live, has
                                      a like count more than 100 (or the like
                                      field is not available), and also has a
@@ -1173,7 +1173,9 @@ Format selectors can also be grouped using parentheses, for example if you want

 ## Sorting Formats

-You can change the criteria for being considered the `best` by using `-S` (`--format-sort`). The general format for this is `--format-sort field1,field2...`. The available fields are:
+You can change the criteria for being considered the `best` by using `-S` (`--format-sort`). The general format for this is `--format-sort field1,field2...`.
+
+The available fields are:

 - `hasvid`: Gives priority to formats that have a video stream
 - `hasaud`: Gives priority to formats that have an audio stream
@@ -1339,7 +1341,7 @@ The metadata obtained by the extractors can be modified by using `--parse-metad

 `--replace-in-metadata FIELDS REGEX REPLACE` is used to replace text in any metadata field using [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax). [Backreferences](https://docs.python.org/3/library/re.html?highlight=backreferences#re.sub) can be used in the replace string for advanced use.

-The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or a template (with same syntax as [output template](#output-template)) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields.
+The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields. Note that any field created by this can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--add-metadata`. diff --git a/yt_dlp/options.py b/yt_dlp/options.py index c26d498a57..0f8ce8ce86 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -411,7 +411,7 @@ def parseOpts(overrideArguments=None): 'Python style regular expression matching can be done using "~=", ' 'and multiple filters can be checked with "&". ' 'Use a "\\" to escape "&" or quotes if needed. Eg: --match-filter ' - r'"!is_live & like_count>?100 & description~=\'(?i)\bcats \& dogs\b\'" ' + '"!is_live & like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" ' 'matches only videos that are not live, has a like count more than 100 ' '(or the like field is not available), and also has a description ' 'that contains the phrase "cats & dogs" (ignoring case)')) From abafce59a11538539112ad97c9e9898879999589 Mon Sep 17 00:00:00 2001 From: animelover1984 <54511032+animelover1984@users.noreply.github.com> Date: Fri, 27 Aug 2021 18:37:13 -0700 Subject: [PATCH 016/641] [Niconico] Add Search extractors (#672) Authored by: animelover1984, pukkandan --- test/test_download.py | 0 yt_dlp/extractor/extractors.py | 10 ++- yt_dlp/extractor/niconico.py | 109 +++++++++++++++++++++++++++++++-- 3 files changed, 114 insertions(+), 5 deletions(-) mode change 100644 => 100755 test/test_download.py diff --git a/test/test_download.py b/test/test_download.py old mode 100644 new mode 100755 diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 6427577fa9..9144635f9b 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -888,7 +888,15 @@ from .nick import ( NickNightIE, NickRuIE, ) -from .niconico import NiconicoIE, NiconicoPlaylistIE, NiconicoUserIE + +from .niconico import ( + NiconicoIE, + NiconicoPlaylistIE, + NiconicoUserIE, + NicovideoSearchDateIE, + NicovideoSearchIE, + NicovideoSearchURLIE, +) from .ninecninemedia import NineCNineMediaIE from .ninegag import NineGagIE from .ninenow import NineNowIE diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 2fa81b5c2e..f19afa485d 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -1,11 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals -import re -import json import datetime +import itertools +import json +import re -from .common import InfoExtractor +from .common import InfoExtractor, SearchInfoExtractor from ..postprocessor.ffmpeg import FFmpegPostProcessor from ..compat import ( compat_str, @@ -661,6 +662,106 @@ class NiconicoPlaylistIE(InfoExtractor): } +NicovideoSearchIE_NAME = 'nicovideo:search' + + +class NicovideoSearchURLIE(InfoExtractor): + IE_NAME = f'{NicovideoSearchIE_NAME}_url' + IE_DESC = 'Nico video search URLs' + _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/search/(?P<id>[^?#&]+)?' 
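+    # the trailing "?" makes the search-term group optional, so a bare /search/ URL also matches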
+ _TESTS = [{ + 'url': 'http://www.nicovideo.jp/search/sm9', + 'info_dict': { + 'id': 'sm9', + 'title': 'sm9' + }, + 'playlist_mincount': 40, + }, { + 'url': 'https://www.nicovideo.jp/search/sm9?sort=h&order=d&end=2020-12-31&start=2020-01-01', + 'info_dict': { + 'id': 'sm9', + 'title': 'sm9' + }, + 'playlist_count': 31, + }] + + def _entries(self, url, item_id, query=None, note='Downloading page %(page)s'): + query = query or {} + pages = [query['page']] if 'page' in query else itertools.count(1) + for page_num in pages: + query['page'] = str(page_num) + webpage = self._download_webpage(url, item_id, query=query, note=note % {'page': page_num}) + results = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.*?)(?=["\'])', webpage) + for item in results: + yield self.url_result(f'http://www.nicovideo.jp/watch/{item}', 'Niconico', item) + if not results: + break + + def _real_extract(self, url): + query = self._match_id(url) + return self.playlist_result(self._entries(url, query), query, query) + + +class NicovideoSearchIE(SearchInfoExtractor, NicovideoSearchURLIE): + IE_DESC = 'Nico video searches' + _MAX_RESULTS = float('inf') + IE_NAME = NicovideoSearchIE_NAME + _SEARCH_KEY = 'nicosearch' + _TESTS = [] + + def _get_n_results(self, query, n): + entries = self._entries(self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query) + if n < float('inf'): + entries = itertools.islice(entries, 0, n) + return self.playlist_result(entries, query, query) + + +class NicovideoSearchDateIE(NicovideoSearchIE): + IE_DESC = 'Nico video searches, newest first' + IE_NAME = f'{NicovideoSearchIE_NAME}:date' + _SEARCH_KEY = 'nicosearchdate' + _TESTS = [{ + 'url': 'nicosearchdateall:a', + 'info_dict': { + 'id': 'a', + 'title': 'a' + }, + 'playlist_mincount': 1610, + }] + + _START_DATE = datetime.date(2007, 1, 1) + _RESULTS_PER_PAGE = 32 + _MAX_PAGES = 50 + + def _entries(self, url, item_id, start_date=None, end_date=None): + start_date, end_date = start_date or self._START_DATE, end_date or datetime.datetime.now().date() + + # If the last page has a full page of videos, we need to break down the query interval further + last_page_len = len(list(self._get_entries_for_date( + url, item_id, start_date, end_date, self._MAX_PAGES, + note=f'Checking number of videos from {start_date} to {end_date}'))) + if (last_page_len == self._RESULTS_PER_PAGE and start_date != end_date): + midpoint = start_date + ((end_date - start_date) // 2) + yield from self._entries(url, item_id, midpoint, end_date) + yield from self._entries(url, item_id, start_date, midpoint) + else: + self.to_screen(f'{item_id}: Downloading results from {start_date} to {end_date}') + yield from self._get_entries_for_date( + url, item_id, start_date, end_date, note=' Downloading page %(page)s') + + def _get_entries_for_date(self, url, item_id, start_date, end_date=None, page_num=None, note=None): + query = { + 'start': str(start_date), + 'end': str(end_date or start_date), + 'sort': 'f', + 'order': 'd', + } + if page_num: + query['page'] = str(page_num) + + yield from NicovideoSearchURLIE._entries(self, url, item_id, query=query, note=note) + + class NiconicoUserIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])' _TEST = { @@ -678,7 +779,7 @@ class NiconicoUserIE(InfoExtractor): 'X-Frontend-Version': '0' } - def _entries(self, list_id, ): + def _entries(self, list_id): total_count = 1 count = page_num = 0 while count < total_count: From 58f68fe7037ce5ac071d732b5c9528175957e4fc Mon Sep 17 00:00:00 2001 
From: Ashish <39122144+Ashish0804@users.noreply.github.com> Date: Sun, 29 Aug 2021 06:44:22 +0530 Subject: [PATCH 017/641] [TV2Hu] Fix `TV2HuIE` and add `TV2HuSeriesIE` (#804) Closes #799 Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 5 +- yt_dlp/extractor/tv2hu.py | 128 ++++++++++++++++++++++----------- 2 files changed, 92 insertions(+), 41 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 9144635f9b..bcc669c7a1 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1440,7 +1440,10 @@ from .tv2dk import ( TV2DKIE, TV2DKBornholmPlayIE, ) -from .tv2hu import TV2HuIE +from .tv2hu import ( + TV2HuIE, + TV2HuSeriesIE, +) from .tv4 import TV4IE from .tv5mondeplus import TV5MondePlusIE from .tv5unis import ( diff --git a/yt_dlp/extractor/tv2hu.py b/yt_dlp/extractor/tv2hu.py index 86017b7570..f2104358bb 100644 --- a/yt_dlp/extractor/tv2hu.py +++ b/yt_dlp/extractor/tv2hu.py @@ -2,61 +2,109 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + traverse_obj, + UnsupportedError, +) class TV2HuIE(InfoExtractor): - IE_NAME = 'tv2.hu' - _VALID_URL = r'https?://(?:www\.)?tv2\.hu/(?:[^/]+/)+(?P<id>\d+)_[^/?#]+?\.html' + IE_NAME = 'tv2play.hu' + _VALID_URL = r'https?://(?:www\.)?tv2play\.hu/(?!szalag/)(?P<id>[^#&?]+)' _TESTS = [{ - 'url': 'http://tv2.hu/ezek_megorultek/217679_ezek-megorultek---1.-adas-1.-resz.html', + 'url': 'https://tv2play.hu/mintaapak/mintaapak_213_epizod_resz', + 'info_dict': { + 'id': '249240', + 'ext': 'mp4', + 'title': 'Mintaapák - 213. epizód', + 'series': 'Mintaapák', + 'duration': 2164, + 'description': 'md5:7350147e75485a59598e806c47967b07', + 'thumbnail': r're:^https?://.*\.jpg$', + 'release_date': '20210825', + 'season_number': None, + 'episode_number': 213, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://tv2play.hu/taxi_2', 'md5': '585e58e2e090f34603804bb2c48e98d8', 'info_dict': { - 'id': '217679', + 'id': '199363', 'ext': 'mp4', - 'title': 'Ezek megőrültek! - 1. adás 1. 
rész', - 'upload_date': '20160826', - 'thumbnail': r're:^https?://.*\.jpg$' - } - }, { - 'url': 'http://tv2.hu/ezek_megorultek/teljes_adasok/217677_ezek-megorultek---1.-adas-2.-resz.html', - 'only_matching': True - }, { - 'url': 'http://tv2.hu/musoraink/aktiv/aktiv_teljes_adas/217963_aktiv-teljes-adas---2016.08.30..html', - 'only_matching': True + 'title': 'Taxi 2', + 'series': 'Taxi 2', + 'duration': 5087, + 'description': 'md5:47762155dc9a50241797ded101b1b08c', + 'thumbnail': r're:^https?://.*\.jpg$', + 'release_date': '20210118', + 'season_number': None, + 'episode_number': None, + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - json_url = self._search_regex( - r'jsonUrl\s*=\s*"([^"]+)"', webpage, 'json url') - json_data = self._download_json(json_url, video_id) + id = self._match_id(url) + json_data = self._download_json(f'https://tv2play.hu/api/search/{id}', id) - formats = [] - for b in ('bitrates', 'backupBitrates'): - bitrates = json_data.get(b, {}) - m3u8_url = bitrates.get('hls') - if m3u8_url: - formats.extend(self._extract_wowza_formats( - m3u8_url, video_id, skip_protocols=['rtmp', 'rtsp'])) + if json_data['contentType'] == 'showpage': + ribbon_ids = traverse_obj(json_data, ('pages', ..., 'tabs', ..., 'ribbonIds'), get_all=False, expected_type=list) + entries = [self.url_result(f'https://tv2play.hu/szalag/{ribbon_id}', + ie=TV2HuSeriesIE.ie_key(), video_id=ribbon_id) for ribbon_id in ribbon_ids] + return self.playlist_result(entries, playlist_id=id) + elif json_data['contentType'] != 'video': + raise UnsupportedError(url) - for mp4_url in bitrates.get('mp4', []): - height = int_or_none(self._search_regex( - r'\.(\d+)p\.mp4', mp4_url, 'height', default=None)) - formats.append({ - 'format_id': 'http' + ('-%d' % height if height else ''), - 'url': mp4_url, - 'height': height, - 'width': int_or_none(height / 9.0 * 16.0 if height else None), - }) + video_id = str(json_data['id']) + player_id = json_data.get('playerId') + series_json = json_data.get('seriesInfo', {}) + + video_json_url = self._download_json(f'https://tv2play.hu/api/streaming-url?playerId={player_id}', video_id)['url'] + video_json = self._download_json(video_json_url, video_id) + m3u8_url = self._proto_relative_url(traverse_obj(video_json, ('bitrates', 'hls'))) + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id) self._sort_formats(formats) return { 'id': video_id, - 'title': self._og_search_title(webpage).strip(), - 'thumbnail': self._og_search_thumbnail(webpage), - 'upload_date': self._search_regex( - r'/vod/(\d{8})/', json_url, 'upload_date', default=None), + 'title': json_data['title'], + 'series': json_data.get('seriesTitle'), + 'duration': json_data.get('length'), + 'description': json_data.get('description'), + 'thumbnail': 'https://tv2play.hu' + json_data.get('thumbnailUrl'), + 'release_date': json_data.get('uploadedAt').replace('.', ''), + 'season_number': series_json.get('seasonNr'), + 'episode_number': series_json.get('episodeNr'), 'formats': formats, + 'subtitles': subtitles, } + + +class TV2HuSeriesIE(InfoExtractor): + IE_NAME = 'tv2playseries.hu' + _VALID_URL = r'https?://(?:www\.)?tv2play\.hu/szalag/(?P<id>[^#&?]+)' + + _TESTS = [{ + 'url': 'https://tv2play.hu/szalag/59?rendezes=nepszeruseg', + 'playlist_mincount': 284, + 'info_dict': { + 'id': '59', + } + }] + + def _real_extract(self, url): + id = self._match_id(url) + json_data = 
self._download_json(f'https://tv2play.hu/api/ribbons/{id}/0?size=100000', id) + entries = [] + for card in json_data.get('cards', []): + video_id = card.get('slug') + if video_id: + entries.append(self.url_result(f'https://tv2play.hu/{video_id}', + ie=TV2HuIE.ie_key(), video_id=video_id)) + + return self.playlist_result(entries, playlist_id=id) From 2fc14b99253d337f7b8cfb6d5bf1a8f9ac16ad43 Mon Sep 17 00:00:00 2001 From: std-move <26625259+std-move@users.noreply.github.com> Date: Sun, 29 Aug 2021 03:34:42 +0200 Subject: [PATCH 018/641] [Nova] fix extractor (#807) Fixes: https://github.com/ytdl-org/youtube-dl/issues/27840 Authored by: std-move --- yt_dlp/extractor/nova.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/nova.py b/yt_dlp/extractor/nova.py index fdf604d2af..3acb881217 100644 --- a/yt_dlp/extractor/nova.py +++ b/yt_dlp/extractor/nova.py @@ -39,7 +39,7 @@ class NovaEmbedIE(InfoExtractor): player = self._parse_json( self._search_regex( - r'Player\.init\s*\([^,]+,\s*({.+?})\s*,\s*{.+?}\s*\)\s*;', + r'Player\.init\s*\([^,]+,\s*(?:\w+\s*\?\s*{.+?}\s*:\s*)?({.+})\s*,\s*{.+?}\s*\)\s*;', webpage, 'player', default='{}'), video_id, fatal=False) if player: for format_id, format_list in player['tracks'].items(): From 7e558722866a0bdccaffceea8d1aa79db7dbd78f Mon Sep 17 00:00:00 2001 From: coletdjnz <colethedj@protonmail.com> Date: Mon, 30 Aug 2021 09:11:03 +1200 Subject: [PATCH 019/641] [camtube] remove extractor (#810) Co-authored-by: alerikaisattera --- yt_dlp/extractor/camtube.py | 71 ---------------------------------- yt_dlp/extractor/extractors.py | 1 - 2 files changed, 72 deletions(-) delete mode 100644 yt_dlp/extractor/camtube.py diff --git a/yt_dlp/extractor/camtube.py b/yt_dlp/extractor/camtube.py deleted file mode 100644 index b3be3bdcf7..0000000000 --- a/yt_dlp/extractor/camtube.py +++ /dev/null @@ -1,71 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - unified_timestamp, -) - - -class CamTubeIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|api)\.)?camtube\.co/recordings?/(?P<id>[^/?#&]+)' - _TESTS = [{ - 'url': 'https://camtube.co/recording/minafay-030618-1136-chaturbate-female', - 'info_dict': { - 'id': '42ad3956-dd5b-445a-8313-803ea6079fac', - 'display_id': 'minafay-030618-1136-chaturbate-female', - 'ext': 'mp4', - 'title': 'minafay-030618-1136-chaturbate-female', - 'duration': 1274, - 'timestamp': 1528018608, - 'upload_date': '20180603', - 'age_limit': 18 - }, - 'params': { - 'skip_download': True, - }, - }] - - _API_BASE = 'https://api.camtube.co' - - def _real_extract(self, url): - display_id = self._match_id(url) - - token = self._download_json( - '%s/rpc/session/new' % self._API_BASE, display_id, - 'Downloading session token')['token'] - - self._set_cookie('api.camtube.co', 'session', token) - - video = self._download_json( - '%s/recordings/%s' % (self._API_BASE, display_id), display_id, - headers={'Referer': url}) - - video_id = video['uuid'] - timestamp = unified_timestamp(video.get('createdAt')) - duration = int_or_none(video.get('duration')) - view_count = int_or_none(video.get('viewCount')) - like_count = int_or_none(video.get('likeCount')) - creator = video.get('stageName') - - formats = [{ - 'url': '%s/recordings/%s/manifest.m3u8' - % (self._API_BASE, video_id), - 'format_id': 'hls', - 'ext': 'mp4', - 'protocol': 'm3u8_native', - }] - - return { - 'id': video_id, - 'display_id': display_id, - 'title': display_id, - 'timestamp': 
timestamp, - 'duration': duration, - 'view_count': view_count, - 'like_count': like_count, - 'creator': creator, - 'formats': formats, - 'age_limit': 18 - } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index bcc669c7a1..fa5027f814 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -188,7 +188,6 @@ from .camdemy import ( CamdemyFolderIE ) from .cammodels import CamModelsIE -from .camtube import CamTubeIE from .camwithher import CamWithHerIE from .canalplus import CanalplusIE from .canalc2 import Canalc2IE From 9a292a620cf239ad70b5fdfe6976d17a36de14b0 Mon Sep 17 00:00:00 2001 From: coletdjnz <colethedj@protonmail.com> Date: Mon, 30 Aug 2021 09:34:39 +1200 Subject: [PATCH 020/641] [ATV.at] Fix extractor for ATV.at (#816) Authored-by: NeroBurner, coletdjnz Fixes https://github.com/ytdl-org/youtube-dl/issues/29079 --- yt_dlp/extractor/atvat.py | 83 +++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 42 deletions(-) diff --git a/yt_dlp/extractor/atvat.py b/yt_dlp/extractor/atvat.py index 95e572d70c..bfcf88f1af 100644 --- a/yt_dlp/extractor/atvat.py +++ b/yt_dlp/extractor/atvat.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( determine_ext, + dict_get, int_or_none, unescapeHTML, ) @@ -12,64 +13,62 @@ from ..utils import ( class ATVAtIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)' _TESTS = [{ - 'url': 'http://atv.at/aktuell/di-210317-2005-uhr/v1698449/', - 'md5': 'c3b6b975fb3150fc628572939df205f2', + 'url': 'https://www.atv.at/bauer-sucht-frau-die-zweite-chance/folge-1/d3390693/', + 'md5': 'c471605591009dfb6e6c54f7e62e2807', 'info_dict': { - 'id': '1698447', + 'id': '3390684', 'ext': 'mp4', - 'title': 'DI, 21.03.17 | 20:05 Uhr 1/1', + 'title': 'Bauer sucht Frau - Die zweite Chance Folge 1', } }, { - 'url': 'http://atv.at/aktuell/meinrad-knapp/d8416/', + 'url': 'https://www.atv.at/bauer-sucht-frau-staffel-17/fuenfte-eventfolge/d3339537/', 'only_matching': True, }] + def _process_source_entry(self, source, part_id): + source_url = source.get('url') + if not source_url: + return + if determine_ext(source_url) == 'm3u8': + return self._extract_m3u8_formats( + source_url, part_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False) + else: + return [{ + 'url': source_url, + }] + + def _process_entry(self, entry): + part_id = entry.get('id') + if not part_id: + return + formats = [] + for source in entry.get('sources', []): + formats.extend(self._process_source_entry(source, part_id) or []) + + self._sort_formats(formats) + return { + 'id': part_id, + 'title': entry.get('title'), + 'duration': int_or_none(entry.get('duration')), + 'formats': formats + } + def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) video_data = self._parse_json(unescapeHTML(self._search_regex( - [r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1', - r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'], + r'var\splaylist\s*=\s*(?P<json>\[.*\]);', webpage, 'player data', group='json')), - display_id)['config']['initial_video'] + display_id) - video_id = video_data['id'] - video_title = video_data['title'] - - parts = [] - for part in video_data.get('parts', []): - part_id = part['id'] - part_title = part['title'] - - formats = [] - for source in part.get('sources', []): - source_url = source.get('src') - if not source_url: - continue - ext = 
determine_ext(source_url) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - source_url, part_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - else: - formats.append({ - 'format_id': source.get('delivery'), - 'url': source_url, - }) - self._sort_formats(formats) - - parts.append({ - 'id': part_id, - 'title': part_title, - 'thumbnail': part.get('preview_image_url'), - 'duration': int_or_none(part.get('duration')), - 'is_live': part.get('is_livestream'), - 'formats': formats, - }) + first_video = video_data[0] + video_id = first_video['id'] + video_title = dict_get(first_video, ('tvShowTitle', 'title')) return { '_type': 'multi_video', 'id': video_id, 'title': video_title, - 'entries': parts, + 'entries': (self._process_entry(entry) for entry in video_data), } From 356ac009d3411f69fd1dc33baecd0c41846fd767 Mon Sep 17 00:00:00 2001 From: IONECarter <81190688+IONECarter@users.noreply.github.com> Date: Sun, 29 Aug 2021 17:43:59 -0400 Subject: [PATCH 021/641] [peloton] Add extractor (#192) Authored by: IONECarter, capntrips, pukkandan --- yt_dlp/extractor/extractors.py | 4 + yt_dlp/extractor/peloton.py | 222 +++++++++++++++++++++++++++++++++ 2 files changed, 226 insertions(+) create mode 100644 yt_dlp/extractor/peloton.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index fa5027f814..da5716ad1f 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1010,6 +1010,10 @@ from .patreon import PatreonIE from .pbs import PBSIE from .pearvideo import PearVideoIE from .peertube import PeerTubeIE +from .peloton import ( + PelotonIE, + PelotonLiveIE +) from .people import PeopleIE from .performgroup import PerformGroupIE from .periscope import ( diff --git a/yt_dlp/extractor/peloton.py b/yt_dlp/extractor/peloton.py new file mode 100644 index 0000000000..287d341c98 --- /dev/null +++ b/yt_dlp/extractor/peloton.py @@ -0,0 +1,222 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..compat import ( + compat_HTTPError, + compat_urllib_parse, +) +from ..utils import ( + ExtractorError, + float_or_none, + str_or_none, + traverse_obj, + url_or_none, +) + + +class PelotonIE(InfoExtractor): + IE_NAME = 'peloton' + _NETRC_MACHINE = 'peloton' + _VALID_URL = r'https?://members\.onepeloton\.com/classes/player/(?P<id>[a-f0-9]+)' + _TESTS = [{ + 'url': 'https://members.onepeloton.com/classes/player/0e9653eb53544eeb881298c8d7a87b86', + 'info_dict': { + 'id': '0e9653eb53544eeb881298c8d7a87b86', + 'title': '20 min Chest & Back Strength', + 'ext': 'mp4', + 'thumbnail': r're:^https?://.+\.jpg', + 'description': 'md5:fcd5be9b9eda0194b470e13219050a66', + 'creator': 'Chase Tucker', + 'release_timestamp': 1556141400, + 'timestamp': 1556141400, + 'upload_date': '20190424', + 'duration': 1389, + 'categories': ['Strength'], + 'tags': ['Workout Mat', 'Light Weights', 'Medium Weights'], + 'is_live': False, + 'chapters': 'count:1', + 'subtitles': {'en': [{ + 'url': r're:^https?://.+', + 'ext': 'vtt' + }]}, + }, 'params': { + 'skip_download': 'm3u8', + }, + '_skip': 'Account needed' + }, { + 'url': 'https://members.onepeloton.com/classes/player/26603d53d6bb4de1b340514864a6a6a8', + 'info_dict': { + 'id': '26603d53d6bb4de1b340514864a6a6a8', + 'title': '30 min Earth Day Run', + 'ext': 'm4a', + 'thumbnail': r're:https://.+\.jpg', + 'description': 'md5:adc065a073934d7ee0475d217afe0c3d', + 'creator': 'Selena Samuela', + 'release_timestamp': 1587567600, + 'timestamp': 1587567600, + 
'upload_date': '20200422', + 'duration': 1802, + 'categories': ['Running'], + 'is_live': False, + 'chapters': 'count:3' + }, 'params': { + 'skip_download': 'm3u8', + }, + '_skip': 'Account needed' + }] + + _MANIFEST_URL_TEMPLATE = '%s?hdnea=%s' + + def _start_session(self, video_id): + self._download_webpage('https://api.onepeloton.com/api/started_client_session', video_id, note='Starting session') + + def _login(self, video_id): + username, password = self._get_login_info() + if not (username and password): + self.raise_login_required() + try: + self._download_json( + 'https://api.onepeloton.com/auth/login', video_id, note='Logging in', + data=json.dumps({ + 'username_or_email': username, + 'password': password, + 'with_pubsub': False + }).encode(), + headers={'Content-Type': 'application/json', 'User-Agent': 'web'}) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + json_string = self._webpage_read_content(e.cause, None, video_id) + res = self._parse_json(json_string, video_id) + raise ExtractorError(res['message'], expected=res['message'] == 'Login failed') + else: + raise + + def _get_token(self, video_id): + try: + subscription = self._download_json( + 'https://api.onepeloton.com/api/subscription/stream', video_id, note='Downloading token', + data=json.dumps({}).encode(), headers={'Content-Type': 'application/json'}) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + json_string = self._webpage_read_content(e.cause, None, video_id) + res = self._parse_json(json_string, video_id) + raise ExtractorError(res['message'], expected=res['message'] == 'Stream limit reached') + else: + raise + return subscription['token'] + + def _real_extract(self, url): + video_id = self._match_id(url) + try: + self._start_session(video_id) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + self._login(video_id) + self._start_session(video_id) + else: + raise + + metadata = self._download_json('https://api.onepeloton.com/api/ride/%s/details?stream_source=multichannel' % video_id, video_id) + ride_data = metadata.get('ride') + if not ride_data: + raise ExtractorError('Missing stream metadata') + token = self._get_token(video_id) + + is_live = False + if ride_data.get('content_format') == 'audio': + url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('vod_stream_url'), compat_urllib_parse.quote(token)) + formats = [{ + 'url': url, + 'ext': 'm4a', + 'format_id': 'audio', + 'vcodec': 'none', + }] + subtitles = {} + else: + if ride_data.get('vod_stream_url'): + url = 'https://members.onepeloton.com/.netlify/functions/m3u8-proxy?displayLanguage=en&acceptedSubtitles=%s&url=%s?hdnea=%s' % ( + ','.join([re.sub('^([a-z]+)-([A-Z]+)$', r'\1', caption) for caption in ride_data['captions']]), + ride_data['vod_stream_url'], + compat_urllib_parse.quote(compat_urllib_parse.quote(token))) + elif ride_data.get('live_stream_url'): + url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('live_stream_url'), compat_urllib_parse.quote(token)) + is_live = True + else: + raise ExtractorError('Missing video URL') + formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4') + + if metadata.get('instructor_cues'): + subtitles['cues'] = [{ + 'data': json.dumps(metadata.get('instructor_cues')), + 'ext': 'json' + }] + + category = ride_data.get('fitness_discipline_display_name') + chapters = [{ + 'start_time': segment.get('start_time_offset'), + 'end_time': 
segment.get('start_time_offset') + segment.get('length'), + 'title': segment.get('name') + } for segment in traverse_obj(metadata, ('segments', 'segment_list'))] + + self._sort_formats(formats) + return { + 'id': video_id, + 'title': ride_data.get('title'), + 'formats': formats, + 'thumbnail': url_or_none(ride_data.get('image_url')), + 'description': str_or_none(ride_data.get('description')), + 'creator': traverse_obj(ride_data, ('instructor', 'name')), + 'release_timestamp': ride_data.get('original_air_time'), + 'timestamp': ride_data.get('original_air_time'), + 'subtitles': subtitles, + 'duration': float_or_none(ride_data.get('length')), + 'categories': [category] if category else None, + 'tags': traverse_obj(ride_data, ('equipment_tags', ..., 'name')), + 'is_live': is_live, + 'chapters': chapters + } + + +class PelotonLiveIE(InfoExtractor): + IE_NAME = 'peloton:live' + IE_DESC = 'Peloton Live' + _VALID_URL = r'https?://members\.onepeloton\.com/player/live/(?P<id>[a-f0-9]+)' + _TEST = { + 'url': 'https://members.onepeloton.com/player/live/eedee2d19f804a9788f53aa8bd38eb1b', + 'info_dict': { + 'id': '32edc92d28044be5bf6c7b6f1f8d1cbc', + 'title': '30 min HIIT Ride: Live from Home', + 'ext': 'mp4', + 'thumbnail': r're:^https?://.+\.png', + 'description': 'md5:f0d7d8ed3f901b7ee3f62c1671c15817', + 'creator': 'Alex Toussaint', + 'release_timestamp': 1587736620, + 'timestamp': 1587736620, + 'upload_date': '20200424', + 'duration': 2014, + 'categories': ['Cycling'], + 'is_live': False, + 'chapters': 'count:3' + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': 'm3u8', + }, + '_skip': 'Account needed' + } + + def _real_extract(self, url): + workout_id = self._match_id(url) + peloton = self._download_json(f'https://api.onepeloton.com/api/peloton/{workout_id}', workout_id) + + if peloton.get('ride_id'): + if not peloton.get('is_live') or peloton.get('is_encore') or peloton.get('status') != 'PRE_START': + return self.url_result('https://members.onepeloton.com/classes/player/%s' % peloton['ride_id']) + else: + raise ExtractorError('Ride has not started', expected=True) + else: + raise ExtractorError('Missing video ID') From 1dd6d9ca9d9d23525a4f00eb851d6e72ef52c4c7 Mon Sep 17 00:00:00 2001 From: zenerdi0de <83358565+zenerdi0de@users.noreply.github.com> Date: Mon, 30 Aug 2021 03:17:50 +0530 Subject: [PATCH 022/641] [Patreon] Add PatreonUserIE (#573) Authored by: zenerdi0de --- yt_dlp/extractor/extractors.py | 5 ++- yt_dlp/extractor/patreon.py | 57 +++++++++++++++++++++++++++++++++- 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index da5716ad1f..c52eb2635e 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1006,7 +1006,10 @@ from .paramountplus import ( ) from .parliamentliveuk import ParliamentLiveUKIE from .parlview import ParlviewIE -from .patreon import PatreonIE +from .patreon import ( + PatreonIE, + PatreonUserIE +) from .pbs import PBSIE from .pearvideo import PearVideoIE from .peertube import PeerTubeIE diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 7bd892fa56..a189c0237d 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import itertools + from .common import InfoExtractor from .vimeo import VimeoIE @@ -14,7 +16,7 @@ from ..utils import ( parse_iso8601, str_or_none, try_get, - url_or_none + url_or_none, ) @@ -185,3 +187,56 @@ class 
PatreonIE(InfoExtractor): }) return info + + +class PatreonUserIE(InfoExtractor): + + _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?P<id>[-_\w\d]+)/?(?:posts/?)?' + + _TESTS = [{ + 'url': 'https://www.patreon.com/dissonancepod/', + 'info_dict': { + 'title': 'dissonancepod', + }, + 'playlist_mincount': 68, + 'expected_warnings': 'Post not viewable by current user! Skipping!', + }, { + 'url': 'https://www.patreon.com/dissonancepod/posts', + 'only_matching': True + }, ] + + @classmethod + def suitable(cls, url): + return False if PatreonIE.suitable(url) else super(PatreonUserIE, cls).suitable(url) + + def _entries(self, campaign_id, user_id): + cursor = None + params = { + 'fields[campaign]': 'show_audio_post_download_links,name,url', + 'fields[post]': 'current_user_can_view,embed,image,is_paid,post_file,published_at,patreon_url,url,post_type,thumbnail_url,title', + 'filter[campaign_id]': campaign_id, + 'filter[is_draft]': 'false', + 'sort': '-published_at', + 'json-api-version': 1.0, + 'json-api-use-default-includes': 'false', + } + + for page in itertools.count(1): + + params.update({'page[cursor]': cursor} if cursor else {}) + posts_json = self._download_json('https://www.patreon.com/api/posts', user_id, note='Downloading posts page %d' % page, query=params, headers={'Cookie': '.'}) + + cursor = try_get(posts_json, lambda x: x['meta']['pagination']['cursors']['next']) + + for post in posts_json.get('data') or []: + yield self.url_result(url_or_none(try_get(post, lambda x: x['attributes']['patreon_url'])), 'Patreon') + + if cursor is None: + break + + def _real_extract(self, url): + + user_id = self._match_id(url) + webpage = self._download_webpage(url, user_id, headers={'Cookie': '.'}) + campaign_id = self._search_regex(r'https://www.patreon.com/api/campaigns/(\d+)/?', webpage, 'Campaign ID') + return self.playlist_result(self._entries(campaign_id, user_id), playlist_title=user_id) From 54153fb71bb6846040823abd3ce3ff0eb96e5b44 Mon Sep 17 00:00:00 2001 From: Sipherdrakon <64430430+Sipherdrakon@users.noreply.github.com> Date: Sun, 29 Aug 2021 17:50:58 -0400 Subject: [PATCH 023/641] [VH1,TVLand] Fix extractors (#784) Fixes #745 but not #713 Authored by: Sipherdrakon --- yt_dlp/extractor/mtv.py | 4 ++++ yt_dlp/extractor/nick.py | 5 +---- yt_dlp/extractor/tvland.py | 5 +++-- yt_dlp/extractor/vh1.py | 25 +++++++++---------------- 4 files changed, 17 insertions(+), 22 deletions(-) diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py index e446a955b9..6b506ad9ae 100644 --- a/yt_dlp/extractor/mtv.py +++ b/yt_dlp/extractor/mtv.py @@ -313,6 +313,10 @@ class MTVServicesInfoExtractor(InfoExtractor): video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer') mgid = video_player['props']['media']['video']['config']['uri'] + if not mgid: + mgid = self._search_regex( + r'"media":{"video":{"config":{"uri":"(mgid:.*?)"', webpage, 'mgid', default=None) + return mgid def _real_extract(self, url): diff --git a/yt_dlp/extractor/nick.py b/yt_dlp/extractor/nick.py index 084538d711..ba7da76026 100644 --- a/yt_dlp/extractor/nick.py +++ b/yt_dlp/extractor/nick.py @@ -67,6 +67,7 @@ class NickIE(MTVServicesInfoExtractor): 'description': 'md5:9d65a66df38e02254852794b2809d1cf', 'title': 'Blue\'s Imagination Station', }, + 'skip': 'Not accessible?' 
}] def _get_feed_query(self, uri): @@ -75,10 +76,6 @@ class NickIE(MTVServicesInfoExtractor): 'mgid': uri, } - def _extract_mgid(self, webpage): - mgid = self._search_regex(r'"media":{"video":{"config":{"uri":"(mgid:.*?)"', webpage, 'mgid', default=None) - return mgid - def _real_extract(self, url): domain, video_type, display_id = self._match_valid_url(url).groups() if video_type.startswith("episodes"): diff --git a/yt_dlp/extractor/tvland.py b/yt_dlp/extractor/tvland.py index 225b6b078c..9ebf57f740 100644 --- a/yt_dlp/extractor/tvland.py +++ b/yt_dlp/extractor/tvland.py @@ -1,12 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals -from .spike import ParamountNetworkIE +from .mtv import MTVServicesInfoExtractor # TODO: Remove - Reason not used anymore - Service moved to youtube -class TVLandIE(ParamountNetworkIE): +class TVLandIE(MTVServicesInfoExtractor): IE_NAME = 'tvland.com' _VALID_URL = r'https?://(?:www\.)?tvland\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)' _FEED_URL = 'http://www.tvland.com/feeds/mrss/' @@ -19,6 +19,7 @@ class TVLandIE(ParamountNetworkIE): 'title': 'The Dog', }, 'playlist_mincount': 5, + 'skip': '404 Not found', }, { 'url': 'https://www.tvland.com/video-clips/4n87f2/younger-a-first-look-at-younger-season-6', 'md5': 'e2c6389401cf485df26c79c247b08713', diff --git a/yt_dlp/extractor/vh1.py b/yt_dlp/extractor/vh1.py index ea576dc6ba..862c5c7dcd 100644 --- a/yt_dlp/extractor/vh1.py +++ b/yt_dlp/extractor/vh1.py @@ -10,22 +10,22 @@ class VH1IE(MTVServicesInfoExtractor): IE_NAME = 'vh1.com' _FEED_URL = 'http://www.vh1.com/feeds/mrss/' _TESTS = [{ - 'url': 'http://www.vh1.com/episodes/0umwpq/hip-hop-squares-kent-jones-vs-nick-young-season-1-ep-120', + 'url': 'https://www.vh1.com/episodes/0aqivv/nick-cannon-presents-wild-n-out-foushee-season-16-ep-12', 'info_dict': { - 'title': 'Kent Jones vs. Nick Young', - 'description': 'Come to Play. Stay to Party. 
With Mike Epps, TIP, O’Shea Jackson Jr., T-Pain, Tisha Campbell-Martin and more.', + 'title': 'Fousheé', + 'description': 'Fousheé joins Team Evolutions fight against Nick and Team Revolution in Baby Daddy, Baby Mama; Kick Em Out the Classroom; Backseat of My Ride and Wildstyle; and Fousheé performs.', }, 'playlist_mincount': 4, + 'skip': '404 Not found', }, { # Clip - 'url': 'http://www.vh1.com/video-clips/t74mif/scared-famous-scared-famous-extended-preview', + 'url': 'https://www.vh1.com/video-clips/e0sja0/nick-cannon-presents-wild-n-out-foushee-clap-for-him', 'info_dict': { - 'id': '0a50c2d2-a86b-4141-9565-911c7e2d0b92', + 'id': 'a07563f7-a37b-4e7f-af68-85855c2c7cc3', 'ext': 'mp4', - 'title': 'Scared Famous|October 9, 2017|1|NO-EPISODE#|Scared Famous + Extended Preview', - 'description': 'md5:eff5551a274c473a29463de40f7b09da', - 'upload_date': '20171009', - 'timestamp': 1507574700, + 'title': 'Fousheé - "clap for him"', + 'description': 'Singer Fousheé hits the Wild N Out: In the Dark stage with a performance of the tongue-in-cheek track "clap for him" from her 2021 album "time machine."', + 'upload_date': '20210826', }, 'params': { # m3u8 download @@ -34,10 +34,3 @@ class VH1IE(MTVServicesInfoExtractor): }] _VALID_URL = r'https?://(?:www\.)?vh1\.com/(?:video-clips|episodes)/(?P<id>[^/?#.]+)' - - def _real_extract(self, url): - playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - mgid = self._extract_triforce_mgid(webpage) - videos_info = self._get_videos_info(mgid) - return videos_info From 419508eabb5f324143c606b9ba0136d4a40abdf4 Mon Sep 17 00:00:00 2001 From: coletdjnz <colethedj@protonmail.com> Date: Mon, 30 Aug 2021 10:22:57 +1200 Subject: [PATCH 024/641] [Motherless] Fix extractor (#809) Authored-by: coletdjnz Fixes #806, https://github.com/ytdl-org/youtube-dl/issues/29626 --- yt_dlp/extractor/motherless.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/motherless.py b/yt_dlp/extractor/motherless.py index ef1e081f20..111c7c5442 100644 --- a/yt_dlp/extractor/motherless.py +++ b/yt_dlp/extractor/motherless.py @@ -127,9 +127,9 @@ class MotherlessIE(InfoExtractor): comment_count = webpage.count('class="media-comment-contents"') uploader_id = self._html_search_regex( - r'"thumb-member-username">\s+<a href="/m/([^"]+)"', - webpage, 'uploader_id') - + (r'"media-meta-member">\s+<a href="/m/([^"]+)"', + r'<span\b[^>]+\bclass="username">([^<]+)</span>'), + webpage, 'uploader_id', fatal=False) categories = self._html_search_meta('keywords', webpage, default=None) if categories: categories = [cat.strip() for cat in categories.split(',')] @@ -169,7 +169,18 @@ class MotherlessGroupIE(InfoExtractor): 'description': 'Sex can be funny. Wide smiles,laugh, games, fun of ' 'any kind!' 
}, - 'playlist_mincount': 9, + 'playlist_mincount': 0, + 'expected_warnings': [ + 'This group has no videos.', + ] + }, { + 'url': 'https://motherless.com/g/beautiful_cock', + 'info_dict': { + 'id': 'beautiful_cock', + 'title': 'Beautiful Cock', + 'description': 'Group for lovely cocks yours, mine, a friends anything human', + }, + 'playlist_mincount': 2500, }] @classmethod @@ -209,11 +220,18 @@ class MotherlessGroupIE(InfoExtractor): description = self._html_search_meta( 'description', webpage, fatal=False) page_count = self._int(self._search_regex( - r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT', - webpage, 'page_count'), 'page_count') + r'(\d+)</(?:a|span)><(?:a|span)[^>]+rel="next">', + webpage, 'page_count', default=0), 'page_count') + if not page_count: + message = self._search_regex( + r'class="error-page"[^>]*>\s*<p[^>]*>\s*(?P<error_msg>[^<]+)(?<=\S)\s*', + webpage, 'error_msg', default=None) or 'This group has no videos.' + self.report_warning(message, group_id) PAGE_SIZE = 80 def _get_page(idx): + if not page_count: + return webpage = self._download_webpage( page_url, group_id, query={'page': idx + 1}, note='Downloading page %d/%d' % (idx + 1, page_count) From 62cdaaf0e2781f45bdc23ccc1012175590c36d72 Mon Sep 17 00:00:00 2001 From: coletdjnz <colethedj@protonmail.com> Date: Mon, 30 Aug 2021 10:29:42 +1200 Subject: [PATCH 025/641] [StarTV] Add extractor for startv.com.tr (#815) Authored-by: mrfade, coletdjnz Related: https://github.com/ytdl-org/youtube-dl/issues/22715 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/startv.py | 103 +++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 yt_dlp/extractor/startv.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index c52eb2635e..2fe852570e 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1313,6 +1313,7 @@ from .srgssr import ( ) from .srmediathek import SRMediathekIE from .stanfordoc import StanfordOpenClassroomIE +from .startv import StarTVIE from .steam import SteamIE from .storyfire import ( StoryFireIE, diff --git a/yt_dlp/extractor/startv.py b/yt_dlp/extractor/startv.py new file mode 100644 index 0000000000..411320ede8 --- /dev/null +++ b/yt_dlp/extractor/startv.py @@ -0,0 +1,103 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_str, +) +from ..utils import ( + clean_html, + ExtractorError, + traverse_obj, + int_or_none, +) + + +class StarTVIE(InfoExtractor): + _VALID_URL = r"""(?x) + https?://(?:www\.)?startv\.com\.tr/ + (?: + (?:dizi|program)/(?:[^/?#&]+)/(?:bolumler|fragmanlar|ekstralar)| + video/arsiv/(?:dizi|program)/(?:[^/?#&]+) + )/ + (?P<id>[^/?#&]+) + """ + IE_NAME = 'startv' + _TESTS = [ + { + 'url': 'https://www.startv.com.tr/dizi/cocuk/bolumler/3-bolum', + 'md5': '72381a32bcc2e2eb5841e8c8bf68f127', + 'info_dict': { + 'id': '904972', + 'display_id': '3-bolum', + 'ext': 'mp4', + 'title': '3. 
Bölüm', + 'description': 'md5:3a8049f05a75c2e8747116a673275de4', + 'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$', + 'timestamp': 1569281400, + 'upload_date': '20190923' + }, + }, + { + 'url': 'https://www.startv.com.tr/video/arsiv/dizi/avlu/44-bolum', + 'only_matching': True + }, + { + 'url': 'https://www.startv.com.tr/dizi/cocuk/fragmanlar/5-bolum-fragmani', + 'only_matching': True + }, + { + 'url': 'https://www.startv.com.tr/dizi/cocuk/ekstralar/5-bolumun-nefes-kesen-final-sahnesi', + 'only_matching': True + }, + { + 'url': 'https://www.startv.com.tr/program/burcu-ile-haftasonu/bolumler/1-bolum', + 'only_matching': True + }, + { + 'url': 'https://www.startv.com.tr/program/burcu-ile-haftasonu/fragmanlar/2-fragman', + 'only_matching': True + }, + { + 'url': 'https://www.startv.com.tr/video/arsiv/program/buyukrisk/14-bolumde-hangi-unlu-ne-sordu-', + 'only_matching': True + }, + { + 'url': 'https://www.startv.com.tr/video/arsiv/program/buyukrisk/buyuk-risk-334-bolum', + 'only_matching': True + }, + { + 'url': 'https://www.startv.com.tr/video/arsiv/program/dada/dada-58-bolum', + 'only_matching': True + } + ] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + info_url = self._search_regex( + r'(["\'])videoUrl\1\s*:\s*\1(?P<url>(?:(?!\1).)+)\1\s*', + webpage, 'video info url', group='url') + + info = traverse_obj(self._download_json(info_url, display_id), 'data', expected_type=dict) + if not info: + raise ExtractorError('Failed to extract API data') + + video_id = compat_str(info.get('id')) + title = info.get('title') or self._og_search_title(webpage) + description = clean_html(info.get('description')) or self._og_search_description(webpage, default=None) + thumbnail = self._proto_relative_url( + self._og_search_thumbnail(webpage), scheme='http:') + + formats = self._extract_m3u8_formats( + traverse_obj(info, ('flavors', 'hls')), video_id, entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': int_or_none(info.get('release_date')), + 'formats': formats + } From 2ee6389bef2e0c3e2ca932ca4e4fa0d47dfa0cc4 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 30 Aug 2021 08:28:36 +0530 Subject: [PATCH 026/641] [build] Fix bug in making `yt-dlp.tar.gz` --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index bc0b4e399e..4ee1095d16 100644 --- a/Makefile +++ b/Makefile @@ -110,7 +110,7 @@ _EXTRACTOR_FILES = $(shell find yt_dlp/extractor -iname '*.py' -and -not -iname yt_dlp/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) $(PYTHON) devscripts/make_lazy_extractors.py $@ -yt-dlp.tar.gz: README.md yt-dlp.1 completions Changelog.md AUTHORS +yt-dlp.tar.gz: yt-dlp README.md supportedsites.md yt-dlp.1 completions Changelog.md AUTHORS @tar -czf $(DESTDIR)/yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \ --exclude '*.DS_Store' \ --exclude '*.kate-swp' \ @@ -124,7 +124,7 @@ yt-dlp.tar.gz: README.md yt-dlp.1 completions Changelog.md AUTHORS devscripts test \ Changelog.md AUTHORS LICENSE README.md supportedsites.md \ Makefile MANIFEST.in yt-dlp.1 completions \ - setup.py setup.cfg yt-dlp + setup.py setup.cfg yt-dlp yt_dlp AUTHORS: .mailmap git shortlog -s -n | cut -f2 | sort > AUTHORS From 05664a2f7b4e8fadd7a463b450f16d39663e3e09 Mon 
Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Mon, 30 Aug 2021 14:07:03 +0000 Subject: [PATCH 027/641] [CDA] Add more formats (#805) Fixes: #791, https://github.com/ytdl-org/youtube-dl/issues/29844 Authored by: u-spec-png --- yt_dlp/extractor/cda.py | 44 ++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py index e1b3919371..72c47050ff 100644 --- a/yt_dlp/extractor/cda.py +++ b/yt_dlp/extractor/cda.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import codecs import re +import json from .common import InfoExtractor from ..compat import ( @@ -19,6 +20,7 @@ from ..utils import ( parse_duration, random_birthday, urljoin, + try_get, ) @@ -38,6 +40,8 @@ class CDAIE(InfoExtractor): 'average_rating': float, 'duration': 39, 'age_limit': 0, + 'upload_date': '20160221', + 'timestamp': 1456078244, } }, { 'url': 'http://www.cda.pl/video/57413289', @@ -143,7 +147,7 @@ class CDAIE(InfoExtractor): b = [] for c in a: f = compat_ord(c) - b.append(compat_chr(33 + (f + 14) % 94) if 33 <= f and 126 >= f else compat_chr(f)) + b.append(compat_chr(33 + (f + 14) % 94) if 33 <= f <= 126 else compat_chr(f)) a = ''.join(b) a = a.replace('.cda.mp4', '') for p in ('.2cda.pl', '.3cda.pl'): @@ -173,18 +177,34 @@ class CDAIE(InfoExtractor): video['file'] = video['file'].replace('adc.mp4', '.mp4') elif not video['file'].startswith('http'): video['file'] = decrypt_file(video['file']) - f = { + video_quality = video.get('quality') + qualities = video.get('qualities', {}) + video_quality = next((k for k, v in qualities.items() if v == video_quality), video_quality) + info_dict['formats'].append({ 'url': video['file'], - } - m = re.search( - r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p', - page) - if m: - f.update({ - 'format_id': m.group('format_id'), - 'height': int(m.group('height')), - }) - info_dict['formats'].append(f) + 'format_id': video_quality, + 'height': int_or_none(video_quality[:-1]), + }) + for quality, cda_quality in qualities.items(): + if quality == video_quality: + continue + data = {'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2, + 'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}]} + data = json.dumps(data).encode('utf-8') + video_url = self._download_json( + f'https://www.cda.pl/video/{video_id}', video_id, headers={ + 'Content-Type': 'application/json', + 'X-Requested-With': 'XMLHttpRequest' + }, data=data, note=f'Fetching {quality} url', + errnote=f'Failed to fetch {quality} url', fatal=False) + if try_get(video_url, lambda x: x['result']['status']) == 'ok': + video_url = try_get(video_url, lambda x: x['result']['resp']) + info_dict['formats'].append({ + 'url': video_url, + 'format_id': quality, + 'height': int_or_none(quality[:-1]) + }) + if not info_dict['duration']: info_dict['duration'] = parse_duration(video.get('duration')) From b8773e63f01510a254a40b8d4d3018414b52eda7 Mon Sep 17 00:00:00 2001 From: The Hatsune Daishi <nao20010128@gmail.com> Date: Mon, 30 Aug 2021 23:37:43 +0900 Subject: [PATCH 028/641] [build] Add homebrew taps (#827) https://github.com/yt-dlp/homebrew-taps Closes: #754, #770 Authored by: nao20010128nao --- .github/workflows/build.yml | 13 ++++++++++++ README.md | 8 ++++++++ devscripts/update-formulae.py | 37 +++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+) create mode 100644 
devscripts/update-formulae.py diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4c56a5180b..b55429e1dd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -84,6 +84,19 @@ jobs: rm -rf dist/* python setup.py sdist bdist_wheel twine upload dist/* + - name: Install SSH private key + if: ${{ secrets.BREW_TOKEN }} + uses: webfactory/ssh-agent@v0.5.3 + with: + ssh-private-key: ${{ secrets.BREW_TOKEN }} + - name: Update Homebrew Formulae + # can't use secrets.GITHUB_TOKEN because it's outside yt-dlp repository + if: ${{ secrets.BREW_TOKEN }} + run: | + git clone git@github.com:yt-dlp/homebrew-taps taps/ + python3 devscripts/update-formulae.py taps/Formula/yt-dlp.rb "${{ steps.bump_version.outputs.ytdlp_version }}" + git -C taps/ commit -am 'yt-dlp: ${{ steps.bump_version.outputs.ytdlp_version }}' + git -C taps/ push build_windows: runs-on: windows-latest diff --git a/README.md b/README.md index b0b34506d7..a9720bfb9f 100644 --- a/README.md +++ b/README.md @@ -151,6 +151,7 @@ yt-dlp is not platform specific. So it should work on your Unix box, on Windows You can install yt-dlp using one of the following methods: * Download the binary from the [latest release](https://github.com/yt-dlp/yt-dlp/releases/latest) (recommended method) +* With Homebrew, `brew install yt-dlp/taps/yt-dlp` * Use [PyPI package](https://pypi.org/project/yt-dlp): `python3 -m pip install --upgrade yt-dlp` * Use pip+git: `python3 -m pip install --upgrade git+https://github.com/yt-dlp/yt-dlp.git@release` * Install master branch: `python3 -m pip install --upgrade git+https://github.com/yt-dlp/yt-dlp` @@ -174,9 +175,16 @@ sudo aria2c https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o sudo chmod a+rx /usr/local/bin/yt-dlp ``` +macOS or Linux users that are using Homebrew (formerly known as Linuxbrew for Linux users) can also install it by: + +``` +brew install yt-dlp/taps/yt-dlp +``` + ### UPDATE You can use `yt-dlp -U` to update if you are using the provided release. If you are using `pip`, simply re-run the same command that was used to install the program. +If you have installed using Homebrew, run `brew upgrade yt-dlp/taps/yt-dlp` ### DEPENDENCIES Python versions 3.6+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly. 
diff --git a/devscripts/update-formulae.py b/devscripts/update-formulae.py
new file mode 100644
index 0000000000..41bc1ac7a3
--- /dev/null
+++ b/devscripts/update-formulae.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+from __future__ import unicode_literals
+
+import json
+import os
+import re
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from yt_dlp.compat import compat_urllib_request
+
+
+# usage: python3 ./devscripts/update-formulae.py <path-to-formulae-rb> <version>
+# version can be either 0-aligned (yt-dlp version) or normalized (PyPI version)
+
+filename, version = sys.argv[1:]
+
+normalized_version = '.'.join(str(int(x)) for x in version.split('.'))
+
+pypi_release = json.loads(compat_urllib_request.urlopen(
+    'https://pypi.org/pypi/yt-dlp/%s/json' % normalized_version
+).read().decode('utf-8'))
+
+tarball_file = next(x for x in pypi_release['urls'] if x['filename'].endswith('.tar.gz'))
+
+sha256sum = tarball_file['digests']['sha256']
+url = tarball_file['url']
+
+with open(filename, 'r') as r:
+    formulae_text = r.read()
+
+formulae_text = re.sub(r'sha256 "[0-9a-f]*?"', 'sha256 "%s"' % sha256sum, formulae_text)
+formulae_text = re.sub(r'url "[^"]*?"', 'url "%s"' % url, formulae_text)
+
+with open(filename, 'w') as w:
+    w.write(formulae_text)

From 875cfb8cbc4c14f204d636760134400d3cea20a0 Mon Sep 17 00:00:00 2001
From: Luc Ritchie <luc.ritchie@gmail.com>
Date: Mon, 30 Aug 2021 11:35:48 -0400
Subject: [PATCH 029/641] [afreecatv] Fix adult VODs (#831)

Original PR: https://github.com/ytdl-org/youtube-dl/pull/28405
Fixes https://github.com/ytdl-org/youtube-dl/issues/26622, https://github.com/ytdl-org/youtube-dl/issues/26926

Authored by: wlritchi
---
 yt_dlp/extractor/afreecatv.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py
index 648f1122dc..d45bcb762a 100644
--- a/yt_dlp/extractor/afreecatv.py
+++ b/yt_dlp/extractor/afreecatv.py
@@ -237,6 +237,7 @@ class AfreecaTVIE(InfoExtractor):
             r'nTitleNo\s*=\s*(\d+)', webpage, 'title',
             default=video_id)
         partial_view = False
+        adult_view = False
         for _ in range(2):
             query = {
                 'nTitleNo': video_id,
@@ -245,6 +246,8 @@ class AfreecaTVIE(InfoExtractor):
             }
             if partial_view:
                 query['partialView'] = 'SKIP_ADULT'
+            if adult_view:
+                query['adultView'] = 'ADULT_VIEW'
             video_xml = self._download_xml(
                 'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php',
                 video_id, 'Downloading video info XML%s'
@@ -264,6 +267,9 @@ class AfreecaTVIE(InfoExtractor):
                 partial_view = True
                 continue
             elif flag == 'ADULT':
+                if not adult_view:
+                    adult_view = True
+                    continue
                 error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.'
else: error = flag From b2eeee0ce018a50cb6f496829147fbc1f85f0487 Mon Sep 17 00:00:00 2001 From: Luc Ritchie <luc.ritchie@gmail.com> Date: Mon, 30 Aug 2021 12:07:34 -0400 Subject: [PATCH 030/641] [afreecatv] Tolerate failure to parse date string (#832) Authored by: wlritchi --- yt_dlp/extractor/afreecatv.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index d45bcb762a..063872b4f5 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -6,9 +6,11 @@ import re from .common import InfoExtractor from ..compat import compat_xpath from ..utils import ( + date_from_str, determine_ext, ExtractorError, int_or_none, + unified_strdate, url_or_none, urlencode_postdata, xpath_text, @@ -315,8 +317,15 @@ class AfreecaTVIE(InfoExtractor): if not file_url: continue key = file_element.get('key', '') - upload_date = self._search_regex( - r'^(\d{8})_', key, 'upload date', default=None) + upload_date = unified_strdate(self._search_regex( + r'^(\d{8})_', key, 'upload date', default=None)) + if upload_date is not None: + # sometimes the upload date isn't included in the file name + # instead, another random ID is, which may parse as a valid + # date but be wildly out of a reasonable range + parsed_date = date_from_str(upload_date) + if parsed_date.year < 2000 or parsed_date.year >= 2100: + upload_date = None file_duration = int_or_none(file_element.get('duration')) format_id = key if key else '%s_%s' % (video_id, file_num) if determine_ext(file_url) == 'm3u8': From 8eb7ba82ca4e9853cbc9db403fc066e7707f3110 Mon Sep 17 00:00:00 2001 From: BunnyHelp <bunnyhelp120@gmail.com> Date: Tue, 31 Aug 2021 15:29:30 -0400 Subject: [PATCH 031/641] [iwara.tv] Extract more metadata (#829) Authored-by: BunnyHelp --- yt_dlp/extractor/iwara.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py index dae3da32cd..254d986923 100644 --- a/yt_dlp/extractor/iwara.py +++ b/yt_dlp/extractor/iwara.py @@ -1,5 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals +import re from .common import InfoExtractor from ..compat import compat_urllib_parse_urlparse @@ -8,6 +9,8 @@ from ..utils import ( mimetype2ext, remove_end, url_or_none, + unified_strdate, + strip_or_none, ) @@ -21,6 +24,10 @@ class IwaraIE(InfoExtractor): 'ext': 'mp4', 'title': '【MMD R-18】ガールフレンド carry_me_off', 'age_limit': 18, + 'thumbnail': 'https://i.iwara.tv/sites/default/files/videos/thumbnails/7951/thumbnail-7951_0001.png', + 'uploader': 'Reimu丨Action', + 'upload_date': '20150828', + 'description': 'md5:1d4905ce48c66c9299c617f08e106e0f', }, }, { 'url': 'http://ecchi.iwara.tv/videos/Vb4yf2yZspkzkBO', @@ -73,8 +80,17 @@ class IwaraIE(InfoExtractor): r'<title>([^<]+)', webpage, 'title'), ' | Iwara') thumbnail = self._html_search_regex( - r']+id=[\'"]video-player[\'"][^>]+poster=[\'"]([^\'"]+)', - webpage, 'thumbnail', default=None) + r'poster=[\'"]([^\'"]+)', webpage, 'thumbnail', default=None) + + uploader = self._html_search_regex( + r'class="username">([^<]+)', webpage, 'uploader', fatal=False) + + upload_date = unified_strdate(self._html_search_regex( + r'作成日:([^\s]+)', webpage, 'upload_date', fatal=False)) + + description = strip_or_none(self._search_regex( + r'

(.+?(?= Date: Wed, 1 Sep 2021 10:31:11 +1200 Subject: [PATCH 032/641] [Viafree] Fix extractor and extract subtitles (#828) Authored by: coletdjnz Fixes #820 --- yt_dlp/extractor/tvplay.py | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/yt_dlp/extractor/tvplay.py b/yt_dlp/extractor/tvplay.py index c60af111c7..9b6d17f619 100644 --- a/yt_dlp/extractor/tvplay.py +++ b/yt_dlp/extractor/tvplay.py @@ -34,8 +34,8 @@ class TVPlayIE(InfoExtractor): tvplay(?:\.skaties)?\.lv(?:/parraides)?| (?:tv3play|play\.tv3)\.lt(?:/programos)?| tv3play(?:\.tv3)?\.ee/sisu| - (?:tv(?:3|6|8|10)play|viafree)\.se/program| - (?:(?:tv3play|viasat4play|tv6play|viafree)\.no|(?:tv3play|viafree)\.dk)/programmer| + (?:tv(?:3|6|8|10)play)\.se/program| + (?:(?:tv3play|viasat4play|tv6play)\.no|(?:tv3play)\.dk)/programmer| play\.nova(?:tv)?\.bg/programi ) /(?:[^/]+/)+ @@ -223,10 +223,6 @@ class TVPlayIE(InfoExtractor): 'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true', 'only_matching': True, }, - { - 'url': 'http://www.viafree.se/program/underhallning/i-like-radio-live/sasong-1/676869', - 'only_matching': True, - }, { 'url': 'mtg:418113', 'only_matching': True, @@ -359,6 +355,23 @@ class ViafreeIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'https://www.viafree.dk/programmer/humor/comedy-central-roast-of-charlie-sheen/film/1047660', + 'info_dict': { + 'id': '1047660', + 'ext': 'mp4', + 'title': 'Comedy Central Roast of Charlie Sheen - Comedy Central Roast of Charlie Sheen', + 'description': 'md5:ec956d941ae9fd7c65a48fd64951dc6d', + 'series': 'Comedy Central Roast of Charlie Sheen', + 'season_number': 1, + 'duration': 3747, + 'timestamp': 1608246060, + 'upload_date': '20201217' + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True + } }, { # with relatedClips 'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-1', @@ -373,13 +386,12 @@ class ViafreeIE(InfoExtractor): }, { 'url': 'http://www.viafree.dk/programmer/reality/paradise-hotel/saeson-7/episode-5', 'only_matching': True, + }, { + 'url': 'http://www.viafree.se/program/underhallning/i-like-radio-live/sasong-1/676869', + 'only_matching': True, }] _GEO_BYPASS = False - @classmethod - def suitable(cls, url): - return False if TVPlayIE.suitable(url) else super(ViafreeIE, cls).suitable(url) - def _real_extract(self, url): country, path = self._match_valid_url(url).groups() content = self._download_json( @@ -398,16 +410,16 @@ class ViafreeIE(InfoExtractor): self.raise_geo_restricted(countries=[country]) raise - formats = self._extract_m3u8_formats(stream_href, guid, 'mp4') + formats, subtitles = self._extract_m3u8_formats_and_subtitles(stream_href, guid, 'mp4') self._sort_formats(formats) episode = program.get('episode') or {} - return { 'id': guid, 'title': title, 'thumbnail': meta.get('image'), 'description': meta.get('description'), 'series': episode.get('seriesTitle'), + 'subtitles': subtitles, 'episode_number': int_or_none(episode.get('episodeNumber')), 'season_number': int_or_none(episode.get('seasonNumber')), 'duration': int_or_none(try_get(program, lambda x: x['video']['duration']['milliseconds']), 1000), From 8e25d624df003d691be922488d6ab7007f75333d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 1 Sep 2021 08:45:56 +0530 Subject: [PATCH 033/641] [EmbedSubtitle] Continue even if some files are missing --- yt_dlp/postprocessor/ffmpeg.py | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index b66a0b4452..7537d5db4e 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -478,6 +478,9 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): mp4_ass_warn = False for lang, sub_info in subtitles.items(): + if not os.path.exists(information.get('filepath', '')): + self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing') + continue sub_ext = sub_info['ext'] if sub_ext == 'json': self.report_warning('JSON subtitles cannot be embedded') From 8a2d992389c37f5f99f5c74677f7900f1ae45f94 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 1 Sep 2021 09:17:35 +0530 Subject: [PATCH 034/641] [facebook] Fix format sorting Closes #795 --- yt_dlp/extractor/facebook.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 2991a9f35d..44d3dc0d79 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -479,7 +479,7 @@ class FacebookIE(InfoExtractor): for f in formats: f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1' - self._sort_formats(formats) + self._sort_formats(formats, ('res', 'quality')) def extract_relay_data(_filter): return self._parse_json(self._search_regex( @@ -687,13 +687,14 @@ class FacebookIE(InfoExtractor): for src_type in ('src', 'src_no_ratelimit'): src = f[0].get('%s_%s' % (quality, src_type)) if src: - preference = -10 if format_id == 'progressive' else 0 + preference = -10 if format_id == 'progressive' else -1 if quality == 'hd': preference += 5 formats.append({ 'format_id': '%s_%s_%s' % (format_id, quality, src_type), 'url': src, 'quality': preference, + 'height': 720 if quality == 'hd' else None }) extract_dash_manifest(f[0], formats) subtitles_src = f[0].get('subtitles_src') From 1461d7bef244b7fc1a84f82512a330576baf327b Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Wed, 1 Sep 2021 13:10:25 +0000 Subject: [PATCH 035/641] [Tokentube] Add extractor (#842) Closes #800 Authored by: u-spec-png --- yt_dlp/extractor/extractors.py | 4 + yt_dlp/extractor/tokentube.py | 152 +++++++++++++++++++++++++++++++++ 2 files changed, 156 insertions(+) create mode 100644 yt_dlp/extractor/tokentube.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 2fe852570e..f0c22cd579 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1410,6 +1410,10 @@ from .toggle import ( ToggleIE, MeWatchIE, ) +from .tokentube import ( + TokentubeIE, + TokentubeChannelIE +) from .tonline import TOnlineIE from .toongoggles import ToonGogglesIE from .toutv import TouTvIE diff --git a/yt_dlp/extractor/tokentube.py b/yt_dlp/extractor/tokentube.py new file mode 100644 index 0000000000..d6362117f7 --- /dev/null +++ b/yt_dlp/extractor/tokentube.py @@ -0,0 +1,152 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import functools +import re + +from .common import InfoExtractor +from ..utils import ( + parse_count, + unified_strdate, + js_to_json, + OnDemandPagedList, +) + + +class TokentubeIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tokentube\.net/(?:view\?[vl]=|[vl]/)(?P\d+)' + _TESTS = [{ + 'url': 'https://tokentube.net/l/3236632011/Praise-A-Thon-Pastori-Chrisin-ja-Pastori-Bennyn-kanssa-27-8-2021', + 'info_dict': { + 'id': '3236632011', + 'ext': 'mp4', + 'title': 'Praise-A-Thon Pastori Chrisin ja Pastori Bennyn kanssa 27.8.2021', + 
'description': '', + 'uploader': 'Pastori Chris - Rapsodia.fi', + 'upload_date': '20210827', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://tokentube.net/v/3950239124/Linux-Ubuntu-Studio-perus-k%C3%A4ytt%C3%B6', + 'md5': '0e1f00421f501f5eada9890d38fcfb56', + 'info_dict': { + 'id': '3950239124', + 'ext': 'mp4', + 'title': 'Linux Ubuntu Studio perus käyttö', + 'description': 'md5:854ff1dc732ff708976de2880ea32050', + 'uploader': 'jyrilehtonen', + 'upload_date': '20210825', + }, + }, { + 'url': 'https://tokentube.net/view?v=3582463289', + 'info_dict': { + 'id': '3582463289', + 'ext': 'mp4', + 'title': 'Police for Freedom - toiminta aloitetaan Suomessa ❤️??', + 'description': 'md5:cd92e620d7f5fa162e8410d0fc9a08be', + 'uploader': 'Voitontie', + 'upload_date': '20210428', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r'(.+?)', webpage, 'title') + + data_json = self._html_search_regex(r'({["\']html5["\'].+?}}}+)', webpage, 'data json') + data_json = self._parse_json(js_to_json(data_json), video_id, fatal=False) + + sources = data_json.get('sources') or self._parse_json( + self._html_search_regex(r'updateSrc\(([^\)]+)\)', webpage, 'sources'), + video_id, transform_source=js_to_json) + + formats = [{ + 'url': format.get('src'), + 'format_id': format.get('label'), + 'height': format.get('res'), + } for format in sources] + + view_count = parse_count(self._html_search_regex( + r'\s*([\d\.,]+)\s*views?

', + webpage, 'view_count', fatal=False)) + + like_count = parse_count(self._html_search_regex( + r'\s*(\d+)\s*', + webpage, 'like count', fatal=False)) + + dislike_count = parse_count(self._html_search_regex( + r'\s*(\d+)\s*', + webpage, 'dislike count', fatal=False)) + + upload_date = unified_strdate(self._html_search_regex( + r'Published\s*on\s+([^<]+)', + webpage, 'upload date', fatal=False)) + + uploader = self._html_search_regex( + r']+>(.+?)', + webpage, 'uploader', fatal=False) + + description = self._html_search_meta('description', webpage) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'formats': formats, + 'title': title, + 'view_count': view_count, + 'like_count': like_count, + 'dislike_count': dislike_count, + 'upload_date': upload_date, + 'description': description, + 'uploader': uploader, + } + + +class TokentubeChannelIE(InfoExtractor): + _PAGE_SIZE = 20 + IE_NAME = 'Tokentube:channel' + _VALID_URL = r'https?://(?:www\.)?tokentube\.net/channel/(?P\d+)/[^/]+(?:/videos)?' + _TESTS = [{ + 'url': 'https://tokentube.net/channel/3697658904/TokenTube', + 'info_dict': { + 'id': '3697658904', + }, + 'playlist_mincount': 7, + }, { + 'url': 'https://tokentube.net/channel/3353234420/Linux/videos', + 'info_dict': { + 'id': '3353234420', + }, + 'playlist_mincount': 20, + }, { + 'url': 'https://tokentube.net/channel/3475834195/Voitontie', + 'info_dict': { + 'id': '3475834195', + }, + 'playlist_mincount': 150, + }] + + def _fetch_page(self, channel_id, page): + page += 1 + videos_info = self._download_webpage( + f'https://tokentube.net/videos?p=0&m=1&sort=recent&u={channel_id}&page={page}', + channel_id, headers={'X-Requested-With': 'XMLHttpRequest'}, + note=f'Downloading page {page}', fatal=False) + if ' Sorry, no results were found.' 
not in videos_info: + for path, media_id in re.findall( + r']+\bhref=["\']([^"\']+/[lv]/(\d+)/\S+)["\'][^>]+>', + videos_info): + yield self.url_result(path, ie=TokentubeIE.ie_key(), video_id=media_id) + + def _real_extract(self, url): + channel_id = self._match_id(url) + + entries = OnDemandPagedList(functools.partial( + self._fetch_page, channel_id), self._PAGE_SIZE) + + return self.playlist_result(entries, channel_id) From 908b56eaf7872149706dbd7fa071f838d0c786b7 Mon Sep 17 00:00:00 2001 From: octotherp <89869439+octotherp@users.noreply.github.com> Date: Wed, 1 Sep 2021 16:28:25 +0300 Subject: [PATCH 036/641] [XHamster] Extract `uploader_id` (#844) Authored by: octotherp --- yt_dlp/extractor/xhamster.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py index 7e33c420e4..9d4ed47d41 100644 --- a/yt_dlp/extractor/xhamster.py +++ b/yt_dlp/extractor/xhamster.py @@ -245,6 +245,8 @@ class XHamsterIE(InfoExtractor): else: categories = None + uploader_url = url_or_none(try_get(video, lambda x: x['author']['pageURL'])) + return { 'id': video_id, 'display_id': display_id, @@ -253,6 +255,8 @@ class XHamsterIE(InfoExtractor): 'timestamp': int_or_none(video.get('created')), 'uploader': try_get( video, lambda x: x['author']['name'], compat_str), + 'uploader_url': uploader_url, + 'uploader_id': uploader_url.split('/')[-1] if uploader_url else None, 'thumbnail': video.get('thumbURL'), 'duration': int_or_none(video.get('duration')), 'view_count': int_or_none(video.get('views')), @@ -352,6 +356,7 @@ class XHamsterIE(InfoExtractor): 'description': description, 'upload_date': upload_date, 'uploader': uploader, + 'uploader_id': uploader.lower() if uploader else None, 'thumbnail': thumbnail, 'duration': duration, 'view_count': view_count, From ee57a19d845e01572830535bd2308f4561ddf740 Mon Sep 17 00:00:00 2001 From: nyuszika7h Date: Wed, 1 Sep 2021 17:39:15 +0200 Subject: [PATCH 037/641] [mediaset] Fix extraction for some videos (#850) This was broken by #564 Closes #849 Authored by: nyuszika7h --- yt_dlp/extractor/mediaset.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py index d8f12dca6b..f4db58e64a 100644 --- a/yt_dlp/extractor/mediaset.py +++ b/yt_dlp/extractor/mediaset.py @@ -58,6 +58,22 @@ class MediasetIE(ThePlatformBaseIE): 'uploader': 'Canale 5', 'uploader_id': 'C5', }, + }, { + 'url': 'https://www.mediasetplay.mediaset.it/video/cameracafe5/episodio-69-pezzo-di-luna_F303843101017801', + 'md5': 'd1650ac9ff944f185556126a736df148', + 'info_dict': { + 'id': 'F303843101017801', + 'ext': 'mp4', + 'title': 'Episodio 69 - Pezzo di luna', + 'description': '', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 263.008, + 'upload_date': '20200902', + 'series': 'Camera Café 5', + 'timestamp': 1599064700, + 'uploader': 'Italia 1', + 'uploader_id': 'I1', + }, }, { # clip 'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680', @@ -132,7 +148,7 @@ class MediasetIE(ThePlatformBaseIE): formats = [] subtitles = {} first_e = None - asset_type = 'HD,browser,geoIT|SD,browser,geoIT|geoNo:HD,browser,geoIT|geoNo:SD,browser,geoIT|geoNo' + asset_type = 'HD,browser,geoIT|SD,browser,geoIT|geoNo:HD,browser,geoIT|geoNo:SD,browser,geoIT|geoNo|HD|SD' # TODO: fixup ISM+none manifest URLs for f in ('MPEG4', 'MPEG-DASH+none', 'M3U+none'): try: From 49ca8db06bf712ff8ce262039e0c154520ecb874 Mon Sep 17 00:00:00 
2001 From: nyuszika7h Date: Wed, 1 Sep 2021 20:53:19 +0200 Subject: [PATCH 038/641] [mediaset] Fix extraction for more videos (#852) Closes #851 Authored by: nyuszika7h --- yt_dlp/extractor/mediaset.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py index f4db58e64a..26e7abc493 100644 --- a/yt_dlp/extractor/mediaset.py +++ b/yt_dlp/extractor/mediaset.py @@ -44,7 +44,7 @@ class MediasetIE(ThePlatformBaseIE): }, }, { 'url': 'https://www.mediasetplay.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501', - 'md5': '288532f0ad18307705b01e581304cd7b', + 'md5': '1276f966ac423d16ba255ce867de073e', 'info_dict': { 'id': 'F309013801000501', 'ext': 'mp4', @@ -74,6 +74,22 @@ class MediasetIE(ThePlatformBaseIE): 'uploader': 'Italia 1', 'uploader_id': 'I1', }, + }, { + 'url': 'https://www.mediasetplay.mediaset.it/video/cameracafe5/episodio-51-tu-chi-sei_F303843107000601', + 'md5': '567e9ad375b7a27a0e370650f572a1e3', + 'info_dict': { + 'id': 'F303843107000601', + 'ext': 'mp4', + 'title': 'Episodio 51 - Tu chi sei?', + 'description': '', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 367.021, + 'upload_date': '20200902', + 'series': 'Camera Café 5', + 'timestamp': 1599069817, + 'uploader': 'Italia 1', + 'uploader_id': 'I1', + }, }, { # clip 'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680', @@ -148,7 +164,7 @@ class MediasetIE(ThePlatformBaseIE): formats = [] subtitles = {} first_e = None - asset_type = 'HD,browser,geoIT|SD,browser,geoIT|geoNo:HD,browser,geoIT|geoNo:SD,browser,geoIT|geoNo|HD|SD' + asset_type = 'geoNo:HD,browser,geoIT|geoNo:HD,geoIT|geoNo:SD,browser,geoIT|geoNo:SD,geoIT|geoNo|HD|SD' # TODO: fixup ISM+none manifest URLs for f in ('MPEG4', 'MPEG-DASH+none', 'M3U+none'): try: From f0e53663350a96eacb9fe273607ae564af57a329 Mon Sep 17 00:00:00 2001 From: ouwou <26526779+ouwou@users.noreply.github.com> Date: Wed, 1 Sep 2021 18:54:31 +0000 Subject: [PATCH 039/641] [reddit] Fix for quarantined subreddits (#848) Authored by: ouwou --- yt_dlp/extractor/reddit.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index 2a1b950bd7..638f2b6a84 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -102,6 +102,8 @@ class RedditRIE(InfoExtractor): video_id = self._match_id(url) + self._set_cookie('reddit.com', '_options', '%7B%22pref_quarantine_optin%22%3A%20true%7D') + data = self._download_json( url + '/.json', video_id)[0]['data']['children'][0]['data'] From 7a340e0df352bf97da7a7fd238f7d705afbd9c6a Mon Sep 17 00:00:00 2001 From: Nil Admirari <50202386+nihil-admirari@users.noreply.github.com> Date: Wed, 1 Sep 2021 20:55:16 +0000 Subject: [PATCH 040/641] Native SponsorBlock implementation and related improvements (#360) SponsorBlock options: * The fetched sponsor sections are written to infojson * `--sponsorblock-remove` removes specified chapters from file * `--sponsorblock-mark` marks the specified sponsor sections as chapters * `--sponsorblock-chapter-title` to specify sponsor chapter template * `--sponsorblock-api` to use a different API Related improvements: * Split `--embed-chapters` from `--embed-metadata` * Add `--remove-chapters` to remove arbitrary chapters * Add `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters Deprecates all `--sponskrub` options Authored by: nihil-admirari, pukkandan --- 
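Note: the new options map directly onto two new postprocessors. The following is a minimal, illustrative sketch of the same wiring when embedding yt-dlp, mirroring what `_real_main()` in yt_dlp/__init__.py below sets up for `--sponsorblock-mark`/`--sponsorblock-remove`; the video URL and category choices are placeholders, and the chapter title shown is the documented default template:

    from yt_dlp import YoutubeDL

    ydl_opts = {
        'postprocessors': [{
            # Fetch the segments from the SponsorBlock API right after extraction
            'key': 'SponsorBlock',
            'categories': {'sponsor', 'preview'},
            'api': 'https://sponsor.ajay.app',
            'when': 'pre_process',
        }, {
            # Turn the fetched segments into chapter marks and cut out the sponsors
            'key': 'ModifyChapters',
            'remove_sponsor_segments': {'sponsor'},
            'sponsorblock_chapter_title': '[SponsorBlock]: %(category_names)l',
            'force_keyframes': False,
        }],
    }

    with YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://www.youtube.com/watch?v=example'])

The equivalent command line would be: yt-dlp --sponsorblock-mark sponsor,preview --sponsorblock-remove sponsor URL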
README.md | 124 +++++-- test/test_postprocessors.py | 460 ++++++++++++++++++++++++ yt_dlp/__init__.py | 84 ++++- yt_dlp/options.py | 113 ++++-- yt_dlp/postprocessor/__init__.py | 4 + yt_dlp/postprocessor/ffmpeg.py | 174 ++++++--- yt_dlp/postprocessor/modify_chapters.py | 333 +++++++++++++++++ yt_dlp/postprocessor/sponskrub.py | 1 + yt_dlp/postprocessor/sponsorblock.py | 96 +++++ 9 files changed, 1260 insertions(+), 129 deletions(-) create mode 100644 yt_dlp/postprocessor/modify_chapters.py create mode 100644 yt_dlp/postprocessor/sponsorblock.py diff --git a/README.md b/README.md index a9720bfb9f..45b5541cc9 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t * [Subtitle Options](#subtitle-options) * [Authentication Options](#authentication-options) * [Post-processing Options](#post-processing-options) - * [SponSkrub (SponsorBlock) Options](#sponskrub-sponsorblock-options) + * [SponsorBlock Options](#sponsorblock-options) * [Extractor Options](#extractor-options) * [CONFIGURATION](#configuration) * [Authentication with .netrc file](#authentication-with-netrc-file) @@ -62,7 +62,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t # NEW FEATURES The major new features from the latest release of [blackjack4494/yt-dlc](https://github.com/blackjack4494/yt-dlc) are: -* **[SponSkrub Integration](#sponskrub-sponsorblock-options)**: You can use [SponSkrub](https://github.com/yt-dlp/SponSkrub) to mark/remove sponsor sections in youtube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API +* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in youtube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API * **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will be now preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples)) @@ -194,7 +194,6 @@ On windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https: While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly recommended * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging seperate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. Licence [depends on the build](https://www.ffmpeg.org/legal.html) -* [**sponskrub**](https://github.com/faissaloo/SponSkrub) - For using the [sponskrub options](#sponskrub-sponsorblock-options). Licenced under [GPLv3+](https://github.com/faissaloo/SponSkrub/blob/master/LICENCE.md) * [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. Licenced under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING) * [**pycryptodome**](https://github.com/Legrandin/pycryptodome) - For decrypting various data. Licenced under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) * [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. 
Licenced under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE) @@ -203,6 +202,7 @@ While all the other dependancies are optional * [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licenced under [GPLv2+](http://rtmpdump.mplayerhq.hu) * [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rstp` streams. ffmpeg will be used as a fallback. Licenced under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright) * [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licenced under [BSD3](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD) +* [**sponskrub**](https://github.com/faissaloo/SponSkrub) - For using the now **deprecated** [sponskrub options](#sponskrub-options). Licenced under [GPLv3+](https://github.com/faissaloo/SponSkrub/blob/master/LICENCE.md) * Any external downloader that you want to use with `--downloader` To use or redistribute the dependencies, you must agree to their respective licensing terms. @@ -744,24 +744,23 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t and the arguments separated by a colon ":" to give the argument to the specified postprocessor/executable. Supported PP are: - Merger, ExtractAudio, SplitChapters, + Merger, ModifyChapters, SplitChapters, + ExtractAudio, VideoRemuxer, VideoConvertor, Metadata, EmbedSubtitle, EmbedThumbnail, SubtitlesConvertor, ThumbnailsConvertor, - VideoRemuxer, VideoConvertor, SponSkrub, FixupStretched, FixupM4a, FixupM3u8, FixupTimestamp and FixupDuration. The supported executables are: AtomicParsley, - FFmpeg, FFprobe, and SponSkrub. You can - also specify "PP+EXE:ARGS" to give the - arguments to the specified executable only - when being used by the specified - postprocessor. Additionally, for - ffmpeg/ffprobe, "_i"/"_o" can be appended - to the prefix optionally followed by a - number to pass the argument before the - specified input/output file. Eg: --ppa - "Merger+ffmpeg_i1:-v quiet". You can use - this option multiple times to give + FFmpeg and FFprobe. You can also specify + "PP+EXE:ARGS" to give the arguments to the + specified executable only when being used + by the specified postprocessor. + Additionally, for ffmpeg/ffprobe, "_i"/"_o" + can be appended to the prefix optionally + followed by a number to pass the argument + before the specified input/output file. Eg: + --ppa "Merger+ffmpeg_i1:-v quiet". You can + use this option multiple times to give different arguments to different postprocessors. (Alias: --ppa) -k, --keep-video Keep the intermediate video file on disk @@ -775,11 +774,15 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t --no-embed-subs Do not embed subtitles (default) --embed-thumbnail Embed thumbnail in the video as cover art --no-embed-thumbnail Do not embed thumbnail (default) - --embed-metadata Embed metadata including chapter markers - (if supported by the format) to the video - file (Alias: --add-metadata) - --no-embed-metadata Do not write metadata (default) + --embed-metadata Embed metadata to the video file. 
Also adds + chapters to file unless --no-add-chapters + is used (Alias: --add-metadata) + --no-embed-metadata Do not add metadata to file (default) (Alias: --no-add-metadata) + --embed-chapters Add chapter markers to the video file + (Alias: --add-chapters) + --no-embed-chapters Do not add chapter markers (default) + (Alias: --no-add-chapters) --parse-metadata FROM:TO Parse additional metadata like title/artist from other fields; see "MODIFYING METADATA" for details @@ -827,27 +830,51 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t files. See "OUTPUT TEMPLATE" for details --no-split-chapters Do not split video based on chapters (default) + --remove-chapters REGEX Remove chapters whose title matches the + given regular expression. This option can + be used multiple times + --no-remove-chapters Do not remove any chapters from the + file (default) + --force-keyframes-at-cuts Force keyframes around the chapters before + removing/splitting them. Requires a + reencode and thus is very slow, but the + resulting video may have fewer artifacts + around the cuts + --no-force-keyframes-at-cuts Do not force keyframes around the chapters + when cutting/splitting (default) -## SponSkrub (SponsorBlock) Options: -[SponSkrub](https://github.com/yt-dlp/SponSkrub) is a utility to - mark/remove sponsor segments from downloaded YouTube videos using +## SponsorBlock Options: +Make chapter entries for, or remove various segments (sponsor, + introductions, etc.) from downloaded YouTube videos using the [SponsorBlock API](https://sponsor.ajay.app) - --sponskrub Use sponskrub to mark sponsored sections. - This is enabled by default if the sponskrub - binary exists (Youtube only) - --no-sponskrub Do not use sponskrub - --sponskrub-cut Cut out the sponsor sections instead of - simply marking them - --no-sponskrub-cut Simply mark the sponsor sections, not cut - them out (default) - --sponskrub-force Run sponskrub even if the video was already - downloaded - --no-sponskrub-force Do not cut out the sponsor sections if the - video was already downloaded (default) - --sponskrub-location PATH Location of the sponskrub binary; either - the path to the binary or its containing - directory + --sponsorblock-mark CATS SponsorBlock categories to create chapters + for, separated by commas. Available + categories are all, sponsor, intro, outro, + selfpromo, interaction, preview, + music_offtopic. You can prefix the category + with a "-" to exempt it. See + https://wiki.sponsor.ajay.app/index.php/Segment_Categories + for description of the categories. Eg: + --sponsorblock-mark all,-preview + --sponsorblock-remove CATS SponsorBlock categories to be removed from + the video file, separated by commas. If a + category is present in both mark and + remove, remove takes precedence. The syntax + and available categories are the same as + for --sponsorblock-mark + --sponsorblock-chapter-title TEMPLATE + The title template for SponsorBlock + chapters created by --sponsorblock-mark. + The same syntax as the output template is + used, but the only available fields are + start_time, end_time, category, categories, + name, category_names. 
Defaults to + "[SponsorBlock]: %(category_names)l" + --no-sponsorblock Disable both --sponsorblock-mark and + --sponsorblock-remove + --sponsorblock-api URL SponsorBlock API location, defaults to + https://sponsor.ajay.app ## Extractor Options: --extractor-retries RETRIES Number of retries for known extractor @@ -1057,6 +1084,15 @@ Available only when used in `--print`: - `urls` (string): The URLs of all requested formats, one in each line - `filename` (string): Name of the video file. Note that the actual filename may be different due to post-processing. Use `--exec echo` to get the name after all postprocessing is complete + +Available only in `--sponsorblock-chapter-title`: + + - `start_time` (numeric): Start time of the chapter in seconds + - `end_time` (numeric): End time of the chapter in seconds + - `categories` (list): The SponsorBlock categories the chapter belongs to + - `category` (string): The smallest SponsorBlock category the chapter belongs to + - `category_names` (list): Friendly names of the categories + - `name` (string): Friendly name of the smallest category Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default). @@ -1501,6 +1537,18 @@ These are aliases that are no longer documented for various reasons --write-srt --write-subs --yes-overwrites --force-overwrites +#### Sponskrub Options +Support for [SponSkrub](https://github.com/faissaloo/SponSkrub) has been deprecated in favor of `--sponsorblock` + + --sponskrub --sponsorblock-mark all + --no-sponskrub --no-sponsorblock + --sponskrub-cut --sponsorblock-remove all + --no-sponskrub-cut --sponsorblock-remove -all + --sponskrub-force Not applicable + --no-sponskrub-force Not applicable + --sponskrub-location Not applicable + --sponskrub-args Not applicable + #### No longer supported These options may no longer work as intended diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py index b15cbd28c8..7d13687696 100644 --- a/test/test_postprocessors.py +++ b/test/test_postprocessors.py @@ -6,6 +6,7 @@ from __future__ import unicode_literals import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from yt_dlp import YoutubeDL @@ -15,6 +16,7 @@ from yt_dlp.postprocessor import ( FFmpegThumbnailsConvertorPP, MetadataFromFieldPP, MetadataParserPP, + ModifyChaptersPP ) @@ -68,3 +70,461 @@ class TestExec(unittest.TestCase): self.assertEqual(pp.parse_cmd('echo', info), cmd) self.assertEqual(pp.parse_cmd('echo {}', info), cmd) self.assertEqual(pp.parse_cmd('echo %(filepath)q', info), cmd) + + +class TestModifyChaptersPP(unittest.TestCase): + def setUp(self): + self._pp = ModifyChaptersPP(YoutubeDL()) + + @staticmethod + def _sponsor_chapter(start, end, cat, remove=False): + c = {'start_time': start, 'end_time': end, '_categories': [(cat, start, end)]} + if remove: + c['remove'] = True + return c + + @staticmethod + def _chapter(start, end, title=None, remove=False): + c = {'start_time': start, 'end_time': end} + if title is not None: + c['title'] = title + if remove: + c['remove'] = True + return c + + def _chapters(self, ends, titles): + self.assertEqual(len(ends), len(titles)) + start = 0 + chapters = [] + for e, t in zip(ends, 
titles): + chapters.append(self._chapter(start, e, t)) + start = e + return chapters + + def _remove_marked_arrange_sponsors_test_impl( + self, chapters, expected_chapters, expected_removed): + actual_chapters, actual_removed = ( + self._pp._remove_marked_arrange_sponsors(chapters)) + for c in actual_removed: + c.pop('title', None) + c.pop('_categories', None) + actual_chapters = [{ + 'start_time': c['start_time'], + 'end_time': c['end_time'], + 'title': c['title'], + } for c in actual_chapters] + self.assertSequenceEqual(expected_chapters, actual_chapters) + self.assertSequenceEqual(expected_removed, actual_removed) + + def test_remove_marked_arrange_sponsors_CanGetThroughUnaltered(self): + chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, chapters, []) + + def test_remove_marked_arrange_sponsors_ChapterWithSponsors(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 20, 'sponsor'), + self._sponsor_chapter(30, 40, 'preview'), + self._sponsor_chapter(50, 60, 'sponsor')] + expected = self._chapters( + [10, 20, 30, 40, 50, 60, 70], + ['c', '[SponsorBlock]: Sponsor', 'c', '[SponsorBlock]: Preview/Recap', + 'c', '[SponsorBlock]: Sponsor', 'c']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_UniqueNamesForOverlappingSponsors(self): + chapters = self._chapters([120], ['c']) + [ + self._sponsor_chapter(10, 45, 'sponsor'), self._sponsor_chapter(20, 40, 'selfpromo'), + self._sponsor_chapter(50, 70, 'sponsor'), self._sponsor_chapter(60, 85, 'selfpromo'), + self._sponsor_chapter(90, 120, 'selfpromo'), self._sponsor_chapter(100, 110, 'sponsor')] + expected = self._chapters( + [10, 20, 40, 45, 50, 60, 70, 85, 90, 100, 110, 120], + ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', + '[SponsorBlock]: Sponsor', + 'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', + '[SponsorBlock]: Unpaid/Self Promotion', + 'c', '[SponsorBlock]: Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion, Sponsor', + '[SponsorBlock]: Unpaid/Self Promotion']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_ChapterWithCuts(self): + cuts = [self._chapter(10, 20, remove=True), + self._sponsor_chapter(30, 40, 'sponsor', remove=True), + self._chapter(50, 60, remove=True)] + chapters = self._chapters([70], ['c']) + cuts + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([40], ['c']), cuts) + + def test_remove_marked_arrange_sponsors_ChapterWithSponsorsAndCuts(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 20, 'sponsor'), + self._sponsor_chapter(30, 40, 'selfpromo', remove=True), + self._sponsor_chapter(50, 60, 'interaction')] + expected = self._chapters([10, 20, 40, 50, 60], + ['c', '[SponsorBlock]: Sponsor', 'c', + '[SponsorBlock]: Interaction Reminder', 'c']) + self._remove_marked_arrange_sponsors_test_impl( + chapters, expected, [self._chapter(30, 40, remove=True)]) + + def test_remove_marked_arrange_sponsors_ChapterWithSponsorCutInTheMiddle(self): + cuts = [self._sponsor_chapter(20, 30, 'selfpromo', remove=True), + self._chapter(40, 50, remove=True)] + chapters = self._chapters([70], ['c']) + [self._sponsor_chapter(10, 60, 'sponsor')] + cuts + expected = self._chapters( + [10, 40, 50], ['c', '[SponsorBlock]: Sponsor', 'c']) + 
self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_ChapterWithCutHidingSponsor(self): + cuts = [self._sponsor_chapter(20, 50, 'selfpromo', remove=True)] + chapters = self._chapters([60], ['c']) + [ + self._sponsor_chapter(10, 20, 'intro'), + self._sponsor_chapter(30, 40, 'sponsor'), + self._sponsor_chapter(50, 60, 'outro'), + ] + cuts + expected = self._chapters( + [10, 20, 30], ['c', '[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Endcards/Credits']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_ChapterWithAdjacentSponsors(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 20, 'sponsor'), + self._sponsor_chapter(20, 30, 'selfpromo'), + self._sponsor_chapter(30, 40, 'interaction')] + expected = self._chapters( + [10, 20, 30, 40, 70], + ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', + '[SponsorBlock]: Interaction Reminder', 'c']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_ChapterWithAdjacentCuts(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 20, 'sponsor'), + self._sponsor_chapter(20, 30, 'interaction', remove=True), + self._chapter(30, 40, remove=True), + self._sponsor_chapter(40, 50, 'selfpromo', remove=True), + self._sponsor_chapter(50, 60, 'interaction')] + expected = self._chapters([10, 20, 30, 40], + ['c', '[SponsorBlock]: Sponsor', + '[SponsorBlock]: Interaction Reminder', 'c']) + self._remove_marked_arrange_sponsors_test_impl( + chapters, expected, [self._chapter(20, 50, remove=True)]) + + def test_remove_marked_arrange_sponsors_ChapterWithOverlappingSponsors(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 30, 'sponsor'), + self._sponsor_chapter(20, 50, 'selfpromo'), + self._sponsor_chapter(40, 60, 'interaction')] + expected = self._chapters( + [10, 20, 30, 40, 50, 60, 70], + ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', + '[SponsorBlock]: Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion, Interaction Reminder', + '[SponsorBlock]: Interaction Reminder', 'c']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_ChapterWithOverlappingCuts(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 30, 'sponsor', remove=True), + self._sponsor_chapter(20, 50, 'selfpromo', remove=True), + self._sponsor_chapter(40, 60, 'interaction', remove=True)] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([20], ['c']), [self._chapter(10, 60, remove=True)]) + + def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingSponsors(self): + chapters = self._chapters([170], ['c']) + [ + self._sponsor_chapter(0, 30, 'intro'), + self._sponsor_chapter(20, 50, 'sponsor'), + self._sponsor_chapter(40, 60, 'selfpromo'), + self._sponsor_chapter(70, 90, 'sponsor'), + self._sponsor_chapter(80, 100, 'sponsor'), + self._sponsor_chapter(90, 110, 'sponsor'), + self._sponsor_chapter(120, 140, 'selfpromo'), + self._sponsor_chapter(130, 160, 'interaction'), + self._sponsor_chapter(150, 170, 'outro')] + expected = self._chapters( + [20, 30, 40, 50, 60, 70, 110, 120, 130, 140, 150, 160, 170], + ['[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Intermission/Intro Animation, Sponsor', 
'[SponsorBlock]: Sponsor', + '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion', 'c', + '[SponsorBlock]: Sponsor', 'c', '[SponsorBlock]: Unpaid/Self Promotion', + '[SponsorBlock]: Unpaid/Self Promotion, Interaction Reminder', + '[SponsorBlock]: Interaction Reminder', + '[SponsorBlock]: Interaction Reminder, Endcards/Credits', '[SponsorBlock]: Endcards/Credits']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingCuts(self): + chapters = self._chapters([170], ['c']) + [ + self._chapter(0, 30, remove=True), + self._sponsor_chapter(20, 50, 'sponsor', remove=True), + self._chapter(40, 60, remove=True), + self._sponsor_chapter(70, 90, 'sponsor', remove=True), + self._chapter(80, 100, remove=True), + self._chapter(90, 110, remove=True), + self._sponsor_chapter(120, 140, 'sponsor', remove=True), + self._sponsor_chapter(130, 160, 'selfpromo', remove=True), + self._chapter(150, 170, remove=True)] + expected_cuts = [self._chapter(0, 60, remove=True), + self._chapter(70, 110, remove=True), + self._chapter(120, 170, remove=True)] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([20], ['c']), expected_cuts) + + def test_remove_marked_arrange_sponsors_OverlappingSponsorsDifferentTitlesAfterCut(self): + chapters = self._chapters([60], ['c']) + [ + self._sponsor_chapter(10, 60, 'sponsor'), + self._sponsor_chapter(10, 40, 'intro'), + self._sponsor_chapter(30, 50, 'interaction'), + self._sponsor_chapter(30, 50, 'selfpromo', remove=True), + self._sponsor_chapter(40, 50, 'interaction'), + self._sponsor_chapter(50, 60, 'outro')] + expected = self._chapters( + [10, 30, 40], ['c', '[SponsorBlock]: Sponsor, Intermission/Intro Animation', '[SponsorBlock]: Sponsor, Endcards/Credits']) + self._remove_marked_arrange_sponsors_test_impl( + chapters, expected, [self._chapter(30, 50, remove=True)]) + + def test_remove_marked_arrange_sponsors_SponsorsNoLongerOverlapAfterCut(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 30, 'sponsor'), + self._sponsor_chapter(20, 50, 'interaction'), + self._sponsor_chapter(30, 50, 'selfpromo', remove=True), + self._sponsor_chapter(40, 60, 'sponsor'), + self._sponsor_chapter(50, 60, 'interaction')] + expected = self._chapters( + [10, 20, 40, 50], ['c', '[SponsorBlock]: Sponsor', + '[SponsorBlock]: Sponsor, Interaction Reminder', 'c']) + self._remove_marked_arrange_sponsors_test_impl( + chapters, expected, [self._chapter(30, 50, remove=True)]) + + def test_remove_marked_arrange_sponsors_SponsorsStillOverlapAfterCut(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 60, 'sponsor'), + self._sponsor_chapter(20, 60, 'interaction'), + self._sponsor_chapter(30, 50, 'selfpromo', remove=True)] + expected = self._chapters( + [10, 20, 40, 50], ['c', '[SponsorBlock]: Sponsor', + '[SponsorBlock]: Sponsor, Interaction Reminder', 'c']) + self._remove_marked_arrange_sponsors_test_impl( + chapters, expected, [self._chapter(30, 50, remove=True)]) + + def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingSponsorsAndCuts(self): + chapters = self._chapters([200], ['c']) + [ + self._sponsor_chapter(10, 40, 'sponsor'), + self._sponsor_chapter(10, 30, 'intro'), + self._chapter(20, 30, remove=True), + self._sponsor_chapter(30, 40, 'selfpromo'), + self._sponsor_chapter(50, 70, 'sponsor'), + self._sponsor_chapter(60, 80, 'interaction'), + self._chapter(70, 80, remove=True), + 
self._sponsor_chapter(70, 90, 'sponsor'), + self._sponsor_chapter(80, 100, 'interaction'), + self._sponsor_chapter(120, 170, 'selfpromo'), + self._sponsor_chapter(130, 180, 'outro'), + self._chapter(140, 150, remove=True), + self._chapter(150, 160, remove=True)] + expected = self._chapters( + [10, 20, 30, 40, 50, 70, 80, 100, 110, 130, 140, 160], + ['c', '[SponsorBlock]: Sponsor, Intermission/Intro Animation', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', + 'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Interaction Reminder', + '[SponsorBlock]: Interaction Reminder', 'c', '[SponsorBlock]: Unpaid/Self Promotion', + '[SponsorBlock]: Unpaid/Self Promotion, Endcards/Credits', '[SponsorBlock]: Endcards/Credits', 'c']) + expected_cuts = [self._chapter(20, 30, remove=True), + self._chapter(70, 80, remove=True), + self._chapter(140, 160, remove=True)] + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, expected_cuts) + + def test_remove_marked_arrange_sponsors_SponsorOverlapsMultipleChapters(self): + chapters = (self._chapters([20, 40, 60, 80, 100], ['c1', 'c2', 'c3', 'c4', 'c5']) + + [self._sponsor_chapter(10, 90, 'sponsor')]) + expected = self._chapters([10, 90, 100], ['c1', '[SponsorBlock]: Sponsor', 'c5']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutOverlapsMultipleChapters(self): + cuts = [self._chapter(10, 90, remove=True)] + chapters = self._chapters([20, 40, 60, 80, 100], ['c1', 'c2', 'c3', 'c4', 'c5']) + cuts + expected = self._chapters([10, 20], ['c1', 'c5']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_SponsorsWithinSomeChaptersAndOverlappingOthers(self): + chapters = (self._chapters([10, 40, 60, 80], ['c1', 'c2', 'c3', 'c4']) + + [self._sponsor_chapter(20, 30, 'sponsor'), + self._sponsor_chapter(50, 70, 'selfpromo')]) + expected = self._chapters([10, 20, 30, 40, 50, 70, 80], + ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c2', 'c3', + '[SponsorBlock]: Unpaid/Self Promotion', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutsWithinSomeChaptersAndOverlappingOthers(self): + cuts = [self._chapter(20, 30, remove=True), self._chapter(50, 70, remove=True)] + chapters = self._chapters([10, 40, 60, 80], ['c1', 'c2', 'c3', 'c4']) + cuts + expected = self._chapters([10, 30, 40, 50], ['c1', 'c2', 'c3', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_ChaptersAfterLastSponsor(self): + chapters = (self._chapters([20, 40, 50, 60], ['c1', 'c2', 'c3', 'c4']) + + [self._sponsor_chapter(10, 30, 'music_offtopic')]) + expected = self._chapters( + [10, 30, 40, 50, 60], + ['c1', '[SponsorBlock]: Non-Music Section', 'c2', 'c3', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_ChaptersAfterLastCut(self): + cuts = [self._chapter(10, 30, remove=True)] + chapters = self._chapters([20, 40, 50, 60], ['c1', 'c2', 'c3', 'c4']) + cuts + expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_SponsorStartsAtChapterStart(self): + chapters = (self._chapters([10, 20, 40], ['c1', 'c2', 'c3']) + + [self._sponsor_chapter(20, 30, 'sponsor')]) + expected = self._chapters([10, 20, 30, 40], ['c1', 
'c2', '[SponsorBlock]: Sponsor', 'c3']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutStartsAtChapterStart(self): + cuts = [self._chapter(20, 30, remove=True)] + chapters = self._chapters([10, 20, 40], ['c1', 'c2', 'c3']) + cuts + expected = self._chapters([10, 20, 30], ['c1', 'c2', 'c3']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_SponsorEndsAtChapterEnd(self): + chapters = (self._chapters([10, 30, 40], ['c1', 'c2', 'c3']) + + [self._sponsor_chapter(20, 30, 'sponsor')]) + expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c3']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutEndsAtChapterEnd(self): + cuts = [self._chapter(20, 30, remove=True)] + chapters = self._chapters([10, 30, 40], ['c1', 'c2', 'c3']) + cuts + expected = self._chapters([10, 20, 30], ['c1', 'c2', 'c3']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_SponsorCoincidesWithChapters(self): + chapters = (self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + + [self._sponsor_chapter(10, 30, 'sponsor')]) + expected = self._chapters([10, 30, 40], ['c1', '[SponsorBlock]: Sponsor', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutCoincidesWithChapters(self): + cuts = [self._chapter(10, 30, remove=True)] + chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + cuts + expected = self._chapters([10, 20], ['c1', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_SponsorsAtVideoBoundaries(self): + chapters = (self._chapters([20, 40, 60], ['c1', 'c2', 'c3']) + + [self._sponsor_chapter(0, 10, 'intro'), self._sponsor_chapter(50, 60, 'outro')]) + expected = self._chapters( + [10, 20, 40, 50, 60], ['[SponsorBlock]: Intermission/Intro Animation', 'c1', 'c2', 'c3', '[SponsorBlock]: Endcards/Credits']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutsAtVideoBoundaries(self): + cuts = [self._chapter(0, 10, remove=True), self._chapter(50, 60, remove=True)] + chapters = self._chapters([20, 40, 60], ['c1', 'c2', 'c3']) + cuts + expected = self._chapters([10, 30, 40], ['c1', 'c2', 'c3']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_SponsorsOverlapChaptersAtVideoBoundaries(self): + chapters = (self._chapters([10, 40, 50], ['c1', 'c2', 'c3']) + + [self._sponsor_chapter(0, 20, 'intro'), self._sponsor_chapter(30, 50, 'outro')]) + expected = self._chapters( + [20, 30, 50], ['[SponsorBlock]: Intermission/Intro Animation', 'c2', '[SponsorBlock]: Endcards/Credits']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutsOverlapChaptersAtVideoBoundaries(self): + cuts = [self._chapter(0, 20, remove=True), self._chapter(30, 50, remove=True)] + chapters = self._chapters([10, 40, 50], ['c1', 'c2', 'c3']) + cuts + expected = self._chapters([10], ['c2']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_EverythingSponsored(self): + chapters = (self._chapters([10, 20, 30, 40], ['c1', 'c2', 
'c3', 'c4']) + + [self._sponsor_chapter(0, 20, 'intro'), self._sponsor_chapter(20, 40, 'outro')]) + expected = self._chapters([20, 40], ['[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Endcards/Credits']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_EverythingCut(self): + cuts = [self._chapter(0, 20, remove=True), self._chapter(20, 40, remove=True)] + chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + cuts + self._remove_marked_arrange_sponsors_test_impl( + chapters, [], [self._chapter(0, 40, remove=True)]) + + def test_remove_marked_arrange_sponsors_TinyChaptersInTheOriginalArePreserved(self): + chapters = self._chapters([0.1, 0.2, 0.3, 0.4], ['c1', 'c2', 'c3', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, chapters, []) + + def test_remove_marked_arrange_sponsors_TinySponsorsAreIgnored(self): + chapters = [self._sponsor_chapter(0, 0.1, 'intro'), self._chapter(0.1, 0.2, 'c1'), + self._sponsor_chapter(0.2, 0.3, 'sponsor'), self._chapter(0.3, 0.4, 'c2'), + self._sponsor_chapter(0.4, 0.5, 'outro')] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([0.3, 0.5], ['c1', 'c2']), []) + + def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromCutsAreIgnored(self): + cuts = [self._chapter(1.5, 2.5, remove=True)] + chapters = self._chapters([2, 3, 3.5], ['c1', 'c2', 'c3']) + cuts + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([2, 2.5], ['c1', 'c3']), cuts) + + def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromSponsorOverlapAreIgnored(self): + chapters = self._chapters([1, 3, 4], ['c1', 'c2', 'c3']) + [ + self._sponsor_chapter(1.5, 2.5, 'sponsor')] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([1.5, 3, 4], ['c1', '[SponsorBlock]: Sponsor', 'c3']), []) + + def test_remove_marked_arrange_sponsors_TinySponsorsOverlapsAreIgnored(self): + chapters = self._chapters([2, 3, 5], ['c1', 'c2', 'c3']) + [ + self._sponsor_chapter(1, 3, 'sponsor'), + self._sponsor_chapter(2.5, 4, 'selfpromo') + ] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([1, 3, 4, 5], [ + 'c1', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', 'c3']), []) + + def test_make_concat_opts_CommonCase(self): + sponsor_chapters = [self._chapter(1, 2, 's1'), self._chapter(10, 20, 's2')] + expected = '''ffconcat version 1.0 +file 'file:test' +outpoint 1.000000 +file 'file:test' +inpoint 2.000000 +outpoint 10.000000 +file 'file:test' +inpoint 20.000000 +''' + opts = self._pp._make_concat_opts(sponsor_chapters, 30) + self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts))) + + def test_make_concat_opts_NoZeroDurationChunkAtVideoStart(self): + sponsor_chapters = [self._chapter(0, 1, 's1'), self._chapter(10, 20, 's2')] + expected = '''ffconcat version 1.0 +file 'file:test' +inpoint 1.000000 +outpoint 10.000000 +file 'file:test' +inpoint 20.000000 +''' + opts = self._pp._make_concat_opts(sponsor_chapters, 30) + self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts))) + + def test_make_concat_opts_NoZeroDurationChunkAtVideoEnd(self): + sponsor_chapters = [self._chapter(1, 2, 's1'), self._chapter(10, 20, 's2')] + expected = '''ffconcat version 1.0 +file 'file:test' +outpoint 1.000000 +file 'file:test' +inpoint 2.000000 +outpoint 10.000000 +''' + opts = self._pp._make_concat_opts(sponsor_chapters, 20) + 
self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts))) + + def test_quote_for_concat_RunsOfQuotes(self): + self.assertEqual( + r"'special '\'' '\'\''characters'\'\'\''galore'", + self._pp._quote_for_ffmpeg("special ' ''characters'''galore")) + + def test_quote_for_concat_QuotesAtStart(self): + self.assertEqual( + r"\'\'\''special '\'' characters '\'' galore'", + self._pp._quote_for_ffmpeg("'''special ' characters ' galore")) + + def test_quote_for_concat_QuotesAtEnd(self): + self.assertEqual( + r"'special '\'' characters '\'' galore'\'\'\'", + self._pp._quote_for_ffmpeg("special ' characters ' galore'''")) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 58e8ea5d93..91b2bcb852 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -13,7 +13,6 @@ import random import re import sys - from .options import ( parseOpts, ) @@ -307,6 +306,7 @@ def _real_main(argv=None): opts.forceprint = opts.forceprint or [] for tmpl in opts.forceprint or []: validate_outtmpl(tmpl, 'print template') + validate_outtmpl(opts.sponsorblock_chapter_title, 'SponsorBlock chapter title') if opts.extractaudio and not opts.keepvideo and opts.format is None: opts.format = 'bestaudio/best' @@ -353,15 +353,34 @@ def _real_main(argv=None): if opts.getcomments and not printing_json: opts.writeinfojson = True + if opts.no_sponsorblock: + opts.sponsorblock_mark = set() + opts.sponsorblock_remove = set() + sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove + + if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None: + opts.addchapters = True + opts.remove_chapters = opts.remove_chapters or [] + def report_conflict(arg1, arg2): warnings.append('%s is ignored since %s was given' % (arg2, arg1)) + if (opts.remove_chapters or sponsorblock_query) and opts.sponskrub is not False: + if opts.sponskrub: + if opts.remove_chapters: + report_conflict('--remove-chapters', '--sponskrub') + if opts.sponsorblock_mark: + report_conflict('--sponsorblock-mark', '--sponskrub') + if opts.sponsorblock_remove: + report_conflict('--sponsorblock-remove', '--sponskrub') + opts.sponskrub = False + if opts.sponskrub_cut and opts.split_chapters and opts.sponskrub is not False: + report_conflict('--split-chapter', '--sponskrub-cut') + opts.sponskrub_cut = False + if opts.remuxvideo and opts.recodevideo: report_conflict('--recode-video', '--remux-video') opts.remuxvideo = False - if opts.sponskrub_cut and opts.split_chapters and opts.sponskrub is not False: - report_conflict('--split-chapter', '--sponskrub-cut') - opts.sponskrub_cut = False if opts.allow_unplayable_formats: if opts.extractaudio: @@ -388,12 +407,26 @@ def _real_main(argv=None): if opts.fixup and opts.fixup.lower() not in ('never', 'ignore'): report_conflict('--allow-unplayable-formats', '--fixup') opts.fixup = 'never' + if opts.remove_chapters: + report_conflict('--allow-unplayable-formats', '--remove-chapters') + opts.remove_chapters = [] + if opts.sponsorblock_remove: + report_conflict('--allow-unplayable-formats', '--sponsorblock-remove') + opts.sponsorblock_remove = set() if opts.sponskrub: report_conflict('--allow-unplayable-formats', '--sponskrub') opts.sponskrub = False # PostProcessors postprocessors = [] + if sponsorblock_query: + postprocessors.append({ + 'key': 'SponsorBlock', + 'categories': sponsorblock_query, + 'api': opts.sponsorblock_api, + # Run this immediately after extraction is complete + 'when': 'pre_process' + }) if opts.parse_metadata: postprocessors.append({ 'key': 'MetadataParser', 
@@ -439,16 +472,7 @@ def _real_main(argv=None): 'key': 'FFmpegVideoConvertor', 'preferedformat': opts.recodevideo, }) - # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and - # FFmpegExtractAudioPP as containers before conversion may not support - # metadata (3gp, webm, etc.) - # And this post-processor should be placed before other metadata - # manipulating post-processors (FFmpegEmbedSubtitle) to prevent loss of - # extra metadata. By default ffmpeg preserves metadata applicable for both - # source and target containers. From this point the container won't change, - # so metadata can be added here. - if opts.addmetadata: - postprocessors.append({'key': 'FFmpegMetadata'}) + # If ModifyChapters is going to remove chapters, subtitles must already be in the container. if opts.embedsubtitles: already_have_subtitle = opts.writesubtitles and 'no-keep-subs' not in compat_opts postprocessors.append({ @@ -462,6 +486,33 @@ def _real_main(argv=None): # this was the old behaviour if only --all-sub was given. if opts.allsubtitles and not opts.writeautomaticsub: opts.writesubtitles = True + # ModifyChapters must run before FFmpegMetadataPP + remove_chapters_patterns = [] + for regex in opts.remove_chapters: + try: + remove_chapters_patterns.append(re.compile(regex)) + except re.error as err: + parser.error(f'invalid --remove-chapters regex {regex!r} - {err}') + if opts.remove_chapters or sponsorblock_query: + postprocessors.append({ + 'key': 'ModifyChapters', + 'remove_chapters_patterns': remove_chapters_patterns, + 'remove_sponsor_segments': opts.sponsorblock_remove, + 'sponsorblock_chapter_title': opts.sponsorblock_chapter_title, + 'force_keyframes': opts.force_keyframes_at_cuts + }) + # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and + # FFmpegExtractAudioPP as containers before conversion may not support + # metadata (3gp, webm, etc.) + # By default ffmpeg preserves metadata applicable for both + # source and target containers. From this point the container won't change, + # so metadata can be added here. + if opts.addmetadata or opts.addchapters: + postprocessors.append({ + 'key': 'FFmpegMetadata', + 'add_chapters': opts.addchapters, + 'add_metadata': opts.addmetadata, + }) # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment # but must be below EmbedSubtitle and FFmpegMetadata # See https://github.com/yt-dlp/yt-dlp/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29 @@ -485,7 +536,10 @@ def _real_main(argv=None): if not already_have_thumbnail: opts.writethumbnail = True if opts.split_chapters: - postprocessors.append({'key': 'FFmpegSplitChapters'}) + postprocessors.append({ + 'key': 'FFmpegSplitChapters', + 'force_keyframes': opts.force_keyframes_at_cuts, + }) # XAttrMetadataPP should be run after post-processors that may change file contents if opts.xattrs: postprocessors.append({'key': 'XAttrMetadata'}) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 0f8ce8ce86..483cce8d86 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -28,7 +28,9 @@ from .postprocessor import ( FFmpegSubtitlesConvertorPP, FFmpegThumbnailsConvertorPP, FFmpegVideoRemuxerPP, + SponsorBlockPP, ) +from .postprocessor.modify_chapters import DEFAULT_SPONSORBLOCK_CHAPTER_TITLE def _hide_login_info(opts): @@ -1218,10 +1220,10 @@ def parseOpts(overrideArguments=None): 'Give these arguments to the postprocessors. 
' 'Specify the postprocessor/executable name and the arguments separated by a colon ":" ' 'to give the argument to the specified postprocessor/executable. Supported PP are: ' - 'Merger, ExtractAudio, SplitChapters, Metadata, EmbedSubtitle, EmbedThumbnail, ' - 'SubtitlesConvertor, ThumbnailsConvertor, VideoRemuxer, VideoConvertor, ' - 'SponSkrub, FixupStretched, FixupM4a, FixupM3u8, FixupTimestamp and FixupDuration. ' - 'The supported executables are: AtomicParsley, FFmpeg, FFprobe, and SponSkrub. ' + 'Merger, ModifyChapters, SplitChapters, ExtractAudio, VideoRemuxer, VideoConvertor, ' + 'Metadata, EmbedSubtitle, EmbedThumbnail, SubtitlesConvertor, ThumbnailsConvertor, ' + 'FixupStretched, FixupM4a, FixupM3u8, FixupTimestamp and FixupDuration. ' + 'The supported executables are: AtomicParsley, FFmpeg and FFprobe. ' 'You can also specify "PP+EXE:ARGS" to give the arguments to the specified executable ' 'only when being used by the specified postprocessor. Additionally, for ffmpeg/ffprobe, ' '"_i"/"_o" can be appended to the prefix optionally followed by a number to pass the argument ' @@ -1263,11 +1265,19 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--embed-metadata', '--add-metadata', action='store_true', dest='addmetadata', default=False, - help='Embed metadata including chapter markers (if supported by the format) to the video file (Alias: --add-metadata)') + help='Embed metadata to the video file. Also adds chapters to file unless --no-add-chapters is used (Alias: --add-metadata)') postproc.add_option( '--no-embed-metadata', '--no-add-metadata', action='store_false', dest='addmetadata', - help='Do not write metadata (default) (Alias: --no-add-metadata)') + help='Do not add metadata to file (default) (Alias: --no-add-metadata)') + postproc.add_option( + '--embed-chapters', '--add-chapters', + action='store_true', dest='addchapters', default=None, + help='Add chapter markers to the video file (Alias: --add-chapters)') + postproc.add_option( + '--no-embed-chapters', '--no-add-chapters', + action='store_false', dest='addchapters', + help='Do not add chapter markers (default) (Alias: --no-add-chapters)') postproc.add_option( '--metadata-from-title', metavar='FORMAT', dest='metafromtitle', @@ -1354,41 +1364,90 @@ def parseOpts(overrideArguments=None): '--no-split-chapters', '--no-split-tracks', dest='split_chapters', action='store_false', help='Do not split video based on chapters (default)') + postproc.add_option( + '--remove-chapters', + metavar='REGEX', dest='remove_chapters', action='append', + help='Remove chapters whose title matches the given regular expression. This option can be used multiple times') + postproc.add_option( + '--no-remove-chapters', dest='remove_chapters', action='store_const', const=None, + help='Do not remove any chapters from the file (default)') + postproc.add_option( + '--force-keyframes-at-cuts', + action='store_true', dest='force_keyframes_at_cuts', default=False, + help=( + 'Force keyframes around the chapters before removing/splitting them. 
' + 'Requires a reencode and thus is very slow, but the resulting video ' + 'may have fewer artifacts around the cuts')) + postproc.add_option( + '--no-force-keyframes-at-cuts', + action='store_false', dest='force_keyframes_at_cuts', + help='Do not force keyframes around the chapters when cutting/splitting (default)') - sponskrub = optparse.OptionGroup(parser, 'SponSkrub (SponsorBlock) Options', description=( - 'SponSkrub (https://github.com/yt-dlp/SponSkrub) is a utility to mark/remove sponsor segments ' - 'from downloaded YouTube videos using SponsorBlock API (https://sponsor.ajay.app)')) - sponskrub.add_option( + sponsorblock = optparse.OptionGroup(parser, 'SponsorBlock Options', description=( + 'Make chapter entries for, or remove various segments (sponsor, introductions, etc.) ' + 'from downloaded YouTube videos using the SponsorBlock API (https://sponsor.ajay.app)')) + sponsorblock.add_option( + '--sponsorblock-mark', metavar='CATS', + dest='sponsorblock_mark', default=set(), action='callback', type='str', + callback=_set_from_options_callback, callback_kwargs={'allowed_values': SponsorBlockPP.CATEGORIES.keys()}, + help=( + 'SponsorBlock categories to create chapters for, separated by commas. ' + 'Available categories are all, %s. You can prefix the category with a "-" to exempt it. ' + 'See https://wiki.sponsor.ajay.app/index.php/Segment_Categories for description of the categories. ' + 'Eg: --sponsorblock-mark all,-preview' % ', '.join(SponsorBlockPP.CATEGORIES.keys()))) + sponsorblock.add_option( + '--sponsorblock-remove', metavar='CATS', + dest='sponsorblock_remove', default=set(), action='callback', type='str', + callback=_set_from_options_callback, callback_kwargs={'allowed_values': SponsorBlockPP.CATEGORIES.keys()}, + help=( + 'SponsorBlock categories to be removed from the video file, separated by commas. ' + 'If a category is present in both mark and remove, remove takes precedence. ' + 'The syntax and available categories are the same as for --sponsorblock-mark')) + sponsorblock.add_option( + '--sponsorblock-chapter-title', metavar='TEMPLATE', + default=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, dest='sponsorblock_chapter_title', + help=( + 'The title template for SponsorBlock chapters created by --sponsorblock-mark. ' + 'The same syntax as the output template is used, but the only available fields are ' + 'start_time, end_time, category, categories, name, category_names. Defaults to "%default"')) + sponsorblock.add_option( + '--no-sponsorblock', default=False, + action='store_true', dest='no_sponsorblock', + help='Disable both --sponsorblock-mark and --sponsorblock-remove') + sponsorblock.add_option( + '--sponsorblock-api', metavar='URL', + default='https://sponsor.ajay.app', dest='sponsorblock_api', + help='SponsorBlock API location, defaults to %default') + + sponsorblock.add_option( '--sponskrub', action='store_true', dest='sponskrub', default=None, help=( - 'Use sponskrub to mark sponsored sections. 
' - 'This is enabled by default if the sponskrub binary exists (Youtube only)')) - sponskrub.add_option( + help=optparse.SUPPRESS_HELP) + sponsorblock.add_option( '--no-sponskrub', action='store_false', dest='sponskrub', - help='Do not use sponskrub') - sponskrub.add_option( + help=optparse.SUPPRESS_HELP) + sponsorblock.add_option( '--sponskrub-cut', default=False, action='store_true', dest='sponskrub_cut', - help='Cut out the sponsor sections instead of simply marking them') - sponskrub.add_option( + help=optparse.SUPPRESS_HELP) + sponsorblock.add_option( '--no-sponskrub-cut', action='store_false', dest='sponskrub_cut', - help='Simply mark the sponsor sections, not cut them out (default)') - sponskrub.add_option( + help=optparse.SUPPRESS_HELP) + sponsorblock.add_option( '--sponskrub-force', default=False, action='store_true', dest='sponskrub_force', - help='Run sponskrub even if the video was already downloaded') - sponskrub.add_option( + help=optparse.SUPPRESS_HELP) + sponsorblock.add_option( '--no-sponskrub-force', action='store_true', dest='sponskrub_force', - help='Do not cut out the sponsor sections if the video was already downloaded (default)') - sponskrub.add_option( + help=optparse.SUPPRESS_HELP) + sponsorblock.add_option( '--sponskrub-location', metavar='PATH', dest='sponskrub_path', default='', - help='Location of the sponskrub binary; either the path to the binary or its containing directory') - sponskrub.add_option( + help=optparse.SUPPRESS_HELP) + sponsorblock.add_option( '--sponskrub-args', dest='sponskrub_args', metavar='ARGS', help=optparse.SUPPRESS_HELP) @@ -1457,7 +1516,7 @@ def parseOpts(overrideArguments=None): parser.add_option_group(subtitles) parser.add_option_group(authentication) parser.add_option_group(postproc) - parser.add_option_group(sponskrub) + parser.add_option_group(sponsorblock) parser.add_option_group(extractor) if overrideArguments is not None: diff --git a/yt_dlp/postprocessor/__init__.py b/yt_dlp/postprocessor/__init__.py index 31c2d7c68a..adbcd37556 100644 --- a/yt_dlp/postprocessor/__init__.py +++ b/yt_dlp/postprocessor/__init__.py @@ -26,7 +26,9 @@ from .metadataparser import ( MetadataParserPP, ) from .movefilesafterdownload import MoveFilesAfterDownloadPP +from .sponsorblock import SponsorBlockPP from .sponskrub import SponSkrubPP +from .modify_chapters import ModifyChaptersPP def get_postprocessor(key): @@ -56,6 +58,8 @@ __all__ = [ 'MetadataFromFieldPP', 'MetadataFromTitlePP', 'MoveFilesAfterDownloadPP', + 'SponsorBlockPP', 'SponSkrubPP', + 'ModifyChaptersPP', 'XAttrMetadataPP', ] diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 7537d5db4e..8063346450 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -8,22 +8,22 @@ import time import re import json - from .common import AudioConversionError, PostProcessor from ..compat import compat_str, compat_numeric_types from ..utils import ( + dfxp2srt, encodeArgument, encodeFilename, get_exe_version, is_outdated_version, + ISO639Utils, + orderedSet, PostProcessingError, prepend_extension, - shell_quote, - dfxp2srt, - ISO639Utils, process_communicate_or_kill, replace_extension, + shell_quote, traverse_obj, variadic, ) @@ -281,7 +281,8 @@ class FFmpegPostProcessor(PostProcessor): def run_ffmpeg(self, path, out_path, opts, **kwargs): return self.run_ffmpeg_multiple_files([path], out_path, opts, **kwargs) - def _ffmpeg_filename_argument(self, fn): + @staticmethod + def _ffmpeg_filename_argument(fn): # Always use 'file:' because the filename 
may contain ':' (ffmpeg # interprets that as a protocol) or can start with '-' (-- is broken in # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details) @@ -290,6 +291,62 @@ class FFmpegPostProcessor(PostProcessor): return fn return 'file:' + fn if fn != '-' else fn + @staticmethod + def _quote_for_ffmpeg(string): + # See https://ffmpeg.org/ffmpeg-utils.html#toc-Quoting-and-escaping + # A sequence of '' produces '\'''\''; + # final replace removes the empty '' between \' \'. + string = string.replace("'", r"'\''").replace("'''", "'") + # Handle potential ' at string boundaries. + string = string[1:] if string[0] == "'" else "'" + string + return string[:-1] if string[-1] == "'" else string + "'" + + def force_keyframes(self, filename, timestamps): + timestamps = orderedSet(timestamps) + if timestamps[0] == 0: + timestamps = timestamps[1:] + keyframe_file = prepend_extension(filename, 'keyframes.temp') + self.to_screen(f'Re-encoding "{filename}" with appropriate keyframes') + self.run_ffmpeg(filename, keyframe_file, ['-force_key_frames', ','.join( + f'{t:.6f}' for t in timestamps)]) + return keyframe_file + + def concat_files(self, in_files, out_file, concat_opts=None): + """ + Use concat demuxer to concatenate multiple files having identical streams. + + Only inpoint, outpoint, and duration concat options are supported. + See https://ffmpeg.org/ffmpeg-formats.html#concat-1 for details + """ + concat_file = f'{out_file}.concat' + self.write_debug(f'Writing concat spec to {concat_file}') + with open(concat_file, 'wt', encoding='utf-8') as f: + f.writelines(self._concat_spec(in_files, concat_opts)) + + out_flags = ['-c', 'copy'] + if out_file.rpartition('.')[-1] in ('mp4', 'mov'): + # For some reason, '-c copy' is not enough to copy subtitles + out_flags.extend(['-c:s', 'mov_text', '-movflags', '+faststart']) + + try: + self.real_run_ffmpeg( + [(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])], + [(out_file, out_flags)]) + finally: + os.remove(concat_file) + + @classmethod + def _concat_spec(cls, in_files, concat_opts=None): + if concat_opts is None: + concat_opts = [{}] * len(in_files) + yield 'ffconcat version 1.0\n' + for file, opts in zip(in_files, concat_opts): + yield f'file {cls._quote_for_ffmpeg(cls._ffmpeg_filename_argument(file))}\n' + # Iterate explicitly to yield the following directives in order, ignoring the rest. 
+ for directive in 'inpoint', 'outpoint', 'duration': + if directive in opts: + yield f'{directive} {opts[directive]}\n' + class FFmpegExtractAudioPP(FFmpegPostProcessor): COMMON_AUDIO_EXTS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma') @@ -531,6 +588,11 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): class FFmpegMetadataPP(FFmpegPostProcessor): + def __init__(self, downloader, add_metadata=True, add_chapters=True): + FFmpegPostProcessor.__init__(self, downloader) + self._add_metadata = add_metadata + self._add_chapters = add_chapters + @staticmethod def _options(target_ext): yield from ('-map', '0', '-dn') @@ -541,6 +603,46 @@ class FFmpegMetadataPP(FFmpegPostProcessor): @PostProcessor._restrict_to(images=False) def run(self, info): + filename, metadata_filename = info['filepath'], None + options = [] + if self._add_chapters and info.get('chapters'): + metadata_filename = replace_extension(filename, 'meta') + options.extend(self._get_chapter_opts(info['chapters'], metadata_filename)) + if self._add_metadata: + options.extend(self._get_metadata_opts(info)) + + if not options: + self.to_screen('There isn\'t any metadata to add') + return [], info + + temp_filename = prepend_extension(filename, 'temp') + self.to_screen('Adding metadata to "%s"' % filename) + self.run_ffmpeg_multiple_files( + (filename, metadata_filename), temp_filename, + itertools.chain(self._options(info['ext']), *options)) + if metadata_filename: + os.remove(metadata_filename) + os.replace(temp_filename, filename) + return [], info + + @staticmethod + def _get_chapter_opts(chapters, metadata_filename): + with io.open(metadata_filename, 'wt', encoding='utf-8') as f: + def ffmpeg_escape(text): + return re.sub(r'([\\=;#\n])', r'\\\1', text) + + metadata_file_content = ';FFMETADATA1\n' + for chapter in chapters: + metadata_file_content += '[CHAPTER]\nTIMEBASE=1/1000\n' + metadata_file_content += 'START=%d\n' % (chapter['start_time'] * 1000) + metadata_file_content += 'END=%d\n' % (chapter['end_time'] * 1000) + chapter_title = chapter.get('title') + if chapter_title: + metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title) + f.write(metadata_file_content) + yield ('-map_metadata', '1') + + def _get_metadata_opts(self, info): metadata = {} def add(meta_list, info_list=None): @@ -577,61 +679,27 @@ class FFmpegMetadataPP(FFmpegPostProcessor): for key in filter(lambda k: k.startswith(prefix), info.keys()): add(key[len(prefix):], key) - filename, metadata_filename = info['filepath'], None - options = [('-metadata', f'{name}={value}') for name, value in metadata.items()] + for name, value in metadata.items(): + yield ('-metadata', f'{name}={value}') stream_idx = 0 for fmt in info.get('requested_formats') or []: stream_count = 2 if 'none' not in (fmt.get('vcodec'), fmt.get('acodec')) else 1 if fmt.get('language'): lang = ISO639Utils.short2long(fmt['language']) or fmt['language'] - options.extend(('-metadata:s:%d' % (stream_idx + i), 'language=%s' % lang) - for i in range(stream_count)) + for i in range(stream_count): + yield ('-metadata:s:%d' % (stream_idx + i), 'language=%s' % lang) stream_idx += stream_count - chapters = info.get('chapters', []) - if chapters: - metadata_filename = replace_extension(filename, 'meta') - with io.open(metadata_filename, 'wt', encoding='utf-8') as f: - def ffmpeg_escape(text): - return re.sub(r'([\\=;#\n])', r'\\\1', text) - - metadata_file_content = ';FFMETADATA1\n' - for chapter in chapters: - metadata_file_content += '[CHAPTER]\nTIMEBASE=1/1000\n' - 
metadata_file_content += 'START=%d\n' % (chapter['start_time'] * 1000) - metadata_file_content += 'END=%d\n' % (chapter['end_time'] * 1000) - chapter_title = chapter.get('title') - if chapter_title: - metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title) - f.write(metadata_file_content) - options.append(('-map_metadata', '1')) - if ('no-attach-info-json' not in self.get_param('compat_opts', []) and '__infojson_filename' in info and info['ext'] in ('mkv', 'mka')): - old_stream, new_stream = self.get_stream_number(filename, ('tags', 'mimetype'), 'application/json') + old_stream, new_stream = self.get_stream_number(info['filepath'], ('tags', 'mimetype'), 'application/json') if old_stream is not None: - options.append(('-map', '-0:%d' % old_stream)) + yield ('-map', '-0:%d' % old_stream) new_stream -= 1 - options.append(( - '-attach', info['__infojson_filename'], - '-metadata:s:%d' % new_stream, 'mimetype=application/json' - )) - - if not options: - self.to_screen('There isn\'t any metadata to add') - return [], info - - temp_filename = prepend_extension(filename, 'temp') - self.to_screen('Adding metadata to "%s"' % filename) - self.run_ffmpeg_multiple_files( - (filename, metadata_filename), temp_filename, - itertools.chain(self._options(info['ext']), *options)) - if chapters: - os.remove(metadata_filename) - os.replace(temp_filename, filename) - return [], info + yield ('-attach', info['__infojson_filename'], + '-metadata:s:%d' % new_stream, 'mimetype=application/json') class FFmpegMergerPP(FFmpegPostProcessor): @@ -808,6 +876,9 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): class FFmpegSplitChaptersPP(FFmpegPostProcessor): + def __init__(self, downloader, force_keyframes=False): + FFmpegPostProcessor.__init__(self, downloader) + self._force_keyframes = force_keyframes def _prepare_filename(self, number, chapter, info): info = info.copy() @@ -835,13 +906,18 @@ class FFmpegSplitChaptersPP(FFmpegPostProcessor): def run(self, info): chapters = info.get('chapters') or [] if not chapters: - self.report_warning('Chapter information is unavailable') + self.to_screen('Chapter information is unavailable') return [], info + in_file = info['filepath'] + if self._force_keyframes and len(chapters) > 1: + in_file = self.force_keyframes(in_file, (c['start_time'] for c in chapters)) self.to_screen('Splitting video by chapters; %d chapters found' % len(chapters)) for idx, chapter in enumerate(chapters): destination, opts = self._ffmpeg_args_for_chapter(idx + 1, chapter, info) - self.real_run_ffmpeg([(info['filepath'], opts)], [(destination, ['-c', 'copy'])]) + self.real_run_ffmpeg([(in_file, opts)], [(destination, ['-c', 'copy'])]) + if in_file != info['filepath']: + os.remove(in_file) return [], info diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py new file mode 100644 index 0000000000..3d6493b683 --- /dev/null +++ b/yt_dlp/postprocessor/modify_chapters.py @@ -0,0 +1,333 @@ +import copy +import heapq +import os + +from .common import PostProcessor +from .ffmpeg import ( + FFmpegPostProcessor, + FFmpegSubtitlesConvertorPP +) +from .sponsorblock import SponsorBlockPP +from ..utils import ( + float_or_none, + orderedSet, + PostProcessingError, + prepend_extension, + traverse_obj, +) + + +_TINY_SPONSOR_OVERLAP_DURATION = 1 +DEFAULT_SPONSORBLOCK_CHAPTER_TITLE = '[SponsorBlock]: %(category_names)l' + + +class ModifyChaptersPP(FFmpegPostProcessor): + def __init__(self, downloader, remove_chapters_patterns=None, remove_sponsor_segments=None, + 
sponsorblock_chapter_title=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, force_keyframes=False): + FFmpegPostProcessor.__init__(self, downloader) + self._remove_chapters_patterns = set(remove_chapters_patterns or []) + self._remove_sponsor_segments = set(remove_sponsor_segments or []) + self._sponsorblock_chapter_title = sponsorblock_chapter_title + self._force_keyframes = force_keyframes + + @PostProcessor._restrict_to(images=False) + def run(self, info): + chapters, sponsor_chapters = self._mark_chapters_to_remove( + info.get('chapters') or [], info.get('sponsorblock_chapters') or []) + if not chapters and not sponsor_chapters: + return [], info + + real_duration = self._get_real_video_duration(info['filepath']) + if not chapters: + chapters = [{'start_time': 0, 'end_time': real_duration, 'title': info['title']}] + + info['chapters'], cuts = self._remove_marked_arrange_sponsors(chapters + sponsor_chapters) + if not cuts: + return [], info + + if abs(real_duration - info['duration']) > 1: + if abs(real_duration - info['chapters'][-1]['end_time']) < 1: + self.to_screen(f'Skipping {self.pp_key()} since the video appears to be already cut') + return [], info + if not info.get('__real_download'): + raise PostProcessingError('Cannot cut video since the real and expected durations mismatch. ' + 'Different chapters may have already been removed') + return [], info + else: + self.write_debug('Expected and actual durations mismatch') + + concat_opts = self._make_concat_opts(cuts, real_duration) + + def remove_chapters(file, is_sub): + return file, self.remove_chapters(file, cuts, concat_opts, self._force_keyframes and not is_sub) + + in_out_files = [remove_chapters(info['filepath'], False)] + in_out_files.extend(remove_chapters(in_file, True) for in_file in self._get_supported_subs(info)) + + # Renaming should only happen after all files are processed + files_to_remove = [] + for in_file, out_file in in_out_files: + uncut_file = prepend_extension(in_file, 'uncut') + os.replace(in_file, uncut_file) + os.replace(out_file, in_file) + files_to_remove.append(uncut_file) + + return files_to_remove, info + + def _mark_chapters_to_remove(self, chapters, sponsor_chapters): + if self._remove_chapters_patterns: + warn_no_chapter_to_remove = True + if not chapters: + self.to_screen('Chapter information is unavailable') + warn_no_chapter_to_remove = False + for c in chapters: + if any(regex.search(c['title']) for regex in self._remove_chapters_patterns): + c['remove'] = True + warn_no_chapter_to_remove = False + if warn_no_chapter_to_remove: + self.to_screen('There are no chapters matching the regex') + + if self._remove_sponsor_segments: + warn_no_chapter_to_remove = True + if not sponsor_chapters: + self.to_screen('SponsorBlock information is unavailable') + warn_no_chapter_to_remove = False + for c in sponsor_chapters: + if c['category'] in self._remove_sponsor_segments: + c['remove'] = True + warn_no_chapter_to_remove = False + if warn_no_chapter_to_remove: + self.to_screen('There are no matching SponsorBlock chapters') + + return chapters, sponsor_chapters + + def _get_real_video_duration(self, filename): + duration = float_or_none( + traverse_obj(self.get_metadata_object(filename), ('format', 'duration'))) + if duration is None: + raise PostProcessingError('ffprobe returned empty duration') + return duration + + def _get_supported_subs(self, info): + for sub in (info.get('requested_subtitles') or {}).values(): + sub_file = sub.get('filepath') + # The file might have been removed by --embed-subs + if not sub_file 
or not os.path.exists(sub_file):
+                continue
+            ext = sub['ext']
+            if ext not in FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS:
+                self.report_warning(f'Cannot remove chapters from external {ext} subtitles; "{sub_file}" is now out of sync')
+                continue
+            # TODO: create __real_download for subs?
+            yield sub_file
+
+    def _remove_marked_arrange_sponsors(self, chapters):
+        # Store cuts separately, since adjacent and overlapping cuts must be merged.
+        cuts = []
+
+        def append_cut(c):
+            assert 'remove' in c
+            last_to_cut = cuts[-1] if cuts else None
+            if last_to_cut and last_to_cut['end_time'] >= c['start_time']:
+                last_to_cut['end_time'] = max(last_to_cut['end_time'], c['end_time'])
+            else:
+                cuts.append(c)
+            return len(cuts) - 1
+
+        def excess_duration(c):
+            # Cuts that are completely within the chapter reduce the chapter's duration.
+            # Since cuts can overlap, excess duration may be less than the sum of cuts' durations.
+            # To avoid that, the chapter stores the index of the first cut within the chapter,
+            # instead of storing excess duration. append_cut ensures that subsequent cuts (if any)
+            # will be merged with previous ones (if necessary).
+            cut_idx, excess = c.pop('cut_idx', len(cuts)), 0
+            while cut_idx < len(cuts):
+                cut = cuts[cut_idx]
+                if cut['start_time'] >= c['end_time']:
+                    break
+                if cut['end_time'] > c['start_time']:
+                    excess += min(cut['end_time'], c['end_time'])
+                    excess -= max(cut['start_time'], c['start_time'])
+                cut_idx += 1
+            return excess
+
+        new_chapters = []
+
+        def chapter_length(c):
+            return c['end_time'] - c['start_time']
+
+        def original_uncut_chapter(c):
+            return '_was_cut' not in c and '_categories' not in c
+
+        def append_chapter(c):
+            assert 'remove' not in c
+            length = chapter_length(c) - excess_duration(c)
+            # Chapter is completely covered by cuts or sponsors.
+            if length <= 0:
+                return
+            start = new_chapters[-1]['end_time'] if new_chapters else 0
+            c.update(start_time=start, end_time=start + length)
+            # Append without checking for tininess to prevent having
+            # a completely empty chapter list.
+            if not new_chapters:
+                new_chapters.append(c)
+                return
+            old_c = new_chapters[-1]
+            # Merge with the previous if the chapter is tiny.
+            # Only tiny chapters resulting from a cut can be skipped.
+            # Chapters that were already tiny in the original list will be preserved.
+            if not original_uncut_chapter(c) and length < _TINY_SPONSOR_OVERLAP_DURATION:
+                old_c['end_time'] = c['end_time']
+            # Previous tiny chapter was appended for the sake of preventing an empty chapter list.
+            # Replace it with the current one.
+            elif not original_uncut_chapter(old_c) and chapter_length(old_c) < _TINY_SPONSOR_OVERLAP_DURATION:
+                c['start_time'] = old_c['start_time']
+                new_chapters[-1] = c
+            else:
+                new_chapters.append(c)
+
+        # Turn into a priority queue, index is a tie breaker.
+        # Plain stack sorted by start_time is not enough: after splitting the chapter,
+        # the part returned to the stack is not guaranteed to have start_time
+        # less than or equal to that of the stack's head.
+        chapters = [(c['start_time'], i, c) for i, c in enumerate(chapters)]
+        heapq.heapify(chapters)
+
+        _, cur_i, cur_chapter = heapq.heappop(chapters)
+        while chapters:
+            _, i, c = heapq.heappop(chapters)
+            # Non-overlapping chapters or cuts can be appended directly. However,
+            # adjacent non-overlapping cuts must be merged, which is handled by append_cut.
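+            # Hypothetical walk-through (timestamps are illustrative): cuts
+            # [25, 30] and [30, 35] each reach append_cut, which merges the
+            # adjacent pair into a single [25, 35] cut, while a chapter ending
+            # at or before 25 is appended unchanged.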
+ if cur_chapter['end_time'] <= c['start_time']: + (append_chapter if 'remove' not in cur_chapter else append_cut)(cur_chapter) + cur_i, cur_chapter = i, c + continue + + # Eight possibilities for overlapping chapters: (cut, cut), (cut, sponsor), + # (cut, normal), (sponsor, cut), (normal, cut), (sponsor, sponsor), + # (sponsor, normal), and (normal, sponsor). There is no (normal, normal): + # normal chapters are assumed not to overlap. + if 'remove' in cur_chapter: + # (cut, cut): adjust end_time. + if 'remove' in c: + cur_chapter['end_time'] = max(cur_chapter['end_time'], c['end_time']) + # (cut, sponsor/normal): chop the beginning of the later chapter + # (if it's not completely hidden by the cut). Push to the priority queue + # to restore sorting by start_time: with beginning chopped, c may actually + # start later than the remaining chapters from the queue. + elif cur_chapter['end_time'] < c['end_time']: + c['start_time'] = cur_chapter['end_time'] + c['_was_cut'] = True + heapq.heappush(chapters, (c['start_time'], i, c)) + # (sponsor/normal, cut). + elif 'remove' in c: + cur_chapter['_was_cut'] = True + # Chop the end of the current chapter if the cut is not contained within it. + # Chopping the end doesn't break start_time sorting, no PQ push is necessary. + if cur_chapter['end_time'] <= c['end_time']: + cur_chapter['end_time'] = c['start_time'] + append_chapter(cur_chapter) + cur_i, cur_chapter = i, c + continue + # Current chapter contains the cut within it. If the current chapter is + # a sponsor chapter, check whether the categories before and after the cut differ. + if '_categories' in cur_chapter: + after_c = dict(cur_chapter, start_time=c['end_time'], _categories=[]) + cur_cats = [] + for cat_start_end in cur_chapter['_categories']: + if cat_start_end[1] < c['start_time']: + cur_cats.append(cat_start_end) + if cat_start_end[2] > c['end_time']: + after_c['_categories'].append(cat_start_end) + cur_chapter['_categories'] = cur_cats + if cur_chapter['_categories'] != after_c['_categories']: + # Categories before and after the cut differ: push the after part to PQ. + heapq.heappush(chapters, (after_c['start_time'], cur_i, after_c)) + cur_chapter['end_time'] = c['start_time'] + append_chapter(cur_chapter) + cur_i, cur_chapter = i, c + continue + # Either sponsor categories before and after the cut are the same or + # we're dealing with a normal chapter. Just register an outstanding cut: + # subsequent append_chapter will reduce the duration. + cur_chapter.setdefault('cut_idx', append_cut(c)) + # (sponsor, normal): if a normal chapter is not completely overlapped, + # chop the beginning of it and push it to PQ. + elif '_categories' in cur_chapter and '_categories' not in c: + if cur_chapter['end_time'] < c['end_time']: + c['start_time'] = cur_chapter['end_time'] + c['_was_cut'] = True + heapq.heappush(chapters, (c['start_time'], i, c)) + # (normal, sponsor) and (sponsor, sponsor) + else: + assert '_categories' in c + cur_chapter['_was_cut'] = True + c['_was_cut'] = True + # Push the part after the sponsor to PQ. + if cur_chapter['end_time'] > c['end_time']: + # deepcopy to make categories in after_c and cur_chapter/c refer to different lists. + after_c = dict(copy.deepcopy(cur_chapter), start_time=c['end_time']) + heapq.heappush(chapters, (after_c['start_time'], cur_i, after_c)) + # Push the part after the overlap to PQ. 
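+                # Hypothetical example: for sponsors X=[0, 30] and Y=[20, 40],
+                # the remainder [30, 40] of Y is pushed back to the queue here,
+                # the overlap [20, 30] receives both categories, and X is
+                # truncated to [0, 20] before being appended.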
+ elif c['end_time'] > cur_chapter['end_time']: + after_cur = dict(copy.deepcopy(c), start_time=cur_chapter['end_time']) + heapq.heappush(chapters, (after_cur['start_time'], cur_i, after_cur)) + c['end_time'] = cur_chapter['end_time'] + # (sponsor, sponsor): merge categories in the overlap. + if '_categories' in cur_chapter: + c['_categories'] = cur_chapter['_categories'] + c['_categories'] + # Inherit the cuts that the current chapter has accumulated within it. + if 'cut_idx' in cur_chapter: + c['cut_idx'] = cur_chapter['cut_idx'] + cur_chapter['end_time'] = c['start_time'] + append_chapter(cur_chapter) + cur_i, cur_chapter = i, c + (append_chapter if 'remove' not in cur_chapter else append_cut)(cur_chapter) + + i = -1 + for c in new_chapters.copy(): + i += 1 + c.pop('_was_cut', None) + cats = c.pop('_categories', None) + if cats: + category = min(cats, key=lambda c: c[2] - c[1])[0] + cats = orderedSet(x[0] for x in cats) + c.update({ + 'category': category, + 'categories': cats, + 'name': SponsorBlockPP.CATEGORIES[category], + 'category_names': [SponsorBlockPP.CATEGORIES[c] for c in cats] + }) + outtmpl, tmpl_dict = self._downloader.prepare_outtmpl(self._sponsorblock_chapter_title, c) + c['title'] = self._downloader.escape_outtmpl(outtmpl) % tmpl_dict + if i > 0 and c['title'] == new_chapters[i - 1]['title']: + new_chapters[i - 1]['end_time'] = c['end_time'] + new_chapters.pop(i) + i -= 1 + + return new_chapters, cuts + + def remove_chapters(self, filename, ranges_to_cut, concat_opts, force_keyframes=False): + in_file = filename + out_file = prepend_extension(in_file, 'temp') + if force_keyframes: + in_file = self.force_keyframes(in_file, (t for r in ranges_to_cut for t in r)) + self.to_screen(f'Removing chapters from {filename}') + self.concat_files([in_file] * len(concat_opts), out_file, concat_opts) + if in_file != filename: + os.remove(in_file) + return out_file + + @staticmethod + def _make_concat_opts(chapters_to_remove, duration): + opts = [{}] + for s in chapters_to_remove: + # Do not create 0 duration chunk at the beginning. + if s['start_time'] == 0: + opts[-1]['inpoint'] = f'{s["end_time"]:.6f}' + continue + opts[-1]['outpoint'] = f'{s["start_time"]:.6f}' + # Do not create 0 duration chunk at the end. 
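+            # Hypothetical example: removing a single segment [10, 20] from a
+            # 60-second file produces
+            # [{'outpoint': '10.000000'}, {'inpoint': '20.000000'}], i.e. keep
+            # everything up to 10s on the first pass over the file and
+            # everything from 20s on the second.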
+ if s['end_time'] != duration: + opts.append({'inpoint': f'{s["end_time"]:.6f}'}) + return opts diff --git a/yt_dlp/postprocessor/sponskrub.py b/yt_dlp/postprocessor/sponskrub.py index 588f0ae125..932555a0ee 100644 --- a/yt_dlp/postprocessor/sponskrub.py +++ b/yt_dlp/postprocessor/sponskrub.py @@ -17,6 +17,7 @@ from ..utils import ( ) +# Deprecated in favor of the native implementation class SponSkrubPP(PostProcessor): _temp_ext = 'spons' _exe_name = 'sponskrub' diff --git a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py new file mode 100644 index 0000000000..6264d45c5d --- /dev/null +++ b/yt_dlp/postprocessor/sponsorblock.py @@ -0,0 +1,96 @@ +import json +import re +from hashlib import sha256 + +from .ffmpeg import FFmpegPostProcessor +from ..compat import compat_urllib_parse_urlencode, compat_HTTPError +from ..utils import PostProcessingError, sanitized_Request + + +class SponsorBlockPP(FFmpegPostProcessor): + + EXTRACTORS = { + 'Youtube': 'YouTube', + } + CATEGORIES = { + 'sponsor': 'Sponsor', + 'intro': 'Intermission/Intro Animation', + 'outro': 'Endcards/Credits', + 'selfpromo': 'Unpaid/Self Promotion', + 'interaction': 'Interaction Reminder', + 'preview': 'Preview/Recap', + 'music_offtopic': 'Non-Music Section' + } + + def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'): + FFmpegPostProcessor.__init__(self, downloader) + self._categories = tuple(categories or self.CATEGORIES.keys()) + self._API_URL = api if re.match('^https?://', api) else 'https://' + api + + def run(self, info): + extractor = info['extractor_key'] + if extractor not in self.EXTRACTORS: + self.to_screen(f'SponsorBlock is not supported for {extractor}') + return [], info + + info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info['duration']) + return [], info + + def _get_sponsor_chapters(self, info, duration): + segments = self._get_sponsor_segments(info['id'], self.EXTRACTORS[info['extractor_key']]) + + def duration_filter(s): + start_end = s['segment'] + # Ignore milliseconds difference at the start. + if start_end[0] <= 1: + start_end[0] = 0 + # Ignore milliseconds difference at the end. + # Never allow the segment to exceed the video. + if duration and duration - start_end[1] <= 1: + start_end[1] = duration + # SponsorBlock duration may be absent or it may deviate from the real one. + return s['videoDuration'] == 0 or not duration or abs(duration - s['videoDuration']) <= 1 + + duration_match = [s for s in segments if duration_filter(s)] + if len(duration_match) != len(segments): + self.report_warning('Some SponsorBlock segments are from a video of different duration, maybe from an old version of this video') + + def to_chapter(s): + (start, end), cat = s['segment'], s['category'] + return { + 'start_time': start, + 'end_time': end, + 'category': cat, + 'title': self.CATEGORIES[cat], + '_categories': [(cat, start, end)] + } + + sponsor_chapters = [to_chapter(s) for s in duration_match] + if not sponsor_chapters: + self.to_screen('No segments were found in the SponsorBlock database') + else: + self.to_screen(f'Found {len(sponsor_chapters)} segments in the SponsorBlock database') + return sponsor_chapters + + def _get_sponsor_segments(self, video_id, service): + hash = sha256(video_id.encode('ascii')).hexdigest() + # SponsorBlock API recommends using first 4 hash characters. + url = f'{self._API_URL}/api/skipSegments/{hash[:4]}?' 
+ compat_urllib_parse_urlencode({ + 'service': service, + 'categories': json.dumps(self._categories), + }) + for d in self._get_json(url): + if d['videoID'] == video_id: + return d['segments'] + return [] + + def _get_json(self, url): + self.write_debug(f'SponsorBlock query: {url}') + try: + rsp = self._downloader.urlopen(sanitized_Request(url)) + except compat_HTTPError as e: + if e.code == 404: + return [] + raise PostProcessingError(f'Error communicating with SponsorBlock API - {e}') + + return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8')) From a7429aa9fa3bc6616d9861a8ce5584a241a93ecc Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 1 Sep 2021 19:04:51 +0530 Subject: [PATCH 041/641] [youtube] Fix subtitle names --- yt_dlp/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index de7ff32589..24fca3f84a 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2977,7 +2977,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): continue process_language( subtitles, base_url, lang_code, - traverse_obj(caption_track, ('name', 'simpleText')), + traverse_obj(caption_track, ('name', 'simpleText'), ('name', 'runs', ..., 'text'), get_all=False), {}) continue automatic_captions = {} From 347182a0cdc175283185ad887fcae3075c955cdc Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 2 Sep 2021 03:52:08 +0530 Subject: [PATCH 042/641] Show a more useful error in older python versions --- yt_dlp/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 91b2bcb852..ad2d5e035a 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # coding: utf-8 -from __future__ import unicode_literals +f'You are using an unsupported version of Python. 
Only Python versions 3.6 and above are supported by yt-dlp' # noqa: F541 __license__ = 'Public Domain' From be4d9f4cd9144d8c08c64264386a76c2b2fd0bed Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 2 Sep 2021 04:38:02 +0530 Subject: [PATCH 043/641] Partially revert "[build] Add homebrew taps (#827)" --- .github/workflows/build.yml | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b55429e1dd..4c56a5180b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -84,19 +84,6 @@ jobs: rm -rf dist/* python setup.py sdist bdist_wheel twine upload dist/* - - name: Install SSH private key - if: ${{ secrets.BREW_TOKEN }} - uses: webfactory/ssh-agent@v0.5.3 - with: - ssh-private-key: ${{ secrets.BREW_TOKEN }} - - name: Update Homebrew Formulae - # can't use secrets.GITHUB_TOKEN because it's outside yt-dlp repository - if: ${{ secrets.BREW_TOKEN }} - run: | - git clone git@github.com:yt-dlp/homebrew-taps taps/ - python3 devscripts/update-formulae.py taps/Formula/yt-dlp.rb "${{ steps.bump_version.outputs.ytdlp_version }}" - git -C taps/ commit -am 'yt-dlp: ${{ steps.bump_version.outputs.ytdlp_version }}' - git -C taps/ push build_windows: runs-on: windows-latest From 9ee4f0bb5b82fab44185b762d482bf9e96acd55a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 2 Sep 2021 04:39:47 +0530 Subject: [PATCH 044/641] Release 2021.09.02 --- CONTRIBUTORS | 24 +++++++++- Changelog.md | 109 ++++++++++++++++++++++++++++++++++++++++++++++ README.md | 40 +++++++---------- supportedsites.md | 30 ++++++++++++- yt_dlp/options.py | 4 +- 5 files changed, 177 insertions(+), 30 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index fe28dfc784..5a976fad76 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -22,7 +22,7 @@ Zocker1999NET nao20010128nao kurumigi bbepis -animelover1984 +animelover1984/horahoradev Pccode66 RobinD42 hseg @@ -78,3 +78,25 @@ pgaig PSlava stdedos u-spec-png +Sipherdrakon +kidonng +smege1001 +tandy1000 +IONECarter +capntrips +mrfade +ParadoxGBB +wlritchi +NeroBurner +mahanstreamer +alerikaisattera +Derkades +BunnyHelp +i6t +std-move +Chocobozzz +ouwou +korli +octotherp +CeruleanSky +zootedb0t diff --git a/Changelog.md b/Changelog.md index e818aaddcb..9ccc505b71 100644 --- a/Changelog.md +++ b/Changelog.md @@ -19,6 +19,115 @@ --> +### 2021.09.02 + +* **Native SponsorBlock** implementation by [nihil-admirari](https://github.com/nihil-admirari), [pukkandan](https://github.com/pukkandan) + * `--sponsorblock-remove CATS` removes specified chapters from file + * `--sponsorblock-mark CATS` marks the specified sponsor sections as chapters + * `--sponsorblock-chapter-title TMPL` to specify sponsor chapter template + * `--sponsorblock-api URL` to use a different API + * No re-encoding is done unless `--force-keyframes-at-cuts` is used + * The fetched sponsor sections are written to the infojson + * Deprecates: `--sponskrub`, `--no-sponskrub`, `--sponskrub-cut`, `--no-sponskrub-cut`, `--sponskrub-force`, `--no-sponskrub-force`, `--sponskrub-location`, `--sponskrub-args` +* Split `--embed-chapters` from `--embed-metadata` (it still implies the former by default) +* Add option `--remove-chapters` to remove arbitrary chapters by [nihil-admirari](https://github.com/nihil-admirari), pukkandan +* Add option `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters by [nihil-admirari](https://github.com/nihil-admirari) +* Let `--match-filter` reject entries early + * Makes redundant: 
`--match-title`, `--reject-title`, `--min-views`, `--max-views` +* [lazy_extractor] Improvements (It now passes all tests) + * Bugfix for when plugin directory doesn't exist by [kidonng](https://github.com/kidonng) + * Create instance only after pre-checking archive + * Import actual class if an attribute is accessed + * Fix `suitable` and add flake8 test +* [downloader/ffmpeg] Experimental support for DASH manifests (including live) + * Your ffmpeg must have [this patch](https://github.com/FFmpeg/FFmpeg/commit/3249c757aed678780e22e99a1a49f4672851bca9) applied for YouTube DASH to work +* [downloader/ffmpeg] Allow passing custom arguments before `-i` + +* [BannedVideo] Add extractor by [smege1001](https://github.com/smege1001), [blackjack4494](https://github.com/blackjack4494), [pukkandan](https://github.com/pukkandan) +* [bilibili] Add category extractor by [animelover1984](https://github.com/animelover1984) +* [Epicon] Add extractors by [Ashish0804](https://github.com/Ashish0804) +* [filmmodu] Add extractor by [mzbaulhaque](https://github.com/mzbaulhaque) +* [GabTV] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [Hungama] Fix `HungamaSongIE` and add `HungamaAlbumPlaylistIE` by [Ashish0804](https://github.com/Ashish0804) +* [ManotoTV] Add new extractors by [tandy1000](https://github.com/tandy1000) +* [Niconico] Add Search extractors by [animelover1984](https://github.com/animelover1984), [pukkandan](https://github.com/pukkandan) +* [Patreon] Add `PatreonUserIE` by [zenerdi0de](https://github.com/zenerdi0de) +* [peloton] Add extractor by [IONECarter](https://github.com/IONECarter), [capntrips](https://github.com/capntrips), [pukkandan](https://github.com/pukkandan) +* [ProjectVeritas] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [radiko] Add extractors by [nao20010128nao](https://github.com/nao20010128nao) +* [StarTV] Add extractor for `startv.com.tr` by [mrfade](https://github.com/mrfade), [coletdjnz](https://github.com/coletdjnz) +* [tiktok] Add `TikTokUserIE` by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan) +* [Tokentube] Add extractor by [u-spec-png](https://github.com/u-spec-png) +* [TV2Hu] Fix `TV2HuIE` and add `TV2HuSeriesIE` by [Ashish0804](https://github.com/Ashish0804) +* [voicy] Add extractor by [nao20010128nao](https://github.com/nao20010128nao) + +* [adobepass] Fix Verizon SAML login by [nyuszika7h](https://github.com/nyuszika7h), [ParadoxGBB](https://github.com/ParadoxGBB) +* [afreecatv] Fix adult VODs by [wlritchi](https://github.com/wlritchi) +* [afreecatv] Tolerate failure to parse date string by [wlritchi](https://github.com/wlritchi) +* [aljazeera] Fix extractor by [MinePlayersPE](https://github.com/MinePlayersPE) +* [ATV.at] Fix extractor for ATV.at by [NeroBurner](https://github.com/NeroBurner), [coletdjnz](https://github.com/coletdjnz) +* [bitchute] Fix test by [mahanstreamer](https://github.com/mahanstreamer) +* [camtube] Remove obsolete extractor by [alerikaisattera](https://github.com/alerikaisattera) +* [CDA] Add more formats by [u-spec-png](https://github.com/u-spec-png) +* [eroprofile] Fix page skipping in albums by [jhwgh1968](https://github.com/jhwgh1968) +* [facebook] Fix format sorting +* [facebook] Fix metadata extraction by [kikuyan](https://github.com/kikuyan) +* [facebook] Update onion URL by [Derkades](https://github.com/Derkades) +* [HearThisAtIE] Fix extractor by [Ashish0804](https://github.com/Ashish0804) +* [instagram] Add referrer to prevent throttling by 
[u-spec-png](https://github.com/u-spec-png), [kikuyan](https://github.com/kikuyan)
+* [iwara.tv] Extract more metadata by [BunnyHelp](https://github.com/BunnyHelp)
+* [iwara] Add thumbnail by [i6t](https://github.com/i6t)
+* [kakao] Fix extractor
+* [mediaset] Fix extraction for some videos by [nyuszika7h](https://github.com/nyuszika7h)
+* [Motherless] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
+* [Nova] fix extractor by [std-move](https://github.com/std-move)
+* [ParamountPlus] Fix geo verification by [shirt](https://github.com/shirt-dev)
+* [peertube] handle new video URL format by [Chocobozzz](https://github.com/Chocobozzz)
+* [pornhub] Separate and fix playlist extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
+* [reddit] Fix for quarantined subreddits by [ouwou](https://github.com/ouwou)
+* [ShemarooMe] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
+* [soundcloud] Refetch `client_id` on 403
+* [tiktok] Fix metadata extraction
+* [TV2] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
+* [tv5mondeplus] Fix extractor by [korli](https://github.com/korli)
+* [VH1,TVLand] Fix extractors by [Sipherdrakon](https://github.com/Sipherdrakon)
+* [Viafree] Fix extractor and extract subtitles by [coletdjnz](https://github.com/coletdjnz)
+* [XHamster] Extract `uploader_id` by [octotherp](https://github.com/octotherp)
+* [youtube] Add `shorts` to `_VALID_URL`
+* [youtube] Add av01 itags to known formats list by [blackjack4494](https://github.com/blackjack4494)
+* [youtube] Extract error messages from HTTPError response by [coletdjnz](https://github.com/coletdjnz)
+* [youtube] Fix subtitle names
+* [youtube] Prefer audio stream that YouTube considers default
+* [youtube] Remove annotations and deprecate `--write-annotations` by [coletdjnz](https://github.com/coletdjnz)
+* [Zee5] Fix extractor and add subtitles by [Ashish0804](https://github.com/Ashish0804)
+
+* [aria2c] Obey `--rate-limit`
+* [EmbedSubtitle] Continue even if some files are missing
+* [extractor] Better error message for DRM
+* [extractor] Common function `_match_valid_url`
+* [extractor] Show video id in error messages if possible
+* [FormatSort] Remove priority of `lang`
+* [options] Add `_set_from_options_callback`
+* [SubtitleConvertor] Fix bug during subtitle conversion
+* [utils] Add `parse_qs`
+* [webvtt] Fix timestamp overflow adjustment by [fstirlitz](https://github.com/fstirlitz)
+* Bugfix for `--replace-in-metadata`
+* Don't try to merge with final extension
+* Fix `--force-overwrites` when using `-k`
+* Fix `--no-prefer-free-formats` by [CeruleanSky](https://github.com/CeruleanSky)
+* Fix `-F` for extractors that directly return url
+* Fix `-J` when there are failed videos
+* Fix `extra_info` being reused across runs
+* Fix `playlist_index` not obeying `playlist_start` and add tests
+* Fix resuming of single formats when using `--no-part`
+* Revert erroneous use of the `Content-Length` header by [fstirlitz](https://github.com/fstirlitz)
+* Use `os.replace` where applicable by [paulwrubel](https://github.com/paulwrubel)
+* [build] Add homebrew taps `yt-dlp/taps/yt-dlp` by [nao20010128nao](https://github.com/nao20010128nao)
+* [build] Fix bug in making `yt-dlp.tar.gz`
+* [docs] Fix some typos by [pukkandan](https://github.com/pukkandan), [zootedb0t](https://github.com/zootedb0t)
+* [cleanup] Replace improper use of tab in trovo by [glenn-slayden](https://github.com/glenn-slayden)
+
+
 ### 2021.08.10

 * Add option `--replace-in-metadata`
diff --git a/README.md b/README.md
index 45b5541cc9..84974249d4 100644
--- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * Partial workaround for throttling issue * Redirect channel's home URL automatically to `/video` to preserve the old behaviour * `255kbps` audio is extracted from youtube music if premium cookies are given - * Youtube music Albums, channels etc can be downloaded + * Youtube music Albums, channels etc can be downloaded ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723)) * **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[:PROFILE]` @@ -88,9 +88,9 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats -* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ +* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ, BannedVideo, bilibili categories, Epicon, filmmodu, GabTV, HungamaAlbum, ManotoTV, Niconico search, Patreon User, peloton, ProjectVeritas, radiko, StarTV, tiktok user, Tokentube, voicy, TV2HuSeries -* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll playlist, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds +* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll playlist, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds, SAML Verizon login, Hungama, afreecatv, aljazeera, ATV, bitchute, camtube, CDA, eroprofile, facebook, HearThisAtIE, iwara, kakao, Motherless, Nova, peertube, pornhub, reddit, tiktok, TV2, TV2Hu, tv5mondeplus, VH1, Viafree, XHamster * **Subtitle extraction from manifests**: Subtitles can be extracted from streaming media manifests. 
See [commit/be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details @@ -256,9 +256,9 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t extractor --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos - from google videos for youtube-dl "large - apple". Use the value "auto" to let - youtube-dl guess ("auto_warning" to emit a + from google videos for the search term + "large apple". Use the value "auto" to let + yt-dlp guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if @@ -325,10 +325,6 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13 - --match-title REGEX Download only matching titles (regex or - caseless sub-string) - --reject-title REGEX Skip download for matching titles (regex or - caseless sub-string) --max-downloads NUMBER Abort after downloading NUMBER files --min-filesize SIZE Do not download any videos smaller than SIZE (e.g. 50k or 44.6m) @@ -343,10 +339,6 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t --dateafter DATE Download only videos uploaded on or after this date. The date formats accepted is the same as --date - --min-views COUNT Do not download any videos with less than - COUNT views - --max-views COUNT Do not download any videos with more than - COUNT views --match-filter FILTER Generic video filter. Any field (see "OUTPUT TEMPLATE") can be compared with a number or a string using the operators @@ -511,9 +503,6 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t --write-info-json Write video metadata to a .info.json file (this may contain personal information) --no-write-info-json Do not write video metadata (default) - --write-annotations Write video annotations to a - .annotations.xml file - --no-write-annotations Do not write video annotations (default) --write-playlist-metafiles Write playlist metadata in addition to the video metadata when using --write-info-json, --write-description etc. (default) @@ -552,8 +541,8 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t --cache-dir DIR Location in the filesystem where youtube-dl can store some downloaded information (such as client ids and signatures) permanently. - By default $XDG_CACHE_HOME/youtube-dl or - ~/.cache/youtube-dl + By default $XDG_CACHE_HOME/yt-dlp or + ~/.cache/yt-dlp --no-cache-dir Disable filesystem caching --rm-cache-dir Delete all filesystem cache files @@ -751,7 +740,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t FixupStretched, FixupM4a, FixupM3u8, FixupTimestamp and FixupDuration. The supported executables are: AtomicParsley, - FFmpeg and FFprobe.You can also specify + FFmpeg and FFprobe. You can also specify "PP+EXE:ARGS" to give the arguments to the specified executable only when being used by the specified postprocessor. @@ -833,8 +822,8 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t --remove-chapters REGEX Remove chapters whose title matches the given regular expression. 
This option can be used multiple times
- --no-remove-chapters Do not remove any normal chapters from the
- file (default)
+ --no-remove-chapters Do not remove any chapters from the file
+ (default)
 --force-keyframes-at-cuts Force keyframes around the chapters before
 removing/splitting them. Requires a reencode
 and thus is very slow, but the
@@ -1247,9 +1236,11 @@ The available fields are:
 - `br`: Equivalent to using `tbr,vbr,abr`
 - `asr`: Audio sample rate in Hz

-Note that any other **numerical** field made available by the extractor can also be used. All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. Eg: `+res` prefers format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. Eg: `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. Eg: `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. Eg: `filesize~1G` prefers the format with filesize closest to 1 GiB.
+All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. Eg: `+res` prefers format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. Eg: `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. Eg: `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. Eg: `filesize~1G` prefers the format with filesize closest to 1 GiB.

-The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--force-format-sort`. Apart from these, the default order used is: `lang,quality,res,fps,codec:vp9.2,size,br,asr,proto,ext,hasaud,source,id`. Note that the extractors may override this default order, but they cannot override the user-provided order.
+The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--force-format-sort`. Apart from these, the default order used is: `lang,quality,res,fps,codec:vp9.2,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order.
+
+Note that the default has `codec:vp9.2`; i.e. `av1` is not preferred

 If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead.
So it is generally better to use `-f best -S +size,+br,+res,+fps`. @@ -1502,7 +1493,6 @@ While these options still work, their use is not recommended since there are oth --hls-prefer-ffmpeg --downloader "m3u8:ffmpeg" --list-formats-old --compat-options list-formats (Alias: --no-list-formats-as-table) --list-formats-as-table --compat-options -list-formats [Default] (Alias: --no-list-formats-old) - --sponskrub-args ARGS --ppa "sponskrub:ARGS" --youtube-skip-dash-manifest --extractor-args "youtube:skip=dash" (Alias: --no-youtube-include-dash-manifest) --youtube-skip-hls-manifest --extractor-args "youtube:skip=hls" (Alias: --no-youtube-include-hls-manifest) --youtube-include-dash-manifest Default (Alias: --no-youtube-skip-dash-manifest) diff --git a/supportedsites.md b/supportedsites.md index 7e19b324c4..3c805ba76c 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -97,6 +97,7 @@ - **Bandcamp:weekly** - **BandcampMusic** - **bangumi.bilibili.com**: BiliBili番剧 + - **BannedVideo** - **bbc**: BBC - **bbc.co.uk**: BBC iPlayer - **bbc.co.uk:article**: BBC articles @@ -118,6 +119,7 @@ - **Bigflix** - **Bild**: Bild.de - **BiliBili** + - **Bilibili category extractor** - **BilibiliAudio** - **BilibiliAudioAlbum** - **BilibiliChannel** @@ -153,7 +155,6 @@ - **Camdemy** - **CamdemyFolder** - **CamModels** - - **CamTube** - **CamWithHer** - **canalc2.tv** - **Canalplus**: mycanal.fr and piwiplus.fr @@ -295,6 +296,8 @@ - **Embedly** - **EMPFlix** - **Engadget** + - **Epicon** + - **EpiconSeries** - **Eporner** - **EroProfile** - **EroProfile:album** @@ -316,6 +319,7 @@ - **fc2** - **fc2:embed** - **Fczenit** + - **Filmmodu** - **filmon** - **filmon:channel** - **Filmweb** @@ -353,6 +357,7 @@ - **Funk** - **Fusion** - **Fux** + - **GabTV** - **Gaia** - **GameInformer** - **GameSpot** @@ -408,6 +413,7 @@ - **Huajiao**: 花椒直播 - **HuffPost**: Huffington Post - **Hungama** + - **HungamaAlbumPlaylist** - **HungamaSong** - **Hypem** - **ign.com** @@ -520,6 +526,9 @@ - **MallTV** - **mangomolo:live** - **mangomolo:video** + - **ManotoTV**: Manoto TV (Episode) + - **ManotoTVLive**: Manoto TV (Live) + - **ManotoTVShow**: Manoto TV (Show) - **ManyVids** - **MaoriTV** - **Markiza** @@ -658,6 +667,9 @@ - **niconico**: ニコニコ動画 - **NiconicoPlaylist** - **NiconicoUser** + - **nicovideo:search**: Nico video searches + - **nicovideo:search:date**: Nico video searches, newest first + - **nicovideo:search_url**: Nico video search URLs - **Nintendo** - **Nitter** - **njoy**: N-JOY @@ -740,9 +752,12 @@ - **parliamentlive.tv**: UK parliament videos - **Parlview** - **Patreon** + - **PatreonUser** - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / 
KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) - **PearVideo** - **PeerTube** + - **peloton** + - **peloton:live**: Peloton Live - **People** - **PerformGroup** - **periscope**: Periscope @@ -783,6 +798,7 @@ - **PornHd** - **PornHub**: PornHub and Thumbzilla - **PornHubPagedVideoList** + - **PornHubPlaylist** - **PornHubUser** - **PornHubUserVideosUpload** - **Pornotube** @@ -790,6 +806,7 @@ - **PornoXO** - **PornTube** - **PressTV** + - **ProjectVeritas** - **prosiebensat1**: ProSiebenSat.1 Digital - **puhutv** - **puhutv:serie** @@ -806,6 +823,8 @@ - **QuicklineLive** - **R7** - **R7Article** + - **Radiko** + - **RadikoRadio** - **radio.de** - **radiobremen** - **radiocanada** @@ -956,6 +975,7 @@ - **SRGSSR** - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites - **stanfordoc**: Stanford Open ClassRoom + - **startv** - **Steam** - **Stitcher** - **StitcherShow** @@ -1023,11 +1043,14 @@ - **ThisAV** - **ThisOldHouse** - **TikTok** + - **tiktok:user** - **tinypic**: tinypic.com videos - **TMZ** - **TNAFlix** - **TNAFlixNetworkEmbed** - **toggle** + - **Tokentube** + - **Tokentube:channel** - **ToonGoggles** - **tou.tv** - **Toypics**: Toypics video @@ -1050,10 +1073,11 @@ - **Turbo** - **tv.dfb.de** - **TV2** - - **tv2.hu** - **TV2Article** - **TV2DK** - **TV2DKBornholmPlay** + - **tv2play.hu** + - 
**tv2playseries.hu** - **TV4**: tv4.se and tv4play.se - **TV5MondePlus**: TV5MONDE+ - **tv5unis** @@ -1187,6 +1211,8 @@ - **VODPl** - **VODPlatform** - **VoiceRepublic** + - **voicy** + - **voicy:channel** - **Voot** - **VootSeries** - **VoxMedia** diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 483cce8d86..2086e12655 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -232,7 +232,7 @@ def parseOpts(overrideArguments=None): general.add_option( '--default-search', dest='default_search', metavar='PREFIX', - help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching') + help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for the search term "large apple". Use the value "auto" to let yt-dlp guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching') general.add_option( '--ignore-config', '--no-config', action='store_true', @@ -1135,7 +1135,7 @@ def parseOpts(overrideArguments=None): help='Do not load cookies from browser (default)') filesystem.add_option( '--cache-dir', dest='cachedir', default=None, metavar='DIR', - help='Location in the filesystem where youtube-dl can store some downloaded information (such as client ids and signatures) permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl') + help='Location in the filesystem where youtube-dl can store some downloaded information (such as client ids and signatures) permanently. 
By default $XDG_CACHE_HOME/yt-dlp or ~/.cache/yt-dlp') filesystem.add_option( '--no-cache-dir', action='store_false', dest='cachedir', help='Disable filesystem caching') From 8026e5015274f3899f5db2d43eaaecdfc7d94ffd Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 2 Sep 2021 05:33:38 +0530 Subject: [PATCH 045/641] [version] update :ci skip all --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- yt_dlp/version.py | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index f6c01ce7af..a1b459cc72 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -21,7 +21,7 @@ assignees: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running yt-dlp version **2021.08.10** +- [ ] I've verified that I'm running yt-dlp version **2021.09.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -44,7 +44,7 @@ Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running yt-dlp version **2021.08.10** +- [ ] I've verified that I'm running yt-dlp version **2021.09.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] The provided URLs do not contain any DRM to the best of my knowledge diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index c4f2617961..03fea013f0 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -21,13 +21,13 @@ assignees: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running yt-dlp version **2021.08.10** +- [ ] I've verified that I'm running yt-dlp version **2021.09.02** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index cf2763b2ec..c76452be21 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -21,7 +21,7 @@ assignees: '' - [ ] I'm reporting a bug unrelated to a specific site -- [ ] I've verified that I'm running yt-dlp version **2021.08.10** +- [ ] I've verified that I'm running yt-dlp version **2021.09.02** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] The provided URLs do not contain any DRM to the best of my knowledge - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped @@ -47,7 +47,7 @@ Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running yt-dlp version **2021.08.10** +- [ ] I've verified that I'm running yt-dlp version **2021.09.02** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 
b42fc98bc6..f03898ae3e 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2021.08.10' +__version__ = '2021.09.02' From 8113999995063c8f5c98d6b8c9aa1d5f9ccc0da2 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 3 Sep 2021 06:34:55 +0530 Subject: [PATCH 046/641] Fix `--compat-option playlist-index` --- yt_dlp/YoutubeDL.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 7da25a7ba2..9135123878 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1531,8 +1531,8 @@ class YoutubeDL(object): max_failures = self.params.get('skip_playlist_after_errors') or float('inf') for i, entry_tuple in enumerate(entries, 1): playlist_index, entry = entry_tuple - if 'playlist-index' in self.params.get('compat_options', []): - playlist_index = playlistitems[i - 1] if playlistitems else i + if 'playlist-index' in self.params.get('compat_opts', []): + playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries)) # This __x_forwarded_for_ip thing is a bit ugly but requires # minimal changes From 409e18286e5dcd0cba60726f8143847dfed743cf Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 3 Sep 2021 22:48:42 +0530 Subject: [PATCH 047/641] Fix `extra_info` being reused across runs 58adec46773ee95be356daf88da7ac8a0ff1e703 was supposed to solve this, but ended up being an incomplete fix Closes #727 --- yt_dlp/YoutubeDL.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 9135123878..cf8304c39c 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1166,7 +1166,7 @@ class YoutubeDL(object): for key, value in extra_info.items(): info_dict.setdefault(key, value) - def extract_info(self, url, download=True, ie_key=None, extra_info={}, + def extract_info(self, url, download=True, ie_key=None, extra_info=None, process=True, force_generic_extractor=False): """ Return a list with a dictionary for each video extracted. 
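Aside: the signature change above, together with the `if extra_info is None` guard in the hunk below, works around Python's mutable-default-argument pitfall, where a default `{}` is created once at function definition and then shared by every call. A minimal standalone sketch of the bug and the fix, using hypothetical names rather than the real yt-dlp API:

    def extract(url, extra_info={}):  # bug: one dict is shared across all calls
        extra_info.setdefault('urls', []).append(url)
        return extra_info

    extract('https://a.example')  # {'urls': ['https://a.example']}
    extract('https://b.example')  # {'urls': ['https://a.example', 'https://b.example']}

    def extract_fixed(url, extra_info=None):
        if extra_info is None:  # a fresh dict is created on every call
            extra_info = {}
        extra_info.setdefault('urls', []).append(url)
        return extra_info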
@@ -1183,6 +1183,9 @@ class YoutubeDL(object): force_generic_extractor -- force using the generic extractor """ + if extra_info is None: + extra_info = {} + if not ie_key and force_generic_extractor: ie_key = 'Generic' From dd594deb2a0449dd8b145ef0552235f66ee3d454 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 4 Sep 2021 01:18:56 +0530 Subject: [PATCH 048/641] Fix `--no-get-comments` Closes #882 --- yt_dlp/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 2086e12655..505160cec3 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1105,7 +1105,7 @@ def parseOpts(overrideArguments=None): 'The comments are fetched even without this option if the extraction is known to be quick (Alias: --get-comments)')) filesystem.add_option( '--no-write-comments', '--no-get-comments', - action='store_true', dest='getcomments', default=False, + action='store_false', dest='getcomments', help='Do not retrieve video comments unless the extraction is known to be quick (Alias: --no-get-comments)') filesystem.add_option( '--load-info-json', '--load-info', From 165efb823b3a8a6a6788cfe23e6b93dfbe150568 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 4 Sep 2021 01:37:41 +0530 Subject: [PATCH 049/641] [ModifyChapters] fixes (See desc) * [docs] Fix typo * Do not enable `sponskrub` by default * Fix `--force-keyframes-at-cuts` * Don't embed subtitles if the video has been cut. Previously, running `--remove-chapters` with `--embed-subs` multiple times caused repeated cuts and out-of-sync subtitles * Store `_real_duration` to prevent running ffprobe multiple times --- yt_dlp/options.py | 4 ++-- yt_dlp/postprocessor/ffmpeg.py | 22 ++++++++++++++++++++++ yt_dlp/postprocessor/modify_chapters.py | 18 +++++------------- 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 505160cec3..c2d7a74ff7 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1394,7 +1394,7 @@ def parseOpts(overrideArguments=None): 'SponsorBlock categories to create chapters for, separated by commas. ' 'Available categories are all, %s. You can prefix the category with a "-" to exempt it. ' 'See https://wiki.sponsor.ajay.app/index.php/Segment_Categories for description of the categories. 
' - 'Eg: --sponsorblock-query all,-preview' % ', '.join(SponsorBlockPP.CATEGORIES.keys()))) + 'Eg: --sponsorblock-mark all,-preview' % ', '.join(SponsorBlockPP.CATEGORIES.keys()))) sponsorblock.add_option( '--sponsorblock-remove', metavar='CATS', dest='sponsorblock_remove', default=set(), action='callback', type='str', @@ -1421,7 +1421,7 @@ def parseOpts(overrideArguments=None): sponsorblock.add_option( '--sponskrub', - action='store_true', dest='sponskrub', default=None, + action='store_true', dest='sponskrub', default=False, help=optparse.SUPPRESS_HELP) sponsorblock.add_option( '--no-sponskrub', diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 8063346450..25488e58bc 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -15,6 +15,7 @@ from ..utils import ( dfxp2srt, encodeArgument, encodeFilename, + float_or_none, get_exe_version, is_outdated_version, ISO639Utils, @@ -233,6 +234,23 @@ class FFmpegPostProcessor(PostProcessor): None) return num, len(streams) + def _get_real_video_duration(self, info, fatal=True): + try: + if '_real_duration' not in info: + info['_real_duration'] = float_or_none( + traverse_obj(self.get_metadata_object(info['filepath']), ('format', 'duration'))) + if not info['_real_duration']: + raise PostProcessingError('ffprobe returned empty duration') + except PostProcessingError as e: + if fatal: + raise PostProcessingError(f'Unable to determine video duration; {e}') + return info.setdefault('_real_duration', None) + + def _duration_mismatch(self, d1, d2): + if not d1 or not d2: + return None + return abs(d1 - d2) > 1 + def run_ffmpeg_multiple_files(self, input_paths, out_path, opts, **kwargs): return self.real_run_ffmpeg( [(path, []) for path in input_paths], @@ -528,6 +546,10 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): return [], information filename = information['filepath'] + if self._duration_mismatch( + self._get_real_video_duration(information, False), information['duration']): + self.to_screen(f'Skipping {self.pp_key()} since the real and expected durations mismatch') + return [], information ext = information['ext'] sub_langs, sub_names, sub_filenames = [], [], [] diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py index 3d6493b683..9a7ba8effe 100644 --- a/yt_dlp/postprocessor/modify_chapters.py +++ b/yt_dlp/postprocessor/modify_chapters.py @@ -9,11 +9,9 @@ from .ffmpeg import ( ) from .sponsorblock import SponsorBlockPP from ..utils import ( - float_or_none, orderedSet, PostProcessingError, prepend_extension, - traverse_obj, ) @@ -37,7 +35,7 @@ class ModifyChaptersPP(FFmpegPostProcessor): if not chapters and not sponsor_chapters: return [], info - real_duration = self._get_real_video_duration(info['filepath']) + real_duration = self._get_real_video_duration(info) if not chapters: chapters = [{'start_time': 0, 'end_time': real_duration, 'title': info['title']}] @@ -45,8 +43,8 @@ class ModifyChaptersPP(FFmpegPostProcessor): if not cuts: return [], info - if abs(real_duration - info['duration']) > 1: - if abs(real_duration - info['chapters'][-1]['end_time']) < 1: + if self._duration_mismatch(real_duration, info.get('duration')): + if not self._duration_mismatch(real_duration, info['chapters'][-1]['end_time']): self.to_screen(f'Skipping {self.pp_key()} since the video appears to be already cut') return [], info if not info.get('__real_download'): @@ -72,6 +70,7 @@ class ModifyChaptersPP(FFmpegPostProcessor): os.replace(out_file, in_file) 
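# Aside: `_get_real_video_duration` above uses a probe-once idiom, since running
# ffprobe is comparatively expensive. A reduced sketch with hypothetical names;
# the real method additionally validates the probed value and can raise:
def real_duration(info, probe):
    if '_real_duration' not in info:  # only the first call pays for the probe
        info['_real_duration'] = probe(info['filepath'])
    return info['_real_duration']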
files_to_remove.append(uncut_file) + info['_real_duration'] = info['chapters'][-1]['end_time'] return files_to_remove, info def _mark_chapters_to_remove(self, chapters, sponsor_chapters): @@ -101,13 +100,6 @@ class ModifyChaptersPP(FFmpegPostProcessor): return chapters, sponsor_chapters - def _get_real_video_duration(self, filename): - duration = float_or_none( - traverse_obj(self.get_metadata_object(filename), ('format', 'duration'))) - if duration is None: - raise PostProcessingError('ffprobe returned empty duration') - return duration - def _get_supported_subs(self, info): for sub in (info.get('requested_subtitles') or {}).values(): sub_file = sub.get('filepath') @@ -311,7 +303,7 @@ class ModifyChaptersPP(FFmpegPostProcessor): in_file = filename out_file = prepend_extension(in_file, 'temp') if force_keyframes: - in_file = self.force_keyframes(in_file, (t for r in ranges_to_cut for t in r)) + in_file = self.force_keyframes(in_file, (t for c in ranges_to_cut for t in (c['start_time'], c['end_time']))) self.to_screen(f'Removing chapters from {filename}') self.concat_files([in_file] * len(concat_opts), out_file, concat_opts) if in_file != filename: From 8e5fecc88c53611de538a50c1e51eb048b1544e6 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 4 Sep 2021 03:07:27 +0530 Subject: [PATCH 050/641] Handle more playlist errors with `-i` --- yt_dlp/YoutubeDL.py | 25 +++++++++++++++---------- yt_dlp/utils.py | 19 ++++++++++++++----- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index cf8304c39c..9768bb8caa 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1211,7 +1211,8 @@ class YoutubeDL(object): else: self.report_error('no suitable InfoExtractor for URL %s' % url) - def __handle_extraction_exceptions(func, handle_all_errors=True): + def __handle_extraction_exceptions(func): + def wrapper(self, *args, **kwargs): try: return func(self, *args, **kwargs) @@ -1228,10 +1229,10 @@ class YoutubeDL(object): self.to_stderr('\r') self.report_warning('The download speed is below throttle limit. 
Re-extracting data') return wrapper(self, *args, **kwargs) - except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached): + except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError): raise except Exception as e: - if handle_all_errors and self.params.get('ignoreerrors', False): + if self.params.get('ignoreerrors', False): self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc())) else: raise @@ -1436,14 +1437,18 @@ class YoutubeDL(object): msg = ( 'Downloading %d videos' if not isinstance(ie_entries, list) else 'Collected %d videos; downloading %%d of them' % len(ie_entries)) - if not isinstance(ie_entries, (list, PagedList)): - ie_entries = LazyList(ie_entries) - def get_entry(i): - return YoutubeDL.__handle_extraction_exceptions( - lambda self, i: ie_entries[i - 1], - False - )(self, i) + if isinstance(ie_entries, list): + def get_entry(i): + return ie_entries[i - 1] + else: + if not isinstance(ie_entries, PagedList): + ie_entries = LazyList(ie_entries) + + def get_entry(i): + return YoutubeDL.__handle_extraction_exceptions( + lambda self, i: ie_entries[i - 1] + )(self, i) entries = [] for i in playlistitems or itertools.count(playliststart): diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index fa9c509b2d..65d585d053 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3972,6 +3972,9 @@ class LazyList(collections.abc.Sequence): ''' Lazy immutable list from an iterable Note that slices of a LazyList are lists and not LazyList''' + class IndexError(IndexError): + pass + def __init__(self, iterable): self.__iterable = iter(iterable) self.__cache = [] @@ -4015,22 +4018,28 @@ class LazyList(collections.abc.Sequence): or (stop is None and step > 0)): # We need to consume the entire iterable to be able to slice from the end # Obviously, never use this with infinite iterables - return self.__exhaust()[idx] - + self.__exhaust() + try: + return self.__cache[idx] + except IndexError as e: + raise self.IndexError(e) from e n = max(start or 0, stop or 0) - len(self.__cache) + 1 if n > 0: self.__cache.extend(itertools.islice(self.__iterable, n)) - return self.__cache[idx] + try: + return self.__cache[idx] + except IndexError as e: + raise self.IndexError(e) from e def __bool__(self): try: self[-1] if self.__reversed else self[0] - except IndexError: + except self.IndexError: return False return True def __len__(self): - self.exhaust() + self.__exhaust() return len(self.__cache) def reverse(self): From 4614bc22c1003a0b63ec6ed9c1a5d12a3e0cf05a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 3 Sep 2021 06:14:26 +0530 Subject: [PATCH 051/641] Allow `--force-write-archive` to work with `--flat-playlist` Related: #876 --- yt_dlp/YoutubeDL.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 9768bb8caa..ada870c487 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1291,10 +1291,14 @@ class YoutubeDL(object): if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or extract_flat is True): info_copy = ie_result.copy() - self.add_extra_info(info_copy, extra_info) ie = try_get(ie_result.get('ie_key'), self.get_info_extractor) + if not ie_result.get('id'): + info_copy['id'] = ie.get_temp_id(ie_result['url']) self.add_default_extra_info(info_copy, ie, ie_result['url']) + self.add_extra_info(info_copy, extra_info) self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True) + if 
self.params.get('force_write_download_archive', False): + self.record_download_archive(info_copy) return ie_result if result_type == 'video': From f9be9cb9fd8e85504735a6c60f4d7a2332764d05 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 4 Sep 2021 07:52:16 +0530 Subject: [PATCH 052/641] [cookies] Print warning for cookie decoding error only once Closes #889 --- yt_dlp/cookies.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index c28833159a..74219a8f7c 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -559,7 +559,7 @@ def _parse_safari_cookies_record(data, jar, logger): p.skip_to(value_offset) value = p.read_cstring() except UnicodeDecodeError: - logger.warning('failed to parse cookie because UTF-8 decoding failed') + logger.warning('failed to parse cookie because UTF-8 decoding failed', only_once=True) return record_size p.skip_to(record_size, 'space at the end of the record') @@ -655,7 +655,7 @@ def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16): try: return intlist_to_bytes(plaintext[:-padding_length]).decode('utf-8') except UnicodeDecodeError: - logger.warning('failed to decrypt cookie because UTF-8 decoding failed. Possibly the key is wrong?') + logger.warning('failed to decrypt cookie because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) return None @@ -664,13 +664,13 @@ def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger): try: plaintext = cipher.decrypt_and_verify(ciphertext, authentication_tag) except ValueError: - logger.warning('failed to decrypt cookie because the MAC check failed. Possibly the key is wrong?') + logger.warning('failed to decrypt cookie because the MAC check failed. Possibly the key is wrong?', only_once=True) return None try: return plaintext.decode('utf-8') except UnicodeDecodeError: - logger.warning('failed to decrypt cookie because UTF-8 decoding failed. Possibly the key is wrong?') + logger.warning('failed to decrypt cookie because UTF-8 decoding failed. 
Possibly the key is wrong?', only_once=True) return None @@ -698,7 +698,7 @@ def _decrypt_windows_dpapi(ciphertext, logger): ctypes.byref(blob_out) # pDataOut ) if not ret: - logger.warning('failed to decrypt with DPAPI') + logger.warning('failed to decrypt with DPAPI', only_once=True) return None result = ctypes.string_at(blob_out.pbData, blob_out.cbData) From 02def2714cfe54d63943d058229cb2dc9bef8248 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sat, 4 Sep 2021 02:31:47 +0000 Subject: [PATCH 053/641] [southpark] Fix SouthParkDE (#812) This was broken by https://github.com/yt-dlp/yt-dlp/commit/ee1e05581e32114c52e75e90983a66fb25fbc730 Authored by: coletdjnz --- yt_dlp/extractor/mtv.py | 10 ++++----- yt_dlp/extractor/southpark.py | 42 ++++++++++++++++++++++++++++++++--- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py index 6b506ad9ae..e0608845dd 100644 --- a/yt_dlp/extractor/mtv.py +++ b/yt_dlp/extractor/mtv.py @@ -44,7 +44,7 @@ class MTVServicesInfoExtractor(InfoExtractor): # Remove the templates, like &device={device} return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url) - def _get_feed_url(self, uri): + def _get_feed_url(self, uri, url=None): return self._FEED_URL def _get_thumbnail_url(self, uri, itemdoc): @@ -229,9 +229,9 @@ class MTVServicesInfoExtractor(InfoExtractor): data['lang'] = self._LANG return data - def _get_videos_info(self, uri, use_hls=True): + def _get_videos_info(self, uri, use_hls=True, url=None): video_id = self._id_from_uri(uri) - feed_url = self._get_feed_url(uri) + feed_url = self._get_feed_url(uri, url) info_url = update_url_query(feed_url, self._get_feed_query(uri)) return self._get_videos_info_from_url(info_url, video_id, use_hls) @@ -323,7 +323,7 @@ class MTVServicesInfoExtractor(InfoExtractor): title = url_basename(url) webpage = self._download_webpage(url, title) mgid = self._extract_mgid(webpage) - videos_info = self._get_videos_info(mgid) + videos_info = self._get_videos_info(mgid, url=url) return videos_info @@ -352,7 +352,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): if mobj: return mobj.group('url') - def _get_feed_url(self, uri): + def _get_feed_url(self, uri, url=None): video_id = self._id_from_uri(uri) config = self._download_json( 'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge' % uri, video_id) diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py index 9aedaa04a4..d49749467d 100644 --- a/yt_dlp/extractor/southpark.py +++ b/yt_dlp/extractor/southpark.py @@ -56,9 +56,7 @@ class SouthParkEsIE(SouthParkIE): class SouthParkDeIE(SouthParkIE): IE_NAME = 'southpark.de' - _VALID_URL = r'https?://(?:www\.)?(?Psouthpark\.de/(?:(en/(videoclip|collections|episodes))|(videoclip|collections|folgen))/(?P(?P.+?)/.+?)(?:\?|#|$))' - # _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed' - + _VALID_URL = r'https?://(?:www\.)?(?Psouthpark\.de/(?:(en/(videoclip|collections|episodes|video-clips))|(videoclip|collections|folgen))/(?P(?P.+?)/.+?)(?:\?|#|$))' _TESTS = [{ 'url': 'https://www.southpark.de/videoclip/rsribv/south-park-rueckzug-zum-gummibonbon-wald', 'only_matching': True, @@ -68,6 +66,41 @@ class SouthParkDeIE(SouthParkIE): }, { 'url': 'https://www.southpark.de/collections/zzno5a/south-park-good-eats/7q26gp', 'only_matching': True, + }, { + # clip + 'url': 'https://www.southpark.de/en/video-clips/ct46op/south-park-tooth-fairy-cartman', + 'info_dict': { + 'id': 'e99d45ea-ed00-11e0-aca6-0026b9414f30', + 'ext': 'mp4', + 
'title': 'Tooth Fairy Cartman', + 'description': 'md5:db02e23818b4dc9cb5f0c5a7e8833a68', + }, + }, { + # episode + 'url': 'https://www.southpark.de/en/episodes/yy0vjs/south-park-the-pandemic-special-season-24-ep-1', + 'info_dict': { + 'id': 'f5fbd823-04bc-11eb-9b1b-0e40cf2fc285', + 'ext': 'mp4', + 'title': 'South Park', + 'description': 'md5:ae0d875eff169dcbed16b21531857ac1', + }, + }, { + # clip + 'url': 'https://www.southpark.de/videoclip/ct46op/south-park-zahnfee-cartman', + 'info_dict': { + 'id': 'e99d45ea-ed00-11e0-aca6-0026b9414f30', + 'ext': 'mp4', + 'title': 'Zahnfee Cartman', + 'description': 'md5:b917eec991d388811d911fd1377671ac' + }, + }, { + # episode + 'url': 'https://www.southpark.de/folgen/242csn/south-park-her-mit-dem-hirn-staffel-1-ep-7', + 'info_dict': { + 'id': '607115f3-496f-40c3-8647-2b0bcff486c0', + 'ext': 'mp4', + 'title': 'md5:South Park | Pink Eye | E 0107 | HDSS0107X deu | Version: 634312 | Comedy Central S1', + }, }] def _get_feed_url(self, uri, url=None): @@ -76,6 +109,9 @@ class SouthParkDeIE(SouthParkIE): 'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge&ref=%s' % (uri, url), video_id) return self._remove_template_parameter(config['feedWithQueryParams']) + def _get_feed_query(self, uri): + return + class SouthParkNlIE(SouthParkIE): IE_NAME = 'southpark.nl' From c0ac49bcca766c4487fb25f5124bfb4dba331b9c Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sat, 4 Sep 2021 02:33:42 +0000 Subject: [PATCH 054/641] [youtube] Retry on 'Unknown Error' (#854) and do not repeat unimportant alerts Closes #839 Authored by: coletdjnz --- yt_dlp/extractor/youtube.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 24fca3f84a..e184cc6a6e 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -48,6 +48,7 @@ from ..utils import ( parse_iso8601, parse_qs, qualities, + remove_end, remove_start, smuggle_url, str_or_none, @@ -720,7 +721,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if message: yield alert_type, message - def _report_alerts(self, alerts, expected=True, fatal=True): + def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False): errors = [] warnings = [] for alert_type, alert_message in alerts: @@ -730,7 +731,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): warnings.append([alert_type, alert_message]) for alert_type, alert_message in (warnings + errors[:-1]): - self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message)) + self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once) if errors: raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected) @@ -779,7 +780,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): while count < retries: count += 1 if last_error: - self.report_warning('%s. Retrying ...' % last_error) + self.report_warning('%s. Retrying ...' 
% remove_end(last_error, '.')) try: response = self._call_api( ep=ep, fatal=True, headers=headers, @@ -814,8 +815,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor): else: # Youtube may send alerts if there was an issue with the continuation page try: - self._extract_and_report_alerts(response, expected=False) + self._extract_and_report_alerts(response, expected=False, only_once=True) except ExtractorError as e: + # YouTube servers may return errors we want to retry on in a 200 OK response + # See: https://github.com/yt-dlp/yt-dlp/issues/839 + if 'unknown error' in e.msg.lower(): + last_error = e.msg + continue if fatal: raise self.report_warning(error_to_compat_str(e)) @@ -4285,7 +4291,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): # YouTube sometimes provides a button to reload playlist with unavailable videos. if 'no-youtube-unavailable-videos' not in compat_opts: data = self._reload_with_unavailable_videos(item_id, data, webpage) or data - self._extract_and_report_alerts(data) + self._extract_and_report_alerts(data, only_once=True) tabs = try_get( data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list) if tabs: From 421ddcb8b4712f41c6060b6d651ec8dc7d4b139a Mon Sep 17 00:00:00 2001 From: ChillingPepper <90042155+ChillingPepper@users.noreply.github.com> Date: Sat, 4 Sep 2021 14:29:35 +0200 Subject: [PATCH 055/641] [SovietsCloset] Add extractor (#884) Authored by: ChillingPepper --- yt_dlp/extractor/extractors.py | 4 + yt_dlp/extractor/sovietscloset.py | 202 ++++++++++++++++++++++++++++++ yt_dlp/utils.py | 4 +- 3 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 yt_dlp/extractor/sovietscloset.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index f0c22cd579..1a3093c150 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1278,6 +1278,10 @@ from .southpark import ( SouthParkEsIE, SouthParkNlIE ) +from .sovietscloset import ( + SovietsClosetIE, + SovietsClosetPlaylistIE +) from .spankbang import ( SpankBangIE, SpankBangPlaylistIE, diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py new file mode 100644 index 0000000000..218a146dfe --- /dev/null +++ b/yt_dlp/extractor/sovietscloset.py @@ -0,0 +1,202 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + js_to_json, + try_get, + unified_timestamp +) + + +class SovietsClosetBaseIE(InfoExtractor): + MEDIADELIVERY_REFERER = {'Referer': 'https://iframe.mediadelivery.net/'} + + def parse_nuxt_jsonp(self, nuxt_jsonp_url, video_id, name): + nuxt_jsonp = self._download_webpage(nuxt_jsonp_url, video_id, note=f'Downloading {name} __NUXT_JSONP__') + js, arg_keys, arg_vals = self._search_regex( + r'__NUXT_JSONP__\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)', + nuxt_jsonp, '__NUXT_JSONP__', group=['js', 'arg_keys', 'arg_vals']) + + args = dict(zip(arg_keys.split(','), arg_vals.split(','))) + + for key, val in args.items(): + if val in ('undefined', 'void 0'): + args[key] = 'null' + + return self._parse_json(js_to_json(js, args), video_id)['data'][0] + + def video_meta(self, video_id, game_name, category_name, episode_number, stream_date): + title = game_name + if category_name and category_name != 'Misc': + title += f' - {category_name}' + if episode_number: + title += f' #{episode_number}' + + timestamp = unified_timestamp(stream_date) + + return { + 'id': video_id, + 'title': title, + 'http_headers': self.MEDIADELIVERY_REFERER,
'uploader': 'SovietWomble', + 'creator': 'SovietWomble', + 'release_timestamp': timestamp, + 'timestamp': timestamp, + 'uploader_id': 'SovietWomble', + 'uploader_url': 'https://www.twitch.tv/SovietWomble', + 'was_live': True, + 'availability': 'public', + 'series': game_name, + 'season': category_name, + 'episode_number': episode_number, + } + + +class SovietsClosetIE(SovietsClosetBaseIE): + _VALID_URL = r'https?://(?:www\.)?sovietscloset\.com/video/(?P<id>[0-9]+)/?' + _TESTS = [ + { + 'url': 'https://sovietscloset.com/video/1337', + 'md5': '11e58781c4ca5b283307aa54db5b3f93', + 'info_dict': { + 'id': '1337', + 'ext': 'mp4', + 'title': 'The Witcher #13', + 'thumbnail': r're:^https?://.*\.b-cdn\.net/2f0cfbf4-3588-43a9-a7d6-7c9ea3755e67/thumbnail\.jpg$', + 'uploader': 'SovietWomble', + 'creator': 'SovietWomble', + 'release_timestamp': 1492091580, + 'release_date': '20170413', + 'timestamp': 1492091580, + 'upload_date': '20170413', + 'uploader_id': 'SovietWomble', + 'uploader_url': 'https://www.twitch.tv/SovietWomble', + 'was_live': True, + 'availability': 'public', + 'series': 'The Witcher', + 'season': 'Misc', + 'episode_number': 13, + }, + }, + { + 'url': 'https://sovietscloset.com/video/1105', + 'md5': '578b1958a379e7110ba38697042e9efb', + 'info_dict': { + 'id': '1105', + 'ext': 'mp4', + 'title': 'Arma 3 - Zeus Games #3', + 'uploader': 'SovietWomble', + 'thumbnail': r're:^https?://.*\.b-cdn\.net/c0e5e76f-3a93-40b4-bf01-12343c2eec5d/thumbnail\.jpg$', + 'uploader': 'SovietWomble', + 'creator': 'SovietWomble', + 'release_timestamp': 1461157200, + 'release_date': '20160420', + 'timestamp': 1461157200, + 'upload_date': '20160420', + 'uploader_id': 'SovietWomble', + 'uploader_url': 'https://www.twitch.tv/SovietWomble', + 'was_live': True, + 'availability': 'public', + 'series': 'Arma 3', + 'season': 'Zeus Games', + 'episode_number': 3, + }, + }, + ] + + def _extract_bunnycdn_iframe(self, video_id, bunnycdn_id): + iframe = self._download_webpage( + f'https://iframe.mediadelivery.net/embed/5105/{bunnycdn_id}', + video_id, note='Downloading BunnyCDN iframe', headers=self.MEDIADELIVERY_REFERER) + + m3u8_url = self._search_regex(r'(https?://.*?\.m3u8)', iframe, 'm3u8 url') + thumbnail_url = self._search_regex(r'(https?://.*?thumbnail\.jpg)', iframe, 'thumbnail url') + + m3u8_formats = self._extract_m3u8_formats(m3u8_url, video_id, headers=self.MEDIADELIVERY_REFERER) + self._sort_formats(m3u8_formats) + + return { + 'formats': m3u8_formats, + 'thumbnail': thumbnail_url, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + static_assets_base = self._search_regex(r'staticAssetsBase:\"(.*?)\"', webpage, 'staticAssetsBase') + static_assets_base = f'https://sovietscloset.com{static_assets_base}' + + stream = self.parse_nuxt_jsonp(f'{static_assets_base}/video/{video_id}/payload.js', video_id, 'video')['stream'] + + return { + **self.video_meta( + video_id=video_id, game_name=stream['game']['name'], + category_name=try_get(stream, lambda x: x['subcategory']['name'], str), + episode_number=stream.get('number'), stream_date=stream.get('date')), + **self._extract_bunnycdn_iframe(video_id, stream['bunnyId']), + } + + +class SovietsClosetPlaylistIE(SovietsClosetBaseIE): + _VALID_URL = r'https?://(?:www\.)?sovietscloset\.com/(?!video)(?P<id>[^#?]+)' + _TESTS = [ + + { + 'url': 'https://sovietscloset.com/The-Witcher', + 'info_dict': { + 'id': 'The-Witcher', + 'title': 'The Witcher', + }, + 'playlist_mincount': 31, + }, + { + 'url':
'https://sovietscloset.com/Arma-3/Zeus-Games', + 'info_dict': { + 'id': 'Arma-3/Zeus-Games', + 'title': 'Arma 3 - Zeus Games', + }, + 'playlist_mincount': 3, + }, + { + 'url': 'https://sovietscloset.com/arma-3/zeus-games/', + 'info_dict': { + 'id': 'arma-3/zeus-games', + 'title': 'Arma 3 - Zeus Games', + }, + 'playlist_mincount': 3, + }, + ] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + if playlist_id.endswith('/'): + playlist_id = playlist_id[:-1] + + webpage = self._download_webpage(url, playlist_id) + + static_assets_base = self._search_regex(r'staticAssetsBase:\"(.*?)\"', webpage, 'staticAssetsBase') + static_assets_base = f'https://sovietscloset.com{static_assets_base}' + + sovietscloset = self.parse_nuxt_jsonp(f'{static_assets_base}/payload.js', playlist_id, 'global')['games'] + + if '/' in playlist_id: + game_slug, category_slug = playlist_id.lower().split('/') + else: + game_slug = playlist_id.lower() + category_slug = 'misc' + + game = next(game for game in sovietscloset if game['slug'].lower() == game_slug) + category = next(cat for cat in game['subcategories'] if cat['slug'].lower() == category_slug) + playlist_title = game.get('name') or game_slug + if category_slug != 'misc': + playlist_title += f' - {category.get("name") or category_slug}' + entries = [{ + **self.url_result(f'https://sovietscloset.com/video/{stream["id"]}', ie=SovietsClosetIE.ie_key()), + **self.video_meta( + video_id=stream['id'], game_name=game['name'], category_name=category.get('name'), + episode_number=i + 1, stream_date=stream.get('date')), + } for i, stream in enumerate(category['streams'])] + + return self.playlist_result(entries, playlist_id, playlist_title) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 65d585d053..cdf4c0755b 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4387,6 +4387,8 @@ def js_to_json(code, vars={}): v = m.group(0) if v in ('true', 'false', 'null'): return v + elif v in ('undefined', 'void 0'): + return 'null' elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',': return "" @@ -4413,7 +4415,7 @@ def js_to_json(code, vars={}): "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| {comment}|,(?={skip}[\]}}])| - (?:(? 
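Aside: the `js_to_json` change above maps JavaScript's `undefined` and `void 0` to JSON `null`, which the NUXT JSONP parser in this patch relies on. A toy version of just that substitution rule, written as a hypothetical standalone helper rather than the real `js_to_json`:

    import json
    import re

    def js_scalars_to_json(code):
        # naive: unlike the real implementation, this would also rewrite
        # matches inside string literals
        return re.sub(r'\b(?:undefined|void\s+0)\b', 'null', code)

    print(json.loads(js_scalars_to_json('{"a": undefined, "b": void 0}')))
    # {'a': None, 'b': None}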
Date: Sun, 5 Sep 2021 07:37:28 +0900 Subject: [PATCH 056/641] [17live] Add 17.live extractor (#866) Authored by: nao20010128nao --- yt_dlp/extractor/extractors.py | 4 + yt_dlp/extractor/ichinanalive.py | 167 +++++++++++++++++++++++++++++++ 2 files changed, 171 insertions(+) create mode 100644 yt_dlp/extractor/ichinanalive.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 1a3093c150..4ef581b076 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -547,6 +547,10 @@ from .hungama import ( HungamaAlbumPlaylistIE, ) from .hypem import HypemIE +from .ichinanalive import ( + IchinanaLiveIE, + IchinanaLiveClipIE, +) from .ign import ( IGNIE, IGNVideoIE, diff --git a/yt_dlp/extractor/ichinanalive.py b/yt_dlp/extractor/ichinanalive.py new file mode 100644 index 0000000000..cb39f821c6 --- /dev/null +++ b/yt_dlp/extractor/ichinanalive.py @@ -0,0 +1,167 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate +from ..compat import compat_str + + +class IchinanaLiveIE(InfoExtractor): + IE_NAME = '17live' + _VALID_URL = r'https?://(?:www\.)?17\.live/(?:[^/]+/)*(?:live|profile/r)/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://17.live/live/3773096', + 'info_dict': { + 'id': '3773096', + 'title': '萠珈☕🤡🍫moka', + 'is_live': True, + 'uploader': '萠珈☕🤡🍫moka', + 'uploader_id': '3773096', + 'like_count': 366, + 'view_count': 18121, + 'timestamp': 1630569012, + }, + 'skip': 'running as of writing, but may be ended as of testing', + }, { + 'note': 'nothing except language differs', + 'url': 'https://17.live/ja/live/3773096', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return not IchinanaLiveClipIE.suitable(url) and super(IchinanaLiveIE, cls).suitable(url) + + def _real_extract(self, url): + video_id = self._match_id(url) + url = 'https://17.live/live/%s' % video_id + + enter = self._download_json( + 'https://api-dsa.17app.co/api/v1/lives/%s/enter' % video_id, video_id, + headers={'Referer': url}, fatal=False, expected_status=420, + data=b'\0') + if enter and enter.get('message') == 'ended': + raise ExtractorError('This live has ended.', expected=True) + + view_data = self._download_json( + 'https://api-dsa.17app.co/api/v1/lives/%s' % video_id, video_id, + headers={'Referer': url}) + + uploader = traverse_obj( + view_data, ('userInfo', 'displayName'), ('userInfo', 'openID')) + + video_urls = view_data.get('rtmpUrls') + if not video_urls: + raise ExtractorError('unable to extract live URL information') + formats = [] + for (name, value) in video_urls[0].items(): + if not isinstance(value, compat_str): + continue + if not value.startswith('http'): + continue + quality = -1 + if 'web' in name: + quality -= 1 + if 'High' in name: + quality += 4 + if 'Low' in name: + quality -= 2 + formats.append({ + 'format_id': name, + 'url': value, + 'quality': quality, + 'http_headers': {'Referer': url}, + 'ext': 'flv', + 'vcodec': 'h264', + 'acodec': 'aac', + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': uploader or video_id, + 'formats': formats, + 'is_live': True, + 'uploader': uploader, + 'uploader_id': video_id, + 'like_count': view_data.get('receivedLikeCount'), + 'view_count': view_data.get('viewerCount'), + 'thumbnail': view_data.get('coverPhoto'), + 'description': view_data.get('caption'), + 'timestamp': view_data.get('beginTime'), + } + + +class IchinanaLiveClipIE(InfoExtractor): +
IE_NAME = '17live:clip' + _VALID_URL = r'https?://(?:www\.)?17\.live/(?:[^/]+/)*profile/r/(?P<uploader_id>\d+)/clip/(?P<id>[^/]+)' + _TESTS = [{ + 'url': 'https://17.live/profile/r/1789280/clip/1bHQSK8KUieruFXaCH4A4upCzlN', + 'info_dict': { + 'id': '1bHQSK8KUieruFXaCH4A4upCzlN', + 'title': 'マチコ先生🦋Class💋', + 'description': 'マチ戦隊 第一次 バスターコール\n総額200万coin!\n動画制作@うぉーかー🌱Walker🎫', + 'uploader_id': '1789280', + }, + }, { + 'url': 'https://17.live/ja/profile/r/1789280/clip/1bHQSK8KUieruFXaCH4A4upCzlN', + 'only_matching': True, + }] + + def _real_extract(self, url): + uploader_id, video_id = self._match_valid_url(url).groups() + url = 'https://17.live/profile/r/%s/clip/%s' % (uploader_id, video_id) + + view_data = self._download_json( + 'https://api-dsa.17app.co/api/v1/clips/%s' % video_id, video_id, + headers={'Referer': url}) + + uploader = traverse_obj( + view_data, ('userInfo', 'displayName'), ('userInfo', 'name')) + + formats = [] + if view_data.get('videoURL'): + formats.append({ + 'id': 'video', + 'url': view_data['videoURL'], + 'quality': -1, + }) + if view_data.get('transcodeURL'): + formats.append({ + 'id': 'transcode', + 'url': view_data['transcodeURL'], + 'quality': -1, + }) + if view_data.get('srcVideoURL'): + # highest quality + formats.append({ + 'id': 'srcVideo', + 'url': view_data['srcVideoURL'], + 'quality': 1, + }) + + for fmt in formats: + fmt.update({ + 'ext': 'mp4', + 'protocol': 'https', + 'vcodec': 'h264', + 'acodec': 'aac', + 'http_headers': {'Referer': url}, + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': uploader or video_id, + 'formats': formats, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'like_count': view_data.get('likeCount'), + 'view_count': view_data.get('viewCount'), + 'thumbnail': view_data.get('imageURL'), + 'duration': view_data.get('duration'), + 'description': view_data.get('caption'), + 'upload_date': unified_strdate(str_or_none(view_data.get('createdAt'))), + } From 826446bd82b0168bc40c3be027b2bfa47313ce19 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 5 Sep 2021 04:41:02 +0530 Subject: [PATCH 057/641] [plutotv] Fix extractor for URLs with `/en` Closes #431 --- yt_dlp/extractor/plutotv.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/plutotv.py b/yt_dlp/extractor/plutotv.py index b19ff8d021..0cf82466a6 100644 --- a/yt_dlp/extractor/plutotv.py +++ b/yt_dlp/extractor/plutotv.py @@ -19,7 +19,16 @@ from ..utils import ( class PlutoTVIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pluto\.tv(?:/en)?/on-demand/(?Pmovies|series)/(?P.*)/?$' + _VALID_URL = r'''(?x) + https?://(?:www\.)?pluto\.tv(?:/en)?/on-demand + /(?P<video_type>movies|series) + /(?P<series_or_movie_slug>[^/]+) + (?: + /seasons?/(?P<season_number>\d+) + (?:/episode/(?P<episode_slug>[^/]+))? + )?
+ /?(?:$|[#?])''' + _INFO_URL = 'https://service-vod.clusters.pluto.tv/v3/vod/slugs/' _INFO_QUERY_PARAMS = { 'appName': 'web', @@ -146,17 +155,13 @@ class PlutoTVIE(InfoExtractor): return info def _real_extract(self, url): - path = compat_urlparse.urlparse(url).path - path_components = path.split('/') - video_type = path_components[2] - info_slug = path_components[3] - video_json = self._download_json(self._INFO_URL + info_slug, info_slug, - query=self._INFO_QUERY_PARAMS) + mobj = self._match_valid_url(url).groupdict() + info_slug = mobj['series_or_movie_slug'] + video_json = self._download_json(self._INFO_URL + info_slug, info_slug, query=self._INFO_QUERY_PARAMS) - if video_type == 'series': + if mobj['video_type'] == 'series': series_name = video_json.get('name', info_slug) - season_number = int_or_none(try_get(path_components, lambda x: x[5])) - episode_slug = try_get(path_components, lambda x: x[7]) + season_number, episode_slug = mobj.get('season_number'), mobj.get('episode_slug') videos = [] for season in video_json['seasons']: From 265a7a8ee59c1f60d8b5c541918ef4030c694b06 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 5 Sep 2021 05:22:45 +0530 Subject: [PATCH 058/641] [redtube] Fix exts Closes #464 --- yt_dlp/extractor/redtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/redtube.py b/yt_dlp/extractor/redtube.py index a1ca791caa..747ce51995 100644 --- a/yt_dlp/extractor/redtube.py +++ b/yt_dlp/extractor/redtube.py @@ -98,13 +98,14 @@ class RedTubeIE(InfoExtractor): format_id = media.get('quality') formats.append({ 'url': format_url, + 'ext': 'mp4', 'format_id': format_id, 'height': int_or_none(format_id), }) if not formats: video_url = self._html_search_regex( r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL') - formats.append({'url': video_url}) + formats.append({'url': video_url, 'ext': 'mp4'}) self._sort_formats(formats) thumbnail = self._og_search_thumbnail(webpage) From d98b006b85bbdb5f202c2e25366b9017ee9f5782 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 5 Sep 2021 07:03:27 +0530 Subject: [PATCH 059/641] [dw] Fix extractor Closes #830 --- yt_dlp/extractor/dw.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/dw.py b/yt_dlp/extractor/dw.py index d740652f17..6eaee07b47 100644 --- a/yt_dlp/extractor/dw.py +++ b/yt_dlp/extractor/dw.py @@ -5,6 +5,7 @@ from .common import InfoExtractor from ..utils import ( int_or_none, unified_strdate, + url_or_none, ) from ..compat import compat_urlparse @@ -15,13 +16,13 @@ class DWIE(InfoExtractor): _TESTS = [{ # video 'url': 'http://www.dw.com/en/intelligent-light/av-19112290', - 'md5': '7372046e1815c5a534b43f3c3c36e6e9', + 'md5': 'fb9dfd9520811d3ece80f04befd73428', 'info_dict': { 'id': '19112290', 'ext': 'mp4', 'title': 'Intelligent light', 'description': 'md5:90e00d5881719f2a6a5827cb74985af1', - 'upload_date': '20160311', + 'upload_date': '20160605', } }, { # audio 'url': 'http://www.dw.com/en/worldlink-my-business/av-19111941', @@ -55,15 +56,16 @@ class DWIE(InfoExtractor): title = hidden_inputs['media_title'] media_id = hidden_inputs.get('media_id') or media_id - if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1': + direct_url = url_or_none(hidden_inputs.get('file_name')) + if direct_url: + formats = [{'url': hidden_inputs['file_name']}] + else: formats = self._extract_smil_formats( 'http://www.dw.com/smil/v-%s' % media_id, media_id, transform_source=lambda s: s.replace( 'rtmp://tv-od.dw.de/flash/', 'http://tv-download.dw.de/dwtv_video/flv/')) - self._sort_formats(formats) - else: -
formats = [{'url': hidden_inputs['file_name']}] + self._sort_formats(formats) upload_date = hidden_inputs.get('display_date') if not upload_date: From aa6c25309a1734490fc094248a4b14d48eb60567 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 5 Sep 2021 10:26:46 +0530 Subject: [PATCH 060/641] [soundcloud] Make playlist extraction lazy --- yt_dlp/extractor/soundcloud.py | 69 ++++++++++++---------------------- 1 file changed, 25 insertions(+), 44 deletions(-) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 8f0713e134..a9ccb7a8b0 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -14,7 +14,6 @@ from ..compat import ( compat_HTTPError, compat_kwargs, compat_str, - compat_urlparse, ) from ..utils import ( error_to_compat_str, @@ -24,6 +23,7 @@ from ..utils import ( int_or_none, KNOWN_EXTENSIONS, mimetype2ext, + parse_qs, str_or_none, try_get, unified_timestamp, @@ -49,8 +49,7 @@ class SoundcloudEmbedIE(InfoExtractor): webpage)] def _real_extract(self, url): - query = compat_urlparse.parse_qs( - compat_urlparse.urlparse(url).query) + query = parse_qs(url) api_url = query['url'][0] secret_token = query.get('secret_token') if secret_token: @@ -656,64 +655,46 @@ class SoundcloudSetIE(SoundcloudPlaylistBaseIE): class SoundcloudPagedPlaylistBaseIE(SoundcloudIE): def _extract_playlist(self, base_url, playlist_id, playlist_title): - # Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200. - # https://developers.soundcloud.com/blog/offset-pagination-deprecated - COMMON_QUERY = { - 'limit': 200, - 'linked_partitioning': '1', + return { + '_type': 'playlist', + 'id': playlist_id, + 'title': playlist_title, + 'entries': self._entries(base_url, playlist_id), + } + + def _entries(self, base_url, playlist_id): + # Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200. 
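# Aside: the shape this refactor moves to is a pagination generator that yields
# entries as each page arrives instead of collecting everything into one list
# first, so processing can begin before pagination finishes. A sketch with
# hypothetical names:
import itertools

def paged_entries(fetch_page, first_url):
    url = first_url
    for page_num in itertools.count():
        page = fetch_page(url, page_num)  # one API request per page
        yield from page.get('collection') or []
        url = page.get('next_href')  # follow the linked-partitioning cursor
        if not url:
            return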
+ # https://developers.soundcloud.com/blog/offset-pagination-deprecated + query = { + 'limit': 200, + 'linked_partitioning': '1', + 'offset': 0, } - query = COMMON_QUERY.copy() - query['offset'] = 0 next_href = base_url - - entries = [] for i in itertools.count(): response = self._download_json( next_href, playlist_id, 'Downloading track page %s' % (i + 1), query=query, headers=self._HEADERS) - collection = response['collection'] - - if not isinstance(collection, list): - collection = [] - - # Empty collection may be returned, in this case we proceed - # straight to next_href - - def resolve_entry(candidates): + def resolve_entry(*candidates): for cand in candidates: if not isinstance(cand, dict): continue permalink_url = url_or_none(cand.get('permalink_url')) - if not permalink_url: - continue - return self.url_result( - permalink_url, - SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, - str_or_none(cand.get('id')), cand.get('title')) + if permalink_url: + return self.url_result( + permalink_url, + SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, + str_or_none(cand.get('id')), cand.get('title')) - for e in collection: - entry = resolve_entry((e, e.get('track'), e.get('playlist'))) - if entry: - entries.append(entry) + for e in response['collection'] or []: + yield resolve_entry(e, e.get('track'), e.get('playlist')) next_href = response.get('next_href') - if not next_href: - break + query.pop('offset', None) - next_href = response['next_href'] - parsed_next_href = compat_urlparse.urlparse(next_href) - query = compat_urlparse.parse_qs(parsed_next_href.query) - query.update(COMMON_QUERY) - - return { - '_type': 'playlist', - 'id': playlist_id, - 'title': playlist_title, - 'entries': entries, - } class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): From e04a1ff92e015bf431486d1fbcc8b243a92bfc71 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 5 Sep 2021 10:27:49 +0530 Subject: [PATCH 061/641] [soundcloud] Retry playlist pages on `502` error Closes #872 --- yt_dlp/extractor/soundcloud.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index a9ccb7a8b0..c7078ece6d 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -23,6 +23,8 @@ from ..utils import ( int_or_none, KNOWN_EXTENSIONS, mimetype2ext, + network_exceptions, + remove_end, parse_qs, str_or_none, try_get, @@ -662,7 +664,7 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudIE): 'entries': self._entries(base_url, playlist_id), } - def _entries(self, base_url, playlist_id): + def _entries(self, url, playlist_id): # Per the SoundCloud documentation, the maximum limit for a linked partitioning query is 200. # https://developers.soundcloud.com/blog/offset-pagination-deprecated query = { @@ -671,12 +673,25 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudIE): 'offset': 0, } + retries = self.get_param('extractor_retries', 3) - next_href = base_url for i in itertools.count(): - response = self._download_json( - next_href, playlist_id, - 'Downloading track page %s' % (i + 1), query=query, headers=self._HEADERS) + attempt, last_error = -1, None + while attempt < retries: + attempt += 1 + if last_error: + self.report_warning('%s. Retrying ...' 
% remove_end(last_error, '.'), playlist_id) + try: + response = self._download_json( + url, playlist_id, query=query, headers=self._HEADERS, + note='Downloading track page %s%s' % (i + 1, f' (retry #{attempt})' if attempt else '')) + break + except ExtractorError as e: + # Downloading page may result in intermittent 502 HTTP error + # See https://github.com/yt-dlp/yt-dlp/issues/872 + if attempt >= retries or not isinstance(e.cause, compat_HTTPError) or e.cause.code != 502: + raise + last_error = str(e.cause or e.msg) def resolve_entry(*candidates): for cand in candidates: @@ -692,7 +707,7 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudIE): for e in response['collection'] or []: yield resolve_entry(e, e.get('track'), e.get('playlist')) - next_href = response.get('next_href') + url = response.get('next_href') query.pop('offset', None) From 526d74ec5a8bd422d5327d576fd341136ec802d2 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 5 Sep 2021 11:16:23 +0530 Subject: [PATCH 062/641] [cleanup] Misc --- Changelog.md | 2 +- README.md | 2 +- yt_dlp/cookies.py | 4 ++-- yt_dlp/extractor/soundcloud.py | 2 -- yt_dlp/extractor/tiktok.py | 6 +++--- yt_dlp/extractor/youtube.py | 2 +- yt_dlp/utils.py | 4 ++-- 7 files changed, 10 insertions(+), 12 deletions(-) diff --git a/Changelog.md b/Changelog.md index 9ccc505b71..6901e28f2f 100644 --- a/Changelog.md +++ b/Changelog.md @@ -30,7 +30,7 @@ * The fetched sponsor sections are written to the infojson * Deprecates: `--sponskrub`, `--no-sponskrub`, `--sponskrub-cut`, `--no-sponskrub-cut`, `--sponskrub-force`, `--no-sponskrub-force`, `--sponskrub-location`, `--sponskrub-args` * Split `--embed-chapters` from `--embed-metadata` (it still implies the former by default) -* Add option `--remove-chapters` to remove arbitrary chapters by [nihil-admirari](https://github.com/nihil-admirari), pukkandan +* Add option `--remove-chapters` to remove arbitrary chapters by [nihil-admirari](https://github.com/nihil-admirari), [pukkandan](https://github.com/pukkandan) * Add option `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters by [nihil-admirari](https://github.com/nihil-admirari) * Let `--match-filter` reject entries early * Makes redundant: `--match-title`, `--reject-title`, `--min-views`, `--max-views` diff --git a/README.md b/README.md index 84974249d4..2e4bedc938 100644 --- a/README.md +++ b/README.md @@ -966,7 +966,7 @@ To summarize, the general syntax for a field is: %(name[.keys][addition][>strf][|default])[flags][width][.precision][length]type ``` -Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation`, `infojson`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video. +Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`. 
For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video. The available fields are: diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 74219a8f7c..bc3bb62f41 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -123,7 +123,7 @@ def _extract_firefox_cookies(profile, logger): cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite') if cookie_database_path is None: raise FileNotFoundError('could not find firefox cookies database in {}'.format(search_root)) - logger.debug('extracting from: "{}"'.format(cookie_database_path)) + logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path)) with tempfile.TemporaryDirectory(prefix='youtube_dl') as tmpdir: cursor = None @@ -240,7 +240,7 @@ def _extract_chrome_cookies(browser_name, profile, logger): cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies') if cookie_database_path is None: raise FileNotFoundError('could not find {} cookies database in "{}"'.format(browser_name, search_root)) - logger.debug('extracting from: "{}"'.format(cookie_database_path)) + logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path)) decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index c7078ece6d..77e248a477 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -23,7 +23,6 @@ from ..utils import ( int_or_none, KNOWN_EXTENSIONS, mimetype2ext, - network_exceptions, remove_end, parse_qs, str_or_none, @@ -711,7 +710,6 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudIE): query.pop('offset', None) - class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): _VALID_URL = r'''(?x) https?:// diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 08a34db47a..6c50ec7dfd 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -132,9 +132,9 @@ class TikTokIE(InfoExtractor): class TikTokUserIE(InfoExtractor): IE_NAME = 'tiktok:user' - _VALID_URL = r'(?!.*/video/)https?://www\.tiktok\.com/@(?P[\w\._]+)' + _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P[\w\._]+)/?(?:$|[#?])' _TESTS = [{ - 'url': 'https://www.tiktok.com/@corgibobaa?lang=en', + 'url': 'https://tiktok.com/@corgibobaa?lang=en', 'playlist_mincount': 45, 'info_dict': { 'id': '6935371178089399301', @@ -196,7 +196,7 @@ class TikTokUserIE(InfoExtractor): 'Referer': video_url, } } - if not data_json['hasMore']: + if not data_json.get('hasMore'): break cursor = data_json['cursor'] diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index e184cc6a6e..65a6c043e0 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -803,7 +803,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): # We also want to catch all other network exceptions since errors in later pages can be troublesome # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429): - last_error = error_to_compat_str(e.cause or e) + last_error = error_to_compat_str(e.cause or e.msg) if count < retries: continue if fatal: diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index cdf4c0755b..ce84f74166 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2408,7 +2408,7 @@ class ExtractorError(YoutubeDLError): if sys.exc_info()[0] in network_exceptions: expected = True - self.msg = msg + 
self.msg = str(msg) self.traceback = tb self.expected = expected self.cause = cause @@ -2419,7 +2419,7 @@ class ExtractorError(YoutubeDLError): super(ExtractorError, self).__init__(''.join(( format_field(ie, template='[%s] '), format_field(video_id, template='%s: '), - msg, + self.msg, format_field(cause, template=' (caused by %r)'), '' if expected else bug_reports_message()))) From bd9ff55bcd9ac8a131e555deb2e822a8ee94c459 Mon Sep 17 00:00:00 2001 From: MinePlayersPE Date: Sun, 5 Sep 2021 12:34:58 +0700 Subject: [PATCH 063/641] [tiktok] Use API to fetch higher quality video (#843) Authored by: MinePlayersPE, llacb47 --- yt_dlp/extractor/tiktok.py | 229 +++++++++++++++++++++++++++++------ 1 file changed, 196 insertions(+), 33 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 6c50ec7dfd..953ff05b6e 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -2,18 +2,23 @@ from __future__ import unicode_literals import itertools +import random +import string +import time from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, str_or_none, - try_get + traverse_obj, + try_get, + qualities, ) class TikTokIE(InfoExtractor): - _VALID_URL = r'https?://www\.tiktok\.com/@[\w\._]+/video/(?P<id>\d+)' + _VALID_URL = r'https?://www\.tiktok\.com/@[\w\.-]+/video/(?P<id>\d+)' + _TESTS = [{ 'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610', @@ -61,13 +66,22 @@ class TikTokIE(InfoExtractor): 'repost_count': int, 'comment_count': int, } + }, { + # Promoted content/ad + 'url': 'https://www.tiktok.com/@MS4wLjABAAAAAR29F6J2Ktu0Daw03BJyXPNoRQ-W7U5a0Mn3lVCq2rQhjOd_WNLclHUoFgwX8Eno/video/6932675057474981122', + 'only_matching': True, }] + _APP_VERSION = '20.9.3' + _MANIFEST_APP_VERSION = '291' + QUALITIES = ('360p', '540p', '720p') def _extract_aweme(self, props_data, webpage, url): video_info = try_get( props_data, lambda x: x['pageProps']['itemInfo']['itemStruct'], dict) author_info = try_get( props_data, lambda x: x['pageProps']['itemInfo']['itemStruct']['author'], dict) or {} + music_info = try_get( + props_data, lambda x: x['pageProps']['itemInfo']['itemStruct']['music'], dict) or {} stats_info = try_get(props_data, lambda x: x['pageProps']['itemInfo']['itemStruct']['stats'], dict) or {} user_id = str_or_none(author_info.get('uniqueId')) @@ -99,6 +113,9 @@ class TikTokIE(InfoExtractor): 'uploader': user_id, 'uploader_id': str_or_none(author_info.get('id')), 'uploader_url': f'https://www.tiktok.com/@{user_id}', + 'track': str_or_none(music_info.get('title')), + 'album': str_or_none(music_info.get('album')) or None, + 'artist': str_or_none(music_info.get('authorName')), 'thumbnails': thumbnails, 'description': str_or_none(video_info.get('desc')), 'webpage_url': self._og_search_url(webpage), @@ -108,9 +125,185 @@ class TikTokIE(InfoExtractor): } } + def _extract_aweme_app(self, aweme_id): + query = { + 'aweme_id': aweme_id, + 'version_name': self._APP_VERSION, + 'version_code': self._MANIFEST_APP_VERSION, + 'build_number': self._APP_VERSION, + 'manifest_version_code': self._MANIFEST_APP_VERSION, + 'update_version_code': self._MANIFEST_APP_VERSION, + 'openudid': ''.join(random.choice('0123456789abcdef') for i in range(16)), + 'uuid': ''.join([random.choice(string.digits) for num in range(16)]), + '_rticket': int(time.time() * 1000), + 'ts': int(time.time()), + 'device_brand': 'Google', + 'device_type': 'Pixel 4', + 'device_platform': 'android', + 'resolution': '1080*1920', + 'dpi': 420, + 'os_version': '10', + 'os_api':
'29', + 'carrier_region': 'US', + 'sys_region': 'US', + 'region': 'US', + 'app_name': 'trill', + 'app_language': 'en', + 'language': 'en', + 'timezone_name': 'America/New_York', + 'timezone_offset': '-14400', + 'channel': 'googleplay', + 'ac': 'wifi', + 'mcc_mnc': '310260', + 'is_my_cn': 0, + 'aid': 1180, + 'ssmix': 'a', + 'as': 'a1qwert123', + 'cp': 'cbfhckdckkde1', + } + + self._set_cookie('.tiktokv.com', 'odin_tt', ''.join(random.choice('0123456789abcdef') for i in range(160))) + + aweme_detail = self._download_json( + 'https://api-t2.tiktokv.com/aweme/v1/aweme/detail/', aweme_id, + 'Downloading video details', 'Unable to download video details', + headers={ + 'User-Agent': f'com.ss.android.ugc.trill/{self._MANIFEST_APP_VERSION} (Linux; U; Android 10; en_US; Pixel 4; Build/QQ3A.200805.001; Cronet/58.0.2991.0)', + }, query=query)['aweme_detail'] + video_info = aweme_detail['video'] + + def parse_url_key(url_key): + format_id, codec, res, bitrate = self._search_regex( + r'v[^_]+_(?P(?P[^_]+)_(?P\d+p)_(?P\d+))', url_key, + 'url key', default=(None, None, None, None), group=('id', 'codec', 'res', 'bitrate')) + if not format_id: + return {}, None + return { + 'format_id': format_id, + 'vcodec': 'h265' if codec == 'bytevc1' else codec, + 'tbr': int_or_none(bitrate, scale=1000) or None, + 'quality': qualities(self.QUALITIES)(res), + }, res + + known_resolutions = {} + + def extract_addr(addr, add_meta={}): + parsed_meta, res = parse_url_key(addr.get('url_key', '')) + if res: + known_resolutions.setdefault(res, {}).setdefault('height', add_meta.get('height')) + known_resolutions[res].setdefault('width', add_meta.get('width')) + parsed_meta.update(known_resolutions.get(res, {})) + add_meta.setdefault('height', int_or_none(res[:-1])) + return [{ + 'url': url, + 'filesize': int_or_none(addr.get('data_size')), + 'ext': 'mp4', + 'acodec': 'aac', + **add_meta, **parsed_meta + } for url in addr.get('url_list') or []] + + # Hack: Add direct video links first to prioritize them when removing duplicate formats + formats = [] + if video_info.get('play_addr'): + formats.extend(extract_addr(video_info['play_addr'], { + 'format_id': 'play_addr', + 'format_note': 'Direct video', + 'vcodec': 'h265' if traverse_obj( + video_info, 'is_bytevc1', 'is_h265') else 'h264', # Always h264? 
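+                # NB (editorial comment): extract_addr merges this dict as **add_meta before
+                # **parsed_meta, so metadata recovered from the url_key wins on any conflict.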
+ 'width': video_info.get('width'), + 'height': video_info.get('height'), + })) + if video_info.get('download_addr'): + formats.extend(extract_addr(video_info['download_addr'], { + 'format_id': 'download_addr', + 'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''), + 'vcodec': 'h264', + 'width': video_info.get('width'), + 'height': video_info.get('height'), + 'source_preference': -2 if video_info.get('has_watermark') else -1, + })) + if video_info.get('play_addr_h264'): + formats.extend(extract_addr(video_info['play_addr_h264'], { + 'format_id': 'play_addr_h264', + 'format_note': 'Direct video', + 'vcodec': 'h264', + })) + if video_info.get('play_addr_bytevc1'): + formats.extend(extract_addr(video_info['play_addr_bytevc1'], { + 'format_id': 'play_addr_bytevc1', + 'format_note': 'Direct video', + 'vcodec': 'h265', + })) + + for bitrate in video_info.get('bit_rate', []): + if bitrate.get('play_addr'): + formats.extend(extract_addr(bitrate['play_addr'], { + 'format_id': bitrate.get('gear_name'), + 'format_note': 'Playback video', + 'tbr': try_get(bitrate, lambda x: x['bit_rate'] / 1000), + 'vcodec': 'h265' if traverse_obj( + bitrate, 'is_bytevc1', 'is_h265') else 'h264', + })) + + self._remove_duplicate_formats(formats) + self._sort_formats(formats, ('quality', 'source', 'codec', 'size', 'br')) + + thumbnails = [] + for cover_id in ('cover', 'ai_dynamic_cover', 'animated_cover', 'ai_dynamic_cover_bak', + 'origin_cover', 'dynamic_cover'): + cover = video_info.get(cover_id) + if cover: + for cover_url in cover['url_list']: + thumbnails.append({ + 'id': cover_id, + 'url': cover_url, + }) + + stats_info = aweme_detail.get('statistics', {}) + author_info = aweme_detail.get('author', {}) + music_info = aweme_detail.get('music', {}) + user_id = str_or_none(author_info.get('nickname')) + + contained_music_track = traverse_obj( + music_info, ('matched_song', 'title'), ('matched_pgc_sound', 'title'), expected_type=str) + contained_music_author = traverse_obj( + music_info, ('matched_song', 'author'), ('matched_pgc_sound', 'author'), 'author', expected_type=str) + + is_generic_og_trackname = music_info.get('is_original_sound') and music_info.get('title') == 'original sound - %s' % music_info.get('owner_handle') + if is_generic_og_trackname: + music_track, music_author = contained_music_track or 'original sound', contained_music_author + else: + music_track, music_author = music_info.get('title'), music_info.get('author') + + return { + 'id': aweme_id, + 'title': aweme_detail['desc'], + 'description': aweme_detail['desc'], + 'view_count': int_or_none(stats_info.get('play_count')), + 'like_count': int_or_none(stats_info.get('digg_count')), + 'repost_count': int_or_none(stats_info.get('share_count')), + 'comment_count': int_or_none(stats_info.get('comment_count')), + 'uploader': str_or_none(author_info.get('unique_id')), + 'creator': user_id, + 'uploader_id': str_or_none(author_info.get('uid')), + 'uploader_url': f'https://www.tiktok.com/@{user_id}' if user_id else None, + 'track': music_track, + 'album': str_or_none(music_info.get('album')) or None, + 'artist': music_author, + 'timestamp': int_or_none(aweme_detail.get('create_time')), + 'formats': formats, + 'thumbnails': thumbnails, + 'duration': int_or_none(traverse_obj(video_info, 'duration', ('download_addr', 'duration')), scale=1000) + } + def _real_extract(self, url): video_id = self._match_id(url) + try: + return self._extract_aweme_app(video_id) + except ExtractorError as e: + self.report_warning(f'{e}; 
Retrying with webpage')
+
+        # If we only call once, we get a 403 when downloading the video.
+        self._download_webpage(url, video_id)
         webpage = self._download_webpage(url, video_id, note='Downloading video webpage')
@@ -165,37 +358,7 @@ class TikTokUserIE(InfoExtractor):
         for video in data_json.get('itemList', []):
             video_id = video['id']
             video_url = f'https://www.tiktok.com/@{user_id}/video/{video_id}'
-            download_url = try_get(video, (lambda x: x['video']['playAddr'],
-                                           lambda x: x['video']['downloadAddr']))
-            thumbnail = try_get(video, lambda x: x['video']['originCover'])
-            height = try_get(video, lambda x: x['video']['height'], int)
-            width = try_get(video, lambda x: x['video']['width'], int)
-            yield {
-                'id': video_id,
-                'ie_key': TikTokIE.ie_key(),
-                'extractor': 'TikTok',
-                'url': download_url,
-                'ext': 'mp4',
-                'height': height,
-                'width': width,
-                'title': str_or_none(video.get('desc')),
-                'duration': try_get(video, lambda x: x['video']['duration'], int),
-                'view_count': try_get(video, lambda x: x['stats']['playCount'], int),
-                'like_count': try_get(video, lambda x: x['stats']['diggCount'], int),
-                'comment_count': try_get(video, lambda x: x['stats']['commentCount'], int),
-                'repost_count': try_get(video, lambda x: x['stats']['shareCount'], int),
-                'timestamp': video.get('createTime'),
-                'creator': try_get(video, lambda x: x['author']['nickname'], str),
-                'uploader': try_get(video, lambda x: x['author']['uniqueId'], str),
-                'uploader_id': try_get(video, lambda x: x['author']['id'], str),
-                'uploader_url': f'https://www.tiktok.com/@{user_id}',
-                'thumbnails': [{'url': thumbnail, 'height': height, 'width': width}],
-                'description': str_or_none(video.get('desc')),
-                'webpage_url': video_url,
-                'http_headers': {
-                    'Referer': video_url,
-                }
-            }
+            yield self.url_result(video_url, 'TikTok', video_id, str_or_none(video.get('desc')))
         if not data_json.get('hasMore'):
             break
         cursor = data_json['cursor']

From bccdbd22d559cc22b23bbd2ff96075ea5d88c944 Mon Sep 17 00:00:00 2001
From: coletdjnz
Date: Mon, 6 Sep 2021 06:52:38 +0000
Subject: [PATCH 064/641] [Mediaklikk] Add Extractor (#867)

Original PR: https://github.com/ytdl-org/youtube-dl/pull/17453, https://github.com/ytdl-org/youtube-dl/pull/25098
Fixes: https://github.com/ytdl-org/youtube-dl/issues/21431
Authored-by: tmarki, mrx23dot, coletdjnz
---
 yt_dlp/extractor/extractors.py |   1 +
 yt_dlp/extractor/mediaklikk.py | 104 +++++++++++++++++++++++++++++++++
 yt_dlp/utils.py                |   1 +
 3 files changed, 106 insertions(+)
 create mode 100644 yt_dlp/extractor/mediaklikk.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 4ef581b076..c745fd0793 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -725,6 +725,7 @@ from .massengeschmacktv import MassengeschmackTVIE
 from .matchtv import MatchTVIE
 from .mdr import MDRIE
 from .medaltv import MedalTVIE
+from .mediaklikk import MediaKlikkIE
 from .mediaset import MediasetIE
 from .mediasite import (
     MediasiteIE,
diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py
new file mode 100644
index 0000000000..b9b6d739f5
--- /dev/null
+++ b/yt_dlp/extractor/mediaklikk.py
@@ -0,0 +1,104 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from ..utils import (
+    unified_strdate
+)
+from .common import InfoExtractor
+from ..compat import (
+    compat_urllib_parse_unquote,
+    compat_str
+)
+
+
+class MediaKlikkIE(InfoExtractor):
+    _VALID_URL = r'''(?x)^https?:\/\/(?:www\.)?
+                        (?:mediaklikk|m4sport|hirado|petofilive)\.hu\/.*?videok?\/
+                        (?:(?P<year>[0-9]{4})/(?P<month>[0-9]{1,2})/(?P<day>[0-9]{1,2})/)?
+                        (?P<id>[^/#?_]+)'''
+
+    _TESTS = [{
+        # mediaklikk. date in html.
+        'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
+        'info_dict': {
+            'id': '4754129',
+            'title': 'Hazajáró, DÉLNYUGAT-BÁCSKA – A Duna mentén Palánkától Doroszlóig',
+            'ext': 'mp4',
+            'upload_date': '20210901',
+            'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
+        }
+    }, {
+        # m4sport
+        'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
+        'info_dict': {
+            'id': '4754999',
+            'title': 'Gyémánt Liga, Párizs',
+            'ext': 'mp4',
+            'upload_date': '20210830',
+            'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg'
+        }
+    }, {
+        # m4sport with *video/ url and no date
+        'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/',
+        'info_dict': {
+            'id': '4492099',
+            'title': 'Real Madrid - Chelsea 1-1',
+            'ext': 'mp4',
+            'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png'
+        }
+    }, {
+        # hirado
+        'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
+        'info_dict': {
+            'id': '4760120',
+            'title': 'Feltételeket szabott a főváros',
+            'ext': 'mp4',
+            'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg'
+        }
+    }, {
+        # petofilive
+        'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
+        'info_dict': {
+            'id': '4571948',
+            'title': 'Tha Shudras az Akusztikban',
+            'ext': 'mp4',
+            'upload_date': '20210607',
+            'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg'
+        }
+    }]
+
+    def _real_extract(self, url):
+        mobj = self._match_valid_url(url)
+        display_id = mobj.group('id')
+        webpage = self._download_webpage(url, display_id)
+
+        player_data_str = self._html_search_regex(
+            r'mtva_player_manager\.player\(document.getElementById\(.*\),\s?(\{.*\}).*\);', webpage, 'player data')
+        player_data = self._parse_json(player_data_str, display_id, compat_urllib_parse_unquote)
+        video_id = compat_str(player_data['contentId'])
+        title = player_data.get('title') or self._og_search_title(webpage, fatal=False) or \
+            self._html_search_regex(r'<h\d+\b[^>]+\bclass="article_title">([^<]+)<', webpage, 'title')
+
+        upload_date = unified_strdate(
+            '%s-%s-%s' % (mobj.group('year'), mobj.group('month'), mobj.group('day')))
+        if not upload_date:
+            upload_date = unified_strdate(self._html_search_regex(
+                r'<p+\b[^>]+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None))
+
+        player_data['video'] = player_data.pop('token')
+        player_page = self._download_webpage('https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)
+        playlist_url = self._proto_relative_url(compat_urllib_parse_unquote(
+            self._html_search_regex(r'\"file\":\s*\"(\\?/\\?/.*playlist\.m3u8)\"', player_page, 'playlist_url')).replace('\\/', '/'))
+
+        formats = self._extract_wowza_formats(
+            playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'display_id': display_id,
+            'formats': formats,
+            'upload_date': upload_date,
+            'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage)
+        }
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index ce84f74166..983ca6cede 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -1740,6 +1740,7 @@ DATE_FORMATS = (
     '%b %dth %Y %I:%M',
     '%Y %m %d',
     '%Y-%m-%d',
+    '%Y.%m.%d.',
     '%Y/%m/%d',
     '%Y/%m/%d %H:%M',
     '%Y/%m/%d %H:%M:%S',

From b6de707d13ca3b7a573d9695b7fc0616fe394f60 Mon Sep 17 00:00:00 2001
From: coletdjnz
Date: Mon, 6 Sep 2021 07:26:41 +0000
Subject: [PATCH 065/641] [youtube] Improvements to JS player extraction (See
 desc) (#860)

* fall back to alternative player url extraction when it fails to be extracted from the webpage
* don't download the js player unnecessarily for clients that don't require it
* try to extract the js player url from any additional client configs
* ability to skip the js player usage/download using `player_skip=js`
* ability to skip the initial webpage download using `player_skip=webpage`

known issues:
* authentication for multi-channel accounts and multi-account cookies may not work correctly if the webpage or client configs are skipped
* formats from the web client requiring signature decryption will be skipped if player js extraction is skipped

Authored by: coletdjnz
---
 README.md                   |  2 +-
 yt_dlp/extractor/youtube.py | 83 +++++++++++++++++++++++++------------
 2 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/README.md b/README.md
index 2e4bedc938..d9daee69e6 100644
--- a/README.md
+++ b/README.md
@@ -1436,7 +1436,7 @@ The following extractors use this feature:
 * **youtube**
     * `skip`: `hls` or `dash` (or both) to skip download of the respective manifests
     * `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (Eg: `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can also use `all` to use all the clients
-    * `player_skip`: `configs` - skip any requests for client configs and use defaults
+    * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
     * `include_live_dash`: Include live dash formats (These formats don't download properly)
    * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side).
     * `max_comments`: Maximum amount of comments to download (default all).
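
[Editorial note: the `player_skip` values documented above are passed through yt-dlp's `--extractor-args` option. A sketch of an invocation that skips both the initial webpage and the JS player, accepting the caveats listed in the commit message (the watch URL here is only an example):

    yt-dlp --extractor-args "youtube:player_skip=webpage,js" "https://www.youtube.com/watch?v=BaW_jenozKc"
]
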
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 65a6c043e0..1549c36dfe 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -117,6 +117,7 @@ INNERTUBE_CLIENTS = { } }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 3, + 'REQUIRE_JS_PLAYER': False }, 'android_embedded': { 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', @@ -126,7 +127,8 @@ INNERTUBE_CLIENTS = { 'clientVersion': '16.20', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 55 + 'INNERTUBE_CONTEXT_CLIENT_NAME': 55, + 'REQUIRE_JS_PLAYER': False }, 'android_music': { 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30', @@ -138,6 +140,7 @@ INNERTUBE_CLIENTS = { } }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 21, + 'REQUIRE_JS_PLAYER': False }, 'android_creator': { 'INNERTUBE_CONTEXT': { @@ -146,7 +149,8 @@ INNERTUBE_CLIENTS = { 'clientVersion': '21.24.100', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 14 + 'INNERTUBE_CONTEXT_CLIENT_NAME': 14, + 'REQUIRE_JS_PLAYER': False }, # ios has HLS live streams # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680 @@ -158,7 +162,8 @@ INNERTUBE_CLIENTS = { 'clientVersion': '16.20', } }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 5 + 'INNERTUBE_CONTEXT_CLIENT_NAME': 5, + 'REQUIRE_JS_PLAYER': False }, 'ios_embedded': { 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8', @@ -168,7 +173,8 @@ INNERTUBE_CLIENTS = { 'clientVersion': '16.20', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 66 + 'INNERTUBE_CONTEXT_CLIENT_NAME': 66, + 'REQUIRE_JS_PLAYER': False }, 'ios_music': { 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og', @@ -179,7 +185,8 @@ INNERTUBE_CLIENTS = { 'clientVersion': '4.32', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 26 + 'INNERTUBE_CONTEXT_CLIENT_NAME': 26, + 'REQUIRE_JS_PLAYER': False }, 'ios_creator': { 'INNERTUBE_CONTEXT': { @@ -188,7 +195,8 @@ INNERTUBE_CLIENTS = { 'clientVersion': '21.24.100', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 15 + 'INNERTUBE_CONTEXT_CLIENT_NAME': 15, + 'REQUIRE_JS_PLAYER': False }, # mweb has 'ultralow' formats # See: https://github.com/yt-dlp/yt-dlp/pull/557 @@ -215,6 +223,7 @@ def build_innertube_clients(): for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()): ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8') ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com') + ytcfg.setdefault('REQUIRE_JS_PLAYER', True) ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en') ytcfg['priority'] = 10 * priority(client.split('_', 1)[0]) @@ -1858,14 +1867,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self._code_cache = {} self._player_cache = {} - def _extract_player_url(self, ytcfg=None, webpage=None): - player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str) - if not player_url and webpage: - player_url = self._search_regex( - r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"', - webpage, 'player URL', fatal=False) + def _extract_player_url(self, *ytcfgs, webpage=None): + player_url = traverse_obj( + ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'), + get_all=False, expected_type=compat_str) if not player_url: - return None + return if player_url.startswith('//'): player_url = 'https:' + player_url elif not re.match(r'https?://', player_url): @@ -1873,6 +1880,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'https://www.youtube.com', player_url) return player_url + def _download_player_url(self, video_id, fatal=False): + res = self._download_webpage( + 'https://www.youtube.com/iframe_api', + note='Downloading iframe API JS', 
video_id=video_id, fatal=fatal) + if res: + player_version = self._search_regex( + r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal) + if player_version: + return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js' + def _signature_cache_id(self, example_sig): """ Return a string representation of a signature """ return '.'.join(compat_str(len(part)) for part in example_sig.split('.')) @@ -2462,7 +2479,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): session_index = self._extract_session_index(player_ytcfg, master_ytcfg) syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr) - sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) + sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None headers = self.generate_api_headers( player_ytcfg, identity_token, syncid, default_client=client, session_index=session_index) @@ -2507,7 +2524,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config') return self.extract_ytcfg(video_id, webpage) or {} - def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token): + def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, identity_token): initial_pr = None if webpage: initial_pr = self._extract_yt_initial_variable( @@ -2516,6 +2533,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): original_clients = clients clients = clients[::-1] + prs = [] def append_client(client_name): if client_name in INNERTUBE_CLIENTS and client_name not in original_clients: @@ -2525,23 +2543,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # extraction of some data. 
So we return the initial_pr with formats # stripped out even if not requested by the user # See: https://github.com/yt-dlp/yt-dlp/issues/501 - yielded_pr = False if initial_pr: pr = dict(initial_pr) pr['streamingData'] = None - yielded_pr = True - yield pr + prs.append(pr) last_error = None + tried_iframe_fallback = False + player_url = None while clients: client = clients.pop() player_ytcfg = master_ytcfg if client == 'web' else {} if 'configs' not in self._configuration_arg('player_skip'): player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg + player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage) + require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER') + if 'js' in self._configuration_arg('player_skip'): + require_js_player = False + player_url = None + + if not player_url and not tried_iframe_fallback and require_js_player: + player_url = self._download_player_url(video_id) + tried_iframe_fallback = True + try: pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response( - client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr) + client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url if require_js_player else None, initial_pr) except ExtractorError as e: if last_error: self.report_warning(last_error) @@ -2549,8 +2577,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): continue if pr: - yielded_pr = True - yield pr + prs.append(pr) # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in if client.endswith('_agegate') and self._is_unplayable(pr) and self._generate_sapisidhash_header(): @@ -2559,9 +2586,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): append_client(f'{client}_agegate') if last_error: - if not yielded_pr: + if not len(prs): raise last_error self.report_warning(last_error) + return prs, player_url def _extract_formats(self, streaming_data, video_id, player_url, is_live): itags, stream_ids = [], [] @@ -2708,16 +2736,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): base_url = self.http_scheme() + '//www.youtube.com/' webpage_url = base_url + 'watch?v=' + video_id - webpage = self._download_webpage( - webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False) + webpage = None + if 'webpage' not in self._configuration_arg('player_skip'): + webpage = self._download_webpage( + webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False) master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg() - player_url = self._extract_player_url(master_ytcfg, webpage) identity_token = self._extract_identity_token(webpage, video_id) - player_responses = list(self._extract_player_responses( + player_responses, player_url = self._extract_player_responses( self._get_requested_clients(url, smuggled_data), - video_id, webpage, master_ytcfg, player_url, identity_token)) + video_id, webpage, master_ytcfg, identity_token) get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False) From 92ddaa415e20134eaa20421e16bb692dc5e1f18d Mon Sep 17 00:00:00 2001 From: Poschi <825911+poschi3@users.noreply.github.com> Date: Tue, 7 Sep 2021 19:11:56 +0200 Subject: [PATCH 066/641] [gotostage] Add extractor (#883) Authored by: poschi3 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/gotostage.py | 73 ++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 yt_dlp/extractor/gotostage.py 
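
[Editorial note on the extractor added below: it registers a throwaway attendee (dummy name and e-mail) against the LogMeIn registrations API, tolerating a 409 "already registered" response via expected_status=409, and then presents the returned registrationKey as the x-registrantkey header to resolve the recording's cdnLocation.]
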
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index c745fd0793..736868a09a 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -511,6 +511,7 @@ from .googlepodcasts import (
 )
 from .googlesearch import GoogleSearchIE
 from .goshgay import GoshgayIE
+from .gotostage import GoToStageIE
 from .gputechconf import GPUTechConfIE
 from .groupon import GrouponIE
 from .hbo import HBOIE
diff --git a/yt_dlp/extractor/gotostage.py b/yt_dlp/extractor/gotostage.py
new file mode 100644
index 0000000000..6aa96106a6
--- /dev/null
+++ b/yt_dlp/extractor/gotostage.py
@@ -0,0 +1,73 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    try_get,
+    url_or_none
+)
+
+import json
+
+
+class GoToStageIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?gotostage\.com/channel/[a-z0-9]+/recording/(?P<id>[a-z0-9]+)/watch'
+    _TESTS = [{
+        'url': 'https://www.gotostage.com/channel/8901680603948959494/recording/60bb55548d434f21b9ce4f0e225c4895/watch',
+        'md5': 'ca72ce990cdcd7a2bd152f7217e319a2',
+        'info_dict': {
+            'id': '60bb55548d434f21b9ce4f0e225c4895',
+            'ext': 'mp4',
+            'title': 'What is GoToStage?',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 93.924711
+        }
+    }, {
+        'url': 'https://www.gotostage.com/channel/bacc3d3535b34bafacc3f4ef8d4df78a/recording/831e74cd3e0042be96defba627b6f676/watch?source=HOMEPAGE',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        metadata = self._download_json(
+            'https://api.gotostage.com/contents?ids=%s' % video_id,
+            video_id,
+            note='Downloading video metadata',
+            errnote='Unable to download video metadata')[0]
+
+        registration_data = {
+            'product': metadata['product'],
+            'resourceType': metadata['contentType'],
+            'productReferenceKey': metadata['productRefKey'],
+            'firstName': 'foo',
+            'lastName': 'bar',
+            'email': 'foobar@example.com'
+        }
+
+        registration_response = self._download_json(
+            'https://api-registrations.logmeininc.com/registrations',
+            video_id,
+            data=json.dumps(registration_data).encode(),
+            expected_status=409,
+            headers={'Content-Type': 'application/json'},
+            note='Register user',
+            errnote='Unable to register user')
+
+        content_response = self._download_json(
+            'https://api.gotostage.com/contents/%s/asset' % video_id,
+            video_id,
+            headers={'x-registrantkey': registration_response['registrationKey']},
+            note='Get download url',
+            errnote='Unable to get download url')
+
+        return {
+            'id': video_id,
+            'title': try_get(metadata, lambda x: x['title'], compat_str),
+            'url': try_get(content_response, lambda x: x['cdnLocation'], compat_str),
+            'ext': 'mp4',
+            'thumbnail': url_or_none(try_get(metadata, lambda x: x['thumbnail']['location'])),
+            'duration': try_get(metadata, lambda x: x['duration'], float),
+            'categories': [try_get(metadata, lambda x: x['category'], compat_str)],
+            'is_live': False
+        }

From dc9de9cbd24a53262de4a3e169bed4e681c22810 Mon Sep 17 00:00:00 2001
From: Ashish <39122144+Ashish0804@users.noreply.github.com>
Date: Tue, 7 Sep 2021 23:03:19 +0530
Subject: [PATCH 067/641] [Yandex] Add ZenYandexIE and ZenYandexChannelIE (#900)

Authored by: Ashish0804
---
 yt_dlp/extractor/extractors.py  |  6 ++-
 yt_dlp/extractor/yandexvideo.py | 88 +++++++++++++++++++++++++++++++++
 2 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 736868a09a..18df1549bd 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1765,7 +1765,11 @@ from .yandexmusic import (
     YandexMusicArtistTracksIE,
     YandexMusicArtistAlbumsIE,
 )
-from .yandexvideo import YandexVideoIE
+from .yandexvideo import (
+    YandexVideoIE,
+    ZenYandexIE,
+    ZenYandexChannelIE,
+)
 from .yapfiles import YapFilesIE
 from .yesjapan import YesJapanIE
 from .yinyuetai import YinYueTaiIE
diff --git a/yt_dlp/extractor/yandexvideo.py b/yt_dlp/extractor/yandexvideo.py
index 6a166ec9b9..9974d65d6e 100644
--- a/yt_dlp/extractor/yandexvideo.py
+++ b/yt_dlp/extractor/yandexvideo.py
@@ -1,6 +1,9 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import itertools
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
@@ -142,3 +145,88 @@ class YandexVideoIE(InfoExtractor):
         'release_year': int_or_none(content.get('release_year')),
         'formats': formats,
     }
+
+
+class ZenYandexIE(InfoExtractor):
+    _VALID_URL = r'https?://zen\.yandex\.ru/media/(?:id/[^/]+/|[^/]+/)(?:[a-z0-9-]+)-(?P<id>[a-z0-9-]+)'
+    _TESTS = [{
+        'url': 'https://zen.yandex.ru/media/popmech/izverjenie-vulkana-iz-spichek-zreliscnyi-opyt-6002240ff8b1af50bb2da5e3',
+        'info_dict': {
+            'id': '6002240ff8b1af50bb2da5e3',
+            'ext': 'mp4',
+            'title': 'Извержение вулкана из спичек: зрелищный опыт',
+            'description': 'md5:053ad3c61b5596d510c9a199dc8ee633',
+            'thumbnail': 'https://avatars.mds.yandex.net/get-zen-pub-og/3558619/pub_6002240ff8b1af50bb2da5e3_600bad814d953e4132a30b5e/orig',
+            'uploader': 'Популярная механика',
+        },
+    }, {
+        'url': 'https://zen.yandex.ru/media/id/606fd806cc13cb3c58c05cf5/vot-eto-focus-dedy-morozy-na-gidrociklah-60c7c443da18892ebfe85ed7',
+        'info_dict': {
+            'id': '60c7c443da18892ebfe85ed7',
+            'ext': 'mp4',
+            'title': 'ВОТ ЭТО Focus. Деды Морозы на гидроциклах',
+            'description': 'md5:8684912f6086f298f8078d4af0e8a600',
+            'thumbnail': 'https://avatars.mds.yandex.net/get-zen-pub-og/4410519/pub_60c7c443da18892ebfe85ed7_60c7c48e060a163121f42cc3/orig',
+            'uploader': 'AcademeG DailyStream'
+        },
+    }, {
+        'url': 'https://zen.yandex.ru/media/id/606fd806cc13cb3c58c05cf5/novyi-samsung-fold-3-moskvich-barahlit-612f93b7f8d48e7e945792a2?from=channel&rid=2286618386.482.1630817595976.42360',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        id = self._match_id(url)
+        webpage = self._download_webpage(url, id)
+        data_json = self._parse_json(self._search_regex(r'w\._data\s?=\s?({.+?});', webpage, 'metadata'), id)
+        stream_json = try_get(data_json, lambda x: x['publication']['content']['gifContent'], dict)
+        stream_url = stream_json.get('stream') or try_get(stream_json, lambda x: x['streams']['url'])
+        formats = self._extract_m3u8_formats(stream_url, id)
+        self._sort_formats(formats)
+        return {
+            'id': id,
+            'title': try_get(data_json, (lambda x: x['og']['title'], lambda x: x['publication']['content']['preview']['title'])),
+            'uploader': data_json.get('authorName') or try_get(data_json, lambda x: x['publisher']['name']),
+            'description': try_get(data_json, lambda x: x['og']['description']),
+            'thumbnail': try_get(data_json, lambda x: x['og']['imageUrl']),
+            'formats': formats,
+        }
+
+
+class ZenYandexChannelIE(InfoExtractor):
+    _VALID_URL = r'https?://zen\.yandex\.ru/(?!media)(?:id/)?(?P<id>[a-z0-9-_]+)'
+    _TESTS = [{
+        'url': 'https://zen.yandex.ru/tok_media',
+        'info_dict': {
+            'id': 'tok_media',
+        },
+        'playlist_mincount': 169,
+    }, {
+        'url': 'https://zen.yandex.ru/id/606fd806cc13cb3c58c05cf5',
+        'info_dict': {
+            'id': '606fd806cc13cb3c58c05cf5',
+        },
+        'playlist_mincount': 657,
+    }]
+
+    def _entries(self, id, url):
+        webpage = self._download_webpage(url, id)
+        data_json = self._parse_json(re.findall(r'var\s?data\s?=\s?({.+?})\s?;', webpage)[-1], id)
+        for key in data_json.keys():
+            if key.startswith('__serverState__'):
+                data_json = data_json[key]
+        items = list(try_get(data_json, lambda x: x['feed']['items'], dict).values())
+        more = try_get(data_json, lambda x: x['links']['more']) or None
+        for page in itertools.count(1):
+            for item in items:
+                video_id = item.get('publication_id') or item.get('publicationId')
+                video_url = item.get('link')
+                yield self.url_result(video_url, ie=ZenYandexIE.ie_key(), video_id=video_id.split(':')[-1])
+            if not more:
+                break
+            data_json = self._download_json(more, id, note='Downloading Page %d' % page)
+            items = data_json.get('items', [])
+            more = try_get(data_json, lambda x: x['more']['link']) or None
+
+    def _real_extract(self, url):
+        id = self._match_id(url)
+        return self.playlist_result(self._entries(id, url), playlist_id=id)

From 71407b3ecaf9346fe316a24d1753d365ed343ee7 Mon Sep 17 00:00:00 2001
From: Ashish <39122144+Ashish0804@users.noreply.github.com>
Date: Tue, 7 Sep 2021 23:05:27 +0530
Subject: [PATCH 068/641] [Olympics] Add replay extractor (#905)

Closes #897
Authored by: Ashish0804
---
 yt_dlp/extractor/extractors.py |  1 +
 yt_dlp/extractor/olympics.py   | 56 ++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+)
 create mode 100644 yt_dlp/extractor/olympics.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 18df1549bd..4910bd14fe 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -961,6 +961,7 @@ from .nzz import NZZIE
 from .odatv import OdaTVIE
 from .odnoklassniki import OdnoklassnikiIE
 from .oktoberfesttv import OktoberfestTVIE
+from .olympics import OlympicsReplayIE
 from .ondemandkorea import OnDemandKoreaIE
 from .onet import (
     OnetIE,
diff --git a/yt_dlp/extractor/olympics.py b/yt_dlp/extractor/olympics.py
new file mode 100644
index 0000000000..0bc9206ed5
--- /dev/null
+++ b/yt_dlp/extractor/olympics.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class OlympicsReplayIE(InfoExtractor):
+    _VALID_URL = r'(?:https?://)(?:www\.)?olympics\.com/tokyo-2020/(?:[a-z]{2}/)?replay/(?P<id>[^/#&?]+)'
+    _TESTS = [{
+        'url': 'https://olympics.com/tokyo-2020/en/replay/300622eb-abc0-43ea-b03b-c5f2d429ec7b/jumping-team-qualifier',
+        'info_dict': {
+            'id': '300622eb-abc0-43ea-b03b-c5f2d429ec7b',
+            'ext': 'mp4',
+            'title': 'Jumping Team Qualifier',
+            'release_date': '20210806',
+            'upload_date': '20210713',
+        },
+        'params': {
+            'format': 'bv',
+        },
+    }, {
+        'url': 'https://olympics.com/tokyo-2020/en/replay/bd242924-4b22-49a5-a846-f1d4c809250d/mens-bronze-medal-match-hun-esp',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        id = self._match_id(url)
+        # The parameters are hardcoded in the webpage, it's not necessary to download the webpage just for these parameters.
+        # If downloading the webpage serves other functions as well, then extract these parameters from it.
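+        # Editorial comment: the app token fetched below must accompany every
+        # subsequent OBS API request via the 'x-obs-app-token' header.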
+ token_url = 'https://appovptok.ovpobs.tv/api/identity/app/token?api_key=OTk5NDcxOjpvY3N3LWFwaXVzZXI%3D&api_secret=ODY4ODM2MjE3ODMwYmVjNTAxMWZlMDJiMTYxZmY0MjFiMjMwMjllMjJmNDA1YWRiYzA5ODcxYTZjZTljZDkxOTo6NTM2NWIzNjRlMTM1ZmI2YWNjNmYzMGMzOGM3NzZhZTY%3D' + token = self._download_webpage(token_url, id) + headers = {'x-obs-app-token': token} + data_json = self._download_json(f'https://appocswtok.ovpobs.tv/api/schedule-sessions/{id}?include=stream', + id, headers=headers) + meta_data = data_json['data']['attributes'] + for t_dict in data_json['included']: + if t_dict.get('type') == 'Stream': + stream_data = t_dict['attributes'] + m3u8_url = self._download_json( + 'https://meteringtok.ovpobs.tv/api/playback-sessions', id, headers=headers, query={ + 'alias': stream_data['alias'], + 'stream': stream_data['stream'], + 'type': 'vod' + })['data']['attributes']['url'] + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id) + self._sort_formats(formats) + + return { + 'id': id, + 'title': meta_data['title'], + 'release_date': unified_strdate(meta_data.get('start') or meta_data.get('broadcastPublished')), + 'upload_date': unified_strdate(meta_data.get('publishedAt')), + 'formats': formats, + 'subtitles': subtitles, + } From a7e999beeca17909bb0088d796c3181b4f35144e Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Tue, 7 Sep 2021 20:59:20 +0000 Subject: [PATCH 069/641] [pbs] Fix subtitle extraction (#813) Original PR: https://github.com/ytdl-org/youtube-dl/pull/24430, https://github.com/ytdl-org/youtube-dl/pull/17434 Closes: #836, https://github.com/ytdl-org/youtube-dl/issues/18796, https://github.com/ytdl-org/youtube-dl/issues/17273 Authored-by: coletdjnz, gesa, raphaeldore --- test/test_subtitles.py | 38 ++++++++++++++++++++++++++++++++++++++ yt_dlp/extractor/pbs.py | 31 ++++++++++--------------------- 2 files changed, 48 insertions(+), 21 deletions(-) diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 0c5b49ee8c..9b39dbd39b 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -19,6 +19,7 @@ from yt_dlp.extractor import ( CeskaTelevizeIE, LyndaIE, NPOIE, + PBSIE, ComedyCentralIE, NRKTVIE, RaiPlayIE, @@ -372,5 +373,42 @@ class TestDemocracynowSubtitles(BaseTestSubtitles): self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c') +@is_download_test +class TestPBSSubtitles(BaseTestSubtitles): + url = 'https://www.pbs.org/video/how-fantasy-reflects-our-world-picecq/' + IE = PBSIE + + def test_allsubtitles(self): + self.DL.params['writesubtitles'] = True + self.DL.params['allsubtitles'] = True + subtitles = self.getSubtitles() + self.assertEqual(set(subtitles.keys()), set(['en'])) + + def test_subtitles_dfxp_format(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitlesformat'] = 'dfxp' + subtitles = self.getSubtitles() + self.assertIn(md5(subtitles['en']), ['643b034254cdc3768ff1e750b6b5873b']) + + def test_subtitles_vtt_format(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitlesformat'] = 'vtt' + subtitles = self.getSubtitles() + self.assertIn( + md5(subtitles['en']), ['937a05711555b165d4c55a9667017045', 'f49ea998d6824d94959c8152a368ff73']) + + def test_subtitles_srt_format(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitlesformat'] = 'srt' + subtitles = self.getSubtitles() + self.assertIn(md5(subtitles['en']), ['2082c21b43759d9bf172931b2f2ca371']) + + def test_subtitles_sami_format(self): + self.DL.params['writesubtitles'] = True + self.DL.params['subtitlesformat'] = 
'sami' + subtitles = self.getSubtitles() + self.assertIn(md5(subtitles['en']), ['4256b16ac7da6a6780fafd04294e85cd']) + + if __name__ == '__main__': unittest.main() diff --git a/yt_dlp/extractor/pbs.py b/yt_dlp/extractor/pbs.py index d68855d62d..0eabf9beee 100644 --- a/yt_dlp/extractor/pbs.py +++ b/yt_dlp/extractor/pbs.py @@ -600,6 +600,7 @@ class PBSIE(InfoExtractor): formats = [] http_url = None + hls_subs = {} for num, redirect in enumerate(redirects): redirect_id = redirect.get('eeid') @@ -622,8 +623,9 @@ class PBSIE(InfoExtractor): continue if determine_ext(format_url) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, display_id, 'mp4', m3u8_id='hls', fatal=False)) + hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles( + format_url, display_id, 'mp4', m3u8_id='hls', fatal=False) + formats.extend(hls_formats) else: formats.append({ 'url': format_url, @@ -666,25 +668,12 @@ class PBSIE(InfoExtractor): age_limit = US_RATINGS.get(rating_str) subtitles = {} - closed_captions_url = info.get('closed_captions_url') - if closed_captions_url: - subtitles['en'] = [{ - 'ext': 'ttml', - 'url': closed_captions_url, - }] - mobj = re.search(r'/(\d+)_Encoded\.dfxp', closed_captions_url) - if mobj: - ttml_caption_suffix, ttml_caption_id = mobj.group(0, 1) - ttml_caption_id = int(ttml_caption_id) - subtitles['en'].extend([{ - 'url': closed_captions_url.replace( - ttml_caption_suffix, '/%d_Encoded.srt' % (ttml_caption_id + 1)), - 'ext': 'srt', - }, { - 'url': closed_captions_url.replace( - ttml_caption_suffix, '/%d_Encoded.vtt' % (ttml_caption_id + 2)), - 'ext': 'vtt', - }]) + captions = info.get('cc') or {} + for caption_url in captions.values(): + subtitles.setdefault('en', []).append({ + 'url': caption_url + }) + subtitles = self._merge_subtitles(subtitles, hls_subs) # info['title'] is often incomplete (e.g. 
'Full Episode', 'Episode 5', etc)
         # Try turning it to 'program - title' naming scheme if possible

From eab3f867e246b064ff8cd38460f93623b03b4540 Mon Sep 17 00:00:00 2001
From: coletdjnz
Date: Tue, 7 Sep 2021 22:49:57 +0000
Subject: [PATCH 070/641] [nzherald] Add NZHeraldIE (#909)

Authored-by: coletdjnz
Related: https://github.com/ytdl-org/youtube-dl/issues/28267
---
 yt_dlp/extractor/extractors.py |  1 +
 yt_dlp/extractor/nzherald.py   | 98 ++++++++++++++++++++++++++++++++++
 2 files changed, 99 insertions(+)
 create mode 100644 yt_dlp/extractor/nzherald.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 4910bd14fe..ee368b7b12 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -957,6 +957,7 @@ from .nytimes import (
     NYTimesCookingIE,
 )
 from .nuvid import NuvidIE
+from .nzherald import NZHeraldIE
 from .nzz import NZZIE
 from .odatv import OdaTVIE
 from .odnoklassniki import OdnoklassnikiIE
diff --git a/yt_dlp/extractor/nzherald.py b/yt_dlp/extractor/nzherald.py
new file mode 100644
index 0000000000..e5601b4953
--- /dev/null
+++ b/yt_dlp/extractor/nzherald.py
@@ -0,0 +1,98 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
+
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    traverse_obj
+)
+
+
+class NZHeraldIE(InfoExtractor):
+    IE_NAME = 'nzherald'
+    _VALID_URL = r'https?://(?:www\.)?nzherald\.co\.nz/[\w\/-]+\/(?P<id>[A-Z0-9]+)'
+    _TESTS = [
+        {
+            'url': 'https://www.nzherald.co.nz/nz/weather-heavy-rain-gales-across-nz-most-days-this-week/PTG7QWY4E2225YHZ5NAIRBTYTQ/',
+            'info_dict': {
+                'id': '6271084466001',
+                'ext': 'mp4',
+                'title': 'MetService severe weather warning: September 6th - 7th',
+                'timestamp': 1630891576,
+                'upload_date': '20210906',
+                'uploader_id': '1308227299001',
+                'description': 'md5:db6ca335a22e2cdf37ab9d2bcda52902'
+            }
+
+        }, {
+            # Webpage has brightcove embed player url
+            'url': 'https://www.nzherald.co.nz/travel/pencarrow-coastal-trail/HDVTPJEPP46HJ2UEMK4EGD2DFI/',
+            'info_dict': {
+                'id': '6261791733001',
+                'ext': 'mp4',
+                'title': 'Pencarrow Coastal Trail',
+                'timestamp': 1625102897,
+                'upload_date': '20210701',
+                'uploader_id': '1308227299001',
+                'description': 'md5:d361aaa0c6498f7ac1bc4fc0a0aec1e4'
+            }
+
+        }, {
+            # two video embeds of the same video
+            'url': 'https://www.nzherald.co.nz/nz/truck-driver-captured-cutting-off-motorist-on-state-highway-1-in-canterbury/FIHNJB7PLLPHWQPK4S7ZBDUC4I/',
+            'info_dict': {
+                'id': '6251114530001',
+                'ext': 'mp4',
+                'title': 'Truck travelling north from Rakaia runs car off road',
+                'timestamp': 1619730509,
+                'upload_date': '20210429',
+                'uploader_id': '1308227299001',
+                'description': 'md5:4cae7dfb7613ac4c73b9e73a75c6b5d7'
+            }
+        }, {
+            'url': 'https://www.nzherald.co.nz/kahu/kaupapa-companies-my-taiao-supporting-maori-in-study-and-business/PQBO2J25WCG77VGRX7W7BVYEAI/',
+            'only_matching': True
+        }, {
+            'url': 'https://nzherald.co.nz/the-country/video/focus-nzs-first-mass-covid-19-vaccination-event/N5I7IL3BRFLZSD33TLDLYJDGK4/',
+            'only_matching': True
+        }, {
+            'url': 'https://www.nzherald.co.nz/the-vision-is-clear/news/tvic-damian-roper-planting-trees-an-addiction/AN2AAEPNRK5VLISDWQAJZB6ATQ',
+            'only_matching': True
+        }
+    ]
+
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1308227299001/S1BXZn8t_default/index.html?videoId=%s'
+
+    def _extract_bc_embed_url(self, webpage):
+        """The initial webpage may include the brightcove player embed url"""
+        bc_url = BrightcoveNewIE._extract_url(self, webpage)
+        return bc_url or self._search_regex(
+            r'(?:embedUrl)\"\s*:\s*\"(?P<embed_url>%s)' % BrightcoveNewIE._VALID_URL,
+            webpage, 'embed url', default=None, group='embed_url')
+
+    def _real_extract(self, url):
+        article_id = self._match_id(url)
+        webpage = self._download_webpage(url, article_id)
+        bc_url = self._extract_bc_embed_url(webpage)
+
+        if not bc_url:
+            fusion_metadata = self._parse_json(
+                self._search_regex(r'Fusion\.globalContent\s*=\s*({.+?})\s*;', webpage, 'fusion metadata'), article_id)
+
+            video_metadata = fusion_metadata.get('video')
+            bc_video_id = traverse_obj(
+                video_metadata or fusion_metadata,  # fusion metadata is the video metadata for video-only pages
+                'brightcoveId', ('content_elements', ..., 'referent', 'id'),
+                get_all=False, expected_type=compat_str)
+
+            if not bc_video_id:
+                if isinstance(video_metadata, dict) and len(video_metadata) == 0:
+                    raise ExtractorError('This article does not have a video.', expected=True)
+                else:
+                    raise ExtractorError('Failed to extract brightcove video id')
+            bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_video_id
+
+        return self.url_result(bc_url, 'BrightcoveNew')

From 81a136b80f3d29c73884bb116f869df44bfd6fa1 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Wed, 8 Sep 2021 16:10:10 +0530
Subject: [PATCH 071/641] [WebVTT] Adjust parser to accommodate PBS subtitles
 (#922)

Closes #921
---
 yt_dlp/webvtt.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py
index eee2a4a2dd..cd936e7e5f 100644
--- a/yt_dlp/webvtt.py
+++ b/yt_dlp/webvtt.py
@@ -89,8 +89,12 @@ class ParseError(Exception):
     ))
 
 
+# While the specification <https://www.w3.org/TR/webvtt1/#webvtt-timestamp>
+# prescribes that hours must be *2 or more* digits, timestamps with a single
+# digit for the hour part have been seen in the wild.
+# See https://github.com/yt-dlp/yt-dlp/issues/921
 _REGEX_TS = re.compile(r'''(?x)
-    (?:([0-9]{2,}):)?
+    (?:([0-9]{1,}):)?
     ([0-9]{2}):
     ([0-9]{2})\.
     ([0-9]{3})?
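
[Editorial note: with the hour quantifier relaxed from {2,} to {1,}, a cue timestamp written with a single-digit hour, as seen in PBS subtitles, now parses. An illustrative WebVTT cue that the old pattern rejected:

    0:00:01.500 --> 0:00:04.000
    example cue text
]
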
@@ -172,6 +176,7 @@ class Magic(HeaderBlock): _REGEX_TSMAP = re.compile(r'X-TIMESTAMP-MAP=') _REGEX_TSMAP_LOCAL = re.compile(r'LOCAL:') _REGEX_TSMAP_MPEGTS = re.compile(r'MPEGTS:([0-9]+)') + _REGEX_TSMAP_SEP = re.compile(r'[ \t]*,[ \t]*') @classmethod def __parse_tsmap(cls, parser): @@ -194,7 +199,7 @@ class Magic(HeaderBlock): raise ParseError(parser) else: raise ParseError(parser) - if parser.consume(','): + if parser.consume(cls._REGEX_TSMAP_SEP): continue if parser.consume(_REGEX_NL): break From 1c5ce74c045530eb3d085e96c1d5f2b3ce88a57c Mon Sep 17 00:00:00 2001 From: Felix S Date: Sat, 11 Sep 2021 10:16:03 +0000 Subject: [PATCH 072/641] [zype] Extract subtitles from the m3u8 manifest (#948) Closes #929 Authored by: fstirlitz --- yt_dlp/extractor/zype.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/zype.py b/yt_dlp/extractor/zype.py index f20f953cb8..7663cb36b0 100644 --- a/yt_dlp/extractor/zype.py +++ b/yt_dlp/extractor/zype.py @@ -56,6 +56,8 @@ class ZypeIE(InfoExtractor): video = response['video'] title = video['title'] + subtitles = {} + if isinstance(body, dict): formats = [] for output in body.get('outputs', []): @@ -64,7 +66,7 @@ class ZypeIE(InfoExtractor): continue name = output.get('name') if name == 'm3u8': - formats = self._extract_m3u8_formats( + formats, subtitles = self._extract_m3u8_formats_and_subtitles( output_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) else: @@ -97,7 +99,7 @@ class ZypeIE(InfoExtractor): if get_attr('integration') == 'verizon-media': m3u8_url = 'https://content.uplynk.com/%s.m3u8' % get_attr('id') - formats = self._extract_m3u8_formats( + formats, subtitles = self._extract_m3u8_formats_and_subtitles( m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') text_tracks = self._search_regex( r'textTracks\s*:\s*(\[[^]]+\])', @@ -107,7 +109,6 @@ class ZypeIE(InfoExtractor): text_tracks, video_id, js_to_json, False) self._sort_formats(formats) - subtitles = {} if text_tracks: for text_track in text_tracks: tt_url = dict_get(text_track, ('file', 'src')) From ffecd3034b00671dc9438ff70474dcc57220e558 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Sat, 11 Sep 2021 18:51:11 +0530 Subject: [PATCH 073/641] [MuseScore] Add Extractor (#918) Closes #911 Authored by: Ashish0804 --- .gitignore | 2 +- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/musescore.py | 67 ++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 yt_dlp/extractor/musescore.py diff --git a/.gitignore b/.gitignore index 619d6ba98a..443e637aee 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,7 @@ *.conf *.spec cookies -cookies.txt +*cookies.txt # Downloaded *.srt diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index ee368b7b12..c56bf5b2be 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -802,6 +802,7 @@ from .mtv import ( MTVItaliaProgrammaIE, ) from .muenchentv import MuenchenTVIE +from .musescore import MuseScoreIE from .mwave import MwaveIE, MwaveMeetGreetIE from .mxplayer import ( MxplayerIE, diff --git a/yt_dlp/extractor/musescore.py b/yt_dlp/extractor/musescore.py new file mode 100644 index 0000000000..dcd26388a6 --- /dev/null +++ b/yt_dlp/extractor/musescore.py @@ -0,0 +1,67 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class MuseScoreIE(InfoExtractor): + _VALID_URL = 
r'(?:https?://)(?:www\.)?musescore\.com/(?:user/\d+|[^/]+)(?:/scores)?/(?P[^#&?]+)' + _TESTS = [{ + 'url': 'https://musescore.com/user/73797/scores/142975', + 'info_dict': { + 'id': '142975', + 'ext': 'mp3', + 'title': 'WA Mozart Marche Turque (Turkish March fingered)', + 'description': 'md5:7ede08230e4eaabd67a4a98bb54d07be', + 'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+', + 'uploader': 'PapyPiano', + 'creator': 'Wolfgang Amadeus Mozart', + } + }, { + 'url': 'https://musescore.com/user/36164500/scores/6837638', + 'info_dict': { + 'id': '6837638', + 'ext': 'mp3', + 'title': 'Sweet Child O\' Mine – Guns N\' Roses sweet child', + 'description': 'md5:4dca71191c14abc312a0a4192492eace', + 'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+', + 'uploader': 'roxbelviolin', + 'creator': 'Guns N´Roses Arr. Roxbel Violin', + } + }, { + 'url': 'https://musescore.com/classicman/fur-elise', + 'info_dict': { + 'id': '33816', + 'ext': 'mp3', + 'title': 'Für Elise – Beethoven', + 'description': 'md5:49515a3556d5ecaf9fa4b2514064ac34', + 'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+', + 'uploader': 'ClassicMan', + 'creator': 'Ludwig van Beethoven (1770–1827)', + } + }, { + 'url': 'https://musescore.com/minh_cuteee/scores/6555384', + 'only_matching': True, + }] + + def _real_extract(self, url): + webpage = self._download_webpage(url, None) + url = self._og_search_url(webpage) or url + id = self._match_id(url) + mp3_url = self._download_json(f'https://musescore.com/api/jmuse?id={id}&index=0&type=mp3&v2=1', id, + headers={'authorization': '63794e5461e4cfa046edfbdddfccc1ac16daffd2'})['info']['url'] + formats = [{ + 'url': mp3_url, + 'ext': 'mp3', + 'vcodec': 'none', + }] + + return { + 'id': id, + 'formats': formats, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'uploader': self._html_search_meta('musescore:author', webpage, 'uploader'), + 'creator': self._html_search_meta('musescore:composer', webpage, 'composer'), + } From 16f7e6be3a9d38352c630544b91c1e86b8cf2332 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Sat, 11 Sep 2021 18:59:48 +0530 Subject: [PATCH 074/641] [bilibili]Add BiliIntlIE and BiliIntlSeriesIE (#907) Closes #611 Authored by: Ashish0804 --- yt_dlp/extractor/bilibili.py | 140 +++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 2 + 2 files changed, 142 insertions(+) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 8aab6a01b4..0a81452c32 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -23,6 +23,7 @@ from ..utils import ( try_get, smuggle_url, str_or_none, + str_to_int, strip_jsonp, unified_timestamp, unsmuggle_url, @@ -774,3 +775,142 @@ class BiliBiliPlayerIE(InfoExtractor): return self.url_result( 'http://www.bilibili.tv/video/av%s/' % video_id, ie=BiliBiliIE.ie_key(), video_id=video_id) + + +class BiliIntlBaseIE(InfoExtractor): + _API_URL = 'https://api.bili{}/intl/gateway{}' + + def _call_api(self, type, endpoint, id): + return self._download_json(self._API_URL.format(type, endpoint), id)['data'] + + def _get_subtitles(self, type, ep_id): + sub_json = self._call_api(type, f'/m/subtitle?ep_id={ep_id}&platform=web', ep_id) + subtitles = {} + for sub in sub_json.get('subtitles', []): + sub_url = sub.get('url') + if not sub_url: + continue + subtitles.setdefault(sub.get('key', 'en'), []).append({ + 'url': 
sub_url, + }) + return subtitles + + def _get_formats(self, type, ep_id): + video_json = self._call_api(type, f'/web/playurl?ep_id={ep_id}&platform=web', ep_id) + if not video_json: + self.raise_login_required(method='cookies') + video_json = video_json['playurl'] + formats = [] + for vid in video_json.get('video', []): + video_res = vid.get('video_resource') or {} + video_info = vid.get('stream_info') or {} + if not video_res.get('url'): + continue + formats.append({ + 'url': video_res['url'], + 'ext': 'mp4', + 'format_note': video_info.get('desc_words'), + 'width': video_res.get('width'), + 'height': video_res.get('height'), + 'vbr': video_res.get('bandwidth'), + 'acodec': 'none', + 'vcodec': video_res.get('codecs'), + 'filesize': video_res.get('size'), + }) + for aud in video_json.get('audio_resource', []): + if not aud.get('url'): + continue + formats.append({ + 'url': aud['url'], + 'ext': 'mp4', + 'abr': aud.get('bandwidth'), + 'acodec': aud.get('codecs'), + 'vcodec': 'none', + 'filesize': aud.get('size'), + }) + + self._sort_formats(formats) + return formats + + def _extract_ep_info(self, type, episode_data, ep_id): + return { + 'id': ep_id, + 'title': episode_data.get('long_title') or episode_data['title'], + 'thumbnail': episode_data.get('cover'), + 'episode_number': str_to_int(episode_data.get('title')), + 'formats': self._get_formats(type, ep_id), + 'subtitles': self._get_subtitles(type, ep_id), + 'extractor_key': BiliIntlIE.ie_key(), + } + + +class BiliIntlIE(BiliIntlBaseIE): + _VALID_URL = r'https?://(?:www\.)?bili(?Pbili\.tv|intl.com)/(?:[a-z]{2}/)?play/(?P\d+)/(?P\d+)' + _TESTS = [{ + 'url': 'https://www.bilibili.tv/en/play/34613/341736', + 'info_dict': { + 'id': '341736', + 'ext': 'mp4', + 'title': 'The First Night', + 'thumbnail': 'https://i0.hdslb.com/bfs/intl/management/91e30e5521235d9b163339a26a0b030ebda54310.png', + 'episode_number': 2, + }, + 'params': { + 'format': 'bv', + }, + }, { + 'url': 'https://www.biliintl.com/en/play/34613/341736', + 'info_dict': { + 'id': '341736', + 'ext': 'mp4', + 'title': 'The First Night', + 'thumbnail': 'https://i0.hdslb.com/bfs/intl/management/91e30e5521235d9b163339a26a0b030ebda54310.png', + 'episode_number': 2, + }, + 'params': { + 'format': 'bv', + }, + }] + + def _real_extract(self, url): + type, season_id, id = self._match_valid_url(url).groups() + data_json = self._call_api(type, f'/web/view/ogv_collection?season_id={season_id}', id) + episode_data = next( + episode for episode in data_json.get('episodes', []) + if str(episode.get('ep_id')) == id) + return self._extract_ep_info(type, episode_data, id) + + +class BiliIntlSeriesIE(BiliIntlBaseIE): + _VALID_URL = r'https?://(?:www\.)?bili(?Pbili\.tv|intl.com)/(?:[a-z]{2}/)?play/(?P\d+)$' + _TESTS = [{ + 'url': 'https://www.bilibili.tv/en/play/34613', + 'playlist_mincount': 15, + 'info_dict': { + 'id': '34613', + }, + 'params': { + 'skip_download': True, + 'format': 'bv', + }, + }, { + 'url': 'https://www.biliintl.com/en/play/34613', + 'playlist_mincount': 15, + 'info_dict': { + 'id': '34613', + }, + 'params': { + 'skip_download': True, + 'format': 'bv', + }, + }] + + def _entries(self, id, type): + data_json = self._call_api(type, f'/web/view/ogv_collection?season_id={id}', id) + for episode in data_json.get('episodes', []): + episode_id = str(episode.get('ep_id')) + yield self._extract_ep_info(type, episode, episode_id) + + def _real_extract(self, url): + type, id = self._match_valid_url(url).groups() + return self.playlist_result(self._entries(id, type), playlist_id=id) diff --git 
a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index c56bf5b2be..16bc78ffcd 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -147,6 +147,8 @@ from .bilibili import ( BilibiliAudioAlbumIE, BiliBiliPlayerIE, BilibiliChannelIE, + BiliIntlIE, + BiliIntlSeriesIE, ) from .biobiochiletv import BioBioChileTVIE from .bitchute import ( From 02c7ae81045d35401301cf15346fcb41dfee61bf Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sun, 12 Sep 2021 05:37:44 +0000 Subject: [PATCH 075/641] [Newgrounds] Add `NewgroundsUserIE` and improve extractor (#942) Authored by: u-spec-png --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/newgrounds.py | 86 +++++++++++++++++++++++++++++----- 2 files changed, 74 insertions(+), 13 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 16bc78ffcd..e456475e51 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -867,6 +867,7 @@ from .neteasemusic import ( from .newgrounds import ( NewgroundsIE, NewgroundsPlaylistIE, + NewgroundsUserIE, ) from .newstube import NewstubeIE from .nextmedia import ( diff --git a/yt_dlp/extractor/newgrounds.py b/yt_dlp/extractor/newgrounds.py index 41549a2f1e..25b468b7dc 100644 --- a/yt_dlp/extractor/newgrounds.py +++ b/yt_dlp/extractor/newgrounds.py @@ -1,5 +1,7 @@ +# coding: utf-8 from __future__ import unicode_literals +import functools import re from .common import InfoExtractor @@ -8,8 +10,9 @@ from ..utils import ( int_or_none, parse_count, parse_duration, - parse_filesize, unified_timestamp, + OnDemandPagedList, + try_get, ) @@ -88,10 +91,10 @@ class NewgroundsIE(InfoExtractor): webpage = self._download_webpage(url, media_id) title = self._html_search_regex( - r'([^>]+)', webpage, 'title') + r'(.+?)', webpage, 'title') media_url_string = self._search_regex( - r'"url"\s*:\s*("[^"]+"),', webpage, 'media url', default=None, fatal=False) + r'"url"\s*:\s*("[^"]+"),', webpage, 'media url', default=None) if media_url_string: media_url = self._parse_json(media_url_string, media_id) @@ -128,20 +131,26 @@ class NewgroundsIE(InfoExtractor): (r'
         timestamp = unified_timestamp(self._html_search_regex(
             (r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)',
              r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+)'),
             webpage, 'timestamp', default=None))
-        duration = parse_duration(self._search_regex(
-            r'(?s)<dd>\s*Song\s*</dd>\s*<dd>.+?</dd>\s*<dd>([^<]+)', webpage,
+        duration = parse_duration(self._html_search_regex(
+            r'"duration"\s*:\s*["\']?([\d]+)["\']?,', webpage,
             'duration', default=None))

-        view_count = parse_count(self._html_search_regex(r'(?s)<dt>\s*Views\s*</dt>\s*<dd>([\d\.,]+)</dd>', webpage, 'view_count', fatal=False, default=None))
+        view_count = parse_count(self._html_search_regex(
+            r'(?s)<dt>\s*Views\s*</dt>\s*<dd>([\d\.,]+)</dd>', webpage,
+            'view count', default=None))

-        filesize_approx = parse_filesize(self._html_search_regex(
-            r'(?s)<dd>\s*Song\s*</dd>\s*<dd>(.+?)</dd>', webpage, 'filesize',
+        filesize = int_or_none(self._html_search_regex(
+            r'"filesize"\s*:\s*["\']?([\d]+)["\']?,', webpage, 'filesize',
             default=None))
-        if len(formats) == 1:
-            formats[0]['filesize_approx'] = filesize_approx

-        if '<dd>
Song' in webpage: + video_type_description = self._html_search_regex( + r'"description"\s*:\s*["\']?([^"\']+)["\']?,', webpage, 'filesize', + default=None) + + if len(formats) == 1: + formats[0]['filesize'] = filesize + + if video_type_description == 'Audio File': formats[0]['vcodec'] = 'none' self._check_formats(formats, media_id) self._sort_formats(formats) @@ -160,6 +169,7 @@ class NewgroundsIE(InfoExtractor): class NewgroundsPlaylistIE(InfoExtractor): + IE_NAME = 'Newgrounds:playlist' _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:collection|[^/]+/search/[^/]+)/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://www.newgrounds.com/collection/cats', @@ -202,7 +212,57 @@ class NewgroundsPlaylistIE(InfoExtractor): continue entries.append( self.url_result( - 'https://www.newgrounds.com/%s' % path, + f'https://www.newgrounds.com/{path}', ie=NewgroundsIE.ie_key(), video_id=media_id)) return self.playlist_result(entries, playlist_id, title) + + +class NewgroundsUserIE(InfoExtractor): + IE_NAME = 'Newgrounds:user' + _VALID_URL = r'https?://(?P[^\.]+)\.newgrounds\.com/(?:movies|audio)/?(?:[#?]|$)' + _TESTS = [{ + 'url': 'https://burn7.newgrounds.com/audio', + 'info_dict': { + 'id': 'burn7', + }, + 'playlist_mincount': 150, + }, { + 'url': 'https://burn7.newgrounds.com/movies', + 'info_dict': { + 'id': 'burn7', + }, + 'playlist_mincount': 2, + }, { + 'url': 'https://brian-beaton.newgrounds.com/movies', + 'info_dict': { + 'id': 'brian-beaton', + }, + 'playlist_mincount': 10, + }] + _PAGE_SIZE = 30 + + def _fetch_page(self, channel_id, url, page): + page += 1 + posts_info = self._download_json( + f'{url}/page/{page}', channel_id, + note=f'Downloading page {page}', headers={ + 'Accept': 'application/json, text/javascript, */*; q = 0.01', + 'X-Requested-With': 'XMLHttpRequest', + }) + sequence = posts_info.get('sequence', []) + for year in sequence: + posts = try_get(posts_info, lambda x: x['years'][str(year)]['items']) + for post in posts: + path, media_id = self._search_regex( + r']+\bhref=["\'][^"\']+((?:portal/view|audio/listen)/(\d+))[^>]+>', + post, 'url', group=(1, 2)) + yield self.url_result(f'https://www.newgrounds.com/{path}', NewgroundsIE.ie_key(), media_id) + + def _real_extract(self, url): + channel_id = self._match_id(url) + + entries = OnDemandPagedList(functools.partial( + self._fetch_page, channel_id, url), self._PAGE_SIZE) + + return self.playlist_result(entries, channel_id) From 0fd6661edb7e671eb7b131de12fa89bb85a6cbaf Mon Sep 17 00:00:00 2001 From: MinePlayersPE Date: Sun, 12 Sep 2021 13:21:59 +0700 Subject: [PATCH 076/641] [TikTokUser] Fix extractor using mobile API (#925) and misc cleanup Closes #859 Authored by: MinePlayersPE, llacb47 --- yt_dlp/extractor/tiktok.py | 417 +++++++++++++++++++++---------------- 1 file changed, 232 insertions(+), 185 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 953ff05b6e..4b0efd4a3d 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -5,6 +5,7 @@ import itertools import random import string import time +import json from .common import InfoExtractor from ..utils import ( @@ -17,7 +18,189 @@ from ..utils import ( ) -class TikTokIE(InfoExtractor): +class TikTokBaseIE(InfoExtractor): + _APP_VERSION = '20.9.3' + _MANIFEST_APP_VERSION = '291' + QUALITIES = ('360p', '540p', '720p') + + def _call_api(self, ep, query, video_id, fatal=True, + note='Downloading API JSON', errnote='Unable to download API page'): + real_query = { + **query, + 'version_name': self._APP_VERSION, + 'version_code': 
self._MANIFEST_APP_VERSION, + 'build_number': self._APP_VERSION, + 'manifest_version_code': self._MANIFEST_APP_VERSION, + 'update_version_code': self._MANIFEST_APP_VERSION, + 'openudid': ''.join(random.choice('0123456789abcdef') for i in range(16)), + 'uuid': ''.join([random.choice(string.digits) for num in range(16)]), + '_rticket': int(time.time() * 1000), + 'ts': int(time.time()), + 'device_brand': 'Google', + 'device_type': 'Pixel 4', + 'device_platform': 'android', + 'resolution': '1080*1920', + 'dpi': 420, + 'os_version': '10', + 'os_api': '29', + 'carrier_region': 'US', + 'sys_region': 'US', + 'region': 'US', + 'app_name': 'trill', + 'app_language': 'en', + 'language': 'en', + 'timezone_name': 'America/New_York', + 'timezone_offset': '-14400', + 'channel': 'googleplay', + 'ac': 'wifi', + 'mcc_mnc': '310260', + 'is_my_cn': 0, + 'aid': 1180, + 'ssmix': 'a', + 'as': 'a1qwert123', + 'cp': 'cbfhckdckkde1', + } + self._set_cookie('.tiktokv.com', 'odin_tt', ''.join(random.choice('0123456789abcdef') for i in range(160))) + return self._download_json( + 'https://api-t2.tiktokv.com/aweme/v1/%s/' % ep, video_id=video_id, + fatal=fatal, note=note, errnote=errnote, headers={ + 'User-Agent': f'com.ss.android.ugc.trill/{self._MANIFEST_APP_VERSION} (Linux; U; Android 10; en_US; Pixel 4; Build/QQ3A.200805.001; Cronet/58.0.2991.0)', + 'Accept': 'application/json', + }, query=real_query) + + def _parse_aweme_video(self, aweme_detail): + aweme_id = aweme_detail['aweme_id'] + video_info = aweme_detail['video'] + + def parse_url_key(url_key): + format_id, codec, res, bitrate = self._search_regex( + r'v[^_]+_(?P(?P[^_]+)_(?P\d+p)_(?P\d+))', url_key, + 'url key', default=(None, None, None, None), group=('id', 'codec', 'res', 'bitrate')) + if not format_id: + return {}, None + return { + 'format_id': format_id, + 'vcodec': 'h265' if codec == 'bytevc1' else codec, + 'tbr': int_or_none(bitrate, scale=1000) or None, + 'quality': qualities(self.QUALITIES)(res), + }, res + + known_resolutions = {} + + def extract_addr(addr, add_meta={}): + parsed_meta, res = parse_url_key(addr.get('url_key', '')) + if res: + known_resolutions.setdefault(res, {}).setdefault('height', add_meta.get('height')) + known_resolutions[res].setdefault('width', add_meta.get('width')) + parsed_meta.update(known_resolutions.get(res, {})) + add_meta.setdefault('height', int_or_none(res[:-1])) + return [{ + 'url': url, + 'filesize': int_or_none(addr.get('data_size')), + 'ext': 'mp4', + 'acodec': 'aac', + 'source_preference': -2 if 'aweme/v1' in url else -1, # Downloads from API might get blocked + **add_meta, **parsed_meta, + 'format_note': ' '.join(filter(None, ( + add_meta.get('format_note'), '(API)' if 'aweme/v1' in url else ''))) + } for url in addr.get('url_list') or []] + + # Hack: Add direct video links first to prioritize them when removing duplicate formats + formats = [] + if video_info.get('play_addr'): + formats.extend(extract_addr(video_info['play_addr'], { + 'format_id': 'play_addr', + 'format_note': 'Direct video', + 'vcodec': 'h265' if traverse_obj( + video_info, 'is_bytevc1', 'is_h265') else 'h264', # Always h264? 
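
Aside: the `quality` rank produced by `parse_url_key` above comes from `yt_dlp.utils.qualities`, which maps a known identifier to its index in the preference tuple and anything unknown to -1. A minimal sketch of that behaviour, assuming the helper as shipped at this point in the series:

```python
from yt_dlp.utils import qualities

rank = qualities(('360p', '540p', '720p'))  # TikTokBaseIE.QUALITIES
assert rank('360p') == 0    # known identifiers rank by tuple position
assert rank('720p') == 2    # a higher rank is preferred by the format sorter
assert rank('1080p') == -1  # unknown identifiers sort below all known ones
```
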
+ 'width': video_info.get('width'), + 'height': video_info.get('height'), + })) + if video_info.get('download_addr'): + formats.extend(extract_addr(video_info['download_addr'], { + 'format_id': 'download_addr', + 'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''), + 'vcodec': 'h264', + 'width': video_info.get('width'), + 'height': video_info.get('height'), + 'preference': -2 if video_info.get('has_watermark') else -1, + })) + if video_info.get('play_addr_h264'): + formats.extend(extract_addr(video_info['play_addr_h264'], { + 'format_id': 'play_addr_h264', + 'format_note': 'Direct video', + 'vcodec': 'h264', + })) + if video_info.get('play_addr_bytevc1'): + formats.extend(extract_addr(video_info['play_addr_bytevc1'], { + 'format_id': 'play_addr_bytevc1', + 'format_note': 'Direct video', + 'vcodec': 'h265', + })) + + for bitrate in video_info.get('bit_rate', []): + if bitrate.get('play_addr'): + formats.extend(extract_addr(bitrate['play_addr'], { + 'format_id': bitrate.get('gear_name'), + 'format_note': 'Playback video', + 'tbr': try_get(bitrate, lambda x: x['bit_rate'] / 1000), + 'vcodec': 'h265' if traverse_obj( + bitrate, 'is_bytevc1', 'is_h265') else 'h264', + })) + + self._remove_duplicate_formats(formats) + self._sort_formats(formats, ('quality', 'codec', 'size', 'br')) + + thumbnails = [] + for cover_id in ('cover', 'ai_dynamic_cover', 'animated_cover', 'ai_dynamic_cover_bak', + 'origin_cover', 'dynamic_cover'): + cover = video_info.get(cover_id) + if cover: + for cover_url in cover['url_list']: + thumbnails.append({ + 'id': cover_id, + 'url': cover_url, + }) + + stats_info = aweme_detail.get('statistics', {}) + author_info = aweme_detail.get('author', {}) + music_info = aweme_detail.get('music', {}) + user_id = str_or_none(author_info.get('nickname')) + + contained_music_track = traverse_obj( + music_info, ('matched_song', 'title'), ('matched_pgc_sound', 'title'), expected_type=str) + contained_music_author = traverse_obj( + music_info, ('matched_song', 'author'), ('matched_pgc_sound', 'author'), 'author', expected_type=str) + + is_generic_og_trackname = music_info.get('is_original_sound') and music_info.get('title') == 'original sound - %s' % music_info.get('owner_handle') + if is_generic_og_trackname: + music_track, music_author = contained_music_track or 'original sound', contained_music_author + else: + music_track, music_author = music_info.get('title'), music_info.get('author') + + return { + 'id': aweme_id, + 'title': aweme_detail['desc'], + 'description': aweme_detail['desc'], + 'view_count': int_or_none(stats_info.get('play_count')), + 'like_count': int_or_none(stats_info.get('digg_count')), + 'repost_count': int_or_none(stats_info.get('share_count')), + 'comment_count': int_or_none(stats_info.get('comment_count')), + 'uploader': str_or_none(author_info.get('unique_id')), + 'creator': user_id, + 'uploader_id': str_or_none(author_info.get('uid')), + 'uploader_url': f'https://www.tiktok.com/@{user_id}' if user_id else None, + 'track': music_track, + 'album': str_or_none(music_info.get('album')) or None, + 'artist': music_author, + 'timestamp': int_or_none(aweme_detail.get('create_time')), + 'formats': formats, + 'thumbnails': thumbnails, + 'duration': int_or_none(traverse_obj(video_info, 'duration', ('download_addr', 'duration')), scale=1000) + } + + +class TikTokIE(TikTokBaseIE): _VALID_URL = r'https?://www\.tiktok\.com/@[\w\.-]+/video/(?P\d+)' _TESTS = [{ @@ -71,9 +254,6 @@ class TikTokIE(InfoExtractor): 'url': 
'https://www.tiktok.com/@MS4wLjABAAAAAR29F6J2Ktu0Daw03BJyXPNoRQ-W7U5a0Mn3lVCq2rQhjOd_WNLclHUoFgwX8Eno/video/6932675057474981122', 'only_matching': True, }] - _APP_VERSION = '20.9.3' - _MANIFEST_APP_VERSION = '291' - QUALITIES = ('360p', '540p', '720p') def _extract_aweme(self, props_data, webpage, url): video_info = try_get( @@ -126,175 +306,9 @@ class TikTokIE(InfoExtractor): } def _extract_aweme_app(self, aweme_id): - query = { - 'aweme_id': aweme_id, - 'version_name': self._APP_VERSION, - 'version_code': self._MANIFEST_APP_VERSION, - 'build_number': self._APP_VERSION, - 'manifest_version_code': self._MANIFEST_APP_VERSION, - 'update_version_code': self._MANIFEST_APP_VERSION, - 'openudid': ''.join(random.choice('0123456789abcdef') for i in range(16)), - 'uuid': ''.join([random.choice(string.digits) for num in range(16)]), - '_rticket': int(time.time() * 1000), - 'ts': int(time.time()), - 'device_brand': 'Google', - 'device_type': 'Pixel 4', - 'device_platform': 'android', - 'resolution': '1080*1920', - 'dpi': 420, - 'os_version': '10', - 'os_api': '29', - 'carrier_region': 'US', - 'sys_region': 'US', - 'region': 'US', - 'app_name': 'trill', - 'app_language': 'en', - 'language': 'en', - 'timezone_name': 'America/New_York', - 'timezone_offset': '-14400', - 'channel': 'googleplay', - 'ac': 'wifi', - 'mcc_mnc': '310260', - 'is_my_cn': 0, - 'aid': 1180, - 'ssmix': 'a', - 'as': 'a1qwert123', - 'cp': 'cbfhckdckkde1', - } - - self._set_cookie('.tiktokv.com', 'odin_tt', ''.join(random.choice('0123456789abcdef') for i in range(160))) - - aweme_detail = self._download_json( - 'https://api-t2.tiktokv.com/aweme/v1/aweme/detail/', aweme_id, - 'Downloading video details', 'Unable to download video details', - headers={ - 'User-Agent': f'com.ss.android.ugc.trill/{self._MANIFEST_APP_VERSION} (Linux; U; Android 10; en_US; Pixel 4; Build/QQ3A.200805.001; Cronet/58.0.2991.0)', - }, query=query)['aweme_detail'] - video_info = aweme_detail['video'] - - def parse_url_key(url_key): - format_id, codec, res, bitrate = self._search_regex( - r'v[^_]+_(?P(?P[^_]+)_(?P\d+p)_(?P\d+))', url_key, - 'url key', default=(None, None, None, None), group=('id', 'codec', 'res', 'bitrate')) - if not format_id: - return {}, None - return { - 'format_id': format_id, - 'vcodec': 'h265' if codec == 'bytevc1' else codec, - 'tbr': int_or_none(bitrate, scale=1000) or None, - 'quality': qualities(self.QUALITIES)(res), - }, res - - known_resolutions = {} - - def extract_addr(addr, add_meta={}): - parsed_meta, res = parse_url_key(addr.get('url_key', '')) - if res: - known_resolutions.setdefault(res, {}).setdefault('height', add_meta.get('height')) - known_resolutions[res].setdefault('width', add_meta.get('width')) - parsed_meta.update(known_resolutions.get(res, {})) - add_meta.setdefault('height', int_or_none(res[:-1])) - return [{ - 'url': url, - 'filesize': int_or_none(addr.get('data_size')), - 'ext': 'mp4', - 'acodec': 'aac', - **add_meta, **parsed_meta - } for url in addr.get('url_list') or []] - - # Hack: Add direct video links first to prioritize them when removing duplicate formats - formats = [] - if video_info.get('play_addr'): - formats.extend(extract_addr(video_info['play_addr'], { - 'format_id': 'play_addr', - 'format_note': 'Direct video', - 'vcodec': 'h265' if traverse_obj( - video_info, 'is_bytevc1', 'is_h265') else 'h264', # Always h264? 
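
Aside: the `vcodec` selection in this removed block (and in its replacement in `TikTokBaseIE._parse_aweme_video`) leans on `yt_dlp.utils.traverse_obj` trying multiple paths in order and returning the first value that resolves. A minimal sketch, assuming the `traverse_obj` semantics current in this series:

```python
from yt_dlp.utils import traverse_obj

# Paths are tried left to right; the first one that resolves wins.
video_info = {'is_h265': 1}
assert traverse_obj(video_info, 'is_bytevc1', 'is_h265') == 1
# If no path resolves, None is returned, so the `else 'h264'` branch is taken.
assert traverse_obj({}, 'is_bytevc1', 'is_h265') is None
```
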
- 'width': video_info.get('width'), - 'height': video_info.get('height'), - })) - if video_info.get('download_addr'): - formats.extend(extract_addr(video_info['download_addr'], { - 'format_id': 'download_addr', - 'format_note': 'Download video%s' % (', watermarked' if video_info.get('has_watermark') else ''), - 'vcodec': 'h264', - 'width': video_info.get('width'), - 'height': video_info.get('height'), - 'source_preference': -2 if video_info.get('has_watermark') else -1, - })) - if video_info.get('play_addr_h264'): - formats.extend(extract_addr(video_info['play_addr_h264'], { - 'format_id': 'play_addr_h264', - 'format_note': 'Direct video', - 'vcodec': 'h264', - })) - if video_info.get('play_addr_bytevc1'): - formats.extend(extract_addr(video_info['play_addr_bytevc1'], { - 'format_id': 'play_addr_bytevc1', - 'format_note': 'Direct video', - 'vcodec': 'h265', - })) - - for bitrate in video_info.get('bit_rate', []): - if bitrate.get('play_addr'): - formats.extend(extract_addr(bitrate['play_addr'], { - 'format_id': bitrate.get('gear_name'), - 'format_note': 'Playback video', - 'tbr': try_get(bitrate, lambda x: x['bit_rate'] / 1000), - 'vcodec': 'h265' if traverse_obj( - bitrate, 'is_bytevc1', 'is_h265') else 'h264', - })) - - self._remove_duplicate_formats(formats) - self._sort_formats(formats, ('quality', 'source', 'codec', 'size', 'br')) - - thumbnails = [] - for cover_id in ('cover', 'ai_dynamic_cover', 'animated_cover', 'ai_dynamic_cover_bak', - 'origin_cover', 'dynamic_cover'): - cover = video_info.get(cover_id) - if cover: - for cover_url in cover['url_list']: - thumbnails.append({ - 'id': cover_id, - 'url': cover_url, - }) - - stats_info = aweme_detail.get('statistics', {}) - author_info = aweme_detail.get('author', {}) - music_info = aweme_detail.get('music', {}) - user_id = str_or_none(author_info.get('nickname')) - - contained_music_track = traverse_obj( - music_info, ('matched_song', 'title'), ('matched_pgc_sound', 'title'), expected_type=str) - contained_music_author = traverse_obj( - music_info, ('matched_song', 'author'), ('matched_pgc_sound', 'author'), 'author', expected_type=str) - - is_generic_og_trackname = music_info.get('is_original_sound') and music_info.get('title') == 'original sound - %s' % music_info.get('owner_handle') - if is_generic_og_trackname: - music_track, music_author = contained_music_track or 'original sound', contained_music_author - else: - music_track, music_author = music_info.get('title'), music_info.get('author') - - return { - 'id': aweme_id, - 'title': aweme_detail['desc'], - 'description': aweme_detail['desc'], - 'view_count': int_or_none(stats_info.get('play_count')), - 'like_count': int_or_none(stats_info.get('digg_count')), - 'repost_count': int_or_none(stats_info.get('share_count')), - 'comment_count': int_or_none(stats_info.get('comment_count')), - 'uploader': str_or_none(author_info.get('unique_id')), - 'creator': user_id, - 'uploader_id': str_or_none(author_info.get('uid')), - 'uploader_url': f'https://www.tiktok.com/@{user_id}' if user_id else None, - 'track': music_track, - 'album': str_or_none(music_info.get('album')) or None, - 'artist': music_author, - 'timestamp': int_or_none(aweme_detail.get('create_time')), - 'formats': formats, - 'thumbnails': thumbnails, - 'duration': int_or_none(traverse_obj(video_info, 'duration', ('download_addr', 'duration')), scale=1000) - } + aweme_detail = self._call_api('aweme/detail', {'aweme_id': aweme_id}, aweme_id, + note='Downloading video details', errnote='Unable to download video 
details')['aweme_detail'] + return self._parse_aweme_video(aweme_detail) def _real_extract(self, url): video_id = self._match_id(url) @@ -323,38 +337,33 @@ class TikTokIE(InfoExtractor): raise ExtractorError('Video not available', video_id=video_id) -class TikTokUserIE(InfoExtractor): +class TikTokUserIE(TikTokBaseIE): IE_NAME = 'tiktok:user' - _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P[\w\._]+)/?(?:$|[#?])' + _VALID_URL = r'https?://(?:www\.)?tiktok\.com/@(?P[\w\.-]+)/?(?:$|[#?])' _TESTS = [{ 'url': 'https://tiktok.com/@corgibobaa?lang=en', 'playlist_mincount': 45, 'info_dict': { 'id': '6935371178089399301', }, - 'skip': 'Cookies (not necessarily logged in) are needed.' }, { 'url': 'https://www.tiktok.com/@meme', 'playlist_mincount': 593, 'info_dict': { 'id': '79005827461758976', }, - 'skip': 'Cookies (not necessarily logged in) are needed.' }] - def _entries(self, url, user_id): - webpage = self._download_webpage(url, user_id) - own_id = self._search_regex(r'\"id\":\"(?P\d+)', webpage, user_id, default=None) - if not own_id: - raise ExtractorError('Cookies (not necessarily logged in) are needed.', expected=True) - secuid = self._search_regex(r'\"secUid\":\"(?P[^\"]+)', webpage, user_id) + r''' # TODO: Fix by adding _signature to api_url + def _entries(self, webpage, user_id, username): + secuid = self._search_regex(r'\"secUid\":\"(?P[^\"]+)', webpage, username) verifyfp_cookie = self._get_cookies('https://www.tiktok.com').get('s_v_web_id') if not verifyfp_cookie: raise ExtractorError('Improper cookies (missing s_v_web_id).', expected=True) api_url = f'https://m.tiktok.com/api/post/item_list/?aid=1988&cookie_enabled=true&count=30&verifyFp={verifyfp_cookie.value}&secUid={secuid}&cursor=' cursor = '0' for page in itertools.count(): - data_json = self._download_json(api_url + cursor, user_id, note='Downloading Page %d' % page) + data_json = self._download_json(api_url + cursor, username, note='Downloading Page %d' % page) for video in data_json.get('itemList', []): video_id = video['id'] video_url = f'https://www.tiktok.com/@{user_id}/video/{video_id}' @@ -362,7 +371,45 @@ class TikTokUserIE(InfoExtractor): if not data_json.get('hasMore'): break cursor = data_json['cursor'] + ''' + + def _entries_api(self, webpage, user_id, username): + query = { + 'user_id': user_id, + 'count': 21, + 'max_cursor': 0, + 'min_cursor': 0, + 'retry_type': 'no_retry', + 'device_id': ''.join(random.choice(string.digits) for i in range(19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. + } + + max_retries = self.get_param('extractor_retries', 3) + for page in itertools.count(1): + for retries in itertools.count(): + try: + post_list = self._call_api('aweme/post', query, username, + note='Downloading user video list page %d%s' % (page, f' (attempt {retries})' if retries != 0 else ''), + errnote='Unable to download user video list') + except ExtractorError as e: + if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0 and retries != max_retries: + self.report_warning('%s. Retrying...' 
% str(e.cause or e.msg)) + continue + raise + break + for video in post_list.get('aweme_list', []): + yield { + **self._parse_aweme_video(video), + 'ie_key': TikTokIE.ie_key(), + 'extractor': 'TikTok', + } + if not post_list.get('has_more'): + break + query['max_cursor'] = post_list['max_cursor'] def _real_extract(self, url): user_id = self._match_id(url) - return self.playlist_result(self._entries(url, user_id), user_id) + webpage = self._download_webpage(url, user_id, headers={ + 'User-Agent': 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)' + }) + own_id = self._html_search_regex(r'snssdk\d*://user/profile/(\d+)', webpage, 'user ID') + return self.playlist_result(self._entries_api(webpage, own_id, user_id), user_id) From 03c862794f7bb815d4fd054a1b89268fefc99ec1 Mon Sep 17 00:00:00 2001 From: dalan <863286+dalanmiller@users.noreply.github.com> Date: Sun, 12 Sep 2021 22:11:24 +1000 Subject: [PATCH 077/641] [9Now] handle episodes of series (#896) Authored by: dalanmiller --- yt_dlp/extractor/ninenow.py | 59 ++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 14 deletions(-) diff --git a/yt_dlp/extractor/ninenow.py b/yt_dlp/extractor/ninenow.py index 0ee450cc5c..6043674ba1 100644 --- a/yt_dlp/extractor/ninenow.py +++ b/yt_dlp/extractor/ninenow.py @@ -8,6 +8,10 @@ from ..utils import ( int_or_none, float_or_none, smuggle_url, + str_or_none, + try_get, + unified_strdate, + unified_timestamp, ) @@ -37,6 +41,24 @@ class NineNowIE(InfoExtractor): # DRM protected 'url': 'https://www.9now.com.au/andrew-marrs-history-of-the-world/season-1/episode-1', 'only_matching': True, + }, { + # episode of series + 'url': 'https://www.9now.com.au/lego-masters/season-3/episode-3', + 'info_dict': { + 'id': '6249614030001', + 'title': 'Episode 3', + 'ext': 'mp4', + 'season_number': 3, + 'episode_number': 3, + 'description': 'In the first elimination of the competition, teams will have 10 hours to build a world inside a snow globe.', + 'uploader_id': '4460760524001', + 'timestamp': 1619002200, + 'upload_date': '20210421', + }, + 'expected_warnings': ['Ignoring subtitle tracks'], + 'params':{ + 'skip_download': True, + } }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4460760524001/default_default/index.html?videoId=%s' @@ -59,26 +81,31 @@ class NineNowIE(InfoExtractor): cache = page_data.get(kind, {}).get('%sCache' % kind, {}) if not cache: continue - common_data = (cache.get(current_key) or list(cache.values())[0])[kind] + common_data = { + 'episode': (cache.get(current_key) or list(cache.values())[0])[kind], + 'season': (cache.get(current_key) or list(cache.values())[0]).get('season', None) + } break else: raise ExtractorError('Unable to find video data') - video_data = common_data['video'] - - brightcove_id = video_data.get('brightcoveId') or 'ref:' + video_data['referenceId'] - video_id = compat_str(video_data.get('id') or brightcove_id) - - if not self.get_param('allow_unplayable_formats') and video_data.get('drm'): - self.report_drm(video_id) - - title = common_data['name'] + if not self.get_param('allow_unplayable_formats') and try_get(common_data, lambda x: x['episode']['video']['drm'], bool): + self.report_drm(display_id) + brightcove_id = try_get( + common_data, lambda x: x['episode']['video']['brightcoveId'], compat_str) or 'ref:%s' % common_data['episode']['video']['referenceId'] + video_id = str_or_none(try_get(common_data, lambda x: x['episode']['video']['id'])) or brightcove_id + title = try_get(common_data, lambda x: x['episode']['name'], 
compat_str) + season_number = try_get(common_data, lambda x: x['season']['seasonNumber'], int) + episode_number = try_get(common_data, lambda x: x['episode']['episodeNumber'], int) + timestamp = unified_timestamp(try_get(common_data, lambda x: x['episode']['airDate'], compat_str)) + release_date = unified_strdate(try_get(common_data, lambda x: x['episode']['availability'], compat_str)) + thumbnails_data = try_get(common_data, lambda x: x['episode']['image']['sizes'], dict) or {} thumbnails = [{ 'id': thumbnail_id, 'url': thumbnail_url, - 'width': int_or_none(thumbnail_id[1:]) - } for thumbnail_id, thumbnail_url in common_data.get('image', {}).get('sizes', {}).items()] + 'width': int_or_none(thumbnail_id[1:]), + } for thumbnail_id, thumbnail_url in thumbnails_data.items()] return { '_type': 'url_transparent', @@ -87,8 +114,12 @@ class NineNowIE(InfoExtractor): {'geo_countries': self._GEO_COUNTRIES}), 'id': video_id, 'title': title, - 'description': common_data.get('description'), - 'duration': float_or_none(video_data.get('duration'), 1000), + 'description': try_get(common_data, lambda x: x['episode']['description'], compat_str), + 'duration': float_or_none(try_get(common_data, lambda x: x['episode']['video']['duration'], float), 1000), 'thumbnails': thumbnails, 'ie_key': 'BrightcoveNew', + 'season_number': season_number, + 'episode_number': episode_number, + 'timestamp': timestamp, + 'release_date': release_date, } From d5fe04f5c72d9d64c29fd7496e76d2b99f9dd5cd Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 11 Sep 2021 17:47:26 +0530 Subject: [PATCH 078/641] Fix `--compat-option no-direct-merge` --- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/downloader/external.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index ada870c487..8432abf1a9 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2750,7 +2750,7 @@ class YoutubeDL(object): _protocols = set(determine_protocol(f) for f in requested_formats) if len(_protocols) == 1: # All requested formats have same protocol info_dict['protocol'] = _protocols.pop() - directly_mergable = FFmpegFD.can_merge_formats(info_dict) + directly_mergable = FFmpegFD.can_merge_formats(info_dict, self.params) if dl_filename is not None: self.report_file_already_downloaded(dl_filename) elif (directly_mergable and get_suitable_downloader( diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 3dddedb14f..9db248df4d 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -357,7 +357,7 @@ class FFmpegFD(ExternalFD): pass @classmethod - def can_merge_formats(cls, info_dict, params={}): + def can_merge_formats(cls, info_dict, params): return ( info_dict.get('requested_formats') and info_dict.get('protocol') From 67ad7759af135c35aa13f7c1e39bebf41f54493d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 11 Sep 2021 17:47:59 +0530 Subject: [PATCH 079/641] [brightcove] Extract subtitles from manifests --- yt_dlp/extractor/brightcove.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index bb68dc481f..cd1c3f01cb 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -472,7 +472,7 @@ class BrightcoveNewIE(AdobePassIE): title = json_data['name'].strip() num_drm_sources = 0 - formats = [] + formats, subtitles = [], {} sources = json_data.get('sources') or [] for source in sources: container = source.get('container') @@ -488,12 
+488,16 @@ class BrightcoveNewIE(AdobePassIE): elif ext == 'm3u8' or container == 'M2TS': if not src: continue - formats.extend(self._extract_m3u8_formats( - src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + f, subs = self._extract_m3u8_formats_and_subtitles( + src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + formats.extend(f) + subtitles = self._merge_subtitles(subtitles, subs) elif ext == 'mpd': if not src: continue - formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False)) + f, subs = self._extract_mpd_formats_and_subtitles(src, video_id, 'dash', fatal=False) + formats.extend(f) + subtitles = self._merge_subtitles(subtitles, subs) else: streaming_src = source.get('streaming_src') stream_name, app_name = source.get('stream_name'), source.get('app_name') @@ -556,7 +560,6 @@ class BrightcoveNewIE(AdobePassIE): for f in formats: f.setdefault('http_headers', {}).update(headers) - subtitles = {} for text_track in json_data.get('text_tracks', []): if text_track.get('kind') != 'captions': continue From ca46b941349c0b5ab183320182fc61af28c70c45 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 11 Sep 2021 17:49:03 +0530 Subject: [PATCH 080/641] [cookies] Make browser names case insensitive --- yt_dlp/__init__.py | 2 +- yt_dlp/cookies.py | 1 + yt_dlp/options.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index ad2d5e035a..f9a7e2f111 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -248,7 +248,7 @@ def _real_main(argv=None): if opts.cookiesfrombrowser is not None: opts.cookiesfrombrowser = [ part.strip() or None for part in opts.cookiesfrombrowser.split(':', 1)] - if opts.cookiesfrombrowser[0] not in SUPPORTED_BROWSERS: + if opts.cookiesfrombrowser[0].lower() not in SUPPORTED_BROWSERS: parser.error('unsupported browser specified for cookies') if opts.date is not None: diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index bc3bb62f41..b5aff38ddc 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -748,6 +748,7 @@ def _is_path(value): def _parse_browser_specification(browser_name, profile=None): + browser_name = browser_name.lower() if browser_name not in SUPPORTED_BROWSERS: raise ValueError(f'unsupported browser: "{browser_name}"') if profile is not None and _is_path(profile): diff --git a/yt_dlp/options.py b/yt_dlp/options.py index c2d7a74ff7..7cabc35ae5 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1128,7 +1128,7 @@ def parseOpts(overrideArguments=None): 'You can specify the user profile name or directory using ' '"BROWSER:PROFILE_NAME" or "BROWSER:PROFILE_PATH". 
' 'If no profile is given, the most recently accessed one is used'.format( - '|'.join(sorted(SUPPORTED_BROWSERS))))) + ', '.join(sorted(SUPPORTED_BROWSERS))))) filesystem.add_option( '--no-cookies-from-browser', action='store_const', const=None, dest='cookiesfrombrowser', From ad226b1dc9763fd2840514e7636a9e16ecc31f9d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 12 Sep 2021 21:34:45 +0530 Subject: [PATCH 081/641] [funimation] Fix for locations outside US Closes #868 Authored by: Jules-A, pukkandan --- yt_dlp/extractor/funimation.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py index 4d95f1c7c4..5846884849 100644 --- a/yt_dlp/extractor/funimation.py +++ b/yt_dlp/extractor/funimation.py @@ -302,12 +302,20 @@ class FunimationShowIE(FunimationIE): }, }] + def _real_initialize(self): + region = self._get_cookies('https://www.funimation.com').get('region') + self._region = region.value if region else try_get( + self._download_json( + 'https://geo-service.prd.funimationsvc.com/geo/v1/region/check', None, fatal=False, + note='Checking geo-location', errnote='Unable to fetch geo-location information'), + lambda x: x['region']) or 'US' + def _real_extract(self, url): base_url, locale, display_id = self._match_valid_url(url).groups() show_info = self._download_json( - 'https://title-api.prd.funimationsvc.com/v2/shows/%s?region=US&deviceType=web&locale=%s' - % (display_id, locale or 'en'), display_id) + 'https://title-api.prd.funimationsvc.com/v2/shows/%s?region=%s&deviceType=web&locale=%s' + % (display_id, self._region, locale or 'en'), display_id) items = self._download_json( 'https://prod-api-funimationnow.dadcdigital.com/api/funimation/episodes/?limit=99999&title_id=%s' % show_info.get('id'), display_id).get('items') From f60990ddfc779e84c784eb004c1047c768785452 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 13 Sep 2021 02:02:28 +0530 Subject: [PATCH 082/641] [peertube] Update instances (#957) Authored by: u-spec-png --- yt_dlp/extractor/peertube.py | 631 +++++++++++++++++++++++++++++++++++ 1 file changed, 631 insertions(+) diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py index fb9fbb2e55..7576f683aa 100644 --- a/yt_dlp/extractor/peertube.py +++ b/yt_dlp/extractor/peertube.py @@ -19,6 +19,637 @@ from ..utils import ( class PeerTubeIE(InfoExtractor): _INSTANCES_RE = r'''(?: # Taken from https://instances.joinpeertube.org/instances + 40two\.tube| + a\.metube\.ch| + advtv\.ml| + algorithmic\.tv| + alimulama\.com| + arcana\.fun| + archive\.vidicon\.org| + artefac-paris\.tv| + auf1\.eu| + battlepenguin\.video| + beertube\.epgn\.ch| + befree\.nohost\.me| + bideoak\.argia\.eus| + birkeundnymphe\.de| + bitcointv\.com| + cattube\.org| + clap\.nerv-project\.eu| + climatejustice\.video| + comf\.tube| + conspiracydistillery\.com| + darkvapor\.nohost\.me| + daschauher\.aksel\.rocks| + digitalcourage\.video| + dreiecksnebel\.alex-detsch\.de| + eduvid\.org| + evangelisch\.video| + exo\.tube| + fair\.tube| + fediverse\.tv| + film\.k-prod\.fr| + flim\.txmn\.tk| + fotogramas\.politicaconciencia\.org| + ftsi\.ru| + gary\.vger\.cloud| + graeber\.video| + greatview\.video| + grypstube\.uni-greifswald\.de| + highvoltage\.tv| + hpstube\.fr| + htp\.live| + hyperreal\.tube| + juggling\.digital| + kino\.kompot\.si| + kino\.schuerz\.at| + kinowolnosc\.pl| + kirche\.peertube-host\.de| + kodcast\.com| + kolektiva\.media| + kraut\.zone| + kumi\.tube| + lastbreach\.tv| + lepetitmayennais\.fr\.nf| 
+ lexx\.impa\.me| + libertynode\.tv| + libra\.syntazia\.org| + libremedia\.video| + live\.libratoi\.org| + live\.nanao\.moe| + live\.toobnix\.org| + livegram\.net| + lolitube\.freedomchan\.moe| + lucarne\.balsamine\.be| + maindreieck-tv\.de| + mani\.tube| + manicphase\.me| + media\.gzevd\.de| + media\.inno3\.cricket| + media\.kaitaia\.life| + media\.krashboyz\.org| + media\.over-world\.org| + media\.skewed\.de| + media\.undeadnetwork\.de| + medias\.pingbase\.net| + melsungen\.peertube-host\.de| + mirametube\.fr| + mojotube\.net| + monplaisirtube\.ddns\.net| + mountaintown\.video| + my\.bunny\.cafe| + myfreetube\.de| + mytube\.kn-cloud\.de| + mytube\.madzel\.de| + myworkoutarenapeertube\.cf| + nanawel-peertube\.dyndns\.org| + nastub\.cz| + offenes\.tv| + orgdup\.media| + ovaltube\.codinglab\.ch| + p2ptv\.ru| + p\.eertu\.be| + p\.lu| + peer\.azurs\.fr| + peertube1\.zeteo\.me| + peertube\.020\.pl| + peertube\.0x5e\.eu| + peertube\.alpharius\.io| + peertube\.am-networks\.fr| + peertube\.anduin\.net| + peertube\.anzui\.dev| + peertube\.arbleizez\.bzh| + peertube\.art3mis\.de| + peertube\.atilla\.org| + peertube\.atsuchan\.page| + peertube\.aukfood\.net| + peertube\.aventer\.biz| + peertube\.b38\.rural-it\.org| + peertube\.beeldengeluid\.nl| + peertube\.be| + peertube\.bgzashtita\.es| + peertube\.bitsandlinux\.com| + peertube\.biz| + peertube\.boba\.best| + peertube\.br0\.fr| + peertube\.bridaahost\.ynh\.fr| + peertube\.bubbletea\.dev| + peertube\.bubuit\.net| + peertube\.cabaal\.net| + peertube\.cats-home\.net| + peertube\.chemnitz\.freifunk\.net| + peertube\.chevro\.fr| + peertube\.chrisspiegl\.com| + peertube\.chtisurel\.net| + peertube\.cipherbliss\.com| + peertube\.cloud\.sans\.pub| + peertube\.cpge-brizeux\.fr| + peertube\.ctseuro\.com| + peertube\.cuatrolibertades\.org| + peertube\.cybercirujas\.club| + peertube\.cythin\.com| + peertube\.davigge\.com| + peertube\.dc\.pini\.fr| + peertube\.debian\.social| + peertube\.demonix\.fr| + peertube\.designersethiques\.org| + peertube\.desmu\.fr| + peertube\.devloprog\.org| + peertube\.devol\.it| + peertube\.dtmf\.ca| + peertube\.ecologie\.bzh| + peertube\.eu\.org| + peertube\.european-pirates\.eu| + peertube\.euskarabildua\.eus| + peertube\.fenarinarsa\.com| + peertube\.fomin\.site| + peertube\.forsud\.be| + peertube\.francoispelletier\.org| + peertube\.freenet\.ru| + peertube\.freetalklive\.com| + peertube\.functional\.cafe| + peertube\.gardeludwig\.fr| + peertube\.gargantia\.fr| + peertube\.gcfamily\.fr| + peertube\.genma\.fr| + peertube\.get-racing\.de| + peertube\.gidikroon\.eu| + peertube\.gruezishop\.ch| + peertube\.habets\.house| + peertube\.hackerfraternity\.org| + peertube\.ichigo\.everydayimshuflin\.com| + peertube\.ignifi\.me| + peertube\.inapurna\.org| + peertube\.informaction\.info| + peertube\.interhop\.org| + peertube\.iselfhost\.com| + peertube\.it| + peertube\.jensdiemer\.de| + peertube\.joffreyverd\.fr| + peertube\.kalua\.im| + peertube\.kathryl\.fr| + peertube\.keazilla\.net| + peertube\.klaewyss\.fr| + peertube\.kodcast\.com| + peertube\.kx\.studio| + peertube\.lagvoid\.com| + peertube\.lavallee\.tech| + peertube\.le5emeaxe\.fr| + peertube\.lestutosdeprocessus\.fr| + peertube\.librenet\.co\.za| + peertube\.logilab\.fr| + peertube\.louisematic\.site| + peertube\.luckow\.org| + peertube\.luga\.at| + peertube\.lyceeconnecte\.fr| + peertube\.manalejandro\.com| + peertube\.marud\.fr| + peertube\.mattone\.net| + peertube\.maxweiss\.io| + peertube\.monlycee\.net| + peertube\.mxinfo\.fr| + peertube\.myrasp\.eu| + 
peertube\.nebelcloud\.de| + peertube\.netzbegruenung\.de| + peertube\.newsocial\.tech| + peertube\.nicolastissot\.fr| + peertube\.nz| + peertube\.offerman\.com| + peertube\.opencloud\.lu| + peertube\.orthus\.link| + peertube\.patapouf\.xyz| + peertube\.pi2\.dev| + peertube\.plataformess\.org| + peertube\.pl| + peertube\.portaesgnos\.org| + peertube\.r2\.enst\.fr| + peertube\.r5c3\.fr| + peertube\.radres\.xyz| + peertube\.red| + peertube\.robonomics\.network| + peertube\.rtnkv\.cloud| + peertube\.runfox\.tk| + peertube\.satoshishop\.de| + peertube\.scic-tetris\.org| + peertube\.securitymadein\.lu| + peertube\.semweb\.pro| + peertube\.social\.my-wan\.de| + peertube\.soykaf\.org| + peertube\.stefofficiel\.me| + peertube\.stream| + peertube\.su| + peertube\.swrs\.net| + peertube\.takeko\.cyou| + peertube\.tangentfox\.com| + peertube\.taxinachtegel\.de| + peertube\.thenewoil\.xyz| + peertube\.ti-fr\.com| + peertube\.tiennot\.net| + peertube\.troback\.com| + peertube\.tspu\.edu\.ru| + peertube\.tux\.ovh| + peertube\.tv| + peertube\.tweb\.tv| + peertube\.ucy\.de| + peertube\.underworld\.fr| + peertube\.us\.to| + peertube\.ventresmous\.fr| + peertube\.vlaki\.cz| + peertube\.w\.utnw\.de| + peertube\.westring\.digital| + peertube\.xwiki\.com| + peertube\.zoz-serv\.org| + peervideo\.ru| + periscope\.numenaute\.org| + perron-tube\.de| + petitlutinartube\.fr| + phijkchu\.com| + pierre\.tube| + piraten\.space| + play\.rosano\.ca| + player\.ojamajo\.moe| + plextube\.nl| + pocketnetpeertube1\.nohost\.me| + pocketnetpeertube3\.nohost\.me| + pocketnetpeertube4\.nohost\.me| + pocketnetpeertube5\.nohost\.me| + pocketnetpeertube6\.nohost\.me| + pt\.24-7\.ro| + pt\.apathy\.top| + pt\.diaspodon\.fr| + pt\.fedi\.tech| + pt\.maciej\.website| + ptb\.lunarviews\.net| + ptmir1\.inter21\.net| + ptmir2\.inter21\.net| + ptmir3\.inter21\.net| + ptmir4\.inter21\.net| + ptmir5\.inter21\.net| + ptube\.horsentiers\.fr| + ptube\.xmanifesto\.club| + queermotion\.org| + re-wizja\.re-medium\.com| + regarder\.sans\.pub| + ruraletv\.ovh| + s1\.gegenstimme\.tv| + s2\.veezee\.tube| + sdmtube\.fr| + sender-fm\.veezee\.tube| + serv1\.wiki-tube\.de| + serv3\.wiki-tube\.de| + sickstream\.net| + sleepy\.tube| + sovran\.video| + spectra\.video| + stream\.elven\.pw| + stream\.k-prod\.fr| + stream\.shahab\.nohost\.me| + streamsource\.video| + studios\.racer159\.com| + testtube\.florimond\.eu| + tgi\.hosted\.spacebear\.ee| + thaitube\.in\.th| + the\.jokertv\.eu| + theater\.ethernia\.net| + thecool\.tube| + tilvids\.com| + toob\.bub\.org| + tpaw\.video| + truetube\.media| + tuba\.lhub\.pl| + tube-aix-marseille\.beta\.education\.fr| + tube-amiens\.beta\.education\.fr| + tube-besancon\.beta\.education\.fr| + tube-bordeaux\.beta\.education\.fr| + tube-clermont-ferrand\.beta\.education\.fr| + tube-corse\.beta\.education\.fr| + tube-creteil\.beta\.education\.fr| + tube-dijon\.beta\.education\.fr| + tube-education\.beta\.education\.fr| + tube-grenoble\.beta\.education\.fr| + tube-lille\.beta\.education\.fr| + tube-limoges\.beta\.education\.fr| + tube-montpellier\.beta\.education\.fr| + tube-nancy\.beta\.education\.fr| + tube-nantes\.beta\.education\.fr| + tube-nice\.beta\.education\.fr| + tube-normandie\.beta\.education\.fr| + tube-orleans-tours\.beta\.education\.fr| + tube-outremer\.beta\.education\.fr| + tube-paris\.beta\.education\.fr| + tube-poitiers\.beta\.education\.fr| + tube-reims\.beta\.education\.fr| + tube-rennes\.beta\.education\.fr| + tube-strasbourg\.beta\.education\.fr| + tube-toulouse\.beta\.education\.fr| + 
tube-versailles\.beta\.education\.fr| + tube1\.it\.tuwien\.ac\.at| + tube\.abolivier\.bzh| + tube\.ac-amiens\.fr| + tube\.aerztefueraufklaerung\.de| + tube\.alexx\.ml| + tube\.amic37\.fr| + tube\.anufrij\.de| + tube\.apolut\.net| + tube\.arkhalabs\.io| + tube\.arthack\.nz| + tube\.as211696\.net| + tube\.avensio\.de| + tube\.azbyka\.ru| + tube\.azkware\.net| + tube\.bachaner\.fr| + tube\.bmesh\.org| + tube\.borked\.host| + tube\.bstly\.de| + tube\.chaoszone\.tv| + tube\.chatelet\.ovh| + tube\.cloud-libre\.eu| + tube\.cms\.garden| + tube\.cowfee\.moe| + tube\.cryptography\.dog| + tube\.darknight-coffee\.org| + tube\.dev\.lhub\.pl| + tube\.distrilab\.fr| + tube\.dsocialize\.net| + tube\.ebin\.club| + tube\.fdn\.fr| + tube\.florimond\.eu| + tube\.foxarmy\.ml| + tube\.foxden\.party| + tube\.frischesicht\.de| + tube\.futuretic\.fr| + tube\.gnous\.eu| + tube\.grap\.coop| + tube\.graz\.social| + tube\.grin\.hu| + tube\.hackerscop\.org| + tube\.hordearii\.fr| + tube\.jeena\.net| + tube\.kai-stuht\.com| + tube\.kockatoo\.org| + tube\.kotur\.org| + tube\.lacaveatonton\.ovh| + tube\.linkse\.media| + tube\.lokad\.com| + tube\.lucie-philou\.com| + tube\.melonbread\.xyz| + tube\.mfraters\.net| + tube\.motuhake\.xyz| + tube\.mrbesen\.de| + tube\.nah\.re| + tube\.nchoco\.net| + tube\.novg\.net| + tube\.nox-rhea\.org| + tube\.nuagelibre\.fr| + tube\.nx12\.net| + tube\.octaplex\.net| + tube\.odat\.xyz| + tube\.oisux\.org| + tube\.opportunis\.me| + tube\.org\.il| + tube\.ortion\.xyz| + tube\.others\.social| + tube\.picasoft\.net| + tube\.plomlompom\.com| + tube\.pmj\.rocks| + tube\.portes-imaginaire\.org| + tube\.pyngu\.com| + tube\.rebellion\.global| + tube\.rhythms-of-resistance\.org| + tube\.rita\.moe| + tube\.rsi\.cnr\.it| + tube\.s1gm4\.eu| + tube\.saumon\.io| + tube\.schleuss\.online| + tube\.schule\.social| + tube\.seditio\.fr| + tube\.shanti\.cafe| + tube\.shela\.nu| + tube\.skrep\.in| + tube\.sp-codes\.de| + tube\.sp4ke\.com| + tube\.superseriousbusiness\.org| + tube\.systest\.eu| + tube\.tappret\.fr| + tube\.tardis\.world| + tube\.toontoet\.nl| + tube\.tpshd\.de| + tube\.troopers\.agency| + tube\.tylerdavis\.xyz| + tube\.undernet\.uy| + tube\.vigilian-consulting\.nl| + tube\.vraphim\.com| + tube\.wehost\.lgbt| + tube\.wien\.rocks| + tube\.wolfe\.casa| + tube\.xd0\.de| + tube\.xy-space\.de| + tube\.yapbreak\.fr| + tubedu\.org| + tubes\.jodh\.us| + tuktube\.com| + turkum\.me| + tututu\.tube| + tuvideo\.encanarias\.info| + tv1\.cocu\.cc| + tv1\.gomntu\.space| + tv2\.cocu\.cc| + tv\.adn\.life| + tv\.atmx\.ca| + tv\.bitma\.st| + tv\.generallyrubbish\.net\.au| + tv\.lumbung\.space| + tv\.mattchristiansenmedia\.com| + tv\.netwhood\.online| + tv\.neue\.city| + tv\.piejacker\.net| + tv\.pirateradio\.social| + tv\.undersco\.re| + tvox\.ru| + twctube\.twc-zone\.eu| + unfilter\.tube| + v\.basspistol\.org| + v\.kisombrella\.top| + v\.lastorder\.xyz| + v\.lor\.sh| + v\.phreedom\.club| + v\.sil\.sh| + v\.szy\.io| + v\.xxxapex\.com| + veezee\.tube| + vid\.dascoyote\.xyz| + vid\.garwood\.io| + vid\.ncrypt\.at| + vid\.pravdastalina\.info| + vid\.qorg11\.net| + vid\.rajeshtaylor\.com| + vid\.samtripoli\.com| + vid\.werefox\.dev| + vid\.wildeboer\.net| + video-cave-v2\.de| + video\.076\.ne\.jp| + video\.1146\.nohost\.me| + video\.altertek\.org| + video\.anartist\.org| + video\.apps\.thedoodleproject\.net| + video\.artist\.cx| + video\.asgardius\.company| + video\.balsillie\.net| + video\.bards\.online| + video\.binarydad\.com| + video\.blast-info\.fr| + video\.catgirl\.biz| + video\.cigliola\.com| + 
video\.cm-en-transition\.fr| + video\.cnt\.social| + video\.coales\.co| + video\.codingfield\.com| + video\.comptoir\.net| + video\.comune\.trento\.it| + video\.cpn\.so| + video\.csc49\.fr| + video\.cybre\.town| + video\.demokratischer-sommer\.de| + video\.discord-insoumis\.fr| + video\.dolphincastle\.com| + video\.dresden\.network| + video\.ecole-89\.com| + video\.elgrillolibertario\.org| + video\.emergeheart\.info| + video\.eradicatinglove\.xyz| + video\.ethantheenigma\.me| + video\.exodus-privacy\.eu\.org| + video\.fbxl\.net| + video\.fhtagn\.org| + video\.greenmycity\.eu| + video\.guerredeclasse\.fr| + video\.gyt\.is| + video\.hackers\.town| + video\.hardlimit\.com| + video\.hooli\.co| + video\.igem\.org| + video\.internet-czas-dzialac\.pl| + video\.islameye\.com| + video\.kicik\.fr| + video\.kuba-orlik\.name| + video\.kyushojitsu\.ca| + video\.lavolte\.net| + video\.lespoesiesdheloise\.fr| + video\.liberta\.vip| + video\.liege\.bike| + video\.linc\.systems| + video\.linux\.it| + video\.linuxtrent\.it| + video\.lokal\.social| + video\.lono\.space| + video\.lunasqu\.ee| + video\.lundi\.am| + video\.marcorennmaus\.de| + video\.mass-trespass\.uk| + video\.mugoreve\.fr| + video\.mundodesconocido\.com| + video\.mycrowd\.ca| + video\.nogafam\.es| + video\.odayacres\.farm| + video\.ozgurkon\.org| + video\.p1ng0ut\.social| + video\.p3x\.de| + video\.pcf\.fr| + video\.pony\.gallery| + video\.potate\.space| + video\.pourpenser\.pro| + video\.progressiv\.dev| + video\.resolutions\.it| + video\.rw501\.de| + video\.screamer\.wiki| + video\.sdm-tools\.net| + video\.sftblw\.moe| + video\.shitposter\.club| + video\.skyn3t\.in| + video\.soi\.ch| + video\.stuartbrand\.co\.uk| + video\.thinkof\.name| + video\.toot\.pt| + video\.triplea\.fr| + video\.turbo\.chat| + video\.vaku\.org\.ua| + video\.veloma\.org| + video\.violoncello\.ch| + video\.wilkie\.how| + video\.wsf2021\.info| + videorelay\.co| + videos-passages\.huma-num\.fr| + videos\.3d-wolf\.com| + videos\.ahp-numerique\.fr| + videos\.alexandrebadalo\.pt| + videos\.archigny\.net| + videos\.benjaminbrady\.ie| + videos\.buceoluegoexisto\.com| + videos\.capas\.se| + videos\.casually\.cat| + videos\.cloudron\.io| + videos\.coletivos\.org| + videos\.danksquad\.org| + videos\.denshi\.live| + videos\.fromouter\.space| + videos\.fsci\.in| + videos\.globenet\.org| + videos\.hauspie\.fr| + videos\.hush\.is| + videos\.john-livingston\.fr| + videos\.jordanwarne\.xyz| + videos\.lavoixdessansvoix\.org| + videos\.leslionsfloorball\.fr| + videos\.lucero\.top| + videos\.martyn\.berlin| + videos\.mastodont\.cat| + videos\.monstro1\.com| + videos\.npo\.city| + videos\.optoutpod\.com| + videos\.petch\.rocks| + videos\.pzelawski\.xyz| + videos\.rampin\.org| + videos\.scanlines\.xyz| + videos\.shmalls\.pw| + videos\.sibear\.fr| + videos\.stadtfabrikanten\.org| + videos\.tankernn\.eu| + videos\.testimonia\.org| + videos\.thisishowidontdisappear\.com| + videos\.traumaheilung\.net| + videos\.trom\.tf| + videos\.wakkerewereld\.nu| + videos\.weblib\.re| + videos\.yesil\.club| + vids\.roshless\.me| + vids\.tekdmn\.me| + vidz\.dou\.bet| + vod\.lumikko\.dev| + vs\.uniter\.network| + vulgarisation-informatique\.fr| + watch\.breadtube\.tv| + watch\.deranalyst\.ch| + watch\.ignorance\.eu| + watch\.krazy\.party| + watch\.libertaria\.space| + watch\.rt4mn\.org| + watch\.softinio\.com| + watch\.tubelab\.video| + web-fellow\.de| + webtv\.vandoeuvre\.net| + wechill\.space| + wikileaks\.video| + wiwi\.video| + worldofvids\.com| + wwtube\.net| + www4\.mir\.inter21\.net| + 
www\.birkeundnymphe\.de| + www\.captain-german\.com| + www\.wiki-tube\.de| + xxivproduction\.video| + xxx\.noho\.st| + + # from youtube-dl peertube\.rainbowswingers\.net| tube\.stanisic\.nl| peer\.suiri\.us| From ea706726d6783cf738877d4c58fb62e2a355f9d0 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sun, 12 Sep 2021 20:56:19 +0000 Subject: [PATCH 083/641] [ITV] Fix extractor, add subtitles and thumbnails (#913) Original PR: https://github.com/ytdl-org/youtube-dl/pull/28955 (see also https://github.com/ytdl-org/youtube-dl/issues/28906#issuecomment-831008270) Closes #861, https://github.com/ytdl-org/youtube-dl/issues/28906, https://github.com/ytdl-org/youtube-dl/issues/29337, https://github.com/ytdl-org/youtube-dl/issues/29190, https://github.com/ytdl-org/youtube-dl/issues/28939, https://github.com/ytdl-org/youtube-dl/issues/29620 Authored-by: coletdjnz, sleaux-meaux, Vangelis66 --- yt_dlp/extractor/itv.py | 146 +++++++++++++++++++++++++++++----------- 1 file changed, 105 insertions(+), 41 deletions(-) diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py index 4122ac880c..3418689d67 100644 --- a/yt_dlp/extractor/itv.py +++ b/yt_dlp/extractor/itv.py @@ -5,10 +5,14 @@ import json from .common import InfoExtractor from .brightcove import BrightcoveNewIE + +from ..compat import compat_str from ..utils import ( + base_url, clean_html, determine_ext, extract_attributes, + ExtractorError, get_element_by_class, JSON_LD_RE, merge_dicts, @@ -16,6 +20,8 @@ from ..utils import ( smuggle_url, try_get, url_or_none, + url_basename, + urljoin, ) @@ -23,15 +29,32 @@ class ITVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P[0-9a-zA-Z]+)' _GEO_COUNTRIES = ['GB'] _TESTS = [{ - 'url': 'https://www.itv.com/hub/liar/2a4547a0012', + 'url': 'https://www.itv.com/hub/plebs/2a1873a0002', 'info_dict': { - 'id': '2a4547a0012', + 'id': '2a1873a0002', 'ext': 'mp4', - 'title': 'Liar - Series 2 - Episode 6', - 'description': 'md5:d0f91536569dec79ea184f0a44cca089', - 'series': 'Liar', - 'season_number': 2, - 'episode_number': 6, + 'title': 'Plebs - The Orgy', + 'description': 'md5:4d7159af53ebd5b36e8b3ec82a41fdb4', + 'series': 'Plebs', + 'season_number': 1, + 'episode_number': 1, + 'thumbnail': r're:https?://hubimages\.itv\.com/episode/2_1873_0002' + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'https://www.itv.com/hub/the-jonathan-ross-show/2a1166a0209', + 'info_dict': { + 'id': '2a1166a0209', + 'ext': 'mp4', + 'title': 'The Jonathan Ross Show - Series 17 - Episode 8', + 'description': 'md5:3023dcdd375db1bc9967186cdb3f1399', + 'series': 'The Jonathan Ross Show', + 'episode_number': 8, + 'season_number': 17, + 'thumbnail': r're:https?://hubimages\.itv\.com/episode/2_1873_0002' }, 'params': { # m3u8 download @@ -51,22 +74,16 @@ class ITVIE(InfoExtractor): 'only_matching': True, }] - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - params = extract_attributes(self._search_regex( - r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params')) - - ios_playlist_url = params.get('data-video-playlist') or params['data-video-id'] - hmac = params['data-video-hmac'] - headers = self.geo_verification_headers() - headers.update({ + def _generate_api_headers(self, hmac): + return merge_dicts({ 'Accept': 'application/vnd.itv.vod.playlist.v2+json', 'Content-Type': 'application/json', 'hmac': hmac.upper(), - }) - ios_playlist = self._download_json( - ios_playlist_url, video_id, data=json.dumps({ + }, 
self.geo_verification_headers()) + + def _call_api(self, video_id, playlist_url, headers, platform_tag, featureset, fatal=True): + return self._download_json( + playlist_url, video_id, data=json.dumps({ 'user': { 'itvUserId': '', 'entitlements': [], @@ -87,15 +104,56 @@ class ITVIE(InfoExtractor): }, 'variantAvailability': { 'featureset': { - 'min': ['hls', 'aes', 'outband-webvtt'], - 'max': ['hls', 'aes', 'outband-webvtt'] + 'min': featureset, + 'max': featureset }, - 'platformTag': 'dotcom' + 'platformTag': platform_tag } - }).encode(), headers=headers) - video_data = ios_playlist['Playlist']['Video'] - ios_base_url = video_data.get('Base') + }).encode(), headers=headers, fatal=fatal) + def _get_subtitles(self, video_id, variants, ios_playlist_url, headers, *args, **kwargs): + subtitles = {} + platform_tag_subs, featureset_subs = next( + ((platform_tag, featureset) + for platform_tag, featuresets in variants.items() for featureset in featuresets + if try_get(featureset, lambda x: x[2]) == 'outband-webvtt'), + (None, None)) + if platform_tag_subs or featureset_subs: + subs_playlist = self._call_api( + video_id, ios_playlist_url, headers, platform_tag_subs, featureset_subs, fatal=False) + subs = try_get(subs_playlist, lambda x: x['Playlist']['Video']['Subtitles'], list) or [] + for sub in subs: + if not isinstance(sub, dict): + continue + href = url_or_none(sub.get('Href')) + if not href: + continue + subtitles.setdefault('en', []).append({'url': href}) + return subtitles + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + params = extract_attributes(self._search_regex( + r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params')) + variants = self._parse_json( + try_get(params, lambda x: x['data-video-variants'], compat_str) or '{}', + video_id, fatal=False) + platform_tag_video, featureset_video = next( + ((platform_tag, featureset) + for platform_tag, featuresets in variants.items() for featureset in featuresets + if try_get(featureset, lambda x: x[:2]) == ['hls', 'aes']), + (None, None)) + if not platform_tag_video or not featureset_video: + raise ExtractorError('No downloads available', expected=True, video_id=video_id) + + ios_playlist_url = params.get('data-video-playlist') or params['data-video-id'] + headers = self._generate_api_headers(params['data-video-hmac']) + ios_playlist = self._call_api( + video_id, ios_playlist_url, headers, platform_tag_video, featureset_video) + + video_data = try_get(ios_playlist, lambda x: x['Playlist']['Video'], dict) or {} + ios_base_url = video_data.get('Base') formats = [] for media_file in (video_data.get('MediaFiles') or []): href = media_file.get('Href') @@ -113,20 +171,6 @@ class ITVIE(InfoExtractor): 'url': href, }) self._sort_formats(formats) - - subtitles = {} - subs = video_data.get('Subtitles') or [] - for sub in subs: - if not isinstance(sub, dict): - continue - href = url_or_none(sub.get('Href')) - if not href: - continue - subtitles.setdefault('en', []).append({ - 'url': href, - 'ext': determine_ext(href, 'vtt'), - }) - info = self._search_json_ld(webpage, video_id, default={}) if not info: json_ld = self._parse_json(self._search_regex( @@ -140,13 +184,33 @@ class ITVIE(InfoExtractor): info = self._json_ld(item, video_id, fatal=False) or {} break + thumbnails = [] + thumbnail_url = try_get(params, lambda x: x['data-video-posterframe'], compat_str) + if thumbnail_url: + thumbnails.extend([{ + 'url': thumbnail_url.format(width=1920, height=1080, quality=100, blur=0, bg='false'), 
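
Aside: `data-video-posterframe` carries a `str.format` template with named fields, which is what lets the code above request an exact 1920x1080 rendition. A minimal sketch with a hypothetical template URL (only the placeholder names are taken from the `format()` call above; the real URL shape comes from the page attribute):

```python
# Hypothetical template shape; placeholder names match the patch.
template = ('https://hubimages.itv.com/episode/2_1873_0002'
            '?w={width}&h={height}&q={quality}&blur={blur}&bg={bg}')
print(template.format(width=1920, height=1080, quality=100, blur=0, bg='false'))
# https://hubimages.itv.com/episode/2_1873_0002?w=1920&h=1080&q=100&blur=0&bg=false
```
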
+ 'width': 1920, + 'height': 1080, + }, { + 'url': urljoin(base_url(thumbnail_url), url_basename(thumbnail_url)), + 'preference': -2 + }]) + + thumbnail_url = self._html_search_meta(['og:image', 'twitter:image'], webpage, default=None) + if thumbnail_url: + thumbnails.append({ + 'url': thumbnail_url, + }) + self._remove_duplicate_formats(thumbnails) + return merge_dicts({ 'id': video_id, 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage), 'formats': formats, - 'subtitles': subtitles, + 'subtitles': self.extract_subtitles(video_id, variants, ios_playlist_url, headers), 'duration': parse_duration(video_data.get('Duration')), 'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)), + 'thumbnails': thumbnails }, info) From 9c95ac677e049df4ead19e5a0e4b66ee6b0ba96c Mon Sep 17 00:00:00 2001 From: zenerdi0de <83358565+zenerdi0de@users.noreply.github.com> Date: Mon, 13 Sep 2021 21:10:32 +0530 Subject: [PATCH 084/641] [Fancode] Fix live streams (#961) Authored by: zenerdi0de --- yt_dlp/extractor/fancode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/fancode.py b/yt_dlp/extractor/fancode.py index fd84a6e508..912feb7023 100644 --- a/yt_dlp/extractor/fancode.py +++ b/yt_dlp/extractor/fancode.py @@ -173,7 +173,7 @@ class FancodeLiveIE(FancodeVodIE): match_info = try_get(info_json, lambda x: x['data']['match']) - if match_info.get('status') != "LIVE": + if match_info.get('streamingStatus') != "STARTED": raise ExtractorError('The stream can\'t be accessed', expected=True) self._check_login_required(match_info.get('isUserEntitled'), True) # all live streams are premium only From e9a30b181e4c27319df5e97d46bdfeb61e0d07bf Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Tue, 14 Sep 2021 03:55:26 +0000 Subject: [PATCH 085/641] [Peertube] Add playlist extractor (#957) Authored by: u-spec-png --- yt_dlp/extractor/extractors.py | 5 +- yt_dlp/extractor/peertube.py | 94 +++++++++++++++++++++++++++++++--- 2 files changed, 90 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index e456475e51..bd6aabdd76 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1024,7 +1024,10 @@ from .patreon import ( ) from .pbs import PBSIE from .pearvideo import PearVideoIE -from .peertube import PeerTubeIE +from .peertube import ( + PeerTubeIE, + PeerTubePlaylistIE, +) from .peloton import ( PelotonIE, PelotonLiveIE diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py index 7576f683aa..b4f57a9990 100644 --- a/yt_dlp/extractor/peertube.py +++ b/yt_dlp/extractor/peertube.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import functools import re from .common import InfoExtractor @@ -13,6 +14,7 @@ from ..utils import ( unified_timestamp, url_or_none, urljoin, + OnDemandPagedList, ) @@ -1070,9 +1072,9 @@ class PeerTubeIE(InfoExtractor): 'uploader': 'Framasoft', 'uploader_id': '3', 'uploader_url': 'https://framatube.org/accounts/framasoft', - 'channel': 'Les vidéos de Framasoft', - 'channel_id': '2', - 'channel_url': 'https://framatube.org/video-channels/bf54d359-cfad-4935-9d45-9d6be93f63e8', + 'channel': 'A propos de PeerTube', + 'channel_id': '2215', + 'channel_url': 'https://framatube.org/video-channels/joinpeertube', 'language': 'en', 'license': 'Attribution - Share Alike', 'duration': 113, @@ -1128,20 +1130,20 @@ class PeerTubeIE(InfoExtractor): 'uploader': 'Drew 
DeVault',
+        }
+    }, {
-        'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
+        'url': 'https://peertube.debian.social/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
         'only_matching': True,
     }, {
         # nsfw
-        'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
+        'url': 'https://vod.ksite.de/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
         'only_matching': True,
     }, {
-        'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
+        'url': 'https://vod.ksite.de/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
         'only_matching': True,
     }, {
-        'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
+        'url': 'https://peertube.tv/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
         'only_matching': True,
     }, {
-        'url': 'peertube:video.blender.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
+        'url': 'peertube:framatube.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
         'only_matching': True,
     }]

@@ -1291,3 +1293,79 @@ class PeerTubeIE(InfoExtractor):
             'subtitles': subtitles,
             'webpage_url': webpage_url,
         }
+
+
+class PeerTubePlaylistIE(InfoExtractor):
+    IE_NAME = 'PeerTube:Playlist'
+    _VALID_URL = r'''(?x)
+                    (?:
+                        https?://(?P<host>%s)/w/p/
+                    )
+                    (?P<id>%s)
+                    ''' % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE)
+    _API_BASE = 'https://%s/api/v1/video-playlists/%s%s'
+    _TESTS = [{
+        'url': 'https://peertube.tux.ovh/w/p/3af94cba-95e8-4b74-b37a-807ab6d82526',
+        'info_dict': {
+            'id': '3af94cba-95e8-4b74-b37a-807ab6d82526',
+            'description': 'playlist',
+            'timestamp': 1611171863,
+            'title': 'playlist',
+        },
+        'playlist_mincount': 6,
+    }, {
+        'url': 'https://peertube.tux.ovh/w/p/wkyqcQBnsvFxtUB2pkYc1e',
+        'info_dict': {
+            'id': 'wkyqcQBnsvFxtUB2pkYc1e',
+            'description': 'Cette liste de vidéos contient uniquement les jeux qui peuvent être terminés en une seule vidéo.',
+            'title': 'Let\'s Play',
+            'timestamp': 1604147331,
+        },
+        'playlist_mincount': 6,
+    }, {
+        'url': 'https://peertube.debian.social/w/p/hFdJoTuyhNJVa1cDWd1d12',
+        'info_dict': {
+            'id': 'hFdJoTuyhNJVa1cDWd1d12',
+            'description': 'Diversas palestras do Richard Stallman no Brasil.',
+            'title': 'Richard Stallman no Brasil',
+            'timestamp': 1599676222,
+        },
+        'playlist_mincount': 9,
+    }]
+    _PAGE_SIZE = 30
+
+    def _call_api(self, host, uuid, path, note=None, errnote=None, fatal=True):
+        return self._download_json(
+            self._API_BASE % (host, uuid, path), uuid,
+            note=note, errnote=errnote, fatal=fatal)
+
+    def _fetch_page(self, host, uuid, page):
+        page += 1
+        video_data = self._call_api(
+            host, uuid, f'/videos?sort=-createdAt&start={self._PAGE_SIZE * (page - 1)}&count={self._PAGE_SIZE}',
+            note=f'Downloading page {page}').get('data', [])
+        for video in video_data:
+            shortUUID = try_get(video, lambda x: x['video']['shortUUID'])
+            video_title = try_get(video, lambda x: x['video']['name'])
+            yield self.url_result(
+                f'https://{host}/w/{shortUUID}', PeerTubeIE.ie_key(),
+                video_id=shortUUID, video_title=video_title)
+
+    def _real_extract(self, url):
+        host, playlist_id = self._match_valid_url(url).group('host', 'id')
+        playlist_info = self._call_api(host, playlist_id, '', note='Downloading playlist information', fatal=False)
+
+        playlist_title = playlist_info.get('displayName')
+        playlist_description = playlist_info.get('description')
+        playlist_timestamp = unified_timestamp(playlist_info.get('createdAt'))
+        channel = try_get(playlist_info, lambda x: x['ownerAccount']['name'])
+        channel_id = try_get(playlist_info, lambda x: x['ownerAccount']['id'])
+        thumbnail = playlist_info.get('thumbnailPath')
+        thumbnail = f'https://{host}{thumbnail}'
+
+        entries = OnDemandPagedList(functools.partial(
+            self._fetch_page, host, playlist_id), self._PAGE_SIZE)
+
+        return self.playlist_result(
+            entries, playlist_id, playlist_title, playlist_description,
+            timestamp=playlist_timestamp, channel=channel, channel_id=channel_id, thumbnail=thumbnail)

From 40b18348e70abdbdbd7445404336f96a525f9457 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81kos=20S=C3=BClyi?=
Date: Tue, 14 Sep 2021 20:23:47 +0200
Subject: [PATCH 086/641] [cleanup] Improve `make clean-test` (#972)

Authored by: sulyi
---
 .gitignore | 2 +-
 Makefile   | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 443e637aee..88a9605f7b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -42,7 +42,7 @@ cookies
 *.description

 # Allow config/media files in testdata
-!test/testdata/**
+!test/**

 # Python
 *.pyc
diff --git a/Makefile b/Makefile
index 4ee1095d16..763d5223df 100644
--- a/Makefile
+++ b/Makefile
@@ -13,7 +13,9 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites com
 .PHONY: all clean install test tar pypi-files completions ot offlinetest codetest supportedsites

 clean-test:
-	rm -rf *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png *.frag *.frag.urls *.frag.aria2 test/testdata/player-*.js *.opus *.webp *.ttml *.vtt *.jpeg
+	rm -rf *.3gp *.annotations.xml *.ape *.avi *.description *.dump *.flac *.flv *.frag *.frag.aria2 *.frag.urls \
+	*.info.json *.jpeg *.jpg *.live_chat.json *.m4a *.m4v *.mkv *.mp3 *.mp4 *.ogg *.opus *.part* *.png *.sbv *.srt \
+	*.swf *.swp *.ttml *.vtt *.wav *.webm *.webp *.ytdl test/testdata/player-*.js
 clean-dist:
 	rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS .mailmap
 clean-cache:

From 1722099ded6d2d3568197c412e740fda82d188d4 Mon Sep 17 00:00:00 2001
From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com>
Date: Wed, 15 Sep 2021 02:23:36 +0530
Subject: [PATCH 087/641] [Mxplayer] Use mobile API (#966)

Authored by: Ashish0804
---
 yt_dlp/extractor/mxplayer.py | 205 +++++++++++++++++++++--------------
 1 file changed, 126 insertions(+), 79 deletions(-)

diff --git a/yt_dlp/extractor/mxplayer.py b/yt_dlp/extractor/mxplayer.py
index 0f1c439aa8..5874556e34 100644
--- a/yt_dlp/extractor/mxplayer.py
+++ b/yt_dlp/extractor/mxplayer.py
@@ -3,43 +3,68 @@ from __future__ import unicode_literals

 from .common import InfoExtractor
 from ..compat import compat_str
-from ..utils import (
-    ExtractorError,
-    js_to_json,
-    qualities,
-    try_get,
-    url_or_none,
-    urljoin,
-)
+from ..utils import try_get


 class MxplayerIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?mxplayer\.in/(?:movie|show/[-\w]+/[-\w]+)/(?P<display_id>[-\w]+)-(?P<id>\w+)'
+    _VALID_URL = r'https?://(?:www\.)?mxplayer\.in/(?P<type>movie|show/[-\w]+/[-\w]+)/(?P<display_id>[-\w]+)-(?P<id>\w+)'
     _TESTS = [{
+        'url': 'https://www.mxplayer.in/show/watch-my-girlfriend-is-an-alien-hindi-dubbed/season-1/episode-1-online-9d2013d31d5835bb8400e3b3c5e7bb72',
+        'info_dict': {
+            'id': '9d2013d31d5835bb8400e3b3c5e7bb72',
+            'ext': 'mp4',
+            'title': 'Episode 1',
+            'description': 'md5:62ed43eb9fec5efde5cf3bd1040b7670',
+            'season_number': 1,
+            'episode_number': 1,
+            'duration': 2451,
+            'season': 'Season 1',
+            'series': 'My Girlfriend Is An Alien (Hindi
Dubbed)', + 'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/9d2013d31d5835bb8400e3b3c5e7bb72/en/16x9/320x180/9562f5f8df42cad09c9a9c4e69eb1567_1920x1080.webp', + 'episode': 'Episode 1' + }, + 'params': { + 'format': 'bv', + 'skip_download': True, + }, + }, { 'url': 'https://www.mxplayer.in/movie/watch-knock-knock-hindi-dubbed-movie-online-b9fa28df3bfb8758874735bbd7d2655a?watch=true', 'info_dict': { 'id': 'b9fa28df3bfb8758874735bbd7d2655a', 'ext': 'mp4', 'title': 'Knock Knock (Hindi Dubbed)', - 'description': 'md5:b195ba93ff1987309cfa58e2839d2a5b' + 'description': 'md5:b195ba93ff1987309cfa58e2839d2a5b', + 'season_number': 0, + 'episode_number': 0, + 'duration': 5970, + 'season': 'Season 0', + 'series': None, + 'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/b9fa28df3bfb8758874735bbd7d2655a/en/16x9/320x180/test_pic1588676032011.webp', + 'episode': 'Episode 0' }, 'params': { + 'format': 'bv', 'skip_download': True, - 'format': 'bestvideo' - } + }, }, { 'url': 'https://www.mxplayer.in/show/watch-shaitaan/season-1/the-infamous-taxi-gang-of-meerut-online-45055d5bcff169ad48f2ad7552a83d6c', 'info_dict': { 'id': '45055d5bcff169ad48f2ad7552a83d6c', - 'ext': 'm3u8', + 'ext': 'mp4', 'title': 'The infamous taxi gang of Meerut', 'description': 'md5:033a0a7e3fd147be4fb7e07a01a3dc28', + 'season_number': 1, + 'episode_number': 1, + 'duration': 2332, 'season': 'Season 1', - 'series': 'Shaitaan' + 'series': 'Shaitaan', + 'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/45055d5bcff169ad48f2ad7552a83d6c/en/16x9/320x180/voot_8e7d5f8d8183340869279c732c1e3a43.webp', + 'episode': 'Episode 1' }, 'params': { + 'format': 'best', 'skip_download': True, - } + }, }, { 'url': 'https://www.mxplayer.in/show/watch-aashram/chapter-1/duh-swapna-online-d445579792b0135598ba1bc9088a84cb', 'info_dict': { @@ -47,88 +72,110 @@ class MxplayerIE(InfoExtractor): 'ext': 'mp4', 'title': 'Duh Swapna', 'description': 'md5:35ff39c4bdac403c53be1e16a04192d8', + 'season_number': 1, + 'episode_number': 3, + 'duration': 2568, 'season': 'Chapter 1', - 'series': 'Aashram' + 'series': 'Aashram', + 'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/d445579792b0135598ba1bc9088a84cb/en/4x3/1600x1200/test_pic1624819307993.webp', + 'episode': 'Episode 3' }, - 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'], 'params': { + 'format': 'bv', 'skip_download': True, - 'format': 'bestvideo' - } + }, + }, { + 'url': 'https://www.mxplayer.in/show/watch-dangerous/season-1/chapter-1-online-5a351b4f9fb69436f6bd6ae3a1a75292', + 'info_dict': { + 'id': '5a351b4f9fb69436f6bd6ae3a1a75292', + 'ext': 'mp4', + 'title': 'Chapter 1', + 'description': 'md5:233886b8598bc91648ac098abe1d288f', + 'season_number': 1, + 'episode_number': 1, + 'duration': 1305, + 'season': 'Season 1', + 'series': 'Dangerous', + 'thumbnail': 'https://qqcdnpictest.mxplay.com/pic/5a351b4f9fb69436f6bd6ae3a1a75292/en/4x3/1600x1200/test_pic1624706302350.webp', + 'episode': 'Episode 1' + }, + 'params': { + 'format': 'bv', + 'skip_download': True, + }, + }, { + 'url': 'https://www.mxplayer.in/movie/watch-the-attacks-of-2611-movie-online-0452f0d80226c398d63ce7e3ea40fa2d', + 'info_dict': { + 'id': '0452f0d80226c398d63ce7e3ea40fa2d', + 'ext': 'mp4', + 'title': 'The Attacks of 26/11', + 'description': 'md5:689bacd29e97b3f31eaf519eb14127e5', + 'season_number': 0, + 'episode_number': 0, + 'duration': 6085, + 'season': 'Season 0', + 'series': None, + 'thumbnail': 
'https://qqcdnpictest.mxplay.com/pic/0452f0d80226c398d63ce7e3ea40fa2d/en/16x9/320x180/00c8955dab5e5d340dbde643f9b1f6fd_1920x1080.webp', + 'episode': 'Episode 0' + }, + 'params': { + 'format': 'best', + 'skip_download': True, + }, }] - def _get_stream_urls(self, video_dict): - stream_provider_dict = try_get( - video_dict, - lambda x: x['stream'][x['stream']['provider']]) - if not stream_provider_dict: - raise ExtractorError('No stream provider found', expected=True) - - for stream_name, stream in stream_provider_dict.items(): - if stream_name in ('hls', 'dash', 'hlsUrl', 'dashUrl'): - stream_type = stream_name.replace('Url', '') - if isinstance(stream, dict): - for quality, stream_url in stream.items(): - if stream_url: - yield stream_type, quality, stream_url - else: - yield stream_type, 'base', stream - def _real_extract(self, url): - display_id, video_id = self._match_valid_url(url).groups() - webpage = self._download_webpage(url, video_id) - - source = self._parse_json( - js_to_json(self._html_search_regex( - r'(?s)).*', - webpage, 'WindowState')), - video_id) - if not source: - raise ExtractorError('Cannot find source', expected=True) - - config_dict = source['config'] - video_dict = source['entities'][video_id] + type, display_id, video_id = self._match_valid_url(url).groups() + type = 'movie_film' if type == 'movie' else 'tvshow_episode' + API_URL = 'https://androidapi.mxplay.com/v1/detail/' + headers = { + 'X-Av-Code': '23', + 'X-Country': 'IN', + 'X-Platform': 'android', + 'X-App-Version': '1370001318', + 'X-Resolution': '3840x2160', + } + data_json = self._download_json(f'{API_URL}{type}/{video_id}', display_id, headers=headers)['profile'] + season, series = None, None + for dct in data_json.get('levelInfos', []): + if dct.get('type') == 'tvshow_season': + season = dct.get('name') + elif dct.get('type') == 'tvshow_show': + series = dct.get('name') thumbnails = [] - for i in video_dict.get('imageInfo') or []: + for thumb in data_json.get('poster', []): thumbnails.append({ - 'url': urljoin(config_dict['imageBaseUrl'], i['url']), - 'width': i['width'], - 'height': i['height'], + 'url': thumb.get('url'), + 'width': thumb.get('width'), + 'height': thumb.get('height'), }) formats = [] - get_quality = qualities(['main', 'base', 'high']) - for stream_type, quality, stream_url in self._get_stream_urls(video_dict): - format_url = url_or_none(urljoin(config_dict['videoCdnBaseUrl'], stream_url)) - if not format_url: - continue - if stream_type == 'dash': - dash_formats = self._extract_mpd_formats( - format_url, video_id, mpd_id='dash-%s' % quality, headers={'Referer': url}) - for frmt in dash_formats: - frmt['quality'] = get_quality(quality) - formats.extend(dash_formats) - dash_formats_h265 = self._extract_mpd_formats( - format_url.replace('h264_high', 'h265_main'), video_id, mpd_id='dash-%s' % quality, headers={'Referer': url}, fatal=False) - for frmt in dash_formats_h265: - frmt['quality'] = get_quality(quality) - formats.extend(dash_formats_h265) - elif stream_type == 'hls': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, fatal=False, - m3u8_id='hls-%s' % quality, quality=get_quality(quality), ext='mp4')) - + subtitles = {} + for dct in data_json.get('playInfo', []): + if dct.get('extension') == 'mpd': + frmt, subs = self._extract_mpd_formats_and_subtitles(dct.get('playUrl'), display_id, fatal=False) + formats.extend(frmt) + subtitles = self._merge_subtitles(subtitles, subs) + elif dct.get('extension') == 'm3u8': + frmt, subs = 
self._extract_m3u8_formats_and_subtitles(dct.get('playUrl'), display_id, fatal=False) + formats.extend(frmt) + subtitles = self._merge_subtitles(subtitles, subs) self._sort_formats(formats) return { 'id': video_id, 'display_id': display_id, - 'title': video_dict['title'] or self._og_search_title(webpage), - 'formats': formats, - 'description': video_dict.get('description'), - 'season': try_get(video_dict, lambda x: x['container']['title']), - 'series': try_get(video_dict, lambda x: x['container']['container']['title']), + 'title': data_json.get('name') or display_id, + 'description': data_json.get('description'), + 'season_number': data_json.get('seasonNum'), + 'episode_number': data_json.get('episodeNum'), + 'duration': data_json.get('duration'), + 'season': season, + 'series': series, 'thumbnails': thumbnails, + 'formats': formats, + 'subtitles': subtitles, } From cc33cc4395143256c2781c5e607fc76215baef16 Mon Sep 17 00:00:00 2001 From: LE Date: Tue, 14 Sep 2021 16:58:49 -0400 Subject: [PATCH 088/641] [VrtNU] Handle login errors (#977) Authored by: llacb47 --- yt_dlp/extractor/canvas.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/yt_dlp/extractor/canvas.py b/yt_dlp/extractor/canvas.py index b417f8577a..49e7e4e390 100644 --- a/yt_dlp/extractor/canvas.py +++ b/yt_dlp/extractor/canvas.py @@ -286,6 +286,9 @@ class VrtNUIE(GigyaBaseIE): 'targetEnv': 'jssdk', })) + if auth_info.get('errorDetails'): + raise ExtractorError('Unable to login: VrtNU said: ' + auth_info.get('errorDetails'), expected=True) + # Sometimes authentication fails for no good reason, retry login_attempt = 1 while login_attempt <= 3: From b5a39ed43beb831dfd4ad59cc4340031e87030bc Mon Sep 17 00:00:00 2001 From: Sipherdrakon <64430430+Sipherdrakon@users.noreply.github.com> Date: Tue, 14 Sep 2021 20:25:03 -0400 Subject: [PATCH 089/641] [DIYNetwork] Support new format (#934) Authored by: Sipherdrakon --- yt_dlp/extractor/dplay.py | 20 ++++++++++++++++++++ yt_dlp/extractor/extractors.py | 3 ++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py index fcc4ce4dcc..f2aca4d67a 100644 --- a/yt_dlp/extractor/dplay.py +++ b/yt_dlp/extractor/dplay.py @@ -389,3 +389,23 @@ class ScienceChannelIE(DiscoveryPlusIE): _PRODUCT = 'sci' _API_URL = 'us1-prod-direct.sciencechannel.com' + + +class DIYNetworkIE(DiscoveryPlusIE): + _VALID_URL = r'https?://(?:watch\.)?diynetwork\.com/video' + DPlayIE._PATH_REGEX + _TESTS = [{ + 'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas', + 'info_dict': { + 'id': '2309730', + 'display_id': 'pool-kings-diy-network/bringing-beach-life-to-texas', + 'ext': 'mp4', + 'title': 'Bringing Beach Life to Texas', + 'description': 'The Pool Kings give a family a day at the beach in their own backyard.', + 'season_number': 10, + 'episode_number': 2, + }, + 'skip': 'Available for Premium users', + }] + + _PRODUCT = 'diy' + _API_URL = 'us1-prod-direct.watch.diynetwork.com' diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index bd6aabdd76..ecbb879770 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -339,7 +339,8 @@ from .dplay import ( DPlayIE, DiscoveryPlusIE, HGTVDeIE, - ScienceChannelIE + ScienceChannelIE, + DIYNetworkIE ) from .dreisat import DreiSatIE from .drbonanza import DRBonanzaIE From 92790da2bb64de67cdc8ec9d8cc459e631feff03 Mon Sep 17 00:00:00 2001 From: nyuszika7h Date: Wed, 15 Sep 2021 03:45:10 +0200 Subject: [PATCH 090/641] [radlive] Add 
new extractor (#870)

Closes #312
Authored by: nyuszika7h
---
 yt_dlp/extractor/extractors.py |   5 +
 yt_dlp/extractor/radlive.py    | 179 +++++++++++++++++++++++++++++++++
 2 files changed, 184 insertions(+)
 create mode 100644 yt_dlp/extractor/radlive.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index ecbb879770..bb1e21a07a 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1123,6 +1123,11 @@ from .radiode import RadioDeIE
 from .radiojavan import RadioJavanIE
 from .radiobremen import RadioBremenIE
 from .radiofrance import RadioFranceIE
+from .radlive import (
+    RadLiveIE,
+    RadLiveChannelIE,
+    RadLiveSeasonIE,
+)
 from .rai import (
     RaiPlayIE,
     RaiPlayLiveIE,
diff --git a/yt_dlp/extractor/radlive.py b/yt_dlp/extractor/radlive.py
new file mode 100644
index 0000000000..2de7ab04a9
--- /dev/null
+++ b/yt_dlp/extractor/radlive.py
@@ -0,0 +1,179 @@
+import json
+
+from ..utils import ExtractorError, traverse_obj, try_get, unified_timestamp
+from .common import InfoExtractor
+
+
+class RadLiveIE(InfoExtractor):
+    IE_NAME = 'radlive'
+    _VALID_URL = r'https?://(?:www\.)?rad\.live/content/(?P<content_type>feature|episode)/(?P<id>[a-f0-9-]+)'
+    _TESTS = [{
+        'url': 'https://rad.live/content/feature/dc5acfbc-761b-4bec-9564-df999905116a',
+        'md5': '6219d5d31d52de87d21c9cf5b7cb27ff',
+        'info_dict': {
+            'id': 'dc5acfbc-761b-4bec-9564-df999905116a',
+            'ext': 'mp4',
+            'title': 'Deathpact - Digital Mirage 2 [Full Set]',
+            'language': 'en',
+            'thumbnail': 'https://static.12core.net/cb65ae077a079c68380e38f387fbc438.png',
+            'description': '',
+            'release_timestamp': 1600185600.0,
+            'channel': 'Proximity',
+            'channel_id': '9ce6dd01-70a4-4d59-afb6-d01f807cd009',
+            'channel_url': 'https://rad.live/content/channel/9ce6dd01-70a4-4d59-afb6-d01f807cd009',
+        }
+    }, {
+        'url': 'https://rad.live/content/episode/bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf',
+        'md5': '40b2175f347592125d93e9a344080125',
+        'info_dict': {
+            'id': 'bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf',
+            'ext': 'mp4',
+            'title': 'E01: Bad Jokes 1',
+            'language': 'en',
+            'thumbnail': 'https://lsp.littlstar.com/channels/WHISTLE/BAD_JOKES/SEASON_1/BAD_JOKES_101/poster.jpg',
+            'description': 'Bad Jokes - Champions, Adam Pally, Super Troopers, Team Edge and 2Hype',
+            'release_timestamp': None,
+            'channel': None,
+            'channel_id': None,
+            'channel_url': None,
+            'episode': 'E01: Bad Jokes 1',
+            'episode_number': 1,
+            'episode_id': '336',
+        },
+    }]
+
+    def _real_extract(self, url):
+        content_type, video_id = self._match_valid_url(url).groups()
+
+        webpage = self._download_webpage(url, video_id)
+
+        content_info = json.loads(self._search_regex(
+            r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>',
+            webpage, 'video info', group='json'))['props']['pageProps']['initialContentData']
+        video_info = content_info[content_type]
+
+        if not video_info:
+            raise ExtractorError('Unable to extract video info, make sure the URL is valid')
+
+        formats = self._extract_m3u8_formats(video_info['assets']['videos'][0]['url'], video_id)
+        self._sort_formats(formats)
+
+        data = video_info.get('structured_data', {})
+
+        release_date = unified_timestamp(traverse_obj(data, ('releasedEvent', 'startDate')))
+        channel = next(iter(content_info.get('channels', [])), {})
+        channel_id = channel.get('lrn', '').split(':')[-1] or None
+
+        result = {
+            'id': video_id,
+            'title': video_info['title'],
+            'formats': formats,
+            'language': traverse_obj(data, ('potentialAction', 'target', 'inLanguage')),
+            'thumbnail': traverse_obj(data, ('image', 'contentUrl')),
+            'description': data.get('description'),
+            'release_timestamp': release_date,
+            'channel': channel.get('name'),
+            'channel_id': channel_id,
+            'channel_url': f'https://rad.live/content/channel/{channel_id}' if channel_id else None,
+
+        }
+        if content_type == 'episode':
+            result.update({
+                # TODO: Get season number when downloading single episode
+                'episode': video_info.get('title'),
+                'episode_number': video_info.get('number'),
+                'episode_id': video_info.get('id'),
+            })
+
+        return result
+
+
+class RadLiveSeasonIE(RadLiveIE):
+    IE_NAME = 'radlive:season'
+    _VALID_URL = r'https?://(?:www\.)?rad\.live/content/season/(?P<id>[a-f0-9-]+)'
+    _TESTS = [{
+        'url': 'https://rad.live/content/season/08a290f7-c9ef-4e22-9105-c255995a2e75',
+        'md5': '40b2175f347592125d93e9a344080125',
+        'info_dict': {
+            'id': '08a290f7-c9ef-4e22-9105-c255995a2e75',
+            'title': 'Bad Jokes - Season 1',
+        },
+        'playlist_mincount': 5,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if RadLiveIE.suitable(url) else super(RadLiveSeasonIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        season_id = self._match_id(url)
+        webpage = self._download_webpage(url, season_id)
+
+        content_info = json.loads(self._search_regex(
+            r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>',
+            webpage, 'video info', group='json'))['props']['pageProps']['initialContentData']
+        video_info = content_info['season']
+
+        entries = [{
+            '_type': 'url_transparent',
+            'id': episode['structured_data']['url'].split('/')[-1],
+            'url': episode['structured_data']['url'],
+            'series': try_get(content_info, lambda x: x['series']['title']),
+            'season': video_info['title'],
+            'season_number': video_info.get('number'),
+            'season_id': video_info.get('id'),
+            'ie_key': RadLiveIE.ie_key(),
+        } for episode in video_info['episodes']]
+
+        return self.playlist_result(entries, season_id, video_info.get('title'))
+
+
+class RadLiveChannelIE(RadLiveIE):
+    IE_NAME = 'radlive:channel'
+    _VALID_URL = r'https?://(?:www\.)?rad\.live/content/channel/(?P<id>[a-f0-9-]+)'
+    _TESTS = [{
+        'url': 'https://rad.live/content/channel/5c4d8df4-6fa0-413c-81e3-873479b49274',
+        'md5': '625156a08b7f2b0b849f234e664457ac',
+        'info_dict': {
+            'id': '5c4d8df4-6fa0-413c-81e3-873479b49274',
+            'title': 'Whistle Sports',
+        },
+        'playlist_mincount': 7,
+    }]
+
+    _QUERY = '''
+query WebChannelListing ($lrn: ID!)
{ + channel (id:$lrn) { + name + features { + structured_data + } + } +}''' + + @classmethod + def suitable(cls, url): + return False if RadLiveIE.suitable(url) else super(RadLiveChannelIE, cls).suitable(url) + + def _real_extract(self, url): + channel_id = self._match_id(url) + + graphql = self._download_json( + 'https://content.mhq.12core.net/graphql', channel_id, + headers={'Content-Type': 'application/json'}, + data=json.dumps({ + 'query': self._QUERY, + 'variables': {'lrn': f'lrn:12core:media:content:channel:{channel_id}'} + }).encode('utf-8')) + + data = traverse_obj(graphql, ('data', 'channel')) + if not data: + raise ExtractorError('Unable to extract video info, make sure the URL is valid') + + entries = [{ + '_type': 'url_transparent', + 'url': feature['structured_data']['url'], + 'ie_key': RadLiveIE.ie_key(), + } for feature in data['features']] + + return self.playlist_result(entries, channel_id, data.get('name')) From a8cb7eca615c9d80f458c65a2a24bc3a7fe43118 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Wed, 15 Sep 2021 07:34:54 +0530 Subject: [PATCH 091/641] [HiDive] Fix extractor (#958) Closes #952, #408 Authored by: Ashish0804 --- yt_dlp/extractor/hidive.py | 85 ++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 40 deletions(-) diff --git a/yt_dlp/extractor/hidive.py b/yt_dlp/extractor/hidive.py index a5aa0853ce..90457b77ea 100644 --- a/yt_dlp/extractor/hidive.py +++ b/yt_dlp/extractor/hidive.py @@ -1,12 +1,13 @@ # coding: utf-8 from __future__ import unicode_literals +import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, + try_get, url_or_none, urlencode_postdata, ) @@ -57,48 +58,51 @@ class HiDiveIE(InfoExtractor): mobj = self._match_valid_url(url) title, key = mobj.group('title', 'key') video_id = '%s/%s' % (title, key) - - settings = self._download_json( - 'https://www.hidive.com/play/settings', video_id, - data=urlencode_postdata({ - 'Title': title, - 'Key': key, - 'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783', - })) - - restriction = settings.get('restrictionReason') - if restriction == 'RegionRestricted': - self.raise_geo_restricted() - - if restriction and restriction != 'None': - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, restriction), expected=True) - + webpage = self._download_webpage(url, video_id, fatal=False) + data_videos = re.findall(r'data-video=\"([^\"]+)\"\s?data-captions=\"([^\"]+)\"', webpage) formats = [] subtitles = {} - for rendition_id, rendition in settings['renditions'].items(): - bitrates = rendition.get('bitrates') - if not isinstance(bitrates, dict): - continue - m3u8_url = url_or_none(bitrates.get('hls')) - if not m3u8_url: - continue - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='%s-hls' % rendition_id, fatal=False)) - cc_files = rendition.get('ccFiles') - if not isinstance(cc_files, list): - continue - for cc_file in cc_files: - if not isinstance(cc_file, list) or len(cc_file) < 3: + for data_video in data_videos: + _, _, _, version, audio, _, extra = data_video[0].split('_') + caption = data_video[1] + + settings = self._download_json( + 'https://www.hidive.com/play/settings', video_id, + data=urlencode_postdata({ + 'Title': title, + 'Key': key, + 'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783', + 'Version': version, + 'Audio': audio, + 'Captions': caption, + 'Extra': extra, + })) + + restriction = 
settings.get('restrictionReason') + if restriction == 'RegionRestricted': + self.raise_geo_restricted() + + if restriction and restriction != 'None': + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, restriction), expected=True) + + for rendition_id, rendition in settings['renditions'].items(): + m3u8_url = url_or_none(try_get(rendition, lambda x: x['bitrates']['hls'])) + if not m3u8_url: continue - cc_lang = cc_file[0] - cc_url = url_or_none(cc_file[2]) - if not isinstance(cc_lang, compat_str) or not cc_url: - continue - subtitles.setdefault(cc_lang, []).append({ - 'url': cc_url, - }) + frmt = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='%s-%s-%s-%s' % (version, audio, extra, caption), fatal=False) + for f in frmt: + f['language'] = audio + formats.extend(frmt) + + for cc_file in rendition.get('ccFiles', []): + cc_url = url_or_none(try_get(cc_file, lambda x: x[2])) + # name is used since we cant distinguish subs with same language code + cc_lang = try_get(cc_file, (lambda x: x[1].replace(' ', '-').lower(), lambda x: x[0]), str) + if cc_url and cc_lang: + subtitles.setdefault(cc_lang, []).append({'url': cc_url}) self._sort_formats(formats) season_number = int_or_none(self._search_regex( @@ -114,4 +118,5 @@ class HiDiveIE(InfoExtractor): 'series': title, 'season_number': season_number, 'episode_number': episode_number, + 'http_headers': {'Referer': url} } From d21bba78533ca53dac179e4a4f1cfaa9ba241b2f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 14 Sep 2021 12:03:26 +0530 Subject: [PATCH 092/641] [options] Strip spaces in list-like switches --- yt_dlp/options.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 7cabc35ae5..2ff0fbfc11 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -116,19 +116,19 @@ def parseOpts(overrideArguments=None): return ''.join(opts) - def _list_from_options_callback(option, opt_str, value, parser, append=True, delim=','): + def _list_from_options_callback(option, opt_str, value, parser, append=True, delim=',', process=str.strip): # append can be True, False or -1 (prepend) current = getattr(parser.values, option.dest) if append else [] - value = [value] if delim is None else value.split(delim) + value = [process(value)] if delim is None else list(map(process, value.split(delim))) setattr( parser.values, option.dest, current + value if append is True else value + current) def _set_from_options_callback( - option, opt_str, value, parser, - delim=',', allowed_values=None, process=str.lower, aliases={}): + option, opt_str, value, parser, delim=',', allowed_values=None, aliases={}, + process=lambda x: x.lower().strip()): current = getattr(parser.values, option.dest) - values = [process(value)] if delim is None else process(value).split(delim)[::-1] + values = [process(value)] if delim is None else list(map(process, value.split(delim)[::-1])) while values: actual_val = val = values.pop() if val == 'all': @@ -275,8 +275,7 @@ def parseOpts(overrideArguments=None): 'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-attach-info-json', 'embed-thumbnail-atomicparsley', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', - }, - 'aliases': { + }, 'aliases': { 'youtube-dl': ['-multistreams', 'all'], 'youtube-dlc': ['-no-youtube-channel-redirect', '-no-live-chat', 'all'], } From dbf7eca917e7189ad0224fd3b7b6068b4940b71b 
Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Wed, 15 Sep 2021 00:21:22 +0530
Subject: [PATCH 093/641] [soundcloud] Update `_CLIENT_ID`

Related: #975
---
 yt_dlp/extractor/soundcloud.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index 77e248a477..1503ae586a 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -305,7 +305,7 @@ class SoundcloudIE(InfoExtractor):
                 raise

     def _real_initialize(self):
-        self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'fSSdm5yTnDka1g0Fz1CO5Yx6z0NbeHAj'
+        self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'a3e059563d7fd3372b49b37f00a00bcf'
         self._login()

     _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'

From f7590d47641cedbf630b909aa8f53930c4a9ce5c Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Thu, 16 Sep 2021 00:31:22 +0530
Subject: [PATCH 094/641] [vrv] Don't raise error when thumbnails are missing

Closes #983
---
 yt_dlp/extractor/vrv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/vrv.py b/yt_dlp/extractor/vrv.py
index 44f90acbdf..4196021481 100644
--- a/yt_dlp/extractor/vrv.py
+++ b/yt_dlp/extractor/vrv.py
@@ -218,7 +218,7 @@ class VRVIE(VRVBaseIE):
         })

         thumbnails = []
-        for thumbnail in traverse_obj(video_data, ('images', 'thumbnail', ..., ...)):
+        for thumbnail in traverse_obj(video_data, ('images', 'thumbnail', ..., ...)) or []:
             thumbnail_url = thumbnail.get('source')
             if not thumbnail_url:
                 continue

From c589c1d3956cb9a8655e8555f2e02f14fbca8a2e Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Thu, 16 Sep 2021 00:34:18 +0530
Subject: [PATCH 095/641] [compat] Don't ignore `HOME` (if set) on windows

Related: #792
---
 README.md        |  2 +-
 yt_dlp/compat.py | 19 ++++++++++++++++++-
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index d9daee69e6..8ffb20a8c1 100644
--- a/README.md
+++ b/README.md
@@ -897,7 +897,7 @@ You can configure yt-dlp by placing any supported command line option to a confi
     * `~/yt-dlp.conf`
     * `~/yt-dlp.conf.txt`

-    Note that `~` points to `C:\Users\<user name>` on windows. Also, `%XDG_CONFIG_HOME%` defaults to `~/.config` if undefined
+    `%XDG_CONFIG_HOME%` defaults to `~/.config` if undefined. On windows, `~` points to %HOME% if present, `%USERPROFILE%` (generally `C:\Users\<user name>`) or `%HOMEDRIVE%%HOMEPATH%`.

 1.
**System Configuration**: `/etc/yt-dlp.conf` For example, with the following configuration file yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory: diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py index ab1a3ba44c..363c2d57a2 100644 --- a/yt_dlp/compat.py +++ b/yt_dlp/compat.py @@ -130,6 +130,24 @@ except AttributeError: asyncio.run = compat_asyncio_run +# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl +# See https://github.com/yt-dlp/yt-dlp/issues/792 +# https://docs.python.org/3/library/os.path.html#os.path.expanduser +if compat_os_name in ('nt', 'ce') and 'HOME' in os.environ: + _userhome = os.environ['HOME'] + + def compat_expanduser(path): + if not path.startswith('~'): + return path + i = path.replace('\\', '/', 1).find('/') # ~user + if i < 0: + i = len(path) + userhome = os.path.join(os.path.dirname(_userhome), path[1:i]) if i > 1 else _userhome + return userhome + path[i:] +else: + compat_expanduser = os.path.expanduser + + # Deprecated compat_basestring = str @@ -152,7 +170,6 @@ compat_cookies = http.cookies compat_cookies_SimpleCookie = compat_cookies.SimpleCookie compat_etree_Element = etree.Element compat_etree_register_namespace = etree.register_namespace -compat_expanduser = os.path.expanduser compat_get_terminal_size = shutil.get_terminal_size compat_getenv = os.getenv compat_getpass = getpass.getpass From 0001fcb586c3ab297cd48c77ddd6f5d40546dac4 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 16 Sep 2021 00:51:40 +0530 Subject: [PATCH 096/641] Add option `--netrc-location` Closes #792, #963 --- .gitignore | 1 + README.md | 12 ++++++------ yt_dlp/__init__.py | 1 + yt_dlp/extractor/common.py | 6 +++++- yt_dlp/options.py | 4 ++++ 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 88a9605f7b..bf06c81f06 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ *.spec cookies *cookies.txt +.netrc # Downloaded *.srt diff --git a/README.md b/README.md index 8ffb20a8c1..a2c1cbd82f 100644 --- a/README.md +++ b/README.md @@ -695,6 +695,9 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t out, yt-dlp will ask interactively -2, --twofactor TWOFACTOR Two-factor authentication code -n, --netrc Use .netrc authentication data + --netrc-location PATH Location of .netrc authentication data; + either the path or its containing + directory. Defaults to ~/.netrc --video-password PASSWORD Video password (vimeo, youku) --ap-mso MSO Adobe Pass multiple-system operator (TV provider) identifier, use --ap-list-mso for @@ -923,14 +926,14 @@ You can use `--ignore-config` if you want to disable all configuration files for ### Authentication with `.netrc` file -You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every yt-dlp execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per extractor basis. 
For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
+You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every yt-dlp execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in `--netrc-location` and restrict permissions to read/write by only you:
 ```
 touch $HOME/.netrc
 chmod a-rwx,u+rw $HOME/.netrc
 ```
 After that you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase:
 ```
-machine <extractor> login <login> password <password>
+machine <extractor> login <username> password <password>
 ```
 For example:
 ```
 machine youtube login my_account_name password my_super_secret_password
@@ -939,10 +942,7 @@ machine twitch login my_twitch_account_name password my_twitch_password
 ```
 To activate authentication with the `.netrc` file you should pass `--netrc` to yt-dlp or place it in the [configuration file](#configuration).

-On Windows you may also need to setup the `%HOME%` environment variable manually. For example:
-```
-set HOME=%USERPROFILE%
-```
+The default location of the .netrc file is `$HOME` (`~`) in UNIX. On Windows, it is `%HOME%` if present, `%USERPROFILE%` (generally `C:\Users\<user name>`) or `%HOMEDRIVE%%HOMEPATH%`

 # OUTPUT TEMPLATE
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index f9a7e2f111..5168ed0f7c 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -575,6 +575,7 @@ def _real_main(argv=None):

     ydl_opts = {
         'usenetrc': opts.usenetrc,
+        'netrc_location': opts.netrc_location,
         'username': opts.username,
         'password': opts.password,
         'twofactor': opts.twofactor,
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 54a9dc2631..e796842312 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -18,6 +18,7 @@ from ..compat import (
     compat_cookies_SimpleCookie,
     compat_etree_Element,
     compat_etree_fromstring,
+    compat_expanduser,
     compat_getpass,
     compat_http_client,
     compat_os_name,
@@ -1166,7 +1167,10 @@ class InfoExtractor(object):

         if self.get_param('usenetrc', False):
             try:
-                info = netrc.netrc().authenticators(netrc_machine)
+                netrc_file = compat_expanduser(self.get_param('netrc_location') or '~')
+                if os.path.isdir(netrc_file):
+                    netrc_file = os.path.join(netrc_file, '.netrc')
+                info = netrc.netrc(file=netrc_file).authenticators(netrc_machine)
                 if info is not None:
                     username = info[0]
                     password = info[2]
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 2ff0fbfc11..099b151c65 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -478,6 +478,10 @@ def parseOpts(overrideArguments=None):
         '-n', '--netrc',
         action='store_true', dest='usenetrc', default=False,
         help='Use .netrc authentication data')
+    authentication.add_option(
+        '--netrc-location',
+        dest='netrc_location', metavar='PATH',
+        help='Location of .netrc authentication data; either the path or its containing directory.
Defaults to ~/.netrc') authentication.add_option( '--video-password', dest='videopassword', metavar='PASSWORD', From b89378a69a2f105e7c57041ffef6ef0580854d4a Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 16 Sep 2021 23:01:39 +0530 Subject: [PATCH 097/641] [globo] Fix GloboIE (#994) Closes #991 Authored by: Ashish0804 --- yt_dlp/extractor/globo.py | 153 ++++++++++++++++---------------------- 1 file changed, 63 insertions(+), 90 deletions(-) diff --git a/yt_dlp/extractor/globo.py b/yt_dlp/extractor/globo.py index 0cb3aa31bf..a3f0241570 100644 --- a/yt_dlp/extractor/globo.py +++ b/yt_dlp/extractor/globo.py @@ -9,15 +9,14 @@ import re from .common import InfoExtractor from ..compat import ( - compat_HTTPError, compat_str, ) from ..utils import ( ExtractorError, float_or_none, - int_or_none, orderedSet, str_or_none, + try_get, ) @@ -26,18 +25,19 @@ class GloboIE(InfoExtractor): _NETRC_MACHINE = 'globo' _TESTS = [{ 'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/', - 'md5': 'b3ccc801f75cd04a914d51dadb83a78d', 'info_dict': { 'id': '3607726', 'ext': 'mp4', 'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa', 'duration': 103.204, - 'uploader': 'Globo.com', - 'uploader_id': '265', + 'uploader': 'G1', + 'uploader_id': '2015', + }, + 'params': { + 'skip_download': True, }, }, { 'url': 'http://globoplay.globo.com/v/4581987/', - 'md5': 'f36a1ecd6a50da1577eee6dd17f67eff', 'info_dict': { 'id': '4581987', 'ext': 'mp4', @@ -46,6 +46,9 @@ class GloboIE(InfoExtractor): 'uploader': 'Rede Globo', 'uploader_id': '196', }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://canalbrasil.globo.com/programas/sangue-latino/videos/3928201.html', 'only_matching': True, @@ -66,30 +69,6 @@ class GloboIE(InfoExtractor): 'only_matching': True, }] - def _real_initialize(self): - email, password = self._get_login_info() - if email is None: - return - - try: - glb_id = (self._download_json( - 'https://login.globo.com/api/authentication', None, data=json.dumps({ - 'payload': { - 'email': email, - 'password': password, - 'serviceId': 4654, - }, - }).encode(), headers={ - 'Content-Type': 'application/json; charset=utf-8', - }) or {}).get('glbId') - if glb_id: - self._set_cookie('.globo.com', 'GLBID', glb_id) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: - resp = self._parse_json(e.cause.read(), None) - raise ExtractorError(resp.get('userMessage') or resp['id'], expected=True) - raise - def _real_extract(self, url): video_id = self._match_id(url) @@ -102,73 +81,67 @@ class GloboIE(InfoExtractor): title = video['title'] formats = [] + security = self._download_json( + 'https://playback.video.globo.com/v1/video-session', video_id, 'Downloading security hash for %s' % video_id, + headers={'content-type': 'application/json'}, data=json.dumps({ + "player_type": "desktop", + "video_id": video_id, + "quality": "max", + "content_protection": "widevine", + "vsid": "581b986b-4c40-71f0-5a58-803e579d5fa2", + "tz": "-3.0:00" + }).encode()) + + security_hash = security['source']['token'] + if not security_hash: + message = security.get('message') + if message: + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, message), expected=True) + + hash_code = security_hash[:2] + padding = '%010d' % random.randint(1, 10000000000) + if hash_code in ('04', '14'): + received_time = security_hash[3:13] + received_md5 = 
security_hash[24:] + hash_prefix = security_hash[:23] + elif hash_code in ('02', '12', '03', '13'): + received_time = security_hash[2:12] + received_md5 = security_hash[22:] + padding += '1' + hash_prefix = '05' + security_hash[:22] + + padded_sign_time = compat_str(int(received_time) + 86400) + padding + md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode() + signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=') + signed_hash = hash_prefix + padded_sign_time + signed_md5 + source = security['source']['url_parts'] + resource_url = source['scheme'] + '://' + source['domain'] + source['path'] + signed_url = '%s?h=%s&k=html5&a=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A') + + formats.extend(self._extract_m3u8_formats( + signed_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + self._sort_formats(formats) + subtitles = {} for resource in video['resources']: - resource_id = resource.get('_id') - resource_url = resource.get('url') - resource_type = resource.get('type') - if not resource_url or (resource_type == 'media' and not resource_id) or resource_type not in ('subtitle', 'media'): - continue - - if resource_type == 'subtitle': + if resource.get('type') == 'subtitle': subtitles.setdefault(resource.get('language') or 'por', []).append({ - 'url': resource_url, + 'url': resource.get('url'), }) - continue - - security = self._download_json( - 'http://security.video.globo.com/videos/%s/hash' % video_id, - video_id, 'Downloading security hash for %s' % resource_id, query={ - 'player': 'desktop', - 'version': '5.19.1', - 'resource_id': resource_id, + subs = try_get(security, lambda x: x['source']['subtitles'], expected_type=dict) or {} + for sub_lang, sub_url in subs.items(): + if sub_url: + subtitles.setdefault(sub_lang or 'por', []).append({ + 'url': sub_url, }) - - security_hash = security.get('hash') - if not security_hash: - message = security.get('message') - if message: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, message), expected=True) - continue - - hash_code = security_hash[:2] - padding = '%010d' % random.randint(1, 10000000000) - if hash_code in ('04', '14'): - received_time = security_hash[3:13] - received_md5 = security_hash[24:] - hash_prefix = security_hash[:23] - elif hash_code in ('02', '12', '03', '13'): - received_time = security_hash[2:12] - received_md5 = security_hash[22:] - padding += '1' - hash_prefix = '05' + security_hash[:22] - - padded_sign_time = compat_str(int(received_time) + 86400) + padding - md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode() - signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=') - signed_hash = hash_prefix + padded_sign_time + signed_md5 - signed_url = '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A', security.get('user') or '') - - if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'): - formats.extend(self._extract_m3u8_formats( - signed_url, resource_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - elif resource_id.endswith('mpd') or resource_url.endswith('.mpd'): - formats.extend(self._extract_mpd_formats( - signed_url, resource_id, mpd_id='dash', fatal=False)) - elif resource_id.endswith('manifest') or resource_url.endswith('/manifest'): - formats.extend(self._extract_ism_formats( - signed_url, resource_id, ism_id='mss', fatal=False)) - else: - formats.append({ - 
'url': signed_url,
-                    'format_id': 'http-%s' % resource_id,
-                    'height': int_or_none(resource.get('height')),
+        subs = try_get(security, lambda x: x['source']['subtitles_webvtt'], expected_type=dict) or {}
+        for sub_lang, sub_url in subs.items():
+            if sub_url:
+                subtitles.setdefault(sub_lang or 'por', []).append({
+                    'url': sub_url,
                 })

-        self._sort_formats(formats)
-
         duration = float_or_none(video.get('duration'), 1000)
         uploader = video.get('channel')
         uploader_id = str_or_none(video.get('channel_id'))

From 23dd2d9a3230c183ba1342734bb1a2ff09fb0dbf Mon Sep 17 00:00:00 2001
From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com>
Date: Thu, 16 Sep 2021 23:41:55 +0530
Subject: [PATCH 098/641] [NDR] Rewrite NDRIE (#962)

Closes #959
Authored by: Ashish0804
---
 yt_dlp/extractor/ndr.py | 169 ++++++++++++++++++++--------------------
 1 file changed, 84 insertions(+), 85 deletions(-)

diff --git a/yt_dlp/extractor/ndr.py b/yt_dlp/extractor/ndr.py
index 45aa106c80..f2bae2c1a0 100644
--- a/yt_dlp/extractor/ndr.py
+++ b/yt_dlp/extractor/ndr.py
@@ -1,15 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals

-
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
     int_or_none,
-    merge_dicts,
-    parse_iso8601,
+    parse_duration,
     qualities,
     try_get,
+    unified_strdate,
     urljoin,
 )

@@ -28,110 +27,110 @@ class NDRIE(NDRBaseIE):
     IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
     _VALID_URL = r'https?://(?:www\.)?(?:daserste\.)?ndr\.de/(?:[^/]+/)*(?P<display_id>[^/?#]+),(?P<id>[\da-z]+)\.html'
     _TESTS = [{
-        # httpVideo, same content id
         'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
-        'md5': '6515bc255dc5c5f8c85bbc38e035a659',
         'info_dict': {
             'id': 'hafengeburtstag988',
-            'display_id': 'Party-Poette-und-Parade',
             'ext': 'mp4',
             'title': 'Party, Pötte und Parade',
+            'thumbnail': 'https://www.ndr.de/fernsehen/hafengeburtstag990_v-contentxl.jpg',
             'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
-            'uploader': 'ndrtv',
-            'timestamp': 1431108900,
-            'upload_date': '20150510',
+            'series': None,
+            'channel': 'NDR Fernsehen',
+            'upload_date': '20150508',
             'duration': 3498,
         },
-        'params': {
-            'skip_download': True,
-        },
     }, {
-        # httpVideo, different content id
-        'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
-        'md5': '1043ff203eab307f0c51702ec49e9a71',
+        'url': 'https://www.ndr.de/sport/fussball/Rostocks-Matchwinner-Froede-Ein-Hansa-Debuet-wie-im-Maerchen,hansa10312.html',
+        'only_matching': True
+    }, {
+        'url': 'https://www.ndr.de/nachrichten/niedersachsen/kommunalwahl_niedersachsen_2021/Grosse-Parteien-zufrieden-mit-Ergebnissen-der-Kommunalwahl,kommunalwahl1296.html',
         'info_dict': {
-            'id': 'osna272',
-            'display_id': '40-Osnabrueck-spielt-sich-in-einen-Rausch',
+            'id': 'kommunalwahl1296',
             'ext': 'mp4',
-            'title': 'Osnabrück - Wehen Wiesbaden: Die Highlights',
-            'description': 'md5:32e9b800b3d2d4008103752682d5dc01',
-            'uploader': 'ndrtv',
-            'timestamp': 1442059200,
-            'upload_date': '20150912',
-            'duration': 510,
-        },
-        'params': {
-            'skip_download': True,
+            'title': 'Die Spitzenrunde: Die Wahl aus Sicht der Landespolitik',
+            'thumbnail': 'https://www.ndr.de/fernsehen/screenshot1194912_v-contentxl.jpg',
+            'description': 'md5:5c6e2ad744cef499135735a1036d7aa7',
+            'series': 'Hallo Niedersachsen',
+            'channel': 'NDR Fernsehen',
+            'upload_date': '20210913',
+            'duration': 438,
         },
     }, {
-        # httpAudio, same content id
-        'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
-        'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
-        'info_dict': {
-            'id': 'audio51535',
-            'display_id': 'La-Valette-entgeht-der-Hinrichtung',
-            'ext': 'mp3',
-            'title': 'La Valette entgeht der Hinrichtung',
-            'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
-            'uploader': 'ndrinfo',
-            'timestamp': 1290626100,
-            'upload_date': '20140729',
-            'duration': 884,
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }, {
-        # with subtitles
         'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
         'info_dict': {
-            'id': 'extra18674',
-            'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
+            'id': 'sendung1091858',
             'ext': 'mp4',
             'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
-            'description': 'md5:42ee53990a715eaaf4dc7f13a3bd56c6',
-            'uploader': 'ndrtv',
-            'upload_date': '20201113',
+            'thumbnail': 'https://www.ndr.de/fernsehen/screenshot983938_v-contentxl.jpg',
+            'description': 'md5:700f6de264010585012a72f97b0ac0c9',
+            'series': 'extra 3',
+            'channel': 'NDR Fernsehen',
+            'upload_date': '20201111',
             'duration': 1749,
-            'subtitles': {
-                'de': [{
-                    'ext': 'ttml',
-                    'url': r're:^https://www\.ndr\.de.+',
-                }],
-            },
-        },
-        'params': {
-            'skip_download': True,
-        },
-        'expected_warnings': ['Unable to download f4m manifest'],
+        }
     }, {
-        'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
-        'only_matching': True,
+        'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
+        'info_dict': {
+            'id': 'audio51535',
+            'ext': 'mp3',
+            'title': 'La Valette entgeht der Hinrichtung',
+            'thumbnail': 'https://www.ndr.de/mediathek/mediathekbild140_v-podcast.jpg',
+            'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
+            'upload_date': '20140729',
+            'duration': 884.0,
+        },
+        'expected_warnings': ['unable to extract json url'],
     }]

     def _extract_embed(self, webpage, display_id, id):
-        embed_url = self._html_search_meta(
-            'embedURL', webpage, 'embed URL',
-            default=None) or self._search_regex(
-            r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
-            'embed URL', fatal=False, group='url')
-        if embed_url is None:
-            return self.url_result('ndr:%s' % id, ie=NDREmbedBaseIE.ie_key())
-        description = self._search_regex(
-            r'<p[^>]+itemprop="description">([^<]+)</p>',
-            webpage, 'description', default=None) or self._og_search_description(webpage)
-        timestamp = parse_iso8601(
-            self._search_regex(
-                r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"',
-                webpage, 'upload date', default=None))
-        info = self._search_json_ld(webpage, display_id, default={})
-        return merge_dicts({
-            '_type': 'url_transparent',
-            'url': embed_url,
-            'display_id': display_id,
-            'description': description,
-            'timestamp': timestamp,
-        }, info)
+        formats = []
+        base_url = 'https://www.ndr.de'
+        json_url = self._search_regex(r'<iframe[^>]+src=\"([^\"]+)_theme-ndrde[^\.]*\.html\"', webpage,
+                                      'json url', fatal=False)
+        if json_url:
+            data_json = self._download_json(base_url + json_url.replace('ardplayer_image', 'ardjson_image') + '.json',
+                                            id, fatal=False)
+            info_json = data_json.get('_info', {})
+            media_json = try_get(data_json, lambda x: x['_mediaArray'][0]['_mediaStreamArray'])
+            for media in media_json:
+                if media.get('_quality') == 'auto':
+                    formats.extend(self._extract_m3u8_formats(media['_stream'], id))
+            subtitles = {}
+            sub_url = data_json.get('_subtitleUrl')
+            if sub_url:
+                subtitles.setdefault('de', []).append({
+                    'url': base_url + sub_url,
+                })
+            self._sort_formats(formats)
+            return {
+                'id': id,
+                'title': info_json.get('clipTitle'),
+                'thumbnail': base_url + data_json.get('_previewImage'),
+                'description': info_json.get('clipDescription'),
+                'series': info_json.get('seriesTitle') or None,
+                'channel': info_json.get('channelTitle'),
+                'upload_date': unified_strdate(info_json.get('clipDate')),
+                'duration': data_json.get('_duration'),
+                'formats': formats,
+                'subtitles': subtitles,
+            }
+        else:
+            json_url = base_url + self._search_regex(r'apiUrl\s?=\s?\'([^\']+)\'', webpage, 'json url').replace(
+                '_belongsToPodcast-', '')
+            data_json = self._download_json(json_url, id, fatal=False)
+            return {
+                'id': id,
+                'title': data_json.get('title'),
+                'thumbnail': base_url + data_json.get('poster'),
+                'description': data_json.get('summary'),
+                'upload_date': unified_strdate(data_json.get('publicationDate')),
+                'duration': parse_duration(data_json.get('duration')),
+                'formats': [{
+                    'url': try_get(data_json, (lambda x: x['audio'][0]['url'], lambda x: x['files'][0]['url'])),
+                    'vcodec': 'none',
+                    'ext': 'mp3',
+                }],
+            }


 class NJoyIE(NDRBaseIE):

From 2fac2e91361a219b9dbc24c2fe91bd42787e851d Mon Sep 17 00:00:00 2001
From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com>
Date: Thu, 16 Sep 2021 23:42:45 +0530
Subject: [PATCH 099/641] [Mediaite] Add Extractor (#973)

Closes #969
Authored by: Ashish0804
---
 yt_dlp/extractor/extractors.py |  1 +
 yt_dlp/extractor/generic.py    | 11 +++--
 yt_dlp/extractor/mediaite.py   | 80 ++++++++++++++++++++++++++++++++++
 3 files changed, 86 insertions(+), 6 deletions(-)
 create mode 100644 yt_dlp/extractor/mediaite.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index bb1e21a07a..8b7af0fd0f 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -729,6 +729,7 @@ from .massengeschmacktv import MassengeschmackTVIE
 from .matchtv import MatchTVIE
 from .mdr import MDRIE
 from .medaltv import MedalTVIE
+from .mediaite import MediaiteIE
 from .mediaklikk import MediaKlikkIE
 from .mediaset import MediasetIE
 from .mediasite import (
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index d08f8f30de..b9c5772e06 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -1215,14 +1215,13 @@ class GenericIE(InfoExtractor):
         },
         {
             # JWPlatform iframe
-            'url':
'https://www.mediaite.com/tv/dem-senator-claims-gary-cohn-faked-a-bad-connection-during-trump-call-to-get-him-off-the-phone/',
-            'md5': 'ca00a040364b5b439230e7ebfd02c4e9',
+            'url': 'https://www.covermagazine.co.uk/feature/2465255/business-protection-involved',
             'info_dict': {
-                'id': 'O0c5JcKT',
+                'id': 'AG26UQXM',
                 'ext': 'mp4',
-                'upload_date': '20171122',
-                'timestamp': 1511366290,
-                'title': 'Dem Senator Claims Gary Cohn Faked a Bad Connection During Trump Call to Get Him Off the Phone',
+                'upload_date': '20160719',
+                'timestamp': 1468923808,
+                'title': '2016_05_18 Cover L&G Business Protection V1 FINAL.mp4',
             },
             'add_ie': [JWPlatformIE.ie_key()],
         },
diff --git a/yt_dlp/extractor/mediaite.py b/yt_dlp/extractor/mediaite.py
new file mode 100644
index 0000000000..646c922231
--- /dev/null
+++ b/yt_dlp/extractor/mediaite.py
@@ -0,0 +1,80 @@
+from __future__ import unicode_literals
+
+
+from .common import InfoExtractor
+
+
+class MediaiteIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?mediaite.com/(?:tv|sports|politics|podcasts|opinion)/[\w-]+/'
+    _TESTS = [{
+        'url': 'https://www.mediaite.com/sports/bill-burr-roasts-nfl-for-promoting-black-lives-matter-while-scheduling-more-games-after-all-the-sht-they-know-about-cte/',
+        'info_dict': {
+            'id': 'vPHKITzy',
+            'ext': 'm4a',
+            'title': 'Bill Burr On NFL And Black Lives Matter',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+            'thumbnail': 'https://cdn.jwplayer.com/v2/media/vPHKITzy/poster.jpg?width=720',
+            'duration': 55,
+            'timestamp': 1631630185,
+            'upload_date': '20210914',
+        },
+        'params': {'skip_download': True}
+    }, {
+        'url': 'https://www.mediaite.com/tv/joe-scarborough-goes-off-on-tax-breaks-for-super-wealthy-largest-income-redistribution-scam-in-american-history/',
+        'info_dict': {
+            'id': 'eeFcK4Xm',
+            'ext': 'mp4',
+            'title': 'Morning Joe-6_16_52 am - 6_21_10 am-2021-09-14.mp4',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+            'thumbnail': 'https://cdn.jwplayer.com/v2/media/eeFcK4Xm/poster.jpg?width=720',
+            'duration': 258,
+            'timestamp': 1631618057,
+            'upload_date': '20210914',
+        },
+        'params': {'skip_download': True}
+    }, {
+        'url': 'https://www.mediaite.com/politics/watch-rudy-giuliani-impersonates-queen-elizabeth-calls-mark-milley-an-asshle-in-bizarre-9-11-speech/',
+        'info_dict': {
+            'id': 'EiyiXKcr',
+            'ext': 'mp4',
+            'title': 'Giuliani 1',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+            'thumbnail': 'https://cdn.jwplayer.com/v2/media/EiyiXKcr/poster.jpg?width=720',
+            'duration': 39,
+            'timestamp': 1631536476,
+            'upload_date': '20210913',
+        },
+        'params': {'skip_download': True}
+    }, {
+        'url': 'https://www.mediaite.com/podcasts/clarissa-ward-says-she-decided-to-become-a-journalist-on-9-11/',
+        'info_dict': {
+            'id': 'TxavoRTx',
+            'ext': 'mp4',
+            'title': 'clarissa-ward-3.mp4',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+            'thumbnail': 'https://cdn.jwplayer.com/v2/media/TxavoRTx/poster.jpg?width=720',
+            'duration': 83,
+            'timestamp': 1631311188,
+            'upload_date': '20210910',
+        },
+        'params': {'skip_download': True}
+    }, {
+        'url': 'https://www.mediaite.com/opinion/mainstream-media-ignores-rose-mcgowans-bombshell-allegation-that-newsoms-wife-tried-to-silence-her-on-weinstein/',
+        'info_dict': {
+            'id': 'sEIWvKR7',
+            'ext': 'mp4',
+            'title': 'KTTV_09-13-2021_05.34.21',
+            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+            'thumbnail': 'https://cdn.jwplayer.com/v2/media/sEIWvKR7/poster.jpg?width=720',
+            'duration': 52,
+            'timestamp': 1631553328,
+            'upload_date': '20210913',
+        },
+        'params': {'skip_download': True}
+    }]
+
+    def _real_extract(self, url):
+        webpage = self._download_webpage(url, None)
+        id = self._search_regex(r'data-video-id\s?=\s?\"([^\"]+)\"', webpage, 'id')
+        data_json = self._download_json(f'https://cdn.jwplayer.com/v2/media/{id}', id)
+        return self._parse_jwplayer_data(data_json)

From f1f6ca78b439343aa3f8ef44f803befd682a3d37 Mon Sep 17 00:00:00 2001
From: Aleri Kaisattera <73682764+alerikaisattera@users.noreply.github.com>
Date: Fri, 17 Sep 2021 00:15:10 +0600
Subject: [PATCH 100/641] [Streamanity] Add Extractor (#984)

Authored by: alerikaisattera
---
 yt_dlp/extractor/extractors.py  |  1 +
 yt_dlp/extractor/streamanity.py | 51 +++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 yt_dlp/extractor/streamanity.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 8b7af0fd0f..e5d6306a94 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1347,6 +1347,7 @@ from .storyfire import (
     StoryFireSeriesIE,
 )
 from .streamable import StreamableIE
+from .streamanity import StreamanityIE
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
 from .streetvoice import StreetVoiceIE
diff --git a/yt_dlp/extractor/streamanity.py b/yt_dlp/extractor/streamanity.py
new file mode 100644
index 0000000000..2e2d5eedf9
--- /dev/null
+++ b/yt_dlp/extractor/streamanity.py
@@ -0,0 +1,51 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class StreamanityIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?streamanity\.com/video/(?P<id>[A-Za-z0-9]+)'
+    _TESTS = [{
+        'url': 'https://streamanity.com/video/9DFPTnuYi8f2',
+        'md5': '6ab171e8d4a02ad5dcbff6bea44cf5a1',
+        'info_dict': {
+            'id': '9DFPTnuYi8f2',
+            'ext': 'mp4',
+            'title': 'Bitcoin vs The Lighting Network',
+            'thumbnail': r're:https://res\.cloudinary\.com/.+\.png',
+            'description': '',
+            'uploader': 'Tom Bombadil (Freddy78)',
+        }
+    }, {
+        'url': 'https://streamanity.com/video/JktOUjSlfzTD',
+        'md5': '31f131e28abd3377c38be586a59532dc',
+        'info_dict': {
+            'id': 'JktOUjSlfzTD',
+            'ext': 'mp4',
+            'title': 'Share data when you see it',
+            'thumbnail': r're:https://res\.cloudinary\.com/.+\.png',
+            'description': 'Reposting as data should be public and stored on blockchain',
+            'uploader': 'digitalcurrencydaily',
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        video_info = self._download_json(
+            f'https://app.streamanity.com/api/video/{video_id}', video_id)['data']['video']
+
+        formats = self._extract_m3u8_formats(
+            f'https://stream.mux.com/{video_info["play_id"]}.m3u8?token={video_info["token"]}',
+            video_id, ext='mp4', m3u8_id='hls')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_info['title'],
+            'description': video_info.get('description'),
+            'uploader': video_info.get('author_name'),
+            'is_live': False,
+            'thumbnail': video_info.get('thumb'),
+            'formats': formats,
+        }
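Of the two extractors above, StreamanityIE has the simpler flow: one JSON call, then a tokenized Mux playlist handed to the HLS downloader. A rough standalone sketch of just the URL construction, using only the standard library (the endpoint shape is taken from the extractor above; everything else is illustrative, not yt-dlp code):

    import json
    import urllib.request

    def streamanity_m3u8_url(video_id):
        # Same metadata endpoint the extractor queries
        api_url = f'https://app.streamanity.com/api/video/{video_id}'
        with urllib.request.urlopen(api_url) as resp:
            video = json.load(resp)['data']['video']
        # The HLS playlist is hosted on Mux and only plays with the signed token
        return f'https://stream.mux.com/{video["play_id"]}.m3u8?token={video["token"]}'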
From f5aa5cfbffeea9352ace141707f35c86f5e11b89 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Fri, 17 Sep 2021 23:46:17 +0530
Subject: [PATCH 101/641] Add format type `B` for outtmpl to treat the value
 as bytes

This is useful to limit the filename to a certain number of bytes rather than characters

Closes #1003
---
 README.md              | 4 ++--
 test/test_YoutubeDL.py | 2 ++
 yt_dlp/YoutubeDL.py    | 7 +++++--
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index a2c1cbd82f..c4f9968342 100644
--- a/README.md
+++ b/README.md
@@ -952,14 +952,14 @@ The `-o` option is used to indicate a template for the output file names while `
 
 The simplest usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding file extension like this is _not_ recommended and could break some post-processing).
 
-It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations.
+It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations.
 
 The field names themselves (the part inside the parenthesis) can also have some special formatting:
 1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields
 1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
 1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s`
 1. **Default**: A default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s`
-1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `j`, `l`, `q` can be used for converting to **j**son, a comma separated **l**ist and a string **q**uoted for the terminal respectively
+1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `B`, `j`, `l`, `q` can be used for converting to **B**ytes, **j**son, a comma separated **l**ist and a string **q**uoted for the terminal respectively
 
 To summarize, the general syntax for a field is:
 ```
 %(name[.keys][addition][>strf][|default])[flags][width][.precision][length]type
 ```
 
 Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video.
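The `B` conversion documented above is easiest to see in isolation. A minimal sketch of the same encode, truncate, decode trick (this mirrors the implementation added below, but it is a standalone illustration, not yt-dlp's actual formatter):

    def limit_bytes(value, max_bytes):
        # Encode, truncate at the byte level, then drop any partial UTF-8
        # sequence left at the cut point -- the idea behind '%(title).200B'.
        raw = value.encode('utf-8')[:max_bytes]
        return raw.decode('utf-8', 'ignore')

    assert limit_bytes('áéí', 3) == 'á'  # 'á' is 2 bytes; the split 'é' is dropped

This is exactly why the new test below expects `%(title5).3B` to yield `á` for the title `áéí`.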
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index e689978fd3..e61492ec81 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -649,6 +649,7 @@ class TestYoutubeDL(unittest.TestCase):
             'title2': '%PATH%',
             'title3': 'foo/bar\\test',
             'title4': 'foo "bar" test',
+            'title5': 'áéí',
             'timestamp': 1618488000,
             'duration': 100000,
             'playlist_index': 1,
@@ -767,6 +768,7 @@ class TestYoutubeDL(unittest.TestCase):
         test('%(ext)l', 'mp4')
         test('%(formats.:.id) 15l', ' id1, id2, id3')
         test('%(formats)j', (json.dumps(FORMATS), sanitize(json.dumps(FORMATS))))
+        test('%(title5).3B', 'á')
         if compat_os_name == 'nt':
             test('%(title4)q', ('"foo \\"bar\\" test"', "'foo _'bar_' test'"))
         else:
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 8432abf1a9..c9dc50e64b 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -907,7 +907,7 @@ class YoutubeDL(object):
     def validate_outtmpl(cls, outtmpl):
         ''' @return None or Exception object '''
         outtmpl = re.sub(
-            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljq]'),
+            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqB]'),
             lambda mobj: f'{mobj.group(0)[:-1]}s',
             cls._outtmpl_expandpath(outtmpl))
         try:
@@ -939,7 +939,7 @@ class YoutubeDL(object):
         }
         TMPL_DICT = {}
-        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljq]'))
+        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqB]'))
         MATH_FUNCTIONS = {
             '+': float.__add__,
             '-': float.__sub__,
@@ -1031,6 +1031,9 @@ class YoutubeDL(object):
                 value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
             elif fmt[-1] == 'q':
                 value, fmt = compat_shlex_quote(str(value)), str_fmt
+            elif fmt[-1] == 'B':
+                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
+                value, fmt = value.decode('utf-8', 'ignore'), 's'
             elif fmt[-1] == 'c':
                 value = str(value)
                 if value is None:

From 7303f84abeeb283b15806f7ef47bfe694f55b99c Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Mon, 13 Sep 2021 08:22:11 +0530
Subject: [PATCH 102/641] [options] Fix `--no-config` and refactor reading of
 config files

Closes #912, #914
---
 yt_dlp/options.py | 79 ++++++++++++++++++++---------------------------
 1 file changed, 34 insertions(+), 45 deletions(-)

diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 099b151c65..74c8104712 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -235,7 +235,7 @@ def parseOpts(overrideArguments=None):
         help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for the search term "large apple". Use the value "auto" to let yt-dlp guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching')
     general.add_option(
         '--ignore-config', '--no-config',
-        action='store_true',
+        action='store_true', dest='ignoreconfig',
         help=(
             'Disable loading any configuration files except the one provided by --config-location. '
             'When given inside a configuration file, no further configuration files are loaded. '
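The one-line `dest='ignoreconfig'` change above is what lets `--no-config` land in the same option as `--ignore-config`; without an explicit dest, optparse derives the attribute name from the first long flag only. A tiny self-contained illustration of that optparse behaviour (flag names copied from the patch, the rest is a demo):

    import optparse

    parser = optparse.OptionParser()
    # Both flags write to the same destination, so the rest of the code
    # only ever has to check one attribute
    parser.add_option('--ignore-config', '--no-config',
                      action='store_true', dest='ignoreconfig', default=False)

    opts, _ = parser.parse_args(['--no-config'])
    assert opts.ignoreconfig is True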
'
@@ -1536,57 +1536,47 @@ def parseOpts(overrideArguments=None):
         'command-line': compat_conf(sys.argv[1:]),
         'custom': [], 'home': [], 'portable': [], 'user': [], 'system': []}
     paths = {'command-line': False}
-    opts, args = parser.parse_args(configs['command-line'])
+
+    def read_options(name, path, user=False):
+        ''' loads config files and returns ignoreconfig '''
+        # Multiple package names can be given here
+        # Eg: ('yt-dlp', 'youtube-dlc', 'youtube-dl') will look for
+        # the configuration file of any of these three packages
+        for package in ('yt-dlp',):
+            if user:
+                config, current_path = _readUserConf(package, default=None)
+            else:
+                current_path = os.path.join(path, '%s.conf' % package)
+                config = _readOptions(current_path, default=None)
+            if config is not None:
+                configs[name], paths[name] = config, current_path
+                return parser.parse_args(config)[0].ignoreconfig
+        return False
 
     def get_configs():
-        if '--config-location' in configs['command-line']:
+        opts, _ = parser.parse_args(configs['command-line'])
+        if opts.config_location is not None:
             location = compat_expanduser(opts.config_location)
             if os.path.isdir(location):
                 location = os.path.join(location, 'yt-dlp.conf')
             if not os.path.exists(location):
                 parser.error('config-location %s does not exist.' % location)
-            configs['custom'] = _readOptions(location, default=None)
-            if configs['custom'] is None:
-                configs['custom'] = []
-            else:
-                paths['custom'] = location
-        if '--ignore-config' in configs['command-line']:
+            config = _readOptions(location, default=None)
+            if config:
+                configs['custom'], paths['custom'] = config, location
+
+        if opts.ignoreconfig:
             return
-        if '--ignore-config' in configs['custom']:
+        if parser.parse_args(configs['custom'])[0].ignoreconfig:
             return
-
-        def read_options(path, user=False):
-            # Multiple package names can be given here
-            # Eg: ('yt-dlp', 'youtube-dlc', 'youtube-dl') will look for
-            # the configuration file of any of these three packages
-            for package in ('yt-dlp',):
-                if user:
-                    config, current_path = _readUserConf(package, default=None)
-                else:
-                    current_path = os.path.join(path, '%s.conf' % package)
-                    config = _readOptions(current_path, default=None)
-                if config is not None:
-                    return config, current_path
-            return [], None
-
-        configs['portable'], paths['portable'] = read_options(get_executable_path())
-        if '--ignore-config' in configs['portable']:
+        if read_options('portable', get_executable_path()):
             return
-
-        def get_home_path():
-            opts = parser.parse_args(configs['portable'] + configs['custom'] + configs['command-line'])[0]
-            return expand_path(opts.paths.get('home', '')).strip()
-
-        configs['home'], paths['home'] = read_options(get_home_path())
-        if '--ignore-config' in configs['home']:
+        opts, _ = parser.parse_args(configs['portable'] + configs['custom'] + configs['command-line'])
+        if read_options('home', expand_path(opts.paths.get('home', '')).strip()):
             return
-
-        configs['system'], paths['system'] = read_options('/etc')
-        if '--ignore-config' in configs['system']:
+        if read_options('system', '/etc'):
             return
-
-        configs['user'], paths['user'] = read_options('', True)
-        if '--ignore-config' in configs['user']:
+        if read_options('user', None, user=True):
             configs['system'], paths['system'] = [], None
 
     get_configs()
@@ -1595,10 +1585,9 @@ def parseOpts(overrideArguments=None):
     if opts.verbose:
         for label in ('System', 'User', 'Portable', 'Home', 'Custom', 'Command-line'):
             key = label.lower()
-            if paths.get(key) is None:
-                continue
-            if paths[key]:
-                write_string('[debug] %s config file: %s\n' % (label, paths[key]))
-            write_string('[debug] %s config: %s\n' % (label, repr(_hide_login_info(configs[key]))))
+            if paths.get(key):
+                write_string(f'[debug] {label} config file: {paths[key]}\n')
+            if paths.get(key) is not None:
+                write_string(f'[debug] {label} config: {_hide_login_info(configs[key])!r}\n')
 
     return parser, opts, args
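The control flow above is easier to follow as data: each location is tried in turn, and an `ignoreconfig` hit inside any file stops everything below it in the chain. A simplified model of that cascade (the loader callback and return shape are invented for illustration; the order mirrors the patch):

    def load_config_chain(read_file):
        # Order mirrors the patch: portable, then home, then system, then user.
        # read_file(name) returns (options_list, saw_ignore_config) or None.
        loaded = {}
        for name in ('portable', 'home', 'system', 'user'):
            result = read_file(name)
            if result is None:
                continue
            options, saw_ignore = result
            loaded[name] = options
            if saw_ignore:  # '--ignore-config' in a file stops further loading
                break
        return loaded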
From edf65256aa630a5ce011138e8957c95c9bef0584 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sat, 18 Sep 2021 00:51:27 +0530
Subject: [PATCH 103/641] [hls,aes] Fallback to native implementation for
 AES-CBC and detect `Cryptodome` in addition to `Crypto`

Closes #935
Related: #938
---
 test/test_cookies.py          |  4 ++--
 yt_dlp/YoutubeDL.py           |  4 ++--
 yt_dlp/aes.py                 | 14 +++++++++++++-
 yt_dlp/compat.py              | 10 ++++++++++
 yt_dlp/cookies.py             | 11 +++--------
 yt_dlp/downloader/external.py | 10 ++--------
 yt_dlp/downloader/fragment.py |  9 ++------
 yt_dlp/downloader/hls.py      |  7 ++-----
 yt_dlp/extractor/ivi.py       | 26 ++++++++++----------------
 9 files changed, 46 insertions(+), 49 deletions(-)

diff --git a/test/test_cookies.py b/test/test_cookies.py
index 6faaaa0c99..6053ebb4eb 100644
--- a/test/test_cookies.py
+++ b/test/test_cookies.py
@@ -2,8 +2,8 @@ import unittest
 from datetime import datetime, timezone
 
 from yt_dlp import cookies
+from yt_dlp.compat import compat_pycrypto_AES
 from yt_dlp.cookies import (
-    CRYPTO_AVAILABLE,
     LinuxChromeCookieDecryptor,
     MacChromeCookieDecryptor,
     WindowsChromeCookieDecryptor,
@@ -53,7 +53,7 @@ class TestCookies(unittest.TestCase):
         decryptor = LinuxChromeCookieDecryptor('Chrome', YDLLogger())
         self.assertEqual(decryptor.decrypt(encrypted_value), value)
 
-    @unittest.skipIf(not CRYPTO_AVAILABLE, 'cryptography library not available')
+    @unittest.skipIf(not compat_pycrypto_AES, 'cryptography library not available')
    def test_chrome_cookie_decryptor_windows_v10(self):
        with MonkeyPatch(cookies, {
            '_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2<\x9a\x84}\xe9\xb4\x16\xe14\xf6\xdc\x1e\xeb\xb6\xe2\x97\xef\xe4\x14'

From: Aleri Kaisattera <73682764+alerikaisattera@users.noreply.github.com>
Date: Sat, 18 Sep 2021 02:54:17 +0600
Subject: [PATCH 104/641] [CAM4] Add extractor (#1010)

Authored by: alerikaisattera
---
 yt_dlp/extractor/cam4.py       | 32 ++++++++++++++++++++++++++++++++
 yt_dlp/extractor/extractors.py |  1 +
 2 files changed, 33 insertions(+)
 create mode 100644 yt_dlp/extractor/cam4.py

diff --git a/yt_dlp/extractor/cam4.py b/yt_dlp/extractor/cam4.py
new file mode 100644
index 0000000000..30daf2be9a
--- /dev/null
+++ b/yt_dlp/extractor/cam4.py
@@ -0,0 +1,32 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class CAM4IE(InfoExtractor):
+    _VALID_URL = r'https?://(?:[^/]+\.)?cam4\.com/(?P<id>[a-z0-9_]+)'
+    _TEST = {
+        'url': 'https://www.cam4.com/foxynesss',
+        'info_dict': {
+            'id': 'foxynesss',
+            'ext': 'mp4',
+            'title': 're:^foxynesss [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'age_limit': 18,
+        }
+    }
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+        m3u8_playlist = self._download_json('https://www.cam4.com/rest/v1.0/profile/{}/streamInfo'.format(channel_id), channel_id).get('cdnURL')
+
+        formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True)
+        self._sort_formats(formats)
+
+        return {
+            'id': channel_id,
+            'title': self._live_title(channel_id),
+            'is_live': True,
+            'age_limit': 18,
+            'formats': formats,
+        }
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index e5d6306a94..802907bd99 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -185,6 +185,7 @@ from .businessinsider import BusinessInsiderIE
 from .buzzfeed import BuzzFeedIE
 from .byutv import BYUtvIE
 from .c56 import C56IE
+from .cam4 import CAM4IE
 from .camdemy import (
     CamdemyIE,
     CamdemyFolderIE

From 298bf1d275f33ce30b3ebe83f73ba189f6526174 Mon Sep 17 00:00:00 2001
From: coletdjnz
Date: Fri, 17 Sep 2021 20:55:49 +0000
Subject: [PATCH 105/641] [itv] Prefer last matching featureset (#1001)

Bug fix for #986

Authored by: coletdjnz
---
 yt_dlp/extractor/itv.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py
index 3418689d67..d69782b782 100644
--- a/yt_dlp/extractor/itv.py
+++ b/yt_dlp/extractor/itv.py
@@ -113,12 +113,15 @@ class ITVIE(InfoExtractor):
 
     def _get_subtitles(self, video_id, variants, ios_playlist_url, headers, *args, **kwargs):
         subtitles = {}
+        # Prefer last matching featureset
+        # See: https://github.com/yt-dlp/yt-dlp/issues/986
         platform_tag_subs, featureset_subs = next(
             ((platform_tag, featureset)
-             for platform_tag, featuresets in variants.items() for featureset in featuresets
+             for platform_tag, featuresets in reversed(variants.items()) for featureset in featuresets
              if try_get(featureset, lambda x: x[2]) == 'outband-webvtt'),
             (None, None))
-        if platform_tag_subs or featureset_subs:
+
+        if platform_tag_subs and featureset_subs:
             subs_playlist = self._call_api(
                 video_id, ios_playlist_url, headers, platform_tag_subs, featureset_subs, fatal=False)
             subs = try_get(subs_playlist, lambda x: x['Playlist']['Video']['Subtitles'], list) or []
@@ -139,9 +142,11 @@ class ITVIE(InfoExtractor):
         variants = self._parse_json(
             try_get(params, lambda x: x['data-video-variants'], compat_str) or '{}',
             video_id, fatal=False)
+        # Prefer last matching featureset
+        # See: https://github.com/yt-dlp/yt-dlp/issues/986
         platform_tag_video, featureset_video = next(
             ((platform_tag, featureset)
-             for platform_tag, featuresets in variants.items() for featureset in featuresets
+             for platform_tag, featuresets in reversed(variants.items()) for featureset in featuresets
              if try_get(featureset, lambda x: x[:2]) == ['hls', 'aes']),
             (None, None))
         if not platform_tag_video or not featureset_video:
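The itv change above is a one-line preference flip, but the pattern (scan the platform variants and keep the last featureset that matches the wanted capabilities) is generic enough to sketch on its own. A simplified standalone model, with invented sample data:

    def pick_variant(variants, wanted=('hls', 'aes')):
        # Walk the platforms in reverse so the last matching featureset wins,
        # mirroring the reversed iteration introduced in the patch above
        for platform_tag, featuresets in reversed(list(variants.items())):
            for featureset in featuresets:
                if list(featureset[:2]) == list(wanted):
                    return platform_tag, featureset
        return None, None

    variants = {'itvplayer': [['dash', 'widevine']],
                'mobile': [['hls', 'aes', 'outband-webvtt']]}
    print(pick_variant(variants))  # ('mobile', ['hls', 'aes', 'outband-webvtt'])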
From d47f46e17e8611d6bad81b1cae3cc076385a6283 Mon Sep 17 00:00:00 2001
From: The Hatsune Daishi
Date: Sat, 18 Sep 2021 14:55:17 +0900
Subject: [PATCH 106/641] [damtomo] Add extractor (#731)

https://github.com/ytdl-org/youtube-dl/issues/29840

Authored by: nao20010128nao
---
 yt_dlp/extractor/damtomo.py    | 113 +++++++++++++++++++++++++++++++++
 yt_dlp/extractor/extractors.py |   4 ++
 2 files changed, 117 insertions(+)
 create mode 100644 yt_dlp/extractor/damtomo.py

diff --git a/yt_dlp/extractor/damtomo.py b/yt_dlp/extractor/damtomo.py
new file mode 100644
index 0000000000..456cd35a44
--- /dev/null
+++ b/yt_dlp/extractor/damtomo.py
@@ -0,0 +1,113 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate
+from ..compat import compat_str
+
+
+class DamtomoBaseIE(InfoExtractor):
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage, handle = self._download_webpage_handle(self._WEBPAGE_URL_TMPL % video_id, video_id, encoding='sjis')
+
+        if handle.url == 'https://www.clubdam.com/sorry/':
+            raise ExtractorError('You are rate-limited. Try again later.', expected=True)
+        if '<h2>予期せぬエラーが発生しました。</h2>' in webpage:
+            raise ExtractorError('There is an error on server-side. Try again later.', expected=True)
+
+        description = self._search_regex(r'(?m)<div id="public_comment">\s*<p>\s*([^<]*?)\s*</p>', webpage, 'description', default=None)
+        uploader_id = self._search_regex(r'<a href="https://www\.clubdam\.com/app/damtomo/member/info/Profile\.do\?damtomoId=([^"]+)"', webpage, 'uploader_id', default=None)
+        data_dict = {mobj.group('class'): re.sub(r'\s+', ' ', clean_html(mobj.group('value'))) for mobj in re.finditer(r'(?s)<(p|div)\s+class="(?P<class>[^" ]+?)">(?P<value>.+?)</\1>', webpage)}
+
+        # since videos do not have title, give the name of song instead
+        data_dict['user_name'] = re.sub(r'\s*さん\s*$', '', data_dict['user_name'])
+        title = data_dict.get('song_title')
+
+        stream_tree = self._download_xml(
+            self._DKML_XML_URL % video_id, video_id, note='Requesting stream information', encoding='sjis',
+            # doing this has no problem since there is no character outside ASCII,
+            # and never likely to happen in the future
+            transform_source=lambda x: re.sub(r'\s*encoding="[^"]+?"', '', x))
+        m3u8_url = try_get(stream_tree, lambda x: x.find(
+            './/d:streamingUrl', {'d': self._DKML_XML_NS}).text.strip(), compat_str)
+        if not m3u8_url:
+            raise ExtractorError('Failed to obtain m3u8 URL')
+        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'uploader_id': uploader_id,
+            'description': description,
+            'uploader': data_dict.get('user_name'),
+            'upload_date': unified_strdate(self._search_regex(r'(\d{4}/\d{2}/\d{2})', data_dict.get('date'), 'upload_date', default=None)),
+            'view_count': int_or_none(self._search_regex(r'(\d+)', data_dict['audience'], 'view_count', default=None)),
+            'like_count': int_or_none(self._search_regex(r'(\d+)', data_dict['nice'], 'like_count', default=None)),
+            'track': title,
+            'artist': data_dict.get('song_artist'),
+            'formats': formats,
+        }
+
+
+class DamtomoVideoIE(DamtomoBaseIE):
+    IE_NAME = 'damtomo:video'
+    _VALID_URL = r'https?://(?:www\.)?clubdam\.com/app/damtomo/(?:SP/)?karaokeMovie/StreamingDkm\.do\?karaokeMovieId=(?P<id>\d+)'
+    _WEBPAGE_URL_TMPL = 'https://www.clubdam.com/app/damtomo/karaokeMovie/StreamingDkm.do?karaokeMovieId=%s'
+    _DKML_XML_URL = 'https://www.clubdam.com/app/damtomo/karaokeMovie/GetStreamingDkmUrlXML.do?movieSelectFlg=2&karaokeMovieId=%s'
+    _DKML_XML_NS = 'https://www.clubdam.com/app/damtomo/karaokeMovie/GetStreamingDkmUrlXML'
+    _TESTS = [{
+        'url': 'https://www.clubdam.com/app/damtomo/karaokeMovie/StreamingDkm.do?karaokeMovieId=2414316',
+        'info_dict': {
+            'id': '2414316',
+            'title': 'Get Wild',
+            'uploader': 'Kドロン',
+            'uploader_id': 'ODk5NTQwMzQ',
+            'track': 'Get Wild',
+            'artist': 'TM NETWORK(TMN)',
+            'upload_date': '20201226',
+        }
+    }]
+
+
+class DamtomoRecordIE(DamtomoBaseIE):
+    IE_NAME = 'damtomo:record'
+    _VALID_URL = r'https?://(?:www\.)?clubdam\.com/app/damtomo/(?:SP/)?karaokePost/StreamingKrk\.do\?karaokeContributeId=(?P<id>\d+)'
+    _WEBPAGE_URL_TMPL = 'https://www.clubdam.com/app/damtomo/karaokePost/StreamingKrk.do?karaokeContributeId=%s'
+    _DKML_XML_URL = 'https://www.clubdam.com/app/damtomo/karaokePost/GetStreamingKrkUrlXML.do?karaokeContributeId=%s'
+    _DKML_XML_NS = 'https://www.clubdam.com/app/damtomo/karaokePost/GetStreamingKrkUrlXML'
+    _TESTS = [{
+        'url': 'https://www.clubdam.com/app/damtomo/karaokePost/StreamingKrk.do?karaokeContributeId=27376862',
+        'info_dict': {
+            'id': '27376862',
+            'title': 'イカSUMMER [良音]',
+            'description': None,
+            'uploader': 'NANA',
+            'uploader_id': 'MzAyMDExNTY',
+            'upload_date': '20210721',
+            'view_count': 4,
+            'like_count': 1,
+            'track': 'イカSUMMER [良音]',
+            'artist': 'ORANGE RANGE',
+        }
+    }, {
+        'url': 'https://www.clubdam.com/app/damtomo/karaokePost/StreamingKrk.do?karaokeContributeId=27489418',
+        'info_dict': {
+            'id': '27489418',
+            'title': '心みだれて〜say it with flowers〜(生音)',
+            'uploader_id': 'NjI1MjI2MjU',
+            'description': 'やっぱりキーを下げて正解だった感じ。リベンジ成功ということで。',
+            'uploader': '箱の「中の人」',
+            'upload_date': '20210815',
+            'view_count': 5,
+            'like_count': 3,
+            'track': '心みだれて〜say it with flowers〜(生音)',
+            'artist': '小林明子',
+        }
+    }]
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 802907bd99..3dd56c65a2 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -310,6 +310,10 @@ from .dailymotion import (
     DailymotionPlaylistIE,
     DailymotionUserIE,
 )
+from .damtomo import (
+    DamtomoRecordIE,
+    DamtomoVideoIE,
+)
 from .daum import (
     DaumIE,
     DaumClipIE,
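The damtomo page has no single metadata block, so the extractor above hoovers every `<p>`/`<div>` with a class attribute into a dict and reads the fields out by class name. A rough standalone illustration of that scrape-everything-by-class trick (the sample HTML is invented for the demo, and plain regex substitution stands in for `clean_html`):

    import re

    def scrape_by_class(webpage):
        # Map each <p>/<div> class name to its whitespace-collapsed text,
        # mirroring the data_dict comprehension in DamtomoBaseIE above
        return {
            mobj.group('class'): re.sub(r'\s+', ' ', re.sub(r'<[^>]+>', '', mobj.group('value'))).strip()
            for mobj in re.finditer(r'(?s)<(p|div)\s+class="(?P<class>[^" ]+?)">(?P<value>.+?)</\1>', webpage)}

    sample = '<p class="song_title">Get Wild</p><div class="user_name">Kドロン さん</div>'
    print(scrape_by_class(sample))  # {'song_title': 'Get Wild', 'user_name': 'Kドロン さん'}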
From 7c37ff97d3b95444ece7e7da2da6f03293003df3 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sat, 18 Sep 2021 16:21:38 +0530
Subject: [PATCH 107/641] Allow alternate fields in outtmpl

Closes #899, #1004
---
 README.md              |  5 +++--
 test/test_YoutubeDL.py |  6 ++++++
 yt_dlp/YoutubeDL.py    | 17 +++++++++++------
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index c4f9968342..44766b76be 100644
--- a/README.md
+++ b/README.md
@@ -958,12 +958,13 @@ The field names themselves (the part inside the parenthesis) can also have some
 1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields
 1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
 1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s`
-1. **Default**: A default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s`
+1. **Alternatives**: Alternate fields can be specified separated with a `,`. Eg: `%(release_date>%Y,upload_date>%Y|Unknown)s`
+1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s`
 1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `B`, `j`, `l`, `q` can be used for converting to **B**ytes, **j**son, a comma separated **l**ist and a string **q**uoted for the terminal respectively
 
 To summarize, the general syntax for a field is:
 ```
-%(name[.keys][addition][>strf][|default])[flags][width][.precision][length]type
+%(name[.keys][addition][>strf][,alternate][|default])[flags][width][.precision][length]type
 ```
 
 Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video.
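Conceptually, the new alternates syntax is first-non-empty-wins across the comma list, with the `|` literal as a final fallback. A simplified model of that resolution order (this deliberately ignores `>strf`, maths and conversions, so it is not yt-dlp's actual parser):

    def resolve_template_field(info, field_spec):
        # Model '%(release_date,upload_date|Unknown)s'-style lookups: try each
        # comma-separated field in turn, else fall back to the '|' default.
        names, _, default = field_spec.partition('|')
        for name in names.split(','):
            value = info.get(name.strip())
            if value is not None:
                return value
        return default or None

    info = {'upload_date': '20210918'}
    assert resolve_template_field(info, 'release_date,upload_date|Unknown') == '20210918'
    assert resolve_template_field({}, 'release_date,upload_date|Unknown') == 'Unknown'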
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index e61492ec81..210bf441c8 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -790,6 +790,12 @@ class TestYoutubeDL(unittest.TestCase): test('%(formats.0.id.-1+id)f', '1235.000000') test('%(formats.0.id.-1+formats.1.id.-1)d', '3') + # Alternates + test('%(title,id)s', '1234') + test('%(width-100,height+20|def)d', '1100') + test('%(width-100,height+width|def)s', 'def') + test('%(timestamp-x>%H\\,%M\\,%S,timestamp>%H\\,%M\\,%S)s', '12,00,00') + # Laziness def gen(): yield from range(5) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index c53c7ec38e..50e902c53f 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -955,6 +955,7 @@ class YoutubeDL(object): (?P{field}) (?P(?:{math_op}{math_field})*) (?:>(?P.+?))? + (?P(?.*?))? $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE)) @@ -996,7 +997,7 @@ class YoutubeDL(object): operator = None # Datetime formatting if mdict['strf_format']: - value = strftime_or_none(value, mdict['strf_format']) + value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ',')) return value @@ -1012,12 +1013,16 @@ class YoutubeDL(object): return f'%{outer_mobj.group(0)}' key = outer_mobj.group('key') mobj = re.match(INTERNAL_FORMAT_RE, key) - if mobj is None: - value, default, mobj = None, na, {'fields': ''} - else: + initial_field = mobj.group('fields').split('.')[-1] if mobj else '' + value, default = None, na + while mobj: mobj = mobj.groupdict() - default = mobj['default'] if mobj['default'] is not None else na + default = mobj['default'] if mobj['default'] is not None else default value = get_value(mobj) + if value is None and mobj['alternate']: + mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:]) + else: + break fmt = outer_mobj.group('format') if fmt == 's' and value is not None and key in field_size_compat_map.keys(): @@ -1052,7 +1057,7 @@ class YoutubeDL(object): # So we convert it to repr first value, fmt = repr(value), str_fmt if fmt[-1] in 'csr': - value = sanitize(mobj['fields'].split('.')[-1], value) + value = sanitize(initial_field, value) key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format')) TMPL_DICT[key] = value From 7738bd32722154a26f70006e0fe586f40d06e606 Mon Sep 17 00:00:00 2001 From: Mohammad Khaled AbouElSherbini <50295916+MKSherbini@users.noreply.github.com> Date: Sat, 18 Sep 2021 13:33:06 +0200 Subject: [PATCH 108/641] [Oreilly] Handle new web url (#990) The change in URL is most likely a server side issue. 
But we can work around it by a simple substitution Authored by: MKSherbini --- yt_dlp/extractor/safari.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/safari.py b/yt_dlp/extractor/safari.py index c92e8849bd..fbbbc7e77a 100644 --- a/yt_dlp/extractor/safari.py +++ b/yt_dlp/extractor/safari.py @@ -193,7 +193,12 @@ class SafariApiIE(SafariBaseIE): part = self._download_json( url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')), 'Downloading part JSON') - return self.url_result(part['web_url'], SafariIE.ie_key()) + web_url = part['web_url'] + if 'library/view' in web_url: + web_url = web_url.replace('library/view', 'videos') + natural_keys = part['natural_key'] + web_url = f'{web_url.rsplit("/")[0]}/{natural_keys[0]}-{natural_keys[1][:-5]}' + return self.url_result(web_url, SafariIE.ie_key()) class SafariCourseIE(SafariBaseIE): From c6af2dd8e5a4ee71e7378d7ad12395dce658f7b3 Mon Sep 17 00:00:00 2001 From: Nil Admirari <50202386+nihil-admirari@users.noreply.github.com> Date: Sun, 19 Sep 2021 03:08:50 +0000 Subject: [PATCH 109/641] [SponsorBlock] Improve merge algorithm (#999) Authored by: nihil-admirari --- test/test_postprocessors.py | 34 ++++++++++- yt_dlp/postprocessor/modify_chapters.py | 75 +++++++++++++------------ 2 files changed, 72 insertions(+), 37 deletions(-) diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py index 7d13687696..090c7b47b0 100644 --- a/test/test_postprocessors.py +++ b/test/test_postprocessors.py @@ -461,11 +461,23 @@ class TestModifyChaptersPP(unittest.TestCase): self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([2, 2.5], ['c1', 'c3']), cuts) + def test_remove_marked_arrange_sponsors_SingleTinyChapterIsPreserved(self): + cuts = [self._chapter(0.5, 2, remove=True)] + chapters = self._chapters([2], ['c']) + cuts + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([0.5], ['c']), cuts) + + def test_remove_marked_arrange_sponsors_TinyChapterAtTheStartPrependedToTheNext(self): + cuts = [self._chapter(0.5, 2, remove=True)] + chapters = self._chapters([2, 4], ['c1', 'c2']) + cuts + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([2.5], ['c2']), cuts) + def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromSponsorOverlapAreIgnored(self): chapters = self._chapters([1, 3, 4], ['c1', 'c2', 'c3']) + [ self._sponsor_chapter(1.5, 2.5, 'sponsor')] self._remove_marked_arrange_sponsors_test_impl( - chapters, self._chapters([1.5, 3, 4], ['c1', '[SponsorBlock]: Sponsor', 'c3']), []) + chapters, self._chapters([1.5, 2.5, 4], ['c1', '[SponsorBlock]: Sponsor', 'c3']), []) def test_remove_marked_arrange_sponsors_TinySponsorsOverlapsAreIgnored(self): chapters = self._chapters([2, 3, 5], ['c1', 'c2', 'c3']) + [ @@ -476,6 +488,26 @@ class TestModifyChaptersPP(unittest.TestCase): chapters, self._chapters([1, 3, 4, 5], [ 'c1', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', 'c3']), []) + def test_remove_marked_arrange_sponsors_TinySponsorsPrependedToTheNextSponsor(self): + chapters = self._chapters([4], ['c']) + [ + self._sponsor_chapter(1.5, 2, 'sponsor'), + self._sponsor_chapter(2, 4, 'selfpromo') + ] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([1.5, 4], ['c', '[SponsorBlock]: Unpaid/Self Promotion']), []) + + def test_remove_marked_arrange_sponsors_SmallestSponsorInTheOverlapGetsNamed(self): + self._pp._sponsorblock_chapter_title = '[SponsorBlock]: %(name)s' + chapters = 
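# A quick illustration of the merge rule being exercised by the tests above
# (a simplified model for the reader, not the postprocessor itself): chapters
# shorter than _TINY_CHAPTER_DURATION that were created by a cut are folded
# into a neighbour instead of being kept as separate entries.
def fold_tiny_chapters(chapters, min_duration=1):
    merged = []
    for chapter in chapters:
        start, end = chapter['start_time'], chapter['end_time']
        if merged and end - start < min_duration and chapter.get('_was_cut'):
            merged[-1]['end_time'] = end  # absorb into the previous chapter
        else:
            merged.append(dict(chapter))
    return merged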
self._chapters([10], ['c']) + [ + self._sponsor_chapter(2, 8, 'sponsor'), + self._sponsor_chapter(4, 6, 'selfpromo') + ] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([2, 4, 6, 8, 10], [ + 'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', + '[SponsorBlock]: Sponsor', 'c' + ]), []) + def test_make_concat_opts_CommonCase(self): sponsor_chapters = [self._chapter(1, 2, 's1'), self._chapter(10, 20, 's2')] expected = '''ffconcat version 1.0 diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py index 9a7ba8effe..2871e16d51 100644 --- a/yt_dlp/postprocessor/modify_chapters.py +++ b/yt_dlp/postprocessor/modify_chapters.py @@ -15,7 +15,7 @@ from ..utils import ( ) -_TINY_SPONSOR_OVERLAP_DURATION = 1 +_TINY_CHAPTER_DURATION = 1 DEFAULT_SPONSORBLOCK_CHAPTER_TITLE = '[SponsorBlock]: %(category_names)l' @@ -50,7 +50,6 @@ class ModifyChaptersPP(FFmpegPostProcessor): if not info.get('__real_download'): raise PostProcessingError('Cannot cut video since the real and expected durations mismatch. ' 'Different chapters may have already been removed') - return [], info else: self.write_debug('Expected and actual durations mismatch') @@ -145,38 +144,15 @@ class ModifyChaptersPP(FFmpegPostProcessor): new_chapters = [] - def chapter_length(c): - return c['end_time'] - c['start_time'] - - def original_uncut_chapter(c): - return '_was_cut' not in c and '_categories' not in c - def append_chapter(c): assert 'remove' not in c - length = chapter_length(c) - excess_duration(c) + length = c['end_time'] - c['start_time'] - excess_duration(c) # Chapter is completely covered by cuts or sponsors. if length <= 0: return start = new_chapters[-1]['end_time'] if new_chapters else 0 c.update(start_time=start, end_time=start + length) - # Append without checking for tininess to prevent having - # a completely empty chapter list. - if not new_chapters: - new_chapters.append(c) - return - old_c = new_chapters[-1] - # Merge with the previous if the chapter is tiny. - # Only tiny chapters resulting from a cut can be skipped. - # Chapters that were already tiny in the original list will be preserved. - if not original_uncut_chapter(c) and length < _TINY_SPONSOR_OVERLAP_DURATION: - old_c['end_time'] = c['end_time'] - # Previous tiny chapter was appended for the sake of preventing an empty chapter list. - # Replace it with the current one. - elif not original_uncut_chapter(old_c) and chapter_length(old_c) < _TINY_SPONSOR_OVERLAP_DURATION: - c['start_time'] = old_c['start_time'] - new_chapters[-1] = c - else: - new_chapters.append(c) + new_chapters.append(c) # Turn into a priority queue, index is a tie breaker. # Plain stack sorted by start_time is not enough: after splitting the chapter, @@ -275,10 +251,36 @@ class ModifyChaptersPP(FFmpegPostProcessor): append_chapter(cur_chapter) cur_i, cur_chapter = i, c (append_chapter if 'remove' not in cur_chapter else append_cut)(cur_chapter) + return self._remove_tiny_rename_sponsors(new_chapters), cuts + + def _remove_tiny_rename_sponsors(self, chapters): + new_chapters = [] + for i, c in enumerate(chapters): + # Merge with the previous/next if the chapter is tiny. + # Only tiny chapters resulting from a cut can be skipped. + # Chapters that were already tiny in the original list will be preserved. + if (('_was_cut' in c or '_categories' in c) + and c['end_time'] - c['start_time'] < _TINY_CHAPTER_DURATION): + if not new_chapters: + # Prepend tiny chapter to the next one if possible. 
+ if i < len(chapters) - 1: + chapters[i + 1]['start_time'] = c['start_time'] + continue + else: + old_c = new_chapters[-1] + if i < len(chapters) - 1: + next_c = chapters[i + 1] + # Not a typo: key names in old_c and next_c are really different. + prev_is_sponsor = 'categories' in old_c + next_is_sponsor = '_categories' in next_c + # Preferentially prepend tiny normals to normals and sponsors to sponsors. + if (('_categories' not in c and prev_is_sponsor and not next_is_sponsor) + or ('_categories' in c and not prev_is_sponsor and next_is_sponsor)): + next_c['start_time'] = c['start_time'] + continue + old_c['end_time'] = c['end_time'] + continue - i = -1 - for c in new_chapters.copy(): - i += 1 c.pop('_was_cut', None) cats = c.pop('_categories', None) if cats: @@ -292,12 +294,13 @@ class ModifyChaptersPP(FFmpegPostProcessor): }) outtmpl, tmpl_dict = self._downloader.prepare_outtmpl(self._sponsorblock_chapter_title, c) c['title'] = self._downloader.escape_outtmpl(outtmpl) % tmpl_dict - if i > 0 and c['title'] == new_chapters[i - 1]['title']: - new_chapters[i - 1]['end_time'] = c['end_time'] - new_chapters.pop(i) - i -= 1 - - return new_chapters, cuts + # Merge identically named sponsors. + if (new_chapters and 'categories' in new_chapters[-1] + and new_chapters[-1]['title'] == c['title']): + new_chapters[-1]['end_time'] = c['end_time'] + continue + new_chapters.append(c) + return new_chapters def remove_chapters(self, filename, ranges_to_cut, concat_opts, force_keyframes=False): in_file = filename From f9cc0161e67fcf1471178b43649ad8ba6b508c93 Mon Sep 17 00:00:00 2001 From: DigitalDJ Date: Sun, 19 Sep 2021 18:07:57 +0930 Subject: [PATCH 110/641] [extractor] Fix root-relative URLs in MPD (#1006) Authored by: DigitalDJ --- yt_dlp/extractor/common.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index e796842312..f6ca686a3e 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2622,8 +2622,10 @@ class InfoExtractor(object): base_url = base_url_e.text + base_url if re.match(r'^https?://', base_url): break - if mpd_base_url and not re.match(r'^https?://', base_url): - if not mpd_base_url.endswith('/') and not base_url.startswith('/'): + if mpd_base_url and base_url.startswith('/'): + base_url = compat_urlparse.urljoin(mpd_base_url, base_url) + elif mpd_base_url and not re.match(r'^https?://', base_url): + if not mpd_base_url.endswith('/'): mpd_base_url += '/' base_url = mpd_base_url + base_url representation_id = representation_attrib.get('id') From 9c1c3ec016d61c346dc465cee32090df1a40c942 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 19 Sep 2021 14:16:11 +0530 Subject: [PATCH 111/641] [Oreilly] Bugfix for 7738bd32722154a26f70006e0fe586f40d06e606 --- yt_dlp/extractor/safari.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/safari.py b/yt_dlp/extractor/safari.py index fbbbc7e77a..cca4464ca8 100644 --- a/yt_dlp/extractor/safari.py +++ b/yt_dlp/extractor/safari.py @@ -197,7 +197,7 @@ class SafariApiIE(SafariBaseIE): if 'library/view' in web_url: web_url = web_url.replace('library/view', 'videos') natural_keys = part['natural_key'] - web_url = f'{web_url.rsplit("/")[0]}/{natural_keys[0]}-{natural_keys[1][:-5]}' + web_url = f'{web_url.rsplit("/", 1)[0]}/{natural_keys[0]}-{natural_keys[1][:-5]}' return self.url_result(web_url, SafariIE.ie_key()) From 57aa7b8511165c48a6e9c33af820bf9ca459d149 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 19 Sep 2021 
14:20:20 +0530
Subject: [PATCH 112/641] [hls] Byterange + AES128 is supported by native
 downloader

---
 yt_dlp/downloader/hls.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py
index bea2866048..e0dc1def70 100644
--- a/yt_dlp/downloader/hls.py
+++ b/yt_dlp/downloader/hls.py
@@ -56,8 +56,6 @@ class HlsFD(FragmentFD):
 
         def check_results():
             yield not info_dict.get('is_live')
-            is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
-            yield not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest)
             for feature in UNSUPPORTED_FEATURES:
                 yield not re.search(feature, manifest)
             return all(check_results())

From 47626219253f18dbaf578b16f2f4499705e096de Mon Sep 17 00:00:00 2001
From: nyuszika7h
Date: Sun, 19 Sep 2021 13:37:50 +0200
Subject: [PATCH 113/641] [videa] Fix some extraction errors (#1028)

Authored by: nyuszika7h
---
 yt_dlp/extractor/videa.py | 49 +++++++++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 15 deletions(-)

diff --git a/yt_dlp/extractor/videa.py b/yt_dlp/extractor/videa.py
index da0212bb27..512ade7af2 100644
--- a/yt_dlp/extractor/videa.py
+++ b/yt_dlp/extractor/videa.py
@@ -47,10 +47,24 @@ class VideaIE(InfoExtractor):
         },
     }, {
         'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
-        'only_matching': True,
+        'md5': 'd57ccd8812c7fd491d33b1eab8c99975',
+        'info_dict': {
+            'id': 'jAHDWfWSJH5XuFhH',
+            'ext': 'mp4',
+            'title': 'Supercars előzés',
+            'thumbnail': r're:^https?://.*',
+            'duration': 64,
+        },
     }, {
         'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
-        'only_matching': True,
+        'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
+        'info_dict': {
+            'id': '8YfIAjxwWGwT8HVQ',
+            'ext': 'mp4',
+            'title': 'Az őrült kígyász 285 kígyót enged szabadon',
+            'thumbnail': r're:^https?://.*',
+            'duration': 21,
+        },
     }, {
         'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
         'only_matching': True,
@@ -100,10 +114,14 @@ class VideaIE(InfoExtractor):
 
         video_page = self._download_webpage(url, video_id)
 
-        player_url = self._search_regex(
-            r'<iframe.*?src="(/player\?[^"]+)"', video_page, 'player url')
-        player_url = urljoin(url, player_url)
-
-        player_page = self._download_webpage(player_url, video_id)
+        if 'videa.hu/player' in url:
+            player_url = url
+            player_page = video_page
+        else:
+            player_url = self._search_regex(
+                r'<iframe.*?src="(/player\?[^"]+)"', video_page, 'player url')
+            player_url = urljoin(url, player_url)
+            player_page = self._download_webpage(player_url, video_id)
 
         nonce = self._search_regex(
             r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')

From: pukkandan
Date: Sun, 19 Sep 2021 17:45:41 +0530
Subject: [PATCH 114/641] [utils] Improve `extract_timezone`

Code taken from: https://github.com/ytdl-org/youtube-dl/pull/29845
Fixes: https://github.com/ytdl-org/youtube-dl/issues/29948
Authored by: dirkf
---
 yt_dlp/utils.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 983ca6cede..4c0ac5a254 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -3034,8 +3034,16 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
 
 def extract_timezone(date_str):
     m = re.search(
-        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
-        date_str)
+        r'''(?x)
+            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
+            (?P<tz>Z|                                            # just the UTC Z, or
+                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
+                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
+                [ ]?                                             # optional space
+                (?P<sign>\+|-)                                   # +/-
+                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
+            $)
+        ''', date_str)
     if not m:
         timezone = datetime.timedelta()
     else:

From a63d9bd0b00c2c6b8d5f1a90fd783780ceda4023 Mon Sep 17 00:00:00 2001
From: Yuan Chao
Date: Sun, 19 Sep 2021 08:18:22 -0400
Subject: [PATCH 115/641] [CGTN] Add extractor (#981)

Authored by: chao813
---
 yt_dlp/extractor/cgtn.py       | 64 ++++++++++++++++++++++++++++++++++
 yt_dlp/extractor/extractors.py |  1 +
 yt_dlp/utils.py                |  1 +
 3 files changed, 66 insertions(+)
 create mode 100644 yt_dlp/extractor/cgtn.py

diff --git a/yt_dlp/extractor/cgtn.py b/yt_dlp/extractor/cgtn.py
new file mode 100644
index 0000000000..89f173887e
--- /dev/null
+++ b/yt_dlp/extractor/cgtn.py
@@ -0,0 +1,64 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    try_get,
+    unified_timestamp,
+)
+
+
+class CGTNIE(InfoExtractor):
+    _VALID_URL = r'https?://news\.cgtn\.com/news/[0-9]{4}-[0-9]{2}-[0-9]{2}/[a-zA-Z0-9-]+-(?P<id>[a-zA-Z0-9-]+)/index\.html'
+    _TESTS = [
+        {
+            'url': 'https://news.cgtn.com/news/2021-03-09/Up-and-Out-of-Poverty-Ep-1-A-solemn-promise-YuOUaOzGQU/index.html',
+            'info_dict': {
+                'id': 'YuOUaOzGQU',
+                'ext': 'mp4',
+                'title': 'Up and Out of Poverty Ep. 1: A solemn promise',
+                'thumbnail': r're:^https?://.*\.jpg$',
+                'timestamp': 1615295940,
+                'upload_date': '20210309',
+            },
+            'params': {
+                'skip_download': True
+            }
+        }, {
+            'url': 'https://news.cgtn.com/news/2021-06-06/China-Indonesia-vow-to-further-deepen-maritime-cooperation-10REvJCewCY/index.html',
+            'info_dict': {
+                'id': '10REvJCewCY',
+                'ext': 'mp4',
+                'title': 'China, Indonesia vow to further deepen maritime cooperation',
+                'thumbnail': r're:^https?://.*\.png$',
+                'description': 'China and Indonesia vowed to upgrade their cooperation into the maritime sector and also for political security, economy, and cultural and people-to-people exchanges.',
+                'author': 'CGTN',
+                'category': 'China',
+                'timestamp': 1622950200,
+                'upload_date': '20210606',
+            },
+            'params': {
+                'skip_download': False
+            }
+        }
+    ]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        download_url = self._html_search_regex(r'data-video ="(?P<url>.+m3u8)"', webpage, 'download_url')
+        datetime_str = self._html_search_regex(r'<span class="date">\s*(.+?)\s*</span>', webpage, 'datetime_str', fatal=False)
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage, default=None),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'formats': self._extract_m3u8_formats(download_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls'),
+            'category': self._html_search_regex(r'<span class="section">\s*(.+?)\s*</span>',
+                                                webpage, 'category', fatal=False),
+            'author': self._html_search_regex(r'<div class="news-author-name">\s*(.+?)\s*</div>',
+                                              webpage, 'author', default=None, fatal=False),
+            'timestamp': try_get(unified_timestamp(datetime_str), lambda x: x - 8 * 3600),
+        }
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 3dd56c65a2..ddae1d7cc9 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -239,6 +239,7 @@ from .ceskatelevize import (
     CeskaTelevizeIE,
     CeskaTelevizePoradyIE,
 )
+from .cgtn import CGTNIE
 from .channel9 import Channel9IE
 from .charlierose import CharlieRoseIE
 from .chaturbate import ChaturbateIE
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 4c0ac5a254..de0213b142 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -1762,6 +1762,7 @@ DATE_FORMATS = (
     '%b %d %Y at %H:%M:%S',
     '%B %d %Y at %H:%M',
     '%B %d %Y at %H:%M:%S',
+    '%H:%M %d-%b-%Y',
 )
 
 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
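The CGTN timestamp handling above ties the two preceding patches together: the page shows Beijing time (UTC+8) with no timezone marker, so after parsing (made possible by the `%H:%M %d-%b-%Y` format just added to DATE_FORMATS), eight hours are subtracted to get a UTC epoch. A self-contained sketch of the same arithmetic, with an invented sample string:

    import calendar
    from datetime import datetime

    def beijing_str_to_utc_epoch(date_str):
        # The page's '04:59 19-Jun-2021'-style string is naive Beijing time;
        # parse it, treat it as UTC, then shift back by the +8h offset.
        dt = datetime.strptime(date_str, '%H:%M %d-%b-%Y')
        return calendar.timegm(dt.timetuple()) - 8 * 3600

    print(beijing_str_to_utc_epoch('04:59 19-Jun-2021'))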
From 09906f554d485a30b21e56c485718ea9c55db452 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81kos=20S=C3=BClyi?=
Date: Sun, 19 Sep 2021 14:22:31 +0200
Subject: [PATCH 116/641] [aes] Add `aes_gcm_decrypt_and_verify` (#1020)

Authored by: sulyi, pukkandan
---
 test/test_aes.py     |  49 ++++++++--
 test/test_cookies.py |   2 -
 yt_dlp/aes.py        | 209 ++++++++++++++++++++++++++++++++++---------
 yt_dlp/cookies.py    |  23 ++---
 4 files changed, 214 insertions(+), 69 deletions(-)

diff --git a/test/test_aes.py b/test/test_aes.py
index d2e51af29f..46db59e57b 100644
--- a/test/test_aes.py
+++ b/test/test_aes.py
@@ -7,7 +7,19 @@ import sys
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from yt_dlp.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text
+from yt_dlp.aes import (
+    aes_decrypt,
+    aes_encrypt,
+    aes_cbc_decrypt,
+    aes_cbc_decrypt_bytes,
+    aes_cbc_encrypt,
+    aes_ctr_decrypt,
+    aes_ctr_encrypt,
+    aes_gcm_decrypt_and_verify,
+    aes_gcm_decrypt_and_verify_bytes,
+    aes_decrypt_text
+)
+from yt_dlp.compat import compat_pycrypto_AES
 from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes
 
 import base64
@@ -27,18 +39,43 @@ class TestAES(unittest.TestCase):
         self.assertEqual(decrypted, msg)
 
     def test_cbc_decrypt(self):
-        data = bytes_to_intlist(
-            b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd"
-        )
-        decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv))
+        data = b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\x27\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd'
+        decrypted = intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(data), self.key, self.iv))
         self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+        if compat_pycrypto_AES:
+            decrypted = aes_cbc_decrypt_bytes(data, intlist_to_bytes(self.key), intlist_to_bytes(self.iv))
+            self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
 
     def test_cbc_encrypt(self):
         data = bytes_to_intlist(self.secret_msg)
         encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv))
         self.assertEqual(
             encrypted,
-            b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd")
+            b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd')
+
+    def test_ctr_decrypt(self):
+        data = bytes_to_intlist(b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08')
+        decrypted = intlist_to_bytes(aes_ctr_decrypt(data, self.key, self.iv))
+        self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+
+    def test_ctr_encrypt(self):
+        data = bytes_to_intlist(self.secret_msg)
+        encrypted = intlist_to_bytes(aes_ctr_encrypt(data, self.key, self.iv))
+        self.assertEqual(
+            encrypted,
+            b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08')
+
+    def test_gcm_decrypt(self):
+        data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f.\x08\xb4T\xe4/\x17\xbd'
+        authentication_tag = b'\xe8&I\x80rI\x07\x9d}YWuU@:e'
+
+        decrypted = intlist_to_bytes(aes_gcm_decrypt_and_verify(
+            bytes_to_intlist(data), self.key, bytes_to_intlist(authentication_tag), self.iv[:12]))
+        self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
+        if compat_pycrypto_AES:
+            decrypted = aes_gcm_decrypt_and_verify_bytes(
+                data, intlist_to_bytes(self.key), authentication_tag, intlist_to_bytes(self.iv[:12]))
+            self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg)
 
     def test_decrypt_text(self):
         password = intlist_to_bytes(self.key).decode('utf-8')
diff --git a/test/test_cookies.py b/test/test_cookies.py
index 6053ebb4eb..15afb66272 100644
--- a/test/test_cookies.py
+++ b/test/test_cookies.py
@@ -2,7 +2,6 @@ import unittest
 from datetime import datetime, timezone
 
 from yt_dlp import cookies
-from yt_dlp.compat import compat_pycrypto_AES
 from yt_dlp.cookies import (
     LinuxChromeCookieDecryptor,
     MacChromeCookieDecryptor,
@@ -53,7 +52,6 @@ class TestCookies(unittest.TestCase):
         decryptor = LinuxChromeCookieDecryptor('Chrome', YDLLogger())
         self.assertEqual(decryptor.decrypt(encrypted_value), value)
 
-    @unittest.skipIf(not compat_pycrypto_AES, 'cryptography library not available')
     def test_chrome_cookie_decryptor_windows_v10(self):
         with MonkeyPatch(cookies, {
             '_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2<\x9a\x84}\xe9\xb4\x16\xe14\xf6\xdc\x1e\xeb\xb6\xe2\x97\xef\xe4\x14'
diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py
+def shift_block(data):
+    data_shifted = []
+
+    bit = 0
+    for n in data:
+        if bit:
+            n |= 0x100
+        bit = n & 1
+        n >>= 1
+        data_shifted.append(n)
+
+    return data_shifted
+
+
 def inc(data):
     data = data[:]  # copy
     for i in range(len(data) - 1, -1, -1):
@@ -370,4 +445,50 @@ def inc(data):
     return data
 
-__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text']
+def block_product(block_x, block_y):
+    # NIST SP 800-38D, Algorithm 1
+
+    if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES:
+        raise ValueError("Length of blocks need to be %d bytes" % BLOCK_SIZE_BYTES)
+
+    block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1)
+    block_v = block_y[:]
+    block_z = [0] * BLOCK_SIZE_BYTES
+
+    for i in block_x:
+        for bit in range(7, -1, -1):
+            if i & (1 << bit):
+                block_z = xor(block_z, block_v)
+
+            do_xor = block_v[-1] & 1
+            block_v = shift_block(block_v)
+            if do_xor:
+                block_v = xor(block_v, block_r)
+
+    return block_z
+
+
+def ghash(subkey, data):
+    # NIST SP 800-38D, Algorithm 2
+
+    if len(data) % BLOCK_SIZE_BYTES:
+        raise ValueError("Length of data should be %d bytes" % BLOCK_SIZE_BYTES)
+
+    last_y = [0] * BLOCK_SIZE_BYTES
+    for i in range(0, len(data), BLOCK_SIZE_BYTES):
+        block = data[i : i + BLOCK_SIZE_BYTES]  # noqa: E203
+        last_y = block_product(xor(last_y, block), subkey)
+
+    return last_y
+
+
+__all__ = [
+    'aes_ctr_decrypt',
+    'aes_cbc_decrypt',
+    'aes_cbc_decrypt_bytes',
+    'aes_decrypt_text',
+    'aes_encrypt',
+    'aes_gcm_decrypt_and_verify',
+    'aes_gcm_decrypt_and_verify_bytes',
+    'key_expansion'
+]
diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py
index 4f582f4e1e..1409e6799b 100644
--- a/yt_dlp/cookies.py
+++ b/yt_dlp/cookies.py
@@ -9,17 +9,14 @@ import tempfile
 from datetime import datetime, timedelta, timezone
 from hashlib import pbkdf2_hmac
 
-from yt_dlp.aes import aes_cbc_decrypt
-from yt_dlp.compat import (
+from .aes import aes_cbc_decrypt_bytes, aes_gcm_decrypt_and_verify_bytes
+from .compat import (
     compat_b64decode,
     compat_cookiejar_Cookie,
-    compat_pycrypto_AES
 )
-from
yt_dlp.utils import (
+from .utils import (
     bug_reports_message,
-    bytes_to_intlist,
     expand_path,
-    intlist_to_bytes,
     process_communicate_or_kill,
     YoutubeDLCookieJar,
 )
@@ -395,11 +392,6 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
         if self._v10_key is None:
             self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
             return None
-        elif not compat_pycrypto_AES:
-            self._logger.warning('cannot decrypt cookie as the `pycryptodome` module is not installed. '
-                                 'Please install by running `python3 -m pip install pycryptodome`',
-                                 only_once=True)
-            return None

         # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc
         #   kNonceLength
@@ -643,21 +635,18 @@ def pbkdf2_sha1(password, salt, iterations, key_length):


 def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16):
-    plaintext = aes_cbc_decrypt(bytes_to_intlist(ciphertext),
-                                bytes_to_intlist(key),
-                                bytes_to_intlist(initialization_vector))
+    plaintext = aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)
     padding_length = plaintext[-1]
     try:
-        return intlist_to_bytes(plaintext[:-padding_length]).decode('utf-8')
+        return plaintext[:-padding_length].decode('utf-8')
     except UnicodeDecodeError:
         logger.warning('failed to decrypt cookie because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
         return None


 def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
-    cipher = compat_pycrypto_AES.new(key, compat_pycrypto_AES.MODE_GCM, nonce)
     try:
-        plaintext = cipher.decrypt_and_verify(ciphertext, authentication_tag)
+        plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
     except ValueError:
         logger.warning('failed to decrypt cookie because the MAC check failed. Possibly the key is wrong?', only_once=True)
         return None

From 8f8e8eba2408df78d08a601af037ed9bf589ee4b Mon Sep 17 00:00:00 2001
From: u-spec-png <54671367+u-spec-png@users.noreply.github.com>
Date: Sun, 19 Sep 2021 12:26:29 +0000
Subject: [PATCH 117/641] [Nuvid] Fix extractor (#1022)

Fixes: https://github.com/ytdl-org/youtube-dl/issues/29886
Authored by: u-spec-png
---
 yt_dlp/extractor/nuvid.py | 86 ++++++++++++++++++++-------------------
 1 file changed, 44 insertions(+), 42 deletions(-)

diff --git a/yt_dlp/extractor/nuvid.py b/yt_dlp/extractor/nuvid.py
index ab6bfcd7f4..7487824f98 100644
--- a/yt_dlp/extractor/nuvid.py
+++ b/yt_dlp/extractor/nuvid.py
@@ -1,71 +1,73 @@
+# coding: utf-8
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
 from ..utils import (
     parse_duration,
+    int_or_none,
+    try_get,
 )


 class NuvidIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www|m)\.nuvid\.com/video/(?P<id>[0-9]+)'
-    _TEST = {
-        'url': 'http://m.nuvid.com/video/1310741/',
-        'md5': 'eab207b7ac4fccfb4e23c86201f11277',
+    _TESTS = [{
+        'url': 'https://www.nuvid.com/video/6513023/italian-babe',
+        'md5': '772d2f8288f3d3c5c45f7a41761c7844',
         'info_dict': {
-            'id': '1310741',
+            'id': '6513023',
             'ext': 'mp4',
-            'title': 'Horny babes show their awesome bodeis and',
-            'duration': 129,
+            'title': 'italian babe',
+            'duration': 321.0,
             'age_limit': 18,
         }
-    }
+    }, {
+        'url': 'https://m.nuvid.com/video/6523263',
+        'info_dict': {
+            'id': '6523263',
+            'ext': 'mp4',
+            'age_limit': 18,
+            'title': 'Slut brunette college student anal dorm',
+        }
+    }]

     def _real_extract(self, url):
         video_id = self._match_id(url)

-        page_url = 'http://m.nuvid.com/video/%s' % video_id
-        webpage = self._download_webpage(
-            page_url, video_id, 'Downloading video page')
-        # When dwnld_speed exists and has a value larger than the MP4 file's
-        # bitrate, Nuvid returns the MP4 URL
-        # It's unit is 100bytes/millisecond, see mobile-nuvid-min.js for the algorithm
-        self._set_cookie('nuvid.com', 'dwnld_speed', '10.0')
-        mp4_webpage = self._download_webpage(
-            page_url, video_id, 'Downloading video page for MP4 format')
+        qualities = {
+            'lq': '360p',
+            'hq': '720p',
+        }

-        html5_video_re = r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']',
-        video_url = self._html_search_regex(html5_video_re, webpage, video_id)
-        mp4_video_url = self._html_search_regex(html5_video_re, mp4_webpage, video_id)
-        formats = [{
-            'url': video_url,
-        }]
-        if mp4_video_url != video_url:
-            formats.append({
-                'url': mp4_video_url,
+        json_url = f'https://www.nuvid.com/player_config_json/?vid={video_id}&aid=0&domain_id=0&embed=0&check_speed=0'
+        video_data = self._download_json(
+            json_url, video_id, headers={
+                'Accept': 'application/json, text/javascript, */*; q = 0.01',
+                'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
             })

-        title = self._html_search_regex(
-            [r'<div class="thumb-holder video">\s*<h5[^>]*>([^<]+)</h5>',
-             r'<span[^>]+class="title_thumb">([^<]+)</span>'], webpage, 'title').strip()
-        thumbnails = [
-            {
-                'url': thumb_url,
-            } for thumb_url in re.findall(r'<img src="([^"]+)" alt="" />', webpage)
-        ]
-        thumbnail = thumbnails[0]['url'] if thumbnails else None
-        duration = parse_duration(self._html_search_regex(
-            [r'<i class="fa fa-clock-o"></i>\s*(\d{2}:\d{2})',
-             r'<span[^>]+class="view_time">([^<]+)</span>'], webpage, 'duration', fatal=False))
+        formats = [{
+            'url': source,
+            'format_id': qualities.get(quality),
+            'height': int_or_none(qualities.get(quality)[:-1]),
+        } for quality, source in video_data.get('files').items() if source]
+
+        self._check_formats(formats, video_id)
+        self._sort_formats(formats)
+
+        title = video_data.get('title')
+        thumbnail_base_url = try_get(video_data, lambda x: x['thumbs']['url'])
+        thumbnail_extension = try_get(video_data, lambda x: x['thumbs']['extension'])
+        thumbnail_id = self._search_regex(
+            rf'/media/videos/tmb/{video_id}/preview/(\d+)' + thumbnail_extension, video_data.get('poster', ''), 'thumbnail id', default=19)
+        thumbnail = f'{thumbnail_base_url}player/{thumbnail_id}{thumbnail_extension}'
+        duration = parse_duration(video_data.get('duration') or video_data.get('duration_format'))

         return {
             'id': video_id,
+            'formats': formats,
             'title': title,
-            'thumbnails': thumbnails,
             'thumbnail': thumbnail,
             'duration': duration,
             'age_limit': 18,
-            'formats': formats,
         }
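The rework above drops HTML scraping entirely in favour of Nuvid's player_config_json endpoint. A minimal standalone sketch of that flow, assuming the endpoint and its 'files' field behave exactly as the patch expects:

    import json
    import urllib.request

    def fetch_nuvid_formats(video_id):
        # Endpoint and query parameters are taken from the patch above;
        # the response schema ('files' mapping quality -> URL) is an assumption.
        url = ('https://www.nuvid.com/player_config_json/'
               f'?vid={video_id}&aid=0&domain_id=0&embed=0&check_speed=0')
        with urllib.request.urlopen(url) as resp:
            data = json.load(resp)
        qualities = {'lq': '360p', 'hq': '720p'}
        return [
            {'url': source, 'height': int(qualities[quality][:-1])}
            for quality, source in (data.get('files') or {}).items()
            if source and quality in qualities
        ]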
From 1b629e1b4c93753e878d59f2c5780e9e814788c1 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Sun, 19 Sep 2021 19:39:01 +0530
Subject: [PATCH 118/641] [test/cookies] Improve logging

---
 test/test_cookies.py | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/test/test_cookies.py b/test/test_cookies.py
index 15afb66272..7d509ebe85 100644
--- a/test/test_cookies.py
+++ b/test/test_cookies.py
@@ -6,12 +6,25 @@ from yt_dlp.cookies import (
     LinuxChromeCookieDecryptor,
     MacChromeCookieDecryptor,
     WindowsChromeCookieDecryptor,
-    YDLLogger,
     parse_safari_cookies,
     pbkdf2_sha1,
 )


+class Logger:
+    def debug(self, message):
+        print(f'[verbose] {message}')
+
+    def info(self, message):
+        print(message)
+
+    def warning(self, message, only_once=False):
+        self.error(message)
+
+    def error(self, message):
+        raise Exception(message)
+
+
 class MonkeyPatch:
     def __init__(self, module, temporary_values):
         self._module = module
@@ -41,7 +54,7 @@ class TestCookies(unittest.TestCase):
         with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b''}):
             encrypted_value = b'v10\xccW%\xcd\xe6\xe6\x9fM" \xa7\xb0\xca\xe4\x07\xd6'
             value = 'USD'
-            decryptor = LinuxChromeCookieDecryptor('Chrome', YDLLogger())
+            decryptor = LinuxChromeCookieDecryptor('Chrome', Logger())
             self.assertEqual(decryptor.decrypt(encrypted_value), value)

     def test_chrome_cookie_decryptor_linux_v11(self):
@@ -49,7 +62,7 @@ class TestCookies(unittest.TestCase):
         with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b'',
                                    'KEYRING_AVAILABLE': True}):
             encrypted_value = b'v11#\x81\x10>`w\x8f)\xc0\xb2\xc1\r\xf4\x1al\xdd\x93\xfd\xf8\xf8N\xf2\xa9\x83\xf1\xe9o\x0elVQd'
             value = 'tz=Europe.London'
-            decryptor = LinuxChromeCookieDecryptor('Chrome', YDLLogger())
+            decryptor = LinuxChromeCookieDecryptor('Chrome', Logger())
             self.assertEqual(decryptor.decrypt(encrypted_value), value)

     def test_chrome_cookie_decryptor_windows_v10(self):
@@ -58,14 +71,14 @@ class TestCookies(unittest.TestCase):
         }):
             encrypted_value = b'v10T\xb8\xf3\xb8\x01\xa7TtcV\xfc\x88\xb8\xb8\xef\x05\xb5\xfd\x18\xc90\x009\xab\xb1\x893\x85)\x87\xe1\xa9-\xa3\xad='
             value = '32101439'
-            decryptor = WindowsChromeCookieDecryptor('', YDLLogger())
+            decryptor = WindowsChromeCookieDecryptor('', Logger())
             self.assertEqual(decryptor.decrypt(encrypted_value), value)

     def test_chrome_cookie_decryptor_mac_v10(self):
         with MonkeyPatch(cookies, {'_get_mac_keyring_password': lambda *args, **kwargs: b'6eIDUdtKAacvlHwBVwvg/Q=='}):
             encrypted_value = b'v10\xb3\xbe\xad\xa1[\x9fC\xa1\x98\xe0\x9a\x01\xd9\xcf\xbfc'
             value = '2021-06-01-22'
-            decryptor = MacChromeCookieDecryptor('', YDLLogger())
+            decryptor = MacChromeCookieDecryptor('', Logger())
             self.assertEqual(decryptor.decrypt(encrypted_value), value)

     def test_safari_cookie_parsing(self):

From 3cd786dbd7f84c25743ba8d8f8a1a95a4e18491c Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Sun, 19 Sep 2021 19:37:47 +0530
Subject: [PATCH 119/641] [youtube] Warn when trying to download clips

---
 yt_dlp/extractor/extractors.py |  1 +
 yt_dlp/extractor/youtube.py    | 12 +++++++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index ddae1d7cc9..6cafa82a2d 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1812,6 +1812,7 @@ from .yourporn import YourPornIE
 from .yourupload import YourUploadIE
 from .youtube import (
     YoutubeIE,
+    YoutubeClipIE,
     YoutubeFavouritesIE,
     YoutubeHistoryIE,
     YoutubeTabIE,
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 1549c36dfe..eb69b88a3a 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -246,7 +246,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     """Provide base functions for Youtube extractors"""

     _RESERVED_NAMES = (
-        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
+        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
         r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
         r'browse|oembed|get_video_info|iframe_api|s/player|'
         r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
@@ -4727,6 +4727,16 @@ class YoutubeTruncatedURLIE(InfoExtractor):
             expected=True)


+class YoutubeClipIE(InfoExtractor):
+    IE_NAME = 'youtube:clip'
+    IE_DESC = False  # Do not list
+    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'
+
+    def _real_extract(self, url):
+        self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
+        return self.url_result(url, 'Generic')
+
+
 class YoutubeTruncatedIDIE(InfoExtractor):
     IE_NAME = 'youtube:truncated_id'
     IE_DESC = False  # Do not list
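YoutubeClipIE above is a stub extractor: it matches the URL, warns, and defers to the generic extractor. A sketch of the same pattern with a hypothetical site (ExampleStubIE and example.com are illustrative, not part of yt-dlp):

    from yt_dlp.extractor.common import InfoExtractor

    class ExampleStubIE(InfoExtractor):  # hypothetical extractor
        _VALID_URL = r'https?://(?:www\.)?example\.com/clip/'

        def _real_extract(self, url):
            self.report_warning(
                'clips are not supported; the full source page will be used instead')
            # 'Generic' hands the URL to the generic fallback extractor
            return self.url_result(url, 'Generic')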
From cb2ec90e919e7ad99a8db1ce87f188285130a2ec Mon Sep 17 00:00:00 2001
From: u-spec-png <54671367+u-spec-png@users.noreply.github.com>
Date: Sun, 19 Sep 2021 17:47:41 +0000
Subject: [PATCH 120/641] [Peertube] Add channel extractor (#1023)

Authored by: u-spec-png
---
 yt_dlp/extractor/peertube.py | 85 ++++++++++++++++++++++++------------
 1 file changed, 58 insertions(+), 27 deletions(-)

diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py
index b4f57a9990..1e22f24e3f 100644
--- a/yt_dlp/extractor/peertube.py
+++ b/yt_dlp/extractor/peertube.py
@@ -1297,13 +1297,15 @@ class PeerTubeIE(InfoExtractor):

 class PeerTubePlaylistIE(InfoExtractor):
     IE_NAME = 'PeerTube:Playlist'
+    _TYPES = {
+        'a': 'accounts',
+        'c': 'video-channels',
+        'w/p': 'video-playlists',
+    }
     _VALID_URL = r'''(?x)
-                    (?:
-                        https?://(?P<host>%s)/w/p/
-                    )
-                    (?P<id>%s)
-                    ''' % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE)
-    _API_BASE = 'https://%s/api/v1/video-playlists/%s%s'
+                        https?://(?P<host>%s)/(?P<type>(?:%s))/
+                        (?P<id>[^/]+)
+                    ''' % (PeerTubeIE._INSTANCES_RE, '|'.join(_TYPES.keys()))
     _TESTS = [{
         'url': 'https://peertube.tux.ovh/w/p/3af94cba-95e8-4b74-b37a-807ab6d82526',
         'info_dict': {
@@ -1331,41 +1333,70 @@ class PeerTubePlaylistIE(InfoExtractor):
             'timestamp': 1599676222,
         },
         'playlist_mincount': 9,
+    }, {
+        'url': 'https://peertube2.cpy.re/a/chocobozzz/videos',
+        'info_dict': {
+            'id': 'chocobozzz',
+            'timestamp': 1553874564,
+            'title': 'chocobozzz',
+        },
+        'playlist_mincount': 2,
+    }, {
+        'url': 'https://framatube.org/c/bf54d359-cfad-4935-9d45-9d6be93f63e8/videos',
+        'info_dict': {
+            'id': 'bf54d359-cfad-4935-9d45-9d6be93f63e8',
+            'timestamp': 1519917377,
+            'title': 'Les vidéos de Framasoft',
+        },
+        'playlist_mincount': 345,
+    }, {
+        'url': 'https://peertube2.cpy.re/c/blender_open_movies@video.blender.org/videos',
+        'info_dict': {
+            'id': 'blender_open_movies@video.blender.org',
+            'timestamp': 1542287810,
+            'title': 'Official Blender Open Movies',
+        },
+        'playlist_mincount': 11,
     }]
+    _API_BASE = 'https://%s/api/v1/%s/%s%s'
     _PAGE_SIZE = 30

-    def _call_api(self, host, uuid, path, note=None, errnote=None, fatal=True):
+    def call_api(self, host, name, path, base, **kwargs):
         return self._download_json(
-            self._API_BASE % (host, uuid, path), uuid,
-            note=note, errnote=errnote, fatal=fatal)
+            self._API_BASE % (host, base, name, path), name, **kwargs)

-    def _fetch_page(self, host, uuid, page):
+    def fetch_page(self, host, id, type, page):
         page += 1
-        video_data = self._call_api(
-            host, uuid, f'/videos?sort=-createdAt&start={self._PAGE_SIZE * (page - 1)}&count={self._PAGE_SIZE}',
-            note=f'Downloading page {page}').get('data', [])
+        video_data = self.call_api(
+            host, id,
+            f'/videos?sort=-createdAt&start={self._PAGE_SIZE * (page - 1)}&count={self._PAGE_SIZE}&nsfw=both',
+            type, note=f'Downloading page {page}').get('data', [])
         for video in video_data:
-            shortUUID = try_get(video, lambda x: x['video']['shortUUID'])
-            video_title = try_get(video, lambda x: x['video']['name'])
+            shortUUID = video.get('shortUUID') or try_get(video, lambda x: x['video']['shortUUID'])
+            video_title = video.get('name') or try_get(video, lambda x: x['video']['name'])
             yield self.url_result(
                 f'https://{host}/w/{shortUUID}', PeerTubeIE.ie_key(),
                 video_id=shortUUID, video_title=video_title)

-    def _real_extract(self, url):
-        host, playlist_id = self._match_valid_url(url).group('host', 'id')
-        playlist_info = self._call_api(host, playlist_id, '', note='Downloading playlist information', fatal=False)
+    def _extract_playlist(self, host, type, id):
+        info = self.call_api(host, id, '', type, note='Downloading playlist information', fatal=False)

-        playlist_title = playlist_info.get('displayName')
-        playlist_description = playlist_info.get('description')
-        playlist_timestamp = unified_timestamp(playlist_info.get('createdAt'))
-        channel = try_get(playlist_info, lambda x: x['ownerAccount']['name'])
-        channel_id = try_get(playlist_info, lambda x: x['ownerAccount']['id'])
-        thumbnail = playlist_info.get('thumbnailPath')
-        thumbnail = f'https://{host}{thumbnail}'
+        playlist_title = info.get('displayName')
+        playlist_description = info.get('description')
+        playlist_timestamp = unified_timestamp(info.get('createdAt'))
+        channel = try_get(info, lambda x: x['ownerAccount']['name']) or info.get('displayName')
+        channel_id = try_get(info, lambda x: x['ownerAccount']['id']) or info.get('id')
+        thumbnail = info.get('thumbnailPath')
+        thumbnail = f'https://{host}{thumbnail}' if thumbnail else None

         entries = OnDemandPagedList(functools.partial(
-            self._fetch_page, host, playlist_id), self._PAGE_SIZE)
+            self.fetch_page, host, id, type), self._PAGE_SIZE)

         return self.playlist_result(
-            entries, playlist_id, playlist_title, playlist_description,
+            entries, id, playlist_title, playlist_description,
             timestamp=playlist_timestamp, channel=channel, channel_id=channel_id,
             thumbnail=thumbnail)
+
+    def _real_extract(self, url):
+        type, host, id = self._match_valid_url(url).group('type', 'host', 'id')
+        type = self._TYPES[type]
+        return self._extract_playlist(host, type, id)
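The PeerTube change keys the API collection off the URL prefix. A rough standalone illustration of that routing, under the assumption that the v1 endpoint layout is exactly as the patch encodes it:

    TYPES = {'a': 'accounts', 'c': 'video-channels', 'w/p': 'video-playlists'}

    def api_url(host, kind, name, path=''):
        # e.g. 'c' -> https://<host>/api/v1/video-channels/<name><path>
        return f'https://{host}/api/v1/{TYPES[kind]}/{name}{path}'

    print(api_url('framatube.org', 'c',
                  'bf54d359-cfad-4935-9d45-9d6be93f63e8',
                  '/videos?sort=-createdAt&start=0&count=30&nsfw=both'))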
From 0d32e124c69c5d53eb9dd286aa6147ec4cf52e54 Mon Sep 17 00:00:00 2001
From: makeworld <25111343+makeworld-the-better-one@users.noreply.github.com>
Date: Sun, 19 Sep 2021 18:13:26 -0400
Subject: [PATCH 121/641] [CBC] Fix CBC Gem extractors (#1013)

Closes #936
Authored by: makeworld-the-better-one
---
 yt_dlp/extractor/cbc.py        | 486 ++++++++++++++-------------------
 yt_dlp/extractor/extractors.py |   6 +-
 2 files changed, 212 insertions(+), 280 deletions(-)

diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py
index fd5ec6033b..061b09908d 100644
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@@ -1,30 +1,18 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import hashlib
-import json
 import re
-from xml.sax.saxutils import escape

 from .common import InfoExtractor
 from ..compat import (
     compat_str,
-    compat_HTTPError,
 )
 from ..utils import (
     js_to_json,
     smuggle_url,
     try_get,
-    xpath_text,
-    xpath_element,
-    xpath_with_ns,
-    find_xpath_attr,
     orderedSet,
-    parse_duration,
-    parse_iso8601,
-    parse_age_limit,
     strip_or_none,
-    int_or_none,
     ExtractorError,
 )

@@ -59,6 +47,7 @@ class CBCIE(InfoExtractor):
             'uploader': 'CBCC-NEW',
             'timestamp': 1382717907,
         },
+        'skip': 'No longer available',
     }, {
         # with clipId, feed only available via tpfeed.cbc.ca
         'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
@@ -209,289 +198,232 @@ class CBCPlayerIE(InfoExtractor):
     }


-class CBCWatchBaseIE(InfoExtractor):
-    _device_id = None
-    _device_token = None
-    _API_BASE_URL = 'https://api-cbc.cloud.clearleap.com/cloffice/client/'
-    _NS_MAP = {
-        'media': 'http://search.yahoo.com/mrss/',
-        'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
-    }
-    _GEO_COUNTRIES = ['CA']
-    _LOGIN_URL = 'https://api.loginradius.com/identity/v2/auth/login'
-    _TOKEN_URL = 'https://cloud-api.loginradius.com/sso/jwt/api/token'
-    _API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37'
-    _NETRC_MACHINE = 'cbcwatch'
-
-    def _signature(self, email, password):
-        data = json.dumps({
-            'email': email,
-            'password': password,
-        }).encode()
-        headers = {'content-type': 'application/json'}
-        query = {'apikey': self._API_KEY}
-        resp = self._download_json(self._LOGIN_URL, None, data=data, headers=headers, query=query)
-        access_token = resp['access_token']
-
-        # token
-        query = {
-            'access_token': access_token,
-            'apikey': self._API_KEY,
-            'jwtapp': 'jwt',
-        }
-        resp = self._download_json(self._TOKEN_URL, None, headers=headers, query=query)
-        return resp['signature']
-
-    def _call_api(self, path, video_id):
-        url = path if path.startswith('http') else self._API_BASE_URL + path
-        for _ in range(2):
-            try:
-                result = self._download_xml(url, video_id, headers={
-                    'X-Clearleap-DeviceId': self._device_id,
-                    'X-Clearleap-DeviceToken': self._device_token,
-                })
-            except ExtractorError as e:
-                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
-                    # Device token has expired, re-acquiring device token
-                    self._register_device()
-                    continue
-                raise
-            error_message = xpath_text(result, 'userMessage') or xpath_text(result, 'systemMessage')
-            if error_message:
-                raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message))
-            return result
-
-    def _real_initialize(self):
-        if self._valid_device_token():
-            return
-        device = self._downloader.cache.load(
-            'cbcwatch', self._cache_device_key()) or {}
-        self._device_id, self._device_token = device.get('id'), device.get('token')
-        if self._valid_device_token():
-            return
-        self._register_device()
-
-    def _valid_device_token(self):
-        return self._device_id and self._device_token
-
-    def _cache_device_key(self):
-        email, _ = self._get_login_info()
-        return '%s_device' % hashlib.sha256(email.encode()).hexdigest() if email else 'device'
-
-    def _register_device(self):
-        result = self._download_xml(
-            self._API_BASE_URL + 'device/register',
-            None, 'Acquiring device token',
-            data=b'<device><type>web</type></device>')
-        self._device_id = xpath_text(result, 'deviceId', fatal=True)
-        email, password = self._get_login_info()
-        if email and password:
-            signature = self._signature(email, password)
-            data = '<login><token>{0}</token><device><deviceId>{1}</deviceId><type>web</type></device></login>'.format(
-                escape(signature), escape(self._device_id)).encode()
-            url = self._API_BASE_URL + 'device/login'
-            result = self._download_xml(
-                url, None, data=data,
-                headers={'content-type': 'application/xml'})
-            self._device_token = xpath_text(result, 'token', fatal=True)
-        else:
-            self._device_token = xpath_text(result, 'deviceToken', fatal=True)
-        self._downloader.cache.store(
-            'cbcwatch', self._cache_device_key(), {
-                'id': self._device_id,
-                'token': self._device_token,
-            })
-
-    def _parse_rss_feed(self, rss):
-        channel = xpath_element(rss, 'channel', fatal=True)
-
-        def _add_ns(path):
-            return xpath_with_ns(path, self._NS_MAP)
-
-        entries = []
-        for item in channel.findall('item'):
-            guid = xpath_text(item, 'guid', fatal=True)
-            title = xpath_text(item, 'title', fatal=True)
-
-            media_group = xpath_element(item, _add_ns('media:group'), fatal=True)
-            content = xpath_element(media_group, _add_ns('media:content'), fatal=True)
-            content_url = content.attrib['url']
-
-            thumbnails = []
-            for thumbnail in media_group.findall(_add_ns('media:thumbnail')):
-                thumbnail_url = thumbnail.get('url')
-                if not thumbnail_url:
-                    continue
-                thumbnails.append({
-                    'id': thumbnail.get('profile'),
-                    'url': thumbnail_url,
-                    'width': int_or_none(thumbnail.get('width')),
-                    'height': int_or_none(thumbnail.get('height')),
-                })
-
-            timestamp = None
-            release_date = find_xpath_attr(
-                item, _add_ns('media:credit'), 'role', 'releaseDate')
-            if release_date is not None:
-                timestamp = parse_iso8601(release_date.text)
-
-            entries.append({
-                '_type': 'url_transparent',
-                'url': content_url,
-                'id': guid,
-                'title': title,
-                'description': xpath_text(item, 'description'),
-                'timestamp': timestamp,
-                'duration': int_or_none(content.get('duration')),
-                'age_limit': parse_age_limit(xpath_text(item, _add_ns('media:rating'))),
-                'episode': xpath_text(item, _add_ns('clearleap:episode')),
-                'episode_number': int_or_none(xpath_text(item, _add_ns('clearleap:episodeInSeason'))),
-                'series': xpath_text(item, _add_ns('clearleap:series')),
-                'season_number': int_or_none(xpath_text(item, _add_ns('clearleap:season'))),
-                'thumbnails': thumbnails,
-                'ie_key': 'CBCWatchVideo',
-            })
-
-        return self.playlist_result(
-            entries, xpath_text(channel, 'guid'),
-            xpath_text(channel, 'title'),
-            xpath_text(channel, 'description'))
-
-
-class CBCWatchVideoIE(CBCWatchBaseIE):
-    IE_NAME = 'cbc.ca:watch:video'
-    _VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
-    _TEST = {
-        # geo-restricted to Canada, bypassable
-        'url': 'https://api-cbc.cloud.clearleap.com/cloffice/client/web/play/?contentId=3c84472a-1eea-4dee-9267-2655d5055dcf&categoryId=ebc258f5-ee40-4cca-b66b-ba6bd55b7235',
-        'only_matching': True,
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        result = self._call_api(url, video_id)
-
-        m3u8_url = xpath_text(result, 'url', fatal=True)
-        formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False)
-        if len(formats) < 2:
-            formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
-        for f in formats:
-            format_id = f.get('format_id')
-            if format_id.startswith('AAC'):
-                f['acodec'] = 'aac'
-            elif format_id.startswith('AC3'):
-                f['acodec'] = 'ac-3'
-        self._sort_formats(formats)
-
-        info = {
-            'id': video_id,
-            'title': video_id,
-            'formats': formats,
-        }
-
-        rss = xpath_element(result, 'rss')
-        if rss:
-            info.update(self._parse_rss_feed(rss)['entries'][0])
-            del info['url']
-            del info['_type']
-            del info['ie_key']
-        return info
-
-
-class CBCWatchIE(CBCWatchBaseIE):
-    IE_NAME = 'cbc.ca:watch'
-    _VALID_URL = r'https?://(?:gem|watch)\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
+class CBCGemIE(InfoExtractor):
+    IE_NAME = 'gem.cbc.ca'
+    _VALID_URL = r'https?://gem\.cbc\.ca/media/(?P<id>[0-9a-z-]+/s[0-9]+[a-z][0-9]+)'
     _TESTS = [{
         # geo-restricted to Canada, bypassable
-        'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4',
+        # This is a normal, public, TV show video
+        'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01',
+        'md5': '93dbb31c74a8e45b378cf13bd3f6f11e',
         'info_dict': {
-            'id': '9673749a-5e77-484c-8b62-a1092a6b5168',
+            'id': 'schitts-creek/s06e01',
             'ext': 'mp4',
-            'title': 'Customer (Dis)Service',
-            'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87',
-            'upload_date': '20160219',
-            'timestamp': 1455840000,
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-            'format': 'bestvideo',
+            'title': 'Smoke Signals',
+            'description': 'md5:929868d20021c924020641769eb3e7f1',
+            'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_06e01_thumbnail_v01.jpg?im=Resize=(Size)',
+            'duration': 1314,
+            'categories': ['comedy'],
+            'series': 'Schitt\'s Creek',
+            'season': 'Season 6',
+            'season_number': 6,
+            'episode': 'Smoke Signals',
+            'episode_number': 1,
+            'episode_id': 'schitts-creek/s06e01',
         },
+        'params': {'format': 'bv'},
+        'skip': 'Geo-restricted to Canada',
     }, {
         # geo-restricted to Canada, bypassable
-        'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057',
+        # This video requires an account in the browser, but works fine in yt-dlp
+        'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01',
+        'md5': '297a9600f554f2258aed01514226a697',
         'info_dict': {
-            'id': '1ed4b385-cd84-49cf-95f0-80f004680057',
-            'title': 'Arthur',
-            'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.',
+            'id': 'schitts-creek/s01e01',
+            'ext': 'mp4',
+            'title': 'The Cup Runneth Over',
+            'description': 'md5:9bca14ea49ab808097530eb05a29e797',
+            'thumbnail': 'https://images.radio-canada.ca/v1/synps-cbc/episode/perso/cbc_schitts_creek_season_01e01_thumbnail_v01.jpg?im=Resize=(Size)',
+            'series': 'Schitt\'s Creek',
+            'season_number': 1,
+            'season': 'Season 1',
+            'episode_number': 1,
+            'episode': 'The Cup Runneth Over',
+            'episode_id': 'schitts-creek/s01e01',
+            'duration': 1309,
+            'categories': ['comedy'],
         },
-        'playlist_mincount': 30,
-    }, {
-        'url': 'https://gem.cbc.ca/media/this-hour-has-22-minutes/season-26/episode-20/38e815a-0108c6c6a42',
-        'only_matching': True,
+        'params': {'format': 'bv'},
+        'skip': 'Geo-restricted to Canada',
     }]
+    _API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/assets/'

     def _real_extract(self, url):
         video_id = self._match_id(url)
-        rss = self._call_api('web/browse/' + video_id, video_id)
-        return self._parse_rss_feed(rss)
+        video_info = self._download_json(self._API_BASE + video_id, video_id)

-
-class CBCOlympicsIE(InfoExtractor):
-    IE_NAME = 'cbc.ca:olympics'
-    _VALID_URL = r'https?://olympics\.cbc\.ca/video/[^/]+/(?P<id>[^/?#]+)'
-    _TESTS = [{
-        'url': 'https://olympics.cbc.ca/video/whats-on-tv/olympic-morning-featuring-the-opening-ceremony/',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        video_id = self._hidden_inputs(webpage)['videoId']
-        video_doc = self._download_xml(
-            'https://olympics.cbc.ca/videodata/%s.xml' % video_id, video_id)
-        title = xpath_text(video_doc, 'title', fatal=True)
-        is_live = xpath_text(video_doc, 'kind') == 'Live'
-        if is_live:
-            title = self._live_title(title)
-
-        formats = []
-        for video_source in video_doc.findall('videoSources/videoSource'):
-            uri = xpath_text(video_source, 'uri')
-            if not uri:
-                continue
-            tokenize = self._download_json(
-                'https://olympics.cbc.ca/api/api-akamai/tokenize',
-                video_id, data=json.dumps({
-                    'VideoSource': uri,
-                }).encode(), headers={
-                    'Content-Type': 'application/json',
-                    'Referer': url,
-                    # d3.VideoPlayer._init in https://olympics.cbc.ca/components/script/base.js
-                    'Cookie': '_dvp=TK:C0ObxjerU',  # AKAMAI CDN cookie
-                }, fatal=False)
-            if not tokenize:
-                continue
-            content_url = tokenize['ContentUrl']
-            video_source_format = video_source.get('format')
-            if video_source_format == 'IIS':
-                formats.extend(self._extract_ism_formats(
-                    content_url, video_id, ism_id=video_source_format, fatal=False))
+        last_error = None
+        attempt = -1
+        retries = self.get_param('extractor_retries', 15)
+        while attempt < retries:
+            attempt += 1
+            if last_error:
+                self.report_warning('%s. Retrying ...' % last_error)
+            m3u8_info = self._download_json(
+                video_info['playSession']['url'], video_id,
+                note='Downloading JSON metadata%s' % f' (attempt {attempt})')
+            m3u8_url = m3u8_info.get('url')
+            if m3u8_url:
+                break
+            elif m3u8_info.get('errorCode') == 1:
+                self.raise_geo_restricted(countries=['CA'])
             else:
-                formats.extend(self._extract_m3u8_formats(
-                    content_url, video_id, 'mp4',
-                    'm3u8' if is_live else 'm3u8_native',
-                    m3u8_id=video_source_format, fatal=False))
+                last_error = f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}'
+                # 35 means media unavailable, but retries work
+                if m3u8_info.get('errorCode') != 35 or attempt >= retries:
+                    raise ExtractorError(last_error)
+
+        formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls')
+        self._remove_duplicate_formats(formats)
+
+        for i, format in enumerate(formats):
+            if format.get('vcodec') == 'none':
+                if format.get('ext') is None:
+                    format['ext'] = 'm4a'
+                if format.get('acodec') is None:
+                    format['acodec'] = 'mp4a.40.2'
+
+                # Put described audio at the beginning of the list, so that it
+                # isn't chosen by default, as most people won't want it.
+                if 'descriptive' in format['format_id'].lower():
+                    format['preference'] = -2
+
+        self._sort_formats(formats)

         return {
             'id': video_id,
-            'display_id': display_id,
-            'title': title,
-            'description': xpath_text(video_doc, 'description'),
-            'thumbnail': xpath_text(video_doc, 'thumbnailUrl'),
-            'duration': parse_duration(xpath_text(video_doc, 'duration')),
+            'title': video_info['title'],
+            'description': video_info.get('description'),
+            'thumbnail': video_info.get('image'),
+            'series': video_info.get('series'),
+            'season_number': video_info.get('season'),
+            'season': f'Season {video_info.get("season")}',
+            'episode_number': video_info.get('episode'),
+            'episode': video_info.get('title'),
+            'episode_id': video_id,
+            'duration': video_info.get('duration'),
+            'categories': [video_info.get('category')],
             'formats': formats,
-            'is_live': is_live,
+            'release_timestamp': video_info.get('airDate'),
+            'timestamp': video_info.get('availableDate'),
+        }
+
+
+class CBCGemPlaylistIE(InfoExtractor):
+    IE_NAME = 'gem.cbc.ca:playlist'
+    _VALID_URL = r'https?://gem\.cbc\.ca/media/(?P<id>(?P<show>[0-9a-z-]+)/s(?P<season>[0-9]+))/?(?:[?#]|$)'
+    _TESTS = [{
+        # geo-restricted to Canada, bypassable
+        # TV show playlist, all public videos
+        'url': 'https://gem.cbc.ca/media/schitts-creek/s06',
+        'playlist_count': 16,
+        'info_dict': {
+            'id': 'schitts-creek/s06',
+            'title': 'Season 6',
+            'description': 'md5:6a92104a56cbeb5818cc47884d4326a2',
+        },
+        'skip': 'Geo-restricted to Canada',
+    }]
+    _API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/'
+
+    def _real_extract(self, url):
+        match = self._match_valid_url(url)
+        season_id = match.group('id')
+        show = match.group('show')
+        show_info = self._download_json(self._API_BASE + show, season_id)
+        season = int(match.group('season'))
+        season_info = try_get(show_info, lambda x: x['seasons'][season - 1])
+
+        if season_info is None:
+            raise ExtractorError(f'Couldn\'t find season {season} of {show}')
+
+        episodes = []
+        for episode in season_info['assets']:
+            episodes.append({
+                '_type': 'url_transparent',
+                'ie_key': 'CBCGem',
+                'url': 'https://gem.cbc.ca/media/' + episode['id'],
+                'id': episode['id'],
+                'title': episode.get('title'),
+                'description': episode.get('description'),
+                'thumbnail': episode.get('image'),
+                'series': episode.get('series'),
+                'season_number': episode.get('season'),
+                'season': season_info['title'],
+                'season_id': season_info.get('id'),
+                'episode_number': episode.get('episode'),
+                'episode': episode.get('title'),
+                'episode_id': episode['id'],
+                'duration': episode.get('duration'),
+                'categories': [episode.get('category')],
+            })
+
+        thumbnail = None
+        tn_uri = season_info.get('image')
+        # the-national was observed to use a "data:image/png;base64"
+        # URI for their 'image' value. The image was 1x1, and is
+        # probably just a placeholder, so it is ignored.
+        if tn_uri is not None and not tn_uri.startswith('data:'):
+            thumbnail = tn_uri
+
+        return {
+            '_type': 'playlist',
+            'entries': episodes,
+            'id': season_id,
+            'title': season_info['title'],
+            'description': season_info.get('description'),
+            'thumbnail': thumbnail,
+            'series': show_info.get('title'),
+            'season_number': season_info.get('season'),
+            'season': season_info['title'],
+        }
+
+
+class CBCGemLiveIE(InfoExtractor):
+    IE_NAME = 'gem.cbc.ca:live'
+    _VALID_URL = r'https?://gem\.cbc\.ca/live/(?P<id>[0-9]{12})'
+    _TEST = {
+        'url': 'https://gem.cbc.ca/live/920604739687',
+        'info_dict': {
+            'title': 'Ottawa',
+            'description': 'The live TV channel and local programming from Ottawa',
+            'thumbnail': 'https://thumbnails.cbc.ca/maven_legacy/thumbnails/CBC_OTT_VMS/Live_Channel_Static_Images/Ottawa_2880x1620.jpg',
+            'is_live': True,
+            'id': 'AyqZwxRqh8EH',
+            'ext': 'mp4',
+            'timestamp': 1492106160,
+            'upload_date': '20170413',
+            'uploader': 'CBCC-NEW',
+        },
+        'skip': 'Live might have ended',
+    }
+
+    # It's unclear where the chars at the end come from, but they appear to be
+    # constant. Might need updating in the future.
+    _API = 'https://tpfeed.cbc.ca/f/ExhSPC/t_t3UKJR6MAT'
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        live_info = self._download_json(self._API, video_id)['entries']
+
+        video_info = None
+        for stream in live_info:
+            if stream.get('guid') == video_id:
+                video_info = stream
+
+        if video_info is None:
+            raise ExtractorError(
+                'Couldn\'t find video metadata, maybe this livestream is now offline',
+                expected=True)
+
+        return {
+            '_type': 'url_transparent',
+            'ie_key': 'ThePlatform',
+            'url': video_info['content'][0]['url'],
+            'id': video_id,
+            'title': video_info.get('title'),
+            'description': video_info.get('description'),
+            'tags': try_get(video_info, lambda x: x['keywords'].split(', ')),
+            'thumbnail': video_info.get('cbc$staticImage'),
+            'is_live': True,
         }
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 6cafa82a2d..10581a71be 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -208,9 +208,9 @@ from .cartoonnetwork import CartoonNetworkIE
 from .cbc import (
     CBCIE,
     CBCPlayerIE,
-    CBCWatchVideoIE,
-    CBCWatchIE,
-    CBCOlympicsIE,
+    CBCGemIE,
+    CBCGemPlaylistIE,
+    CBCGemLiveIE,
 )
 from .cbs import CBSIE
 from .cbslocal import (
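CBCGemIE's playback loop above retries only on one specific error. A generic sketch of that retry shape; the error-code meanings (1 = geo-restricted, 35 = transient "media unavailable") are assumptions carried over from the patch comments:

    def resolve_manifest(download_json, play_session_url, retries=15):
        last_error = None
        for attempt in range(retries + 1):
            info = download_json(play_session_url)
            if info.get('url'):
                return info['url']                  # manifest is ready
            if info.get('errorCode') == 1:
                raise PermissionError('geo-restricted to Canada')
            last_error = f'{info.get("errorCode")} - {info.get("message")}'
            if info.get('errorCode') != 35:         # only 35 is worth retrying
                break
        raise RuntimeError(last_error)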
From 96933fc1b6043060498f2c85c25457bd526ba793 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 22 Sep 2021 00:15:56 +0530
Subject: [PATCH 122/641] [aria2c] Fix IV for some AES-128 streams

Authored by: shirt
---
 yt_dlp/downloader/external.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
index 1057382e0b..a0d346c12e 100644
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@@ -11,6 +11,7 @@ from ..aes import aes_cbc_decrypt_bytes
 from ..compat import (
     compat_setenv,
     compat_str,
+    compat_struct_pack,
 )
 from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
 from ..utils import (
@@ -155,7 +156,7 @@ class ExternalFD(FileDownloader):
             decrypt_info = fragment.get('decrypt_info')
             if decrypt_info:
                 if decrypt_info['METHOD'] == 'AES-128':
-                    iv = decrypt_info.get('IV')
+                    iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence'])
                     decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
                         self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
                     encrypted_data = src.read()

From daf7ac2b92494bed6bc9fcada69fbb9b94b539bb Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 22 Sep 2021 01:15:16 +0530
Subject: [PATCH 123/641] [fragment] Avoid repeated request for AES key

---
 yt_dlp/downloader/fragment.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py
index 567bf69d3d..cd1e2350de 100644
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@@ -369,13 +369,19 @@ class FragmentFD(FileDownloader):
                 return False, frag_index
             return frag_content, frag_index

+        _key_cache = {}
+
+        def _get_key(url):
+            if url not in _key_cache:
+                _key_cache[url] = self.ydl.urlopen(self._prepare_url(info_dict, url)).read()
+            return _key_cache[url]
+
         def decrypt_fragment(fragment, frag_content):
             decrypt_info = fragment.get('decrypt_info')
             if not decrypt_info or decrypt_info['METHOD'] != 'AES-128':
                 return frag_content
             iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence'])
-            decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
-                self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
+            decrypt_info['KEY'] = decrypt_info.get('KEY') or _get_key(info_dict.get('_decryption_key_url') or decrypt_info['URI'])
             # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
             # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
             # not what it decrypts to.
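Both fragment patches above lean on the HLS rule (RFC 8216, section 5.2) that an AES-128 segment without an explicit IV uses the 16-byte big-endian media sequence number as its IV; that is exactly what the `'>8xq'` format string packs:

    import struct

    def default_hls_iv(media_sequence):
        # 8 zero-padding bytes ('8x') followed by a big-endian
        # signed 64-bit integer ('q') gives the 128-bit IV
        return struct.pack('>8xq', media_sequence)

    assert default_hls_iv(5) == bytes(15) + b'\x05'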
From d9d8b857477d8797ab1b55a99684d6d71959c51b Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 22 Sep 2021 04:16:50 +0530
Subject: [PATCH 124/641] [fragment] Fix range header when using `-N` and media sequence (#1048)

Authored by: shirt
---
 yt_dlp/downloader/fragment.py | 2 +-
 yt_dlp/downloader/hls.py      | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py
index cd1e2350de..10ab90ba6f 100644
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@@ -332,7 +332,7 @@ class FragmentFD(FileDownloader):

         def download_fragment(fragment, ctx):
             frag_index = ctx['fragment_index'] = fragment['frag_index']
-            headers = info_dict.get('http_headers', {})
+            headers = info_dict.get('http_headers', {}).copy()
             byte_range = fragment.get('byte_range')
             if byte_range:
                 headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py
index e0dc1def70..f343e18797 100644
--- a/yt_dlp/downloader/hls.py
+++ b/yt_dlp/downloader/hls.py
@@ -167,6 +167,7 @@ class HlsFD(FragmentFD):
                         'byte_range': byte_range,
                         'media_sequence': media_sequence,
                     })
+                    media_sequence += 1

                 elif line.startswith('#EXT-X-MAP'):
                     if format_index and discontinuity_count != format_index:
@@ -191,6 +192,7 @@ class HlsFD(FragmentFD):
                             'byte_range': byte_range,
                             'media_sequence': media_sequence
                         })
+                        media_sequence += 1

                     if map_info.get('BYTERANGE'):
                         splitted_byte_range = map_info.get('BYTERANGE').split('@')

From bd6f722de8d44958ebc1b4b80bb59cbcb37c8ff3 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 22 Sep 2021 05:25:17 +0530
Subject: [PATCH 125/641] dump files should obey `--trim-filename` (#1043)

Authored by: sulyi
---
 yt_dlp/extractor/common.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index f6ca686a3e..ae03c1bab3 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -789,9 +789,10 @@ class InfoExtractor(object):
             self._downloader.to_screen(dump)
         if self.get_param('write_pages', False):
             basen = '%s_%s' % (video_id, urlh.geturl())
-            if len(basen) > 240:
+            trim_length = self.get_param('trim_file_name') or 240
+            if len(basen) > trim_length:
                 h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
-                basen = basen[:240 - len(h)] + h
+                basen = basen[:trim_length - len(h)] + h
             raw_filename = basen + '.dump'
             filename = sanitize_filename(raw_filename, restricted=True)
             self.to_screen('Saving request to ' + filename)

From 1009f67c2a9a774bd4b3d7b09de4ad1268fa2f02 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 22 Sep 2021 05:27:07 +0530
Subject: [PATCH 126/641] [fragment,aria2c] Generalize and refactor some code

---
 yt_dlp/downloader/external.py | 27 ++++-----------------
 yt_dlp/downloader/fragment.py | 45 +++++++++++++++++++----------------
 2 files changed, 30 insertions(+), 42 deletions(-)

diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
index a0d346c12e..025eb38cb6 100644
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@@ -6,7 +6,7 @@ import subprocess
 import sys
 import time

-from .common import FileDownloader
+from .fragment import FragmentFD
 from ..aes import aes_cbc_decrypt_bytes
 from ..compat import (
     compat_setenv,
@@ -30,7 +30,7 @@ from ..utils import (
 )


-class ExternalFD(FileDownloader):
+class ExternalFD(FragmentFD):
     SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps')
     can_download_to_stdout = False

@@ -142,6 +142,7 @@ class ExternalFD(FileDownloader):
                 self.report_error('Giving up after %s fragment retries' % fragment_retries)
                 return -1

+        decrypt_fragment = self.decrypter(info_dict)
         dest, _ = sanitize_open(tmpfilename, 'wb')
         for frag_index, fragment in enumerate(info_dict['fragments']):
             fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
@@ -153,21 +154,7 @@ class ExternalFD(FileDownloader):
                     continue
                 self.report_error('Unable to open fragment %d' % frag_index)
                 return -1
-            decrypt_info = fragment.get('decrypt_info')
-            if decrypt_info:
-                if decrypt_info['METHOD'] == 'AES-128':
-                    iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence'])
-                    decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
-                        self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
-                    encrypted_data = src.read()
-                    decrypted_data = aes_cbc_decrypt_bytes(encrypted_data, decrypt_info['KEY'], iv)
-                    dest.write(decrypted_data)
-                else:
-                    fragment_data = src.read()
-                    dest.write(fragment_data)
-            else:
-                fragment_data = src.read()
-                dest.write(fragment_data)
+            dest.write(decrypt_fragment(fragment, src.read()))
             src.close()
             if not self.params.get('keep_fragments', False):
                 os.remove(encodeFilename(fragment_filename))
@@ -181,10 +168,6 @@ class ExternalFD(FileDownloader):
             self.to_stderr(stderr.decode('utf-8', 'replace'))
         return p.returncode

-    def _prepare_url(self, info_dict, url):
-        headers = info_dict.get('http_headers')
-        return sanitized_Request(url, None, headers) if headers else url
-

 class CurlFD(ExternalFD):
     AVAILABLE_OPT = '-V'
@@ -518,7 +501,7 @@ class AVconvFD(FFmpegFD):

 _BY_NAME = dict(
     (klass.get_basename(), klass)
     for name, klass in globals().items()
-    if name.endswith('FD') and name != 'ExternalFD'
+    if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD')
 )

diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py
index 10ab90ba6f..ebdef27dbe 100644
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@@ -324,6 +324,29 @@ class FragmentFD(FileDownloader):
             'fragment_index': 0,
         })

+    def decrypter(self, info_dict):
+        _key_cache = {}
+
+        def _get_key(url):
+            if url not in _key_cache:
+                _key_cache[url] = self.ydl.urlopen(self._prepare_url(info_dict, url)).read()
+            return _key_cache[url]
+
+        def decrypt_fragment(fragment, frag_content):
+            decrypt_info = fragment.get('decrypt_info')
+            if not decrypt_info or decrypt_info['METHOD'] != 'AES-128':
+                return frag_content
+            iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence'])
+            decrypt_info['KEY'] = decrypt_info.get('KEY') or _get_key(info_dict.get('_decryption_key_url') or decrypt_info['URI'])
+            # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
+            # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
+            # not what it decrypts to.
+            if self.params.get('test', False):
+                return frag_content
+            return aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv)
+
+        return decrypt_fragment
+
     def download_and_append_fragments(self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None):
         fragment_retries = self.params.get('fragment_retries', 0)
         is_fatal = (lambda idx: idx == 0) if self.params.get('skip_unavailable_fragments', True) else (lambda _: True)
@@ -369,26 +392,6 @@ class FragmentFD(FileDownloader):
                 return False, frag_index
             return frag_content, frag_index

-        _key_cache = {}
-
-        def _get_key(url):
-            if url not in _key_cache:
-                _key_cache[url] = self.ydl.urlopen(self._prepare_url(info_dict, url)).read()
-            return _key_cache[url]
-
-        def decrypt_fragment(fragment, frag_content):
-            decrypt_info = fragment.get('decrypt_info')
-            if not decrypt_info or decrypt_info['METHOD'] != 'AES-128':
-                return frag_content
-            iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence'])
-            decrypt_info['KEY'] = decrypt_info.get('KEY') or _get_key(info_dict.get('_decryption_key_url') or decrypt_info['URI'])
-            # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
-            # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
-            # not what it decrypts to.
-            if self.params.get('test', False):
-                return frag_content
-            return aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv)
-
         def append_fragment(frag_content, frag_index, ctx):
             if not frag_content:
                 if not is_fatal(frag_index - 1):
@@ -402,6 +405,8 @@ class FragmentFD(FileDownloader):
             self._append_fragment(ctx, pack_func(frag_content, frag_index))
             return True

+        decrypt_fragment = self.decrypter(info_dict)
+
         max_workers = self.params.get('concurrent_fragment_downloads', 1)
         if can_threaded_download and max_workers > 1:
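The decrypter() refactor above is a closure factory: the per-download state (the key cache) is created once, and a plain function is handed to whichever downloader needs it. A cut-down sketch of the shape, with XOR standing in for the real AES-CBC step:

    def make_decrypter(fetch_key):
        key_cache = {}

        def decrypt(key_url, data):
            if key_url not in key_cache:   # each key URL is fetched only once
                key_cache[key_url] = fetch_key(key_url)
            key = key_cache[key_url]
            return bytes(b ^ key[i % len(key)] for i, b in enumerate(data))

        return decrypt

    decrypt = make_decrypter(lambda url: b'\x2a' * 16)
    assert decrypt('https://example.com/key', b'hello') == bytes(c ^ 0x2a for c in b'hello')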
From 5e3f2f8fc4cdf600b5030c70478274bdb4dcf4c6 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 22 Sep 2021 05:35:39 +0530
Subject: [PATCH 127/641] [youtube] Return full URL instead of just ID

---
 yt_dlp/extractor/youtube.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index eb69b88a3a..7f65e2b7dd 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -869,7 +869,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             '_type': 'url',
             'ie_key': YoutubeIE.ie_key(),
             'id': video_id,
-            'url': video_id,
+            'url': f'https://www.youtube.com/watch?v={video_id}',
             'title': title,
             'description': description,
             'duration': duration,
@@ -4284,7 +4284,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         if video_id and playlist_id:
             if self.get_param('noplaylist'):
                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
-                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
+                return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
             self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

         webpage, data = self._extract_webpage(url, item_id)
@@ -4337,7 +4337,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             if video_id:
                 if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                     self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
-                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
+                return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
             raise ExtractorError('Unable to recognize tab page')

From d806c9fd97052b05f978d28d6a8d5bf81ef54fcf Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 22 Sep 2021 05:50:11 +0530
Subject: [PATCH 128/641] [docs,cleanup] Add deprecation warning in docs for some counter-intuitive behaviour that may be removed in the future, and fix linter

---
 README.md                     | 8 +++++++-
 yt_dlp/downloader/external.py | 3 ---
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 44766b76be..641b672e0d 100644
--- a/README.md
+++ b/README.md
@@ -1170,7 +1170,11 @@ If you want to download multiple videos and they don't have the same formats ava

 If you want to download several formats of the same video use a comma as a separator, e.g. `-f 22,17,18` will download all these three formats, of course if they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.

-You can merge the video and audio of multiple formats into a single file using `-f <format1>+<format2>+...` (requires ffmpeg installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg. Unless `--video-multistreams` is used, all formats with a video stream except the first one are ignored. Similarly, unless `--audio-multistreams` is used, all formats with an audio stream except the first one are ignored. For example, `-f bestvideo+best+bestaudio --video-multistreams --audio-multistreams` will download and merge all 3 given formats. The resulting file will have 2 video streams and 2 audio streams. But `-f bestvideo+best+bestaudio --no-video-multistreams` will download and merge only `bestvideo` and `bestaudio`. `best` is ignored since another format containing a video stream (`bestvideo`) has already been selected. The order of the formats is therefore important. `-f best+bestaudio --no-audio-multistreams` will download and merge both formats while `-f bestaudio+best --no-audio-multistreams` will ignore `best` and download only `bestaudio`.
+You can merge the video and audio of multiple formats into a single file using `-f <format1>+<format2>+...` (requires ffmpeg installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg.
+
+**Deprecation warning**: Since the *below* described behavior is complex and counter-intuitive, this will be removed and multistreams will be enabled by default in the future. A new operator will be instead added to limit formats to single audio/video
+
+Unless `--video-multistreams` is used, all formats with a video stream except the first one are ignored. Similarly, unless `--audio-multistreams` is used, all formats with an audio stream except the first one are ignored. For example, `-f bestvideo+best+bestaudio --video-multistreams --audio-multistreams` will download and merge all 3 given formats. The resulting file will have 2 video streams and 2 audio streams. But `-f bestvideo+best+bestaudio --no-video-multistreams` will download and merge only `bestvideo` and `bestaudio`. `best` is ignored since another format containing a video stream (`bestvideo`) has already been selected. The order of the formats is therefore important. `-f best+bestaudio --no-audio-multistreams` will download and merge both formats while `-f bestaudio+best --no-audio-multistreams` will ignore `best` and download only `bestaudio`.

 ## Filtering Formats
@@ -1236,6 +1240,8 @@ The available fields are:
  - `abr`: Average audio bitrate in KBit/s
  - `br`: Equivalent to using `tbr,vbr,abr`
  - `asr`: Audio sample rate in Hz
+
+**Deprecation warning**: Many of these fields have (currently undocumented) aliases, that may be removed in a future version. It is recommended to use only the documented field names.

 All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. Eg: `+res` prefers format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. Eg: `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. Eg: `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. Eg: `filesize~1G` prefers the format with filesize closest to 1 GiB.

diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
index 025eb38cb6..9c1229cf6f 100644
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@@ -7,11 +7,9 @@ import sys
 import time

 from .fragment import FragmentFD
-from ..aes import aes_cbc_decrypt_bytes
 from ..compat import (
     compat_setenv,
     compat_str,
-    compat_struct_pack,
 )
 from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
 from ..utils import (
@@ -25,7 +23,6 @@ from ..utils import (
     check_executable,
     is_outdated_version,
     process_communicate_or_kill,
-    sanitized_Request,
     sanitize_open,
 )
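The multistream behaviour documented above can also be driven through the embedding API. A small sketch, assuming the internal option keys `allow_multiple_video_streams` and `allow_multiple_audio_streams` mirror the CLI flags:

    import yt_dlp

    opts = {
        'format': 'bestvideo+best+bestaudio',
        # equivalents of --video-multistreams / --audio-multistreams
        'allow_multiple_video_streams': True,
        'allow_multiple_audio_streams': True,
    }

    def download_merged(url):
        with yt_dlp.YoutubeDL(opts) as ydl:
            return ydl.download([url])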
From f6d8776d34ad74afc80b9cfb7207024d32419eea Mon Sep 17 00:00:00 2001
From: ChillingPepper <90042155+ChillingPepper@users.noreply.github.com>
Date: Wed, 22 Sep 2021 04:10:02 +0200
Subject: [PATCH 129/641] [SovietsCloset] Fix playlists for games with only named categories

Authored by: ConquerorDopy
---
 yt_dlp/extractor/sovietscloset.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py
index 218a146dfe..64201c88c3 100644
--- a/yt_dlp/extractor/sovietscloset.py
+++ b/yt_dlp/extractor/sovietscloset.py
@@ -167,6 +167,14 @@ class SovietsClosetPlaylistIE(SovietsClosetBaseIE):
             },
             'playlist_mincount': 3,
         },
+        {
+            'url': 'https://sovietscloset.com/Total-War-Warhammer',
+            'info_dict': {
+                'id': 'Total-War-Warhammer',
+                'title': 'Total War: Warhammer - Greenskins',
+            },
+            'playlist_mincount': 33,
+        },
     ]

     def _real_extract(self, url):
@@ -188,7 +196,9 @@ class SovietsClosetPlaylistIE(SovietsClosetBaseIE):
             category_slug = 'misc'

         game = next(game for game in sovietscloset if game['slug'].lower() == game_slug)
-        category = next(cat for cat in game['subcategories'] if cat['slug'].lower() == category_slug)
+        category = next((cat for cat in game['subcategories'] if cat.get('slug', '').lower() == category_slug),
+                        game['subcategories'][0])
+        category_slug = category.get('slug', '').lower() or category_slug

         playlist_title = game.get('name') or game_slug
         if category_slug != 'misc':
             playlist_title += f' - {category.get("name") or category_slug}'

From c12977bdc455883e7061c2275da093c5b419a32a Mon Sep 17 00:00:00 2001
From: Sipherdrakon <64430430+Sipherdrakon@users.noreply.github.com>
Date: Wed, 22 Sep 2021 10:09:45 -0400
Subject: [PATCH 130/641] [AnimalPlanet] Fix extractor (#1050)

Authored by: Sipherdrakon
---
 yt_dlp/extractor/dplay.py      | 22 +++++++++++++++++++++-
 yt_dlp/extractor/extractors.py |  3 ++-
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py
index f2aca4d67a..e0e446b873 100644
--- a/yt_dlp/extractor/dplay.py
+++ b/yt_dlp/extractor/dplay.py
@@ -349,7 +349,7 @@ class DiscoveryPlusIE(DPlayIE):
     _API_URL = 'us1-prod-direct.discoveryplus.com'

     def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
-        headers['x-disco-client'] = f'WEB:UNKNOWN:{self._PRODUCT}:15.0.0'
+        headers['x-disco-client'] = f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6'

     def _download_video_playback_info(self, disco_base, video_id, headers):
         return self._download_json(
@@ -409,3 +409,23 @@ class DIYNetworkIE(DiscoveryPlusIE):

     _PRODUCT = 'diy'
     _API_URL = 'us1-prod-direct.watch.diynetwork.com'
+
+
+class AnimalPlanetIE(DiscoveryPlusIE):
+    _VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayIE._PATH_REGEX
+    _TESTS = [{
+        'url': 'https://www.animalplanet.com/video/north-woods-law-animal-planet/squirrel-showdown',
+        'info_dict': {
+            'id': '3338923',
+            'display_id': 'north-woods-law-animal-planet/squirrel-showdown',
+            'ext': 'mp4',
+            'title': 'Squirrel Showdown',
+            'description': 'A woman is suspected of being in possession of flying squirrel kits.',
+            'season_number': 16,
+            'episode_number': 11,
+        },
+        'skip': 'Available for Premium users',
+    }]
+
+    _PRODUCT = 'apl'
+    _API_URL = 'us1-prod-direct.animalplanet.com'
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 10581a71be..468fefbf14 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -346,7 +346,8 @@ from .dplay import (
     DiscoveryPlusIE,
     HGTVDeIE,
     ScienceChannelIE,
-    DIYNetworkIE
+    DIYNetworkIE,
+    AnimalPlanetIE
 )
 from .dreisat import DreiSatIE
 from .drbonanza import DRBonanzaIE
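AnimalPlanetIE above onboards an entire site by subclassing DiscoveryPlusIE and overriding two class attributes. The pattern in isolation (ExampleNetwork and its domain are hypothetical):

    class DiscoveryPlusLike:
        _PRODUCT = 'dplus_us'
        _API_URL = 'us1-prod-direct.discoveryplus.com'

        def client_header(self):
            # same version string the patch pins for all products
            return f'WEB:UNKNOWN:{self._PRODUCT}:25.2.6'

    class ExampleNetwork(DiscoveryPlusLike):  # hypothetical network
        _PRODUCT = 'exn'
        _API_URL = 'us1-prod-direct.example-network.com'

    assert ExampleNetwork().client_header() == 'WEB:UNKNOWN:exn:25.2.6'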
is_last_line=False): + def _prepare_multiline_status(self, lines): + if self.params.get('quiet'): + self._multiline = QuietMultilinePrinter() + elif self.params.get('progress_with_newline', False): + self._multiline = BreaklineStatusPrinter(sys.stderr, lines) + elif self.params.get('noprogress', False): + self._multiline = None + else: + self._multiline = MultilinePrinter(sys.stderr, lines) + + def _finish_multiline_status(self): + if self._multiline is not None: + self._multiline.end() + + def _report_progress_status(self, msg, is_last_line=False, progress_line=None): fullmsg = '[download] ' + msg if self.params.get('progress_with_newline', False): self.to_screen(fullmsg) + elif progress_line is not None and self._multiline is not None: + self._multiline.print_at_line(fullmsg, progress_line) else: - if compat_os_name == 'nt': + if compat_os_name == 'nt' or not sys.stderr.isatty(): prev_len = getattr(self, '_report_progress_prev_line_length', 0) if prev_len > len(fullmsg): @@ -249,7 +271,7 @@ class FileDownloader(object): self._report_progress_prev_line_length = len(fullmsg) clear_line = '\r' else: - clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r') + clear_line = '\r\x1b[K' self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line) self.to_console_title('yt-dlp ' + msg) @@ -266,7 +288,8 @@ class FileDownloader(object): s['_elapsed_str'] = self.format_seconds(s['elapsed']) msg_template += ' in %(_elapsed_str)s' self._report_progress_status( - msg_template % s, is_last_line=True) + msg_template % s, progress_line=s.get('progress_idx')) + return if self.params.get('noprogress'): return @@ -311,7 +334,7 @@ class FileDownloader(object): else: msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s' - self._report_progress_status(msg_template % s) + self._report_progress_status(msg_template % s, progress_line=s.get('progress_idx')) def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index ebdef27dbe..31f9467922 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -3,6 +3,7 @@ from __future__ import division, unicode_literals import os import time import json +from math import ceil try: import concurrent.futures @@ -120,6 +121,7 @@ class FragmentFD(FileDownloader): 'url': frag_url, 'http_headers': headers or info_dict.get('http_headers'), 'request_data': request_data, + 'ctx_id': ctx.get('ctx_id'), } success = ctx['dl'].download(fragment_filename, fragment_info_dict) if not success: @@ -219,6 +221,7 @@ class FragmentFD(FileDownloader): def _start_frag_download(self, ctx, info_dict): resume_len = ctx['complete_frags_downloaded_bytes'] total_frags = ctx['total_frags'] + ctx_id = ctx.get('ctx_id') # This dict stores the download progress, it's updated by the progress # hook state = { @@ -242,6 +245,12 @@ class FragmentFD(FileDownloader): if s['status'] not in ('downloading', 'finished'): return + if ctx_id is not None and s.get('ctx_id') != ctx_id: + return + + state['max_progress'] = ctx.get('max_progress') + state['progress_idx'] = ctx.get('progress_idx') + time_now = time.time() state['elapsed'] = time_now - start frag_total_bytes = s.get('total_bytes') or 0 @@ -301,6 +310,9 @@ class FragmentFD(FileDownloader): 'filename': ctx['filename'], 'status': 'finished', 'elapsed': elapsed, + 'ctx_id': ctx.get('ctx_id'), + 'max_progress': ctx.get('max_progress'), + 'progress_idx': ctx.get('progress_idx'), }, info_dict) def 
_prepare_external_frag_download(self, ctx): @@ -347,7 +359,44 @@ class FragmentFD(FileDownloader): return decrypt_fragment - def download_and_append_fragments(self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None): + def download_and_append_fragments_multiple(self, *args, pack_func=None, finish_func=None): + ''' + @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ... + all args must be either tuple or list + ''' + max_progress = len(args) + if max_progress == 1: + return self.download_and_append_fragments(*args[0], pack_func=pack_func, finish_func=finish_func) + max_workers = self.params.get('concurrent_fragment_downloads', max_progress) + self._prepare_multiline_status(max_progress) + + def thread_func(idx, ctx, fragments, info_dict, tpe): + ctx['max_progress'] = max_progress + ctx['progress_idx'] = idx + return self.download_and_append_fragments(ctx, fragments, info_dict, pack_func=pack_func, finish_func=finish_func, tpe=tpe) + + class FTPE(concurrent.futures.ThreadPoolExecutor): + # has to stop this or it's going to wait on the worker thread itself + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + spins = [] + for idx, (ctx, fragments, info_dict) in enumerate(args): + tpe = FTPE(ceil(max_workers / max_progress)) + job = tpe.submit(thread_func, idx, ctx, fragments, info_dict, tpe) + spins.append((tpe, job)) + + result = True + for tpe, job in spins: + try: + result = result and job.result() + finally: + tpe.shutdown(wait=True) + + self._finish_multiline_status() + return True + + def download_and_append_fragments(self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None, tpe=None): fragment_retries = self.params.get('fragment_retries', 0) is_fatal = (lambda idx: idx == 0) if self.params.get('skip_unavailable_fragments', True) else (lambda _: True) if not pack_func: @@ -416,7 +465,7 @@ class FragmentFD(FileDownloader): return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized') self.report_warning('The download speed shown is only of one thread. 
This is a known issue and patches are welcome') - with concurrent.futures.ThreadPoolExecutor(max_workers) as pool: + with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments): ctx['fragment_filename_sanitized'] = frag_filename ctx['fragment_index'] = frag_index diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 1edb0f91f6..9e79051ada 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -310,6 +310,7 @@ class HttpFD(FileDownloader): 'eta': eta, 'speed': speed, 'elapsed': now - ctx.start_time, + 'ctx_id': info_dict.get('ctx_id'), }, info_dict) if data_len is not None and byte_counter == data_len: @@ -357,6 +358,7 @@ class HttpFD(FileDownloader): 'filename': ctx.filename, 'status': 'finished', 'elapsed': time.time() - ctx.start_time, + 'ctx_id': info_dict.get('ctx_id'), }, info_dict) return True diff --git a/yt_dlp/minicurses.py b/yt_dlp/minicurses.py new file mode 100644 index 0000000000..74ad891c99 --- /dev/null +++ b/yt_dlp/minicurses.py @@ -0,0 +1,135 @@ +import os + +from threading import Lock +from .utils import compat_os_name, get_windows_version + + +class MultilinePrinterBase(): + def __enter__(self): + return self + + def __exit__(self, *args): + self.end() + + def print_at_line(self, text, pos): + pass + + def end(self): + pass + + +class MultilinePrinter(MultilinePrinterBase): + + def __init__(self, stream, lines): + """ + @param stream stream to write to + @param lines number of lines to be written + """ + self.stream = stream + + is_win10 = compat_os_name == 'nt' and get_windows_version() >= (10, ) + self.CARRIAGE_RETURN = '\r' + if os.getenv('TERM') and self._isatty() or is_win10: + # reason not to use curses https://github.com/yt-dlp/yt-dlp/pull/1036#discussion_r713851492 + # escape sequences for Win10 https://docs.microsoft.com/en-us/windows/console/console-virtual-terminal-sequences + self.UP = '\x1b[A' + self.DOWN = '\n' + self.ERASE_LINE = '\x1b[K' + self._HAVE_FULLCAP = self._isatty() or is_win10 + else: + self.UP = self.DOWN = self.ERASE_LINE = None + self._HAVE_FULLCAP = False + + # lines are numbered from top to bottom, counting from 0 to self.maximum + self.maximum = lines - 1 + self.lastline = 0 + self.lastlength = 0 + + self.movelock = Lock() + + @property + def have_fullcap(self): + """ + True if the TTY allows controlling the cursor, + so that multiline progress works + """ + return self._HAVE_FULLCAP + + def _isatty(self): + try: + return self.stream.isatty() + except BaseException: + return False + + def _move_cursor(self, dest): + current = min(self.lastline, self.maximum) + self.stream.write(self.CARRIAGE_RETURN) + if current == dest: + # current and dest are at same position, no need to move cursor + return + elif current > dest: + # when maximum == 2, + # 0. dest + # 1. + # 2. current + self.stream.write(self.UP * (current - dest)) + elif current < dest: + # when maximum == 2, + # 0. current + # 1. + # 2. 
dest + self.stream.write(self.DOWN * (dest - current)) + self.lastline = dest + + def print_at_line(self, text, pos): + with self.movelock: + if self.have_fullcap: + self._move_cursor(pos) + self.stream.write(self.ERASE_LINE) + self.stream.write(text) + else: + if self.maximum != 0: + # let user know about which line is updating the status + text = f'{pos + 1}: {text}' + textlen = len(text) + if self.lastline == pos: + # move cursor to the start of the line when writing to the same line + self.stream.write(self.CARRIAGE_RETURN) + if self.lastlength > textlen: + text += ' ' * (self.lastlength - textlen) + self.lastlength = textlen + else: + # otherwise, break the line + self.stream.write('\n') + self.lastlength = 0 + self.stream.write(text) + self.lastline = pos + + def end(self): + with self.movelock: + # move cursor to the end of the last line, and write line break + # so that other to_screen calls can follow on a fresh line + self._move_cursor(self.maximum) + self.stream.write('\n') + + +class QuietMultilinePrinter(MultilinePrinterBase): + def __init__(self): + self.have_fullcap = True + + +class BreaklineStatusPrinter(MultilinePrinterBase): + + def __init__(self, stream, lines): + """ + @param stream stream to write to + """ + self.stream = stream + self.maximum = lines + self.have_fullcap = True + + def print_at_line(self, text, pos): + if self.maximum != 0: + # let user know about which line is updating the status + text = f'{pos + 1}: {text}' + self.stream.write(text + '\n') diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index de0213b142..9eb47fccb1 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -6373,3 +6373,11 @@ def traverse_dict(dictn, keys, casesense=True): def variadic(x, allowed_types=(str, bytes)): return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,) + + +def get_windows_version(): + ''' Get Windows version. 
None if it's not running on Windows ''' + if compat_os_name == 'nt': + return version_tuple(platform.win32_ver()[1]) + else: + return None From a76e2e0f8898c06939b6a123fa863ab8876cfa20 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 22 Sep 2021 19:50:24 +0530 Subject: [PATCH 132/641] [reddit] Workaround for 429 by redirecting to old.reddit.com Closes #1014 --- yt_dlp/extractor/reddit.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index 638f2b6a84..8e1463d5be 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -49,7 +49,7 @@ class RedditIE(InfoExtractor): class RedditRIE(InfoExtractor): - _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))' + _VALID_URL = r'https?://(?:[^/]+\.)?reddit\.com/r/(?P<slug>[^/]+/comments/(?P<id>[^/?#&]+))' _TESTS = [{ 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/', 'info_dict': { @@ -97,15 +97,11 @@ class RedditRIE(InfoExtractor): }] def _real_extract(self, url): - mobj = self._match_valid_url(url) - url, video_id = mobj.group('url', 'id') - - video_id = self._match_id(url) + slug, video_id = self._match_valid_url(url).group('slug', 'id') self._set_cookie('reddit.com', '_options', '%7B%22pref_quarantine_optin%22%3A%20true%7D') - data = self._download_json( - url + '/.json', video_id)[0]['data']['children'][0]['data'] + f'https://old.reddit.com/r/{slug}/.json', video_id)[0]['data']['children'][0]['data'] video_url = data['url'] From a21e0ab1a1a03f82517cd8cec4b9a2b4d6b81ac3 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 22 Sep 2021 19:51:40 +0530 Subject: [PATCH 133/641] [ffmpeg] Add `aac_adtstoasc` when merging if needed Related: #1039 --- yt_dlp/YoutubeDL.py | 1 + yt_dlp/postprocessor/ffmpeg.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 50e902c53f..d05d856042 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2798,6 +2798,7 @@ class YoutubeDL(object): 'f%s' % f['format_id'], new_info['ext']) if not self._ensure_dir_exists(fname): return + f['filepath'] = fname downloaded.append(fname) partial_success, real_download = self.dl(fname, new_info) info_dict['__real_download'] = info_dict['__real_download'] or real_download diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 25488e58bc..6f274b1967 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -732,7 +732,9 @@ class FFmpegMergerPP(FFmpegPostProcessor): args = ['-c', 'copy'] for (i, fmt) in enumerate(info['requested_formats']): if fmt.get('acodec') != 'none': - args.extend(['-map', '%u:a:0' % (i)]) + args.extend(['-map', f'{i}:a:0']) + if self.get_audio_codec(fmt['filepath']) == 'aac': + args.extend([f'-bsf:{i}:a:0', 'aac_adtstoasc']) if fmt.get('vcodec') != 'none': args.extend(['-map', '%u:v:0' % (i)]) self.to_screen('Merging formats into "%s"' % filename) From 4be9dbdc2413a796a6a88efb69a2a59612be5fae Mon Sep 17 00:00:00 2001 From: nixxo Date: Thu, 23 Sep 2021 08:15:54 +0200 Subject: [PATCH 134/641] [comedycentral] Support `collection-playlist` (#1058) Authored by: nixxo --- yt_dlp/extractor/comedycentral.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/comedycentral.py b/yt_dlp/extractor/comedycentral.py index 1bfa912be4..5a12ab5e69 100644 --- a/yt_dlp/extractor/comedycentral.py +++ b/yt_dlp/extractor/comedycentral.py @@ -4,7 +4,7 @@ from .mtv import 
MTVServicesInfoExtractor class ComedyCentralIE(MTVServicesInfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?)/(?P<id>[0-9a-z]{6})' + _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?|collection-playlist)/(?P<id>[0-9a-z]{6})' _FEED_URL = 'http://comedycentral.com/feeds/mrss/' _TESTS = [{ @@ -24,6 +24,9 @@ class ComedyCentralIE(MTVServicesInfoExtractor): }, { 'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate', 'only_matching': True, + }, { + 'url': 'https://www.cc.com/collection-playlist/cosnej/stand-up-specials/t6vtjb', + 'only_matching': True, }] From 50eff38c1c071e2d389799843530c294d31887ed Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 23 Sep 2021 11:48:49 +0530 Subject: [PATCH 135/641] bugfix for a21e0ab1a1a03f82517cd8cec4b9a2b4d6b81ac3 Closes #1061 --- yt_dlp/postprocessor/ffmpeg.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 6f274b1967..7ea01620ea 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -730,11 +730,13 @@ class FFmpegMergerPP(FFmpegPostProcessor): filename = info['filepath'] temp_filename = prepend_extension(filename, 'temp') args = ['-c', 'copy'] + audio_streams = 0 for (i, fmt) in enumerate(info['requested_formats']): if fmt.get('acodec') != 'none': args.extend(['-map', f'{i}:a:0']) if self.get_audio_codec(fmt['filepath']) == 'aac': - args.extend([f'-bsf:{i}:a:0', 'aac_adtstoasc']) + args.extend([f'-bsf:a:{audio_streams}', 'aac_adtstoasc']) + audio_streams += 1 if fmt.get('vcodec') != 'none': args.extend(['-map', '%u:v:0' % (i)]) self.to_screen('Merging formats into "%s"' % filename) From 388bc4a640561b78a8d38e95253721e7715b22cc Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 23 Sep 2021 14:30:49 +0530 Subject: [PATCH 136/641] [Hotstar] Add referer for subs (#1062) Authored by: Ashish0804 --- yt_dlp/YoutubeDL.py | 4 +++- yt_dlp/extractor/hotstar.py | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index d05d856042..410186b913 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2594,7 +2594,9 @@ class YoutubeDL(object): return else: try: - self.dl(sub_filename, sub_info.copy(), subtitle=True) + sub_copy = sub_info.copy() + sub_copy.setdefault('http_headers', info_dict.get('http_headers')) + self.dl(sub_filename, sub_copy, subtitle=True) sub_info['filepath'] = sub_filename files_to_move[sub_filename] = sub_filename_final except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err: diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index 8d8a8bd75e..8f0c673034 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -254,6 +254,9 @@ class HotStarIE(HotStarBaseIE): 'season_id': video_data.get('seasonId'), 'episode': title, 'episode_number': int_or_none(video_data.get('episodeNo')), + 'http_headers': { + 'Referer': 'https://www.hotstar.com/in', + } } From bdc196a444303575bd9122b34763decbe16df2d2 Mon Sep 17 00:00:00 2001 From: Glenn Slayden <5589855+glenn-slayden@users.noreply.github.com> Date: Thu, 23 Sep 2021 02:05:01 -0700 Subject: [PATCH 137/641] [cleanup] Fix line endings for `nebula.py` (#1064) :ci skip Authored by: glenn-slayden --- yt_dlp/extractor/nebula.py | 476 ++++++++++++++++++------------------- 1 file changed, 238 insertions(+), 238 deletions(-) 
diff --git a/yt_dlp/extractor/nebula.py b/yt_dlp/extractor/nebula.py index 4426a8fdc9..9698a358e1 100644 --- a/yt_dlp/extractor/nebula.py +++ b/yt_dlp/extractor/nebula.py @@ -1,238 +1,238 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json -import time - -from urllib.error import HTTPError -from .common import InfoExtractor -from ..compat import compat_str, compat_urllib_parse_unquote, compat_urllib_parse_quote -from ..utils import ( - ExtractorError, - parse_iso8601, - try_get, - urljoin, -) - - -class NebulaIE(InfoExtractor): - - _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P<id>[-\w]+)' - _TESTS = [ - { - 'url': 'https://nebula.app/videos/that-time-disney-remade-beauty-and-the-beast', - 'md5': 'fe79c4df8b3aa2fea98a93d027465c7e', - 'info_dict': { - 'id': '5c271b40b13fd613090034fd', - 'ext': 'mp4', - 'title': 'That Time Disney Remade Beauty and the Beast', - 'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.', - 'upload_date': '20180731', - 'timestamp': 1533009600, - 'channel': 'Lindsay Ellis', - 'uploader': 'Lindsay Ellis', - }, - 'params': { - 'usenetrc': True, - }, - 'skip': 'All Nebula content requires authentication', - }, - { - 'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore', - 'md5': '6d4edd14ce65720fa63aba5c583fb328', - 'info_dict': { - 'id': '5e7e78171aaf320001fbd6be', - 'ext': 'mp4', - 'title': 'Landing Craft - How The Allies Got Ashore', - 'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.', - 'upload_date': '20200327', - 'timestamp': 1585348140, - 'channel': 'The Logistics of D-Day', - 'uploader': 'The Logistics of D-Day', - }, - 'params': { - 'usenetrc': True, - }, - 'skip': 'All Nebula content requires authentication', - }, - { - 'url': 'https://nebula.app/videos/money-episode-1-the-draw', - 'md5': '8c7d272910eea320f6f8e6d3084eecf5', - 'info_dict': { - 'id': '5e779ebdd157bc0001d1c75a', - 'ext': 'mp4', - 'title': 'Episode 1: The Draw', - 'description': r'contains:There’s free money on offer… if the players can all work together.', - 'upload_date': '20200323', - 'timestamp': 1584980400, - 'channel': 'Tom Scott Presents: Money', - 'uploader': 'Tom Scott Presents: Money', - }, - 'params': { - 'usenetrc': True, - }, - 'skip': 'All Nebula content requires authentication', - }, - { - 'url': 'https://watchnebula.com/videos/money-episode-1-the-draw', - 'only_matching': True, - }, - ] - _NETRC_MACHINE = 'watchnebula' - - _nebula_token = None - - def _retrieve_nebula_auth(self): - """ - Logs in to Nebula and returns a Nebula API token - """ - - username, password = self._get_login_info() - if not (username and password): - self.raise_login_required() - - self.report_login() - data = json.dumps({'email': username, 'password': password}).encode('utf8') - response = self._download_json( - 'https://api.watchnebula.com/api/v1/auth/login/', - data=data, fatal=False, video_id=None, - headers={ - 'content-type': 'application/json', - # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint - 'cookie': '' - }, - note='Authenticating to Nebula with supplied credentials', - errnote='Authentication failed or rejected') - if not response or not response.get('key'): - self.raise_login_required() - - # save nebula token as cookie - self._set_cookie( - 'nebula.app', 'nebula-auth', - 
compat_urllib_parse_quote( - json.dumps({ - "apiToken": response["key"], - "isLoggingIn": False, - "isLoggingOut": False, - }, separators=(",", ":"))), - expire_time=int(time.time()) + 86400 * 365, - ) - - return response['key'] - - def _retrieve_zype_api_key(self, page_url, display_id): - """ - Retrieves the Zype API key - """ - - # Find the js that has the API key from the webpage and download it - webpage = self._download_webpage(page_url, video_id=display_id) - main_script_relpath = self._search_regex( - r'<script[^>]*src="(?P<script_relpath>[^"]*main.[0-9a-f]*.chunk.js)"[^>]*>', webpage, - group='script_relpath', name='script relative path', fatal=True) - main_script_abspath = urljoin(page_url, main_script_relpath) - main_script = self._download_webpage(main_script_abspath, video_id=display_id, - note='Retrieving Zype API key') - - api_key = self._search_regex( - r'REACT_APP_ZYPE_API_KEY\s*:\s*"(?P<api_key>[\w-]*)"', main_script, - group='api_key', name='API key', fatal=True) - - return api_key - - def _call_zype_api(self, path, params, video_id, api_key, note): - """ - A helper for making calls to the Zype API. - """ - query = {'api_key': api_key, 'per_page': 1} - query.update(params) - return self._download_json('https://api.zype.com' + path, video_id, query=query, note=note) - - def _call_nebula_api(self, path, video_id, access_token, note): - """ - A helper for making calls to the Nebula API. - """ - return self._download_json('https://api.watchnebula.com/api/v1' + path, video_id, headers={ - 'Authorization': 'Token {access_token}'.format(access_token=access_token) - }, note=note) - - def _fetch_zype_access_token(self, video_id): - try: - user_object = self._call_nebula_api('/auth/user/', video_id, self._nebula_token, note='Retrieving Zype access token') - except ExtractorError as exc: - # if 401, attempt credential auth and retry - if exc.cause and isinstance(exc.cause, HTTPError) and exc.cause.code == 401: - self._nebula_token = self._retrieve_nebula_auth() - user_object = self._call_nebula_api('/auth/user/', video_id, self._nebula_token, note='Retrieving Zype access token') - else: - raise - - access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], compat_str) - if not access_token: - if try_get(user_object, lambda x: x['is_subscribed'], bool): - # TODO: Reimplement the same Zype token polling the Nebula frontend implements - # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532 - raise ExtractorError( - 'Unable to extract Zype access token from Nebula API authentication endpoint. 
' - 'Open an arbitrary video in a browser with this account to generate a token', - expected=True) - raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint') - return access_token - - def _extract_channel_title(self, video_meta): - # TODO: Implement the API calls giving us the channel list, - # so that we can do the title lookup and then figure out the channel URL - categories = video_meta.get('categories', []) if video_meta else [] - # the channel name is the value of the first category - for category in categories: - if category.get('value'): - return category['value'][0] - - def _real_initialize(self): - # check cookie jar for valid token - nebula_cookies = self._get_cookies('https://nebula.app') - nebula_cookie = nebula_cookies.get('nebula-auth') - if nebula_cookie: - self.to_screen('Authenticating to Nebula with token from cookie jar') - nebula_cookie_value = compat_urllib_parse_unquote(nebula_cookie.value) - self._nebula_token = self._parse_json(nebula_cookie_value, None).get('apiToken') - - # try to authenticate using credentials if no valid token has been found - if not self._nebula_token: - self._nebula_token = self._retrieve_nebula_auth() - - def _real_extract(self, url): - display_id = self._match_id(url) - api_key = self._retrieve_zype_api_key(url, display_id) - - response = self._call_zype_api('/videos', {'friendly_title': display_id}, - display_id, api_key, note='Retrieving metadata from Zype') - if len(response.get('response') or []) != 1: - raise ExtractorError('Unable to find video on Zype API') - video_meta = response['response'][0] - - video_id = video_meta['_id'] - zype_access_token = self._fetch_zype_access_token(display_id) - - channel_title = self._extract_channel_title(video_meta) - - return { - 'id': video_id, - 'display_id': display_id, - '_type': 'url_transparent', - 'ie_key': 'Zype', - 'url': 'https://player.zype.com/embed/%s.html?access_token=%s' % (video_id, zype_access_token), - 'title': video_meta.get('title'), - 'description': video_meta.get('description'), - 'timestamp': parse_iso8601(video_meta.get('published_at')), - 'thumbnails': [{ - 'id': tn.get('name'), # this appears to be null - 'url': tn['url'], - 'width': tn.get('width'), - 'height': tn.get('height'), - } for tn in video_meta.get('thumbnails', [])], - 'duration': video_meta.get('duration'), - 'channel': channel_title, - 'uploader': channel_title, # we chose uploader = channel name - # TODO: uploader_url, channel_id, channel_url - } +# coding: utf-8 +from __future__ import unicode_literals + +import json +import time + +from urllib.error import HTTPError +from .common import InfoExtractor +from ..compat import compat_str, compat_urllib_parse_unquote, compat_urllib_parse_quote +from ..utils import ( + ExtractorError, + parse_iso8601, + try_get, + urljoin, +) + + +class NebulaIE(InfoExtractor): + + _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P<id>[-\w]+)' + _TESTS = [ + { + 'url': 'https://nebula.app/videos/that-time-disney-remade-beauty-and-the-beast', + 'md5': 'fe79c4df8b3aa2fea98a93d027465c7e', + 'info_dict': { + 'id': '5c271b40b13fd613090034fd', + 'ext': 'mp4', + 'title': 'That Time Disney Remade Beauty and the Beast', + 'description': 'Note: this video was originally posted on YouTube with the sponsor read included. 
We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.', + 'upload_date': '20180731', + 'timestamp': 1533009600, + 'channel': 'Lindsay Ellis', + 'uploader': 'Lindsay Ellis', + }, + 'params': { + 'usenetrc': True, + }, + 'skip': 'All Nebula content requires authentication', + }, + { + 'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore', + 'md5': '6d4edd14ce65720fa63aba5c583fb328', + 'info_dict': { + 'id': '5e7e78171aaf320001fbd6be', + 'ext': 'mp4', + 'title': 'Landing Craft - How The Allies Got Ashore', + 'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.', + 'upload_date': '20200327', + 'timestamp': 1585348140, + 'channel': 'The Logistics of D-Day', + 'uploader': 'The Logistics of D-Day', + }, + 'params': { + 'usenetrc': True, + }, + 'skip': 'All Nebula content requires authentication', + }, + { + 'url': 'https://nebula.app/videos/money-episode-1-the-draw', + 'md5': '8c7d272910eea320f6f8e6d3084eecf5', + 'info_dict': { + 'id': '5e779ebdd157bc0001d1c75a', + 'ext': 'mp4', + 'title': 'Episode 1: The Draw', + 'description': r'contains:There’s free money on offer… if the players can all work together.', + 'upload_date': '20200323', + 'timestamp': 1584980400, + 'channel': 'Tom Scott Presents: Money', + 'uploader': 'Tom Scott Presents: Money', + }, + 'params': { + 'usenetrc': True, + }, + 'skip': 'All Nebula content requires authentication', + }, + { + 'url': 'https://watchnebula.com/videos/money-episode-1-the-draw', + 'only_matching': True, + }, + ] + _NETRC_MACHINE = 'watchnebula' + + _nebula_token = None + + def _retrieve_nebula_auth(self): + """ + Logs in to Nebula and returns a Nebula API token + """ + + username, password = self._get_login_info() + if not (username and password): + self.raise_login_required() + + self.report_login() + data = json.dumps({'email': username, 'password': password}).encode('utf8') + response = self._download_json( + 'https://api.watchnebula.com/api/v1/auth/login/', + data=data, fatal=False, video_id=None, + headers={ + 'content-type': 'application/json', + # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint + 'cookie': '' + }, + note='Authenticating to Nebula with supplied credentials', + errnote='Authentication failed or rejected') + if not response or not response.get('key'): + self.raise_login_required() + + # save nebula token as cookie + self._set_cookie( + 'nebula.app', 'nebula-auth', + compat_urllib_parse_quote( + json.dumps({ + "apiToken": response["key"], + "isLoggingIn": False, + "isLoggingOut": False, + }, separators=(",", ":"))), + expire_time=int(time.time()) + 86400 * 365, + ) + + return response['key'] + + def _retrieve_zype_api_key(self, page_url, display_id): + """ + Retrieves the Zype API key + """ + + # Find the js that has the API key from the webpage and download it + webpage = self._download_webpage(page_url, video_id=display_id) + main_script_relpath = self._search_regex( + r'<script[^>]*src="(?P<script_relpath>[^"]*main.[0-9a-f]*.chunk.js)"[^>]*>', webpage, + group='script_relpath', name='script relative path', fatal=True) + main_script_abspath = urljoin(page_url, main_script_relpath) + main_script = self._download_webpage(main_script_abspath, video_id=display_id, + note='Retrieving Zype API key') + + api_key = self._search_regex( + r'REACT_APP_ZYPE_API_KEY\s*:\s*"(?P<api_key>[\w-]*)"', main_script, + group='api_key', name='API key', fatal=True) + + return api_key + + def _call_zype_api(self, path, params, 
video_id, api_key, note): + """ + A helper for making calls to the Zype API. + """ + query = {'api_key': api_key, 'per_page': 1} + query.update(params) + return self._download_json('https://api.zype.com' + path, video_id, query=query, note=note) + + def _call_nebula_api(self, path, video_id, access_token, note): + """ + A helper for making calls to the Nebula API. + """ + return self._download_json('https://api.watchnebula.com/api/v1' + path, video_id, headers={ + 'Authorization': 'Token {access_token}'.format(access_token=access_token) + }, note=note) + + def _fetch_zype_access_token(self, video_id): + try: + user_object = self._call_nebula_api('/auth/user/', video_id, self._nebula_token, note='Retrieving Zype access token') + except ExtractorError as exc: + # if 401, attempt credential auth and retry + if exc.cause and isinstance(exc.cause, HTTPError) and exc.cause.code == 401: + self._nebula_token = self._retrieve_nebula_auth() + user_object = self._call_nebula_api('/auth/user/', video_id, self._nebula_token, note='Retrieving Zype access token') + else: + raise + + access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], compat_str) + if not access_token: + if try_get(user_object, lambda x: x['is_subscribed'], bool): + # TODO: Reimplement the same Zype token polling the Nebula frontend implements + # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532 + raise ExtractorError( + 'Unable to extract Zype access token from Nebula API authentication endpoint. ' + 'Open an arbitrary video in a browser with this account to generate a token', + expected=True) + raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint') + return access_token + + def _extract_channel_title(self, video_meta): + # TODO: Implement the API calls giving us the channel list, + # so that we can do the title lookup and then figure out the channel URL + categories = video_meta.get('categories', []) if video_meta else [] + # the channel name is the value of the first category + for category in categories: + if category.get('value'): + return category['value'][0] + + def _real_initialize(self): + # check cookie jar for valid token + nebula_cookies = self._get_cookies('https://nebula.app') + nebula_cookie = nebula_cookies.get('nebula-auth') + if nebula_cookie: + self.to_screen('Authenticating to Nebula with token from cookie jar') + nebula_cookie_value = compat_urllib_parse_unquote(nebula_cookie.value) + self._nebula_token = self._parse_json(nebula_cookie_value, None).get('apiToken') + + # try to authenticate using credentials if no valid token has been found + if not self._nebula_token: + self._nebula_token = self._retrieve_nebula_auth() + + def _real_extract(self, url): + display_id = self._match_id(url) + api_key = self._retrieve_zype_api_key(url, display_id) + + response = self._call_zype_api('/videos', {'friendly_title': display_id}, + display_id, api_key, note='Retrieving metadata from Zype') + if len(response.get('response') or []) != 1: + raise ExtractorError('Unable to find video on Zype API') + video_meta = response['response'][0] + + video_id = video_meta['_id'] + zype_access_token = self._fetch_zype_access_token(display_id) + + channel_title = self._extract_channel_title(video_meta) + + return { + 'id': video_id, + 'display_id': display_id, + '_type': 'url_transparent', + 'ie_key': 'Zype', + 'url': 'https://player.zype.com/embed/%s.html?access_token=%s' % (video_id, zype_access_token), + 'title': video_meta.get('title'), + 
'description': video_meta.get('description'), + 'timestamp': parse_iso8601(video_meta.get('published_at')), + 'thumbnails': [{ + 'id': tn.get('name'), # this appears to be null + 'url': tn['url'], + 'width': tn.get('width'), + 'height': tn.get('height'), + } for tn in video_meta.get('thumbnails', [])], + 'duration': video_meta.get('duration'), + 'channel': channel_title, + 'uploader': channel_title, # we chose uploader = channel name + # TODO: uploader_url, channel_id, channel_url + } From ee2b3563f367bf6644f58e16212262af8a664f6f Mon Sep 17 00:00:00 2001 From: The Hatsune Daishi Date: Thu, 23 Sep 2021 18:06:48 +0900 Subject: [PATCH 138/641] [downloader/niconico] Pass custom headers (#1063) Closes #1057 Authored by: nao20010128nao --- yt_dlp/downloader/niconico.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/yt_dlp/downloader/niconico.py b/yt_dlp/downloader/niconico.py index 256840d689..521dfece31 100644 --- a/yt_dlp/downloader/niconico.py +++ b/yt_dlp/downloader/niconico.py @@ -6,7 +6,7 @@ import threading from .common import FileDownloader from ..downloader import get_suitable_downloader from ..extractor.niconico import NiconicoIE -from ..compat import compat_urllib_request +from ..utils import sanitized_Request class NiconicoDmcFD(FileDownloader): @@ -29,9 +29,11 @@ class NiconicoDmcFD(FileDownloader): heartbeat_data = heartbeat_info_dict['data'].encode() heartbeat_interval = heartbeat_info_dict.get('interval', 30) + request = sanitized_Request(heartbeat_url, heartbeat_data) + def heartbeat(): try: - compat_urllib_request.urlopen(url=heartbeat_url, data=heartbeat_data) + self.ydl.urlopen(request).read() except Exception: self.to_screen('[%s] Heartbeat failed' % self.FD_NAME) From 49fa4d9af725d006e4722b0e169afe99290232d7 Mon Sep 17 00:00:00 2001 From: NeroBurner Date: Thu, 23 Sep 2021 19:40:51 +0200 Subject: [PATCH 139/641] [atv.at] Use jwt for API (#1012) The jwt token is implemented according to RFC7519 Closes #988 Authored by: NeroBurner --- yt_dlp/extractor/atvat.py | 118 ++++++++++++++++++++++++-------------- yt_dlp/utils.py | 33 +++++++++-- 2 files changed, 102 insertions(+), 49 deletions(-) diff --git a/yt_dlp/extractor/atvat.py b/yt_dlp/extractor/atvat.py index bfcf88f1af..7c30cfcbb9 100644 --- a/yt_dlp/extractor/atvat.py +++ b/yt_dlp/extractor/atvat.py @@ -1,74 +1,106 @@ # coding: utf-8 from __future__ import unicode_literals +import datetime + from .common import InfoExtractor from ..utils import ( - determine_ext, - dict_get, - int_or_none, - unescapeHTML, + float_or_none, + jwt_encode_hs256, + try_get, ) class ATVAtIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)' + _VALID_URL = r'https?://(?:www\.)?atv\.at/tv/(?:[^/]+/){2,3}(?P<id>.*)' + _TESTS = [{ - 'url': 'https://www.atv.at/bauer-sucht-frau-die-zweite-chance/folge-1/d3390693/', - 'md5': 'c471605591009dfb6e6c54f7e62e2807', + 'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/bauer-sucht-frau/bauer-sucht-frau-staffel-18-folge-3-die-hofwochen', + 'md5': '3c3b4aaca9f63e32b35e04a9c2515903', 'info_dict': { - 'id': '3390684', + 'id': 'v-ce9cgn1e70n5-1', 'ext': 'mp4', - 'title': 'Bauer sucht Frau - Die zweite Chance Folge 1', + 'title': 'Bauer sucht Frau - Staffel 18 Folge 3 - Die Hofwochen', } }, { - 'url': 'https://www.atv.at/bauer-sucht-frau-staffel-17/fuenfte-eventfolge/d3339537/', + 'url': 'https://www.atv.at/tv/bauer-sucht-frau/staffel-18/episode-01/bauer-sucht-frau-staffel-18-vorstellungsfolge-1', 'only_matching': True, }] def 
_process_source_entry(self, source, part_id): - source_url = source.get('url') - if not source_url: - return - if determine_ext(source_url) == 'm3u8': - return self._extract_m3u8_formats( - source_url, part_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False) - else: - return [{ - 'url': source_url, - }] + # extracted from bootstrap.js function (search for e.encryption_key and use your browser's debugger) + _ACCESS_ID = 'x_atv' + _ENCRYPTION_KEY = 'Hohnaekeishoogh2omaeghooquooshia' - def _process_entry(self, entry): - part_id = entry.get('id') - if not part_id: - return + def _extract_video_info(self, url, content, video): + clip_id = content.get('splitId', content['id']) formats = [] - for source in entry.get('sources', []): - formats.extend(self._process_source_entry(source, part_id) or []) - + clip_urls = video['urls'] + for protocol, variant in clip_urls.items(): + source_url = try_get(variant, lambda x: x['clear']['url']) + if not source_url: + continue + if protocol == 'dash': + formats.extend(self._extract_mpd_formats( + source_url, clip_id, mpd_id=protocol, fatal=False)) + elif protocol == 'hls': + formats.extend(self._extract_m3u8_formats( + source_url, clip_id, 'mp4', 'm3u8_native', + m3u8_id=protocol, fatal=False)) + else: + formats.append({ + 'url': source_url, + 'format_id': protocol, + }) self._sort_formats(formats) + return { - 'id': part_id, - 'title': entry.get('title'), - 'duration': int_or_none(entry.get('duration')), - 'formats': formats + 'id': clip_id, + 'title': content.get('title'), + 'duration': float_or_none(content.get('duration')), + 'series': content.get('tvShowTitle'), + 'formats': formats, } def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - video_data = self._parse_json(unescapeHTML(self._search_regex( - r'var\splaylist\s*=\s*(?P<json>\[.*\]);', - webpage, 'player data', group='json')), - display_id) + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + json_data = self._parse_json( + self._search_regex(r'<script id="state" type="text/plain">(.*)</script>', webpage, 'json_data'), + video_id=video_id) - first_video = video_data[0] - video_id = first_video['id'] - video_title = dict_get(first_video, ('tvShowTitle', 'title')) + video_title = json_data['views']['default']['page']['title'] + contentResource = json_data['views']['default']['page']['contentResource'] + content_id = contentResource[0]['id'] + content_ids = [{'id': id, 'subclip_start': content['start'], 'subclip_end': content['end']} + for id, content in enumerate(contentResource)] + + time_of_request = datetime.datetime.now() + not_before = time_of_request - datetime.timedelta(minutes=5) + expire = time_of_request + datetime.timedelta(minutes=5) + payload = { + 'content_ids': { + content_id: content_ids, + }, + 'secure_delivery': True, + 'iat': int(time_of_request.timestamp()), + 'nbf': int(not_before.timestamp()), + 'exp': int(expire.timestamp()), + } + jwt_token = jwt_encode_hs256(payload, self._ENCRYPTION_KEY, headers={'kid': self._ACCESS_ID}) + videos = self._download_json( + 'https://vas-v4.p7s1video.net/4.0/getsources', + content_id, 'Downloading videos JSON', query={ + 'token': jwt_token.decode('utf-8') + }) + + video_id, videos_data = list(videos['data'].items())[0] + entries = [ + self._extract_video_info(url, contentResource[video['id']], video) + for video in videos_data] return { '_type': 'multi_video', 'id': video_id, 'title': video_title, - 'entries': (self._process_entry(entry) for entry in video_data), + 'entries': entries, } diff --git 
a/yt_dlp/utils.py b/yt_dlp/utils.py index 9eb47fccb1..141d2c9ccd 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -16,6 +16,8 @@ import email.header import errno import functools import gzip +import hashlib +import hmac import imp import io import itertools @@ -3290,6 +3292,14 @@ def platform_name(): return res +def get_windows_version(): + ''' Get Windows version. None if it's not running on Windows ''' + if compat_os_name == 'nt': + return version_tuple(platform.win32_ver()[1]) + else: + return None + + def _windows_write_string(s, out): """ Returns True if the string was written using special methods, False if it has yet to be written out.""" @@ -6375,9 +6385,20 @@ def variadic(x, allowed_types=(str, bytes)): return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,) -def get_windows_version(): - ''' Get Windows version. None if it's not running on Windows ''' - if compat_os_name == 'nt': - return version_tuple(platform.win32_ver()[1]) - else: - return None +# create a JSON Web Signature (jws) with HS256 algorithm +# the resulting format is in JWS Compact Serialization +# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html +# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html +def jwt_encode_hs256(payload_data, key, headers={}): + header_data = { + 'alg': 'HS256', + 'typ': 'JWT', + } + if headers: + header_data.update(headers) + header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8')) + payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8')) + h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256) + signature_b64 = base64.b64encode(h.digest()) + token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64 + return token From d1a7768432247816f699e73e3cbba19138d1e088 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 23 Sep 2021 14:30:49 +0530 Subject: [PATCH 140/641] [Chingari] Add extractors (#1038) Authored by: Ashish0804 --- yt_dlp/extractor/chingari.py | 209 +++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 4 + 2 files changed, 213 insertions(+) create mode 100644 yt_dlp/extractor/chingari.py diff --git a/yt_dlp/extractor/chingari.py b/yt_dlp/extractor/chingari.py new file mode 100644 index 0000000000..6bdc4f6bbb --- /dev/null +++ b/yt_dlp/extractor/chingari.py @@ -0,0 +1,209 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import itertools +import json + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote_plus +from ..utils import ( + clean_html, + ExtractorError, + int_or_none, + str_to_int, + url_or_none, +) + + +class ChingariBaseIE(InfoExtractor): + def _get_post(self, id, post_data): + media_data = post_data['mediaLocation'] + base_url = media_data['base'] + author_data = post_data.get('authorData', {}) + song_data = post_data.get('song', {}) # revisit this in future for differentiating b/w 'art' and 'author' + + formats = [{ + 'format_id': frmt, + 'width': str_to_int(frmt[1:]), + 'url': base_url + frmt_path, + } for frmt, frmt_path in media_data.get('transcoded', {}).items()] + + if media_data.get('path'): + formats.append({ + 'format_id': 'original', + 'format_note': 'Direct video.', + 'url': base_url + '/apipublic' + media_data['path'], + 'quality': 10, + }) + self._sort_formats(formats) + timestamp = str_to_int(post_data.get('created_at')) + if timestamp: + timestamp = int_or_none(timestamp, 1000) + + thumbnail, 
uploader_url = None, None + if media_data.get('thumbnail'): + thumbnail = base_url + media_data.get('thumbnail') + if author_data.get('username'): + uploader_url = 'https://chingari.io/' + author_data.get('username') + + return { + 'id': id, + 'title': compat_urllib_parse_unquote_plus(clean_html(post_data.get('caption'))), + 'description': compat_urllib_parse_unquote_plus(clean_html(post_data.get('caption'))), + 'duration': media_data.get('duration'), + 'thumbnail': url_or_none(thumbnail), + 'like_count': post_data.get('likeCount'), + 'view_count': post_data.get('viewsCount'), + 'comment_count': post_data.get('commentCount'), + 'repost_count': post_data.get('shareCount'), + 'timestamp': timestamp, + 'uploader_id': post_data.get('userId') or author_data.get('_id'), + 'uploader': author_data.get('name'), + 'uploader_url': url_or_none(uploader_url), + 'track': song_data.get('title'), + 'artist': song_data.get('author'), + 'formats': formats, + } + + +class ChingariIE(ChingariBaseIE): + _VALID_URL = r'(?:https?://)(?:www\.)?chingari\.io/share/post\?id=(?P<id>[^&/#?]+)' + _TESTS = [{ + 'url': 'https://chingari.io/share/post?id=612f8f4ce1dc57090e8a7beb', + 'info_dict': { + 'id': '612f8f4ce1dc57090e8a7beb', + 'ext': 'mp4', + 'title': 'Happy birthday Srila Prabhupada', + 'description': 'md5:c7080ebfdfeb06016e638c286d6bc3fa', + 'duration': 0, + 'thumbnail': 'https://media.chingari.io/uploads/c41d30e2-06b6-4e3b-9b4b-edbb929cec06-1630506826911/thumbnail/198f993f-ce87-4623-82c6-cd071bd6d4f4-1630506828016.jpg', + 'like_count': int, + 'view_count': int, + 'comment_count': int, + 'repost_count': int, + 'timestamp': 1630506828, + 'upload_date': '20210901', + 'uploader_id': '5f0403982c8bd344f4813f8c', + 'uploader': 'ISKCON,Inc.', + 'uploader_url': 'https://chingari.io/iskcon,inc', + 'track': None, + 'artist': None, + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + post_json = self._download_json(f'https://api.chingari.io/post/post_details/{id}', id) + if post_json['code'] != 200: + raise ExtractorError(post_json['message'], expected=True) + post_data = post_json['data'] + return self._get_post(id, post_data) + + +class ChingariUserIE(ChingariBaseIE): + _VALID_URL = r'(?:https?://)(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)' + _TESTS = [{ + 'url': 'https://chingari.io/dada1023', + 'playlist_mincount': 3, + 'info_dict': { + 'id': 'dada1023', + }, + 'entries': [{ + 'url': 'https://chingari.io/share/post?id=614781f3ade60b3a0bfff42a', + 'info_dict': { + 'id': '614781f3ade60b3a0bfff42a', + 'ext': 'mp4', + 'title': '#chingaribappa ', + 'description': 'md5:d1df21d84088770468fa63afe3b17857', + 'duration': 7, + 'thumbnail': 'https://media.chingari.io/uploads/346d86d4-abb2-474e-a164-ffccf2bbcb72-1632076273717/thumbnail/b0b3aac2-2b86-4dd1-909d-9ed6e57cf77c-1632076275552.jpg', + 'like_count': int, + 'view_count': int, + 'comment_count': int, + 'repost_count': int, + 'timestamp': 1632076275, + 'upload_date': '20210919', + 'uploader_id': '5efc4b12cca35c3d1794c2d3', + 'uploader': 'dada (girish) dhawale', + 'uploader_url': 'https://chingari.io/dada1023', + 'track': None, + 'artist': None + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://chingari.io/share/post?id=6146b132bcbf860959e12cba', + 'info_dict': { + 'id': '6146b132bcbf860959e12cba', + 'ext': 'mp4', + 'title': 'Tactor harvesting', + 'description': 'md5:8403f12dce68828b77ecee7eb7e887b7', + 'duration': 59.3, + 'thumbnail': 
'https://media.chingari.io/uploads/b353ca70-7a87-400d-93a6-fa561afaec86-1632022814584/thumbnail/c09302e3-2043-41b1-a2fe-77d97e5bd676-1632022834260.jpg', + 'like_count': int, + 'view_count': int, + 'comment_count': int, + 'repost_count': int, + 'timestamp': 1632022834, + 'upload_date': '20210919', + 'uploader_id': '5efc4b12cca35c3d1794c2d3', + 'uploader': 'dada (girish) dhawale', + 'uploader_url': 'https://chingari.io/dada1023', + 'track': None, + 'artist': None + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://chingari.io/share/post?id=6145651b74cb030a64c40b82', + 'info_dict': { + 'id': '6145651b74cb030a64c40b82', + 'ext': 'mp4', + 'title': '#odiabhajan ', + 'description': 'md5:687ea36835b9276cf2af90f25e7654cb', + 'duration': 56.67, + 'thumbnail': 'https://media.chingari.io/uploads/6cbf216b-babc-4cce-87fe-ceaac8d706ac-1631937782708/thumbnail/8855754f-6669-48ce-b269-8cc0699ed6da-1631937819522.jpg', + 'like_count': int, + 'view_count': int, + 'comment_count': int, + 'repost_count': int, + 'timestamp': 1631937819, + 'upload_date': '20210918', + 'uploader_id': '5efc4b12cca35c3d1794c2d3', + 'uploader': 'dada (girish) dhawale', + 'uploader_url': 'https://chingari.io/dada1023', + 'track': None, + 'artist': None + }, + 'params': {'skip_download': True} + }], + }, { + 'url': 'https://chingari.io/iskcon%2Cinc', + 'playlist_mincount': 1025, + 'info_dict': { + 'id': 'iskcon%2Cinc', + }, + }] + + def _entries(self, id): + skip = 0 + has_more = True + for page in itertools.count(): + posts = self._download_json('https://api.chingari.io/users/getPosts', id, + data=json.dumps({'userId': id, 'ownerId': id, 'skip': skip, 'limit': 20}).encode(), + headers={'content-type': 'application/json;charset=UTF-8'}, + note='Downloading page %s' % page) + for post in posts.get('data', []): + post_data = post['post'] + yield self._get_post(post_data['_id'], post_data) + skip += 20 + has_more = posts['hasMoreData'] + if not has_more: + break + + def _real_extract(self, url): + alt_id = self._match_id(url) + post_json = self._download_json(f'https://api.chingari.io/user/{alt_id}', alt_id) + if post_json['code'] != 200: + raise ExtractorError(post_json['message'], expected=True) + id = post_json['data']['_id'] + return self.playlist_result(self._entries(id), playlist_id=alt_id) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 468fefbf14..eb9cff6c25 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -244,6 +244,10 @@ from .channel9 import Channel9IE from .charlierose import CharlieRoseIE from .chaturbate import ChaturbateIE from .chilloutzone import ChilloutzoneIE +from .chingari import ( + ChingariIE, + ChingariUserIE, +) from .chirbit import ( ChirbitIE, ChirbitProfileIE, From 9ada988bfcac44e22129606b8bb6467bccedb202 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 23 Sep 2021 23:45:17 +0530 Subject: [PATCH 141/641] [Koo] Add extractor (#1044) Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/koo.py | 116 +++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 yt_dlp/extractor/koo.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index eb9cff6c25..7be6eec1f7 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -632,6 +632,7 @@ from .kickstarter import KickStarterIE from .kinja import KinjaEmbedIE from .kinopoisk import KinoPoiskIE from .konserthusetplay import 
KonserthusetPlayIE +from .koo import KooIE from .krasview import KrasViewIE from .ku6 import Ku6IE from .kusi import KUSIIE diff --git a/yt_dlp/extractor/koo.py b/yt_dlp/extractor/koo.py new file mode 100644 index 0000000000..1706b28a0d --- /dev/null +++ b/yt_dlp/extractor/koo.py @@ -0,0 +1,116 @@ +# coding: utf-8 +from __future__ import unicode_literals +from .common import InfoExtractor +from ..utils import ( + clean_html, + try_get, +) + + +class KooIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)' + _TESTS = [{ # Test for video in the comments + 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/946c4189-bc2d-4524-b95b-43f641e2adde', + 'info_dict': { + 'id': '946c4189-bc2d-4524-b95b-43f641e2adde', + 'ext': 'mp4', + 'title': 'test for video in comment', + 'description': 'md5:daa77dc214add4da8b6ea7d2226776e7', + 'timestamp': 1632215195, + 'uploader_id': 'ytdlpTestAccount', + 'uploader': 'yt-dlpTestAccount', + 'duration': 7000, + 'upload_date': '20210921' + }, + 'params': {'skip_download': True} + }, { # Test for koo with long title + 'url': 'https://www.kooapp.com/koo/laxman_kumarDBFEC/33decbf7-5e1e-4bb8-bfd7-04744a064361', + 'info_dict': { + 'id': '33decbf7-5e1e-4bb8-bfd7-04744a064361', + 'ext': 'mp4', + 'title': 'md5:47a71c2337295330c5a19a8af1bbf450', + 'description': 'md5:06a6a84e9321499486dab541693d8425', + 'timestamp': 1632106884, + 'uploader_id': 'laxman_kumarDBFEC', + 'uploader': 'Laxman Kumar 🇮🇳', + 'duration': 46000, + 'upload_date': '20210920' + }, + 'params': {'skip_download': True} + }, { # Test for audio + 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/a2a9c88e-ce4b-4d2d-952f-d06361c5b602', + 'info_dict': { + 'id': 'a2a9c88e-ce4b-4d2d-952f-d06361c5b602', + 'ext': 'mp4', + 'title': 'Test for audio', + 'description': 'md5:ecb9a2b6a5d34b736cecb53788cb11e8', + 'timestamp': 1632211634, + 'uploader_id': 'ytdlpTestAccount', + 'uploader': 'yt-dlpTestAccount', + 'duration': 214000, + 'upload_date': '20210921' + }, + 'params': {'skip_download': True} + }, { # Test for video + 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/a3e56c53-c1ed-4ac9-ac02-ed1630e6b1d1', + 'info_dict': { + 'id': 'a3e56c53-c1ed-4ac9-ac02-ed1630e6b1d1', + 'ext': 'mp4', + 'title': 'Test for video', + 'description': 'md5:7afc4eb839074ddeb2beea5dd6fe9500', + 'timestamp': 1632211468, + 'uploader_id': 'ytdlpTestAccount', + 'uploader': 'yt-dlpTestAccount', + 'duration': 14000, + 'upload_date': '20210921' + }, + 'params': {'skip_download': True} + }, { # Test for link + 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/01bf5b94-81a5-4d8e-a387-5f732022e15a', + 'skip': 'No video/audio found at the provided url.', + 'info_dict': { + 'id': '01bf5b94-81a5-4d8e-a387-5f732022e15a', + 'title': 'Test for link', + 'ext': 'none', + }, + }, { # Test for images + 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/dc05d9cd-a61d-45fd-bb07-e8019d8ca8cb', + 'skip': 'No video/audio found at the provided url.', + 'info_dict': { + 'id': 'dc05d9cd-a61d-45fd-bb07-e8019d8ca8cb', + 'title': 'Test for images', + 'ext': 'none', + }, + }] + + def _real_extract(self, url): + id = self._match_id(url) + data_json = self._download_json(f'https://www.kooapp.com/apiV1/ku/{id}?limit=20&offset=0&showSimilarKoos=true', id)['parentContent'] + item_json = next(content['items'][0] for content in data_json + if try_get(content, lambda x: x['items'][0]['id']) == id) + media_json = item_json['mediaMap'] + formats = [] + + mp4_url = media_json.get('videoMp4') + video_m3u8_url = media_json.get('videoHls') 
+ if mp4_url: + formats.append({ + 'url': mp4_url, + 'ext': 'mp4', + }) + if video_m3u8_url: + formats.extend(self._extract_m3u8_formats(video_m3u8_url, id, fatal=False, ext='mp4')) + if not formats: + self.raise_no_formats('No video/audio found at the provided url.', expected=True) + + self._sort_formats(formats) + return { + 'id': id, + 'title': clean_html(item_json.get('title')), + 'description': f'{clean_html(item_json.get("title"))}\n\n{clean_html(item_json.get("enTransliteration"))}', + 'timestamp': item_json.get('createdAt'), + 'uploader_id': item_json.get('handle'), + 'uploader': item_json.get('name'), + 'duration': media_json.get('duration'), + 'formats': formats, + } From 8100c77223d100e91fdc427e28dc39fc9753944e Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 23 Sep 2021 17:11:43 +0530 Subject: [PATCH 142/641] [lbry] Show error message from API response --- yt_dlp/extractor/lbry.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py index 4289c51b81..0f87bf1d7f 100644 --- a/yt_dlp/extractor/lbry.py +++ b/yt_dlp/extractor/lbry.py @@ -28,14 +28,19 @@ class LBRYBaseIE(InfoExtractor): _SUPPORTED_STREAM_TYPES = ['video', 'audio'] def _call_api_proxy(self, method, display_id, params, resource): - return self._download_json( + response = self._download_json( 'https://api.lbry.tv/api/v1/proxy', display_id, 'Downloading %s JSON metadata' % resource, headers={'Content-Type': 'application/json-rpc'}, data=json.dumps({ 'method': method, 'params': params, - }).encode())['result'] + }).encode()) + err = response.get('error') + if err: + raise ExtractorError( + f'{self.IE_NAME} said: {err.get("code")} - {err.get("message")}', expected=True) + return response['result'] def _resolve_url(self, url, display_id, resource): return self._call_api_proxy( From 1813a6ccd4928f81ca5f4c0144c0008f404d67dd Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 24 Sep 2021 02:16:03 +0530 Subject: [PATCH 143/641] [youtube] Fix `--mark-watched` with `--cookies-from-browser` Closes #1019 --- yt_dlp/extractor/common.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index ae03c1bab3..4797e8e2d6 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3503,9 +3503,11 @@ class InfoExtractor(object): raise NotImplementedError('This method must be implemented by subclasses') def mark_watched(self, *args, **kwargs): - if (self.get_param('mark_watched', False) - and (self._get_login_info()[0] is not None - or self.get_param('cookiefile') is not None)): + if not self.get_param('mark_watched', False): + return + if (self._get_login_info()[0] is not None + or self.get_param('cookiefile') + or self.get_param('cookiesfrombrowser')): self._mark_watched(*args, **kwargs) def _mark_watched(self, *args, **kwargs): From be867b03f56b53892d55e573502713b20b88bec1 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 24 Sep 2021 03:35:29 +0530 Subject: [PATCH 144/641] bugfix for bd50a52b0d7247cdbf205eb851ce33ae4b89c516 --- yt_dlp/downloader/common.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 53e83d2c3f..bb0614037a 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -264,8 +264,7 @@ class FileDownloader(object): self._multiline.print_at_line(fullmsg, progress_line) else: if compat_os_name == 'nt' or not sys.stderr.isatty(): - prev_len = 
getattr(self, '_report_progress_prev_line_length', - 0) + prev_len = getattr(self, '_report_progress_prev_line_length', 0) if prev_len > len(fullmsg): fullmsg += ' ' * (prev_len - len(fullmsg)) self._report_progress_prev_line_length = len(fullmsg) @@ -288,7 +287,7 @@ class FileDownloader(object): s['_elapsed_str'] = self.format_seconds(s['elapsed']) msg_template += ' in %(_elapsed_str)s' self._report_progress_status( - msg_template % s, progress_line=s.get('progress_idx')) + msg_template % s, is_last_line=True, progress_line=s.get('progress_idx')) return if self.params.get('noprogress'): From 8f70b0b82f4b8d0c9f40ff60893ffc8601b3dab6 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 24 Sep 2021 04:09:03 +0530 Subject: [PATCH 145/641] [cbs] Report appropriate error for DRM Closes #1056 --- yt_dlp/extractor/cbs.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py index ed5dc84a76..ae9ce58628 100644 --- a/yt_dlp/extractor/cbs.py +++ b/yt_dlp/extractor/cbs.py @@ -130,6 +130,7 @@ class CBSIE(CBSBaseIE): title = xpath_text(video_data, 'videoTitle', 'title') or xpath_text(video_data, 'videotitle', 'title') asset_types = {} + has_drm = False for item in items_data.findall('.//item'): asset_type = xpath_text(item, 'assetType') query = { @@ -144,6 +145,8 @@ class CBSIE(CBSBaseIE): if asset_type in asset_types: continue elif any(excluded in asset_type for excluded in ('HLS_FPS', 'DASH_CENC', 'OnceURL')): + if 'DASH_CENC' in asset_type: + has_drm = True continue if asset_type.startswith('HLS') or 'StreamPack' in asset_type: query['formats'] = 'MPEG4,M3U' @@ -151,6 +154,9 @@ class CBSIE(CBSBaseIE): query['formats'] = 'MPEG4,FLV' asset_types[asset_type] = query + if not asset_types and has_drm: + self.report_drm(content_id) + return self._extract_common_video_info(content_id, asset_types, mpx_acc, extra_info={ 'title': title, 'series': xpath_text(video_data, 'seriesTitle'), From 77c4a9ef680837af9d26b3ecf1c3fea9754c8b7b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 24 Sep 2021 05:10:04 +0530 Subject: [PATCH 146/641] Download subtitles in order of `--sub-langs` Closes #1041 --- yt_dlp/YoutubeDL.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 410186b913..117461f5a9 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2363,20 +2363,24 @@ class YoutubeDL(object): if self.params.get('allsubtitles', False): requested_langs = all_sub_langs elif self.params.get('subtitleslangs', False): - requested_langs = set() - for lang in self.params.get('subtitleslangs'): - if lang == 'all': - requested_langs.update(all_sub_langs) + # A list is used so that the order of languages will be the same as + # given in subtitleslangs. 
See https://github.com/yt-dlp/yt-dlp/issues/1041 + requested_langs = [] + for lang_re in self.params.get('subtitleslangs'): + if lang_re == 'all': + requested_langs.extend(all_sub_langs) continue - discard = lang[0] == '-' + discard = lang_re[0] == '-' if discard: - lang = lang[1:] - current_langs = filter(re.compile(lang + '$').match, all_sub_langs) + lang_re = lang_re[1:] + current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs) if discard: for lang in current_langs: - requested_langs.discard(lang) + while lang in requested_langs: + requested_langs.remove(lang) else: - requested_langs.update(current_langs) + requested_langs.extend(current_langs) + requested_langs = orderedSet(requested_langs) elif 'en' in available_subs: requested_langs = ['en'] else: From 1f8471e22cdb5181aa19b0c63523ad96891ea2dd Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 24 Sep 2021 05:10:33 +0530 Subject: [PATCH 147/641] Ignore empty entries in `_list_from_options_callback` --- yt_dlp/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 74c8104712..80b3df74f7 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -119,7 +119,7 @@ def parseOpts(overrideArguments=None): def _list_from_options_callback(option, opt_str, value, parser, append=True, delim=',', process=str.strip): # append can be True, False or -1 (prepend) current = getattr(parser.values, option.dest) if append else [] - value = [process(value)] if delim is None else list(map(process, value.split(delim))) + value = list(filter(None, [process(value)] if delim is None else map(process, value.split(delim)))) setattr( parser.values, option.dest, current + value if append is True else value + current) From b19404591a8ad4d0c7e962931ea809221e3f0b8e Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 24 Sep 2021 05:51:54 +0530 Subject: [PATCH 148/641] Separate the options `--ignore-errors` and `--no-abort-on-error` In youtube-dl, `-i` ignores both download and post-processing error, and treats the download as successful even if the post-processor fails. yt-dlp used to skip the entire video on either error and there was no option to ignore the post-processing errors like youtube-dl does. By splitting the option into two, now either just the download errors (--no-abort-on-error, default on CLI) or all errors (--ignore-errors) can be ignored as per the users' needs Closes #893 --- README.md | 9 ++++++--- yt_dlp/YoutubeDL.py | 21 ++++++++++++++------- yt_dlp/__init__.py | 2 +- yt_dlp/options.py | 10 +++++++--- yt_dlp/postprocessor/common.py | 1 + yt_dlp/postprocessor/ffmpeg.py | 3 +-- yt_dlp/postprocessor/xattrpp.py | 5 ++--- 7 files changed, 32 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 641b672e0d..07a8e5ef25 100644 --- a/README.md +++ b/README.md @@ -243,9 +243,12 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t -U, --update Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed) - -i, --ignore-errors Continue on download errors, for example to - skip unavailable videos in a playlist - (default) (Alias: --no-abort-on-error) + -i, --ignore-errors Ignore download and postprocessing errors. + The download will be considered successfull + even if the postprocessing fails + --no-abort-on-error Continue with next video on download + errors; e.g. 
to skip unavailable videos in + a playlist (default) --abort-on-error Abort downloading of further videos if an error occurs (Alias: --no-ignore-errors) --dump-user-agent Display the current user-agent and exit diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 117461f5a9..8df8f16754 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -226,9 +226,9 @@ class YoutubeDL(object): restrictfilenames: Do not allow "&" and spaces in file names trim_file_name: Limit length of filename (extension excluded) windowsfilenames: Force the filenames to be windows compatible - ignoreerrors: Do not stop on download errors - (Default True when running yt-dlp, - but False when directly accessing YoutubeDL class) + ignoreerrors: Do not stop on download/postprocessing errors. + Can be 'only_download' to ignore only download errors. + Default is 'only_download' for CLI, but False for API skip_playlist_after_errors: Number of allowed failures until the rest of the playlist is skipped force_generic_extractor: Force downloader to use the generic extractor @@ -776,7 +776,7 @@ class YoutubeDL(object): tb = ''.join(tb_data) if tb: self.to_stderr(tb) - if not self.params.get('ignoreerrors', False): + if not self.params.get('ignoreerrors'): if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: exc_info = sys.exc_info()[1].exc_info else: @@ -1241,7 +1241,7 @@ class YoutubeDL(object): except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError): raise except Exception as e: - if self.params.get('ignoreerrors', False): + if self.params.get('ignoreerrors'): self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc())) else: raise @@ -2989,10 +2989,17 @@ class YoutubeDL(object): files_to_delete = [] if '__files_to_move' not in infodict: infodict['__files_to_move'] = {} - files_to_delete, infodict = pp.run(infodict) + try: + files_to_delete, infodict = pp.run(infodict) + except PostProcessingError as e: + # Must be True and not 'only_download' + if self.params.get('ignoreerrors') is True: + self.report_error(e) + return infodict + raise + if not files_to_delete: return infodict - if self.params.get('keepvideo', False): for f in files_to_delete: infodict['__files_to_move'].setdefault(f, '') diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 5168ed0f7c..9987c64721 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -279,7 +279,7 @@ def _real_main(argv=None): setattr(opts, opt_name, default) return None - set_default_compat('abort-on-error', 'ignoreerrors') + set_default_compat('abort-on-error', 'ignoreerrors', 'only_download') set_default_compat('no-playlist-metafiles', 'allow_playlist_files') set_default_compat('no-clean-infojson', 'clean_infojson') if 'format-sort' in compat_opts: diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 80b3df74f7..57e25a5183 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -206,9 +206,13 @@ def parseOpts(overrideArguments=None): action='store_true', dest='update_self', help='Update this program to latest version. 
Make sure that you have sufficient permissions (run with sudo if needed)')
     general.add_option(
-        '-i', '--ignore-errors', '--no-abort-on-error',
-        action='store_true', dest='ignoreerrors', default=None,
-        help='Continue on download errors, for example to skip unavailable videos in a playlist (default) (Alias: --no-abort-on-error)')
+        '-i', '--ignore-errors',
+        action='store_true', dest='ignoreerrors',
+        help='Ignore download and postprocessing errors. The download will be considered successful even if the postprocessing fails')
+    general.add_option(
+        '--no-abort-on-error',
+        action='store_const', dest='ignoreerrors', const='only_download',
+        help='Continue with next video on download errors; e.g. to skip unavailable videos in a playlist (default)')
     general.add_option(
         '--abort-on-error', '--no-ignore-errors',
         action='store_false', dest='ignoreerrors',
diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py
index aa4715b062..d8ec997d9d 100644
--- a/yt_dlp/postprocessor/common.py
+++ b/yt_dlp/postprocessor/common.py
@@ -52,6 +52,7 @@ class PostProcessor(object):
             return self._downloader.report_warning(text, *args, **kwargs)
 
     def report_error(self, text, *args, **kwargs):
+        # Exists only for compatibility. Do not use
         if self._downloader:
             return self._downloader.report_error(text, *args, **kwargs)
 
diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index 7ea01620ea..679377aa63 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -288,8 +288,7 @@ class FFmpegPostProcessor(PostProcessor):
         stdout, stderr = process_communicate_or_kill(p)
         if p.returncode not in variadic(expected_retcodes):
             stderr = stderr.decode('utf-8', 'replace').strip()
-            if self.get_param('verbose', False):
-                self.report_error(stderr)
+            self.write_debug(stderr)
             raise FFmpegPostProcessorError(stderr.split('\n')[-1])
         for out_path, _ in output_path_opts:
             if out_path:
diff --git a/yt_dlp/postprocessor/xattrpp.py b/yt_dlp/postprocessor/xattrpp.py
index 3d31f0ce5b..95afa1c4f6 100644
--- a/yt_dlp/postprocessor/xattrpp.py
+++ b/yt_dlp/postprocessor/xattrpp.py
@@ -57,8 +57,7 @@ class XAttrMetadataPP(PostProcessor):
             return [], info
 
         except XAttrUnavailableError as e:
-            self.report_error(str(e))
-            return [], info
+            raise PostProcessingError(str(e))
 
         except XAttrMetadataError as e:
             if e.reason == 'NO_SPACE':
@@ -74,5 +73,5 @@ class XAttrMetadataPP(PostProcessor):
                 msg += 'You need to use NTFS.'
else: msg += '(You may have to enable them in your /etc/fstab)' - self.report_error(msg) + raise PostProcessingError(str(e)) return [], info From 51ff9ca0b0ff34fc09036c948b433c60d8247c77 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 24 Sep 2021 06:20:42 +0530 Subject: [PATCH 149/641] [xattr] bugfix for b19404591a8ad4d0c7e962931ea809221e3f0b8e --- yt_dlp/postprocessor/xattrpp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/postprocessor/xattrpp.py b/yt_dlp/postprocessor/xattrpp.py index 95afa1c4f6..93acd6d133 100644 --- a/yt_dlp/postprocessor/xattrpp.py +++ b/yt_dlp/postprocessor/xattrpp.py @@ -5,6 +5,7 @@ from ..compat import compat_os_name from ..utils import ( hyphenate_date, write_xattr, + PostProcessingError, XAttrMetadataError, XAttrUnavailableError, ) From 99e9e001de8a4106654d7a20757cae725a5ac0c3 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Fri, 24 Sep 2021 00:52:17 +0000 Subject: [PATCH 150/641] [youtube] Cleanup authentication code (#786) Authored by: coletdjnz --- yt_dlp/downloader/youtube_live_chat.py | 2 +- yt_dlp/extractor/youtube.py | 130 ++++++++++++------------- 2 files changed, 62 insertions(+), 70 deletions(-) diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py index 2dc6ff954c..ef4205edc7 100644 --- a/yt_dlp/downloader/youtube_live_chat.py +++ b/yt_dlp/downloader/youtube_live_chat.py @@ -183,7 +183,7 @@ class YoutubeLiveChatFD(FragmentFD): request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))} if click_tracking_params: request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params} - headers = ie.generate_api_headers(ytcfg, visitor_data=visitor_data) + headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data) headers.update({'content-type': 'application/json'}) fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n' success, continuation_id, offset, click_tracking_params = download_and_parse_fragment( diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 7f65e2b7dd..272bdb0597 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -508,13 +508,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor): ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'], lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client) - @staticmethod - def _extract_session_index(*data): - for ytcfg in data: - session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX'])) - if session_index is not None: - return session_index - def _extract_client_version(self, ytcfg, default_client='web'): return self._ytcfg_get_safe( ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'], @@ -593,17 +586,27 @@ class YoutubeBaseInfoExtractor(InfoExtractor): self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'), video_id) - def _extract_identity_token(self, webpage, item_id): - if not webpage: - return None - ytcfg = self.extract_ytcfg(item_id, webpage) + @staticmethod + def _extract_session_index(*data): + """ + Index of current account in account list. + See: https://github.com/yt-dlp/yt-dlp/pull/519 + """ + for ytcfg in data: + session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX'])) + if session_index is not None: + return session_index + + # Deprecated? 
+ def _extract_identity_token(self, ytcfg=None, webpage=None): if ytcfg: token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str) if token: return token - return self._search_regex( - r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage, - 'identity token', default=None) + if webpage: + return self._search_regex( + r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage, + 'identity token', default=None, fatal=False) @staticmethod def _extract_account_syncid(*args): @@ -624,6 +627,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor): # and just "user_syncid||" for primary channel. We only want the channel_syncid return sync_ids[0] + @property + def is_authenticated(self): + return bool(self._generate_sapisidhash_header()) + def extract_ytcfg(self, video_id, webpage): if not webpage: return {} @@ -633,33 +640,30 @@ class YoutubeBaseInfoExtractor(InfoExtractor): default='{}'), video_id, fatal=False) or {} def generate_api_headers( - self, ytcfg=None, identity_token=None, account_syncid=None, - visitor_data=None, api_hostname=None, default_client='web', session_index=None): + self, *, ytcfg=None, account_syncid=None, session_index=None, + visitor_data=None, identity_token=None, api_hostname=None, default_client='web'): + origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client)) headers = { 'X-YouTube-Client-Name': compat_str( self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)), 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client), - 'Origin': origin - } - if not visitor_data and ytcfg: - visitor_data = try_get( + 'Origin': origin, + 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg), + 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg), + 'X-Goog-Visitor-Id': visitor_data or try_get( self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str) - if identity_token: - headers['X-Youtube-Identity-Token'] = identity_token - if account_syncid: - headers['X-Goog-PageId'] = account_syncid - if session_index is None and ytcfg: + } + if session_index is None: session_index = self._extract_session_index(ytcfg) if account_syncid or session_index is not None: headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0 - if visitor_data: - headers['X-Goog-Visitor-Id'] = visitor_data + auth = self._generate_sapisidhash_header(origin) if auth is not None: headers['Authorization'] = auth headers['X-Origin'] = origin - return headers + return {h: v for h, v in headers.items() if v is not None} @staticmethod def _build_api_continuation_query(continuation, ctp=None): @@ -2224,8 +2228,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'parent': parent or 'root' } - def _comment_entries(self, root_continuation_data, identity_token, account_syncid, - ytcfg, video_id, parent=None, comment_counts=None): + def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None): def extract_header(contents): _total_comments = 0 @@ -2283,8 +2286,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if comment_replies_renderer: comment_counts[2] += 1 comment_entries_iter = self._comment_entries( - comment_replies_renderer, identity_token, account_syncid, ytcfg, - video_id, parent=comment.get('id'), comment_counts=comment_counts) + comment_replies_renderer, ytcfg, video_id, + parent=comment.get('id'), comment_counts=comment_counts) for reply_comment in comment_entries_iter: yield 
reply_comment @@ -2309,7 +2312,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): for page_num in itertools.count(0): if not continuation: break - headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data) + headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data) comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1]) if page_num == 0: if is_first_continuation: @@ -2409,18 +2412,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_comments(self, ytcfg, video_id, contents, webpage): """Entry for comment extraction""" def _real_comment_extract(contents): - if isinstance(contents, list): - for entry in contents: - for key, renderer in entry.items(): - if key not in known_entry_comment_renderers: - continue - yield from self._comment_entries( - renderer, video_id=video_id, ytcfg=ytcfg, - identity_token=self._extract_identity_token(webpage, item_id=video_id), - account_syncid=self._extract_account_syncid(ytcfg)) - break + yield from self._comment_entries( + traverse_obj(contents, (..., 'itemSectionRenderer'), get_all=False), ytcfg, video_id) + comments = [] - known_entry_comment_renderers = ('itemSectionRenderer',) estimated_total = 0 max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf') # Force English regardless of account setting to prevent parsing issues @@ -2445,7 +2440,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } @staticmethod - def _generate_player_context(sts=None): + def _get_checkok_params(): + return {'contentCheckOk': True, 'racyCheckOk': True} + + @classmethod + def _generate_player_context(cls, sts=None): context = { 'html5Preference': 'HTML5_PREF_WANTS', } @@ -2455,8 +2454,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'playbackContext': { 'contentPlaybackContext': context }, - 'contentCheckOk': True, - 'racyCheckOk': True + **cls._get_checkok_params() } @staticmethod @@ -2475,14 +2473,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _is_unplayable(player_response): return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE' - def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr): + def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr): session_index = self._extract_session_index(player_ytcfg, master_ytcfg) syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr) sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None headers = self.generate_api_headers( - player_ytcfg, identity_token, syncid, - default_client=client, session_index=session_index) + ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client) yt_query = {'videoId': video_id} yt_query.update(self._generate_player_context(sts)) @@ -2524,7 +2521,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config') return self.extract_ytcfg(video_id, webpage) or {} - def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, identity_token): + def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg): initial_pr = None if webpage: initial_pr = self._extract_yt_initial_variable( @@ -2569,7 +2566,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): try: pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response( - 
client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url if require_js_player else None, initial_pr) + client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr) except ExtractorError as e: if last_error: self.report_warning(last_error) @@ -2580,7 +2577,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): prs.append(pr) # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in - if client.endswith('_agegate') and self._is_unplayable(pr) and self._generate_sapisidhash_header(): + if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated: append_client(client.replace('_agegate', '_creator')) elif self._is_agegated(pr): append_client(f'{client}_agegate') @@ -2742,11 +2739,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False) master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg() - identity_token = self._extract_identity_token(webpage, video_id) player_responses, player_url = self._extract_player_responses( self._get_requested_clients(url, smuggled_data), - video_id, webpage, master_ytcfg, identity_token) + video_id, webpage, master_ytcfg) get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False) @@ -3059,13 +3055,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): webpage, self._YT_INITIAL_DATA_RE, video_id, 'yt initial data') if not initial_data: - headers = self.generate_api_headers( - master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg), - session_index=self._extract_session_index(master_ytcfg)) - + query = {'videoId': video_id} + query.update(self._get_checkok_params()) initial_data = self._extract_response( item_id=video_id, ep='next', fatal=False, - ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id}, + ytcfg=master_ytcfg, query=query, + headers=self.generate_api_headers(ytcfg=master_ytcfg), note='Downloading initial data API JSON') try: @@ -3837,7 +3832,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): if entry: yield entry ''' - def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg): + def _entries(self, tab, item_id, account_syncid, ytcfg): def extract_entries(parent_renderer): # this needs to called again for continuation to work with feeds contents = try_get(parent_renderer, lambda x: x['contents'], list) or [] @@ -3894,7 +3889,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): for page_num in itertools.count(1): if not continuation: break - headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data) + headers = self.generate_api_headers( + ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data) response = self._extract_response( item_id='%s page %s' % (item_id, page_num), query=continuation, headers=headers, ytcfg=ytcfg, @@ -4048,7 +4044,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): return self.playlist_result( self._entries( selected_tab, playlist_id, - self._extract_identity_token(webpage, item_id), self._extract_account_syncid(ytcfg, data), ytcfg), **metadata) @@ -4056,8 +4051,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): first_id = last_id = None ytcfg = self.extract_ytcfg(playlist_id, webpage) headers = self.generate_api_headers( - ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data), - identity_token=self._extract_identity_token(webpage, item_id=playlist_id)) + ytcfg=ytcfg, 
account_syncid=self._extract_account_syncid(ytcfg, data))
         for page_num in itertools.count(1):
             videos = list(self._playlist_entries(playlist))
             if not videos:
@@ -4173,10 +4167,8 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
 
         ytcfg = self.extract_ytcfg(item_id, webpage)
         headers = self.generate_api_headers(
-            ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
-            identity_token=self._extract_identity_token(webpage, item_id=item_id),
-            visitor_data=try_get(
-                self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
+            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
+            visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
         query = {
             'params': params or 'wgYCCAA=',
             'browseId': browse_id or 'VL%s' % item_id

From eb6d4ad1caf04ddf8c4278866790a259fed09629 Mon Sep 17 00:00:00 2001
From: Aleri Kaisattera <73682764+alerikaisattera@users.noreply.github.com>
Date: Fri, 24 Sep 2021 06:53:51 +0600
Subject: [PATCH 151/641] [Theta] Add extractor (#1068)

Authored by: alerikaisattera
---
 yt_dlp/extractor/extractors.py |  1 +
 yt_dlp/extractor/theta.py      | 51 ++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 yt_dlp/extractor/theta.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 7be6eec1f7..4b15598863 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1428,6 +1428,7 @@ from .theplatform import (
 from .thescene import TheSceneIE
 from .thestar import TheStarIE
 from .thesun import TheSunIE
+from .theta import ThetaIE
 from .theweatherchannel import TheWeatherChannelIE
 from .thisamericanlife import ThisAmericanLifeIE
 from .thisav import ThisAVIE
diff --git a/yt_dlp/extractor/theta.py b/yt_dlp/extractor/theta.py
new file mode 100644
index 0000000000..34c0da8156
--- /dev/null
+++ b/yt_dlp/extractor/theta.py
@@ -0,0 +1,51 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import try_get
+
+
+class ThetaIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?theta\.tv/(?P<id>[a-z0-9]+)'
+    _TESTS = [{
+        'url': 'https://www.theta.tv/davirus',
+        'skip': 'The live may have ended',
+        'info_dict': {
+            'id': 'DaVirus',
+            'ext': 'mp4',
+            'title': 'I choose you - My Community is King -👀 - YO HABLO ESPANOL - CODE DAVIRUS',
+            'thumbnail': r're:https://live-thumbnails-prod-theta-tv\.imgix\.net/thumbnail/.+\.jpg',
+        }
+    }, {
+        'url': 'https://www.theta.tv/mst3k',
+        'note': 'This channel is live 24/7',
+        'info_dict': {
+            'id': 'MST3K',
+            'ext': 'mp4',
+            'title': 'Mystery Science Theatre 3000 24/7 Powered by the THETA Network.',
+            'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+\.jpg',
+        }
+    }]
+
+    def _real_extract(self, url):
+        channel_id = self._match_id(url)
+        info = self._download_json(f'https://api.theta.tv/v1/channel?alias={channel_id}', channel_id)['body']
+
+        m3u8_playlist = next(
+            data['url'] for data in info['live_stream']['video_urls']
+            if data.get('type') != 'embed' and data.get('resolution') in ('master', 'source'))
+
+        formats = self._extract_m3u8_formats(m3u8_playlist, channel_id, 'mp4', m3u8_id='hls', live=True)
+        self._sort_formats(formats)
+
+        channel = try_get(info, lambda x: x['user']['username'])  # using this field instead of channel_id due to capitalization
+
+        return {
+            'id': channel,
+            'title': try_get(info, lambda x: x['live_stream']['title']),
+            'channel': channel,
+            'view_count': try_get(info, lambda x: 
x['live_stream']['view_count']),
+            'is_live': True,
+            'formats': formats,
+            'thumbnail': try_get(info, lambda x: x['live_stream']['thumbnail_url']),
+        }

From e27cc5d864f8b7be27357e5dd2d32493fd9e5829 Mon Sep 17 00:00:00 2001
From: renalid
Date: Fri, 24 Sep 2021 02:56:15 +0200
Subject: [PATCH 152/641] [Arte] Improve description extraction (#1046)

Authored by: renalid
---
 yt_dlp/extractor/arte.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py
index ed245b75fd..296b169d2a 100644
--- a/yt_dlp/extractor/arte.py
+++ b/yt_dlp/extractor/arte.py
@@ -174,7 +174,7 @@ class ArteTVIE(ArteTVBaseIE):
         return {
             'id': player_info.get('VID') or video_id,
             'title': title,
-            'description': player_info.get('VDE'),
+            'description': player_info.get('VDE') or player_info.get('V7T'),
             'upload_date': unified_strdate(upload_date_str),
             'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
             'formats': formats,

From 4c88ff87fc0e84659f7b6a7a88997eb6851125a0 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Fri, 24 Sep 2021 06:31:43 +0530
Subject: [PATCH 153/641] [build] Improve release process (#880)

* Automate more of the release process by animelover1984, pukkandan - closes #823
* Fix sha256 by nihil-admirari - closes #385
* Bring back brew taps by nao20010128nao #865
* Provide `--onedir` zip for windows by pukkandan - Closes #1024, #661, #705 and #890

Authored by: pukkandan, animelover1984, nihil-admirari, nao20010128nao
---
 .github/workflows/build.yml | 186 +++++++++++++++++++++++++-----------
 Changelog.md                |   7 +-
 pyinst.py                   |   7 +-
 yt_dlp/YoutubeDL.py         |  10 +-
 yt_dlp/update.py            |  34 ++++---
 5 files changed, 162 insertions(+), 82 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 4c56a5180b..ccc2b2e471 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -12,11 +12,15 @@ jobs:
     outputs:
       ytdlp_version: ${{ steps.bump_version.outputs.ytdlp_version }}
       upload_url: ${{ steps.create_release.outputs.upload_url }}
-      sha256_unix: ${{ steps.sha256_file.outputs.sha256_unix }}
-      sha512_unix: ${{ steps.sha512_file.outputs.sha512_unix }}
+      sha256_bin: ${{ steps.sha256_bin.outputs.sha256_bin }}
+      sha512_bin: ${{ steps.sha512_bin.outputs.sha512_bin }}
+      sha256_tar: ${{ steps.sha256_tar.outputs.sha256_tar }}
+      sha512_tar: ${{ steps.sha512_tar.outputs.sha512_tar }}
 
     steps:
       - uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
       - name: Set up Python
         uses: actions/setup-python@v2
         with:
@@ -25,11 +29,76 @@ jobs:
         run: sudo apt-get -y install zip pandoc man
       - name: Bump version
         id: bump_version
-        run: python devscripts/update-version.py
+        run: |
+          python devscripts/update-version.py
+          make issuetemplates
       - name: Print version
         run: echo "${{ steps.bump_version.outputs.ytdlp_version }}"
+      - name: Update master
+        id: push_update
+        run: |
+          git config --global user.email "${{ github.event.pusher.email }}"
+          git config --global user.name "${{ github.event.pusher.name }}"
+          git add -u
+          git commit -m "[version] update" -m ":ci skip all"
+          git pull --rebase origin ${{ github.event.repository.master_branch }}
+          git push origin ${{ github.event.ref }}:${{ github.event.repository.master_branch }}
+          echo ::set-output name=head_sha::$(git rev-parse HEAD)
+      - name: Get Changelog
+        id: get_changelog
+        run: |
+          changelog=$(cat Changelog.md | grep -oPz '(?s)(?<=### ${{ steps.bump_version.outputs.ytdlp_version }}\n{2}).+?(?=\n{2,3}###)')
+          echo "changelog<<EOF" >> $GITHUB_ENV
+          echo "$changelog" >> $GITHUB_ENV
+          
echo "EOF" >> $GITHUB_ENV - name: Run Make run: make all tar + - name: Get SHA2-256SUMS for yt-dlp + id: sha256_bin + run: echo "::set-output name=sha256_bin::$(sha256sum yt-dlp | awk '{print $1}')" + - name: Get SHA2-256SUMS for yt-dlp.tar.gz + id: sha256_tar + run: echo "::set-output name=sha256_tar::$(sha256sum yt-dlp.tar.gz | awk '{print $1}')" + - name: Get SHA2-512SUMS for yt-dlp + id: sha512_bin + run: echo "::set-output name=sha512_bin::$(sha512sum yt-dlp | awk '{print $1}')" + - name: Get SHA2-512SUMS for yt-dlp.tar.gz + id: sha512_tar + run: echo "::set-output name=sha512_tar::$(sha512sum yt-dlp.tar.gz | awk '{print $1}')" + - name: Install SSH private key + env: + BREW_TOKEN: ${{ secrets.BREW_TOKEN }} + if: "env.BREW_TOKEN != ''" + uses: webfactory/ssh-agent@v0.5.3 + with: + ssh-private-key: ${{ env.BREW_TOKEN }} + - name: Update Homebrew Formulae + env: + BREW_TOKEN: ${{ secrets.BREW_TOKEN }} + if: "env.BREW_TOKEN != ''" + run: | + git clone git@github.com:yt-dlp/homebrew-taps taps/ + python3 devscripts/update-formulae.py taps/Formula/yt-dlp.rb "${{ steps.bump_version.outputs.ytdlp_version }}" + git -C taps/ config user.name github-actions + git -C taps/ config user.email github-actions@example.com + git -C taps/ commit -am 'yt-dlp: ${{ steps.bump_version.outputs.ytdlp_version }}' + git -C taps/ push + - name: Install dependencies for pypi + env: + PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} + if: "env.PYPI_TOKEN != ''" + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Build and publish on pypi + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + if: "env.TWINE_PASSWORD != ''" + run: | + rm -rf dist/* + python setup.py sdist bdist_wheel + twine upload dist/* - name: Create Release id: create_release uses: actions/create-release@v1 @@ -38,9 +107,10 @@ jobs: with: tag_name: ${{ steps.bump_version.outputs.ytdlp_version }} release_name: yt-dlp ${{ steps.bump_version.outputs.ytdlp_version }} + commitish: ${{ steps.push_update.outputs.head_sha }} body: | Changelog: - PLACEHOLDER + ${{ env.changelog }} draft: false prerelease: false - name: Upload yt-dlp Unix binary @@ -62,36 +132,16 @@ jobs: asset_path: ./yt-dlp.tar.gz asset_name: yt-dlp.tar.gz asset_content_type: application/gzip - - name: Get SHA2-256SUMS for yt-dlp - id: sha256_file - run: echo "::set-output name=sha256_unix::$(sha256sum yt-dlp | awk '{print $1}')" - - name: Get SHA2-512SUMS for yt-dlp - id: sha512_file - run: echo "::set-output name=sha512_unix::$(sha512sum yt-dlp | awk '{print $1}')" - - name: Install dependencies for pypi - env: - PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} - if: "env.PYPI_TOKEN != ''" - run: | - python -m pip install --upgrade pip - pip install setuptools wheel twine - - name: Build and publish on pypi - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} - if: "env.TWINE_PASSWORD != ''" - run: | - rm -rf dist/* - python setup.py sdist bdist_wheel - twine upload dist/* build_windows: runs-on: windows-latest needs: build_unix outputs: - sha256_windows: ${{ steps.sha256_file_win.outputs.sha256_windows }} - sha512_windows: ${{ steps.sha512_file_win.outputs.sha512_windows }} + sha256_win: ${{ steps.sha256_win.outputs.sha256_win }} + sha512_win: ${{ steps.sha512_win.outputs.sha512_win }} + sha256_win_zip: ${{ steps.sha256_win_zip.outputs.sha256_win_zip }} + sha512_win_zip: ${{ steps.sha512_win_zip.outputs.sha512_win_zip }} steps: - uses: actions/checkout@v2 @@ -104,7 +154,7 @@ jobs: run: python -m pip install 
--upgrade pip setuptools wheel - name: Install Requirements # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds - run: pip install "https://yt-dlp.github.io/pyinstaller-builds/x86_64/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodome websockets + run: pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodome websockets - name: Bump version id: bump_version run: python devscripts/update-version.py @@ -123,19 +173,41 @@ jobs: asset_name: yt-dlp.exe asset_content_type: application/vnd.microsoft.portable-executable - name: Get SHA2-256SUMS for yt-dlp.exe - id: sha256_file_win - run: echo "::set-output name=sha256_windows::$((Get-FileHash dist\yt-dlp.exe -Algorithm SHA256).Hash.ToLower())" + id: sha256_win + run: echo "::set-output name=sha256_win::$((Get-FileHash dist\yt-dlp.exe -Algorithm SHA256).Hash.ToLower())" - name: Get SHA2-512SUMS for yt-dlp.exe - id: sha512_file_win - run: echo "::set-output name=sha512_windows::$((Get-FileHash dist\yt-dlp.exe -Algorithm SHA512).Hash.ToLower())" + id: sha512_win + run: echo "::set-output name=sha512_win::$((Get-FileHash dist\yt-dlp.exe -Algorithm SHA512).Hash.ToLower())" + - name: Run PyInstaller Script with --onedir + run: python pyinst.py 64 --onedir + - uses: papeloto/action-zip@v1 + with: + files: ./dist/yt-dlp + dest: ./dist/yt-dlp.zip + - name: Upload yt-dlp.zip Windows onedir + id: upload-release-windows-zip + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ needs.build_unix.outputs.upload_url }} + asset_path: ./dist/yt-dlp.zip + asset_name: yt-dlp.zip + asset_content_type: application/zip + - name: Get SHA2-256SUMS for yt-dlp.zip + id: sha256_win_zip + run: echo "::set-output name=sha256_win_zip::$((Get-FileHash dist\yt-dlp.zip -Algorithm SHA256).Hash.ToLower())" + - name: Get SHA2-512SUMS for yt-dlp.zip + id: sha512_win_zip + run: echo "::set-output name=sha512_win_zip::$((Get-FileHash dist\yt-dlp.zip -Algorithm SHA512).Hash.ToLower())" build_windows32: runs-on: windows-latest needs: [build_unix, build_windows] outputs: - sha256_windows32: ${{ steps.sha256_file_win32.outputs.sha256_windows32 }} - sha512_windows32: ${{ steps.sha512_file_win32.outputs.sha512_windows32 }} + sha256_win32: ${{ steps.sha256_win32.outputs.sha256_win32 }} + sha512_win32: ${{ steps.sha512_win32.outputs.sha512_win32 }} steps: - uses: actions/checkout@v2 @@ -148,7 +220,7 @@ jobs: - name: Upgrade pip and enable wheel support run: python -m pip install --upgrade pip setuptools wheel - name: Install Requirements - run: pip install "https://yt-dlp.github.io/pyinstaller-builds/i686/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodome websockets + run: pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodome websockets - name: Bump version id: bump_version run: python devscripts/update-version.py @@ -167,11 +239,11 @@ jobs: asset_name: yt-dlp_x86.exe asset_content_type: application/vnd.microsoft.portable-executable - name: Get SHA2-256SUMS for yt-dlp_x86.exe - id: sha256_file_win32 - run: echo "::set-output name=sha256_windows32::$((Get-FileHash dist\yt-dlp_x86.exe -Algorithm SHA256).Hash.ToLower())" + id: sha256_win32 + run: echo "::set-output name=sha256_win32::$((Get-FileHash dist\yt-dlp_x86.exe -Algorithm SHA256).Hash.ToLower())" - name: Get SHA2-512SUMS for yt-dlp_x86.exe - id: sha512_file_win32 - run: echo "::set-output 
name=sha512_windows32::$((Get-FileHash dist\yt-dlp_x86.exe -Algorithm SHA512).Hash.ToLower())"
+        id: sha512_win32
+        run: echo "::set-output name=sha512_win32::$((Get-FileHash dist\yt-dlp_x86.exe -Algorithm SHA512).Hash.ToLower())"
 
   finish:
     runs-on: ubuntu-latest
     needs: [build_unix, build_windows, build_windows32]
 
     steps:
       - name: Make SHA2-256SUMS file
         env:
-          SHA256_WINDOWS: ${{ needs.build_windows.outputs.sha256_windows }}
-          SHA256_WINDOWS32: ${{ needs.build_windows32.outputs.sha256_windows32 }}
-          SHA256_UNIX: ${{ needs.build_unix.outputs.sha256_unix }}
-          YTDLP_VERSION: ${{ needs.build_unix.outputs.ytdlp_version }}
+          SHA256_WIN: ${{ needs.build_windows.outputs.sha256_win }}
+          SHA256_WIN_ZIP: ${{ needs.build_windows.outputs.sha256_win_zip }}
+          SHA256_WIN32: ${{ needs.build_windows32.outputs.sha256_win32 }}
+          SHA256_BIN: ${{ needs.build_unix.outputs.sha256_bin }}
+          SHA256_TAR: ${{ needs.build_unix.outputs.sha256_tar }}
         run: |
-          echo "version:${{ env.YTDLP_VERSION }}" >> SHA2-256SUMS
-          echo "yt-dlp.exe:${{ env.SHA256_WINDOWS }}" >> SHA2-256SUMS
-          echo "yt-dlp_x86.exe:${{ env.SHA256_WINDOWS32 }}" >> SHA2-256SUMS
-          echo "yt-dlp:${{ env.SHA256_UNIX }}" >> SHA2-256SUMS
+          echo "${{ env.SHA256_WIN }}  yt-dlp.exe" >> SHA2-256SUMS
+          echo "${{ env.SHA256_WIN32 }}  yt-dlp_x86.exe" >> SHA2-256SUMS
+          echo "${{ env.SHA256_BIN }}  yt-dlp" >> SHA2-256SUMS
+          echo "${{ env.SHA256_TAR }}  yt-dlp.tar.gz" >> SHA2-256SUMS
+          echo "${{ env.SHA256_WIN_ZIP }}  yt-dlp.zip" >> SHA2-256SUMS
       - name: Upload 256SUMS file
         id: upload-sums
         uses: actions/upload-release-asset@v1
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         with:
           upload_url: ${{ needs.build_unix.outputs.upload_url }}
           asset_path: ./SHA2-256SUMS
           asset_name: SHA2-256SUMS
           asset_content_type: text/plain
       - name: Make SHA2-512SUMS file
         env:
-          SHA512_WINDOWS: ${{ needs.build_windows.outputs.sha512_windows }}
-          SHA512_WINDOWS32: ${{ needs.build_windows32.outputs.sha512_windows32 }}
-          SHA512_UNIX: ${{ needs.build_unix.outputs.sha512_unix }}
+          SHA512_WIN: ${{ needs.build_windows.outputs.sha512_win }}
+          SHA512_WIN_ZIP: ${{ needs.build_windows.outputs.sha512_win_zip }}
+          SHA512_WIN32: ${{ needs.build_windows32.outputs.sha512_win32 }}
+          SHA512_BIN: ${{ needs.build_unix.outputs.sha512_bin }}
+          SHA512_TAR: ${{ needs.build_unix.outputs.sha512_tar }}
         run: |
-          echo "${{ env.SHA512_WINDOWS }}  yt-dlp.exe" >> SHA2-512SUMS
-          echo "${{ env.SHA512_WINDOWS32 }}  yt-dlp_x86.exe" >> SHA2-512SUMS
-          echo "${{ env.SHA512_UNIX }}  yt-dlp" >> SHA2-512SUMS
+          echo "${{ env.SHA512_WIN }}  yt-dlp.exe" >> SHA2-512SUMS
+          echo "${{ env.SHA512_WIN32 }}  yt-dlp_x86.exe" >> SHA2-512SUMS
+          echo "${{ env.SHA512_BIN }}  yt-dlp" >> SHA2-512SUMS
+          echo "${{ env.SHA512_TAR }}  yt-dlp.tar.gz" >> SHA2-512SUMS
+          echo "${{ env.SHA512_WIN_ZIP }}  yt-dlp.zip" >> SHA2-512SUMS
       - name: Upload 512SUMS file
         id: upload-512sums
         uses: actions/upload-release-asset@v1
diff --git a/Changelog.md b/Changelog.md
index 6901e28f2f..b555c953f0 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -7,14 +7,9 @@
 * Update Changelog.md and CONTRIBUTORS
 * Change "Merged with ytdl" version in Readme.md if needed
 * Add new/fixed extractors in "new features" section of Readme.md
-* Commit to master as `Release <version>`
+* Commit as `Release <version>`
 * Push to origin/release using `git push origin master:release`
     build task will now run
-* Update version.py using `devscripts\update-version.py`
-* Run `make issuetemplates`
-* Commit to master as `[version] update :ci skip all`
-* Push to origin/master
-* Update changelog in /releases
 -->
diff --git a/pyinst.py b/pyinst.py
index fb8eca3e5c..d65243f880 100644
--- a/pyinst.py
+++ b/pyinst.py
@@ -15,9 +15,11 @@ import PyInstaller.__main__
 
 arch = sys.argv[1] if len(sys.argv) > 1 
else platform.architecture()[0][:2] assert arch in ('32', '64') -print('Building %sbit version' % arch) _x86 = '_x86' if arch == '32' else '' +opts = sys.argv[2:] or ['--onefile'] +print(f'Building {arch}bit version with options {opts}') + FILE_DESCRIPTION = 'yt-dlp%s' % (' (32 Bit)' if _x86 else '') # root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) @@ -72,11 +74,12 @@ excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc'] PyInstaller.__main__.run([ '--name=yt-dlp%s' % _x86, - '--onefile', '--icon=devscripts/logo.ico', *[f'--exclude-module={module}' for module in excluded_modules], *[f'--hidden-import={module}' for module in dependancies], '--upx-exclude=vcruntime140.dll', + '--noconfirm', + *opts, 'yt_dlp/__main__.py', ]) SetVersion('dist/yt-dlp%s.exe' % _x86, VERSION_FILE) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 8df8f16754..2258e22af4 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -27,7 +27,6 @@ import traceback import random from string import ascii_letters -from zipimport import zipimporter from .compat import ( compat_basestring, @@ -143,6 +142,7 @@ from .postprocessor import ( FFmpegPostProcessor, MoveFilesAfterDownloadPP, ) +from .update import detect_variant from .version import __version__ if compat_os_name == 'nt': @@ -3266,12 +3266,8 @@ class YoutubeDL(object): self.get_encoding())) write_string(encoding_str, encoding=None) - source = ( - '(exe)' if hasattr(sys, 'frozen') - else '(zip)' if isinstance(globals().get('__loader__'), zipimporter) - else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py' - else '') - self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source)) + source = detect_variant() + self._write_string('[debug] yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})')) if _LAZY_LOADER: self._write_string('[debug] Lazy loading extractors enabled\n') if _PLUGIN_CLASSES: diff --git a/yt_dlp/update.py b/yt_dlp/update.py index d3681b8323..531eea7c91 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -31,6 +31,18 @@ def rsa_verify(message, signature, key): ''' +def detect_variant(): + if hasattr(sys, 'frozen') and getattr(sys, '_MEIPASS', None): + if sys._MEIPASS == os.path.dirname(sys.executable): + return 'dir' + return 'exe' + elif isinstance(globals().get('__loader__'), zipimporter): + return 'zip' + elif os.path.basename(sys.argv[0]) == '__main__.py': + return 'source' + return 'unknown' + + def update_self(to_screen, verbose, opener): ''' Exists for backward compatibility. Use run_update(ydl) instead ''' @@ -87,13 +99,14 @@ def run_update(ydl): h.update(mv[:n]) return h.hexdigest() - err = None - if isinstance(globals().get('__loader__'), zipimporter): - pass - elif hasattr(sys, 'frozen'): - pass - else: - err = 'It looks like you installed yt-dlp with a package manager, pip, setup.py or a tarball. Please use that to update' + ERRORS = { + 'exe': None, + 'zip': None, + 'dir': 'Auto-update is not supported for unpackaged windows executable. Re-download the latest release', + 'source': 'You cannot update when running from source code', + 'unknown': 'It looks like you installed yt-dlp with a package manager, pip, setup.py or a tarball. 
Use that to update',
+    }
+    err = ERRORS.get(detect_variant(), ERRORS['unknown'])
     if err:
         return report_error(err, expected=True)
 
@@ -138,12 +151,7 @@ def run_update(ydl):
         if not urlh:
             return None
         hash_data = ydl._opener.open(urlh).read().decode('utf-8')
-        if hash_data.startswith('version:'):
-            # Old colon-separated hash file
-            return dict(ln.split(':') for ln in hash_data.splitlines()).get(filename)
-        else:
-            # GNU-style hash file
-            return dict(ln.split()[::-1] for ln in hash_data.splitlines()).get(filename)
+        return dict(ln.split()[::-1] for ln in hash_data.splitlines()).get(filename)
 
     if not os.access(filename, os.W_OK):
         return report_error('no write permissions on %s' % filename, expected=True)

From 20b91b9b633fbdf3bc31897a41efd2b6cf8fe140 Mon Sep 17 00:00:00 2001
From: f4pp3rk1ng <86558148+f4pp3rk1ng@users.noreply.github.com>
Date: Fri, 24 Sep 2021 03:06:30 +0200
Subject: [PATCH 154/641] [SpankBang] Fix uploader (#892)

Closes #833
Authored by: f4pp3rk1ng, coletdjnz
---
 yt_dlp/extractor/spankbang.py | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/yt_dlp/extractor/spankbang.py b/yt_dlp/extractor/spankbang.py
index d70331bb35..dd849ae13e 100644
--- a/yt_dlp/extractor/spankbang.py
+++ b/yt_dlp/extractor/spankbang.py
@@ -26,17 +26,18 @@ class SpankBangIE(InfoExtractor):
         )
     '''
     _TESTS = [{
-        'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
-        'md5': '1cc433e1d6aa14bc376535b8679302f7',
+        'url': 'https://spankbang.com/56b3d/video/the+slut+maker+hmv',
+        'md5': '2D13903DE4ECC7895B5D55930741650A',
         'info_dict': {
-            'id': '3vvn',
+            'id': '56b3d',
             'ext': 'mp4',
-            'title': 'fantasy solo',
-            'description': 'dillion harper masturbates on a bed',
+            'title': 'The Slut Maker HMV',
+            'description': 'Girls getting converted into cock slaves.',
             'thumbnail': r're:^https?://.*\.jpg$',
-            'uploader': 'silly2587',
-            'timestamp': 1422571989,
-            'upload_date': '20150129',
+            'uploader': 'Mindself',
+            'uploader_id': 'mindself',
+            'timestamp': 1617109572,
+            'upload_date': '20210330',
             'age_limit': 18,
         }
     }, {
@@ -134,15 +135,15 @@ class SpankBangIE(InfoExtractor):
         info = self._search_json_ld(webpage, video_id, default={})
 
         title = self._html_search_regex(
-            r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title', default=None)
+            r'(?s)<h1[^>]+\btitle=["\']([^"]+)["\']>', webpage, 'title', default=None)
         description = self._search_regex(
             r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)',
             webpage, 'description', default=None)
         thumbnail = self._og_search_thumbnail(webpage, default=None)
         uploader = self._html_search_regex(
-            (r'(?s)<li[^>]+class=["\']profile[^>]+>(.+?)</a>',
-             r'class="user"[^>]*><img[^>]+>([^<]+)'),
-            webpage, 'uploader', default=None)
+            r'<svg[^>]+\bclass="(?:[^"]*?user[^"]*?)">.*?</svg>([^<]+)', webpage, 'uploader', default=None)
+        uploader_id = self._html_search_regex(
+            r'<a[^>]+href="/profile/([^"]+)"', webpage, 'uploader_id', default=None)
         duration = parse_duration(self._search_regex(
             r'<span[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)',
             webpage, 'duration', default=None))
@@ -157,6 +158,7 @@ class SpankBangIE(InfoExtractor):
             'description': description,
             'thumbnail': thumbnail,
             'uploader': uploader,
+            'uploader_id': uploader_id,
             'duration': duration,
             'view_count': view_count,
             'formats': formats,

From 600e900300139406a9ad76190bfa4459afbffe6e Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Fri, 24 Sep 2021 07:44:59 +0530
Subject: [PATCH 155/641] [zdf] Improve format sorting

Closes #910
---
 yt_dlp/extractor/zdf.py | 42 +++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py
index 4dd56f66d4..8c279c5ab3 100644
--- a/yt_dlp/extractor/zdf.py
+++ b/yt_dlp/extractor/zdf.py
@@ -14,6 +14,7 @@ from ..utils import (
     orderedSet,
     parse_codecs,
     qualities,
+    str_or_none,
     try_get,
     unified_timestamp,
     update_url_query,
@@ -49,35 +50,35 @@ class ZDFBaseIE(InfoExtractor):
 
     def _extract_format(self, video_id, formats, format_urls, meta):
         format_url = url_or_none(meta.get('url'))
-        if not format_url:
-            return
-        if format_url in format_urls:
+        if not format_url or format_url in format_urls:
             return
         format_urls.add(format_url)
-        mime_type = meta.get('mimeType')
-        ext = determine_ext(format_url)
+
+        mime_type, ext = meta.get('mimeType'), determine_ext(format_url)
         if mime_type == 'application/x-mpegURL' or ext == 'm3u8':
-            formats.extend(self._extract_m3u8_formats(
+            new_formats = self._extract_m3u8_formats(
                 format_url, video_id, 'mp4', m3u8_id='hls',
-                entry_protocol='m3u8_native', fatal=False))
+                entry_protocol='m3u8_native', fatal=False)
         elif mime_type == 'application/f4m+xml' or ext == 'f4m':
-            formats.extend(self._extract_f4m_formats(
-                update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False))
+            new_formats = self._extract_f4m_formats(
+                update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False)
         else:
             f = parse_codecs(meta.get('mimeCodec'))
-            format_id = ['http']
-            for p in (meta.get('type'), meta.get('quality')):
-                if p and isinstance(p, compat_str):
-                    format_id.append(p)
+            if not f and meta.get('type'):
+                data = meta['type'].split('_')
+                if try_get(data, lambda x: x[2]) == ext:
+                    f = {'vcodec': data[0], 'acodec': data[1]}
             f.update({
                 'url': format_url,
-                'format_id': '-'.join(format_id),
-                'format_note': meta.get('quality'),
-                'language': meta.get('language'),
-                'quality': qualities(self._QUALITIES)(meta.get('quality')),
-                'preference': -10,
+                'format_id': '-'.join(filter(str_or_none, ('http', meta.get('type'), meta.get('quality')))),
             })
-            formats.append(f)
+            new_formats = [f]
+        formats.extend(merge_dicts(f, {
+            'format_note': ', '.join(filter(None, (meta.get('quality'), meta.get('class')))),
+            'language': meta.get('language'),
+            'language_preference': 10 if meta.get('class') == 'main' else -10 if meta.get('class') == 'ad' else -1,
+            'quality': qualities(self._QUALITIES)(meta.get('quality')),
+        }) for f in new_formats)
 
     def _extract_ptmd(self, ptmd_url, 
video_id, api_token, referrer): ptmd = self._call_api( @@ -106,9 +107,10 @@ class ZDFBaseIE(InfoExtractor): 'type': f.get('type'), 'mimeType': f.get('mimeType'), 'quality': quality.get('quality'), + 'class': track.get('class'), 'language': track.get('language'), }) - self._sort_formats(formats) + self._sort_formats(formats, ('hasaud', 'res', 'quality', 'language_preference')) duration = float_or_none(try_get( ptmd, lambda x: x['attributes']['duration']['value']), scale=1000) From ff1c7fc9d3e54c3584117ce76e2b6ce9da030af2 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 25 Sep 2021 03:31:35 +0530 Subject: [PATCH 156/641] Allow `0` in `--playlist-items` --- test/test_YoutubeDL.py | 1 + yt_dlp/YoutubeDL.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 210bf441c8..6feca2ce24 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -1000,6 +1000,7 @@ class TestYoutubeDL(unittest.TestCase): test_selection({'playlist_items': '2-4'}, [2, 3, 4]) test_selection({'playlist_items': '2,4'}, [2, 4]) test_selection({'playlist_items': '10'}, []) + test_selection({'playlist_items': '0'}, []) # Tests for https://github.com/ytdl-org/youtube-dl/issues/10591 test_selection({'playlist_items': '2-4,3-4,3'}, [2, 3, 4]) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2258e22af4..11371fa860 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1464,7 +1464,10 @@ class YoutubeDL(object): )(self, i) entries = [] - for i in playlistitems or itertools.count(playliststart): + items = playlistitems if playlistitems is not None else itertools.count(playliststart) + for i in items: + if i == 0: + continue if playlistitems is None and playlistend is not None and playlistend < i: break entry = None From a5de4099cb9042d057ab0cc9a6c379c31b0be3c9 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 25 Sep 2021 06:27:37 +0530 Subject: [PATCH 157/641] [build] Fix brew tap --- .github/workflows/build.yml | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ccc2b2e471..515c501642 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -65,6 +65,22 @@ jobs: - name: Get SHA2-512SUMS for yt-dlp.tar.gz id: sha512_tar run: echo "::set-output name=sha512_tar::$(sha512sum yt-dlp.tar.gz | awk '{print $1}')" + - name: Install dependencies for pypi + env: + PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} + if: "env.PYPI_TOKEN != ''" + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Build and publish on pypi + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + if: "env.TWINE_PASSWORD != ''" + run: | + rm -rf dist/* + python setup.py sdist bdist_wheel + twine upload dist/* - name: Install SSH private key env: BREW_TOKEN: ${{ secrets.BREW_TOKEN }} @@ -83,22 +99,6 @@ jobs: git -C taps/ config user.email github-actions@example.com git -C taps/ commit -am 'yt-dlp: ${{ steps.bump_version.outputs.ytdlp_version }}' git -C taps/ push - - name: Install dependencies for pypi - env: - PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} - if: "env.PYPI_TOKEN != ''" - run: | - python -m pip install --upgrade pip - pip install setuptools wheel twine - - name: Build and publish on pypi - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} - if: "env.TWINE_PASSWORD != ''" - run: | - rm -rf dist/* - python setup.py sdist bdist_wheel - twine 
upload dist/* - name: Create Release id: create_release uses: actions/create-release@v1 From 0ef787d7734b60c874a4d37455e5060c6d6d4504 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 25 Sep 2021 06:22:22 +0530 Subject: [PATCH 158/641] Release 2021.09.25 --- CONTRIBUTORS | 18 +++++++ Changelog.md | 119 +++++++++++++++++++++++++++++++++++++++++++--- README.md | 12 ++--- supportedsites.md | 38 +++++++++++++-- 4 files changed, 171 insertions(+), 16 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 5a976fad76..e44302d57a 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -100,3 +100,21 @@ korli octotherp CeruleanSky zootedb0t +chao813 +ChillingPepper +ConquerorDopy +dalanmiller +DigitalDJ +f4pp3rk1ng +gesa +Jules-A +makeworld-the-better-one +MKSherbini +mrx23dot +poschi3 +raphaeldore +renalid +sleaux-meaux +sulyi +tmarki +Vangelis66 diff --git a/Changelog.md b/Changelog.md index b555c953f0..35a1b2680d 100644 --- a/Changelog.md +++ b/Changelog.md @@ -14,6 +14,116 @@ --> +### 2021.09.25 + +* Add new option `--netrc-location` +* [outtmpl] Allow alternate fields using `,` +* [outtmpl] Add format type `B` to treat the value as bytes (eg: to limit the filename to a certain number of bytes) +* Separate the options `--ignore-errors` and `--no-abort-on-error` +* Basic framework for simultaneous download of multiple formats by [nao20010128nao](https://github.com/nao20010128nao) +* [17live] Add 17.live extractor by [nao20010128nao](https://github.com/nao20010128nao) +* [bilibili] Add BiliIntlIE and BiliIntlSeriesIE by [Ashish0804](https://github.com/Ashish0804) +* [CAM4] Add extractor by [alerikaisattera](https://github.com/alerikaisattera) +* [Chingari] Add extractors by [Ashish0804](https://github.com/Ashish0804) +* [CGTN] Add extractor by [chao813](https://github.com/chao813) +* [damtomo] Add extractor by [nao20010128nao](https://github.com/nao20010128nao) +* [gotostage] Add extractor by [poschi3](https://github.com/poschi3) +* [Koo] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [Mediaite] Add Extractor by [Ashish0804](https://github.com/Ashish0804) +* [Mediaklikk] Add Extractor by [tmarki](https://github.com/tmarki), [mrx23dot](https://github.com/mrx23dot), [coletdjnz](https://github.com/coletdjnz) +* [MuseScore] Add Extractor by [Ashish0804](https://github.com/Ashish0804) +* [Newgrounds] Add NewgroundsUserIE and improve extractor by [u-spec-png](https://github.com/u-spec-png) +* [nzherald] Add NZHeraldIE by [coletdjnz](https://github.com/coletdjnz) +* [Olympics] Add replay extractor by [Ashish0804](https://github.com/Ashish0804) +* [Peertube] Add channel and playlist extractors by [u-spec-png](https://github.com/u-spec-png) +* [radlive] Add extractor by [nyuszika7h](https://github.com/nyuszika7h) +* [SovietsCloset] Add extractor by [ChillingPepper](https://github.com/ChillingPepper) +* [Streamanity] Add Extractor by [alerikaisattera](https://github.com/alerikaisattera) +* [Theta] Add extractor by [alerikaisattera](https://github.com/alerikaisattera) +* [Yandex] Add ZenYandexIE and ZenYandexChannelIE by [Ashish0804](https://github.com/Ashish0804) + +* [9Now] handle episodes of series by [dalanmiller](https://github.com/dalanmiller) +* [AnimalPlanet] Fix extractor by [Sipherdrakon](https://github.com/Sipherdrakon) +* [Arte] Improve description extraction by [renalid](https://github.com/renalid) +* [atv.at] Use jwt for API by [NeroBurner](https://github.com/NeroBurner) +* [brightcove] Extract subtitles from manifests +* [CBC] Fix CBC Gem extractors by 
[makeworld-the-better-one](https://github.com/makeworld-the-better-one) +* [cbs] Report appropriate error for DRM +* [comedycentral] Support `collection-playlist` by [nixxo](https://github.com/nixxo) +* [DIYNetwork] Support new format by [Sipherdrakon](https://github.com/Sipherdrakon) +* [downloader/niconico] Pass custom headers by [nao20010128nao](https://github.com/nao20010128nao) +* [dw] Fix extractor +* [Fancode] Fix live streams by [zenerdi0de](https://github.com/zenerdi0de) +* [funimation] Fix for locations outside US by [Jules-A](https://github.com/Jules-A), [pukkandan](https://github.com/pukkandan) +* [globo] Fix GloboIE by [Ashish0804](https://github.com/Ashish0804) +* [HiDive] Fix extractor by [Ashish0804](https://github.com/Ashish0804) +* [Hotstar] Add referer for subs by [Ashish0804](https://github.com/Ashish0804) +* [itv] Fix extractor, add subtitles and thumbnails by [coletdjnz](https://github.com/coletdjnz), [sleaux-meaux](https://github.com/sleaux-meaux), [Vangelis66](https://github.com/Vangelis66) +* [lbry] Show error message from API response +* [Mxplayer] Use mobile API by [Ashish0804](https://github.com/Ashish0804) +* [NDR] Rewrite NDRIE by [Ashish0804](https://github.com/Ashish0804) +* [Nuvid] Fix extractor by [u-spec-png](https://github.com/u-spec-png) +* [Oreilly] Handle new web url by [MKSherbini](https://github.com/MKSherbini) +* [pbs] Fix subtitle extraction by [coletdjnz](https://github.com/coletdjnz), [gesa](https://github.com/gesa), [raphaeldore](https://github.com/raphaeldore) +* [peertube] Update instances by [u-spec-png](https://github.com/u-spec-png) +* [plutotv] Fix extractor for URLs with `/en` +* [reddit] Workaround for 429 by redirecting to old.reddit.com +* [redtube] Fix exts +* [soundcloud] Make playlist extraction lazy +* [soundcloud] Retry playlist pages on `502` error and update `_CLIENT_ID` +* [southpark] Fix SouthParkDE by [coletdjnz](https://github.com/coletdjnz) +* [SovietsCloset] Fix playlists for games with only named categories by [ConquerorDopy](https://github.com/ConquerorDopy) +* [SpankBang] Fix uploader by [f4pp3rk1ng](https://github.com/f4pp3rk1ng), [coletdjnz](https://github.com/coletdjnz) +* [tiktok] Use API to fetch higher quality video by [MinePlayersPE](https://github.com/MinePlayersPE), [llacb47](https://github.com/llacb47) +* [TikTokUser] Fix extractor using mobile API by [MinePlayersPE](https://github.com/MinePlayersPE), [llacb47](https://github.com/llacb47) +* [videa] Fix some extraction errors by [nyuszika7h](https://github.com/nyuszika7h) +* [VrtNU] Handle login errors by [llacb47](https://github.com/llacb47) +* [vrv] Don't raise error when thumbnails are missing +* [youtube] Cleanup authentication code by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Fix `--mark-watched` with `--cookies-from-browser` +* [youtube] Improvements to JS player extraction and add extractor-args to skip it by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Retry on 'Unknown Error' by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Return full URL instead of just ID +* [youtube] Warn when trying to download clips +* [zdf] Improve format sorting +* [zype] Extract subtitles from the m3u8 manifest by [fstirlitz](https://github.com/fstirlitz) +* Allow `--force-write-archive` to work with `--flat-playlist` +* Download subtitles in order of `--sub-langs` +* Allow `0` in `--playlist-items` +* Handle more playlist errors with `-i` +* Fix `--no-get-comments` +* Fix `extra_info` being reused across runs +* Fix compat options 
`no-direct-merge` and `playlist-index` +* Dump files should obey `--trim-filename` by [sulyi](https://github.com/sulyi) +* [aes] Add `aes_gcm_decrypt_and_verify` by [sulyi](https://github.com/sulyi), [pukkandan](https://github.com/pukkandan) +* [aria2c] Fix IV for some AES-128 streams by [shirt](https://github.com/shirt-dev) +* [compat] Don't ignore `HOME` (if set) on windows +* [cookies] Make browser names case insensitive +* [cookies] Print warning for cookie decoding error only once +* [extractor] Fix root-relative URLs in MPD by [DigitalDJ](https://github.com/DigitalDJ) +* [ffmpeg] Add `aac_adtstoasc` when merging if needed +* [fragment,aria2c] Generalize and refactor some code +* [fragment] Avoid repeated request for AES key +* [fragment] Fix range header when using `-N` and media sequence by [shirt](https://github.com/shirt-dev) +* [hls,aes] Fallback to native implementation for AES-CBC and detect `Cryptodome` in addition to `Crypto` +* [hls] Byterange + AES128 is supported by native downloader +* [ModifyChapters] Improve sponsor chapter merge algorithm by [nihil-admirari](https://github.com/nihil-admirari) +* [ModifyChapters] Minor fixes +* [WebVTT] Adjust parser to accommodate PBS subtitles +* [utils] Improve `extract_timezone` by [dirkf](https://github.com/dirkf) +* [options] Fix `--no-config` and refactor reading of config files +* [options] Strip spaces and ignore empty entries in list-like switches +* [test/cookies] Improve logging +* [build] Automate more of the release process by [animelover1984](https://github.com/animelover1984), [pukkandan](https://github.com/pukkandan) +* [build] Fix sha256 by [nihil-admirari](https://github.com/nihil-admirari) +* [build] Bring back brew taps by [nao20010128nao](https://github.com/nao20010128nao) +* [build] Provide `--onedir` zip for windows by [pukkandan](https://github.com/pukkandan) +* [cleanup,docs] Add deprecation warning in docs for some counter intuitive behaviour +* [cleanup] Fix line endings for `nebula.py` by [glenn-slayden](https://github.com/glenn-slayden) +* [cleanup] Improve `make clean-test` by [sulyi](https://github.com/sulyi) +* [cleanup] Misc + + ### 2021.09.02 * **Native SponsorBlock** implementation by [nihil-admirari](https://github.com/nihil-admirari), [pukkandan](https://github.com/pukkandan) @@ -37,7 +147,6 @@ * [downloader/ffmpeg] Experimental support for DASH manifests (including live) * Your ffmpeg must have [this patch](https://github.com/FFmpeg/FFmpeg/commit/3249c757aed678780e22e99a1a49f4672851bca9) applied for YouTube DASH to work * [downloader/ffmpeg] Allow passing custom arguments before `-i` - * [BannedVideo] Add extractor by [smege1001](https://github.com/smege1001), [blackjack4494](https://github.com/blackjack4494), [pukkandan](https://github.com/pukkandan) * [bilibili] Add category extractor by [animelover1984](https://github.com/animelover1984) * [Epicon] Add extractors by [Ashish0804](https://github.com/Ashish0804) @@ -55,7 +164,6 @@ * [Tokentube] Add extractor by [u-spec-png](https://github.com/u-spec-png) * [TV2Hu] Fix `TV2HuIE` and add `TV2HuSeriesIE` by [Ashish0804](https://github.com/Ashish0804) * [voicy] Add extractor by [nao20010128nao](https://github.com/nao20010128nao) - * [adobepass] Fix Verizon SAML login by [nyuszika7h](https://github.com/nyuszika7h), [ParadoxGBB](https://github.com/ParadoxGBB) * [afreecatv] Fix adult VODs by [wlritchi](https://github.com/wlritchi) * [afreecatv] Tolerate failure to parse date string by [wlritchi](https://github.com/wlritchi) @@ -95,7 +203,6 @@ * [youtube] 
Prefer audio stream that YouTube considers default * [youtube] Remove annotations and deprecate `--write-annotations` by [coletdjnz](https://github.com/coletdjnz) * [Zee5] Fix extractor and add subtitles by [Ashish0804](https://github.com/Ashish0804) - * [aria2c] Obey `--rate-limit` * [EmbedSubtitle] Continue even if some files are missing * [extractor] Better error message for DRM @@ -180,8 +287,8 @@ ### 2021.08.02 * Add logo, banner and donate links -* Expand and escape environment variables correctly in output template -* Add format types `j` (json), `l` (comma delimited list), `q` (quoted for terminal) in output template +* [outtmpl] Expand and escape environment variables +* [outtmpl] Add format types `j` (json), `l` (comma delimited list), `q` (quoted for terminal) * [downloader] Allow streaming some unmerged formats to stdout using ffmpeg * [youtube] **Age-gate bypass** * Add `agegate` clients by [pukkandan](https://github.com/pukkandan), [MinePlayersPE](https://github.com/MinePlayersPE) @@ -386,7 +493,7 @@ ### 2021.06.09 * Fix bug where `%(field)d` in filename template throws error -* Improve offset parsing in outtmpl +* [outtmpl] Improve offset parsing * [test] More rigorous tests for `prepare_filename` ### 2021.06.08 diff --git a/README.md b/README.md index 07a8e5ef25..a148802822 100644 --- a/README.md +++ b/README.md @@ -88,9 +88,9 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats -* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ, BannedVideo, bilibili categories, Epicon, filmmodu, GabTV, HungamaAlbum, ManotoTV, Niconico search, Patreon User, peloton, ProjectVeritas, radiko, StarTV, tiktok user, Tokentube, voicy, TV2HuSeries +* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ, BannedVideo, bilibili categories, Epicon, filmmodu, GabTV, HungamaAlbum, ManotoTV, Niconico search, Patreon User, peloton, ProjectVeritas, radiko, StarTV, tiktok user, Tokentube, voicy, TV2HuSeries, biliintl, 17live, NewgroundsUser, peertube channel/playlist, ZenYandex, CAM4, CGTN, damtomo, gotostage, Koo, Mediaite, Mediaklikk, MuseScore, nzherald, Olympics replay, radlive, SovietsCloset, Streamanity, Theta, Chingari -* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, 
rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll playlist, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds, SAML Verizon login, Hungama, afreecatv, aljazeera, ATV, bitchute, camtube, CDA, eroprofile, facebook, HearThisAtIE, iwara, kakao, Motherless, Nova, peertube, pornhub, reddit, tiktok, TV2, TV2Hu, tv5mondeplus, VH1, Viafree, XHamster +* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll playlist, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds, SAML Verizon login, Hungama, afreecatv, aljazeera, ATV, bitchute, camtube, CDA, eroprofile, facebook, HearThisAtIE, iwara, kakao, Motherless, Nova, peertube, pornhub, reddit, tiktok, TV2, TV2Hu, tv5mondeplus, VH1, Viafree, XHamster, 9Now, AnimalPlanet, Arte, CBC, Chingari, comedycentral, DIYNetwork, niconico, dw, funimation, globo, HiDive, NDR, Nuvid, Oreilly, pbs, plutotv, reddit, redtube, soundcloud, SpankBang, VrtNU * **Subtitle extraction from manifests**: Subtitles can be extracted from streaming media manifests. See [commit/be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details @@ -533,10 +533,10 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t --cookies-from-browser BROWSER[:PROFILE] Load cookies from a user profile of the given web browser. Currently supported - browsers are: brave|chrome|chromium|edge|fi - refox|opera|safari|vivaldi. You can specify - the user profile name or directory using - "BROWSER:PROFILE_NAME" or + browsers are: brave, chrome, chromium, + edge, firefox, opera, safari, vivaldi. You + can specify the user profile name or + directory using "BROWSER:PROFILE_NAME" or "BROWSER:PROFILE_PATH". 
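For example, a hypothetical invocation that loads cookies from a named Firefox profile would be `yt-dlp --cookies-from-browser "firefox:default-release" URL` (the profile name here is only an illustration of the `BROWSER:PROFILE_NAME` form documented above).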
If no profile is given, the most recently accessed one is used diff --git a/supportedsites.md b/supportedsites.md index 3c805ba76c..e883351a97 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -1,4 +1,6 @@ # Supported sites + - **17live** + - **17live:clip** - **1tv**: Первый канал - **20min** - **220.ro** @@ -50,6 +52,7 @@ - **AmericasTestKitchen** - **AmericasTestKitchenSeason** - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl + - **AnimalPlanet** - **AnimeLab** - **AnimeLabShows** - **AnimeOnDemand** @@ -125,6 +128,8 @@ - **BilibiliChannel** - **BiliBiliPlayer** - **BiliBiliSearch**: Bilibili video search, "bilisearch" keyword + - **BiliIntl** + - **BiliIntlSeries** - **BioBioChileTV** - **Biography** - **BIQLE** @@ -152,6 +157,7 @@ - **BusinessInsider** - **BuzzFeed** - **BYUtv** + - **CAM4** - **Camdemy** - **CamdemyFolder** - **CamModels** @@ -164,10 +170,7 @@ - **CarambaTVPage** - **CartoonNetwork** - **cbc.ca** - - **cbc.ca:olympics** - **cbc.ca:player** - - **cbc.ca:watch** - - **cbc.ca:watch:video** - **CBS** - **CBSInteractive** - **CBSLocal** @@ -182,10 +185,13 @@ - **CDA** - **CeskaTelevize** - **CeskaTelevizePorady** + - **CGTN** - **channel9**: Channel 9 - **CharlieRose** - **Chaturbate** - **Chilloutzone** + - **Chingari** + - **ChingariUser** - **chirbit** - **chirbit:profile** - **cielotv.it** @@ -235,6 +241,8 @@ - **dailymotion** - **dailymotion:playlist** - **dailymotion:user** + - **damtomo:record** + - **damtomo:video** - **daum.net** - **daum.net:clip** - **daum.net:playlist** @@ -258,6 +266,7 @@ - **DiscoveryPlusIndiaShow** - **DiscoveryVR** - **Disney** + - **DIYNetwork** - **dlive:stream** - **dlive:vod** - **DoodStream** @@ -366,6 +375,9 @@ - **Gazeta** - **GDCVault** - **GediDigital** + - **gem.cbc.ca** + - **gem.cbc.ca:live** + - **gem.cbc.ca:playlist** - **generic**: Generic downloader that works on some sites - **Gfycat** - **GiantBomb** @@ -381,6 +393,7 @@ - **google:podcasts:feed** - **GoogleDrive** - **Goshgay** + - **GoToStage** - **GPUTechConf** - **Groupon** - **hbo** @@ -466,6 +479,7 @@ - **KinjaEmbed** - **KinoPoisk** - **KonserthusetPlay** + - **Koo** - **KrasView**: Красвью - **Ku6** - **KUSI** @@ -539,6 +553,8 @@ - **MedalTV** - **media.ccc.de** - **media.ccc.de:lists** + - **Mediaite** + - **MediaKlikk** - **Medialaan** - **Mediaset** - **Mediasite** @@ -597,6 +613,7 @@ - **mtvservices:embedded** - **MTVUutisetArticle** - **MuenchenTV**: münchen.tv + - **MuseScore** - **mva**: Microsoft Virtual Academy videos - **mva:course**: Microsoft Virtual Academy courses - **Mwave** @@ -646,7 +663,8 @@ - **NetPlus** - **Netzkino** - **Newgrounds** - - **NewgroundsPlaylist** + - **Newgrounds:playlist** + - **Newgrounds:user** - **Newstube** - **NextMedia**: 蘋果日報 - **NextMediaActionNews**: 蘋果日報 - 動新聞 @@ -707,11 +725,13 @@ - **NYTimes** - **NYTimesArticle** - **NYTimesCooking** + - **nzherald** - **NZZ** - **ocw.mit.edu** - **OdaTV** - **Odnoklassniki** - **OktoberfestTV** + - **OlympicsReplay** - **OnDemandKorea** - **onet.pl** - **onet.tv** @@ -756,6 +776,7 @@ - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 
(WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) - **PearVideo** - **PeerTube** + - **PeerTube:Playlist** - **peloton** - **peloton:live**: Peloton Live - **People** @@ -831,6 +852,9 @@ - **radiocanada:audiovideo** - **radiofrance** - **RadioJavan** + - **radlive** + - **radlive:channel** + - **radlive:season** - **Rai** - **RaiPlay** - **RaiPlayLive** @@ -955,6 +979,8 @@ - **southpark.de** - **southpark.nl** - **southparkstudios.dk** + - **SovietsCloset** + - **SovietsClosetPlaylist** - **SpankBang** - **SpankBangPlaylist** - **Spankwire** @@ -983,6 +1009,7 @@ - **StoryFireSeries** - **StoryFireUser** - **Streamable** + - **Streamanity** - **streamcloud.eu** - **StreamCZ** - **StreetVoice** @@ -1038,6 +1065,7 @@ - **TheScene** - **TheStar** - **TheSun** + - **Theta** - **TheWeatherChannel** - **ThisAmericanLife** - **ThisAV** @@ -1325,6 +1353,8 @@ - **ZDFChannel** - 
**Zee5** - **zee5:series** + - **ZenYandex** + - **ZenYandexChannel** - **Zhihu** - **zingmp3**: mp3.zing.vn - **zingmp3:album** From 1fed2773498c7c85852ac92bbb4400a42697b5bd Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 25 Sep 2021 00:59:59 +0000 Subject: [PATCH 159/641] [version] update :ci skip all --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- yt_dlp/version.py | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index a1b459cc72..53ca71219c 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -21,7 +21,7 @@ assignees: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running yt-dlp version **2021.09.02** +- [ ] I've verified that I'm running yt-dlp version **2021.09.25** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -44,7 +44,7 @@ Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running yt-dlp version **2021.09.02** +- [ ] I've verified that I'm running yt-dlp version **2021.09.25** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] The provided URLs do not contain any DRM to the best of my knowledge diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 03fea013f0..6cd8b8ba06 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -21,13 +21,13 @@ assignees: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running yt-dlp version **2021.09.02** +- [ ] I've verified that I'm running yt-dlp version **2021.09.25** - [ ] I've searched the bugtracker for similar site feature requests including closed ones diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index c76452be21..a302daab63 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -21,7 +21,7 @@ assignees: '' - [ ] I'm reporting a bug unrelated to a specific site -- [ ] I've verified that I'm running yt-dlp version **2021.09.02** +- [ ] I've verified that I'm running yt-dlp version **2021.09.25** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] The provided URLs do not contain any DRM to the best of my knowledge - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped @@ -47,7 +47,7 @@ Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running yt-dlp version **2021.09.02** +- [ ] I've verified that I'm running yt-dlp version **2021.09.25** - [ ] I've searched the bugtracker for similar feature requests including closed ones diff --git a/yt_dlp/version.py b/yt_dlp/version.py index f03898ae3e..965a89b885 100644 --- 
a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2021.09.02' +__version__ = '2021.09.25' From e99b2d2771f9373da346222e6b5a88c6e1890457 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sat, 25 Sep 2021 01:12:30 +0000 Subject: [PATCH 160/641] [Newgrounds] Fix view count on songs (#1071) Authored by: u-spec-png --- yt_dlp/extractor/newgrounds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/newgrounds.py b/yt_dlp/extractor/newgrounds.py index 25b468b7dc..3c49008a0a 100644 --- a/yt_dlp/extractor/newgrounds.py +++ b/yt_dlp/extractor/newgrounds.py @@ -136,7 +136,7 @@ class NewgroundsIE(InfoExtractor): 'duration', default=None)) view_count = parse_count(self._html_search_regex( - r'(?s)
<dt>\s*Views\s*</dt>\s*<dd>([\d\.,]+)</dd>', webpage, + r'(?s)<dt>\s*(?:Views|Listens)\s*</dt>\s*<dd>([\d\.,]+)</dd>
', webpage, 'view count', default=None)) filesize = int_or_none(self._html_search_regex( From 8dc831f7150bcd2cd07629fb41764778b85a4455 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Sat, 25 Sep 2021 16:55:33 +0530 Subject: [PATCH 161/641] [LinkedInLearning] Add subtitles (#1077) Authored by: Ashish0804 Closes #1072 --- yt_dlp/extractor/linkedin.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py index 6d54d638ac..f47d59a38c 100644 --- a/yt_dlp/extractor/linkedin.py +++ b/yt_dlp/extractor/linkedin.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +from itertools import zip_longest import re from .common import InfoExtractor @@ -8,6 +9,8 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, + srt_subtitles_timecode, + try_get, urlencode_postdata, urljoin, ) @@ -86,6 +89,16 @@ class LinkedInLearningIE(LinkedInLearningBaseIE): }, } + def json2srt(self, transcript_lines, duration=None): + srt_data = '' + for line, (line_dict, next_dict) in enumerate(zip_longest(transcript_lines, transcript_lines[1:])): + start_time, caption = line_dict['transcriptStartAt'] / 1000, line_dict['caption'] + end_time = next_dict['transcriptStartAt'] / 1000 if next_dict else duration or start_time + 1 + srt_data += '%d\n%s --> %s\n%s\n' % (line + 1, srt_subtitles_timecode(start_time), + srt_subtitles_timecode(end_time), + caption) + return srt_data + def _real_extract(self, url): course_slug, video_slug = self._match_valid_url(url).groups() @@ -101,6 +114,7 @@ class LinkedInLearningIE(LinkedInLearningBaseIE): formats.append({ 'format_id': 'progressive-%dp' % height, 'url': progressive_url, + 'ext': 'mp4', 'height': height, 'width': width, 'source_preference': 1, @@ -128,6 +142,14 @@ class LinkedInLearningIE(LinkedInLearningBaseIE): # However, unless someone can confirm this, the old # behaviour is being kept as-is self._sort_formats(formats, ('res', 'source_preference')) + subtitles = {} + duration = int_or_none(video_data.get('durationInSeconds')) + transcript_lines = try_get(video_data, lambda x: x['transcript']['lines'], expected_type=list) + if transcript_lines: + subtitles['en'] = [{ + 'ext': 'srt', + 'data': self.json2srt(transcript_lines, duration) + }] return { 'id': self._get_video_id(video_data, course_slug, video_slug), @@ -135,7 +157,8 @@ class LinkedInLearningIE(LinkedInLearningBaseIE): 'formats': formats, 'thumbnail': video_data.get('defaultThumbnail'), 'timestamp': float_or_none(video_data.get('publishedOn'), 1000), - 'duration': int_or_none(video_data.get('durationInSeconds')), + 'duration': duration, + 'subtitles': subtitles, } From f440b14f873bd0b15017d996c2a762c8b7ac56b6 Mon Sep 17 00:00:00 2001 From: Matt Broadway Date: Sat, 25 Sep 2021 16:34:16 +0100 Subject: [PATCH 162/641] [cookies] Fix keyring fallback (#1078) The password returned by `security find-generic-password` has a newline at the end Closes #1073 Authored by: mbway --- yt_dlp/cookies.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 1409e6799b..049ec9fb1f 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -353,7 +353,7 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): class MacChromeCookieDecryptor(ChromeCookieDecryptor): def __init__(self, browser_keyring_name, logger): self._logger = logger - password = 
_get_mac_keyring_password(browser_keyring_name) + password = _get_mac_keyring_password(browser_keyring_name, logger) self._v10_key = None if password is None else self.derive_key(password) @staticmethod @@ -546,7 +546,7 @@ def _parse_safari_cookies_record(data, jar, logger): p.skip_to(value_offset) value = p.read_cstring() except UnicodeDecodeError: - logger.warning('failed to parse cookie because UTF-8 decoding failed', only_once=True) + logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True) return record_size p.skip_to(record_size, 'space at the end of the record') @@ -592,11 +592,13 @@ def _get_linux_keyring_password(browser_keyring_name): return password.encode('utf-8') -def _get_mac_keyring_password(browser_keyring_name): +def _get_mac_keyring_password(browser_keyring_name, logger): if KEYRING_AVAILABLE: + logger.debug('using keyring to obtain password') password = keyring.get_password('{} Safe Storage'.format(browser_keyring_name), browser_keyring_name) return password.encode('utf-8') else: + logger.debug('using find-generic-password to obtain password') proc = subprocess.Popen(['security', 'find-generic-password', '-w', # write password to stdout '-a', browser_keyring_name, # match 'account' @@ -605,8 +607,11 @@ def _get_mac_keyring_password(browser_keyring_name): stderr=subprocess.DEVNULL) try: stdout, stderr = process_communicate_or_kill(proc) + if stdout[-1:] == b'\n': + stdout = stdout[:-1] return stdout - except BaseException: + except BaseException as e: + logger.warning(f'exception running find-generic-password: {type(e).__name__}({e})') return None @@ -640,7 +645,7 @@ def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16): try: return plaintext[:-padding_length].decode('utf-8') except UnicodeDecodeError: - logger.warning('failed to decrypt cookie because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) + logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) return None @@ -648,13 +653,13 @@ def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger): try: plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce) except ValueError: - logger.warning('failed to decrypt cookie because the MAC check failed. Possibly the key is wrong?', only_once=True) + logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True) return None try: return plaintext.decode('utf-8') except UnicodeDecodeError: - logger.warning('failed to decrypt cookie because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) + logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) return None From 524e2e4fda4d0deb135398ef85752be522b507e7 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 26 Sep 2021 01:39:44 +0530 Subject: [PATCH 163/641] [outtmpl] Format type `U` for unicode normalization --- README.md | 1 + test/test_YoutubeDL.py | 6 +++++- yt_dlp/YoutubeDL.py | 26 ++++++++++++++++---------- yt_dlp/utils.py | 10 +++++----- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index a148802822..d13eb4dc1e 100644 --- a/README.md +++ b/README.md @@ -964,6 +964,7 @@ The field names themselves (the part inside the parenthesis) can also have some 1. **Alternatives**: Alternate fields can be specified seperated with a `,`. 
Eg: `%(release_date>%Y,upload_date>%Y|Unknown)s` 1. **Default**: A literal default value can be specified for when the field is empty using a `|` seperator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s` 1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `B`, `j`, `l`, `q` can be used for converting to **B**ytes, **j**son, a comma seperated **l**ist and a string **q**uoted for the terminal respectively +1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. Eg: `%(title)+.100U` is NFKC To summarize, the general syntax for a field is: ``` diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 6feca2ce24..f6483575f3 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -649,7 +649,7 @@ class TestYoutubeDL(unittest.TestCase): 'title2': '%PATH%', 'title3': 'foo/bar\\test', 'title4': 'foo "bar" test', - 'title5': 'áéí', + 'title5': 'áéí 𝐀', 'timestamp': 1618488000, 'duration': 100000, 'playlist_index': 1, @@ -769,6 +769,10 @@ class TestYoutubeDL(unittest.TestCase): test('%(formats.:.id) 15l', ' id1, id2, id3') test('%(formats)j', (json.dumps(FORMATS), sanitize(json.dumps(FORMATS)))) test('%(title5).3B', 'á') + test('%(title5)U', 'áéí 𝐀') + test('%(title5)#U', 'a\u0301e\u0301i\u0301 𝐀') + test('%(title5)+U', 'áéí A') + test('%(title5)+#U', 'a\u0301e\u0301i\u0301 A') if compat_os_name == 'nt': test('%(title4)q', ('"foo \\"bar\\" test"', "'foo _'bar_' test'")) else: diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 11371fa860..a6eddd7f78 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -25,6 +25,7 @@ import time import tokenize import traceback import random +import unicodedata from string import ascii_letters @@ -908,7 +909,7 @@ class YoutubeDL(object): def validate_outtmpl(cls, outtmpl): ''' @return None or Exception object ''' outtmpl = re.sub( - STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqB]'), + STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'), lambda mobj: f'{mobj.group(0)[:-1]}s', cls._outtmpl_expandpath(outtmpl)) try: @@ -940,7 +941,7 @@ class YoutubeDL(object): } TMPL_DICT = {} - EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqB]')) + EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]')) MATH_FUNCTIONS = { '+': float.__add__, '-': float.__sub__, @@ -1031,21 +1032,26 @@ class YoutubeDL(object): value = default if value is None else value str_fmt = f'{fmt[:-1]}s' - if fmt[-1] == 'l': + if fmt[-1] == 'l': # list value, fmt = ', '.join(variadic(value)), str_fmt - elif fmt[-1] == 'j': + elif fmt[-1] == 'j': # json value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt - elif fmt[-1] == 'q': + elif fmt[-1] == 'q': # quoted value, fmt = compat_shlex_quote(str(value)), str_fmt - elif fmt[-1] == 'B': + elif fmt[-1] == 'B': # bytes value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8') value, fmt = value.decode('utf-8', 'ignore'), 's' + elif fmt[-1] == 'U': # unicode normalized + opts = outer_mobj.group('conversion') or '' + value, fmt = unicodedata.normalize( + # "+" = compatibility equivalence, "#" = NFD + 'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'), + value), str_fmt elif fmt[-1] == 'c': - value = 
str(value) - if value is None: - value, fmt = default, 's' + if value: + value = str(value)[0] else: - value = value[0] + fmt = str_fmt elif fmt[-1] not in 'rs': # numeric value = float_or_none(value) if value is None: diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 141d2c9ccd..770d7feb9c 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4474,12 +4474,12 @@ OUTTMPL_TYPES = { STR_FORMAT_RE_TMPL = r'''(?x) (?(?:%%)*) % - (?P\((?P{0})\))? # mapping key + (?P\((?P{0})\))? (?P - (?:[#0\-+ ]+)? # conversion flags (optional) - (?:\d+)? # minimum field width (optional) - (?:\.\d+)? # precision (optional) - [hlL]? # length modifier (optional) + (?P[#0\-+ ]+)? + (?P\d+)? + (?P\.\d+)? + (?P[hlL])? # unused in python {1} # conversion type ) ''' From 9a1334543976b3044be88b5bc2a35f43a5d021e1 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sat, 25 Sep 2021 22:30:22 +0000 Subject: [PATCH 164/641] [PolskieRadio] Fix extractors (#1082) Closes #1033 Authored by: jakubadamw, u-spec-png --- yt_dlp/extractor/polskieradio.py | 47 ++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/yt_dlp/extractor/polskieradio.py b/yt_dlp/extractor/polskieradio.py index 978d6f813b..53fe0340a0 100644 --- a/yt_dlp/extractor/polskieradio.py +++ b/yt_dlp/extractor/polskieradio.py @@ -15,12 +15,13 @@ from ..utils import ( int_or_none, strip_or_none, unified_timestamp, + unescapeHTML, ) class PolskieRadioIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P[0-9]+)' - _TESTS = [{ + _TESTS = [{ # Old-style single broadcast. 'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943,Prof-Andrzej-Nowak-o-historii-nie-da-sie-myslec-beznamietnie', 'info_dict': { 'id': '1587943', @@ -39,14 +40,41 @@ class PolskieRadioIE(InfoExtractor): 'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$' }, }], - }, { - 'url': 'http://www.polskieradio.pl/265/5217/Artykul/1635803,Euro-2016-nie-ma-miejsca-na-blad-Polacy-graja-ze-Szwajcaria-o-cwiercfinal', + }, { # New-style single broadcast. + 'url': 'https://www.polskieradio.pl/8/2382/Artykul/2534482,Zagarysci-Poezja-jak-spoiwo', 'info_dict': { - 'id': '1635803', - 'title': 'Euro 2016: nie ma miejsca na błąd. Polacy grają ze Szwajcarią o ćwierćfinał', - 'description': 'md5:01cb7d0cad58664095d72b51a1ebada2', + 'id': '2534482', + 'title': 'Żagaryści. Poezja jak spoiwo', + 'description': 'md5:f18d95d5dcba747a09b635e21a4c0695', }, - 'playlist_mincount': 12, + 'playlist': [{ + 'md5': 'd07559829f61d5a93a75755987ded760', + 'info_dict': { + 'id': '2516679', + 'ext': 'mp3', + 'title': 'md5:c6e1234e0b747ad883cb91b7ad06b98c', + 'timestamp': 1592654400, + 'upload_date': '20200620', + 'duration': 1430, + 'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$' + }, + }], + }, { # Old-style multiple broadcast playlist. + 'url': 'https://www.polskieradio.pl/8/4346/Artykul/2487823,Marek-Kondrat-czyta-Mistrza-i-Malgorzate', + 'info_dict': { + 'id': '2487823', + 'title': 'Marek Kondrat czyta "Mistrza i Małgorzatę"', + 'description': 'md5:8422a95cc83834f2aaeff9d82e9c8f39', + }, + 'playlist_mincount': 50, + }, { # New-style multiple broadcast playlist. 
+ 'url': 'https://www.polskieradio.pl/8/4346/Artykul/2541317,Czytamy-Kalendarz-i-klepsydre-Tadeusza-Konwickiego', + 'info_dict': { + 'id': '2541317', + 'title': 'Czytamy "Kalendarz i klepsydrę" Tadeusza Konwickiego', + 'description': 'md5:0baeaa46d877f1351fb2eeed3e871f9f', + }, + 'playlist_mincount': 15, }, { 'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis', 'only_matching': True, @@ -78,8 +106,8 @@ class PolskieRadioIE(InfoExtractor): media_urls = set() - for data_media in re.findall(r'<[^>]+data-media=({[^>]+})', content): - media = self._parse_json(data_media, playlist_id, fatal=False) + for data_media in re.findall(r'<[^>]+data-media="?({[^>]+})"?', content): + media = self._parse_json(data_media, playlist_id, transform_source=unescapeHTML, fatal=False) if not media.get('file') or not media.get('desc'): continue media_url = self._proto_relative_url(media['file'], 'http:') @@ -98,6 +126,7 @@ class PolskieRadioIE(InfoExtractor): title = self._og_search_title(webpage).strip() description = strip_or_none(self._og_search_description(webpage)) + description = description.replace('\xa0', ' ') if description is not None else None return self.playlist_result(entries, playlist_id, title, description) From 2333ea102986f5ae792d3f297aac04cf8065d9f3 Mon Sep 17 00:00:00 2001 From: i6t <62123048+i6t@users.noreply.github.com> Date: Sun, 26 Sep 2021 07:39:45 +0900 Subject: [PATCH 165/641] [Veo] Add extractor (#1084) Fixes: https://github.com/ytdl-org/youtube-dl/issues/29445 Authored by: i6t --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/veo.py | 74 ++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 yt_dlp/extractor/veo.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 4b15598863..59d5dae40d 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1600,6 +1600,7 @@ from .utreon import UtreonIE from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE from .veehd import VeeHDIE +from .veo import VeoIE from .veoh import VeohIE from .vesti import VestiIE from .vevo import ( diff --git a/yt_dlp/extractor/veo.py b/yt_dlp/extractor/veo.py new file mode 100644 index 0000000000..4e57a52d1e --- /dev/null +++ b/yt_dlp/extractor/veo.py @@ -0,0 +1,74 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..utils import ( + int_or_none, + mimetype2ext, + unified_timestamp, + url_or_none, +) + + +class VeoIE(InfoExtractor): + _VALID_URL = r'https?://app\.veo\.co/matches/(?P[0-9A-Za-z-]+)' + + _TESTS = [{ + 'url': 'https://app.veo.co/matches/20201027-last-period/', + 'info_dict': { + 'id': '20201027-last-period', + 'ext': 'mp4', + 'title': 'Akidemy u11s v Bradford Boys u11s (Game 3)', + 'thumbnail': 're:https://c.veocdn.com/.+/thumbnail.jpg', + 'upload_date': '20201028', + 'timestamp': 1603847208, + 'duration': 1916, + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + metadata = self._download_json( + 'https://app.veo.co/api/app/matches/%s' % video_id, video_id) + + video_data = self._download_json( + 'https://app.veo.co/api/app/matches/%s/videos' % video_id, video_id, 'Downloading video data') + + title = metadata.get('title') + thumbnail = url_or_none(metadata.get('thumbnail')) + + timestamp = unified_timestamp(metadata.get('created')) + duration = int_or_none(metadata.get('duration')) + view_count = int_or_none(metadata.get('view_count')) + + formats = [] + for fmt in video_data: + mimetype = 
fmt.get('mime_type') + # skip configuration file for panoramic video + if mimetype == 'video/mp2t': + continue + height = int_or_none(fmt.get('height')) + bitrate = int_or_none(fmt.get('bit_rate'), scale=1000) + render_type = fmt.get('render_type') + formats.append({ + 'url': url_or_none(fmt.get('url')), + 'format_id': '%s-%sp' % (render_type, height), + 'ext': mimetype2ext(mimetype), + 'width': int_or_none(fmt.get('width')), + 'height': height, + 'vbr': bitrate + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'view_count': view_count, + 'duration': duration + } From c470901ccf602d43b69fb1092ed6fdff36021137 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sun, 26 Sep 2021 13:28:22 +0000 Subject: [PATCH 166/641] [reddit] Add embedded url (#1090) Authored by: u-spec-png --- yt_dlp/extractor/reddit.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index 8e1463d5be..14592bc62c 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -49,7 +49,7 @@ class RedditIE(InfoExtractor): class RedditRIE(InfoExtractor): - _VALID_URL = r'https?://(?:[^/]+\.)?reddit\.com/r/(?P[^/]+/comments/(?P[^/?#&]+))' + _VALID_URL = r'https?://(?:[^/]+\.)?reddit(?:media)?\.com/r/(?P[^/]+/comments/(?P[^/?#&]+))' _TESTS = [{ 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/', 'info_dict': { @@ -94,6 +94,9 @@ class RedditRIE(InfoExtractor): # reddit video @ nm reddit 'url': 'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/', 'only_matching': True, + }, { + 'url': 'https://www.redditmedia.com/r/serbia/comments/pu9wbx/ako_vu%C4%8Di%C4%87_izgubi_izbore_ja_%C4%87u_da_crknem/', + 'only_matching': True, }] def _real_extract(self, url): From d31dab70847aeedd9992f5921dfcf270e29b02ea Mon Sep 17 00:00:00 2001 From: Aleri Kaisattera <73682764+alerikaisattera@users.noreply.github.com> Date: Mon, 27 Sep 2021 08:12:44 +0600 Subject: [PATCH 167/641] [vidme] Remove extractor (#1095) Authored by: alerikaisattera --- yt_dlp/extractor/extractors.py | 5 - yt_dlp/extractor/generic.py | 6 - yt_dlp/extractor/vidme.py | 296 --------------------------------- 3 files changed, 307 deletions(-) delete mode 100644 yt_dlp/extractor/vidme.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 59d5dae40d..eb121460b5 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1635,11 +1635,6 @@ from .vidio import ( VidioLiveIE ) from .vidlii import VidLiiIE -from .vidme import ( - VidmeIE, - VidmeUserIE, - VidmeUserLikesIE, -) from .vier import VierIE, VierVideosIE from .viewlift import ( ViewLiftIE, diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index b9c5772e06..5918c8c562 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2755,12 +2755,6 @@ class GenericIE(InfoExtractor): if vhx_url: return self.url_result(vhx_url, VHXEmbedIE.ie_key()) - vid_me_embed_url = self._search_regex( - r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]', - webpage, 'vid.me embed', default=None) - if vid_me_embed_url is not None: - return self.url_result(vid_me_embed_url, 'Vidme') - # Invidious Instances # https://github.com/yt-dlp/yt-dlp/issues/195 # https://github.com/iv-org/invidious/pull/1730 diff --git a/yt_dlp/extractor/vidme.py 
b/yt_dlp/extractor/vidme.py deleted file mode 100644 index a02f917318..0000000000 --- a/yt_dlp/extractor/vidme.py +++ /dev/null @@ -1,296 +0,0 @@ -from __future__ import unicode_literals - -import itertools - -from .common import InfoExtractor -from ..compat import compat_HTTPError -from ..utils import ( - ExtractorError, - int_or_none, - float_or_none, - parse_iso8601, - url_or_none, -) - - -class VidmeIE(InfoExtractor): - IE_NAME = 'vidme' - _VALID_URL = r'https?://vid\.me/(?:e/)?(?P[\da-zA-Z]{,5})(?:[^\da-zA-Z]|$)' - _TESTS = [{ - 'url': 'https://vid.me/QNB', - 'md5': 'f42d05e7149aeaec5c037b17e5d3dc82', - 'info_dict': { - 'id': 'QNB', - 'ext': 'mp4', - 'title': 'Fishing for piranha - the easy way', - 'description': 'source: https://www.facebook.com/photo.php?v=312276045600871', - 'thumbnail': r're:^https?://.*\.jpg', - 'timestamp': 1406313244, - 'upload_date': '20140725', - 'age_limit': 0, - 'duration': 119.92, - 'view_count': int, - 'like_count': int, - 'comment_count': int, - }, - }, { - 'url': 'https://vid.me/Gc6M', - 'md5': 'f42d05e7149aeaec5c037b17e5d3dc82', - 'info_dict': { - 'id': 'Gc6M', - 'ext': 'mp4', - 'title': 'O Mere Dil ke chain - Arnav and Khushi VM', - 'thumbnail': r're:^https?://.*\.jpg', - 'timestamp': 1441211642, - 'upload_date': '20150902', - 'uploader': 'SunshineM', - 'uploader_id': '3552827', - 'age_limit': 0, - 'duration': 223.72, - 'view_count': int, - 'like_count': int, - 'comment_count': int, - }, - 'params': { - 'skip_download': True, - }, - }, { - # tests uploader field - 'url': 'https://vid.me/4Iib', - 'info_dict': { - 'id': '4Iib', - 'ext': 'mp4', - 'title': 'The Carver', - 'description': 'md5:e9c24870018ae8113be936645b93ba3c', - 'thumbnail': r're:^https?://.*\.jpg', - 'timestamp': 1433203629, - 'upload_date': '20150602', - 'uploader': 'Thomas', - 'uploader_id': '109747', - 'age_limit': 0, - 'duration': 97.859999999999999, - 'view_count': int, - 'like_count': int, - 'comment_count': int, - }, - 'params': { - 'skip_download': True, - }, - }, { - # nsfw test from http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching - 'url': 'https://vid.me/e/Wmur', - 'info_dict': { - 'id': 'Wmur', - 'ext': 'mp4', - 'title': 'naked smoking & stretching', - 'thumbnail': r're:^https?://.*\.jpg', - 'timestamp': 1430931613, - 'upload_date': '20150506', - 'uploader': 'naked-yogi', - 'uploader_id': '1638622', - 'age_limit': 18, - 'duration': 653.26999999999998, - 'view_count': int, - 'like_count': int, - 'comment_count': int, - }, - 'params': { - 'skip_download': True, - }, - }, { - # nsfw, user-disabled - 'url': 'https://vid.me/dzGJ', - 'only_matching': True, - }, { - # suspended - 'url': 'https://vid.me/Ox3G', - 'only_matching': True, - }, { - # deleted - 'url': 'https://vid.me/KTPm', - 'only_matching': True, - }, { - # no formats in the API response - 'url': 'https://vid.me/e5g', - 'info_dict': { - 'id': 'e5g', - 'ext': 'mp4', - 'title': 'Video upload (e5g)', - 'thumbnail': r're:^https?://.*\.jpg', - 'timestamp': 1401480195, - 'upload_date': '20140530', - 'uploader': None, - 'uploader_id': None, - 'age_limit': 0, - 'duration': 483, - 'view_count': int, - 'like_count': int, - 'comment_count': int, - }, - 'params': { - 'skip_download': True, - }, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - try: - response = self._download_json( - 'https://api.vid.me/videoByUrl/%s' % video_id, video_id) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: - response = self._parse_json(e.cause.read(), 
video_id) - else: - raise - - error = response.get('error') - if error: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, error), expected=True) - - video = response['video'] - - if video.get('state') == 'deleted': - raise ExtractorError( - 'Vidme said: Sorry, this video has been deleted.', - expected=True) - - if video.get('state') in ('user-disabled', 'suspended'): - raise ExtractorError( - 'Vidme said: This video has been suspended either due to a copyright claim, ' - 'or for violating the terms of use.', - expected=True) - - formats = [] - for f in video.get('formats', []): - format_url = url_or_none(f.get('uri')) - if not format_url: - continue - format_type = f.get('type') - if format_type == 'dash': - formats.extend(self._extract_mpd_formats( - format_url, video_id, mpd_id='dash', fatal=False)) - elif format_type == 'hls': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) - else: - formats.append({ - 'format_id': f.get('type'), - 'url': format_url, - 'width': int_or_none(f.get('width')), - 'height': int_or_none(f.get('height')), - # Clips should never be prefered over full video - 'preference': 0 if f.get('type', '').endswith( - 'clip') else 1, - }) - - if not formats and video.get('complete_url'): - formats.append({ - 'url': video.get('complete_url'), - 'width': int_or_none(video.get('width')), - 'height': int_or_none(video.get('height')), - }) - - self._sort_formats(formats) - - title = video['title'] - description = video.get('description') - thumbnail = video.get('thumbnail_url') - timestamp = parse_iso8601(video.get('date_created'), ' ') - uploader = video.get('user', {}).get('username') - uploader_id = video.get('user', {}).get('user_id') - age_limit = 18 if video.get('nsfw') is True else 0 - duration = float_or_none(video.get('duration')) - view_count = int_or_none(video.get('view_count')) - like_count = int_or_none(video.get('likes_count')) - comment_count = int_or_none(video.get('comment_count')) - - return { - 'id': video_id, - 'title': title or 'Video upload (%s)' % video_id, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'age_limit': age_limit, - 'timestamp': timestamp, - 'duration': duration, - 'view_count': view_count, - 'like_count': like_count, - 'comment_count': comment_count, - 'formats': formats, - } - - -class VidmeListBaseIE(InfoExtractor): - # Max possible limit according to https://docs.vid.me/#api-Videos-List - _LIMIT = 100 - - def _entries(self, user_id, user_name): - for page_num in itertools.count(1): - page = self._download_json( - 'https://api.vid.me/videos/%s?user=%s&limit=%d&offset=%d' - % (self._API_ITEM, user_id, self._LIMIT, (page_num - 1) * self._LIMIT), - user_name, 'Downloading user %s page %d' % (self._API_ITEM, page_num)) - - videos = page.get('videos', []) - if not videos: - break - - for video in videos: - video_url = video.get('full_url') or video.get('embed_url') - if video_url: - yield self.url_result(video_url, VidmeIE.ie_key()) - - total = int_or_none(page.get('page', {}).get('total')) - if total and self._LIMIT * page_num >= total: - break - - def _real_extract(self, url): - user_name = self._match_id(url) - - user_id = self._download_json( - 'https://api.vid.me/userByUsername?username=%s' % user_name, - user_name)['user']['user_id'] - - return self.playlist_result( - self._entries(user_id, user_name), user_id, - '%s - %s' % (user_name, self._TITLE)) - - -class 
VidmeUserIE(VidmeListBaseIE): - IE_NAME = 'vidme:user' - _VALID_URL = r'https?://vid\.me/(?:e/)?(?P[\da-zA-Z_-]{6,})(?!/likes)(?:[^\da-zA-Z_-]|$)' - _API_ITEM = 'list' - _TITLE = 'Videos' - _TESTS = [{ - 'url': 'https://vid.me/MasakoX', - 'info_dict': { - 'id': '16112341', - 'title': 'MasakoX - %s' % _TITLE, - }, - 'playlist_mincount': 191, - }, { - 'url': 'https://vid.me/unsQuare_netWork', - 'only_matching': True, - }] - - -class VidmeUserLikesIE(VidmeListBaseIE): - IE_NAME = 'vidme:user:likes' - _VALID_URL = r'https?://vid\.me/(?:e/)?(?P[\da-zA-Z_-]{6,})/likes' - _API_ITEM = 'likes' - _TITLE = 'Likes' - _TESTS = [{ - 'url': 'https://vid.me/ErinAlexis/likes', - 'info_dict': { - 'id': '6483530', - 'title': 'ErinAlexis - %s' % _TITLE, - }, - 'playlist_mincount': 415, - }, { - 'url': 'https://vid.me/Kaleidoscope-Ish/likes', - 'only_matching': True, - }] From 91dd88b90f52c4bdb250db22bca6928f2c7c5551 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 27 Sep 2021 11:29:16 +0530 Subject: [PATCH 168/641] [outtmpl] Alternate form of format type `l` for `\n` delimited list --- README.md | 2 +- test/test_YoutubeDL.py | 1 + yt_dlp/YoutubeDL.py | 3 ++- yt_dlp/utils.py | 4 +++- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d13eb4dc1e..897e0600e1 100644 --- a/README.md +++ b/README.md @@ -963,7 +963,7 @@ The field names themselves (the part inside the parenthesis) can also have some 1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s` 1. **Alternatives**: Alternate fields can be specified seperated with a `,`. Eg: `%(release_date>%Y,upload_date>%Y|Unknown)s` 1. **Default**: A literal default value can be specified for when the field is empty using a `|` seperator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s` -1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `B`, `j`, `l`, `q` can be used for converting to **B**ytes, **j**son, a comma seperated **l**ist and a string **q**uoted for the terminal respectively +1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `B`, `j`, `l`, `q` can be used for converting to **B**ytes, **j**son, a comma seperated **l**ist (alternate form flag `#` makes it new line `\n` seperated) and a string **q**uoted for the terminal, respectively 1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. 
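To make the four normalization variants concrete, here is a minimal standalone sketch of the `unicodedata.normalize` calls that the `U` format type maps to (mirroring the mapping added to `YoutubeDL.py` above); the sample string matches the `title5` field used in the test suite, and the expected outputs are taken from those tests:

```python
import unicodedata

title = 'áéí \N{MATHEMATICAL BOLD CAPITAL A}'  # same sample as the `title5` test field

print(unicodedata.normalize('NFC', title))   # %(title)U   -> 'áéí 𝐀'
print(unicodedata.normalize('NFD', title))   # %(title)#U  -> accents decomposed, '𝐀' kept
print(unicodedata.normalize('NFKC', title))  # %(title)+U  -> 'áéí A' (compatibility fold)
print(unicodedata.normalize('NFKD', title))  # %(title)+#U -> accents decomposed, 'A'
```
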
Eg: `%(title)+.100U` is NFKC To summarize, the general syntax for a field is: diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index f6483575f3..e746589450 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -765,6 +765,7 @@ class TestYoutubeDL(unittest.TestCase): # Custom type casting test('%(formats.:.id)l', 'id1, id2, id3') + test('%(formats.:.id)#l', ('id1\nid2\nid3', 'id1 id2 id3')) test('%(ext)l', 'mp4') test('%(formats.:.id) 15l', ' id1, id2, id3') test('%(formats)j', (json.dumps(FORMATS), sanitize(json.dumps(FORMATS)))) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index a6eddd7f78..1cbe8dc8d8 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1033,7 +1033,8 @@ class YoutubeDL(object): str_fmt = f'{fmt[:-1]}s' if fmt[-1] == 'l': # list - value, fmt = ', '.join(variadic(value)), str_fmt + delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', ' + value, fmt = delim.join(variadic(value)), str_fmt elif fmt[-1] == 'j': # json value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt elif fmt[-1] == 'q': # quoted diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 770d7feb9c..eba89fb8bc 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2099,7 +2099,9 @@ def sanitize_filename(s, restricted=False, is_id=False): def replace_insane(char): if restricted and char in ACCENT_CHARS: return ACCENT_CHARS[char] - if char == '?' or ord(char) < 32 or ord(char) == 127: + elif not restricted and char == '\n': + return ' ' + elif char == '?' or ord(char) < 32 or ord(char) == 127: return '' elif char == '"': return '' if restricted else '\'' From 28234287f17e5751a15d33e3fe6fea2c8e697799 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 27 Sep 2021 09:21:28 +0530 Subject: [PATCH 169/641] [update] Check for new version even if not updateable --- yt_dlp/update.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/yt_dlp/update.py b/yt_dlp/update.py index 531eea7c91..8160dab377 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -99,23 +99,6 @@ def run_update(ydl): h.update(mv[:n]) return h.hexdigest() - ERRORS = { - 'exe': None, - 'zip': None, - 'dir': 'Auto-update is not supported for unpackaged windows executable. Re-download the latest release', - 'source': 'You cannot update when running from source code', - 'unknown': 'It looks like you installed yt-dlp with a package manager, pip, setup.py or a tarball. Use that to update', - } - err = ERRORS.get(detect_variant(), ERRORS['unknown']) - if err: - return report_error(err, expected=True) - - # sys.executable is set to the full pathname of the exe-file for py2exe - # though symlinks are not followed so that we need to do this manually - # with help of realpath - filename = compat_realpath(sys.executable if hasattr(sys, 'frozen') else sys.argv[0]) - ydl.to_screen('Current Build Hash %s' % calc_sha256sum(filename)) - # Download and check versions info try: version_info = ydl._opener.open(JSON_URL).read().decode('utf-8') @@ -128,10 +111,27 @@ def run_update(ydl): version_id = version_info['tag_name'] if version_tuple(__version__) >= version_tuple(version_id): - ydl.to_screen('yt-dlp is up to date (%s)' % __version__) + ydl.to_screen(f'yt-dlp is up to date ({__version__})') return - ydl.to_screen('Updating to version ' + version_id + ' ...') + ERRORS = { + 'exe': None, + 'zip': None, + 'dir': 'Auto-update is not supported for unpackaged windows executable. 
Re-download the latest release', + 'source': 'You cannot update when running from source code', + 'unknown': 'It looks like you installed yt-dlp with a package manager, pip, setup.py or a tarball. Use that to update', + } + err = ERRORS.get(detect_variant(), ERRORS['unknown']) + if err: + ydl.to_screen(f'Latest version: {version_id}, Current version: {__version__}') + return report_error(err, expected=True) + + # sys.executable is set to the full pathname of the exe-file for py2exe + # though symlinks are not followed so that we need to do this manually + # with help of realpath + filename = compat_realpath(sys.executable if hasattr(sys, 'frozen') else sys.argv[0]) + ydl.to_screen(f'Current version {__version__}; Build Hash {calc_sha256sum(filename)}') + ydl.to_screen(f'Updating to version {version_id} ...') version_labels = { 'zip_3': '', From 360167b9fca07cb870038b12112a611b9e872ffe Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 27 Sep 2021 11:24:22 +0530 Subject: [PATCH 170/641] Fix `--flat-playlist` when neither IE nor id is known --- yt_dlp/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 1cbe8dc8d8..367d3fa608 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1308,7 +1308,7 @@ class YoutubeDL(object): or extract_flat is True): info_copy = ie_result.copy() ie = try_get(ie_result.get('ie_key'), self.get_info_extractor) - if not ie_result.get('id'): + if ie and not ie_result.get('id'): info_copy['id'] = ie.get_temp_id(ie_result['url']) self.add_default_extra_info(info_copy, ie, ie_result['url']) self.add_extra_info(info_copy, extra_info) From fecb20a503720e03349391752c17afd7194856e6 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Mon, 27 Sep 2021 20:10:51 +0000 Subject: [PATCH 171/641] [N1] Add extractor (#1080) Authored by: u-spec-png --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/n1.py | 136 +++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 yt_dlp/extractor/n1.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index eb121460b5..4774a3ebb8 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -833,6 +833,7 @@ from .myvi import ( ) from .myvideoge import MyVideoGeIE from .myvidster import MyVidsterIE +from .n1 import N1InfoIIE, N1InfoAssetIE from .nationalgeographic import ( NationalGeographicVideoIE, NationalGeographicTVIE, diff --git a/yt_dlp/extractor/n1.py b/yt_dlp/extractor/n1.py new file mode 100644 index 0000000000..7a09c6779c --- /dev/null +++ b/yt_dlp/extractor/n1.py @@ -0,0 +1,136 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .youtube import YoutubeIE +from .reddit import RedditRIE +from .common import InfoExtractor +from ..utils import ( + unified_timestamp, + extract_attributes, +) + + +class N1InfoAssetIE(InfoExtractor): + _VALID_URL = r'https?://best-vod\.umn\.cdn\.united\.cloud/stream\?asset=(?P[^&]+)' + _TESTS = [{ + 'url': 'https://best-vod.umn.cdn.united.cloud/stream?asset=ljsottomazilirija3060921-n1info-si-worldwide&stream=hp1400&t=0&player=m3u8v&sp=n1info&u=n1info&p=n1Sh4redSecre7iNf0', + 'md5': '28b08b32aeaff2b8562736ccd5a66fe7', + 'info_dict': { + 'id': 'ljsottomazilirija3060921-n1info-si-worldwide', + 'ext': 'mp4', + 'title': 'ljsottomazilirija3060921-n1info-si-worldwide', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + formats = 
self._extract_m3u8_formats(
+            url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'formats': formats,
+        }
+
+
+class N1InfoIIE(InfoExtractor):
+    IE_NAME = 'N1Info:article'
+    _VALID_URL = r'https?://(?:(?:ba|rs|hr)\.)?n1info\.(?:com|si)/(?:[^/]+/){1,2}(?P<id>[^/]+)'
+    _TESTS = [{
+        # Youtube embedded
+        'url': 'https://rs.n1info.com/sport-klub/tenis/kako-je-djokovic-propustio-istorijsku-priliku-video/',
+        'md5': '01ddb6646d0fd9c4c7d990aa77fe1c5a',
+        'info_dict': {
+            'id': 'L5Hd4hQVUpk',
+            'ext': 'mp4',
+            'upload_date': '20210913',
+            'title': 'Ozmo i USO21, ep. 13: Novak Đoković – Danil Medvedev | Ključevi Poraza, Budućnost | SPORT KLUB TENIS',
+            'description': 'md5:467f330af1effedd2e290f10dc31bb8e',
+            'uploader': 'Sport Klub',
+            'uploader_id': 'sportklub',
+        }
+    }, {
+        'url': 'https://rs.n1info.com/vesti/djilas-los-plan-za-metro-nece-resiti-nijedan-saobracajni-problem/',
+        'info_dict': {
+            'id': 'bgmetrosot2409zta20210924174316682-n1info-rs-worldwide',
+            'ext': 'mp4',
+            'title': 'Đilas: Predlog izgradnje metroa besmislen; SNS odbacuje navode',
+            'upload_date': '20210924',
+            'timestamp': 1632481347,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://n1info.si/novice/slovenija/zadnji-dnevi-na-kopaliscu-ilirija-ilirija-ni-umrla-ubili-so-jo/',
+        'info_dict': {
+            'id': 'ljsottomazilirija3060921-n1info-si-worldwide',
+            'ext': 'mp4',
+            'title': 'Zadnji dnevi na kopališču Ilirija: “Ilirija ni umrla, ubili so jo”',
+            'timestamp': 1632567630,
+            'upload_date': '20210925',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # Reddit embedded
+        'url': 'https://ba.n1info.com/lifestyle/vucic-bolji-od-tita-ako-izgubi-ja-cu-da-crknem-jugoslavija-je-gotova/',
+        'info_dict': {
+            'id': '2wmfee9eycp71',
+            'ext': 'mp4',
+            'title': '"Ako Vučić izgubi izbore, ja ću da crknem, Jugoslavija je gotova"',
+            'upload_date': '20210924',
+            'timestamp': 1632448649.0,
+            'uploader': 'YouLotWhatDontStop',
+        },
+        'params': {
+            'format': 'bestvideo',
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://hr.n1info.com/vijesti/pravobraniteljica-o-ubojstvu-u-zagrebu-radi-se-o-doista-nezapamcenoj-situaciji/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(r'<h1[^>]+>(.+?)</h1>', webpage, 'title')
+        timestamp = unified_timestamp(self._html_search_meta('article:published_time', webpage))
+
+        videos = re.findall(r'(?m)(<video[^>]+>)', webpage)
+        entries = []
+        for video in videos:
+            video_data = extract_attributes(video)
+            entries.append({
+                '_type': 'url_transparent',
+                'url': video_data.get('data-url'),
+                'id': video_data.get('id'),
+                'title': title,
+                'thumbnail': video_data.get('data-thumbnail'),
+                'timestamp': timestamp,
+                'ie_key': N1InfoAssetIE.ie_key()})
+
+        embedded_videos = re.findall(r'(<iframe[^>]+>)', webpage)
+        for embedded_video in embedded_videos:
+            video_data = extract_attributes(embedded_video)
+            url = video_data.get('src')
+            if url.startswith('https://www.youtube.com'):
+                entries.append(self.url_result(url, ie=YoutubeIE.ie_key()))
+            elif url.startswith('https://www.redditmedia.com'):
+                entries.append(self.url_result(url, ie=RedditRIE.ie_key()))
+
+        return {
+            '_type': 'playlist',
+            'id': video_id,
+            'title': title,
+            'timestamp': timestamp,
+            'entries': entries,
+        }

From 3cf4b91dc5ecc8e936e75204afe62b2884c55362 Mon Sep 17 00:00:00 2001
From: ChillingPepper
<90042155+ChillingPepper@users.noreply.github.com> Date: Mon, 27 Sep 2021 23:00:41 +0200 Subject: [PATCH 172/641] [SovietsCloset] Add duration from m3u8 (#908) Authored by: ChillingPepper --- yt_dlp/extractor/common.py | 19 +++++++++++++++++++ yt_dlp/extractor/sovietscloset.py | 9 +++++++++ 2 files changed, 28 insertions(+) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 4797e8e2d6..114b1faaf2 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2219,6 +2219,25 @@ class InfoExtractor(object): last_stream_inf = {} return formats, subtitles + def _extract_m3u8_vod_duration( + self, m3u8_vod_url, video_id, note=None, errnote=None, data=None, headers={}, query={}): + + m3u8_vod = self._download_webpage( + m3u8_vod_url, video_id, + note='Downloading m3u8 VOD manifest' if note is None else note, + errnote='Failed to download VOD manifest' if errnote is None else errnote, + fatal=False, data=data, headers=headers, query=query) + + return self._parse_m3u8_vod_duration(m3u8_vod or '', video_id) + + def _parse_m3u8_vod_duration(self, m3u8_vod, video_id): + if '#EXT-X-PLAYLIST-TYPE:VOD' not in m3u8_vod: + return None + + return int(sum( + float(line[len('#EXTINF:'):].split(',')[0]) + for line in m3u8_vod.splitlines() if line.startswith('#EXTINF:'))) or None + @staticmethod def _xpath_ns(path, namespace=None): if not namespace: diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py index 64201c88c3..7df23759ab 100644 --- a/yt_dlp/extractor/sovietscloset.py +++ b/yt_dlp/extractor/sovietscloset.py @@ -72,6 +72,7 @@ class SovietsClosetIE(SovietsClosetBaseIE): 'upload_date': '20170413', 'uploader_id': 'SovietWomble', 'uploader_url': 'https://www.twitch.tv/SovietWomble', + 'duration': 7007, 'was_live': True, 'availability': 'public', 'series': 'The Witcher', @@ -96,6 +97,7 @@ class SovietsClosetIE(SovietsClosetBaseIE): 'upload_date': '20160420', 'uploader_id': 'SovietWomble', 'uploader_url': 'https://www.twitch.tv/SovietWomble', + 'duration': 8804, 'was_live': True, 'availability': 'public', 'series': 'Arma 3', @@ -116,9 +118,16 @@ class SovietsClosetIE(SovietsClosetBaseIE): m3u8_formats = self._extract_m3u8_formats(m3u8_url, video_id, headers=self.MEDIADELIVERY_REFERER) self._sort_formats(m3u8_formats) + if not m3u8_formats: + duration = None + else: + duration = self._extract_m3u8_vod_duration( + m3u8_formats[0]['url'], video_id, headers=self.MEDIADELIVERY_REFERER) + return { 'formats': m3u8_formats, 'thumbnail': thumbnail_url, + 'duration': duration, } def _real_extract(self, url): From f1d42a83ab47683ddbe7c66393130f63262aeca0 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Tue, 28 Sep 2021 02:31:23 +0530 Subject: [PATCH 173/641] [Rumble] Add RumbleChannelIE (#1088) Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 5 ++++- yt_dlp/extractor/rumble.py | 37 +++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 4774a3ebb8..93934b682f 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1205,7 +1205,10 @@ from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETe from .rtvnh import RTVNHIE from .rtvs import RTVSIE from .ruhd import RUHDIE -from .rumble import RumbleEmbedIE +from .rumble import ( + RumbleEmbedIE, + RumbleChannelIE, +) from .rutube import ( RutubeIE, RutubeChannelIE, diff --git 
a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py index b526de76bc..49c1f44851 100644 --- a/yt_dlp/extractor/rumble.py +++ b/yt_dlp/extractor/rumble.py @@ -1,15 +1,17 @@ # coding: utf-8 from __future__ import unicode_literals +import itertools import re from .common import InfoExtractor -from ..compat import compat_str +from ..compat import compat_str, compat_HTTPError from ..utils import ( determine_ext, int_or_none, parse_iso8601, try_get, + ExtractorError, ) @@ -75,3 +77,36 @@ class RumbleEmbedIE(InfoExtractor): 'channel_url': author.get('url'), 'duration': int_or_none(video.get('duration')), } + + +class RumbleChannelIE(InfoExtractor): + _VALID_URL = r'(?Phttps?://(?:www\.)?rumble\.com/(?:c|user)/(?P[^&?#$/]+))' + + _TESTS = [{ + 'url': 'https://rumble.com/c/Styxhexenhammer666', + 'playlist_mincount': 1160, + 'info_dict': { + 'id': 'Styxhexenhammer666', + }, + }, { + 'url': 'https://rumble.com/user/goldenpoodleharleyeuna', + 'playlist_count': 4, + 'info_dict': { + 'id': 'goldenpoodleharleyeuna', + }, + }] + + def entries(self, url, playlist_id): + for page in itertools.count(1): + try: + webpage = self._download_webpage(f'{url}?page={page}', playlist_id, note='Downloading page %d' % page) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: + break + raise + for video_url in re.findall(r'class=video-item--a\s?href=([^>]+\.html)', webpage): + yield self.url_result('https://rumble.com' + video_url) + + def _real_extract(self, url): + url, playlist_id = self._match_valid_url(url).groups() + return self.playlist_result(self.entries(url, playlist_id), playlist_id=playlist_id) From 250a938de82fb6b023c09ce3d89471c5871ff830 Mon Sep 17 00:00:00 2001 From: shirt <2660574+shirt-dev@users.noreply.github.com> Date: Mon, 27 Sep 2021 18:42:33 -0400 Subject: [PATCH 174/641] [ffmpeg] Set max probesize to workaround AAC HLS stream issues (#1109) Fixes: #618, #998, #1039 Authored by: shirt-dev --- yt_dlp/postprocessor/ffmpeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 679377aa63..ad330ab8e5 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -262,7 +262,7 @@ class FFmpegPostProcessor(PostProcessor): oldest_mtime = min( os.stat(encodeFilename(path)).st_mtime for path, _ in input_path_opts if path) - cmd = [encodeFilename(self.executable, True), encodeArgument('-y')] + cmd = [encodeFilename(self.executable, True), encodeArgument('-y'), encodeArgument('-probesize'), encodeArgument('max')] # avconv does not have repeat option if self.basename == 'ffmpeg': cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')] From 80c360d7aad7ccda48ccd13be62dbb6fc5a6f128 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Tue, 28 Sep 2021 16:06:31 +0530 Subject: [PATCH 175/641] [LinkedInLearning] Fix newline bug in subtitles (#1104) Authored by: Ashish0804 --- yt_dlp/extractor/linkedin.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py index f47d59a38c..3ce906e2f1 100644 --- a/yt_dlp/extractor/linkedin.py +++ b/yt_dlp/extractor/linkedin.py @@ -94,9 +94,9 @@ class LinkedInLearningIE(LinkedInLearningBaseIE): for line, (line_dict, next_dict) in enumerate(zip_longest(transcript_lines, transcript_lines[1:])): start_time, caption = line_dict['transcriptStartAt'] / 1000, line_dict['caption'] end_time = 
next_dict['transcriptStartAt'] / 1000 if next_dict else duration or start_time + 1
-            srt_data += '%d\n%s --> %s\n%s\n' % (line + 1, srt_subtitles_timecode(start_time),
-                                                 srt_subtitles_timecode(end_time),
-                                                 caption)
+            srt_data += '%d\n%s --> %s\n%s\n\n' % (line + 1, srt_subtitles_timecode(start_time),
+                                                   srt_subtitles_timecode(end_time),
+                                                   caption)
         return srt_data

     def _real_extract(self, url):

From 7687c8ac6e223a725b3ef8f56f04779bebdc86c5 Mon Sep 17 00:00:00 2001
From: shirt <2660574+shirt-dev@users.noreply.github.com>
Date: Tue, 28 Sep 2021 14:53:24 -0400
Subject: [PATCH 176/641] [HLS] Fix decryption issues (#1117)

* Unpad HLS fragments with PKCS#7 according to
  datatracker.ietf.org/doc/html/rfc8216
* media_sequence should only be incremented for media fragments
* The native decryption should only be used if ffmpeg is unavailable,
  since it is significantly slower

Closes #1086
Authored by: shirt-dev, pukkandan
---
 yt_dlp/downloader/fragment.py |  3 ++-
 yt_dlp/downloader/hls.py      | 20 +++++++++++++-------
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py
index 31f9467922..22134f3b6c 100644
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@@ -355,7 +355,8 @@ class FragmentFD(FileDownloader):
             # not what it decrypts to.
             if self.params.get('test', False):
                 return frag_content
-            return aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv)
+            decrypted_data = aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv)
+            return decrypted_data[:-decrypted_data[-1]]

         return decrypt_fragment

diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py
index f343e18797..751d874d42 100644
--- a/yt_dlp/downloader/hls.py
+++ b/yt_dlp/downloader/hls.py
@@ -9,6 +9,7 @@ from .fragment import FragmentFD
 from .external import FFmpegFD

 from ..compat import (
+    compat_pycrypto_AES,
     compat_urlparse,
 )
 from ..utils import (
@@ -68,14 +69,20 @@ class HlsFD(FragmentFD):
         man_url = urlh.geturl()
         s = urlh.read().decode('utf-8', 'ignore')

-        if not self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')):
-            if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
-                self.report_error('pycryptodome not found.
Please install') - return False + can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None + if can_download and not compat_pycrypto_AES and '#EXT-X-KEY:METHOD=AES-128' in s: + if FFmpegFD.available(): + can_download, message = False, 'The stream has AES-128 encryption and pycryptodome is not available' + else: + message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodome are available; ' + 'Decryption will be performed natively, but will be extremely slow') + if not can_download: + message = message or 'Unsupported features have been detected' fd = FFmpegFD(self.ydl, self.params) - self.report_warning( - '%s detected unsupported features; extraction will be delegated to %s' % (self.FD_NAME, fd.get_basename())) + self.report_warning(f'{message}; extraction will be delegated to {fd.get_basename()}') return fd.real_download(filename, info_dict) + elif message: + self.report_warning(message) is_webvtt = info_dict['ext'] == 'vtt' if is_webvtt: @@ -232,7 +239,6 @@ class HlsFD(FragmentFD): elif line.startswith('#EXT-X-DISCONTINUITY'): discontinuity_count += 1 i += 1 - media_sequence += 1 # We only download the first fragment during the test if self.params.get('test', False): From 7756277882e2dddde53df604945d02c74f477f38 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 29 Sep 2021 03:07:23 +0530 Subject: [PATCH 177/641] Workaround for bug in `ssl.SSLContext.load_default_certs` (#1118) * Remove old compat code * Load certificates only when not using nocheckcertificate * Load each certificate individually Closes #1060 Related bugs.python.org/issue35665, bugs.python.org/issue4531 --- yt_dlp/utils.py | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index eba89fb8bc..4aa36a1165 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2352,29 +2352,35 @@ def formatSeconds(secs, delim=':', msec=False): return '%s.%03d' % (ret, secs % 1) if msec else ret -def make_HTTPS_handler(params, **kwargs): - opts_no_check_certificate = params.get('nocheckcertificate', False) - if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9 - context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH) - if opts_no_check_certificate: - context.check_hostname = False - context.verify_mode = ssl.CERT_NONE +def _ssl_load_windows_store_certs(ssl_context, storename): + # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py + try: + certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename) + if encoding == 'x509_asn' and ( + trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)] + except PermissionError: + return + for cert in certs: try: - return YoutubeDLHTTPSHandler(params, context=context, **kwargs) - except TypeError: - # Python 2.7.8 - # (create_default_context present but HTTPSHandler has no context=) + ssl_context.load_verify_locations(cadata=cert) + except ssl.SSLError: pass - if sys.version_info < (3, 2): - return YoutubeDLHTTPSHandler(params, **kwargs) - else: # Python < 3.4 - context = ssl.SSLContext(ssl.PROTOCOL_TLSv1) - context.verify_mode = (ssl.CERT_NONE - if opts_no_check_certificate - else ssl.CERT_REQUIRED) + +def make_HTTPS_handler(params, **kwargs): + opts_check_certificate = not params.get('nocheckcertificate') + context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + context.check_hostname = opts_check_certificate + context.verify_mode = ssl.CERT_REQUIRED if 
opts_check_certificate else ssl.CERT_NONE + if opts_check_certificate: + # Work around the issue in load_default_certs when there are bad certificates. See: + # https://github.com/yt-dlp/yt-dlp/issues/1060, + # https://bugs.python.org/issue35665, https://bugs.python.org/issue4531 + if sys.platform == 'win32': + for storename in ('CA', 'ROOT'): + _ssl_load_windows_store_certs(context, storename) context.set_default_verify_paths() - return YoutubeDLHTTPSHandler(params, context=context, **kwargs) + return YoutubeDLHTTPSHandler(params, context=context, **kwargs) def bug_reports_message(before=';'): From 2d997542cae916d168f2e27bf05844cf8586494c Mon Sep 17 00:00:00 2001 From: ajj8 <35781586+ajj8@users.noreply.github.com> Date: Tue, 28 Sep 2021 23:37:33 +0100 Subject: [PATCH 178/641] [bbc] Extract better quality videos (#1113) mobile-tablet-main only provides 540p25, so it shouldn't be used for the first attempt. Instead pc provides up to 720p50 Authored by: ajj8 --- yt_dlp/extractor/bbc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index de497ab1d3..4e2dcd76b8 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -588,8 +588,8 @@ class BBCIE(BBCCoUkIE): _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P[^/#?]+)' _MEDIA_SETS = [ - 'mobile-tablet-main', 'pc', + 'mobile-tablet-main', ] _TESTS = [{ From 851876095bd0ce671bbd09cc2c58c76b0dffe533 Mon Sep 17 00:00:00 2001 From: i6t <62123048+i6t@users.noreply.github.com> Date: Wed, 29 Sep 2021 19:23:56 +0900 Subject: [PATCH 179/641] [Gettr] Add extractor (#1120) Fixes: https://github.com/ytdl-org/youtube-dl/issues/29589 Authored by: i6t --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/gettr.py | 110 +++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 yt_dlp/extractor/gettr.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 93934b682f..1776a4d268 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -507,6 +507,7 @@ from .gazeta import GazetaIE from .gdcvault import GDCVaultIE from .gedidigital import GediDigitalIE from .generic import GenericIE +from .gettr import GettrIE from .gfycat import GfycatIE from .giantbomb import GiantBombIE from .giga import GigaIE diff --git a/yt_dlp/extractor/gettr.py b/yt_dlp/extractor/gettr.py new file mode 100644 index 0000000000..aa50b2f357 --- /dev/null +++ b/yt_dlp/extractor/gettr.py @@ -0,0 +1,110 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + dict_get, + float_or_none, + int_or_none, + remove_end, + str_or_none, + try_get, + url_or_none, + urljoin, +) + + +class GettrIE(InfoExtractor): + _VALID_URL = r'https?://(www\.)?gettr\.com/post/(?P[a-z0-9]+)' + _MEDIA_BASE_URL = 'https://media.gettr.com/' + + _TESTS = [{ + 'url': 'https://www.gettr.com/post/pcf6uv838f', + 'info_dict': { + 'id': 'pcf6uv838f', + 'title': 'md5:9086a646bbd06c41c4fe8e52b3c93454', + 'description': 'md5:be0577f1e4caadc06de4a002da2bf287', + 'ext': 'mp4', + 'uploader': 'EpochTV', + 'uploader_id': 'epochtv', + 'thumbnail': r're:^https?://.+/out\.jpg', + 'timestamp': 1632782451058, + 'duration': 58.5585, + } + }, { + 'url': 'https://gettr.com/post/p4iahp', + 'info_dict': { + 'id': 'p4iahp', + 'title': 'md5:b03c07883db6fbc1aab88877a6c3b149', + 'description': 'md5:741b7419d991c403196ed2ea7749a39d', + 'ext': 'mp4', + 'uploader': 'Neues Forum 
Freiheit', + 'uploader_id': 'nf_freiheit', + 'thumbnail': r're:^https?://.+/out\.jpg', + 'timestamp': 1626594455017, + 'duration': 23, + } + }] + + def _real_extract(self, url): + post_id = self._match_id(url) + webpage = self._download_webpage(url, post_id) + + api_data = self._download_json( + 'https://api.gettr.com/u/post/%s?incl="poststats|userinfo"' % post_id, post_id) + + post_data = try_get(api_data, lambda x: x['result']['data']) + user_data = try_get(api_data, lambda x: x['result']['aux']['uinf'][post_data['uid']]) or {} + + if post_data.get('nfound'): + raise ExtractorError(post_data.get('txt'), expected=True) + + title = description = str_or_none( + post_data.get('txt') or self._og_search_description(webpage)) + + uploader = str_or_none( + user_data.get('nickname') + or remove_end(self._og_search_title(webpage), ' on GETTR')) + if uploader: + title = '%s - %s' % (uploader, title) + + if not dict_get(post_data, ['vid', 'ovid']): + raise ExtractorError('There\'s no video in this post.') + + vid = post_data.get('vid') + ovid = post_data.get('ovid') + + formats = self._extract_m3u8_formats( + urljoin(self._MEDIA_BASE_URL, vid), post_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls') if vid else [] + + if ovid: + formats.append({ + 'url': urljoin(self._MEDIA_BASE_URL, ovid), + 'format_id': 'ovid', + 'ext': 'mp4', + 'width': int_or_none(post_data.get('vid_wid')), + 'height': int_or_none(post_data.get('vid_hgt')), + 'source_preference': 1, + 'quality': 1, + }) + + self._sort_formats(formats) + + return { + 'id': post_id, + 'title': title, + 'description': description, + 'thumbnail': url_or_none( + urljoin(self._MEDIA_BASE_URL, post_data.get('main')) + or self._og_search_thumbnail(webpage)), + 'timestamp': int_or_none(post_data.get('cdate')), + 'uploader_id': str_or_none( + dict_get(user_data, ['_id', 'username']) + or post_data.get('uid')), + 'uploader': uploader, + 'formats': formats, + 'duration': float_or_none(post_data.get('vid_dur')), + 'tags': post_data.get('htgs'), + } From 804ca01cc7ab01548513515373836277431217e3 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 27 Sep 2021 12:27:54 +0530 Subject: [PATCH 180/641] [build] Add more files to the tarball Closes #1099 --- Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 763d5223df..9ce975ea2b 100644 --- a/Makefile +++ b/Makefile @@ -112,7 +112,7 @@ _EXTRACTOR_FILES = $(shell find yt_dlp/extractor -iname '*.py' -and -not -iname yt_dlp/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) $(PYTHON) devscripts/make_lazy_extractors.py $@ -yt-dlp.tar.gz: yt-dlp README.md supportedsites.md yt-dlp.1 completions Changelog.md AUTHORS +yt-dlp.tar.gz: all @tar -czf $(DESTDIR)/yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \ --exclude '*.DS_Store' \ --exclude '*.kate-swp' \ @@ -121,12 +121,12 @@ yt-dlp.tar.gz: yt-dlp README.md supportedsites.md yt-dlp.1 completions Changelog --exclude '*~' \ --exclude '__pycache__' \ --exclude '.git' \ - --exclude 'docs/_build' \ -- \ - devscripts test \ - Changelog.md AUTHORS LICENSE README.md supportedsites.md \ - Makefile MANIFEST.in yt-dlp.1 completions \ - setup.py setup.cfg yt-dlp yt_dlp + README.md supportedsites.md Changelog.md LICENSE \ + CONTRIBUTING.md Collaborators.md CONTRIBUTORS AUTHORS \ + Makefile MANIFEST.in yt-dlp.1 README.txt completions \ + setup.py setup.cfg yt-dlp yt_dlp requirements.txt \ + devscripts test tox.ini pytest.ini AUTHORS: 
.mailmap git shortlog -s -n | cut -f2 | sort > AUTHORS From 1f2a268bd33339a5375bffb77a27871213261a13 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 30 Sep 2021 02:15:33 +0530 Subject: [PATCH 181/641] [embedsubtitle] Fix error when duration is unknown --- yt_dlp/postprocessor/ffmpeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index ad330ab8e5..058926929f 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -545,7 +545,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): return [], information filename = information['filepath'] - if self._duration_mismatch( + if information.get('duration') and self._duration_mismatch( self._get_real_video_duration(information, False), information['duration']): self.to_screen(f'Skipping {self.pp_key()} since the real and expected durations mismatch') return [], information From 80c03fa98fdd54410bd36684ef453f6976a9c0bf Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 30 Sep 2021 02:14:42 +0530 Subject: [PATCH 182/641] Allow empty output template to skip a type of file Closes #760, #1111 --- README.md | 2 +- yt_dlp/YoutubeDL.py | 281 +++++++++++++++++++++++--------------------- yt_dlp/__init__.py | 1 + 3 files changed, 150 insertions(+), 134 deletions(-) diff --git a/README.md b/README.md index 897e0600e1..512b36b2e0 100644 --- a/README.md +++ b/README.md @@ -971,7 +971,7 @@ To summarize, the general syntax for a field is: %(name[.keys][addition][>strf][,alternate][|default])[flags][width][.precision][length]type ``` -Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video. +Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video. If any of the templates (except default) is empty, that type of file will not be written. Eg: `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for video. 
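
As a rough illustration of the behaviour this patch documents (a sketch, not part of the diff; the URL is a placeholder): when embedding yt-dlp, the same per-type templates can be passed as a dictionary, and an empty value skips writing that file type.

```python
# Minimal sketch, assuming the Python embedding API: an empty output
# template for a file type suppresses writing that type of file.
import yt_dlp

ydl_opts = {
    'writethumbnail': True,
    'outtmpl': {
        'default': '%(title)s.%(ext)s',
        'thumbnail': '',  # empty template => no thumbnails for individual videos
        'pl_thumbnail': '%(title)s.%(ext)s',  # playlist thumbnails are still written
    },
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://example.com/placeholder-video'])
```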
The available fields are: diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 367d3fa608..2e150cd979 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -859,7 +859,7 @@ class YoutubeDL(object): outtmpl_dict = {'default': outtmpl_dict} outtmpl_dict.update({ k: v for k, v in DEFAULT_OUTTMPL.items() - if not outtmpl_dict.get(k)}) + if outtmpl_dict.get(k) is None}) for key, val in outtmpl_dict.items(): if isinstance(val, bytes): self.report_warning( @@ -1084,7 +1084,7 @@ class YoutubeDL(object): filename = outtmpl % template_dict force_ext = OUTTMPL_TYPES.get(tmpl_type) - if force_ext is not None: + if filename and force_ext is not None: filename = replace_extension(filename, force_ext, info_dict.get('ext')) # https://github.com/blackjack4494/youtube-dlc/issues/85 @@ -1106,6 +1106,8 @@ class YoutubeDL(object): """Generate the output filename.""" filename = self._prepare_filename(info_dict, dir_type or 'default') + if not filename and dir_type not in ('', 'temp'): + return '' if warn: if not self.params.get('paths'): @@ -1517,38 +1519,14 @@ class YoutubeDL(object): } ie_copy.update(dict(ie_result)) - if self.params.get('writeinfojson', False): - infofn = self.prepare_filename(ie_copy, 'pl_infojson') - if not self._ensure_dir_exists(encodeFilename(infofn)): - return - if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)): - self.to_screen('[info] Playlist metadata is already present') - else: - self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn) - try: - write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn) - except (OSError, IOError): - self.report_error('Cannot write playlist metadata to JSON file ' + infofn) - + if self._write_info_json('playlist', ie_result, + self.prepare_filename(ie_copy, 'pl_infojson')) is None: + return + if self._write_description('playlist', ie_result, + self.prepare_filename(ie_copy, 'pl_description')) is None: + return # TODO: This should be passed to ThumbnailsConvertor if necessary - self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail')) - - if self.params.get('writedescription', False): - descfn = self.prepare_filename(ie_copy, 'pl_description') - if not self._ensure_dir_exists(encodeFilename(descfn)): - return - if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)): - self.to_screen('[info] Playlist description is already present') - elif ie_result.get('description') is None: - self.report_warning('There\'s no playlist description to write.') - else: - try: - self.to_screen('[info] Writing playlist description to: ' + descfn) - with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: - descfile.write(ie_result['description']) - except (OSError, IOError): - self.report_error('Cannot write playlist description file ' + descfn) - return + self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail')) if self.params.get('playlistreverse', False): entries = entries[::-1] @@ -2528,37 +2506,43 @@ class YoutubeDL(object): if self.params.get('simulate'): if self.params.get('force_write_download_archive', False): self.record_download_archive(info_dict) - # Do nothing else if in simulate mode return if full_filename is None: return - if not self._ensure_dir_exists(encodeFilename(full_filename)): return if not self._ensure_dir_exists(encodeFilename(temp_filename)): return - if self.params.get('writedescription', False): - descfn = 
self.prepare_filename(info_dict, 'description') - if not self._ensure_dir_exists(encodeFilename(descfn)): - return - if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)): - self.to_screen('[info] Video description is already present') - elif info_dict.get('description') is None: - self.report_warning('There\'s no description to write.') - else: - try: - self.to_screen('[info] Writing video description to: ' + descfn) - with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: - descfile.write(info_dict['description']) - except (OSError, IOError): - self.report_error('Cannot write description file ' + descfn) - return + if self._write_description('video', info_dict, + self.prepare_filename(info_dict, 'description')) is None: + return + sub_files = self._write_subtitles(info_dict, temp_filename) + if sub_files is None: + return + files_to_move.update(dict(sub_files)) + + thumb_files = self._write_thumbnails( + 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail')) + if thumb_files is None: + return + files_to_move.update(dict(thumb_files)) + + infofn = self.prepare_filename(info_dict, 'infojson') + _infojson_written = self._write_info_json('video', info_dict, infofn) + if _infojson_written: + info_dict['__infojson_filename'] = infofn + elif _infojson_written is None: + return + + # Note: Annotations are deprecated + annofn = None if self.params.get('writeannotations', False): annofn = self.prepare_filename(info_dict, 'annotation') + if annofn: if not self._ensure_dir_exists(encodeFilename(annofn)): return if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)): @@ -2576,69 +2560,6 @@ class YoutubeDL(object): self.report_error('Cannot write annotations file: ' + annofn) return - subtitles_are_requested = any([self.params.get('writesubtitles', False), - self.params.get('writeautomaticsub')]) - - if subtitles_are_requested and info_dict.get('requested_subtitles'): - # subtitles download errors are already managed as troubles in relevant IE - # that way it will silently go on when used with unsupporting IE - subtitles = info_dict['requested_subtitles'] - # ie = self.get_info_extractor(info_dict['extractor_key']) - for sub_lang, sub_info in subtitles.items(): - sub_format = sub_info['ext'] - sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext')) - sub_filename_final = subtitles_filename( - self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext')) - if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)): - self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format)) - sub_info['filepath'] = sub_filename - files_to_move[sub_filename] = sub_filename_final - else: - self.to_screen('[info] Writing video subtitles to: ' + sub_filename) - if sub_info.get('data') is not None: - try: - # Use newline='' to prevent conversion of newline characters - # See https://github.com/ytdl-org/youtube-dl/issues/10268 - with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile: - subfile.write(sub_info['data']) - sub_info['filepath'] = sub_filename - files_to_move[sub_filename] = sub_filename_final - except (OSError, IOError): - self.report_error('Cannot write subtitles file ' + sub_filename) - return - else: - try: - sub_copy = sub_info.copy() - sub_copy.setdefault('http_headers', info_dict.get('http_headers')) - self.dl(sub_filename, sub_copy, 
subtitle=True) - sub_info['filepath'] = sub_filename - files_to_move[sub_filename] = sub_filename_final - except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err: - self.report_warning('Unable to download subtitle for "%s": %s' % - (sub_lang, error_to_compat_str(err))) - continue - - if self.params.get('writeinfojson', False): - infofn = self.prepare_filename(info_dict, 'infojson') - if not self._ensure_dir_exists(encodeFilename(infofn)): - return - if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)): - self.to_screen('[info] Video metadata is already present') - else: - self.to_screen('[info] Writing video metadata as JSON to: ' + infofn) - try: - write_json_file(self.sanitize_info(info_dict, self.params.get('clean_infojson', True)), infofn) - except (OSError, IOError): - self.report_error('Cannot write video metadata to JSON file ' + infofn) - return - info_dict['__infojson_filename'] = infofn - - for thumb_ext in self._write_thumbnails(info_dict, temp_filename): - thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext')) - thumb_filename = replace_extension( - self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext')) - files_to_move[thumb_filename_temp] = thumb_filename - # Write internet shortcut files url_link = webloc_link = desktop_link = False if self.params.get('writelink', False): @@ -3416,39 +3337,133 @@ class YoutubeDL(object): encoding = preferredencoding() return encoding - def _write_thumbnails(self, info_dict, filename): # return the extensions + def _write_info_json(self, label, ie_result, infofn): + ''' Write infojson and returns True = written, False = skip, None = error ''' + if not self.params.get('writeinfojson'): + return False + elif not infofn: + self.write_debug(f'Skipping writing {label} infojson') + return False + elif not self._ensure_dir_exists(infofn): + return None + elif not self.params.get('overwrites', True) and os.path.exists(infofn): + self.to_screen(f'[info] {label.title()} metadata is already present') + else: + self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}') + try: + write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn) + except (OSError, IOError): + self.report_error(f'Cannot write {label} metadata to JSON file {infofn}') + return None + return True + + def _write_description(self, label, ie_result, descfn): + ''' Write description and returns True = written, False = skip, None = error ''' + if not self.params.get('writedescription'): + return False + elif not descfn: + self.write_debug(f'Skipping writing {label} description') + return False + elif not self._ensure_dir_exists(descfn): + return None + elif not self.params.get('overwrites', True) and os.path.exists(descfn): + self.to_screen(f'[info] {label.title()} description is already present') + elif ie_result.get('description') is None: + self.report_warning(f'There\'s no {label} description to write') + return False + else: + try: + self.to_screen(f'[info] Writing {label} description to: {descfn}') + with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: + descfile.write(ie_result['description']) + except (OSError, IOError): + self.report_error(f'Cannot write {label} description file {descfn}') + return None + return True + + def _write_subtitles(self, info_dict, filename): + ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error''' + ret = [] + subtitles = 
info_dict.get('requested_subtitles') + if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): + # subtitles download errors are already managed as troubles in relevant IE + # that way it will silently go on when used with unsupporting IE + return ret + + sub_filename_base = self.prepare_filename(info_dict, 'subtitle') + if not sub_filename_base: + self.to_screen('[info] Skipping writing video subtitles') + return ret + for sub_lang, sub_info in subtitles.items(): + sub_format = sub_info['ext'] + sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) + sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext')) + if not self.params.get('overwrites', True) and os.path.exists(sub_filename): + self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present') + sub_info['filepath'] = sub_filename + ret.append((sub_filename, sub_filename_final)) + continue + + self.to_screen(f'[info] Writing video subtitles to: {sub_filename}') + if sub_info.get('data') is not None: + try: + # Use newline='' to prevent conversion of newline characters + # See https://github.com/ytdl-org/youtube-dl/issues/10268 + with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile: + subfile.write(sub_info['data']) + sub_info['filepath'] = sub_filename + ret.append((sub_filename, sub_filename_final)) + continue + except (OSError, IOError): + self.report_error(f'Cannot write video subtitles file {sub_filename}') + return None + + try: + sub_copy = sub_info.copy() + sub_copy.setdefault('http_headers', info_dict.get('http_headers')) + self.dl(sub_filename, sub_copy, subtitle=True) + sub_info['filepath'] = sub_filename + ret.append((sub_filename, sub_filename_final)) + except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err: + self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}') + continue + return ret + + def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None): + ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) ''' write_all = self.params.get('write_all_thumbnails', False) - thumbnails = [] + thumbnails, ret = [], [] if write_all or self.params.get('writethumbnail', False): thumbnails = info_dict.get('thumbnails') or [] multiple = write_all and len(thumbnails) > 1 - ret = [] - for t in thumbnails[::-1]: - thumb_ext = determine_ext(t['url'], 'jpg') - suffix = '%s.' % t['id'] if multiple else '' - thumb_display_id = '%s ' % t['id'] if multiple else '' - thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext')) + if thumb_filename_base is None: + thumb_filename_base = filename + if thumbnails and not thumb_filename_base: + self.write_debug(f'Skipping writing {label} thumbnail') + return ret - if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)): - ret.append(suffix + thumb_ext) + for t in thumbnails[::-1]: + thumb_ext = (f'{t["id"]}.' 
if multiple else '') + determine_ext(t['url'], 'jpg')
+            thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
+            thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
+            thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
+
+            if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
+                ret.append((thumb_filename, thumb_filename_final))
                 t['filepath'] = thumb_filename
-                self.to_screen('[%s] %s: Thumbnail %sis already present' %
-                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
+                self.to_screen(f'[info] {thumb_display_id.title()} is already present')
             else:
-                self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
-                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
+                self.to_screen(f'[info] Downloading {thumb_display_id} ...')
                 try:
                     uf = self.urlopen(t['url'])
+                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                         shutil.copyfileobj(uf, thumbf)
-                    ret.append(suffix + thumb_ext)
-                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
-                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
+                    ret.append((thumb_filename, thumb_filename_final))
                     t['filepath'] = thumb_filename
                 except network_exceptions as err:
-                    self.report_warning('Unable to download thumbnail "%s": %s' %
-                                        (t['url'], error_to_compat_str(err)))
+                    self.report_warning(f'Unable to download {thumb_display_id}: {err}')
             if ret and not write_all:
                 break
         return ret
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 9987c64721..53ea8136f0 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -535,6 +535,7 @@ def _real_main(argv=None):
             })
         if not already_have_thumbnail:
             opts.writethumbnail = True
+            opts.outtmpl['pl_thumbnail'] = ''
     if opts.split_chapters:
         postprocessors.append({
             'key': 'FFmpegSplitChapters',

From 8e3fd7e034cdd54972d13394821cd9e55e1c3735 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sun, 26 Sep 2021 20:00:56 +0530
Subject: [PATCH 183/641] [reddit] Fix 429 by generating a random
 `reddit_session`

Related: a76e2e0f8898c06939b6a123fa863ab8876cfa20, #1014, https://github.com/ytdl-org/youtube-dl/issues/29986
Original PR: https://github.com/ytdl-org/youtube-dl/pull/30017
Authored by: AjaxGb
---
 yt_dlp/extractor/reddit.py | 24 ++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py
index 14592bc62c..e5a1f69205 100644
--- a/yt_dlp/extractor/reddit.py
+++ b/yt_dlp/extractor/reddit.py
@@ -1,5 +1,4 @@
-from __future__ import unicode_literals
-
+import random
 from .common import InfoExtractor
 from ..utils import (
@@ -49,7 +48,7 @@ class RedditIE(InfoExtractor):


 class RedditRIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:[^/]+\.)?reddit(?:media)?\.com/r/(?P<slug>[^/]+/comments/(?P<id>[^/?#&]+))'
+    _VALID_URL = r'https?://(?P<subdomain>[^/]+\.)?reddit(?:media)?\.com/r/(?P<slug>[^/]+/comments/(?P<id>[^/?#&]+))'
     _TESTS = [{
         'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
         'info_dict': {
@@ -99,13 +98,22 @@ class RedditRIE(InfoExtractor):
         'only_matching': True,
     }]

+    @staticmethod
+    def _gen_session_id():
+        id_length = 16
+        rand_max = 1 << (id_length * 4)
+        return '%0.*x' % (id_length, random.randrange(rand_max))
+
     def _real_extract(self, url):
-        slug, video_id = self._match_valid_url(url).group('slug', 'id')
-
-        self._set_cookie('reddit.com', '_options', '%7B%22pref_quarantine_optin%22%3A%20true%7D')
-        data =
self._download_json(
-            f'https://old.reddit.com/r/{slug}/.json', video_id)[0]['data']['children'][0]['data']
+        subdomain, slug, video_id = self._match_valid_url(url).group('subdomain', 'slug', 'id')

+        self._set_cookie('.reddit.com', 'reddit_session', self._gen_session_id())
+        self._set_cookie('.reddit.com', '_options', '%7B%22pref_quarantine_optin%22%3A%20true%7D')
+        data = self._download_json(f'https://{subdomain}.reddit.com/r/{slug}/.json', video_id, fatal=False)
+        if not data:
+            # Fall back to old.reddit.com in case the requested subdomain fails
+            data = self._download_json(f'https://old.reddit.com/r/{slug}/.json', video_id)
+        data = data[0]['data']['children'][0]['data']

         video_url = data['url']

         # Avoid recursing into the same reddit URL

From 3ae5e7977439193519c0ea62eba3aa3111c5571b Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Thu, 30 Sep 2021 02:23:33 +0530
Subject: [PATCH 184/641] [postprocessor] Add plugin support

Adds option `--use-postprocessor` to enable them
---
 README.md                               | 25 ++++++++++++--
 yt_dlp/YoutubeDL.py                     | 10 +++---
 yt_dlp/__init__.py                      |  2 +-
 yt_dlp/extractor/__init__.py            |  4 +--
 yt_dlp/options.py                       | 20 +++++++++++
 yt_dlp/postprocessor/__init__.py        | 46 +++++++------------------
 yt_dlp/utils.py                         |  5 ++-
 ytdlp_plugins/extractor/__init__.py     |  3 +-
 ytdlp_plugins/extractor/sample.py       |  2 --
 ytdlp_plugins/postprocessor/__init__.py |  4 +++
 ytdlp_plugins/postprocessor/sample.py   | 23 +++++++++++++
 11 files changed, 95 insertions(+), 49 deletions(-)
 create mode 100644 ytdlp_plugins/postprocessor/__init__.py
 create mode 100644 ytdlp_plugins/postprocessor/sample.py

diff --git a/README.md b/README.md
index 512b36b2e0..510770a14c 100644
--- a/README.md
+++ b/README.md
@@ -837,6 +837,20 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
                                      around the cuts
     --no-force-keyframes-at-cuts     Do not force keyframes around the
                                      chapters when cutting/splitting (default)
+    --use-postprocessor NAME[:ARGS]  The (case sensitive) name of plugin
+                                     postprocessors to be enabled, and
+                                     (optionally) arguments to be passed to it,
+                                     separated by a colon ":". ARGS are a
+                                     semicolon ";" delimited list of NAME=VALUE.
+                                     The "when" argument determines when the
+                                     postprocessor is invoked. It can be one of
+                                     "pre_process" (after extraction),
+                                     "before_dl" (before video download),
+                                     "post_process" (after video download;
+                                     default) or "after_move" (after moving file
+                                     to their final locations). This option can
+                                     be used multiple times to add different
+                                     postprocessors

 ## SponsorBlock Options:
 Make chapter entries for, or remove various segments (sponsor,
@@ -1465,9 +1479,16 @@ NOTE: These options may be changed/removed in the future without concern for bac

 # PLUGINS

-Plugins are loaded from `<root-dir>/ytdlp_plugins/<type>/__init__.py`. Currently only `extractor` plugins are supported. Support for `downloader` and `postprocessor` plugins may be added in the future. See [ytdlp_plugins](ytdlp_plugins) for example.
+Plugins are loaded from `<root-dir>/ytdlp_plugins/<type>/__init__.py`; where `<root-dir>` is the directory of the binary (`<root-dir>/yt-dlp`), or the root directory of the module if you are running directly from source-code (`<root-dir>/yt_dlp/__main__.py`). Plugins are currently not supported for the `pip` version
+
+Plugins can be of `<type>`s `extractor` or `postprocessor`. Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for it. Postprocessor plugins can be invoked using `--use-postprocessor NAME`.
+
+See [ytdlp_plugins](ytdlp_plugins) for example plugins.
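
To make the new option concrete, here is a standalone re-implementation (for illustration only, not part of the diff) of the argument parsing that the `_postprocessor_opts_parser` helper added to `options.py` below performs: an argument such as `SamplePluginPP:when=before_dl;foo=bar` becomes the dict that is appended to the postprocessor list.

```python
# Sketch of the parsing done by the new --use-postprocessor option
# (see the options.py hunk below for the real one-line lambda).
def _postprocessor_opts_parser(key, val=''):
    return (
        *(item.split('=', 1) for item in (val.split(';') if val else [])),
        ('key', key[:-2] if key.endswith('PP') else key),  # i.e. remove_end(key, 'PP')
    )

opts = dict(_postprocessor_opts_parser(*'SamplePluginPP:when=before_dl;foo=bar'.split(':', 1)))
print(opts)  # {'when': 'before_dl', 'foo': 'bar', 'key': 'SamplePlugin'}
```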
+ +Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. Use plugins at your own risk and only if you trust the code + +If you are a plugin author, add [ytdlp-plugins](https://github.com/topics/ytdlp-plugins) as a topic to your repository for discoverability -**Note**: `` is the directory of the binary (`/yt-dlp`), or the root directory of the module if you are running directly from source-code (`/yt_dlp/__main__.py`) # DEPRECATED OPTIONS diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2e150cd979..873c22ad62 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -123,7 +123,7 @@ from .extractor import ( gen_extractor_classes, get_info_extractor, _LAZY_LOADER, - _PLUGIN_CLASSES + _PLUGIN_CLASSES as plugin_extractors ) from .extractor.openload import PhantomJSwrapper from .downloader import ( @@ -142,6 +142,7 @@ from .postprocessor import ( FFmpegMergerPP, FFmpegPostProcessor, MoveFilesAfterDownloadPP, + _PLUGIN_CLASSES as plugin_postprocessors ) from .update import detect_variant from .version import __version__ @@ -3201,9 +3202,10 @@ class YoutubeDL(object): self._write_string('[debug] yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})')) if _LAZY_LOADER: self._write_string('[debug] Lazy loading extractors enabled\n') - if _PLUGIN_CLASSES: - self._write_string( - '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES]) + if plugin_extractors or plugin_postprocessors: + self._write_string('[debug] Plugins: %s\n' % [ + '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}') + for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())]) if self.params.get('compat_opts'): self._write_string( '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts'))) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 53ea8136f0..2ae08f154e 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -418,7 +418,7 @@ def _real_main(argv=None): opts.sponskrub = False # PostProcessors - postprocessors = [] + postprocessors = list(opts.add_postprocessors) if sponsorblock_query: postprocessors.append({ 'key': 'SponsorBlock', diff --git a/yt_dlp/extractor/__init__.py b/yt_dlp/extractor/__init__.py index 7d540540e2..198c4ae17f 100644 --- a/yt_dlp/extractor/__init__.py +++ b/yt_dlp/extractor/__init__.py @@ -6,7 +6,7 @@ try: from .lazy_extractors import * from .lazy_extractors import _ALL_CLASSES _LAZY_LOADER = True - _PLUGIN_CLASSES = [] + _PLUGIN_CLASSES = {} except ImportError: _LAZY_LOADER = False @@ -20,7 +20,7 @@ if not _LAZY_LOADER: _ALL_CLASSES.append(GenericIE) _PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals()) - _ALL_CLASSES = _PLUGIN_CLASSES + _ALL_CLASSES + _ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES def gen_extractor_classes(): diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 57e25a5183..daf4c0041c 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -17,6 +17,7 @@ from .utils import ( get_executable_path, OUTTMPL_TYPES, preferredencoding, + remove_end, write_string, ) from .cookies import SUPPORTED_BROWSERS @@ -1389,6 +1390,25 @@ def parseOpts(overrideArguments=None): '--no-force-keyframes-at-cuts', action='store_false', dest='force_keyframes_at_cuts', help='Do not force keyframes around the chapters when cutting/splitting (default)') + _postprocessor_opts_parser = lambda key, val='': ( + *(item.split('=', 1) for item in (val.split(';') if val else 
[])),
+        ('key', remove_end(key, 'PP')))
+    postproc.add_option(
+        '--use-postprocessor',
+        metavar='NAME[:ARGS]', dest='add_postprocessors', default=[], type='str',
+        action='callback', callback=_list_from_options_callback,
+        callback_kwargs={
+            'delim': None,
+            'process': lambda val: dict(_postprocessor_opts_parser(*val.split(':', 1)))
+        }, help=(
+            'The (case sensitive) name of plugin postprocessors to be enabled, '
+            'and (optionally) arguments to be passed to it, separated by a colon ":". '
+            'ARGS are a semicolon ";" delimited list of NAME=VALUE. '
+            'The "when" argument determines when the postprocessor is invoked. '
+            'It can be one of "pre_process" (after extraction), '
+            '"before_dl" (before video download), "post_process" (after video download; default) '
+            'or "after_move" (after moving file to their final locations). '
+            'This option can be used multiple times to add different postprocessors'))

     sponsorblock = optparse.OptionGroup(parser, 'SponsorBlock Options', description=(
         'Make chapter entries for, or remove various segments (sponsor, introductions, etc.) '
diff --git a/yt_dlp/postprocessor/__init__.py b/yt_dlp/postprocessor/__init__.py
index adbcd37556..07c87b76a8 100644
--- a/yt_dlp/postprocessor/__init__.py
+++ b/yt_dlp/postprocessor/__init__.py
@@ -1,6 +1,9 @@
-from __future__ import unicode_literals
+# flake8: noqa: F401
+
+from ..utils import load_plugins

 from .embedthumbnail import EmbedThumbnailPP
+from .exec import ExecPP, ExecAfterDownloadPP
 from .ffmpeg import (
     FFmpegPostProcessor,
     FFmpegEmbedSubtitlePP,
@@ -18,48 +21,23 @@ from .ffmpeg import (
     FFmpegVideoConvertorPP,
     FFmpegVideoRemuxerPP,
 )
-from .xattrpp import XAttrMetadataPP
-from .exec import ExecPP, ExecAfterDownloadPP
 from .metadataparser import (
     MetadataFromFieldPP,
     MetadataFromTitlePP,
     MetadataParserPP,
 )
-from .movefilesafterdownload import MoveFilesAfterDownloadPP
-from .sponsorblock import SponsorBlockPP
-from .sponskrub import SponSkrubPP
 from .modify_chapters import ModifyChaptersPP
+from .movefilesafterdownload import MoveFilesAfterDownloadPP
+from .sponskrub import SponSkrubPP
+from .sponsorblock import SponsorBlockPP
+from .xattrpp import XAttrMetadataPP
+
+_PLUGIN_CLASSES = load_plugins('postprocessor', 'PP', globals())


 def get_postprocessor(key):
     return globals()[key + 'PP']


-__all__ = [
-    'FFmpegPostProcessor',
-    'EmbedThumbnailPP',
-    'ExecPP',
-    'ExecAfterDownloadPP',
-    'FFmpegEmbedSubtitlePP',
-    'FFmpegExtractAudioPP',
-    'FFmpegSplitChaptersPP',
-    'FFmpegFixupDurationPP',
-    'FFmpegFixupM3u8PP',
-    'FFmpegFixupM4aPP',
-    'FFmpegFixupStretchedPP',
-    'FFmpegFixupTimestampPP',
-    'FFmpegMergerPP',
-    'FFmpegMetadataPP',
-    'FFmpegSubtitlesConvertorPP',
-    'FFmpegThumbnailsConvertorPP',
-    'FFmpegVideoConvertorPP',
-    'FFmpegVideoRemuxerPP',
-    'MetadataParserPP',
-    'MetadataFromFieldPP',
-    'MetadataFromTitlePP',
-    'MoveFilesAfterDownloadPP',
-    'SponsorBlockPP',
-    'SponSkrubPP',
-    'ModifyChaptersPP',
-    'XAttrMetadataPP',
-]
+__all__ = [name for name in globals().keys() if name.endswith('PP')]
+__all__.append('FFmpegPostProcessor')
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 4aa36a1165..1bc0ac7671 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -6278,7 +6278,7 @@ def get_executable_path():

 def load_plugins(name, suffix, namespace):
     plugin_info = [None]
-    classes = []
+    classes = {}
     try:
         plugin_info = imp.find_module(
             name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
@@ -6289,8 +6289,7 @@
             if not name.endswith(suffix):
                 continue
             klass =
getattr(plugins, name)
-            classes.append(klass)
-            namespace[name] = klass
+            classes[name] = namespace[name] = klass
     except ImportError:
         pass
     finally:
diff --git a/ytdlp_plugins/extractor/__init__.py b/ytdlp_plugins/extractor/__init__.py
index 92f2bfd861..3045a590bd 100644
--- a/ytdlp_plugins/extractor/__init__.py
+++ b/ytdlp_plugins/extractor/__init__.py
@@ -1,3 +1,4 @@
-# flake8: noqa
+# flake8: noqa: F401

+# ℹ️ The imported name must end in "IE"
 from .sample import SamplePluginIE
diff --git a/ytdlp_plugins/extractor/sample.py b/ytdlp_plugins/extractor/sample.py
index 99a3841409..986e5bb228 100644
--- a/ytdlp_plugins/extractor/sample.py
+++ b/ytdlp_plugins/extractor/sample.py
@@ -1,7 +1,5 @@
 # coding: utf-8

-from __future__ import unicode_literals
-
 # ⚠ Don't use relative imports
 from yt_dlp.extractor.common import InfoExtractor

diff --git a/ytdlp_plugins/postprocessor/__init__.py b/ytdlp_plugins/postprocessor/__init__.py
new file mode 100644
index 0000000000..61099abbc6
--- /dev/null
+++ b/ytdlp_plugins/postprocessor/__init__.py
@@ -0,0 +1,4 @@
+# flake8: noqa: F401
+
+# ℹ️ The imported name must end in "PP" and is the name to be used in --use-postprocessor
+from .sample import SamplePluginPP
diff --git a/ytdlp_plugins/postprocessor/sample.py b/ytdlp_plugins/postprocessor/sample.py
new file mode 100644
index 0000000000..6891280d50
--- /dev/null
+++ b/ytdlp_plugins/postprocessor/sample.py
@@ -0,0 +1,23 @@
+# coding: utf-8
+
+# ⚠ Don't use relative imports
+from yt_dlp.postprocessor.common import PostProcessor
+
+
+# ℹ️ See the docstring of yt_dlp.postprocessor.common.PostProcessor
+class SamplePluginPP(PostProcessor):
+    def __init__(self, downloader=None, **kwargs):
+        # ⚠ Only kwargs can be passed from the CLI, and all argument values will be string
+        # Also, "downloader", "when" and "key" are reserved names
+        super().__init__(downloader)
+        self._kwargs = kwargs
+
+    # ℹ️ See docstring of yt_dlp.postprocessor.common.PostProcessor.run
+    def run(self, info):
+        filepath = info.get('filepath')
+        if filepath:  # PP was called after download (default)
+            self.to_screen(f'Post-processed {filepath!r} with {self._kwargs}')
+        else:  # PP was called before actual download
+            filepath = info.get('_filename')
+            self.to_screen(f'Pre-processed {filepath!r} with {self._kwargs}')
+        return [], info  # return list_of_files_to_delete, info_dict

From d710cc6d3660b7bb79cbbefe1f0faec6726b020c Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Thu, 30 Sep 2021 02:44:40 +0530
Subject: [PATCH 185/641] [docs] Add note about our custom ffmpeg builds

---
 README.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 510770a14c..bbcc183d8a 100644
--- a/README.md
+++ b/README.md
@@ -207,7 +207,10 @@ While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly

 To use or redistribute the dependencies, you must agree to their respective licensing terms.

-Note that the windows releases are already built with the python interpreter, mutagen, pycryptodome and websockets included.
+The windows releases are already built with the python interpreter, mutagen, pycryptodome and websockets included.
+
+**Note**: There are some regressions in newer ffmpeg versions that cause various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds/wiki/Latest#latest-autobuilds) with patches for these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds
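
If one of these patched builds is needed, it can be selected explicitly (an illustrative sketch, not part of the diff; the unpack path is a placeholder, and `ffmpeg_location` is the pre-existing embedding counterpart of the `--ffmpeg-location` CLI option):

```python
# Sketch: pointing yt-dlp at a specific (e.g. patched) ffmpeg build.
import yt_dlp

ydl_opts = {
    'ffmpeg_location': '/opt/ffmpeg-custom/bin',  # placeholder path to the unpacked build
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://example.com/placeholder-video'])
```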
From d710cc6d3660b7bb79cbbefe1f0faec6726b020c Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Thu, 30 Sep 2021 02:44:40 +0530
Subject: [PATCH 185/641] [docs] Add note about our custom ffmpeg builds

---
 README.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 510770a14c..bbcc183d8a 100644
--- a/README.md
+++ b/README.md
@@ -207,7 +207,10 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly recommended

 To use or redistribute the dependencies, you must agree to their respective licensing terms.

-Note that the windows releases are already built with the python interpreter, mutagen, pycryptodome and websockets included.
+The windows releases are already built with the python interpreter, mutagen, pycryptodome and websockets included.
+
+**Note**: There are some regressions in newer ffmpeg versions that cause various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds/wiki/Latest#latest-autobuilds) with patches for these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds
+

 ### COMPILE

From e6f21b3d925ea708955c60c400a31fc2e0e36ac0 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Fri, 17 Sep 2021 23:53:55 +0530
Subject: [PATCH 186/641] [docs,cleanup] Some minor refactoring and improve
 docs

---
 Changelog.md                         |  1 -
 README.md                            | 15 ++++++++------
 devscripts/lazy_load_template.py     | 12 +++++++++---
 devscripts/make_lazy_extractors.py   |  2 --
 test/test_YoutubeDL.py               |  2 ++
 yt_dlp/YoutubeDL.py                  |  8 ++++----
 yt_dlp/__init__.py                   |  1 +
 yt_dlp/cache.py                      |  2 ++
 yt_dlp/compat.py                     |  2 ++
 yt_dlp/extractor/common.py           |  6 +++++-
 yt_dlp/extractor/youtube.py          |  2 +-
 yt_dlp/options.py                    |  4 ++--
 yt_dlp/postprocessor/ffmpeg.py       | 29 ++++++++++++++--------------
 yt_dlp/postprocessor/sponsorblock.py |  8 ++++----
 14 files changed, 55 insertions(+), 39 deletions(-)

diff --git a/Changelog.md b/Changelog.md
index 35a1b2680d..7334f87c5e 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -41,7 +41,6 @@
 * [Streamanity] Add Extractor by [alerikaisattera](https://github.com/alerikaisattera)
 * [Theta] Add extractor by [alerikaisattera](https://github.com/alerikaisattera)
 * [Yandex] Add ZenYandexIE and ZenYandexChannelIE by [Ashish0804](https://github.com/Ashish0804)
-
 * [9Now] handle episodes of series by [dalanmiller](https://github.com/dalanmiller)
 * [AnimalPlanet] Fix extractor by [Sipherdrakon](https://github.com/Sipherdrakon)
 * [Arte] Improve description extraction by [renalid](https://github.com/renalid)
diff --git a/README.md b/README.md
index bbcc183d8a..d219b28d3b 100644
--- a/README.md
+++ b/README.md
@@ -77,7 +77,7 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/
     * Most (but not all) age-gated content can be downloaded without cookies
     * Partial workaround for throttling issue
     * Redirect channel's home URL automatically to `/video` to preserve the old behaviour
-    * `255kbps` audio is extracted from youtube music if premium cookies are given
+    * `255kbps` audio is extracted (if available) from youtube music when premium cookies are given
     * Youtube music Albums, channels etc can be downloaded ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723))

 * **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[:PROFILE]`
@@ -150,7 +150,7 @@ For ease of use, a few more compat options are available:
 yt-dlp is not platform specific. So it should work on your Unix box, on Windows or on macOS

 You can install yt-dlp using one of the following methods:
-* Download the binary from the [latest release](https://github.com/yt-dlp/yt-dlp/releases/latest) (recommended method)
+* Download the binary from the [latest release](https://github.com/yt-dlp/yt-dlp/releases/latest)
 * With Homebrew, `brew install yt-dlp/taps/yt-dlp`
 * Use [PyPI package](https://pypi.org/project/yt-dlp): `python3 -m pip install --upgrade yt-dlp`
 * Use pip+git: `python3 -m pip install --upgrade git+https://github.com/yt-dlp/yt-dlp.git@release`
@@ -195,7 +195,7 @@ On windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https:

 While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly recommended

 * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. Licence [depends on the build](https://www.ffmpeg.org/legal.html)
 * [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. Licenced under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING)
-* [**pycryptodome**](https://github.com/Legrandin/pycryptodome) - For decrypting various data. Licenced under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst)
+* [**pycryptodome**](https://github.com/Legrandin/pycryptodome) - For decrypting AES-128 HLS streams and various other data. Licenced under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst)
 * [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licenced under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE)
 * [**keyring**](https://github.com/jaraco/keyring) - For decrypting cookies of chromium-based browsers on Linux. Licenced under [MIT](https://github.com/jaraco/keyring/blob/main/LICENSE)
 * [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licenced under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING)
@@ -1002,9 +1002,10 @@ The available fields are:
    - `uploader` (string): Full name of the video uploader
    - `license` (string): License name the video is licensed under
    - `creator` (string): The creator of the video
-   - `release_date` (string): The date (YYYYMMDD) when the video was released
    - `timestamp` (numeric): UNIX timestamp of the moment the video became available
    - `upload_date` (string): Video upload date (YYYYMMDD)
+   - `release_date` (string): The date (YYYYMMDD) when the video was released
+   - `release_timestamp` (numeric): UNIX timestamp of the moment the video was released
    - `uploader_id` (string): Nickname or id of the video uploader
    - `channel` (string): Full name of the channel the video is uploaded on
    - `channel_id` (string): Id of the channel
@@ -1046,8 +1047,10 @@
    - `extractor_key` (string): Key name of the extractor
    - `epoch` (numeric): Unix epoch when creating the file
    - `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`
+   - `n_entries` (numeric): Total number of extracted items in the playlist
    - `playlist` (string): Name or id of the playlist that contains the video
-   - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist
+   - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the final index
+   - `playlist_autonumber` (numeric): Position of the video in the playlist download queue padded with leading zeros according to the total length of the playlist
    - `playlist_id` (string): Playlist identifier
    - `playlist_title` (string): Playlist title
    - `playlist_uploader` (string): Full name of the playlist uploader
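As a quick illustration of the padding rules for these two fields (a standalone sketch of the `field_size_compat_map` behaviour this patch adds to `yt_dlp/YoutubeDL.py`; `playlist_index` pads to the width of the final index, `playlist_autonumber` to the width of `n_entries`):

    # e.g. final index 100 gives a 3-digit playlist_index; 10 entries give a
    # 2-digit playlist_autonumber, matching the test expectations below
    last_playlist_index, n_entries = 100, 10
    print('%%0%dd' % len(str(last_playlist_index)) % 1)  # 001
    print('%%0%dd' % len(str(n_entries)) % 2)            # 02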
@@ -1266,7 +1269,7 @@

 All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. Eg: `+res` prefers the format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. Eg: `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. Eg: `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the ones provided by using `~` as the delimiter. Eg: `filesize~1G` prefers the format with filesize closest to 1 GiB.

-The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--force-format-sort`. Apart from these, the default order used is: `lang,quality,res,fps,codec:vp9.2,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order.
+The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,codec:vp9.2,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order.

 Note that the default has `codec:vp9.2`; i.e. `av1` is not preferred
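The `~` (nearest-value) comparison can be pictured with a standalone sketch; this is an assumed simplification, as the real logic lives in `InfoExtractor.FormatSort` and also handles units and tie-breaking:

    # 'filesize~1G': rank formats by absolute distance from the requested size
    formats = [{'format_id': 'a', 'filesize': 500 * 1024**2},
               {'format_id': 'b', 'filesize': 1200 * 1024**2}]
    target = 1024**3  # 1 GiB
    print(min(formats, key=lambda f: abs(f['filesize'] - target))['format_id'])  # b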
diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py
index 036e2e767c..da89e070de 100644
--- a/devscripts/lazy_load_template.py
+++ b/devscripts/lazy_load_template.py
@@ -1,9 +1,15 @@
 # coding: utf-8
 import re

+from ..utils import bug_reports_message, write_string
+

 class LazyLoadMetaClass(type):
     def __getattr__(cls, name):
+        if '_real_class' not in cls.__dict__:
+            write_string(
+                f'WARNING: Falling back to normal extractor since lazy extractor '
+                f'{cls.__name__} does not have attribute {name}{bug_reports_message()}')
         return getattr(cls._get_real_class(), name)


@@ -13,10 +19,10 @@ class LazyLoadExtractor(metaclass=LazyLoadMetaClass):

     @classmethod
     def _get_real_class(cls):
-        if '__real_class' not in cls.__dict__:
+        if '_real_class' not in cls.__dict__:
             mod = __import__(cls._module, fromlist=(cls.__name__,))
-            cls.__real_class = getattr(mod, cls.__name__)
-        return cls.__real_class
+            cls._real_class = getattr(mod, cls.__name__)
+        return cls._real_class

     def __new__(cls, *args, **kwargs):
         real_cls = cls._get_real_class()
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index e7b024490c..427045b984 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -7,8 +7,6 @@ import os
 from os.path import dirname as dirn
 import sys

-print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
-
 sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))

 lazy_extractors_filename = sys.argv[1]
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index e746589450..450f254933 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -653,6 +653,7 @@ class TestYoutubeDL(unittest.TestCase):
             'timestamp': 1618488000,
             'duration': 100000,
             'playlist_index': 1,
+            'playlist_autonumber': 2,
             '_last_playlist_index': 100,
             'n_entries': 10,
             'formats': [{'id': 'id1'}, {'id': 'id2'}, {'id': 'id3'}]
@@ -690,6 +691,7 @@
         test('%(duration_string)s', ('27:46:40', '27-46-40'))
         test('%(resolution)s', '1080p')
         test('%(playlist_index)s', '001')
+        test('%(playlist_autonumber)s', '02')
         test('%(autonumber)s', '00001')
         test('%(autonumber+2)03d', '005',
autonumber_start=3) test('%(autonumber)s', '001', autonumber_size=3) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 873c22ad62..c42a29ee3f 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -454,13 +454,12 @@ class YoutubeDL(object): _NUMERIC_FIELDS = set(( 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx', - 'timestamp', 'upload_year', 'upload_month', 'upload_day', + 'timestamp', 'release_timestamp', 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', 'average_rating', 'comment_count', 'age_limit', 'start_time', 'end_time', 'chapter_number', 'season_number', 'episode_number', 'track_number', 'disc_number', 'release_year', - 'playlist_index', )) params = None @@ -579,8 +578,8 @@ class YoutubeDL(object): self._setup_opener() - """Preload the archive, if any is specified""" def preload_download_archive(fn): + """Preload the archive, if any is specified""" if fn is None: return False self.write_debug('Loading archive file %r\n' % fn) @@ -934,10 +933,11 @@ class YoutubeDL(object): if info_dict.get('resolution') is None: info_dict['resolution'] = self.format_resolution(info_dict, default=None) - # For fields playlist_index and autonumber convert all occurrences + # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences # of %(field)s to %(field)0Nd for backward compatibility field_size_compat_map = { 'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')), + 'playlist_autonumber': len(str(info_dict.get('n_entries') or '')), 'autonumber': self.params.get('autonumber_size') or 5, } diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 2ae08f154e..38e1d0ec65 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -513,6 +513,7 @@ def _real_main(argv=None): 'add_chapters': opts.addchapters, 'add_metadata': opts.addmetadata, }) + # Note: Deprecated # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment # but must be below EmbedSubtitle and FFmpegMetadata # See https://github.com/yt-dlp/yt-dlp/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29 diff --git a/yt_dlp/cache.py b/yt_dlp/cache.py index dde9cca646..e5cb193bce 100644 --- a/yt_dlp/cache.py +++ b/yt_dlp/cache.py @@ -50,6 +50,7 @@ class Cache(object): except OSError as ose: if ose.errno != errno.EEXIST: raise + self._ydl.write_debug(f'Saving {section}.{key} to cache') write_json_file(data, fn) except Exception: tb = traceback.format_exc() @@ -66,6 +67,7 @@ class Cache(object): try: try: with io.open(cache_fn, 'r', encoding='utf-8') as cachef: + self._ydl.write_debug(f'Loading {section}.{key} from cache') return json.load(cachef) except ValueError: try: diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py index 7b55b7d9d4..9bf05c7373 100644 --- a/yt_dlp/compat.py +++ b/yt_dlp/compat.py @@ -33,6 +33,8 @@ class compat_HTMLParseError(Exception): pass +# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE +# will not work since ctypes.WINFUNCTYPE does not exist in UNIX machines def compat_ctypes_WINFUNCTYPE(*args, **kwargs): return ctypes.WINFUNCTYPE(*args, **kwargs) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 114b1faaf2..5da29dc63d 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -406,6 +406,10 @@ class InfoExtractor(object): _real_extract() methods and define a _VALID_URL regexp. Probably, they should also be added to the list of extractors. 
+ Subclasses may also override suitable() if necessary, but ensure the function + signature is preserved and that this function imports everything it needs + (except other extractors), so that lazy_extractors works correctly + _GEO_BYPASS attribute may be set to False in order to disable geo restriction bypass mechanisms for a particular extractor. Though it won't disable explicit geo restriction bypass based on @@ -421,7 +425,7 @@ class InfoExtractor(object): will be used by geo restriction bypass mechanism similarly to _GEO_COUNTRIES. - Finally, the _WORKING attribute should be set to False for broken IEs + The _WORKING attribute should be set to False for broken IEs in order to warn the users and skip the tests. """ diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 272bdb0597..159b0a3b9d 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -621,7 +621,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return delegated_sid sync_ids = (try_get( data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'], - lambda x: x['DATASYNC_ID']), compat_str) or '').split("||") + lambda x: x['DATASYNC_ID']), compat_str) or '').split('||') if len(sync_ids) >= 2 and sync_ids[1]: # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel # and just "user_syncid||" for primary channel. We only want the channel_syncid diff --git a/yt_dlp/options.py b/yt_dlp/options.py index daf4c0041c..be43f37ee1 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -261,7 +261,7 @@ def parseOpts(overrideArguments=None): general.add_option( '--mark-watched', action='store_true', dest='mark_watched', default=False, - help='Mark videos watched (YouTube only)') + help='Mark videos watched (even with --simulate). 
Currently only supported for YouTube') general.add_option( '--no-mark-watched', action='store_false', dest='mark_watched', @@ -768,7 +768,7 @@ def parseOpts(overrideArguments=None): dest='encoding', metavar='ENCODING', help='Force the specified encoding (experimental)') workarounds.add_option( - '--no-check-certificate', + '--no-check-certificates', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation') workarounds.add_option( diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 058926929f..311170920c 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -478,7 +478,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): class FFmpegVideoConvertorPP(FFmpegPostProcessor): SUPPORTED_EXTS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus') FORMAT_RE = re.compile(r'{0}(?:/{0})*$'.format(r'(?:\w+>)?(?:%s)' % '|'.join(SUPPORTED_EXTS))) - _action = 'converting' + _ACTION = 'converting' def __init__(self, downloader=None, preferedformat=None): super(FFmpegVideoConvertorPP, self).__init__(downloader) @@ -497,29 +497,28 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor): return [] @PostProcessor._restrict_to(images=False) - def run(self, information): - path, source_ext = information['filepath'], information['ext'].lower() + def run(self, info): + filename, source_ext = info['filepath'], info['ext'].lower() target_ext = self._target_ext(source_ext) _skip_msg = ( - 'could not find a mapping for %s' if not target_ext - else 'already is in target format %s' if source_ext == target_ext + f'could not find a mapping for {source_ext}' if not target_ext + else f'already is in target format {source_ext}' if source_ext == target_ext else None) if _skip_msg: - self.to_screen('Not %s media file "%s"; %s' % (self._action, path, _skip_msg % source_ext)) - return [], information + self.to_screen(f'Not {self._ACTION} media file {filename!r}; {_skip_msg}') + return [], info - prefix, sep, oldext = path.rpartition('.') - outpath = prefix + sep + target_ext - self.to_screen('%s video from %s to %s; Destination: %s' % (self._action.title(), source_ext, target_ext, outpath)) - self.run_ffmpeg(path, outpath, self._options(target_ext)) + outpath = replace_extension(filename, target_ext, source_ext) + self.to_screen(f'{self._ACTION.title()} video from {source_ext} to {target_ext}; Destination: {outpath}') + self.run_ffmpeg(filename, outpath, self._options(target_ext)) - information['filepath'] = outpath - information['format'] = information['ext'] = target_ext - return [path], information + info['filepath'] = outpath + info['format'] = info['ext'] = target_ext + return [filename], info class FFmpegVideoRemuxerPP(FFmpegVideoConvertorPP): - _action = 'remuxing' + _ACTION = 'remuxing' @staticmethod def _options(target_ext): diff --git a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py index 6264d45c5d..7265a9de7c 100644 --- a/yt_dlp/postprocessor/sponsorblock.py +++ b/yt_dlp/postprocessor/sponsorblock.py @@ -4,7 +4,7 @@ from hashlib import sha256 from .ffmpeg import FFmpegPostProcessor from ..compat import compat_urllib_parse_urlencode, compat_HTTPError -from ..utils import PostProcessingError, sanitized_Request +from ..utils import PostProcessingError, network_exceptions, sanitized_Request class SponsorBlockPP(FFmpegPostProcessor): @@ -88,9 +88,9 @@ class SponsorBlockPP(FFmpegPostProcessor): self.write_debug(f'SponsorBlock query: {url}') try: rsp = 
self._downloader.urlopen(sanitized_Request(url))
-        except compat_HTTPError as e:
-            if e.code == 404:
+        except network_exceptions as e:
+            if isinstance(e, compat_HTTPError) and e.code == 404:
                 return []
-            raise PostProcessingError(f'Error communicating with SponsorBlock API - {e}')
+            raise PostProcessingError(f'Unable to communicate with SponsorBlock API - {e}')
         return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8'))

From ad095c4283a5159739d88d681af6381df3d7c146 Mon Sep 17 00:00:00 2001
From: jfogelman
Date: Thu, 30 Sep 2021 11:44:20 -0400
Subject: [PATCH 187/641] [adobepass] Add RCN as MSO (#1129)

Authored by: jfogelman

---
 yt_dlp/extractor/adobepass.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py
index ffab332941..9378c33cd3 100644
--- a/yt_dlp/extractor/adobepass.py
+++ b/yt_dlp/extractor/adobepass.py
@@ -37,6 +37,11 @@ MSO_INFO = {
         'username_field': 'email',
         'password_field': 'loginpassword',
     },
+    'RCN': {
+        'name': 'RCN',
+        'username_field': 'UserName',
+        'password_field': 'UserPassword',
+    },
     'Rogers': {
         'name': 'Rogers',
         'username_field': 'UserName',

From 0eaec13ba6abe18d6ddf35f2ebffdcaf3937e485 Mon Sep 17 00:00:00 2001
From: Aleri Kaisattera <73682764+alerikaisattera@users.noreply.github.com>
Date: Sat, 2 Oct 2021 00:45:15 +0600
Subject: [PATCH 188/641] [Theta] Add video extractor (#1137)

Authored by: alerikaisattera

---
 yt_dlp/extractor/extractors.py |  5 ++++-
 yt_dlp/extractor/theta.py      | 40 ++++++++++++++++++++++++++++++++--
 2 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 1776a4d268..8e8d269ced 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1433,7 +1433,10 @@ from .theplatform import (
 from .thescene import TheSceneIE
 from .thestar import TheStarIE
 from .thesun import TheSunIE
-from .theta import ThetaIE
+from .theta import (
+    ThetaVideoIE,
+    ThetaStreamIE,
+)
 from .theweatherchannel import TheWeatherChannelIE
 from .thisamericanlife import ThisAmericanLifeIE
 from .thisav import ThisAVIE
diff --git a/yt_dlp/extractor/theta.py b/yt_dlp/extractor/theta.py
index 34c0da8156..3b65436295 100644
--- a/yt_dlp/extractor/theta.py
+++ b/yt_dlp/extractor/theta.py
@@ -5,8 +5,8 @@ from .common import InfoExtractor
 from ..utils import try_get


-class ThetaIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?theta\.tv/(?P<id>[a-z0-9]+)'
+class ThetaStreamIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?theta\.tv/(?!video/)(?P<id>[a-z0-9]+)'
     _TESTS = [{
         'url': 'https://www.theta.tv/davirus',
         'skip': 'The live may have ended',
@@ -49,3 +49,39 @@ class ThetaIE(InfoExtractor):
             'formats': formats,
             'thumbnail': try_get(info, lambda x: x['live_stream']['thumbnail_url']),
         }
+
+
+class ThetaVideoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?theta\.tv/video/(?P<id>vid[a-z0-9]+)'
+    _TEST = {
+        'url': 'https://www.theta.tv/video/vidiq6aaet3kzf799p0',
+        'md5': '633d8c29eb276bb38a111dbd591c677f',
+        'info_dict': {
+            'id': 'vidiq6aaet3kzf799p0',
+            'ext': 'mp4',
+            'title': 'Theta EdgeCast Tutorial',
+            'uploader': 'Pixiekittie',
+            'description': 'md5:e316253f5bdced8b5a46bb50ae60a09f',
+            'thumbnail': r're:https://user-prod-theta-tv\.imgix\.net/.+/vod_thumb/.+.jpg',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        info = self._download_json(f'https://api.theta.tv/v1/video/{video_id}/raw', video_id)['body']
+
+        m3u8_playlist = try_get(info, lambda x:
x['video_urls'][0]['url']) + + formats = self._extract_m3u8_formats(m3u8_playlist, video_id, 'mp4', m3u8_id='hls') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': info.get('title'), + 'uploader': try_get(info, lambda x: x['user']['username']), + 'description': info.get('description'), + 'view_count': info.get('view_count'), + 'like_count': info.get('like_count'), + 'formats': formats, + 'thumbnail': info.get('thumbnail_url'), + } From 9359f3d4f02856128f5626e754c7f64e2232b02f Mon Sep 17 00:00:00 2001 From: Felix S Date: Sat, 2 Oct 2021 18:43:42 +0000 Subject: [PATCH 189/641] [extractor] Extract storyboards from SMIL manifests (#1128) Authored by: fstirlitz --- yt_dlp/YoutubeDL.py | 6 +++--- yt_dlp/extractor/common.py | 23 ++++++++++++++++++++-- yt_dlp/utils.py | 39 +++++++++++++++++++++++++++++++------- 3 files changed, 56 insertions(+), 12 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index c42a29ee3f..9c4dd3ec52 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3029,9 +3029,7 @@ class YoutubeDL(object): @staticmethod def format_resolution(format, default='unknown'): - if format.get('vcodec') == 'none': - if format.get('acodec') == 'none': - return 'images' + if format.get('vcodec') == 'none' and format.get('acodec') != 'none': return 'audio only' if format.get('resolution') is not None: return format['resolution'] @@ -3043,6 +3041,8 @@ class YoutubeDL(object): res = '%dx?' % format['width'] else: res = default + if format.get('vcodec') == 'none' and format.get('acodec') == 'none': + res += ' (images)' return res def _format_note(self, fdict): diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 5da29dc63d..f65a098d72 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2346,14 +2346,15 @@ class InfoExtractor(object): rtmp_count = 0 http_count = 0 m3u8_count = 0 + imgs_count = 0 - srcs = [] + srcs = set() media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace)) for medium in media: src = medium.get('src') if not src or src in srcs: continue - srcs.append(src) + srcs.add(src) bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000) filesize = int_or_none(medium.get('size') or medium.get('fileSize')) @@ -2427,6 +2428,24 @@ class InfoExtractor(object): 'height': height, }) + for medium in smil.findall(self._xpath_ns('.//imagestream', namespace)): + src = medium.get('src') + if not src or src in srcs: + continue + srcs.add(src) + + imgs_count += 1 + formats.append({ + 'format_id': 'imagestream-%d' % (imgs_count), + 'url': src, + 'ext': mimetype2ext(medium.get('type')), + 'acodec': 'none', + 'vcodec': 'none', + 'width': int_or_none(medium.get('width')), + 'height': int_or_none(medium.get('height')), + 'format_note': 'SMIL storyboards', + }) + return formats def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 1bc0ac7671..7a77edf4c3 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4546,20 +4546,24 @@ def mimetype2ext(mt): if mt is None: return None - ext = { + mt, _, params = mt.partition(';') + mt = mt.strip() + + FULL_MAP = { 'audio/mp4': 'm4a', # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. 
Here use .mp3 as # it's the most popular one 'audio/mpeg': 'mp3', 'audio/x-wav': 'wav', - }.get(mt) + 'audio/wav': 'wav', + 'audio/wave': 'wav', + } + + ext = FULL_MAP.get(mt) if ext is not None: return ext - _, _, res = mt.rpartition('/') - res = res.split(';')[0].strip().lower() - - return { + SUBTYPE_MAP = { '3gpp': '3gp', 'smptett+xml': 'tt', 'ttaf+xml': 'dfxp', @@ -4578,7 +4582,28 @@ def mimetype2ext(mt): 'quicktime': 'mov', 'mp2t': 'ts', 'x-wav': 'wav', - }.get(res, res) + 'filmstrip+json': 'fs', + 'svg+xml': 'svg', + } + + _, _, subtype = mt.rpartition('/') + ext = SUBTYPE_MAP.get(subtype.lower()) + if ext is not None: + return ext + + SUFFIX_MAP = { + 'json': 'json', + 'xml': 'xml', + 'zip': 'zip', + 'gzip': 'gz', + } + + _, _, suffix = subtype.partition('+') + ext = SUFFIX_MAP.get(suffix) + if ext is not None: + return ext + + return subtype.replace('+', '.') def parse_codecs(codecs_str): From ff1dec819a38addb73c9d52bd47fbac01b10e5d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81kos=20S=C3=BClyi?= Date: Sat, 2 Oct 2021 20:50:39 +0200 Subject: [PATCH 190/641] [aes] Improve performance slightly (#1135) Authored by: sulyi --- yt_dlp/aes.py | 52 +++++++++++++-------------------------------------- 1 file changed, 13 insertions(+), 39 deletions(-) diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index f52b992df0..60cdeb74e0 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -178,7 +178,7 @@ def aes_encrypt(data, expanded_key): data = sub_bytes(data) data = shift_rows(data) if i != rounds: - data = mix_columns(data) + data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX)) data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]) return data @@ -197,7 +197,7 @@ def aes_decrypt(data, expanded_key): for i in range(rounds, 0, -1): data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]) if i != rounds: - data = mix_columns_inv(data) + data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV)) data = shift_rows_inv(data) data = sub_bytes_inv(data) data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) @@ -375,49 +375,23 @@ def xor(data1, data2): return [x ^ y for x, y in zip(data1, data2)] -def rijndael_mul(a, b): - if a == 0 or b == 0: - return 0 - return RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[a] + RIJNDAEL_LOG_TABLE[b]) % 0xFF] - - -def mix_column(data, matrix): - data_mixed = [] - for row in range(4): - mixed = 0 - for column in range(4): - # xor is (+) and (-) - mixed ^= rijndael_mul(data[column], matrix[row][column]) - data_mixed.append(mixed) - return data_mixed - - -def mix_columns(data, matrix=MIX_COLUMN_MATRIX): - data_mixed = [] - for i in range(4): - column = data[i * 4: (i + 1) * 4] - data_mixed += mix_column(column, matrix) - return data_mixed - - -def mix_columns_inv(data): - return mix_columns(data, MIX_COLUMN_MATRIX_INV) +def iter_mix_columns(data, matrix): + for i in (0, 4, 8, 12): + for row in matrix: + mixed = 0 + for j in range(4): + # xor is (+) and (-) + mixed ^= (0 if data[i:i + 4][j] == 0 or row[j] == 0 else + RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[data[i + j]] + RIJNDAEL_LOG_TABLE[row[j]]) % 0xFF]) + yield mixed def shift_rows(data): - data_shifted = [] - for column in range(4): - for row in range(4): - data_shifted.append(data[((column + row) & 0b11) * 4 + row]) - return data_shifted + return [data[((column + row) & 0b11) * 4 + row] for column in range(4) for row in range(4)] def shift_rows_inv(data): - data_shifted = [] - for column in range(4): - for row in range(4): - data_shifted.append(data[((column - row) & 0b11) * 4 + row]) 
-    return data_shifted
+    return [data[((column - row) & 0b11) * 4 + row] for column in range(4) for row in range(4)]


 def shift_block(data):

From e919569e6792b59c5e6826bf2e6b4ca874eb011d Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sat, 2 Oct 2021 22:35:24 +0530
Subject: [PATCH 191/641] [funimation] Sort formats according to the relevant
 extractor-args

---
 yt_dlp/extractor/funimation.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py
index 5846884849..ede53b326e 100644
--- a/yt_dlp/extractor/funimation.py
+++ b/yt_dlp/extractor/funimation.py
@@ -13,6 +13,7 @@ from ..utils import (
     js_to_json,
     str_or_none,
     try_get,
+    qualities,
     urlencode_postdata,
     ExtractorError,
 )
@@ -180,6 +181,8 @@ class FunimationIE(InfoExtractor):

         formats, subtitles, thumbnails, duration = [], {}, [], 0
         requested_languages, requested_versions = self._configuration_arg('language'), self._configuration_arg('version')
+        language_preference = qualities((requested_languages or [''])[::-1])
+        source_preference = qualities((requested_versions or ['uncut', 'simulcast'])[::-1])
         only_initial_experience = 'seperate-video-versions' in self.get_param('compat_opts', [])

         for lang, version, fmt in self._get_experiences(episode):
@@ -227,10 +230,15 @@
                 })
             for f in current_formats:
                 # TODO: Convert language to code
-                f.update({'language': lang, 'format_note': version})
+                f.update({
+                    'language': lang,
+                    'format_note': version,
+                    'source_preference': source_preference(version.lower()),
+                    'language_preference': language_preference(lang.lower()),
+                })
             formats.extend(current_formats)
         self._remove_duplicate_formats(formats)
-        self._sort_formats(formats)
+        self._sort_formats(formats, ('lang', 'source'))

         return {
             'id': initial_experience_id if only_initial_experience else episode_id,
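`qualities` (from `yt_dlp.utils`) turns a preference list into a score function in which later entries rank higher and unknown values get `-1`, which is why the requested lists are reversed above:

    from yt_dlp.utils import qualities

    # mirrors the default ['uncut', 'simulcast'][::-1] in the change above
    source_preference = qualities(['uncut', 'simulcast'][::-1])
    print(source_preference('uncut'), source_preference('simulcast'), source_preference('extras'))
    # 1 0 -1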
From a1c3967307053767d8c44a5814c88610fe6c4860 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sat, 2 Oct 2021 22:36:31 +0530
Subject: [PATCH 192/641] [EmbedSubtitle, SubtitlesConvertor] Fix error when
 subtitle file is missing

Closes #1152, #1134
Bug from 8e25d624df003d691be922488d6ab7007f75333d

---
 yt_dlp/postprocessor/ffmpeg.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index 311170920c..6bb66569ae 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -555,7 +555,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):

         mp4_ass_warn = False
         for lang, sub_info in subtitles.items():
-            if not os.path.exists(information.get('filepath', '')):
+            if not os.path.exists(sub_info.get('filepath', '')):
                 self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing')
                 continue
             sub_ext = sub_info['ext']
@@ -845,6 +845,9 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
         self.to_screen('Converting subtitles')
         sub_filenames = []
         for lang, sub in subs.items():
+            if not os.path.exists(sub.get('filepath', '')):
+                self.report_warning(f'Skipping conversion of {lang} subtitle because the file is missing')
+                continue
             ext = sub['ext']
             if ext == new_ext:
                 self.to_screen('Subtitle file for %s is already in the requested format' % new_ext)

From 5d535b4a559ff114866368bfb3cde38b54f9462b Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Mon, 4 Oct 2021 02:25:13 +0530
Subject: [PATCH 193/641] [build] Allow building with py2exe (and misc fixes)

py2exe config is copied from youtube-dl
Closes #1160

---
 .github/workflows/build.yml |  6 +--
 pyinst.py                   | 13 ++++--
 setup.py                    | 90 ++++++++++++++++++++++++++-----------
 yt_dlp/update.py            | 33 +++++++++----
 yt_dlp/utils.py             |  5 +--
 5 files changed, 100 insertions(+), 47 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 515c501642..4f983f2c10 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -161,7 +161,7 @@ jobs:
       - name: Print version
         run: echo "${{ steps.bump_version.outputs.ytdlp_version }}"
       - name: Run PyInstaller Script
-        run: python pyinst.py 64
+        run: python pyinst.py
       - name: Upload yt-dlp.exe Windows binary
         id: upload-release-windows
         uses: actions/upload-release-asset@v1
@@ -179,7 +179,7 @@
         id: sha512_win
         run: echo "::set-output name=sha512_win::$((Get-FileHash dist\yt-dlp.exe -Algorithm SHA512).Hash.ToLower())"
       - name: Run PyInstaller Script with --onedir
-        run: python pyinst.py 64 --onedir
+        run: python pyinst.py --onedir
       - uses: papeloto/action-zip@v1
         with:
           files: ./dist/yt-dlp
@@ -227,7 +227,7 @@
       - name: Print version
         run: echo "${{ steps.bump_version.outputs.ytdlp_version }}"
       - name: Run PyInstaller Script for 32 Bit
-        run: python pyinst.py 32
+        run: python pyinst.py
       - name: Upload Executable yt-dlp_x86.exe
         id: upload-release-windows32
         uses: actions/upload-release-asset@v1
diff --git a/pyinst.py b/pyinst.py
index d65243f880..7e040647c2 100644
--- a/pyinst.py
+++ b/pyinst.py
@@ -13,11 +13,18 @@ from PyInstaller.utils.win32.versioninfo import (
 )
 import PyInstaller.__main__

-arch = sys.argv[1] if len(sys.argv) > 1 else platform.architecture()[0][:2]
+arch = platform.architecture()[0][:2]
 assert arch in ('32', '64')
 _x86 = '_x86' if arch == '32' else ''

-opts = sys.argv[2:] or ['--onefile']
+# Compatibility with older arguments
+opts = sys.argv[1:]
+if opts[0:1] in (['32'], ['64']):
+    if arch != opts[0]:
+        raise Exception(f'{opts[0]}bit executable cannot be built on a {arch}bit system')
+    opts = opts[1:]
+opts = opts or ['--onefile']
+
 print(f'Building {arch}bit version with options {opts}')

 FILE_DESCRIPTION = 'yt-dlp%s' % (' (32 Bit)' if _x86 else '')
@@ -82,4 +89,4 @@
     *opts,
     'yt_dlp/__main__.py',
 ])
-SetVersion('dist/yt-dlp%s.exe' % _x86, VERSION_FILE)
+SetVersion('dist/%syt-dlp%s.exe' % ('yt-dlp/' if '--onedir' in opts else '', _x86), VERSION_FILE)
diff --git a/setup.py b/setup.py
index d54806f151..b5eb81c301 100644
--- a/setup.py
+++ b/setup.py
@@ -1,12 +1,16 @@
 #!/usr/bin/env python3
 # coding: utf-8
-
-from setuptools import setup, Command, find_packages
 import os.path
 import warnings
 import sys

-from distutils.spawn import spawn
+try:
+    from setuptools import setup, Command, find_packages
+    setuptools_available = True
+except ImportError:
+    from distutils.core import setup, Command
+    setuptools_available = False
+from distutils.spawn import spawn

 # Get the version from yt_dlp/version.py without importing the package
 exec(compile(open('yt_dlp/version.py').read(), 'yt_dlp/version.py', 'exec'))
@@ -21,32 +25,62 @@ LONG_DESCRIPTION = '\n\n'.join((

 REQUIREMENTS = ['mutagen', 'pycryptodome', 'websockets']

+
 if sys.argv[1:2] == ['py2exe']:
-    raise NotImplementedError('py2exe is not currently supported; instead, use "pyinst.py" to build with pyinstaller')
+    import py2exe
+    warnings.warn(
+        'Building with py2exe is not officially supported. 
' + 'The recommended way is to use "pyinst.py" to build using pyinstaller') + params = { + 'console': [{ + 'script': './yt_dlp/__main__.py', + 'dest_base': 'yt-dlp', + 'version': __version__, + 'description': DESCRIPTION, + 'comments': LONG_DESCRIPTION.split('\n')[0], + 'product_name': 'yt-dlp', + 'product_version': __version__, + }], + 'options': { + 'py2exe': { + 'bundle_files': 0, + 'compressed': 1, + 'optimize': 2, + 'dist_dir': './dist', + 'excludes': ['Crypto', 'Cryptodome'], # py2exe cannot import Crypto + 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], + } + }, + 'zipfile': None + } +else: + files_spec = [ + ('share/bash-completion/completions', ['completions/bash/yt-dlp']), + ('share/zsh/site-functions', ['completions/zsh/_yt-dlp']), + ('share/fish/vendor_completions.d', ['completions/fish/yt-dlp.fish']), + ('share/doc/yt_dlp', ['README.txt']), + ('share/man/man1', ['yt-dlp.1']) + ] + root = os.path.dirname(os.path.abspath(__file__)) + data_files = [] + for dirname, files in files_spec: + resfiles = [] + for fn in files: + if not os.path.exists(fn): + warnings.warn('Skipping file %s since it is not present. Try running `make pypi-files` first' % fn) + else: + resfiles.append(fn) + data_files.append((dirname, resfiles)) -files_spec = [ - ('share/bash-completion/completions', ['completions/bash/yt-dlp']), - ('share/zsh/site-functions', ['completions/zsh/_yt-dlp']), - ('share/fish/vendor_completions.d', ['completions/fish/yt-dlp.fish']), - ('share/doc/yt_dlp', ['README.txt']), - ('share/man/man1', ['yt-dlp.1']) -] -root = os.path.dirname(os.path.abspath(__file__)) -data_files = [] -for dirname, files in files_spec: - resfiles = [] - for fn in files: - if not os.path.exists(fn): - warnings.warn('Skipping file %s since it is not present. Try running `make pypi-files` first' % fn) - else: - resfiles.append(fn) - data_files.append((dirname, resfiles)) + params = { + 'data_files': data_files, + } -params = { - 'data_files': data_files, -} -params['entry_points'] = {'console_scripts': ['yt-dlp = yt_dlp:main']} + if setuptools_available: + params['entry_points'] = {'console_scripts': ['yt-dlp = yt_dlp:main']} + else: + params['scripts'] = ['yt-dlp'] class build_lazy_extractors(Command): @@ -64,7 +98,11 @@ class build_lazy_extractors(Command): dry_run=self.dry_run) -packages = find_packages(exclude=('youtube_dl', 'test', 'ytdlp_plugins')) +if setuptools_available: + packages = find_packages(exclude=('youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins')) +else: + packages = ['yt_dlp', 'yt_dlp.downloader', 'yt_dlp.extractor', 'yt_dlp.postprocessor'] + setup( name='yt-dlp', diff --git a/yt_dlp/update.py b/yt_dlp/update.py index 8160dab377..4fbe7bd7e7 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -32,10 +32,12 @@ def rsa_verify(message, signature, key): def detect_variant(): - if hasattr(sys, 'frozen') and getattr(sys, '_MEIPASS', None): - if sys._MEIPASS == os.path.dirname(sys.executable): - return 'dir' - return 'exe' + if hasattr(sys, 'frozen'): + if getattr(sys, '_MEIPASS', None): + if sys._MEIPASS == os.path.dirname(sys.executable): + return 'dir' + return 'exe' + return 'py2exe' elif isinstance(globals().get('__loader__'), zipimporter): return 'zip' elif os.path.basename(sys.argv[0]) == '__main__.py': @@ -43,6 +45,20 @@ def detect_variant(): return 'unknown' +_NON_UPDATEABLE_REASONS = { + 'exe': None, + 'zip': None, + 'dir': 'Auto-update is not supported for unpackaged windows executable. 
Re-download the latest release', + 'py2exe': 'There is no official release for py2exe executable. Build it again with the latest source code', + 'source': 'You cannot update when running from source code', + 'unknown': 'It looks like you installed yt-dlp with a package manager, pip, setup.py or a tarball. Use that to update', +} + + +def is_non_updateable(): + return _NON_UPDATEABLE_REASONS.get(detect_variant(), _NON_UPDATEABLE_REASONS['unknown']) + + def update_self(to_screen, verbose, opener): ''' Exists for backward compatibility. Use run_update(ydl) instead ''' @@ -114,14 +130,7 @@ def run_update(ydl): ydl.to_screen(f'yt-dlp is up to date ({__version__})') return - ERRORS = { - 'exe': None, - 'zip': None, - 'dir': 'Auto-update is not supported for unpackaged windows executable. Re-download the latest release', - 'source': 'You cannot update when running from source code', - 'unknown': 'It looks like you installed yt-dlp with a package manager, pip, setup.py or a tarball. Use that to update', - } - err = ERRORS.get(detect_variant(), ERRORS['unknown']) + err = is_non_updateable() if err: ydl.to_screen(f'Latest version: {version_id}, Current version: {__version__}') return report_error(err, expected=True) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 7a77edf4c3..b79b796889 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4521,11 +4521,10 @@ def is_outdated_version(version, limit, assume_new=True): def ytdl_is_updateable(): """ Returns if yt-dlp can be updated with -U """ - return False - from zipimport import zipimporter + from .update import is_non_updateable - return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen') + return not is_non_updateable() def args_to_str(args): From b11c04a8ae07608de8b0d0e1975f92b05270aeb0 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 4 Oct 2021 02:55:11 +0530 Subject: [PATCH 194/641] Fix `-f mp4` behaving differently from youtube-dl --- yt_dlp/YoutubeDL.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 9c4dd3ec52..f009e9e195 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1944,9 +1944,14 @@ class YoutubeDL(object): filter_f = lambda f: _filter_f(f) and ( f.get('vcodec') != 'none' or f.get('acodec') != 'none') else: - filter_f = ((lambda f: f.get('ext') == format_spec) - if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension - else (lambda f: f.get('format_id') == format_spec)) # id + if format_spec in ('m4a', 'mp3', 'ogg', 'aac'): # audio extension + filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' + elif format_spec in ('mp4', 'flv', 'webm', '3gp'): # video extension + filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none' + elif format_spec in ('mhtml', ): # storyboards extension + filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none' + else: + filter_f = (lambda f: f.get('format_id') == format_spec) # id def selector_function(ctx): formats = list(ctx['formats']) From efc947fb3eea38eeae257980e663de806f1e19d0 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Mon, 4 Oct 2021 18:37:05 +0000 Subject: [PATCH 195/641] [Bilibili] Add subtitle converter (#1144) Closes #1015 Based on https://github.com/y2361547758/bcc2ass Authored by: u-spec-png --- yt_dlp/extractor/bilibili.py | 16 ++++++++++++++-- 1 file 
changed, 14 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index 0a81452c32..daa224b17f 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -22,6 +22,7 @@ from ..utils import (
     parse_iso8601,
     try_get,
     smuggle_url,
+    srt_subtitles_timecode,
     str_or_none,
     str_to_int,
     strip_jsonp,
@@ -623,7 +624,7 @@ class BiliBiliSearchIE(SearchInfoExtractor):
         while True:
             pageNumber += 1
             # FIXME
-            api_url = "https://api.bilibili.com/x/web-interface/search/type?context=&page=%s&order=pubdate&keyword=%s&duration=0&tids_2=&__refresh__=true&search_type=video&tids=0&highlight=1" % (pageNumber, query)
+            api_url = 'https://api.bilibili.com/x/web-interface/search/type?context=&page=%s&order=pubdate&keyword=%s&duration=0&tids_2=&__refresh__=true&search_type=video&tids=0&highlight=1' % (pageNumber, query)
             json_str = self._download_webpage(
                 api_url, "None", query={"Search_key": query},
                 note='Extracting results from page %s' % pageNumber)
@@ -783,6 +784,12 @@ class BiliIntlBaseIE(InfoExtractor):
     def _call_api(self, type, endpoint, id):
         return self._download_json(self._API_URL.format(type, endpoint), id)['data']

+    def json2srt(self, json):
+        data = '\n\n'.join(
+            f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
+            for i, line in enumerate(json['body']))
+        return data
+
     def _get_subtitles(self, type, ep_id):
         sub_json = self._call_api(type, f'/m/subtitle?ep_id={ep_id}&platform=web', ep_id)
         subtitles = {}
@@ -790,8 +797,13 @@
             sub_url = sub.get('url')
             if not sub_url:
                 continue
+            sub_data = self._download_json(sub_url, ep_id, fatal=False)
+            if not sub_data:
+                continue
             subtitles.setdefault(sub.get('key', 'en'), []).append({
-                'url': sub_url,
+                'ext': 'srt',
+                'data': self.json2srt(sub_data)
             })
         return subtitles
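The `json2srt` conversion can be sanity-checked in isolation; `srt_subtitles_timecode` is the helper from `yt_dlp.utils`, and the sample dict merely mimics the shape of Bilibili's subtitle JSON:

    from yt_dlp.utils import srt_subtitles_timecode

    sub = {'body': [{'from': 0, 'to': 2.5, 'content': 'Hello'},
                    {'from': 2.5, 'to': 5, 'content': 'World'}]}
    print('\n\n'.join(
        f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
        for i, line in enumerate(sub['body'])))
    # 1
    # 00:00:00,000 --> 00:00:02,500
    # Hello
    #
    # 2
    # 00:00:02,500 --> 00:00:05,000
    # World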
From ebf2fb4d619b7d65b40ae6bacc79bd9f3d3ceab8 Mon Sep 17 00:00:00 2001
From: u-spec-png <54671367+u-spec-png@users.noreply.github.com>
Date: Mon, 4 Oct 2021 18:42:24 +0000
Subject: [PATCH 196/641] [Vupload] Add extractor (#1146)

Fixes: https://github.com/ytdl-org/youtube-dl/issues/29877
Authored by: u-spec-png

---
 yt_dlp/extractor/extractors.py |  1 +
 yt_dlp/extractor/vupload.py    | 51 ++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 yt_dlp/extractor/vupload.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 8e8d269ced..b90110c7f6 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1713,6 +1713,7 @@ from .vtm import VTMIE
 from .medialaan import MedialaanIE
 from .vube import VubeIE
 from .vuclip import VuClipIE
+from .vupload import VuploadIE
 from .vvvvid import (
     VVVVIDIE,
     VVVVIDShowIE,
diff --git a/yt_dlp/extractor/vupload.py b/yt_dlp/extractor/vupload.py
new file mode 100644
index 0000000000..9846ababcc
--- /dev/null
+++ b/yt_dlp/extractor/vupload.py
@@ -0,0 +1,51 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    parse_filesize,
+    extract_attributes,
+    int_or_none,
+)
+
+
+class VuploadIE(InfoExtractor):
+    _VALID_URL = r'https://vupload\.com/v/(?P<id>[a-z0-9]+)'
+    _TESTS = [{
+        'url': 'https://vupload.com/v/u28d0pl2tphy',
+        'md5': '9b42a4a193cca64d80248e58527d83c8',
+        'info_dict': {
+            'id': 'u28d0pl2tphy',
+            'ext': 'mp4',
+            'description': 'md5:e9e6c0045c78cbf0d5bb19a55ce199fb',
+            'title': 'md5:e9e6c0045c78cbf0d5bb19a55ce199fb',
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
+        video_e = self._html_search_regex(r'\|([a-z0-9]{60})\|', webpage, 'video')
+        video_url = f'https://wurize.megaupload.to/{video_e}/v.mp4'
+        duration = parse_duration(self._html_search_regex(
+            r'<i\s*class=["\']fad\s*fa-clock["\']></i>\s*([\d:]+)\s*</div>', webpage, 'duration', fatal=False))
+        filesize_approx = parse_filesize(self._html_search_regex(
+            r'<i\s*class=["\']fad\s*fa-save["\']></i>\s*([^<]+)\s*</div>', webpage, 'filesize', fatal=False))
+        extra_video_info = extract_attributes(self._html_search_regex(
+            r'(<video[^>]+>)', webpage, 'video_info', fatal=False))
+        description = self._html_search_meta('description', webpage)
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'duration': duration,
+            'filesize_approx': filesize_approx,
+            'width': int_or_none(extra_video_info.get('width')),
+            'height': int_or_none(extra_video_info.get('height')),
+            'format_id': extra_video_info.get('height', '') + 'p',
+            'title': title,
+            'description': description,
+        }

From 3001a84dca08612e72aa2116941868636e800f32 Mon Sep 17 00:00:00 2001
From: u-spec-png <54671367+u-spec-png@users.noreply.github.com>
Date: Mon, 4 Oct 2021 18:58:02 +0000
Subject: [PATCH 197/641] [Newgrounds] Add age_limit and fix duration (#1156)

Authored by: u-spec-png

---
 yt_dlp/extractor/newgrounds.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/newgrounds.py b/yt_dlp/extractor/newgrounds.py
index 3c49008a0a..bbbd9e8eec 100644
--- a/yt_dlp/extractor/newgrounds.py
+++ b/yt_dlp/extractor/newgrounds.py
@@ -42,6 +42,7 @@ class NewgroundsIE(InfoExtractor):
             'timestamp': 955064100,
             'upload_date': '20000406',
             'description': 'Scrotum plays "catch."',
+            'age_limit': 17,
         },
     }, {
         # source format unavailable, additional mp4 formats
@@ -54,6 +55,7 @@
             'timestamp': 1487965140,
             'upload_date': '20170224',
             'description': 'ZTV News Episode 8 (February 2017)',
+            'age_limit': 17,
         },
         'params': {
             'skip_download': True,
@@ -69,6 +71,7 @@
             'timestamp': 1140663240,
             'upload_date': '20060223',
             'description': 'Metal Gear is awesome is so is this movie.',
+            'age_limit': 13,
         }
     }, {
         'url': 'https://www.newgrounds.com/portal/view/297383/format/flash',
@@ -81,8 +84,15 @@
             'uploader': 'Egoraptor',
             'upload_date': '20060223',
             'timestamp': 1140663240,
+            'age_limit': 13,
         }
     }]
+    _AGE_LIMIT = {
+        'e': 0,
+        't': 13,
+        'm': 17,
+        'a': 18,
+    }

     def _real_extract(self, url):
         media_id = self._match_id(url)
@@ -127,12 +137,16 @@ class NewgroundsIE(InfoExtractor):
             r'(?:Author|Writer)\s*<a[^>]+>([^<]+)'), webpage, 'uploader',
             fatal=False)

+        age_limit = self._html_search_regex(
+            r'<h2\s*class=["\']rated-([^"\'])["\'][^>]+>', webpage, 'age_limit', default='e')
+        age_limit = self._AGE_LIMIT.get(age_limit)
+
         timestamp = unified_timestamp(self._html_search_regex(
             (r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+</dd>\s*<dd>[^<]+)',
              r'<dt>\s*Uploaded\s*</dt>\s*<dd>([^<]+)'), webpage, 'timestamp',
             default=None))

         duration = parse_duration(self._html_search_regex(
-            r'"duration"\s*:\s*["\']?([\d]+)["\']?,', webpage,
+            r'"duration"\s*:\s*["\']?(\d+)["\']?', webpage,
             'duration', default=None))

         view_count = parse_count(self._html_search_regex(
@@ -164,6 +178,7 @@
             'formats': formats,
             'thumbnail': self._og_search_thumbnail(webpage),
             'description': self._og_search_description(webpage),
+            'age_limit': age_limit,
             'view_count': view_count,
         }
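The tightened duration pattern now captures a bare digit run; the helpers it feeds behave as follows (both from `yt_dlp.utils`):

    from yt_dlp.utils import parse_count, parse_duration

    print(parse_duration('353'))  # 353.0, a plain number of seconds as in '"duration": "353"'
    print(parse_count('1.2M'))    # 1200000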
From 943d5ab13305b6a37424e6572d10f562384ada9a Mon Sep 17 00:00:00 2001
From: MinePlayersPE
Date: Tue, 5 Oct 2021 02:01:33 +0700
Subject: [PATCH 198/641] [Douyin] Rewrite extractor (#1157)

Closes #1121
Authored by: MinePlayersPE

---
 yt_dlp/extractor/douyin.py     | 145 ------------------
 yt_dlp/extractor/extractors.py |   2 +-
 yt_dlp/extractor/tiktok.py     | 264 +++++++++++++++++++++++++--------
 3 files changed, 205 insertions(+), 206 deletions(-)
 delete mode 100644 yt_dlp/extractor/douyin.py

diff --git a/yt_dlp/extractor/douyin.py b/yt_dlp/extractor/douyin.py
deleted file mode 100644
index 7f3176be7a..0000000000
--- a/yt_dlp/extractor/douyin.py
+++ /dev/null
@@ -1,145 +0,0 @@
-# coding: utf-8
-
-from ..utils import (
-    int_or_none,
-    traverse_obj,
-    url_or_none,
-)
-from .common import (
-    InfoExtractor,
-    compat_urllib_parse_unquote,
-)
-
-
-class DouyinIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P<id>[0-9]+)'
-    _TESTS = [{
-        'url': 'https://www.douyin.com/video/6961737553342991651',
-        'md5': '10523312c8b8100f353620ac9dc8f067',
-        'info_dict': {
-            'id': '6961737553342991651',
-            'ext': 'mp4',
-            'title': '#杨超越 小小水手带你去远航❤️',
-            'uploader': '杨超越',
-            'upload_date': '20210513',
-            'timestamp': 1620905839,
-            'uploader_id': '110403406559',
-            'view_count': int,
-            'like_count': int,
-            'repost_count': int,
-            'comment_count': int,
-        }
-    }, {
-        'url': 'https://www.douyin.com/video/6982497745948921092',
-        'md5': 'd78408c984b9b5102904cf6b6bc2d712',
-        'info_dict': {
-            'id': '6982497745948921092',
-            'ext': 'mp4',
-            'title': '这个夏日和小羊@杨超越 一起遇见白色幻想',
-            'uploader': '杨超越工作室',
-            'upload_date': '20210708',
-            'timestamp': 1625739481,
-            'uploader_id': '408654318141572',
-            'view_count': int,
-            'like_count': int,
-            'repost_count': int,
-            'comment_count': int,
-        }
-    }, {
-        'url': 'https://www.douyin.com/video/6953975910773099811',
-        'md5': '72e882e24f75064c218b76c8b713c185',
-        'info_dict': {
-            'id': '6953975910773099811',
-            'ext': 'mp4',
-            'title': '#一起看海 出现在你的夏日里',
-            'uploader': '杨超越',
-            'upload_date': '20210422',
-            'timestamp': 1619098692,
-            'uploader_id': '110403406559',
-            'view_count': int,
-            'like_count': int,
-            'repost_count': int,
-            'comment_count': int,
-        }
-    }, {
-        'url': 'https://www.douyin.com/video/6950251282489675042',
-        'md5': 'b4db86aec367ef810ddd38b1737d2fed',
-        'info_dict': {
-            'id': '6950251282489675042',
-            'ext': 'mp4',
-            'title': '哈哈哈,成功了哈哈哈哈哈哈',
-            'uploader': '杨超越',
-            'upload_date': '20210412',
-            'timestamp': 1618231483,
-            'uploader_id': '110403406559',
-            'view_count': int,
-            'like_count': int,
-            'repost_count': int,
-            'comment_count': int,
-        }
-    }, {
-        'url': 'https://www.douyin.com/video/6963263655114722595',
-        'md5': '1abe1c477d05ee62efb40bf2329957cf',
-        'info_dict': {
-            'id': '6963263655114722595',
-            'ext': 'mp4',
-            'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈',
-            'uploader': '杨超越',
-            'upload_date': '20210517',
-            'timestamp': 1621261163,
-            'uploader_id': '110403406559',
-            'view_count': int,
-            'like_count': int,
-            'repost_count': int,
-            'comment_count': int,
-        }
-    }]
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        render_data = self._parse_json(
-            self._search_regex(
-                r'<script [^>]*\bid=[\'"]RENDER_DATA[\'"][^>]*>(%7B.+%7D)</script>',
-                webpage, 'render data'),
-            video_id, transform_source=compat_urllib_parse_unquote)
-        details = traverse_obj(render_data, (..., 'aweme', 'detail'), get_all=False)
-
-        thumbnails = [{'url': self._proto_relative_url(url)} for url in traverse_obj(
-            details, ('video', ('cover', 'dynamicCover', 'originCover')), expected_type=url_or_none, default=[])]
-
-        common = {
-            'width': traverse_obj(details, ('video', 'width'), expected_type=int),
-            'height': traverse_obj(details, ('video', 'height'), expected_type=int),
-            'ext': 'mp4',
-        }
-        formats = [{**common, 'url': self._proto_relative_url(url)} for url in traverse_obj(
-            details, ('video', 'playAddr', ..., 'src'), expected_type=url_or_none, default=[]) if url]
-        self._remove_duplicate_formats(formats)
-
-        download_url = traverse_obj(details, ('download', 'url'), expected_type=url_or_none)
-        if download_url:
-            formats.append({
-                **common,
-                'format_id': 'download',
-                'url': self._proto_relative_url(download_url),
-                'quality': 1,
-            })
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': details.get('desc') or self._html_search_meta('title', webpage),
-            'formats': formats,
-            'thumbnails': thumbnails,
-            'uploader': traverse_obj(details, ('authorInfo', 'nickname'), expected_type=str),
-            'uploader_id': traverse_obj(details, ('authorInfo', 'uid'), expected_type=str),
-            'uploader_url': 'https://www.douyin.com/user/%s' % traverse_obj(
-                details, ('authorInfo', 'secUid'), expected_type=str),
-            'timestamp': int_or_none(details.get('createTime')),
-            'duration': traverse_obj(details, ('video', 'duration'), expected_type=int),
-            'view_count': traverse_obj(details, ('stats', 'playCount'), expected_type=int),
-            'like_count': traverse_obj(details, ('stats', 'diggCount'), expected_type=int),
-            'repost_count': traverse_obj(details, ('stats', 'shareCount'), expected_type=int),
-            'comment_count': traverse_obj(details, ('stats', 'commentCount'), expected_type=int),
-        }
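`traverse_obj` (from `yt_dlp.utils`) does the heavy lifting in both the old and the new extractor: it walks nested dicts and lists, `...` branches over every element, and multiple paths act as fallbacks:

    from yt_dlp.utils import traverse_obj

    data = {'video': {'playAddr': [{'src': '//a.mp4'}, {'src': '//b.mp4'}]}}
    print(traverse_obj(data, ('video', 'playAddr', ..., 'src')))  # ['//a.mp4', '//b.mp4']
    print(traverse_obj(data, ('video', 'width'), ('video', 'playAddr', 0, 'src')))  # '//a.mp4'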
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index b90110c7f6..71e4cd4cf8 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -340,7 +340,6 @@ from .discoveryplusindia import (
     DiscoveryPlusIndiaShowIE,
 )
 from .dotsub import DotsubIE
-from .douyin import DouyinIE
 from .douyutv import (
     DouyuShowIE,
     DouyuTVIE,
@@ -1445,6 +1444,7 @@ from .threeqsdn import ThreeQSDNIE
 from .tiktok import (
     TikTokIE,
     TikTokUserIE,
+    DouyinIE,
 )
 from .tinypic import TinyPicIE
 from .tmz import TMZIE
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index 4b0efd4a3d..fc0915fb02 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -8,12 +8,14 @@ import time
 import json

 from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
 from ..utils import (
     ExtractorError,
     int_or_none,
     str_or_none,
     traverse_obj,
     try_get,
+    url_or_none,
     qualities,
 )

@@ -21,6 +23,10 @@ class TikTokBaseIE(InfoExtractor):
     _APP_VERSION = '20.9.3'
     _MANIFEST_APP_VERSION = '291'
+    _APP_NAME = 'trill'
+    _AID = 1180
+    _API_HOSTNAME = 'api-t2.tiktokv.com'
+    _UPLOADER_URL_FORMAT = 'https://www.tiktok.com/@%s'
     QUALITIES = ('360p', '540p', '720p')

     def _call_api(self, ep, query, video_id, fatal=True,
@@ -46,7 +52,7 @@
             'carrier_region': 'US',
             'sys_region': 'US',
             'region': 'US',
-            'app_name': 
'trill', + 'app_name': self._APP_NAME, 'app_language': 'en', 'language': 'en', 'timezone_name': 'America/New_York', @@ -55,20 +61,20 @@ class TikTokBaseIE(InfoExtractor): 'ac': 'wifi', 'mcc_mnc': '310260', 'is_my_cn': 0, - 'aid': 1180, + 'aid': self._AID, 'ssmix': 'a', 'as': 'a1qwert123', 'cp': 'cbfhckdckkde1', } - self._set_cookie('.tiktokv.com', 'odin_tt', ''.join(random.choice('0123456789abcdef') for i in range(160))) + self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for i in range(160))) return self._download_json( - 'https://api-t2.tiktokv.com/aweme/v1/%s/' % ep, video_id=video_id, + 'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id, fatal=fatal, note=note, errnote=errnote, headers={ 'User-Agent': f'com.ss.android.ugc.trill/{self._MANIFEST_APP_VERSION} (Linux; U; Android 10; en_US; Pixel 4; Build/QQ3A.200805.001; Cronet/58.0.2991.0)', 'Accept': 'application/json', }, query=real_query) - def _parse_aweme_video(self, aweme_detail): + def _parse_aweme_video_app(self, aweme_detail): aweme_id = aweme_detail['aweme_id'] video_info = aweme_detail['video'] @@ -146,6 +152,7 @@ class TikTokBaseIE(InfoExtractor): 'tbr': try_get(bitrate, lambda x: x['bit_rate'] / 1000), 'vcodec': 'h265' if traverse_obj( bitrate, 'is_bytevc1', 'is_h265') else 'h264', + 'fps': bitrate.get('FPS'), })) self._remove_duplicate_formats(formats) @@ -165,7 +172,9 @@ class TikTokBaseIE(InfoExtractor): stats_info = aweme_detail.get('statistics', {}) author_info = aweme_detail.get('author', {}) music_info = aweme_detail.get('music', {}) - user_id = str_or_none(author_info.get('nickname')) + user_url = self._UPLOADER_URL_FORMAT % (traverse_obj(author_info, + 'sec_uid', 'id', 'uid', 'unique_id', + expected_type=str_or_none, get_all=False)) contained_music_track = traverse_obj( music_info, ('matched_song', 'title'), ('matched_pgc_sound', 'title'), expected_type=str) @@ -187,9 +196,9 @@ class TikTokBaseIE(InfoExtractor): 'repost_count': int_or_none(stats_info.get('share_count')), 'comment_count': int_or_none(stats_info.get('comment_count')), 'uploader': str_or_none(author_info.get('unique_id')), - 'creator': user_id, + 'creator': str_or_none(author_info.get('nickname')), 'uploader_id': str_or_none(author_info.get('uid')), - 'uploader_url': f'https://www.tiktok.com/@{user_id}' if user_id else None, + 'uploader_url': user_url, 'track': music_track, 'album': str_or_none(music_info.get('album')) or None, 'artist': music_author, @@ -199,6 +208,79 @@ class TikTokBaseIE(InfoExtractor): 'duration': int_or_none(traverse_obj(video_info, 'duration', ('download_addr', 'duration')), scale=1000) } + def _parse_aweme_video_web(self, aweme_detail, webpage, url): + video_info = aweme_detail['video'] + author_info = traverse_obj(aweme_detail, 'author', 'authorInfo', default={}) + music_info = aweme_detail.get('music') or {} + stats_info = aweme_detail.get('stats') or {} + user_url = self._UPLOADER_URL_FORMAT % (traverse_obj(author_info, + 'secUid', 'id', 'uid', 'uniqueId', + expected_type=str_or_none, get_all=False)) + + formats = [] + play_url = video_info.get('playAddr') + width = video_info.get('width') + height = video_info.get('height') + if isinstance(play_url, str): + formats = [{ + 'url': self._proto_relative_url(play_url), + 'ext': 'mp4', + 'width': width, + 'height': height, + }] + elif isinstance(play_url, list): + formats = [{ + 'url': self._proto_relative_url(url), + 'ext': 'mp4', + 'width': width, + 'height': height, + } for url in traverse_obj(play_url, (..., 'src'), 
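
Note on the hunk above: the web parser has to tolerate `playAddr` arriving either as a bare URL string or as a list of `{'src': ...}` objects. A standalone sketch of that normalization, using made-up sample data rather than a real TikTok payload:

# Illustrative sketch (sample data, not a real TikTok response) of how
# playAddr is normalized: a bare string and a list of {'src': ...} dicts
# both collapse into the same list of format dicts.
def formats_from_play_addr(play_addr, width=None, height=None):
    if isinstance(play_addr, str):
        play_addr = [{'src': play_addr}]
    return [{
        'url': entry['src'],
        'ext': 'mp4',
        'width': width,
        'height': height,
    } for entry in (play_addr or []) if entry.get('src')]

print(formats_from_play_addr('https://example.com/v.mp4'))
print(formats_from_play_addr([{'src': 'https://example.com/a.mp4'}, {}], 720, 1280))
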
expected_type=url_or_none, default=[]) if url] + + download_url = url_or_none(video_info.get('downloadAddr')) or traverse_obj(video_info, ('download', 'url'), expected_type=url_or_none) + if download_url: + formats.append({ + 'format_id': 'download', + 'url': self._proto_relative_url(download_url), + 'ext': 'mp4', + 'width': width, + 'height': height, + }) + self._remove_duplicate_formats(formats) + self._sort_formats(formats) + + thumbnails = [] + for thumbnail_name in ('thumbnail', 'cover', 'dynamicCover', 'originCover'): + if aweme_detail.get(thumbnail_name): + thumbnails = [{ + 'url': self._proto_relative_url(aweme_detail[thumbnail_name]), + 'width': width, + 'height': height + }] + + return { + 'id': traverse_obj(aweme_detail, 'id', 'awemeId', expected_type=str_or_none), + 'title': aweme_detail.get('desc'), + 'duration': try_get(aweme_detail, lambda x: x['video']['duration'], int), + 'view_count': int_or_none(stats_info.get('playCount')), + 'like_count': int_or_none(stats_info.get('diggCount')), + 'repost_count': int_or_none(stats_info.get('shareCount')), + 'comment_count': int_or_none(stats_info.get('commentCount')), + 'timestamp': int_or_none(aweme_detail.get('createTime')), + 'creator': str_or_none(author_info.get('nickname')), + 'uploader': str_or_none(author_info.get('uniqueId')), + 'uploader_id': str_or_none(author_info.get('id')), + 'uploader_url': user_url, + 'track': str_or_none(music_info.get('title')), + 'album': str_or_none(music_info.get('album')) or None, + 'artist': str_or_none(music_info.get('authorName')), + 'formats': formats, + 'thumbnails': thumbnails, + 'description': str_or_none(aweme_detail.get('desc')), + 'http_headers': { + 'Referer': url + } + } + class TikTokIE(TikTokBaseIE): _VALID_URL = r'https?://www\.tiktok\.com/@[\w\.-]+/video/(?P\d+)' @@ -255,60 +337,10 @@ class TikTokIE(TikTokBaseIE): 'only_matching': True, }] - def _extract_aweme(self, props_data, webpage, url): - video_info = try_get( - props_data, lambda x: x['pageProps']['itemInfo']['itemStruct'], dict) - author_info = try_get( - props_data, lambda x: x['pageProps']['itemInfo']['itemStruct']['author'], dict) or {} - music_info = try_get( - props_data, lambda x: x['pageProps']['itemInfo']['itemStruct']['music'], dict) or {} - stats_info = try_get(props_data, lambda x: x['pageProps']['itemInfo']['itemStruct']['stats'], dict) or {} - - user_id = str_or_none(author_info.get('uniqueId')) - download_url = try_get(video_info, (lambda x: x['video']['playAddr'], - lambda x: x['video']['downloadAddr'])) - height = try_get(video_info, lambda x: x['video']['height'], int) - width = try_get(video_info, lambda x: x['video']['width'], int) - thumbnails = [{ - 'url': video_info.get('thumbnail') or self._og_search_thumbnail(webpage), - 'width': width, - 'height': height - }] - tracker = try_get(props_data, lambda x: x['initialProps']['$wid']) - - return { - 'id': str_or_none(video_info.get('id')), - 'url': download_url, - 'ext': 'mp4', - 'height': height, - 'width': width, - 'title': video_info.get('desc') or self._og_search_title(webpage), - 'duration': try_get(video_info, lambda x: x['video']['duration'], int), - 'view_count': int_or_none(stats_info.get('playCount')), - 'like_count': int_or_none(stats_info.get('diggCount')), - 'repost_count': int_or_none(stats_info.get('shareCount')), - 'comment_count': int_or_none(stats_info.get('commentCount')), - 'timestamp': try_get(video_info, lambda x: int(x['createTime']), int), - 'creator': str_or_none(author_info.get('nickname')), - 'uploader': user_id, - 'uploader_id': 
str_or_none(author_info.get('id')), - 'uploader_url': f'https://www.tiktok.com/@{user_id}', - 'track': str_or_none(music_info.get('title')), - 'album': str_or_none(music_info.get('album')) or None, - 'artist': str_or_none(music_info.get('authorName')), - 'thumbnails': thumbnails, - 'description': str_or_none(video_info.get('desc')), - 'webpage_url': self._og_search_url(webpage), - 'http_headers': { - 'Referer': url, - 'Cookie': 'tt_webid=%s; tt_webid_v2=%s' % (tracker, tracker), - } - } - def _extract_aweme_app(self, aweme_id): aweme_detail = self._call_api('aweme/detail', {'aweme_id': aweme_id}, aweme_id, note='Downloading video details', errnote='Unable to download video details')['aweme_detail'] - return self._parse_aweme_video(aweme_detail) + return self._parse_aweme_video_app(aweme_detail) def _real_extract(self, url): video_id = self._match_id(url) @@ -330,7 +362,7 @@ class TikTokIE(TikTokBaseIE): # Chech statusCode for success status = props_data.get('pageProps').get('statusCode') if status == 0: - return self._extract_aweme(props_data, webpage, url) + return self._parse_aweme_video_web(props_data['pageProps']['itemInfo']['itemStruct'], webpage, url) elif status == 10216: raise ExtractorError('This video is private', expected=True) @@ -413,3 +445,115 @@ class TikTokUserIE(TikTokBaseIE): }) own_id = self._html_search_regex(r'snssdk\d*://user/profile/(\d+)', webpage, 'user ID') return self.playlist_result(self._entries_api(webpage, own_id, user_id), user_id) + + +class DouyinIE(TikTokIE): + _VALID_URL = r'https?://(?:www\.)?douyin\.com/video/(?P[0-9]+)' + _TESTS = [{ + 'url': 'https://www.douyin.com/video/6961737553342991651', + 'md5': '10523312c8b8100f353620ac9dc8f067', + 'info_dict': { + 'id': '6961737553342991651', + 'ext': 'mp4', + 'title': '#杨超越 小小水手带你去远航❤️', + 'uploader': '杨超越', + 'upload_date': '20210513', + 'timestamp': 1620905839, + 'uploader_id': '110403406559', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, + } + }, { + 'url': 'https://www.douyin.com/video/6982497745948921092', + 'md5': 'd78408c984b9b5102904cf6b6bc2d712', + 'info_dict': { + 'id': '6982497745948921092', + 'ext': 'mp4', + 'title': '这个夏日和小羊@杨超越 一起遇见白色幻想', + 'uploader': '杨超越工作室', + 'upload_date': '20210708', + 'timestamp': 1625739481, + 'uploader_id': '408654318141572', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, + } + }, { + 'url': 'https://www.douyin.com/video/6953975910773099811', + 'md5': '72e882e24f75064c218b76c8b713c185', + 'info_dict': { + 'id': '6953975910773099811', + 'ext': 'mp4', + 'title': '#一起看海 出现在你的夏日里', + 'uploader': '杨超越', + 'upload_date': '20210422', + 'timestamp': 1619098692, + 'uploader_id': '110403406559', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, + } + }, { + 'url': 'https://www.douyin.com/video/6950251282489675042', + 'md5': 'b4db86aec367ef810ddd38b1737d2fed', + 'info_dict': { + 'id': '6950251282489675042', + 'ext': 'mp4', + 'title': '哈哈哈,成功了哈哈哈哈哈哈', + 'uploader': '杨超越', + 'upload_date': '20210412', + 'timestamp': 1618231483, + 'uploader_id': '110403406559', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, + } + }, { + 'url': 'https://www.douyin.com/video/6963263655114722595', + 'md5': '1abe1c477d05ee62efb40bf2329957cf', + 'info_dict': { + 'id': '6963263655114722595', + 'ext': 'mp4', + 'title': '#哪个爱豆的105度最甜 换个角度看看我哈哈', + 'uploader': '杨超越', + 'upload_date': '20210517', + 'timestamp': 1621261163, + 'uploader_id': 
'110403406559', + 'view_count': int, + 'like_count': int, + 'repost_count': int, + 'comment_count': int, + } + }] + _APP_VERSION = '9.6.0' + _MANIFEST_APP_VERSION = '960' + _APP_NAME = 'aweme' + _AID = 1128 + _API_HOSTNAME = 'aweme.snssdk.com' + _UPLOADER_URL_FORMAT = 'https://www.douyin.com/user/%s' + + def _real_extract(self, url): + video_id = self._match_id(url) + + try: + return self._extract_aweme_app(video_id) + except ExtractorError as e: + self.report_warning(f'{e}; Retrying with webpage') + + webpage = self._download_webpage(url, video_id) + render_data_json = self._search_regex( + r'', + webpage, 'render data', default=None) + if not render_data_json: + # TODO: Run verification challenge code to generate signature cookies + raise ExtractorError('Fresh cookies (not necessarily logged in) are needed') + + render_data = self._parse_json( + render_data_json, video_id, transform_source=compat_urllib_parse_unquote) + return self._parse_aweme_video_web( + traverse_obj(render_data, (..., 'aweme', 'detail'), get_all=False), webpage, url) From 755203fc3fd33c257e582377c67790e1d4e0bfb6 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Mon, 4 Oct 2021 19:09:00 +0000 Subject: [PATCH 199/641] [parliamentlive.tv] Fix extractor (#1153) Closes #1139 Authored by: u-spec-png --- yt_dlp/extractor/parliamentliveuk.py | 76 +++++++++++++++++++++------- 1 file changed, 58 insertions(+), 18 deletions(-) diff --git a/yt_dlp/extractor/parliamentliveuk.py b/yt_dlp/extractor/parliamentliveuk.py index bdd5ff5654..869ebd8655 100644 --- a/yt_dlp/extractor/parliamentliveuk.py +++ b/yt_dlp/extractor/parliamentliveuk.py @@ -1,6 +1,14 @@ +# coding: utf-8 from __future__ import unicode_literals +import json +import uuid + from .common import InfoExtractor +from ..utils import ( + unified_timestamp, + try_get, +) class ParliamentLiveUKIE(InfoExtractor): @@ -11,12 +19,14 @@ class ParliamentLiveUKIE(InfoExtractor): _TESTS = [{ 'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b', 'info_dict': { - 'id': '1_af9nv9ym', + 'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b', 'ext': 'mp4', 'title': 'Home Affairs Committee', - 'uploader_id': 'FFMPEG-01', - 'timestamp': 1422696664, - 'upload_date': '20150131', + 'timestamp': 1395153872, + 'upload_date': '20140318', + }, + 'params': { + 'format': 'bestvideo', }, }, { 'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4', @@ -25,19 +35,49 @@ class ParliamentLiveUKIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage( - 'http://vodplayer.parliamentlive.tv/?mid=' + video_id, video_id) - widget_config = self._parse_json(self._search_regex( - r'(?s)kWidgetConfig\s*=\s*({.+});', - webpage, 'kaltura widget config'), video_id) - kaltura_url = 'kaltura:%s:%s' % ( - widget_config['wid'][1:], widget_config['entry_id']) - event_title = self._download_json( - 'http://parliamentlive.tv/Event/GetShareVideo/' + video_id, video_id)['event']['title'] + video_info = self._download_json(f'https://www.parliamentlive.tv/Event/GetShareVideo/{video_id}', video_id) + _DEVICE_ID = str(uuid.uuid4()) + auth = 'Bearer ' + self._download_json( + 'https://exposure.api.redbee.live/v2/customer/UKParliament/businessunit/ParliamentLive/auth/anonymous', + video_id, headers={ + 'Origin': 'https://videoplayback.parliamentlive.tv', + 'Accept': 'application/json, text/plain, */*', + 'Content-Type': 'application/json;charset=utf-8' + }, data=json.dumps({ + 
'deviceId': _DEVICE_ID, + 'device': { + 'deviceId': _DEVICE_ID, + 'width': 653, + 'height': 368, + 'type': 'WEB', + 'name': ' Mozilla Firefox 91' + } + }).encode('utf-8'))['sessionToken'] + + video_urls = self._download_json( + f'https://exposure.api.redbee.live/v2/customer/UKParliament/businessunit/ParliamentLive/entitlement/{video_id}/play', + video_id, headers={'Authorization': auth, 'Accept': 'application/json, text/plain, */*'})['formats'] + + formats = [] + for format in video_urls: + if not format.get('mediaLocator'): + continue + if format.get('format') == 'DASH': + formats.extend(self._extract_mpd_formats( + format['mediaLocator'], video_id, mpd_id='dash', fatal=False)) + elif format.get('format') == 'SMOOTHSTREAMING': + formats.extend(self._extract_ism_formats( + format['mediaLocator'], video_id, ism_id='ism', fatal=False)) + elif format.get('format') == 'HLS': + formats.extend(self._extract_m3u8_formats( + format['mediaLocator'], video_id, m3u8_id='hls', fatal=False)) + + self._sort_formats(formats) + return { - '_type': 'url_transparent', - 'title': event_title, - 'description': '', - 'url': kaltura_url, - 'ie_key': 'Kaltura', + 'id': video_id, + 'formats': formats, + 'title': video_info['event']['title'], + 'timestamp': unified_timestamp(try_get(video_info, lambda x: x['event']['publishedStartTime'])), + 'thumbnail': video_info.get('thumbnailUrl'), } From 0f0ac87be3fc55cab8fec767c446431a8ce085f3 Mon Sep 17 00:00:00 2001 From: makeworld <25111343+makeworld-the-better-one@users.noreply.github.com> Date: Mon, 4 Oct 2021 15:11:00 -0400 Subject: [PATCH 200/641] [CBC] Cleanup tests (#1162) Related: #1013 Authored by: makeworld-the-better-one --- yt_dlp/extractor/cbc.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index 061b09908d..5e4526c535 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -202,7 +202,6 @@ class CBCGemIE(InfoExtractor): IE_NAME = 'gem.cbc.ca' _VALID_URL = r'https?://gem\.cbc\.ca/media/(?P[0-9a-z-]+/s[0-9]+[a-z][0-9]+)' _TESTS = [{ - # geo-restricted to Canada, bypassable # This is a normal, public, TV show video 'url': 'https://gem.cbc.ca/media/schitts-creek/s06e01', 'md5': '93dbb31c74a8e45b378cf13bd3f6f11e', @@ -224,7 +223,6 @@ class CBCGemIE(InfoExtractor): 'params': {'format': 'bv'}, 'skip': 'Geo-restricted to Canada', }, { - # geo-restricted to Canada, bypassable # This video requires an account in the browser, but works fine in yt-dlp 'url': 'https://gem.cbc.ca/media/schitts-creek/s01e01', 'md5': '297a9600f554f2258aed01514226a697', @@ -313,7 +311,6 @@ class CBCGemPlaylistIE(InfoExtractor): IE_NAME = 'gem.cbc.ca:playlist' _VALID_URL = r'https?://gem\.cbc\.ca/media/(?P(?P[0-9a-z-]+)/s(?P[0-9]+))/?(?:[?#]|$)' _TESTS = [{ - # geo-restricted to Canada, bypassable # TV show playlist, all public videos 'url': 'https://gem.cbc.ca/media/schitts-creek/s06', 'playlist_count': 16, @@ -322,7 +319,6 @@ class CBCGemPlaylistIE(InfoExtractor): 'title': 'Season 6', 'description': 'md5:6a92104a56cbeb5818cc47884d4326a2', }, - 'skip': 'Geo-restricted to Canada', }] _API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/shows/' From d92125aeba4eefe8ef2c4f9ead8af99dd33ff0d4 Mon Sep 17 00:00:00 2001 From: i6t <62123048+i6t@users.noreply.github.com> Date: Tue, 5 Oct 2021 04:23:37 +0900 Subject: [PATCH 201/641] [GoPro] Add extractor (#1167) Fixes: https://github.com/ytdl-org/youtube-dl/issues/30044 Authored by: i6t --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/gopro.py | 110 
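
For context on the Red Bee flow the parliamentlive.tv patch above adopts: device details are POSTed to the anonymous auth endpoint to obtain a `sessionToken`, which then authorizes the entitlement request. A hedged, standard-library-only sketch of the first step (payload shape as in the diff; the Origin/Accept headers and error handling are trimmed):

import json
import uuid
from urllib.request import Request, urlopen

def anonymous_session_token(base_url):
    # base_url: the customer/businessunit prefix used in the diff
    # (assumes the same endpoint layout as in the extractor above)
    device_id = str(uuid.uuid4())
    payload = json.dumps({
        'deviceId': device_id,
        'device': {'deviceId': device_id, 'type': 'WEB'},
    }).encode('utf-8')
    req = Request(base_url + '/auth/anonymous', data=payload,
                  headers={'Content-Type': 'application/json;charset=utf-8'})
    with urlopen(req) as resp:
        return json.load(resp)['sessionToken']

# Hypothetical usage mirroring the extractor:
# auth = 'Bearer ' + anonymous_session_token(
#     'https://exposure.api.redbee.live/v2/customer/UKParliament/businessunit/ParliamentLive')
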
+++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 yt_dlp/extractor/gopro.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 71e4cd4cf8..8c5b8b1607 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -524,6 +524,7 @@ from .googlepodcasts import ( GooglePodcastsFeedIE, ) from .googlesearch import GoogleSearchIE +from .gopro import GoProIE from .goshgay import GoshgayIE from .gotostage import GoToStageIE from .gputechconf import GPUTechConfIE diff --git a/yt_dlp/extractor/gopro.py b/yt_dlp/extractor/gopro.py new file mode 100644 index 0000000000..10cc1aec1d --- /dev/null +++ b/yt_dlp/extractor/gopro.py @@ -0,0 +1,110 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + remove_end, + str_or_none, + try_get, + unified_timestamp, + url_or_none, +) + + +class GoProIE(InfoExtractor): + _VALID_URL = r'https?://(www\.)?gopro\.com/v/(?P[A-Za-z0-9]+)' + + _TESTS = [{ + 'url': 'https://gopro.com/v/ZNVvED8QDzR5V', + 'info_dict': { + 'id': 'ZNVvED8QDzR5V', + 'title': 'My GoPro Adventure - 9/19/21', + 'thumbnail': r're:https?://.+', + 'ext': 'mp4', + 'timestamp': 1632072947, + 'upload_date': '20210919', + 'uploader_id': 'fireydive30018', + 'duration': 396062, + } + }, { + 'url': 'https://gopro.com/v/KRm6Vgp2peg4e', + 'info_dict': { + 'id': 'KRm6Vgp2peg4e', + 'title': 'じゃがいも カリカリ オーブン焼き', + 'thumbnail': r're:https?://.+', + 'ext': 'mp4', + 'timestamp': 1607231125, + 'upload_date': '20201206', + 'uploader_id': 'dc9bcb8b-47d2-47c6-afbc-4c48f9a3769e', + 'duration': 45187, + 'track': 'The Sky Machine', + } + }, { + 'url': 'https://gopro.com/v/kVrK9wlJvBMwn', + 'info_dict': { + 'id': 'kVrK9wlJvBMwn', + 'title': 'DARKNESS', + 'thumbnail': r're:https?://.+', + 'ext': 'mp4', + 'timestamp': 1594183735, + 'upload_date': '20200708', + 'uploader_id': '闇夜乃皇帝', + 'duration': 313075, + 'track': 'Battery (Live)', + 'artist': 'Metallica', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + metadata = self._parse_json( + self._html_search_regex(r'window\.__reflectData\s*=\s*([^;]+)', webpage, 'metadata'), video_id) + + video_info = metadata['collectionMedia'][0] + media_data = self._download_json( + 'https://api.gopro.com/media/%s/download' % video_info['id'], video_id) + + formats = [] + for fmt in try_get(media_data, lambda x: x['_embedded']['variations']) or []: + format_url = url_or_none(fmt.get('url')) + if not format_url: + continue + formats.append({ + 'url': format_url, + 'format_id': str_or_none(fmt.get('quality')), + 'format_note': str_or_none(fmt.get('label')), + 'ext': str_or_none(fmt.get('type')), + 'width': int_or_none(fmt.get('width')), + 'height': int_or_none(fmt.get('height')), + }) + + self._sort_formats(formats) + + title = str_or_none( + try_get(metadata, lambda x: x['collection']['title']) + or self._html_search_meta(['og:title', 'twitter:title'], webpage) + or remove_end(self._html_search_regex( + r']*>([^<]+)', webpage, 'title', fatal=False), ' | GoPro')) + if title: + title = title.replace('\n', ' ') + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': url_or_none( + self._html_search_meta(['og:image', 'twitter:image'], webpage)), + 'timestamp': unified_timestamp( + try_get(metadata, lambda x: x['collection']['created_at'])), + 'uploader_id': str_or_none( + try_get(metadata, lambda x: 
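
The GoPro extractor leans heavily on `try_get` because any branch of `window.__reflectData` may be absent. A dependency-free sketch of the pattern (simplified: the real helper also accepts a tuple of getters):

# Evaluate a getter over possibly-missing nested data and fall back to
# None instead of raising.
def try_get(src, getter, expected_type=None):
    try:
        value = getter(src)
    except (AttributeError, KeyError, TypeError, IndexError):
        return None
    if expected_type is not None and not isinstance(value, expected_type):
        return None
    return value

metadata = {'collection': {'title': 'My GoPro Adventure'}}
print(try_get(metadata, lambda x: x['collection']['title'], str))  # My GoPro Adventure
print(try_get(metadata, lambda x: x['account']['nickname'], str))  # None
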
x['account']['nickname'])), + 'duration': int_or_none( + video_info.get('source_duration')), + 'artist': str_or_none( + video_info.get('music_track_artist')), + 'track': str_or_none( + video_info.get('music_track_name')), + } From 762e509d91be50546f62fc5c717280839b83c1e2 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Tue, 5 Oct 2021 08:30:57 +1300 Subject: [PATCH 202/641] [Mediaite] Relax valid url (#1158) Closes #1131 Authored by: coletdjnz --- yt_dlp/extractor/mediaite.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/mediaite.py b/yt_dlp/extractor/mediaite.py index 646c922231..b670f0d615 100644 --- a/yt_dlp/extractor/mediaite.py +++ b/yt_dlp/extractor/mediaite.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class MediaiteIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?mediaite.com/(?:tv|sports|politics|podcasts|opinion)/[\w-]+/' + _VALID_URL = r'https?://(?:www\.)?mediaite.com(?!/category)(?:/[\w-]+){2}' _TESTS = [{ 'url': 'https://www.mediaite.com/sports/bill-burr-roasts-nfl-for-promoting-black-lives-matter-while-scheduling-more-games-after-all-the-sht-they-know-about-cte/', 'info_dict': { @@ -71,6 +71,19 @@ class MediaiteIE(InfoExtractor): 'upload_date': '20210913', }, 'params': {'skip_download': True} + }, { + 'url': 'https://www.mediaite.com/news/watch-cnbcs-jim-cramer-says-nobody-wants-to-die-getting-infected-by-unvaccinated-coworker-even-for-22-an-hour/', + 'info_dict': { + 'id': 'nwpt1elX', + 'ext': 'mp4', + 'title': "CNBC's Jim Cramer Says Nobody Wants to Die Getting Infected by Unvaccinated Coworker 'Even for $22 an Hour'.mp4", + 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'thumbnail': 'https://cdn.jwplayer.com/v2/media/nwpt1elX/poster.jpg?width=720', + 'duration': 60, + 'timestamp': 1633014214, + 'upload_date': '20210930', + }, + 'params': {'skip_download': True} }] def _real_extract(self, url): From f85e6be42ec5e65c07a3f99927ca9dfe81d683f0 Mon Sep 17 00:00:00 2001 From: shirt <2660574+shirt-dev@users.noreply.github.com> Date: Tue, 5 Oct 2021 13:37:58 -0400 Subject: [PATCH 203/641] [build] Use pycryptodomex for PyInstaller (#1179) --- .github/workflows/build.yml | 4 ++-- .github/workflows/quick-test.yml | 2 +- README.md | 6 +++--- pyinst.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4f983f2c10..324cf7eb65 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -154,7 +154,7 @@ jobs: run: python -m pip install --upgrade pip setuptools wheel - name: Install Requirements # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds - run: pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodome websockets + run: pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodomex websockets - name: Bump version id: bump_version run: python devscripts/update-version.py @@ -220,7 +220,7 @@ jobs: - name: Upgrade pip and enable wheel support run: python -m pip install --upgrade pip setuptools wheel - name: Install Requirements - run: pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodome websockets + run: pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodomex websockets - name: Bump version id: bump_version run: python 
devscripts/update-version.py diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml index 500a504a4a..bbad209b39 100644 --- a/.github/workflows/quick-test.yml +++ b/.github/workflows/quick-test.yml @@ -12,7 +12,7 @@ jobs: with: python-version: 3.9 - name: Install test requirements - run: pip install pytest pycryptodome + run: pip install pytest pycryptodomex - name: Run tests run: ./devscripts/run_tests.sh core flake8: diff --git a/README.md b/README.md index d219b28d3b..cf46360a90 100644 --- a/README.md +++ b/README.md @@ -207,7 +207,7 @@ While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly To use or redistribute the dependencies, you must agree to their respective licensing terms. -The windows releases are already built with the python interpreter, mutagen, pycryptodome and websockets included. +The windows releases are already built with the python interpreter, mutagen, pycryptodomex and websockets included. **Note**: There are some regressions in newer ffmpeg versions that causes various issues when used alongside yt-dlp. Since ffmpeg is such an important dependancy, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds/wiki/Latest#latest-autobuilds) with patches for these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specifc issues solved by these builds @@ -215,9 +215,9 @@ The windows releases are already built with the python interpreter, mutagen, pyc ### COMPILE **For Windows**: -To build the Windows executable, you must have pyinstaller (and optionally mutagen, pycryptodome, websockets) +To build the Windows executable, you must have pyinstaller (and optionally mutagen, pycryptodomex, websockets) - python3 -m pip install --upgrade pyinstaller mutagen pycryptodome websockets + python3 -m pip install --upgrade pyinstaller mutagen pycryptodomex websockets Once you have all the necessary dependencies installed, just run `py pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it. diff --git a/pyinst.py b/pyinst.py index 7e040647c2..be1e00caae 100644 --- a/pyinst.py +++ b/pyinst.py @@ -76,7 +76,7 @@ VERSION_FILE = VSVersionInfo( ] ) -dependancies = ['Crypto', 'mutagen'] + collect_submodules('websockets') +dependancies = ['Cryptodome', 'mutagen'] + collect_submodules('websockets') excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc'] PyInstaller.__main__.run([ From 4e3d1898a802b3729a56fabecbcd5a641a6ab19c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 5 Oct 2021 08:32:05 +0530 Subject: [PATCH 204/641] Workaround ssl errors in mingw python Closes #1151 --- yt_dlp/utils.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index b79b796889..8b5b15103b 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2373,13 +2373,20 @@ def make_HTTPS_handler(params, **kwargs): context.check_hostname = opts_check_certificate context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE if opts_check_certificate: - # Work around the issue in load_default_certs when there are bad certificates. 
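
The gist of the workaround in this hunk: attempt `load_default_certs()`, and only fall back to enumerating the Windows certificate stores when a bad certificate in the system store raises `ssl.SSLError`. A hedged sketch of that fallback (it mirrors what `_ssl_load_windows_store_certs` does internally; not a drop-in replacement for the patched code):

import ssl
import sys

def make_verified_context():
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
    try:
        context.load_default_certs()
    except ssl.SSLError:
        # A bad certificate can poison load_default_certs; on Windows,
        # rebuild the context from individually enumerated certificates
        if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
            context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
            context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
            for storename in ('CA', 'ROOT'):
                for cert, encoding, _ in ssl.enum_certificates(storename):
                    if encoding == 'x509_asn':  # DER bytes, accepted via cadata
                        try:
                            context.load_verify_locations(cadata=cert)
                        except ssl.SSLError:
                            pass  # skip the offending certificate
    return context
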
See: - # https://github.com/yt-dlp/yt-dlp/issues/1060, - # https://bugs.python.org/issue35665, https://bugs.python.org/issue4531 - if sys.platform == 'win32': - for storename in ('CA', 'ROOT'): - _ssl_load_windows_store_certs(context, storename) - context.set_default_verify_paths() + try: + context.load_default_certs() + # Work around the issue in load_default_certs when there are bad certificates. See: + # https://github.com/yt-dlp/yt-dlp/issues/1060, + # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312 + except ssl.SSLError: + # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151 + if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'): + # Create a new context to discard any certificates that were already loaded + context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED + for storename in ('CA', 'ROOT'): + _ssl_load_windows_store_certs(context, storename) + context.set_default_verify_paths() return YoutubeDLHTTPSHandler(params, context=context, **kwargs) From 644149afec99b2db4c1cc1286eb5c753ac187c44 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 5 Oct 2021 08:33:36 +0530 Subject: [PATCH 205/641] [soundcloud:playlist] Detect last page correctly Closes #1168 --- yt_dlp/extractor/soundcloud.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 1503ae586a..ad3a32a024 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -707,6 +707,8 @@ class SoundcloudPagedPlaylistBaseIE(SoundcloudIE): yield resolve_entry(e, e.get('track'), e.get('playlist')) url = response.get('next_href') + if not url: + break query.pop('offset', None) From 1b6bb4a85a74028111597e1a683914bb33615ef8 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 5 Oct 2021 08:34:05 +0530 Subject: [PATCH 206/641] [reddit] bugfix for 8e3fd7e034cdd54972d13394821cd9e55e1c3735 --- yt_dlp/extractor/reddit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index e5a1f69205..c75d95a8e8 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -109,7 +109,7 @@ class RedditRIE(InfoExtractor): self._set_cookie('.reddit.com', 'reddit_session', self._gen_session_id()) self._set_cookie('.reddit.com', '_options', '%7B%22pref_quarantine_optin%22%3A%20true%7D') - data = self._download_json(f'https://{subdomain}.reddit.com/r/{slug}/.json', video_id, fatal=False) + data = self._download_json(f'https://{subdomain}reddit.com/r/{slug}/.json', video_id, fatal=False) if not data: # Fall back to old.reddit.com in case the requested subdomain fails data = self._download_json(f'https://old.reddit.com/r/{slug}/.json', video_id) From 519804a92fbc065e35b752ca160dcef3f3656ef7 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 5 Oct 2021 09:45:46 +0530 Subject: [PATCH 207/641] bugfix for 80c03fa98fdd54410bd36684ef453f6976a9c0bf --- yt_dlp/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index f009e9e195..3abb43000c 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3434,7 +3434,7 @@ class YoutubeDL(object): except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err: self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}') continue - return ret + return ret def _write_thumbnails(self, label, 
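
The SoundCloud fix above boils down to terminating pagination when the response omits `next_href`. A minimal sketch of the corrected loop, with a toy `fetch` standing in for the API call (field names mirror the SoundCloud response shape):

def iter_pages(fetch, url):
    while url:
        response = fetch(url)
        yield from response.get('collection') or []
        url = response.get('next_href')  # missing on the last page -> loop ends

pages = {
    'page1': {'collection': [1, 2], 'next_href': 'page2'},
    'page2': {'collection': [3]},  # no next_href: this is the last page
}
print(list(iter_pages(pages.__getitem__, 'page1')))  # [1, 2, 3]
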
info_dict, filename, thumb_filename_base=None):
         ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''

From 1276a43a77144567fc575d6aaec5b5f8468b7d56 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Mon, 4 Oct 2021 02:44:55 +0530
Subject: [PATCH 208/641] [youtube] Fix non-fatal errors in fetching player

---
 yt_dlp/extractor/youtube.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 159b0a3b9d..56de2ef591 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -1911,10 +1911,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     def _load_player(self, video_id, player_url, fatal=True) -> bool:
         player_id = self._extract_player_info(player_url)
         if player_id not in self._code_cache:
-            self._code_cache[player_id] = self._download_webpage(
+            code = self._download_webpage(
                 player_url, video_id, fatal=fatal,
                 note='Downloading player ' + player_id,
                 errnote='Download of %s failed' % player_url)
+            if code:
+                self._code_cache[player_id] = code
         return player_id in self._code_cache
 
     def _extract_signature_function(self, video_id, player_url, example_sig):

From 84726743993295f6105ed9ef5412040b8842e4c6 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 6 Oct 2021 05:43:22 +0530
Subject: [PATCH 209/641] [FixupM3u8] Do not run if merge is needed

We pass the relevant arguments to the merger, so a separate fixup is redundant

---
 yt_dlp/YoutubeDL.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 3abb43000c..770f627342 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -2820,7 +2820,8 @@ class YoutubeDL(object):
         downloader = (get_suitable_downloader(info_dict, self.params).__name__
                       if 'protocol' in info_dict else None)
-        ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
+        ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
+                     'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
         ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
         ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)

From 49e7e9c3ce9d5294f024757cbbfedd6c9d0623be Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 6 Oct 2021 06:34:10 +0530
Subject: [PATCH 210/641] [docs,build] Change all pycryptodome references to pycryptodomex

---
 README.md                | 4 ++--
 pyinst.py                | 22 ++++++++++++++++------
 requirements.txt         | 2 +-
 setup.py                 | 2 +-
 yt_dlp/downloader/hls.py | 4 ++--
 yt_dlp/extractor/ivi.py  | 2 +-
 6 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index cf46360a90..3c73d3aac1 100644
--- a/README.md
+++ b/README.md
@@ -195,7 +195,7 @@ On windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https:
 While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly recommended
 * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging seperate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. Licence [depends on the build](https://www.ffmpeg.org/legal.html)
 * [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats.
Licenced under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING) -* [**pycryptodome**](https://github.com/Legrandin/pycryptodome) - For decrypting AES-128 HLS streams and various other data. Licenced under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) +* [**pycryptodomex**](https://github.com/Legrandin/pycryptodomex) - For decrypting AES-128 HLS streams and various other data. Licenced under [BSD2](https://github.com/Legrandin/pycryptodomex/blob/master/LICENSE.rst) * [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licenced under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE) * [**keyring**](https://github.com/jaraco/keyring) - For decrypting cookies of chromium-based browsers on Linux. Licenced under [MIT](https://github.com/jaraco/keyring/blob/main/LICENSE) * [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licenced under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) @@ -217,7 +217,7 @@ The windows releases are already built with the python interpreter, mutagen, pyc **For Windows**: To build the Windows executable, you must have pyinstaller (and optionally mutagen, pycryptodomex, websockets) - python3 -m pip install --upgrade pyinstaller mutagen pycryptodomex websockets + python3 -m pip install -U -r requirements.txt Once you have all the necessary dependencies installed, just run `py pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it. diff --git a/pyinst.py b/pyinst.py index be1e00caae..ed410e0f2e 100644 --- a/pyinst.py +++ b/pyinst.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import sys -# import os import platform from PyInstaller.utils.hooks import collect_submodules @@ -29,10 +28,6 @@ print(f'Building {arch}bit version with options {opts}') FILE_DESCRIPTION = 'yt-dlp%s' % (' (32 Bit)' if _x86 else '') -# root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) -# print('Changing working directory to %s' % root_dir) -# os.chdir(root_dir) - exec(compile(open('yt_dlp/version.py').read(), 'yt_dlp/version.py', 'exec')) VERSION = locals()['__version__'] @@ -76,7 +71,22 @@ VERSION_FILE = VSVersionInfo( ] ) -dependancies = ['Cryptodome', 'mutagen'] + collect_submodules('websockets') + +def pycryptodome_module(): + try: + import Cryptodome # noqa: F401 + except ImportError: + try: + import Crypto # noqa: F401 + print('WARNING: Using Crypto since Cryptodome is not available. 
' + 'Install with: pip install pycryptodomex', file=sys.stderr) + return 'Crypto' + except ImportError: + pass + return 'Cryptodome' + + +dependancies = [pycryptodome_module(), 'mutagen'] + collect_submodules('websockets') excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc'] PyInstaller.__main__.run([ diff --git a/requirements.txt b/requirements.txt index 6a982fa369..cecd08eae8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ mutagen -pycryptodome +pycryptodomex websockets diff --git a/setup.py b/setup.py index b5eb81c301..ff23877dcc 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ LONG_DESCRIPTION = '\n\n'.join(( '**PS**: Some links in this document will not work since this is a copy of the README.md from Github', open('README.md', 'r', encoding='utf-8').read())) -REQUIREMENTS = ['mutagen', 'pycryptodome', 'websockets'] +REQUIREMENTS = ['mutagen', 'pycryptodomex', 'websockets'] if sys.argv[1:2] == ['py2exe']: diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 751d874d42..3c5a2617d0 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -72,9 +72,9 @@ class HlsFD(FragmentFD): can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None if can_download and not compat_pycrypto_AES and '#EXT-X-KEY:METHOD=AES-128' in s: if FFmpegFD.available(): - can_download, message = False, 'The stream has AES-128 encryption and pycryptodome is not available' + can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available' else: - message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodome are available; ' + message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; ' 'Decryption will be performed natively, but will be extremely slow') if not can_download: message = message or 'Unsupported features have been detected' diff --git a/yt_dlp/extractor/ivi.py b/yt_dlp/extractor/ivi.py index 098ab66659..5f8a046e08 100644 --- a/yt_dlp/extractor/ivi.py +++ b/yt_dlp/extractor/ivi.py @@ -141,7 +141,7 @@ class IviIE(InfoExtractor): elif site == 353: continue elif not pycryptodome_found: - raise ExtractorError('pycryptodome not found. Please install', expected=True) + raise ExtractorError('pycryptodomex not found. 
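
For context on the AES-128 HLS path these messages guard: each encrypted fragment is decrypted with the playlist key in CBC mode, which is what pycryptodomex provides. A sketch with made-up key/IV/fragment bytes (real keys come from the #EXT-X-KEY URI; the IV is either explicit in the playlist or derived from the media sequence number):

from Cryptodome.Cipher import AES

key = b'0123456789abcdef'        # 16-byte AES-128 key from #EXT-X-KEY
iv = (7).to_bytes(16, 'big')     # explicit IV, or the media sequence number
# Stand-in for a downloaded fragment; fresh cipher objects per direction,
# since CBC cipher objects are stateful
encrypted = AES.new(key, AES.MODE_CBC, iv).encrypt(b'\x00' * 32)
decrypted = AES.new(key, AES.MODE_CBC, iv).decrypt(encrypted)
assert decrypted == b'\x00' * 32
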
Please install', expected=True) elif message: extractor_msg += ': ' + message raise ExtractorError(extractor_msg % video_id, expected=True) From 705e7c2005dfe67a905e18736c9f6345ee9d386b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 6 Oct 2021 10:53:22 +0530 Subject: [PATCH 211/641] [Hidive] Fix duplicate and incorrect formats --- yt_dlp/extractor/hidive.py | 85 +++++++++++++++----------------------- 1 file changed, 34 insertions(+), 51 deletions(-) diff --git a/yt_dlp/extractor/hidive.py b/yt_dlp/extractor/hidive.py index 90457b77ea..909d1fbc10 100644 --- a/yt_dlp/extractor/hidive.py +++ b/yt_dlp/extractor/hidive.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -14,7 +12,7 @@ from ..utils import ( class HiDiveIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P[^/]+)/(?P<key>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P<id>(?P<title>[^/]+)/(?P<key>[^/?#&]+))' # Using X-Forwarded-For results in 403 HTTP error for HLS fragments, # so disabling geo bypass completely _GEO_BYPASS = False @@ -55,68 +53,53 @@ class HiDiveIE(InfoExtractor): self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data)) def _real_extract(self, url): - mobj = self._match_valid_url(url) - title, key = mobj.group('title', 'key') - video_id = '%s/%s' % (title, key) - webpage = self._download_webpage(url, video_id, fatal=False) - data_videos = re.findall(r'data-video=\"([^\"]+)\"\s?data-captions=\"([^\"]+)\"', webpage) - formats = [] - subtitles = {} - for data_video in data_videos: - _, _, _, version, audio, _, extra = data_video[0].split('_') - caption = data_video[1] + video_id, title, key = self._match_valid_url(url).group('id', 'title', 'key') + settings = self._download_json( + 'https://www.hidive.com/play/settings', video_id, + data=urlencode_postdata({ + 'Title': title, + 'Key': key, + 'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783', + })) - settings = self._download_json( - 'https://www.hidive.com/play/settings', video_id, - data=urlencode_postdata({ - 'Title': title, - 'Key': key, - 'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783', - 'Version': version, - 'Audio': audio, - 'Captions': caption, - 'Extra': extra, - })) + restriction = settings.get('restrictionReason') + if restriction == 'RegionRestricted': + self.raise_geo_restricted() + if restriction and restriction != 'None': + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, restriction), expected=True) - restriction = settings.get('restrictionReason') - if restriction == 'RegionRestricted': - self.raise_geo_restricted() - - if restriction and restriction != 'None': - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, restriction), expected=True) - - for rendition_id, rendition in settings['renditions'].items(): - m3u8_url = url_or_none(try_get(rendition, lambda x: x['bitrates']['hls'])) - if not m3u8_url: - continue + formats, subtitles, urls = [], {}, {None} + for rendition_id, rendition in settings['renditions'].items(): + audio, version, extra = rendition_id.split('_') + m3u8_url = url_or_none(try_get(rendition, lambda x: x['bitrates']['hls'])) + if m3u8_url not in urls: + urls.add(m3u8_url) frmt = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='%s-%s-%s-%s' % (version, audio, extra, caption), fatal=False) + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=rendition_id, fatal=False) for f in 
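
The rewritten HiDive extractor de-duplicates rendition and caption URLs with a seen-set seeded with `None`, so missing URLs are skipped along with repeats. The same pattern in isolation:

def dedupe(candidates):
    urls, unique = {None}, []  # seeding with None also filters missing URLs
    for url in candidates:
        if url not in urls:
            urls.add(url)
            unique.append(url)
    return unique

print(dedupe([None, 'a.m3u8', 'a.m3u8', 'b.m3u8']))  # ['a.m3u8', 'b.m3u8']
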
frmt: f['language'] = audio + f['format_note'] = f'{version}, {extra}' formats.extend(frmt) - for cc_file in rendition.get('ccFiles', []): - cc_url = url_or_none(try_get(cc_file, lambda x: x[2])) - # name is used since we cant distinguish subs with same language code - cc_lang = try_get(cc_file, (lambda x: x[1].replace(' ', '-').lower(), lambda x: x[0]), str) - if cc_url and cc_lang: - subtitles.setdefault(cc_lang, []).append({'url': cc_url}) + for cc_file in rendition.get('ccFiles', []): + cc_url = url_or_none(try_get(cc_file, lambda x: x[2])) + # name is used since we cant distinguish subs with same language code + cc_lang = try_get(cc_file, (lambda x: x[1].replace(' ', '-').lower(), lambda x: x[0]), str) + if cc_url not in urls and cc_lang: + urls.add(cc_url) + subtitles.setdefault(cc_lang, []).append({'url': cc_url}) self._sort_formats(formats) - season_number = int_or_none(self._search_regex( - r's(\d+)', key, 'season number', default=None)) - episode_number = int_or_none(self._search_regex( - r'e(\d+)', key, 'episode number', default=None)) - return { 'id': video_id, 'title': video_id, 'subtitles': subtitles, 'formats': formats, 'series': title, - 'season_number': season_number, - 'episode_number': episode_number, + 'season_number': int_or_none( + self._search_regex(r's(\d+)', key, 'season number', default=None)), + 'episode_number': int_or_none( + self._search_regex(r'e(\d+)', key, 'episode number', default=None)), 'http_headers': {'Referer': url} } From fee3f44f5f58274c637499f077aa0312e650f493 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Thu, 7 Oct 2021 14:32:42 +0000 Subject: [PATCH 212/641] [Streamable] Add codecs (#1189) Authored by: u-spec-png --- yt_dlp/extractor/streamable.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/streamable.py b/yt_dlp/extractor/streamable.py index 34725274e4..808129649c 100644 --- a/yt_dlp/extractor/streamable.py +++ b/yt_dlp/extractor/streamable.py @@ -8,6 +8,8 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, + try_get, + parse_codecs, ) @@ -29,7 +31,7 @@ class StreamableIE(InfoExtractor): 'view_count': int, } }, - # older video without bitrate, width/height, etc. info + # older video without bitrate, width/height, codecs, etc. 
info { 'url': 'https://streamable.com/moo', 'md5': '2cf6923639b87fba3279ad0df3a64e73', @@ -95,7 +97,9 @@ class StreamableIE(InfoExtractor): 'height': int_or_none(info.get('height')), 'filesize': int_or_none(info.get('size')), 'fps': int_or_none(info.get('framerate')), - 'vbr': float_or_none(info.get('bitrate'), 1000) + 'vbr': float_or_none(info.get('bitrate'), 1000), + 'vcodec': parse_codecs(try_get(info, lambda x: x['input_metadata']['video_codec_name'])).get('vcodec'), + 'acodec': parse_codecs(try_get(info, lambda x: x['input_metadata']['audio_codec_name'])).get('acodec'), }) self._sort_formats(formats) From 819e05319baff2d896df026f1ef905e1f21be942 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 9 Oct 2021 00:41:59 +0530 Subject: [PATCH 213/641] Improved progress reporting (See desc) (#1125) * Separate `--console-title` and `--no-progress` * Add option `--progress` to show progress-bar even in quiet mode * Fix and refactor `minicurses` * Use `minicurses` for all progress reporting * Standardize use of terminal sequences and enable color support for windows 10 * Add option `--progress-template` to customize progress-bar and console-title * Add postprocessor hooks and progress reporting Closes: #906, #901, #1085, #1170 --- README.md | 11 ++ test/test_YoutubeDL.py | 3 +- yt_dlp/YoutubeDL.py | 77 ++++++---- yt_dlp/__init__.py | 8 +- yt_dlp/compat.py | 7 + yt_dlp/downloader/common.py | 84 +++++----- yt_dlp/downloader/fragment.py | 4 +- yt_dlp/extractor/common.py | 5 +- yt_dlp/minicurses.py | 196 ++++++++++-------------- yt_dlp/options.py | 20 ++- yt_dlp/postprocessor/common.py | 63 +++++++- yt_dlp/postprocessor/metadataparser.py | 3 +- yt_dlp/postprocessor/modify_chapters.py | 3 +- yt_dlp/utils.py | 23 +++ 14 files changed, 301 insertions(+), 206 deletions(-) diff --git a/README.md b/README.md index 3c73d3aac1..1723865535 100644 --- a/README.md +++ b/README.md @@ -604,7 +604,18 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t (Alias: --force-download-archive) --newline Output progress bar as new lines --no-progress Do not print progress bar + --progress Show progress bar, even if in quiet mode --console-title Display progress in console titlebar + --progress-template [TYPES:]TEMPLATE + Template for progress outputs, optionally + prefixed with one of "download:" (default), + "download-title:" (the console title), + "postprocess:", or "postprocess-title:". + The video's fields are accessible under the + "info" key and the progress attributes are + accessible under "progress" key. 
Eg: + --console-title --progress-template + "download-title:%(info.id)s-%(progress.eta)s" -v, --verbose Print various debugging information --dump-pages Print downloaded pages encoded using base64 to debug problems (very verbose) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 450f254933..06963f7a8e 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -666,8 +666,7 @@ class TestYoutubeDL(unittest.TestCase): ydl._num_downloads = 1 self.assertEqual(ydl.validate_outtmpl(tmpl), None) - outtmpl, tmpl_dict = ydl.prepare_outtmpl(tmpl, info or self.outtmpl_info) - out = ydl.escape_outtmpl(outtmpl) % tmpl_dict + out = ydl.evaluate_outtmpl(tmpl, info or self.outtmpl_info) fname = ydl.prepare_filename(info or self.outtmpl_info) if not isinstance(expected, (list, tuple)): diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 770f627342..1d865161af 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -42,6 +42,7 @@ from .compat import ( compat_urllib_error, compat_urllib_request, compat_urllib_request_DataHandler, + windows_enable_vt_mode, ) from .cookies import load_cookies from .utils import ( @@ -67,8 +68,6 @@ from .utils import ( float_or_none, format_bytes, format_field, - STR_FORMAT_RE_TMPL, - STR_FORMAT_TYPES, formatSeconds, GeoRestrictedError, HEADRequest, @@ -101,9 +100,13 @@ from .utils import ( sanitize_url, sanitized_Request, std_headers, + STR_FORMAT_RE_TMPL, + STR_FORMAT_TYPES, str_or_none, strftime_or_none, subtitles_filename, + supports_terminal_sequences, + TERMINAL_SEQUENCES, ThrottledDownload, to_high_limit_path, traverse_obj, @@ -248,6 +251,7 @@ class YoutubeDL(object): rejecttitle: Reject downloads for matching titles. logger: Log messages to a logging.Logger instance. logtostderr: Log messages to stderr instead of stdout. + consoletitle: Display progress in console window's titlebar. writedescription: Write the video description to a .description file writeinfojson: Write the video description to a .info.json file clean_infojson: Remove private fields from the infojson @@ -353,6 +357,15 @@ class YoutubeDL(object): Progress hooks are guaranteed to be called at least once (with status "finished") if the download is successful. + postprocessor_hooks: A list of functions that get called on postprocessing + progress, with a dictionary with the entries + * status: One of "started", "processing", or "finished". + Check this first and ignore unknown values. + * postprocessor: Name of the postprocessor + * info_dict: The extracted info_dict + + Progress hooks are guaranteed to be called at least twice + (with status "started" and "finished") if the processing is successful. merge_output_format: Extension to use when merging formats. final_ext: Expected final extension; used to detect when the file was already downloaded and converted. "merge_output_format" is @@ -412,11 +425,15 @@ class YoutubeDL(object): filename, abort-on-error, multistreams, no-live-chat, no-clean-infojson, no-playlist-metafiles, no-keep-subs. Refer __init__.py for their implementation + progress_template: Dictionary of templates for progress outputs. + Allowed keys are 'download', 'postprocess', + 'download-title' (console title) and 'postprocess-title'. 
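
A hypothetical hook (names illustrative) matching the `postprocessor_hooks` contract documented above, which checks `status` first and ignores unknown values for forward compatibility:

def pp_hook(d):
    if d['status'] not in ('started', 'processing', 'finished'):
        return  # ignore states added in future versions
    print('[%s] %s: %s' % (d['postprocessor'], d['status'], d['info_dict'].get('id')))

# Hypothetical registration on a YoutubeDL instance:
# ydl.add_postprocessor_hook(pp_hook)
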
+ The template is mapped on a dictionary with keys 'progress' and 'info' The following parameters are not used by YoutubeDL itself, they are used by the downloader (see yt_dlp/downloader/common.py): nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize, - max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle, + max_filesize, test, noresizebuffer, retries, continuedl, noprogress, xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size. The following options are used by the post processors: @@ -484,26 +501,27 @@ class YoutubeDL(object): self._first_webpage_request = True self._post_hooks = [] self._progress_hooks = [] + self._postprocessor_hooks = [] self._download_retcode = 0 self._num_downloads = 0 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self._err_file = sys.stderr - self.params = { - # Default parameters - 'nocheckcertificate': False, - } - self.params.update(params) + self.params = params self.cache = Cache(self) + windows_enable_vt_mode() + self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file) + if sys.version_info < (3, 6): self.report_warning( 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2]) if self.params.get('allow_unplayable_formats'): self.report_warning( - 'You have asked for unplayable formats to be listed/downloaded. ' - 'This is a developer option intended for debugging. ' - 'If you experience any issues while using this option, DO NOT open a bug report') + f'You have asked for {self._color_text("unplayable formats", "blue")} to be listed/downloaded. ' + 'This is a developer option intended for debugging. \n' + ' If you experience any issues while using this option, ' + f'{self._color_text("DO NOT", "red")} open a bug report') def check_deprecated(param, option, suggestion): if self.params.get(param) is not None: @@ -675,9 +693,13 @@ class YoutubeDL(object): self._post_hooks.append(ph) def add_progress_hook(self, ph): - """Add the progress hook (currently only for the file downloader)""" + """Add the download progress hook""" self._progress_hooks.append(ph) + def add_postprocessor_hook(self, ph): + """Add the postprocessing progress hook""" + self._postprocessor_hooks.append(ph) + def _bidi_workaround(self, message): if not hasattr(self, '_output_channel'): return message @@ -790,6 +812,11 @@ class YoutubeDL(object): self.to_stdout( message, skip_eol, quiet=self.params.get('quiet', False)) + def _color_text(self, text, color): + if self.params.get('no_color'): + return text + return f'{TERMINAL_SEQUENCES[color.upper()]}{text}{TERMINAL_SEQUENCES["RESET_STYLE"]}' + def report_warning(self, message, only_once=False): ''' Print the message to stderr, it will be prefixed with 'WARNING:' @@ -800,24 +827,14 @@ class YoutubeDL(object): else: if self.params.get('no_warnings'): return - if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt': - _msg_header = '\033[0;33mWARNING:\033[0m' - else: - _msg_header = 'WARNING:' - warning_message = '%s %s' % (_msg_header, message) - self.to_stderr(warning_message, only_once) + self.to_stderr(f'{self._color_text("WARNING:", "yellow")} {message}', only_once) def report_error(self, message, tb=None): ''' Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. 
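
For reference, `_color_text` amounts to wrapping the message in terminal escape sequences unless `no_color` is set. An illustrative sketch with hard-coded ANSI codes standing in for `utils.TERMINAL_SEQUENCES` (an assumption here; the real lookup table lives in utils):

SEQUENCES = {'RED': '\033[0;31m', 'YELLOW': '\033[0;33m', 'RESET_STYLE': '\033[0m'}

def color_text(text, color, no_color=False):
    if no_color:
        return text
    return SEQUENCES[color.upper()] + text + SEQUENCES['RESET_STYLE']

print(color_text('ERROR:', 'red'), 'something went wrong')
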
''' - if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt': - _msg_header = '\033[0;31mERROR:\033[0m' - else: - _msg_header = 'ERROR:' - error_message = '%s %s' % (_msg_header, message) - self.trouble(error_message, tb) + self.trouble(f'{self._color_text("ERROR:", "red")} {message}', tb) def write_debug(self, message, only_once=False): '''Log debug message or Print message to stderr''' @@ -919,7 +936,7 @@ class YoutubeDL(object): return err def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): - """ Make the template and info_dict suitable for substitution : ydl.outtmpl_escape(outtmpl) % info_dict """ + """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """ info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set info_dict = dict(info_dict) # Do not sanitize so as not to consume LazyList @@ -1073,6 +1090,10 @@ class YoutubeDL(object): return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT + def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs): + outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs) + return self.escape_outtmpl(outtmpl) % info_dict + def _prepare_filename(self, info_dict, tmpl_type='default'): try: sanitize = lambda k, v: sanitize_filename( @@ -2431,10 +2452,8 @@ class YoutubeDL(object): if self.params.get('forceprint') or self.params.get('forcejson'): self.post_extract(info_dict) for tmpl in self.params.get('forceprint', []): - if re.match(r'\w+$', tmpl): - tmpl = '%({})s'.format(tmpl) - tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict) - self.to_stdout(self.escape_outtmpl(tmpl) % info_copy) + self.to_stdout(self.evaluate_outtmpl( + f'%({tmpl})s' if re.match(r'\w+$', tmpl) else tmpl, info_dict)) print_mandatory('title') print_mandatory('id') diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 38e1d0ec65..ade8222992 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -302,11 +302,14 @@ def _real_main(argv=None): parser.error('invalid %s %r: %s' % (msg, tmpl, error_to_compat_str(err))) for k, tmpl in opts.outtmpl.items(): - validate_outtmpl(tmpl, '%s output template' % k) + validate_outtmpl(tmpl, f'{k} output template') opts.forceprint = opts.forceprint or [] for tmpl in opts.forceprint or []: validate_outtmpl(tmpl, 'print template') validate_outtmpl(opts.sponsorblock_chapter_title, 'SponsorBlock chapter title') + for k, tmpl in opts.progress_template.items(): + k = f'{k[:-6]} console title' if '-title' in k else f'{k} progress' + validate_outtmpl(tmpl, f'{k} template') if opts.extractaudio and not opts.keepvideo and opts.format is None: opts.format = 'bestaudio/best' @@ -633,8 +636,9 @@ def _real_main(argv=None): 'noresizebuffer': opts.noresizebuffer, 'http_chunk_size': opts.http_chunk_size, 'continuedl': opts.continue_dl, - 'noprogress': opts.noprogress, + 'noprogress': opts.quiet if opts.noprogress is None else opts.noprogress, 'progress_with_newline': opts.progress_with_newline, + 'progress_template': opts.progress_template, 'playliststart': opts.playliststart, 'playlistend': opts.playlistend, 'playlistreverse': opts.playlist_reverse, diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py index 9bf05c7373..b107b21142 100644 --- a/yt_dlp/compat.py +++ b/yt_dlp/compat.py @@ -159,6 +159,12 @@ except ImportError: compat_pycrypto_AES = None +def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075 + if compat_os_name != 'nt': + return + os.system('') + + # 
Deprecated compat_basestring = str @@ -281,5 +287,6 @@ __all__ = [ 'compat_xml_parse_error', 'compat_xpath', 'compat_zip', + 'windows_enable_vt_mode', 'workaround_optparse_bug9161', ] diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index bb0614037a..50e674829e 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -7,7 +7,6 @@ import sys import time import random -from ..compat import compat_os_name from ..utils import ( decodeArgument, encodeFilename, @@ -17,6 +16,7 @@ from ..utils import ( timeconvert, ) from ..minicurses import ( + MultilineLogger, MultilinePrinter, QuietMultilinePrinter, BreaklineStatusPrinter @@ -44,8 +44,6 @@ class FileDownloader(object): noresizebuffer: Do not automatically resize the download buffer. continuedl: Try to continue downloads if possible. noprogress: Do not print the progress bar. - logtostderr: Log messages to stderr instead of stdout. - consoletitle: Display progress in console window's titlebar. nopart: Do not use temporary .part files. updatetime: Use the Last-modified header to set output file timestamps. test: Download only first bytes to test the downloader. @@ -61,6 +59,7 @@ class FileDownloader(object): http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be useful for bypassing bandwidth throttling imposed by a webserver (experimental) + progress_template: See YoutubeDL.py Subclasses of this one must re-define the real_download method. """ @@ -73,7 +72,7 @@ class FileDownloader(object): self.ydl = ydl self._progress_hooks = [] self.params = params - self._multiline = None + self._prepare_multiline_status() self.add_progress_hook(self.report_progress) @staticmethod @@ -242,55 +241,46 @@ class FileDownloader(object): """Report destination filename.""" self.to_screen('[download] Destination: ' + filename) - def _prepare_multiline_status(self, lines): - if self.params.get('quiet'): + def _prepare_multiline_status(self, lines=1): + if self.params.get('noprogress'): self._multiline = QuietMultilinePrinter() - elif self.params.get('progress_with_newline', False): + elif self.ydl.params.get('logger'): + self._multiline = MultilineLogger(self.ydl.params['logger'], lines) + elif self.params.get('progress_with_newline'): self._multiline = BreaklineStatusPrinter(sys.stderr, lines) - elif self.params.get('noprogress', False): - self._multiline = None else: - self._multiline = MultilinePrinter(sys.stderr, lines) + self._multiline = MultilinePrinter(sys.stderr, lines, not self.params.get('quiet')) def _finish_multiline_status(self): - if self._multiline is not None: - self._multiline.end() + self._multiline.end() - def _report_progress_status(self, msg, is_last_line=False, progress_line=None): - fullmsg = '[download] ' + msg - if self.params.get('progress_with_newline', False): - self.to_screen(fullmsg) - elif progress_line is not None and self._multiline is not None: - self._multiline.print_at_line(fullmsg, progress_line) - else: - if compat_os_name == 'nt' or not sys.stderr.isatty(): - prev_len = getattr(self, '_report_progress_prev_line_length', 0) - if prev_len > len(fullmsg): - fullmsg += ' ' * (prev_len - len(fullmsg)) - self._report_progress_prev_line_length = len(fullmsg) - clear_line = '\r' - else: - clear_line = '\r\x1b[K' - self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line) - self.to_console_title('yt-dlp ' + msg) + def _report_progress_status(self, s): + progress_dict = s.copy() + progress_dict.pop('info_dict') + progress_dict = {'info': s['info_dict'], 'progress': 
progress_dict} + + progress_template = self.params.get('progress_template', {}) + self._multiline.print_at_line(self.ydl.evaluate_outtmpl( + progress_template.get('download') or '[download] %(progress._default_template)s', + progress_dict), s.get('progress_idx') or 0) + self.to_console_title(self.ydl.evaluate_outtmpl( + progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s', + progress_dict)) def report_progress(self, s): if s['status'] == 'finished': - if self.params.get('noprogress', False): + if self.params.get('noprogress'): self.to_screen('[download] Download completed') - else: - msg_template = '100%%' - if s.get('total_bytes') is not None: - s['_total_bytes_str'] = format_bytes(s['total_bytes']) - msg_template += ' of %(_total_bytes_str)s' - if s.get('elapsed') is not None: - s['_elapsed_str'] = self.format_seconds(s['elapsed']) - msg_template += ' in %(_elapsed_str)s' - self._report_progress_status( - msg_template % s, is_last_line=True, progress_line=s.get('progress_idx')) - return - - if self.params.get('noprogress'): + msg_template = '100%%' + if s.get('total_bytes') is not None: + s['_total_bytes_str'] = format_bytes(s['total_bytes']) + msg_template += ' of %(_total_bytes_str)s' + if s.get('elapsed') is not None: + s['_elapsed_str'] = self.format_seconds(s['elapsed']) + msg_template += ' in %(_elapsed_str)s' + s['_percent_str'] = self.format_percent(100) + s['_default_template'] = msg_template % s + self._report_progress_status(s) return if s['status'] != 'downloading': @@ -332,8 +322,8 @@ class FileDownloader(object): msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' else: msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s' - - self._report_progress_status(msg_template % s, progress_line=s.get('progress_idx')) + s['_default_template'] = msg_template % s + self._report_progress_status(s) def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" @@ -405,7 +395,9 @@ class FileDownloader(object): '[download] Sleeping %s seconds ...' % ( sleep_interval_sub)) time.sleep(sleep_interval_sub) - return self.real_download(filename, info_dict), True + ret = self.real_download(filename, info_dict) + self._finish_multiline_status() + return ret, True def real_download(self, filename, info_dict): """Real download process. 
Redefine in subclasses.""" diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 22134f3b6c..6a490131b1 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -393,9 +393,7 @@ class FragmentFD(FileDownloader): result = result and job.result() finally: tpe.shutdown(wait=True) - - self._finish_multiline_status() - return True + return result def download_and_append_fragments(self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None, tpe=None): fragment_retries = self.params.get('fragment_retries', 0) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index f65a098d72..4f940730a4 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1134,10 +1134,7 @@ class InfoExtractor(object): if mobj: break - if not self.get_param('no_color') and compat_os_name != 'nt' and sys.stderr.isatty(): - _name = '\033[0;34m%s\033[0m' % name - else: - _name = name + _name = self._downloader._color_text(name, 'blue') if mobj: if group is None: diff --git a/yt_dlp/minicurses.py b/yt_dlp/minicurses.py index 74ad891c99..a466fb4b03 100644 --- a/yt_dlp/minicurses.py +++ b/yt_dlp/minicurses.py @@ -1,10 +1,12 @@ -import os - from threading import Lock -from .utils import compat_os_name, get_windows_version +from .utils import supports_terminal_sequences, TERMINAL_SEQUENCES -class MultilinePrinterBase(): +class MultilinePrinterBase: + def __init__(self, stream=None, lines=1): + self.stream = stream + self.maximum = lines - 1 + def __enter__(self): return self @@ -17,119 +19,87 @@ class MultilinePrinterBase(): def end(self): pass - -class MultilinePrinter(MultilinePrinterBase): - - def __init__(self, stream, lines): - """ - @param stream stream to write to - @lines number of lines to be written - """ - self.stream = stream - - is_win10 = compat_os_name == 'nt' and get_windows_version() >= (10, ) - self.CARRIAGE_RETURN = '\r' - if os.getenv('TERM') and self._isatty() or is_win10: - # reason not to use curses https://github.com/yt-dlp/yt-dlp/pull/1036#discussion_r713851492 - # escape sequences for Win10 https://docs.microsoft.com/en-us/windows/console/console-virtual-terminal-sequences - self.UP = '\x1b[A' - self.DOWN = '\n' - self.ERASE_LINE = '\x1b[K' - self._HAVE_FULLCAP = self._isatty() or is_win10 - else: - self.UP = self.DOWN = self.ERASE_LINE = None - self._HAVE_FULLCAP = False - - # lines are numbered from top to bottom, counting from 0 to self.maximum - self.maximum = lines - 1 - self.lastline = 0 - self.lastlength = 0 - - self.movelock = Lock() - - @property - def have_fullcap(self): - """ - True if the TTY is allowing to control cursor, - so that multiline progress works - """ - return self._HAVE_FULLCAP - - def _isatty(self): - try: - return self.stream.isatty() - except BaseException: - return False - - def _move_cursor(self, dest): - current = min(self.lastline, self.maximum) - self.stream.write(self.CARRIAGE_RETURN) - if current == dest: - # current and dest are at same position, no need to move cursor - return - elif current > dest: - # when maximum == 2, - # 0. dest - # 1. - # 2. current - self.stream.write(self.UP * (current - dest)) - elif current < dest: - # when maximum == 2, - # 0. current - # 1. - # 2. 
dest - self.stream.write(self.DOWN * (dest - current)) - self.lastline = dest - - def print_at_line(self, text, pos): - with self.movelock: - if self.have_fullcap: - self._move_cursor(pos) - self.stream.write(self.ERASE_LINE) - self.stream.write(text) - else: - if self.maximum != 0: - # let user know about which line is updating the status - text = f'{pos + 1}: {text}' - textlen = len(text) - if self.lastline == pos: - # move cursor at the start of progress when writing to same line - self.stream.write(self.CARRIAGE_RETURN) - if self.lastlength > textlen: - text += ' ' * (self.lastlength - textlen) - self.lastlength = textlen - else: - # otherwise, break the line - self.stream.write('\n') - self.lastlength = 0 - self.stream.write(text) - self.lastline = pos - - def end(self): - with self.movelock: - # move cursor to the end of the last line, and write line break - # so that other to_screen calls can precede - self._move_cursor(self.maximum) - self.stream.write('\n') + def _add_line_number(self, text, line): + if self.maximum: + return f'{line + 1}: {text}' + return text class QuietMultilinePrinter(MultilinePrinterBase): - def __init__(self): - self.have_fullcap = True + pass + + +class MultilineLogger(MultilinePrinterBase): + def print_at_line(self, text, pos): + # stream is the logger object, not an actual stream + self.stream.debug(self._add_line_number(text, pos)) class BreaklineStatusPrinter(MultilinePrinterBase): - - def __init__(self, stream, lines): - """ - @param stream stream to write to - """ - self.stream = stream - self.maximum = lines - self.have_fullcap = True - def print_at_line(self, text, pos): - if self.maximum != 0: - # let user know about which line is updating the status - text = f'{pos + 1}: {text}' - self.stream.write(text + '\n') + self.stream.write(self._add_line_number(text, pos) + '\n') + + +class MultilinePrinter(MultilinePrinterBase): + def __init__(self, stream=None, lines=1, preserve_output=True): + super().__init__(stream, lines) + self.preserve_output = preserve_output + self._lastline = self._lastlength = 0 + self._movelock = Lock() + self._HAVE_FULLCAP = supports_terminal_sequences(self.stream) + + def lock(func): + def wrapper(self, *args, **kwargs): + with self._movelock: + return func(self, *args, **kwargs) + return wrapper + + def _move_cursor(self, dest): + current = min(self._lastline, self.maximum) + self.stream.write('\r') + distance = dest - current + if distance < 0: + self.stream.write(TERMINAL_SEQUENCES['UP'] * -distance) + elif distance > 0: + self.stream.write(TERMINAL_SEQUENCES['DOWN'] * distance) + self._lastline = dest + + @lock + def print_at_line(self, text, pos): + if self._HAVE_FULLCAP: + self._move_cursor(pos) + self.stream.write(TERMINAL_SEQUENCES['ERASE_LINE']) + self.stream.write(text) + return + + text = self._add_line_number(text, pos) + textlen = len(text) + if self._lastline == pos: + # move cursor at the start of progress when writing to same line + self.stream.write('\r') + if self._lastlength > textlen: + text += ' ' * (self._lastlength - textlen) + self._lastlength = textlen + else: + # otherwise, break the line + self.stream.write('\n') + self._lastlength = textlen + self.stream.write(text) + self._lastline = pos + + @lock + def end(self): + # move cursor to the end of the last line, and write line break + # so that other to_screen calls can precede + if self._HAVE_FULLCAP: + self._move_cursor(self.maximum) + if self.preserve_output: + self.stream.write('\n') + return + + if self._HAVE_FULLCAP: + self.stream.write( + 
TERMINAL_SEQUENCES['ERASE_LINE'] + + f'{TERMINAL_SEQUENCES["UP"]}{TERMINAL_SEQUENCES["ERASE_LINE"]}' * self.maximum) + else: + self.stream.write(' ' * self._lastlength) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index be43f37ee1..4652e8c589 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -910,12 +910,30 @@ def parseOpts(overrideArguments=None): help='Output progress bar as new lines') verbosity.add_option( '--no-progress', - action='store_true', dest='noprogress', default=False, + action='store_true', dest='noprogress', default=None, help='Do not print progress bar') + verbosity.add_option( + '--progress', + action='store_false', dest='noprogress', + help='Show progress bar, even if in quiet mode') verbosity.add_option( '--console-title', action='store_true', dest='consoletitle', default=False, help='Display progress in console titlebar') + verbosity.add_option( + '--progress-template', + metavar='[TYPES:]TEMPLATE', dest='progress_template', default={}, type='str', + action='callback', callback=_dict_from_options_callback, + callback_kwargs={ + 'allowed_keys': '(download|postprocess)(-title)?', + 'default_key': 'download' + }, help=( + 'Template for progress outputs, optionally prefixed with one of "download:" (default), ' + '"download-title:" (the console title), "postprocess:", or "postprocess-title:". ' + 'The video\'s fields are accessible under the "info" key and ' + 'the progress attributes are accessible under "progress" key. Eg: ' + # TODO: Document the fields inside "progress" + '--console-title --progress-template "download-title:%(info.id)s-%(progress.eta)s"')) verbosity.add_option( '-v', '--verbose', action='store_true', dest='verbose', default=False, diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index d8ec997d9d..376a1c95ef 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +import copy import functools import os @@ -11,7 +12,26 @@ from ..utils import ( ) -class PostProcessor(object): +class PostProcessorMetaClass(type): + @staticmethod + def run_wrapper(func): + @functools.wraps(func) + def run(self, info, *args, **kwargs): + self._hook_progress({'status': 'started'}, info) + ret = func(self, info, *args, **kwargs) + if ret is not None: + _, info = ret + self._hook_progress({'status': 'finished'}, info) + return ret + return run + + def __new__(cls, name, bases, attrs): + if 'run' in attrs: + attrs['run'] = cls.run_wrapper(attrs['run']) + return type.__new__(cls, name, bases, attrs) + + +class PostProcessor(metaclass=PostProcessorMetaClass): """Post Processor class. 
PostProcessor objects can be added to downloaders with their @@ -34,7 +54,9 @@ class PostProcessor(object): _downloader = None def __init__(self, downloader=None): - self._downloader = downloader + self._progress_hooks = [] + self.add_progress_hook(self.report_progress) + self.set_downloader(downloader) self.PP_NAME = self.pp_key() @classmethod @@ -68,6 +90,10 @@ class PostProcessor(object): def set_downloader(self, downloader): """Sets the downloader for this PP.""" self._downloader = downloader + if not downloader: + return + for ph in downloader._postprocessor_hooks: + self.add_progress_hook(ph) @staticmethod def _restrict_to(*, video=True, audio=True, images=True): @@ -115,6 +141,39 @@ class PostProcessor(object): return _configuration_args( self.pp_key(), self.get_param('postprocessor_args'), exe, *args, **kwargs) + def _hook_progress(self, status, info_dict): + if not self._progress_hooks: + return + info_dict = dict(info_dict) + for key in ('__original_infodict', '__postprocessors'): + info_dict.pop(key, None) + status.update({ + 'info_dict': copy.deepcopy(info_dict), + 'postprocessor': self.pp_key(), + }) + for ph in self._progress_hooks: + ph(status) + + def add_progress_hook(self, ph): + # See YoutubeDl.py (search for postprocessor_hooks) for a description of this interface + self._progress_hooks.append(ph) + + def report_progress(self, s): + s['_default_template'] = '%(postprocessor)s %(status)s' % s + + progress_dict = s.copy() + progress_dict.pop('info_dict') + progress_dict = {'info': s['info_dict'], 'progress': progress_dict} + + progress_template = self.get_param('progress_template', {}) + tmpl = progress_template.get('postprocess') + if tmpl: + self._downloader.to_stdout(self._downloader.evaluate_outtmpl(tmpl, progress_dict)) + + self._downloader.to_console_title(self._downloader.evaluate_outtmpl( + progress_template.get('postprocess-title') or 'yt-dlp %(progress._default_template)s', + progress_dict)) + class AudioConversionError(PostProcessingError): pass diff --git a/yt_dlp/postprocessor/metadataparser.py b/yt_dlp/postprocessor/metadataparser.py index f7b0d8bde7..96aac9beba 100644 --- a/yt_dlp/postprocessor/metadataparser.py +++ b/yt_dlp/postprocessor/metadataparser.py @@ -62,8 +62,7 @@ class MetadataParserPP(PostProcessor): def interpretter(self, inp, out): def f(info): - outtmpl, tmpl_dict = self._downloader.prepare_outtmpl(template, info) - data_to_parse = self._downloader.escape_outtmpl(outtmpl) % tmpl_dict + data_to_parse = self._downloader.evaluate_outtmpl(template, info) self.write_debug(f'Searching for {out_re.pattern!r} in {template!r}') match = out_re.search(data_to_parse) if match is None: diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py index 2871e16d51..72a705fc55 100644 --- a/yt_dlp/postprocessor/modify_chapters.py +++ b/yt_dlp/postprocessor/modify_chapters.py @@ -292,8 +292,7 @@ class ModifyChaptersPP(FFmpegPostProcessor): 'name': SponsorBlockPP.CATEGORIES[category], 'category_names': [SponsorBlockPP.CATEGORIES[c] for c in cats] }) - outtmpl, tmpl_dict = self._downloader.prepare_outtmpl(self._sponsorblock_chapter_title, c) - c['title'] = self._downloader.escape_outtmpl(outtmpl) % tmpl_dict + c['title'] = self._downloader.evaluate_outtmpl(self._sponsorblock_chapter_title, c) # Merge identically named sponsors. 
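For illustration, here is one way an embedding application might consume the postprocessor progress hooks wired up above. This is a sketch rather than part of the patch; it assumes a yt-dlp build that includes these changes, and the video URL is only a placeholder:

```python
import yt_dlp

def pp_hook(status):
    # Keys populated by PostProcessor._hook_progress above:
    # 'status' is 'started' or 'finished', 'postprocessor' is the PP key,
    # and 'info_dict' is a deep copy of the video's info dict
    print(f"[{status['postprocessor']}] {status['status']}")

ydl = yt_dlp.YoutubeDL({})
ydl.add_postprocessor_hook(pp_hook)  # method added to YoutubeDL in this patch
ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```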
if (new_chapters and 'categories' in new_chapters[-1] and new_chapters[-1]['title'] == c['title']): diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 8b5b15103b..0273878974 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -6440,3 +6440,26 @@ def jwt_encode_hs256(payload_data, key, headers={}): signature_b64 = base64.b64encode(h.digest()) token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64 return token + + +def supports_terminal_sequences(stream): + if compat_os_name == 'nt': + if get_windows_version() < (10, ): + return False + elif not os.getenv('TERM'): + return False + try: + return stream.isatty() + except BaseException: + return False + + +TERMINAL_SEQUENCES = { + 'DOWN': '\n', + 'UP': '\x1b[A', + 'ERASE_LINE': '\x1b[K', + 'RED': '\033[0;31m', + 'YELLOW': '\033[0;33m', + 'BLUE': '\033[0;34m', + 'RESET_STYLE': '\033[0m', +} From c08b8873eae857b0bbd2a3cfee402253c02a3180 Mon Sep 17 00:00:00 2001 From: Damiano Amatruda <damiano.amatruda@outlook.com> Date: Fri, 8 Oct 2021 21:36:27 +0200 Subject: [PATCH 214/641] [ciscowebex] Add extractor (#1199) Authored by: damianoamatruda --- yt_dlp/extractor/ciscowebex.py | 90 ++++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 1 + 2 files changed, 91 insertions(+) create mode 100644 yt_dlp/extractor/ciscowebex.py diff --git a/yt_dlp/extractor/ciscowebex.py b/yt_dlp/extractor/ciscowebex.py new file mode 100644 index 0000000000..882dae91b5 --- /dev/null +++ b/yt_dlp/extractor/ciscowebex.py @@ -0,0 +1,90 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + try_get, + unified_timestamp, +) + + +class CiscoWebexIE(InfoExtractor): + IE_NAME = 'ciscowebex' + IE_DESC = 'Cisco Webex' + _VALID_URL = r'''(?x) + (?P<url>https?://(?P<subdomain>[^/#?]*)\.webex\.com/(?: + (?P<siteurl_1>[^/#?]*)/(?:ldr|lsr).php\?(?:[^#]*&)*RCID=(?P<rcid>[0-9a-f]{32})| + (?:recordingservice|webappng)/sites/(?P<siteurl_2>[^/#?]*)/recording/(?:playback/|play/)?(?P<id>[0-9a-f]{32}) + ))''' + + _TESTS = [{ + 'url': 'https://demosubdomain.webex.com/demositeurl/ldr.php?RCID=e58e803bc0f766bb5f6376d2e86adb5b', + 'only_matching': True, + }, { + 'url': 'http://demosubdomain.webex.com/demositeurl/lsr.php?RCID=bc04b4a7b5ea2cc3a493d5ae6aaff5d7', + 'only_matching': True, + }, { + 'url': 'https://demosubdomain.webex.com/recordingservice/sites/demositeurl/recording/88e7a42f7b19f5b423c54754aecc2ce9/playback', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + rcid = mobj.group('rcid') + if rcid: + webpage = self._download_webpage(url, None, note='Getting video ID') + url = self._search_regex(self._VALID_URL, webpage, 'redirection url', group='url') + url = self._request_webpage(url, None, note='Resolving final URL').geturl() + mobj = self._match_valid_url(url) + subdomain = mobj.group('subdomain') + siteurl = mobj.group('siteurl_1') or mobj.group('siteurl_2') + video_id = mobj.group('id') + + stream = self._download_json( + 'https://%s.webex.com/webappng/api/v1/recordings/%s/stream' % (subdomain, video_id), + video_id, fatal=False, query={'siteurl': siteurl}) + if not stream: + self.raise_login_required(method='cookies') + + video_id = stream.get('recordUUID') or video_id + + formats = [{ + 'format_id': 'video', + 'url': stream['fallbackPlaySrc'], + 'ext': 'mp4', + 'vcodec': 'avc1.640028', + 'acodec': 'mp4a.40.2', + }] + if stream.get('preventDownload') is False: + mp4url = try_get(stream, lambda x: 
x['downloadRecordingInfo']['downloadInfo']['mp4URL']) + if mp4url: + formats.append({ + 'format_id': 'video', + 'url': mp4url, + 'ext': 'mp4', + 'vcodec': 'avc1.640028', + 'acodec': 'mp4a.40.2', + }) + audiourl = try_get(stream, lambda x: x['downloadRecordingInfo']['downloadInfo']['audioURL']) + if audiourl: + formats.append({ + 'format_id': 'audio', + 'url': audiourl, + 'ext': 'mp3', + 'vcodec': 'none', + 'acodec': 'mp3', + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': stream['recordName'], + 'description': stream.get('description'), + 'uploader': stream.get('ownerDisplayName'), + 'uploader_id': stream.get('ownerUserName') or stream.get('ownerId'), # mail or id + 'timestamp': unified_timestamp(stream.get('createTime')), + 'duration': int_or_none(stream.get('duration'), 1000), + 'webpage_url': 'https://%s.webex.com/recordingservice/sites/%s/recording/playback/%s' % (subdomain, siteurl, video_id), + 'formats': formats, + } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 8c5b8b1607..a224c4f9a6 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -258,6 +258,7 @@ from .ciscolive import ( CiscoLiveSessionIE, CiscoLiveSearchIE, ) +from .ciscowebex import CiscoWebexIE from .cjsw import CJSWIE from .cliphunter import CliphunterIE from .clippit import ClippitIE From ac56cf38a463f0e21e3a3ec89572fcd1cade1563 Mon Sep 17 00:00:00 2001 From: coletdjnz <colethedj@protonmail.com> Date: Sat, 9 Oct 2021 10:19:25 +1300 Subject: [PATCH 215/641] [youtube:tab] Fallback to API when webpage fails to download (#1122) and add some extractor_args to force this mode Authored by: coletdjnz --- README.md | 3 + yt_dlp/extractor/youtube.py | 227 ++++++++++++++++++++++++++---------- 2 files changed, 171 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index 1723865535..ff117663af 100644 --- a/README.md +++ b/README.md @@ -1483,6 +1483,9 @@ The following extractors use this feature: * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side). * `max_comments`: Maximum amount of comments to download (default all). * `max_comment_depth`: Maximum depth for nested comments. YouTube supports depths 1 or 2 (default). +* **youtubetab** + (YouTube playlists, channels, feeds, etc.) + * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) * **funimation** * `language`: Languages to extract. 
Eg: `funimation:language=english,japanese` diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 56de2ef591..97d02dc0b4 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -579,12 +579,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor): data=json.dumps(data).encode('utf8'), headers=real_headers, query={'key': api_key or self._extract_api_key()}) - def extract_yt_initial_data(self, video_id, webpage): - return self._parse_json( - self._search_regex( - (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE), - self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'), - video_id) + def extract_yt_initial_data(self, item_id, webpage, fatal=True): + data = self._search_regex( + (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE), + self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal) + if data: + return self._parse_json(data, item_id, fatal=fatal) @staticmethod def _extract_session_index(*data): @@ -627,6 +627,16 @@ class YoutubeBaseInfoExtractor(InfoExtractor): # and just "user_syncid||" for primary channel. We only want the channel_syncid return sync_ids[0] + @staticmethod + def _extract_visitor_data(*args): + """ + Extracts visitorData from an API response or ytcfg + Appears to be used to track session state + """ + return traverse_obj( + args, (..., ('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))), + expected_type=compat_str, get_all=False) + @property def is_authenticated(self): return bool(self._generate_sapisidhash_header()) @@ -651,8 +661,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'Origin': origin, 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg), 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg), - 'X-Goog-Visitor-Id': visitor_data or try_get( - self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str) + 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg) } if session_index is None: session_index = self._extract_session_index(ytcfg) @@ -826,9 +835,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return else: - # Youtube may send alerts if there was an issue with the continuation page try: - self._extract_and_report_alerts(response, expected=False, only_once=True) + self._extract_and_report_alerts(response, only_once=True) except ExtractorError as e: # YouTube servers may return errors we want to retry on in a 200 OK response # See: https://github.com/yt-dlp/yt-dlp/issues/839 @@ -3549,7 +3557,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): 'url': 'https://www.youtube.com/feed/watch_later', 'only_matching': True, }, { - 'note': 'Recommended - redirects to home page', + 'note': 'Recommended - redirects to home page.', 'url': 'https://www.youtube.com/feed/recommended', 'only_matching': True, }, { @@ -3646,6 +3654,51 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): 'availability': 'unlisted' }, 'playlist_count': 1, + }, { + 'note': 'API Fallback: Recommended - redirects to home page. 
Requires visitorData', + 'url': 'https://www.youtube.com/feed/recommended', + 'info_dict': { + 'id': 'recommended', + 'title': 'recommended', + }, + 'playlist_mincount': 50, + 'params': { + 'skip_download': True, + 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + }, + }, { + 'note': 'API Fallback: /videos tab, sorted by oldest first', + 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid', + 'info_dict': { + 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw', + 'title': 'Cody\'sLab - Videos', + 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa', + 'uploader': 'Cody\'sLab', + 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw', + }, + 'playlist_mincount': 650, + 'params': { + 'skip_download': True, + 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + }, + }, { + 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...', + 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw', + 'info_dict': { + 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw', + 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw', + 'title': 'Uploads from Royalty Free Music - Topic', + 'uploader': 'Royalty Free Music - Topic', + }, + 'expected_warnings': [ + 'A channel/user page was given', + 'The URL does not have a videos tab', + ], + 'playlist_mincount': 101, + 'params': { + 'skip_download': True, + 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + }, }] @classmethod @@ -3834,7 +3887,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): if entry: yield entry ''' - def _entries(self, tab, item_id, account_syncid, ytcfg): + def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data): def extract_entries(parent_renderer): # this needs to called again for continuation to work with feeds contents = try_get(parent_renderer, lambda x: x['contents'], list) or [] @@ -3876,7 +3929,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): if not continuation_list[0]: continuation_list[0] = self._extract_continuation(parent_renderer) - continuation_list = [None] # Python 2 doesnot support nonlocal + continuation_list = [None] # Python 2 does not support nonlocal tab_content = try_get(tab, lambda x: x['content'], dict) if not tab_content: return @@ -3886,7 +3939,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): for entry in extract_entries(parent_renderer): yield entry continuation = continuation_list[0] - visitor_data = None for page_num in itertools.count(1): if not continuation: @@ -3900,8 +3952,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): if not response: break - visitor_data = try_get( - response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data + # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases + # See: https://github.com/ytdl-org/youtube-dl/issues/28702 + visitor_data = self._extract_visitor_data(response) or visitor_data known_continuation_renderers = { 'playlistVideoListContinuation': self._playlist_entries, @@ -3975,9 +4028,10 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)) return {k: v for k, v in uploader.items() if v is not None} - def _extract_from_tabs(self, item_id, webpage, data, tabs): + def _extract_from_tabs(self, item_id, ytcfg, data, tabs): playlist_id = title = description = channel_url = channel_name = channel_id = None - thumbnails_list = tags = [] + thumbnails_list = [] + tags = [] selected_tab = self._extract_selected_tab(tabs) renderer = try_get( @@ -4042,18 +4096,15 @@ class 
YoutubeTabIE(YoutubeBaseInfoExtractor): 'channel': metadata['uploader'], 'channel_id': metadata['uploader_id'], 'channel_url': metadata['uploader_url']}) - ytcfg = self.extract_ytcfg(item_id, webpage) return self.playlist_result( self._entries( - selected_tab, playlist_id, - self._extract_account_syncid(ytcfg, data), ytcfg), + selected_tab, playlist_id, ytcfg, + self._extract_account_syncid(ytcfg, data), + self._extract_visitor_data(data, ytcfg)), **metadata) - def _extract_mix_playlist(self, playlist, playlist_id, data, webpage): - first_id = last_id = None - ytcfg = self.extract_ytcfg(playlist_id, webpage) - headers = self.generate_api_headers( - ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data)) + def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg): + first_id = last_id = response = None for page_num in itertools.count(1): videos = list(self._playlist_entries(playlist)) if not videos: @@ -4070,6 +4121,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): last_id = videos[-1]['id'] watch_endpoint = try_get( playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint']) + headers = self.generate_api_headers( + ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data), + visitor_data=self._extract_visitor_data(response, data, ytcfg)) query = { 'playlistId': playlist_id, 'videoId': watch_endpoint.get('videoId') or last_id, @@ -4084,7 +4138,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): playlist = try_get( response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict) - def _extract_from_playlist(self, item_id, url, data, playlist, webpage): + def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg): title = playlist.get('title') or try_get( data, lambda x: x['titleText']['simpleText'], compat_str) playlist_id = playlist.get('playlistId') or item_id @@ -4099,7 +4153,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): video_title=title) return self.playlist_result( - self._extract_mix_playlist(playlist, playlist_id, data, webpage), + self._extract_mix_playlist(playlist, playlist_id, data, ytcfg), playlist_id=playlist_id, playlist_title=title) def _extract_availability(self, data): @@ -4143,7 +4197,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): if renderer: return renderer - def _reload_with_unavailable_videos(self, item_id, data, webpage): + def _reload_with_unavailable_videos(self, item_id, data, ytcfg): """ Get playlist with unavailable videos if the 'show unavailable videos' button exists. 
""" @@ -4167,10 +4221,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): params = browse_endpoint.get('params') break - ytcfg = self.extract_ytcfg(item_id, webpage) headers = self.generate_api_headers( ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data), - visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)) + visitor_data=self._extract_visitor_data(data, ytcfg)) query = { 'params': params or 'wgYCCAA=', 'browseId': browse_id or 'VL%s' % item_id @@ -4180,28 +4233,87 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): check_get_keys='contents', fatal=False, ytcfg=ytcfg, note='Downloading API JSON with unavailable videos') - def _extract_webpage(self, url, item_id): + def _extract_webpage(self, url, item_id, fatal=True): retries = self.get_param('extractor_retries', 3) count = -1 - last_error = 'Incomplete yt initial data recieved' + webpage = data = last_error = None while count < retries: count += 1 # Sometimes youtube returns a webpage with incomplete ytInitialData # See: https://github.com/yt-dlp/yt-dlp/issues/116 - if count: + if last_error: self.report_warning('%s. Retrying ...' % last_error) - webpage = self._download_webpage( - url, item_id, - 'Downloading webpage%s' % (' (retry #%d)' % count if count else '')) - data = self.extract_yt_initial_data(item_id, webpage) - if data.get('contents') or data.get('currentVideoEndpoint'): + try: + webpage = self._download_webpage( + url, item_id, + note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',)) + data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {} + except ExtractorError as e: + if isinstance(e.cause, network_exceptions): + if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429): + last_error = error_to_compat_str(e.cause or e.msg) + if count < retries: + continue + if fatal: + raise + self.report_warning(error_to_compat_str(e)) break - # Extract alerts here only when there is error - self._extract_and_report_alerts(data) - if count >= retries: - raise ExtractorError(last_error) + else: + try: + self._extract_and_report_alerts(data) + except ExtractorError as e: + if fatal: + raise + self.report_warning(error_to_compat_str(e)) + break + + if dict_get(data, ('contents', 'currentVideoEndpoint')): + break + + last_error = 'Incomplete yt initial data received' + if count >= retries: + if fatal: + raise ExtractorError(last_error) + self.report_warning(last_error) + break + return webpage, data + def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'): + data = None + if 'webpage' not in self._configuration_arg('skip'): + webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal) + ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage) + if not data: + if not ytcfg and self.is_authenticated: + msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.' 
+ if 'authcheck' not in self._configuration_arg('skip') and fatal: + raise ExtractorError( + msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,' + ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check', + expected=True) + self.report_warning(msg, only_once=True) + data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client) + return data, ytcfg + + def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'): + headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client) + resolve_response = self._extract_response( + item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal, + ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client) + endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'} + for ep_key, ep in endpoints.items(): + params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict) + if params: + return self._extract_response( + item_id=item_id, query=params, ep=ep, headers=headers, + ytcfg=ytcfg, fatal=fatal, default_client=default_client, + check_get_keys=('contents', 'currentVideoEndpoint')) + err_note = 'Failed to resolve url (does the playlist exist?)' + if fatal: + raise ExtractorError(err_note, expected=True) + self.report_warning(err_note, item_id) + @staticmethod def _smuggle_data(entries, data): for entry in entries: @@ -4234,7 +4346,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): mobj = get_mobj(url) # Youtube returns incomplete data if tabname is not lower case pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel'] - if is_channel: if smuggled_data.get('is_music_url'): if item_id[:2] == 'VL': @@ -4242,12 +4353,14 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): item_id = item_id[2:] pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False elif item_id[:2] == 'MP': - # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage - item_id = self._search_regex( - r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22', - self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id), - 'playlist id') - pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False + # Resolve albums (/[channel/browse]/MP...) 
to their equivalent playlist + mdata = self._extract_tab_endpoint( + 'https://music.youtube.com/channel/%s' % item_id, item_id, default_client='web_music') + murl = traverse_obj( + mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'), get_all=False, expected_type=compat_str) + if not murl: + raise ExtractorError('Failed to resolve album to playlist.') + return self.url_result(murl, ie=YoutubeTabIE.ie_key()) elif mobj['channel_type'] == 'browse': # Youtube music /browse/ should be changed to /channel/ pre = 'https://www.youtube.com/channel/%s' % item_id @@ -4281,7 +4394,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id) self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id)) - webpage, data = self._extract_webpage(url, item_id) + data, ytcfg = self._extract_data(url, item_id) tabs = try_get( data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list) @@ -4299,11 +4412,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): pl_id = 'UU%s' % item_id[2:] pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post']) try: - pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id) - for alert_type, alert_message in self._extract_alerts(pl_data): - if alert_type == 'error': - raise ExtractorError('Youtube said: %s' % alert_message) - item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data + data, ytcfg, item_id, url = *self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True), pl_id, pl_url except ExtractorError: self.report_warning('The playlist gave error. Falling back to channel URL') else: @@ -4313,17 +4422,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): # YouTube sometimes provides a button to reload playlist with unavailable videos. 
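For reference, the webpage-skipping behaviour tested above can also be requested when embedding. A hypothetical snippet, mirroring the `extractor_args` shape used in the new test cases (the feed URL is just an example):

```python
import yt_dlp

# Force the webpage-less (API-only) code path for YouTube tabs/playlists,
# matching the 'extractor_args' params used by the new tests
opts = {'extractor_args': {'youtubetab': {'skip': ['webpage']}}}
with yt_dlp.YoutubeDL(opts) as ydl:
    info = ydl.extract_info(
        'https://www.youtube.com/feed/recommended', download=False)
print(info.get('title'))
```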
if 'no-youtube-unavailable-videos' not in compat_opts: - data = self._reload_with_unavailable_videos(item_id, data, webpage) or data + data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data self._extract_and_report_alerts(data, only_once=True) tabs = try_get( data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list) if tabs: - return self._extract_from_tabs(item_id, webpage, data, tabs) + return self._extract_from_tabs(item_id, ytcfg, data, tabs) playlist = try_get( data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict) if playlist: - return self._extract_from_playlist(item_id, url, data, playlist, webpage) + return self._extract_from_playlist(item_id, url, data, playlist, ytcfg) video_id = try_get( data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'], From 8c6f4daa4c7c54df600bf4990bd91ca381fbd8f3 Mon Sep 17 00:00:00 2001 From: timethrow <39486242+timethrow@users.noreply.github.com> Date: Sat, 9 Oct 2021 02:08:01 +0100 Subject: [PATCH 216/641] [docs] Write embedding and contributing documentation (#528) Authored by: pukkandan, timethrow --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- CONTRIBUTING.md | 269 ++++++++++++++++++++++--------- README.md | 86 +++++++++- devscripts/make_contributing.py | 21 +-- 4 files changed, 291 insertions(+), 87 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 1bcac69dad..7ef08d68ac 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -7,7 +7,7 @@ --- ### Before submitting a *pull request* make sure you have: -- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/ytdl-org/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/ytdl-org/youtube-dl#youtube-dl-coding-conventions) sections +- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#adding-support-for-a-new-site) and [yt-dlp coding conventions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#yt-dlp-coding-conventions) sections - [ ] [Searched](https://github.com/yt-dlp/yt-dlp/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests - [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5faf97b102..7aaf6a52ba 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,26 +1,59 @@ -**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. 
It should look similar to this: +# CONTRIBUTING TO YT-DLP + +- [OPENING AN ISSUE](#opening-an-issue) + - [Is the description of the issue itself sufficient?](#is-the-description-of-the-issue-itself-sufficient) + - [Are you using the latest version?](#are-you-using-the-latest-version) + - [Is the issue already documented?](#is-the-issue-already-documented) + - [Why are existing options not enough?](#why-are-existing-options-not-enough) + - [Have you read and understood the changes, between youtube-dl and yt-dlp](#have-you-read-and-understood-the-changes-between-youtube-dl-and-yt-dlp) + - [Is there enough context in your bug report?](#is-there-enough-context-in-your-bug-report) + - [Does the issue involve one problem, and one problem only?](#does-the-issue-involve-one-problem-and-one-problem-only) + - [Is anyone going to need the feature?](#is-anyone-going-to-need-the-feature) + - [Is your question about yt-dlp?](#is-your-question-about-yt-dlp) +- [DEVELOPER INSTRUCTIONS](#developer-instructions) + - [Adding new feature or making overarching changes](#adding-new-feature-or-making-overarching-changes) + - [Adding support for a new site](#adding-support-for-a-new-site) + - [yt-dlp coding conventions](#yt-dlp-coding-conventions) + - [Mandatory and optional metafields](#mandatory-and-optional-metafields) + - [Provide fallbacks](#provide-fallbacks) + - [Regular expressions](#regular-expressions) + - [Long lines policy](#long-lines-policy) + - [Inline values](#inline-values) + - [Collapse fallbacks](#collapse-fallbacks) + - [Trailing parentheses](#trailing-parentheses) + - [Use convenience conversion and parsing functions](#use-convenience-conversion-and-parsing-functions) +- [EMBEDDING YT-DLP](README.md#embedding-yt-dlp) + + + +# OPENING AN ISSUE + +Bugs and suggestions should be reported at: [yt-dlp/yt-dlp/issues](https://github.com/yt-dlp/yt-dlp/issues). Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in our [discord server](https://discord.gg/H5MNcFW63r). + +**Please include the full output of yt-dlp when run with `-Uv`**, i.e. **add** `-Uv` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this: ``` -$ youtube-dl -v <your command line> -[debug] System config: [] -[debug] User config: [] -[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKc'] -[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2015.12.06 -[debug] Git HEAD: 135392e -[debug] Python version 2.6.6 - Windows-2003Server-5.2.3790-SP2 -[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 +$ yt-dlp -Uv <your command line> +[debug] Command-line config: ['-v', 'demo.com'] +[debug] Encodings: locale UTF-8, fs utf-8, out utf-8, pref UTF-8 +[debug] yt-dlp version 2021.09.25 (zip) +[debug] Python version 3.8.10 (CPython 64bit) - Linux-5.4.0-74-generic-x86_64-with-glibc2.29 +[debug] exe versions: ffmpeg 4.2.4, ffprobe 4.2.4 [debug] Proxy map: {} +Current Build Hash 25cc412d1d3c0725a1f2f5b7e4682f6fb40e6d15f7024e96f7afd572e9919535 +yt-dlp is up to date (2021.09.25) ... ``` **Do not post screenshots of verbose logs; only plain text is acceptable.** -The output (including the first lines) contains important debugging information. 
Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
+The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore will be closed as `incomplete`.
+
+The templates provided for the Issues should be completed and **not removed**; this helps aid the resolution of the issue.
 
 Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
 
 ### Is the description of the issue itself sufficient?
 
-We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts.
+We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources.
 
 So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious
 
@@ -28,25 +61,31 @@ So please elaborate on what feature you are requesting, or what bug you want to
 - How it could be fixed
 - How your proposed solution would look like
 
-If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
+If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. We often get frustrated by these issues, since the only possible way for us to move forward on them is to ask for clarification over and over.
 
-For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the `-v` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
+For bug reports, this means that your report should contain the **complete** output of yt-dlp when called with the `-Uv` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
 
-If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
+If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--write-pages` and upload the `.dump` files you get [somewhere](https://gist.github.com).
 
 **Site support requests must contain an example URL**.
An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
 
 ### Are you using the latest version?
 
-Before reporting any issue, type `youtube-dl -U`. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well.
+Before reporting any issue, type `yt-dlp -U`. This should report that you're up-to-date. This goes for feature requests as well.
 
 ### Is the issue already documented?
 
-Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
+Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/yt-dlp/yt-dlp/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2021.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
+
+Additionally, it is also helpful to see if the issue has already been documented in the [youtube-dl issue tracker](https://github.com/ytdl-org/youtube-dl/issues). If similar issues have already been reported in youtube-dl (but not in our issue tracker), links to them can be included in your issue report here.
 
 ### Why are existing options not enough?
 
-Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
+Before requesting a new feature, please have a quick peek at [the list of supported options](README.md#usage-and-options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
+
+### Have you read and understood the changes between youtube-dl and yt-dlp
+
+There are many changes between youtube-dl and yt-dlp [(changes to default behavior)](README.md#differences-in-default-behavior), and some of the options available have a different behaviour in yt-dlp, or have been removed altogether [(list of changes to options)](README.md#deprecated-options). Make sure you have read and understood the differences in the options and how this may impact your downloads before opening an issue.
 
 ### Is there enough context in your bug report?
 
@@ -58,23 +97,28 @@ We are then presented with a very complicated request when the original problem
 
 Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open.
While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones. -In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, White house podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service. +In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, White house podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of yt-dlp that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service. ### Is anyone going to need the feature? Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them. -### Is your question about youtube-dl? +### Is your question about yt-dlp? + +Some bug reports are completely unrelated to yt-dlp and relate to a different, or even the reporter's own, application. Please make sure that you are actually using yt-dlp. If you are using a UI for yt-dlp, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for yt-dlp fails in some way you believe is related to yt-dlp, by all means, go ahead and report the bug. + +If the issue is with `youtube-dl` (the upstream fork of yt-dlp) and not with yt-dlp, the issue should be raised in the youtube-dl project. + + -It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different, or even the reporter's own, application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug. # DEVELOPER INSTRUCTIONS -Most users do not need to build youtube-dl and can [download the builds](https://ytdl-org.github.io/youtube-dl/download.html) or get them from their distribution. +Most users do not need to build yt-dlp and can [download the builds](https://github.com/yt-dlp/yt-dlp/releases) or get them via [the other installation methods](README.md#installation). -To run youtube-dl as a developer, you don't need to build anything either. Simply execute +To run yt-dlp as a developer, you don't need to build anything either. 
Simply execute

-    python -m youtube_dl
+    python -m yt_dlp

To run the test, simply invoke your favorite test runner, or execute a test file directly; any of the following work:

@@ -85,42 +129,42 @@ To run the test, simply invoke your favorite test runner, or execute a test file

See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases.

-If you want to create a build of youtube-dl yourself, you'll need
+If you want to create a build of yt-dlp yourself, you can follow the instructions [here](README.md#compile).

-* python3
-* make (only GNU make is supported)
-* pandoc
-* zip
-* pytest

-### Adding support for a new site
+## Adding new features or making overarching changes

-If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
+Before you start writing code for implementing a new feature, open an issue explaining your feature request and at least one use case. This allows the maintainers to decide whether such a feature is desired for the project in the first place, and will provide an avenue to discuss some implementation details. If you open a pull request for a new feature without discussing with us first, do not be surprised when we ask for large changes to the code, or even reject it outright.
+
+The same applies for overarching changes to the architecture, documentation or code style.
+
+
+## Adding support for a new site
+
+If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](https://www.github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. yt-dlp does **not support** such sites thus pull requests adding support for them **will be rejected**.

After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):

-1. [Fork this repository](https://github.com/ytdl-org/youtube-dl/fork)
-2. Check out the source code with:
+1. [Fork this repository](https://github.com/yt-dlp/yt-dlp/fork)
+1. Check out the source code with:

-        git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
+        git clone git@github.com:YOUR_GITHUB_USERNAME/yt-dlp.git

-3. Start a new git branch with
+1. Start a new git branch with

-        cd youtube-dl
+        cd yt-dlp
        git checkout -b yourextractor

-4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
+1.
Start with this simple template and save it to `yt_dlp/extractor/yourextractor.py`:

    ```python
    # coding: utf-8
-    from __future__ import unicode_literals
-
    from .common import InfoExtractor
-
-
+
+
    class YourExtractorIE(InfoExtractor):
        _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
-        _TEST = {
+        _TESTS = [{
            'url': 'https://yourextractor.com/watch/42',
            'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
            'info_dict': {
@@ -134,12 +178,12 @@ After you have ensured this site is distributing its content legally, you can fo
                # * A regular expression; start the string with re:
                # * Any Python type (for example int or float)
            }
-        }
+        }]

        def _real_extract(self, url):
            video_id = self._match_id(url)
            webpage = self._download_webpage(url, video_id)
-
+
            # TODO more code goes here, for example ...
            title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')

@@ -148,45 +192,48 @@ After you have ensured this site is distributing its content legally, you can fo
                'title': title,
                'description': self._og_search_description(webpage),
                'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
-                # TODO more properties (see youtube_dl/extractor/common.py)
+                # TODO more properties (see yt_dlp/extractor/common.py)
            }
    ```
-5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
-6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
-7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
-8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
+1. Add an import in [`yt_dlp/extractor/extractors.py`](yt_dlp/extractor/extractors.py).
+1. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, the tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. You can also run all the tests in one go with `TestDownload.test_YourExtractor_all`.
+1. Make sure you have at least one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running.
+1.
Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L91-L426). Add tests and code for as many as you want.
+1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):

-        $ flake8 youtube_dl/extractor/yourextractor.py
+        $ flake8 yt_dlp/extractor/yourextractor.py

-9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
-10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
+1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython and PyPy for Python 3.6 and above. Backward compatibility is not required for even older versions of Python.
+1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:

-        $ git add youtube_dl/extractor/extractors.py
-        $ git add youtube_dl/extractor/yourextractor.py
-        $ git commit -m '[yourextractor] Add new extractor'
+        $ git add yt_dlp/extractor/extractors.py
+        $ git add yt_dlp/extractor/yourextractor.py
+        $ git commit -m '[yourextractor] Add extractor'
        $ git push origin yourextractor

-11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
+1. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.

In any case, thank you very much for your contributions!

-## youtube-dl coding conventions
+
+## yt-dlp coding conventions

This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.

Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that.
This is important because it will allow the extractor not to break on minor layout changes thus keeping old yt-dlp versions working. Even though this breakage issue may be easily fixed by a new version of yt-dlp, this could take some time, during which the extractor will remain broken.
+

### Mandatory and optional metafields

-For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl:
+For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L91-L426) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp:

- `id` (media identifier)
- `title` (media title)
- `url` (media download URL) or `formats`

-In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken.
+The aforementioned metafields are the critical data that the extraction does not make any sense without, and if any of them fail to be extracted, the extractor is considered completely broken. While, in fact, only `id` is technically mandatory, due to compatibility reasons, yt-dlp also treats `title` as mandatory. The extractor is allowed to return the info dict without url or formats in some special cases if it allows the user to extract useful information with `--ignore-no-formats-error`, e.g. when the video is a live stream that has not started yet.

-[Any field](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.
+[Any field](yt_dlp/extractor/common.py#L219-L426) apart from the aforementioned ones is considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.

#### Example

@@ -200,8 +247,10 @@ Assume at this point `meta`'s layout is:

```python
{
-    ...
    "summary": "some fancy summary text",
+    "user": {
+        "name": "uploader name"
+    },
    ...
}
```

@@ -220,6 +269,30 @@ description = meta['summary']  # incorrect

The latter will break the extraction process with `KeyError` if `summary` disappears from `meta` at some later time but with the former approach extraction will just go ahead with `description` set to `None` which is perfectly fine (remember `None` is equivalent to the absence of data).
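To make the earlier note on mandatory fields concrete, here is a minimal sketch of the special case where an info dict may be returned without `url`/`formats`: a live stream that has not started yet. The API endpoint and the `status`/`playlistUrl` fields are hypothetical, purely for illustration:

```python
from .common import InfoExtractor


class YourExtractorIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # Hypothetical API endpoint and field names, for illustration only
        meta = self._download_json(
            'https://yourextractor.com/api/video/%s' % video_id, video_id)

        formats = []
        if meta.get('status') != 'upcoming':
            formats = self._extract_m3u8_formats(
                meta['playlistUrl'], video_id, 'mp4', fatal=False)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': meta['title'],
            # "formats" may legitimately be empty here; with
            # --ignore-no-formats-error the user still gets the metadata
            'formats': formats,
            'is_live': meta.get('status') == 'live',
        }
```

With `--ignore-no-formats-error`, yt-dlp can then surface the extracted metadata instead of treating the missing formats as fatal.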
+
+If the data is nested, do not use `.get` chains, but instead make use of the utility functions `try_get` or `traverse_obj`.
+
+Considering the above `meta` again, assume you want to extract `["user"]["name"]` and put it in the resulting info dict as `uploader`:
+
+```python
+uploader = try_get(meta, lambda x: x['user']['name'])  # correct
+```
+or
+```python
+uploader = traverse_obj(meta, ('user', 'name'))  # correct
+```
+
+and not like:
+
+```python
+uploader = meta['user']['name']  # incorrect
+```
+or
+```python
+uploader = meta.get('user', {}).get('name')  # incorrect
+```
+
+
Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:

```python
@@ -239,11 +312,36 @@ description = self._search_regex(
```

On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
-
+
+
+Another thing to remember is not to try to iterate over `None`.
+
+Say you extracted a list of thumbnails into `thumbnail_data` using `try_get` and now want to iterate over them:
+
+```python
+thumbnail_data = try_get(...)
+thumbnails = [{
+    'url': item['url']
+} for item in thumbnail_data or []]  # correct
+```
+
+and not like:
+
+```python
+thumbnail_data = try_get(...)
+thumbnails = [{
+    'url': item['url']
+} for item in thumbnail_data]  # incorrect
+```
+
+In the latter case, `thumbnail_data` will be `None` if the field was not found and this will cause the loop `for item in thumbnail_data` to raise a fatal error. Using `for item in thumbnail_data or []` avoids this error and results in setting an empty list in `thumbnails` instead.
+
+
### Provide fallbacks

When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable.

+
#### Example

Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field you should end up with something like:

@@ -262,6 +360,7 @@ title = meta.get('title') or self._og_search_title(webpage)
```

This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`.

+
### Regular expressions

#### Don't capture groups you don't use

@@ -283,11 +382,10 @@ Incorrect:

```python
r'(id|ID)=(?P<id>\d+)'
```
-
#### Make regular expressions relaxed and flexible

When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
-
+
##### Example

Say you need to extract `title` from the following HTML code:

@@ -299,14 +397,14 @@ Say you need to extract `title` from the following HTML code:

The code for that task should look similar to:

```python
-title = self._search_regex(
+title = self._search_regex(  # correct
    r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
```

Or even better:

```python
-title = self._search_regex(
+title = self._search_regex(  # correct
    r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
    webpage, 'title', group='title')
```

@@ -316,14 +414,25 @@ Note how you tolerate potential changes in the `style` attribute's value or swit

The code definitely should not look like:

```python
-title = self._search_regex(
+title = self._search_regex(  # incorrect
    r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
    webpage, 'title', group='title')
```

+or even
+
+```python
+title = self._search_regex(  # incorrect
+    r'<span style=".*?" class="title">(.*?)</span>',
+    webpage, 'title', group='title')
+```
+
+Here the presence or absence of other attributes including `style` is irrelevant for the data we need, and so the regex must not depend on it.
+
+
### Long lines policy

-There is a soft limit to keep lines of code under 80 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse.
+There is a soft limit to keep lines of code under 100 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse. Sometimes, it may be reasonable to go up to 120 characters and sometimes even 80 can be unreadable. Keep in mind that this is not a hard limit and is just one of many tools to make the code more readable.

For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit:

@@ -360,6 +469,7 @@ TITLE_RE = r'<title>([^<]+)</title>'
title = self._html_search_regex(TITLE_RE, webpage, 'title')
```

+
### Collapse fallbacks

Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns.

@@ -385,10 +495,13 @@ description = (

Methods supporting list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`.

+
### Trailing parentheses

Always move trailing parentheses after the last argument.

+Note that this *does not* apply to braces `}` or square brackets `]`, both of which should be closed on a new line.
+
#### Example

Correct:

@@ -406,30 +519,36 @@ Incorrect:
)
```

+
### Use convenience conversion and parsing functions

-Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+Wrap all extracted numeric data into safe functions from [`yt_dlp/utils.py`](yt_dlp/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.

Use `url_or_none` for safe URL processing.

-Use `try_get` for safe metadata extraction from parsed JSON.
+Use `try_get`, `dict_get` and `traverse_obj` for safe metadata extraction from parsed JSON.
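For instance, a minimal sketch of `dict_get`, which returns the value of the first candidate key that holds a non-empty value; the `meta` fragment here is hypothetical:

```python
from yt_dlp.utils import dict_get, int_or_none

meta = {'viewCount': None, 'views': '1042'}  # hypothetical parsed JSON fragment

# dict_get tries the keys in order, skipping empty/None values by default
view_count = int_or_none(dict_get(meta, ('viewCount', 'views')))  # 1042
```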
Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution` for `resolution` extraction, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.

-Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
+Explore [`yt_dlp/utils.py`](yt_dlp/utils.py) for more useful convenience functions.

#### More examples

##### Safely extract optional description from parsed JSON

```python
-description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
+description = traverse_obj(response, ('result', 'video', 'summary'), expected_type=str)
```

##### Safely extract more optional metadata

```python
-video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
+video = traverse_obj(response, ('result', 'video', 0), default={}, expected_type=dict)
description = video.get('summary')
duration = float_or_none(video.get('durationMs'), scale=1000)
view_count = int_or_none(video.get('views'))
```
+
+
+
+# EMBEDDING YT-DLP
+See [README.md#embedding-yt-dlp](README.md#embedding-yt-dlp) for instructions on how to embed yt-dlp in another Python program.
diff --git a/README.md b/README.md
index ff117663af..f98fe98b6e 100644
--- a/README.md
+++ b/README.md
@@ -54,7 +54,11 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
* [Modifying metadata examples](#modifying-metadata-examples)
* [EXTRACTOR ARGUMENTS](#extractor-arguments)
* [PLUGINS](#plugins)
+* [EMBEDDING YT-DLP](#embedding-yt-dlp)
* [DEPRECATED OPTIONS](#deprecated-options)
+* [CONTRIBUTING](CONTRIBUTING.md#contributing-to-yt-dlp)
+    * [Opening an Issue](CONTRIBUTING.md#opening-an-issue)
+    * [Developer Instructions](CONTRIBUTING.md#developer-instructions)
* [MORE](#more)

@@ -1510,6 +1514,84 @@ Note that **all** plugins are imported even if not invoked, and that **there are

If you are a plugin author, add [ytdlp-plugins](https://github.com/topics/ytdlp-plugins) as a topic to your repository for discoverability

+
+# EMBEDDING YT-DLP
+
+yt-dlp makes the best effort to be a good command-line program, and thus should be callable from any programming language.
+
+Your program should avoid parsing the normal stdout since it may change in future versions. Instead, it should use options such as `-J`, `--print`, `--progress-template`, `--exec`, etc. to create console output that you can reliably reproduce and parse.
+
+From a Python program, you can embed yt-dlp in a more powerful fashion, like this:
+
+```python
+import yt_dlp
+
+ydl_opts = {}
+with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
+```
+
+Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L154-L452).
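Before the fuller embedding example below, here is a minimal sketch of the non-Python route described above: invoking the CLI with `-J` so that stdout is a single JSON document that can be parsed reliably (using the same example URL as above):

```python
import json
import subprocess

# "-J" simulates the download and dumps the full info dict as one JSON
# document on stdout, which is stable to parse unlike the normal output
proc = subprocess.run(
    ['yt-dlp', '-J', 'https://www.youtube.com/watch?v=BaW_jenozKc'],
    capture_output=True, text=True, check=True)
info = json.loads(proc.stdout)
print(info['id'], info['title'])
```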
+ +Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), converts the video to an mp3 file, implements a custom postprocessor and prints the final info_dict as json: + +```python +import json + +import yt_dlp +from yt_dlp.postprocessor.common import PostProcessor + + +class MyLogger: + def debug(self, msg): + # For compatability with youtube-dl, both debug and info are passed into debug + # You can distinguish them by the prefix '[debug] ' + if msg.startswith('[debug] '): + pass + else: + self.info(msg) + + def info(self, msg): + pass + + def warning(self, msg): + pass + + def error(self, msg): + print(msg) + + +class MyCustomPP(PostProcessor): + def run(self, info): + self.to_screen('Doing stuff') + return [], info + + +def my_hook(d): + if d['status'] == 'finished': + print('Done downloading, now converting ...') + + +ydl_opts = { + 'format': 'bestaudio/best', + 'postprocessors': [{ + 'key': 'FFmpegExtractAudio', + 'preferredcodec': 'mp3', + 'preferredquality': '192', + }], + 'logger': MyLogger(), + 'progress_hooks': [my_hook], +} + +with yt_dlp.YoutubeDL(ydl_opts) as ydl: + ydl.add_post_processor(MyCustomPP()) + info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc') + print(json.dumps(ydl.sanitize_info(info))) +``` + +See the public functions in [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py) for other available functions. Eg: `ydl.download`, `ydl.download_with_info_file` + + # DEPRECATED OPTIONS These are all the deprecated options and the current alternative to achieve the same effect @@ -1611,6 +1693,8 @@ These options were deprecated since 2014 and have now been entirely removed -t, --title -o "%(title)s-%(id)s.%(ext)s" -l, --literal -o accepts literal names +# CONTRIBUTING +See [CONTRIBUTING.md](CONTRIBUTING.md#contributing-to-yt-dlp) for instructions on [Opening an Issue](CONTRIBUTING.md#opening-an-issue) and [Contributing code to the project](CONTRIBUTING.md#developer-instructions) # MORE -For FAQ, Developer Instructions etc., see the [original README](https://github.com/ytdl-org/youtube-dl#faq) +For FAQ see the [youtube-dl README](https://github.com/ytdl-org/youtube-dl#faq) diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py index c7f3eef761..6b1b8219c4 100755 --- a/devscripts/make_contributing.py +++ b/devscripts/make_contributing.py @@ -1,33 +1,34 @@ #!/usr/bin/env python3 from __future__ import unicode_literals -# import io +import io import optparse -# import re +import re def main(): + return # This is unused in yt-dlp + parser = optparse.OptionParser(usage='%prog INFILE OUTFILE') options, args = parser.parse_args() if len(args) != 2: parser.error('Expected an input and an output filename') - -""" infile, outfile = args + infile, outfile = args with io.open(infile, encoding='utf-8') as inf: readme = inf.read() - bug_text = re.search( """ -# r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1) -# dev_text = re.search( -# r'(?s)(#\s*DEVELOPER INSTRUCTIONS.*?)#\s*EMBEDDING yt-dlp', -""" readme).group(1) + bug_text = re.search( + r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1) + dev_text = re.search( + r'(?s)(#\s*DEVELOPER INSTRUCTIONS.*?)#\s*EMBEDDING yt-dlp', readme).group(1) out = bug_text + dev_text with io.open(outfile, 'w', encoding='utf-8') as outf: - outf.write(out) """ + outf.write(out) + if __name__ == '__main__': main() From d6124e191e17f03fd48acf78db536400607c49d1 Mon Sep 17 00:00:00 2001 From: u-spec-png 
<54671367+u-spec-png@users.noreply.github.com> Date: Sat, 9 Oct 2021 02:04:02 +0000 Subject: [PATCH 217/641] [bilibili] Fix bug in efc947fb3eea38eeae257980e663de806f1e19d0 Authored by: u-spec-png --- yt_dlp/extractor/bilibili.py | 1 - 1 file changed, 1 deletion(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index daa224b17f..a1be7e04b3 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -800,7 +800,6 @@ class BiliIntlBaseIE(InfoExtractor): sub_data = self._download_json(sub_url, ep_id, fatal=False) if not sub_data: continue - sub_data = self._parse_json(sub_data) subtitles.setdefault(sub.get('key', 'en'), []).append({ 'ext': 'srt', 'data': self.json2srt(sub_data) From f2cad2e496843889274b79deb3f7f6e1c8c3f948 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 8 Oct 2021 20:37:24 +0530 Subject: [PATCH 218/641] [Hidive] Fix subtitles broken by 705e7c2005dfe67a905e18736c9f6345ee9d386b --- yt_dlp/extractor/hidive.py | 56 +++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/yt_dlp/extractor/hidive.py b/yt_dlp/extractor/hidive.py index 909d1fbc10..18ae4d3792 100644 --- a/yt_dlp/extractor/hidive.py +++ b/yt_dlp/extractor/hidive.py @@ -1,5 +1,5 @@ # coding: utf-8 -from __future__ import unicode_literals +import re from .common import InfoExtractor from ..utils import ( @@ -52,15 +52,39 @@ class HiDiveIE(InfoExtractor): self._download_webpage( self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data)) + def _call_api(self, video_id, title, key, data={}, **kwargs): + data = { + **data, + 'Title': title, + 'Key': key, + 'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783', + } + return self._download_json( + 'https://www.hidive.com/play/settings', video_id, + data=urlencode_postdata(data), **kwargs) or {} + + def _extract_subtitles_from_rendition(self, rendition, subtitles, parsed_urls): + for cc_file in rendition.get('ccFiles', []): + cc_url = url_or_none(try_get(cc_file, lambda x: x[2])) + # name is used since we cant distinguish subs with same language code + cc_lang = try_get(cc_file, (lambda x: x[1].replace(' ', '-').lower(), lambda x: x[0]), str) + if cc_url not in parsed_urls and cc_lang: + parsed_urls.add(cc_url) + subtitles.setdefault(cc_lang, []).append({'url': cc_url}) + + def _get_subtitles(self, url, video_id, title, key, subtitles, parsed_urls): + webpage = self._download_webpage(url, video_id, fatal=False) or '' + for caption in set(re.findall(r'data-captions=\"([^\"]+)\"', webpage)): + renditions = self._call_api( + video_id, title, key, {'Captions': caption}, fatal=False, + note=f'Downloading {caption} subtitle information').get('renditions') or {} + for rendition_id, rendition in renditions.items(): + self._extract_subtitles_from_rendition(rendition, subtitles, parsed_urls) + return subtitles + def _real_extract(self, url): video_id, title, key = self._match_valid_url(url).group('id', 'title', 'key') - settings = self._download_json( - 'https://www.hidive.com/play/settings', video_id, - data=urlencode_postdata({ - 'Title': title, - 'Key': key, - 'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783', - })) + settings = self._call_api(video_id, title, key) restriction = settings.get('restrictionReason') if restriction == 'RegionRestricted': @@ -69,12 +93,12 @@ class HiDiveIE(InfoExtractor): raise ExtractorError( '%s said: %s' % (self.IE_NAME, restriction), expected=True) - formats, subtitles, urls = [], {}, {None} + formats, subtitles, parsed_urls = [], {}, {None} for 
rendition_id, rendition in settings['renditions'].items(): audio, version, extra = rendition_id.split('_') m3u8_url = url_or_none(try_get(rendition, lambda x: x['bitrates']['hls'])) - if m3u8_url not in urls: - urls.add(m3u8_url) + if m3u8_url not in parsed_urls: + parsed_urls.add(m3u8_url) frmt = self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=rendition_id, fatal=False) for f in frmt: @@ -82,19 +106,13 @@ class HiDiveIE(InfoExtractor): f['format_note'] = f'{version}, {extra}' formats.extend(frmt) - for cc_file in rendition.get('ccFiles', []): - cc_url = url_or_none(try_get(cc_file, lambda x: x[2])) - # name is used since we cant distinguish subs with same language code - cc_lang = try_get(cc_file, (lambda x: x[1].replace(' ', '-').lower(), lambda x: x[0]), str) - if cc_url not in urls and cc_lang: - urls.add(cc_url) - subtitles.setdefault(cc_lang, []).append({'url': cc_url}) + self._extract_subtitles_from_rendition(rendition, subtitles, parsed_urls) self._sort_formats(formats) return { 'id': video_id, 'title': video_id, - 'subtitles': subtitles, + 'subtitles': self.extract_subtitles(url, video_id, title, key, subtitles, parsed_urls), 'formats': formats, 'series': title, 'season_number': int_or_none( From b922db9fe58f73aacd5dab4fe5ba1001d803a798 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 9 Oct 2021 02:06:30 +0530 Subject: [PATCH 219/641] [http] Respect user-provided chunk size over extractor's --- yt_dlp/downloader/http.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 9e79051ada..5d7c988c71 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -48,8 +48,9 @@ class HttpFD(FileDownloader): is_test = self.params.get('test', False) chunk_size = self._TEST_FILE_SIZE if is_test else ( + self.params.get('http_chunk_size') or info_dict.get('downloader_options', {}).get('http_chunk_size') - or self.params.get('http_chunk_size') or 0) + or 0) ctx.open_mode = 'wb' ctx.resume_len = 0 From 2614f64600f9249682897786f5345a61d98dafeb Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 9 Oct 2021 08:14:41 +0530 Subject: [PATCH 220/641] [utils] Let traverse_obj accept functions as keys --- yt_dlp/utils.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 0273878974..db9b9de948 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -6335,7 +6335,9 @@ def traverse_obj( ''' Traverse nested list/dict/tuple @param path_list A list of paths which are checked one by one. Each path is a list of keys where each key is a string, - a tuple of strings or "...". When a tuple is given, + a function, a tuple of strings or "...". + When a fuction is given, it takes the key as argument and + returns whether the key matches or not. When a tuple is given, all the keys given in the tuple are traversed, and "..." 
traverses all the keys in the object @param default Default value to return @@ -6368,6 +6370,18 @@ def traverse_obj( _current_depth += 1 depth = max(depth, _current_depth) return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj] + elif callable(key): + if isinstance(obj, (list, tuple, LazyList)): + obj = enumerate(obj) + elif isinstance(obj, dict): + obj = obj.items() + else: + if not traverse_string: + return None + obj = str(obj) + _current_depth += 1 + depth = max(depth, _current_depth) + return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)] elif isinstance(obj, dict) and not (is_user_input and key == ':'): obj = (obj.get(key) if casesense or (key in obj) else next((v for k, v in obj.items() if _lower(k) == key), None)) From 8cd69fc40786d081b5523f9dc20861c130a2843d Mon Sep 17 00:00:00 2001 From: Jules-A Date: Sat, 9 Oct 2021 23:21:41 +0800 Subject: [PATCH 221/641] [Funimation] Fix for /v/ urls (#1196) Closes #993 Authored by: pukkandan, Jules-A --- yt_dlp/extractor/funimation.py | 133 ++++++++++++++++++--------------- 1 file changed, 72 insertions(+), 61 deletions(-) diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py index ede53b326e..382cbe159c 100644 --- a/yt_dlp/extractor/funimation.py +++ b/yt_dlp/extractor/funimation.py @@ -2,26 +2,61 @@ from __future__ import unicode_literals import random +import re import string from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( determine_ext, - dict_get, int_or_none, js_to_json, - str_or_none, - try_get, + orderedSet, qualities, + str_or_none, + traverse_obj, + try_get, urlencode_postdata, ExtractorError, ) -class FunimationPageIE(InfoExtractor): +class FunimationBaseIE(InfoExtractor): + _NETRC_MACHINE = 'funimation' + _REGION = None + _TOKEN = None + + def _get_region(self): + region_cookie = self._get_cookies('https://www.funimation.com').get('region') + region = region_cookie.value if region_cookie else self.get_param('geo_bypass_country') + return region or traverse_obj( + self._download_json( + 'https://geo-service.prd.funimationsvc.com/geo/v1/region/check', None, fatal=False, + note='Checking geo-location', errnote='Unable to fetch geo-location information'), + 'region') or 'US' + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + try: + data = self._download_json( + 'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/', + None, 'Logging in', data=urlencode_postdata({ + 'username': username, + 'password': password, + })) + return data['token'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + error = self._parse_json(e.cause.read().decode(), None)['error'] + raise ExtractorError(error, expected=True) + raise + + +class FunimationPageIE(FunimationBaseIE): IE_NAME = 'funimation:page' - _VALID_URL = r'(?Phttps?://(?:www\.)?funimation(?:\.com|now\.uk))/(?P[^/]+/)?(?Pshows/(?P[^/]+/[^/?#&]+).*$)' + _VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:(?P[^/]+)/)?(?:shows|v)/(?P[^/]+)/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/', @@ -46,38 +81,34 @@ class FunimationPageIE(InfoExtractor): }, { 'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/', 'only_matching': True, + }, { + 'url': 'https://www.funimation.com/v/a-certain-scientific-railgun/super-powered-level-5', + 'only_matching': True, }] + def 
_real_initialize(self): + if not self._REGION: + FunimationBaseIE._REGION = self._get_region() + if not self._TOKEN: + FunimationBaseIE._TOKEN = self._login() + def _real_extract(self, url): - mobj = self._match_valid_url(url) - display_id = mobj.group('id').replace('/', '_') - if not mobj.group('lang'): - url = '%s/en/%s' % (mobj.group('origin'), mobj.group('path')) + locale, show, episode = self._match_valid_url(url).group('lang', 'show', 'episode') - webpage = self._download_webpage(url, display_id) - title_data = self._parse_json(self._search_regex( - r'TITLE_DATA\s*=\s*({[^}]+})', - webpage, 'title data', default=''), - display_id, js_to_json, fatal=False) or {} + video_id = traverse_obj(self._download_json( + f'https://title-api.prd.funimationsvc.com/v1/shows/{show}/episodes/{episode}', + f'{show}_{episode}', query={ + 'deviceType': 'web', + 'region': self._REGION, + 'locale': locale or 'en' + }), ('videoList', ..., 'id'), get_all=False) - video_id = ( - title_data.get('id') - or self._search_regex( - (r"KANE_customdimensions.videoID\s*=\s*'(\d+)';", r']+src="/player/(\d+)'), - webpage, 'video_id', default=None) - or self._search_regex( - r'/player/(\d+)', - self._html_search_meta(['al:web:url', 'og:video:url', 'og:video:secure_url'], webpage, fatal=True), - 'video id')) return self.url_result(f'https://www.funimation.com/player/{video_id}', FunimationIE.ie_key(), video_id) -class FunimationIE(InfoExtractor): +class FunimationIE(FunimationBaseIE): _VALID_URL = r'https?://(?:www\.)?funimation\.com/player/(?P\d+)' - _NETRC_MACHINE = 'funimation' - _TOKEN = None - _TESTS = [{ 'url': 'https://www.funimation.com/player/210051', 'info_dict': { @@ -93,7 +124,7 @@ class FunimationIE(InfoExtractor): 'season_number': 99, 'series': 'Attack on Titan: Junior High', 'description': '', - 'duration': 154, + 'duration': 155, }, 'params': { 'skip_download': 'm3u8', @@ -114,7 +145,7 @@ class FunimationIE(InfoExtractor): 'season_number': 99, 'series': 'Attack on Titan: Junior High', 'description': '', - 'duration': 154, + 'duration': 155, }, 'params': { 'skip_download': 'm3u8', @@ -122,26 +153,9 @@ class FunimationIE(InfoExtractor): }, }] - def _login(self): - username, password = self._get_login_info() - if username is None: - return - try: - data = self._download_json( - 'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/', - None, 'Logging in', data=urlencode_postdata({ - 'username': username, - 'password': password, - })) - self._TOKEN = data['token'] - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: - error = self._parse_json(e.cause.read().decode(), None)['error'] - raise ExtractorError(error, expected=True) - raise - def _real_initialize(self): - self._login() + if not self._TOKEN: + FunimationBaseIE._TOKEN = self._login() @staticmethod def _get_experiences(episode): @@ -283,7 +297,7 @@ class FunimationIE(InfoExtractor): return subtitles -class FunimationShowIE(FunimationIE): +class FunimationShowIE(FunimationBaseIE): IE_NAME = 'funimation:show' _VALID_URL = r'(?Phttps?://(?:www\.)?funimation(?:\.com|now\.uk)/(?P[^/]+)?/?shows/(?P[^/?#&]+))/?(?:[?#]|$)' @@ -311,31 +325,28 @@ class FunimationShowIE(FunimationIE): }] def _real_initialize(self): - region = self._get_cookies('https://www.funimation.com').get('region') - self._region = region.value if region else try_get( - self._download_json( - 'https://geo-service.prd.funimationsvc.com/geo/v1/region/check', None, fatal=False, - note='Checking geo-location', errnote='Unable to fetch 
geo-location information'), - lambda x: x['region']) or 'US' + if not self._REGION: + FunimationBaseIE._REGION = self._get_region() def _real_extract(self, url): base_url, locale, display_id = self._match_valid_url(url).groups() show_info = self._download_json( 'https://title-api.prd.funimationsvc.com/v2/shows/%s?region=%s&deviceType=web&locale=%s' - % (display_id, self._region, locale or 'en'), display_id) - items = self._download_json( + % (display_id, self._REGION, locale or 'en'), display_id) + items_info = self._download_json( 'https://prod-api-funimationnow.dadcdigital.com/api/funimation/episodes/?limit=99999&title_id=%s' - % show_info.get('id'), display_id).get('items') - vod_items = map(lambda k: dict_get(k, ('mostRecentSvod', 'mostRecentAvod')).get('item'), items) + % show_info.get('id'), display_id) + + vod_items = traverse_obj(items_info, ('items', ..., re.compile('(?i)mostRecent[AS]vod').match, 'item')) return { '_type': 'playlist', 'id': show_info['id'], 'title': show_info['name'], - 'entries': [ + 'entries': orderedSet( self.url_result( '%s/%s' % (base_url, vod_item.get('episodeSlug')), FunimationPageIE.ie_key(), vod_item.get('episodeId'), vod_item.get('episodeName')) - for vod_item in sorted(vod_items, key=lambda x: x.get('episodeOrder'))], + for vod_item in sorted(vod_items, key=lambda x: x.get('episodeOrder', -1))), } From 4e3b637d5be70b92ee511743405f3c907fed20f6 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 9 Oct 2021 21:48:46 +0530 Subject: [PATCH 222/641] Merge webm formats into mkv if thumbnails are to be embedded This was originally implemented in 4d971a16b831a45147b6ae7ce53b3e105d204da7 (#173) by @damianoamatruda but was reverted in 3b297919e046082cc4ab26ecb959d9f4f584102b since it was unintentionally being triggered for `write_thumbnail` (See #500) --- yt_dlp/YoutubeDL.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 1d865161af..398fb67af1 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -137,6 +137,7 @@ from .downloader import ( from .downloader.rtmp import rtmpdump_version from .postprocessor import ( get_postprocessor, + EmbedThumbnailPP, FFmpegFixupDurationPP, FFmpegFixupM3u8PP, FFmpegFixupM4aPP, @@ -2696,10 +2697,19 @@ class YoutubeDL(object): requested_formats = info_dict['requested_formats'] old_ext = info_dict['ext'] - if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats): - info_dict['ext'] = 'mkv' - self.report_warning( - 'Requested formats are incompatible for merge and will be merged into mkv.') + if self.params.get('merge_output_format') is None: + if not compatible_formats(requested_formats): + info_dict['ext'] = 'mkv' + self.report_warning( + 'Requested formats are incompatible for merge and will be merged into mkv') + if (info_dict['ext'] == 'webm' + and info_dict.get('thumbnails') + # check with type instead of pp_key, __name__, or isinstance + # since we dont want any custom PPs to trigger this + and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): + info_dict['ext'] = 'mkv' + self.report_warning( + 'webm doesn\'t support embedding a thumbnail, mkv will be used') new_ext = info_dict['ext'] def correct_ext(filename, ext=new_ext): From b5ae35ee6d3f913898770b8c74ee5f5e5cc33560 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 9 Oct 2021 05:53:15 +0530 Subject: [PATCH 223/641] [cleanup] Misc cleanup --- .github/ISSUE_TEMPLATE/1_broken_site.md | 15 ++++--- 
.../ISSUE_TEMPLATE/2_site_support_request.md | 15 ++++--- .../ISSUE_TEMPLATE/3_site_feature_request.md | 13 +++--- .github/ISSUE_TEMPLATE/4_bug_report.md | 17 ++++---- .github/ISSUE_TEMPLATE/5_feature_request.md | 11 +++-- .github/ISSUE_TEMPLATE/6_question.md | 11 +++-- .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md | 15 ++++--- .../2_site_support_request.md | 15 ++++--- .../3_site_feature_request.md | 13 +++--- .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md | 17 ++++---- .../ISSUE_TEMPLATE_tmpl/5_feature_request.md | 11 +++-- .github/PULL_REQUEST_TEMPLATE.md | 2 +- README.md | 5 ++- setup.py | 2 +- test/helper.py | 6 +-- yt_dlp/YoutubeDL.py | 29 ++++++++----- yt_dlp/__init__.py | 4 -- yt_dlp/downloader/http.py | 4 +- yt_dlp/extractor/common.py | 42 +++++++------------ yt_dlp/extractor/hidive.py | 6 +-- yt_dlp/extractor/minoto.py | 2 +- yt_dlp/extractor/palcomp3.py | 2 +- yt_dlp/minicurses.py | 2 + yt_dlp/options.py | 17 +------- ytdlp_plugins/extractor/sample.py | 2 +- 25 files changed, 142 insertions(+), 136 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 53ca71219c..8a55035103 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -1,8 +1,8 @@ --- name: Broken site support about: Report broken or misfunctioning site -title: "[Broken]" -labels: Broken +title: "[Broken] Website Name: A short description of the issue" +labels: ['triage', 'extractor-bug'] assignees: '' --- @@ -21,11 +21,12 @@ assignees: '' - [ ] I'm reporting a broken site support @@ -33,6 +34,8 @@ Carefully read and work through this check list in order to prevent the most com - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones +- [ ] I've read the opening an issue section in CONTRIBUTING.md +- [ ] I have given an appropriate title to the issue ## Verbose log diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md index a9e2a9c532..7f58fc8a72 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -1,8 +1,8 @@ --- name: Site support request about: Request support for a new site -title: "[Site Request]" -labels: Request +title: "[Site Request] Website Name" +labels: ['triage', 'site-request'] assignees: '' --- @@ -21,11 +21,12 @@ assignees: '' - [ ] I'm reporting a new site support request @@ -34,6 +35,8 @@ Carefully read and work through this check list in order to prevent the most com - [ ] I've checked that none of provided URLs violate any copyrights - [ ] The provided URLs do not contain any DRM to the best of my knowledge - [ ] I've searched the bugtracker for similar site support requests including closed ones +- [ ] I've read the opening an issue section in CONTRIBUTING.md +- [ ] I have given an appropriate title to the issue ## Example URLs diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 6cd8b8ba06..38b38c803b 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -1,8 +1,8 @@ --- name: Site feature request about: Request a new functionality for a site -title: "[Site Request]" -labels: Request +title: "[Site Feature] Website Name: A short description of the 
feature" +labels: ['triage', 'site-enhancement'] assignees: '' --- @@ -21,14 +21,17 @@ assignees: '' - [ ] I'm reporting a site feature request - [ ] I've verified that I'm running yt-dlp version **2021.09.25** - [ ] I've searched the bugtracker for similar site feature requests including closed ones +- [ ] I've read the opening an issue section in CONTRIBUTING.md +- [ ] I have given an appropriate title to the issue ## Description diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index a302daab63..b2f7efcdab 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -1,8 +1,8 @@ --- name: Bug report about: Report a bug unrelated to any particular site or extractor -title: '' -labels: '' +title: '[Bug] A short description of the issue' +labels: ['triage', 'bug'] assignees: '' --- @@ -21,12 +21,12 @@ assignees: '' - [ ] I'm reporting a bug unrelated to a specific site @@ -35,7 +35,8 @@ Carefully read and work through this check list in order to prevent the most com - [ ] The provided URLs do not contain any DRM to the best of my knowledge - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones -- [ ] I've read bugs section in FAQ +- [ ] I've read the opening an issue section in CONTRIBUTING.md +- [ ] I have given an appropriate title to the issue ## Verbose log diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md index c40a5ad35d..4aad8ab188 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -1,8 +1,8 @@ --- name: Feature request about: Request a new functionality unrelated to any particular site or extractor -title: "[Feature Request]" -labels: Request +title: "[Feature Request] A short description of your feature" +labels: ['triage', 'enhancement'] assignees: '' --- @@ -21,14 +21,17 @@ assignees: '' - [ ] I'm reporting a feature request - [ ] I've verified that I'm running yt-dlp version **2021.09.25** - [ ] I've searched the bugtracker for similar feature requests including closed ones +- [ ] I've read the opening an issue section in CONTRIBUTING.md +- [ ] I have given an appropriate title to the issue ## Description diff --git a/.github/ISSUE_TEMPLATE/6_question.md b/.github/ISSUE_TEMPLATE/6_question.md index 9f052090a1..5ab17802a7 100644 --- a/.github/ISSUE_TEMPLATE/6_question.md +++ b/.github/ISSUE_TEMPLATE/6_question.md @@ -1,7 +1,7 @@ --- name: Ask question about: Ask yt-dlp related question -title: "[Question]" +title: "[Question] A short description of your question" labels: question assignees: '' @@ -21,14 +21,17 @@ assignees: '' - [ ] I'm asking a question -- [ ] I've looked through the README and FAQ for similar questions +- [ ] I've looked through the README +- [ ] I've read the opening an issue section in CONTRIBUTING.md - [ ] I've searched the bugtracker for similar questions including closed ones +- [ ] I have given an appropriate title to the issue ## Question diff --git a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md index 6da13a7b50..9ee0022964 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md @@ -1,8 +1,8 @@ --- name: Broken site support about: Report broken or misfunctioning site -title: "[Broken]" -labels: Broken +title: "[Broken] Website Name: A 
short description of the issue" +labels: ['triage', 'extractor-bug'] assignees: '' --- @@ -21,11 +21,12 @@ assignees: '' - [ ] I'm reporting a broken site support @@ -33,6 +34,8 @@ Carefully read and work through this check list in order to prevent the most com - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones +- [ ] I've read the opening an issue section in CONTRIBUTING.md +- [ ] I have given an appropriate title to the issue ## Verbose log diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md index 79adb709c1..e71abbab29 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md +++ b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md @@ -1,8 +1,8 @@ --- name: Site support request about: Request support for a new site -title: "[Site Request]" -labels: Request +title: "[Site Request] Website Name" +labels: ['triage', 'site-request'] assignees: '' --- @@ -21,11 +21,12 @@ assignees: '' - [ ] I'm reporting a new site support request @@ -34,6 +35,8 @@ Carefully read and work through this check list in order to prevent the most com - [ ] I've checked that none of provided URLs violate any copyrights - [ ] The provided URLs do not contain any DRM to the best of my knowledge - [ ] I've searched the bugtracker for similar site support requests including closed ones +- [ ] I've read the opening an issue section in CONTRIBUTING.md +- [ ] I have given an appropriate title to the issue ## Example URLs diff --git a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md index d74b6e279f..e0ccd54161 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md @@ -1,8 +1,8 @@ --- name: Site feature request about: Request a new functionality for a site -title: "[Site Request]" -labels: Request +title: "[Site Feature] Website Name: A short description of the feature" +labels: ['triage', 'site-enhancement'] assignees: '' --- @@ -21,14 +21,17 @@ assignees: '' - [ ] I'm reporting a site feature request - [ ] I've verified that I'm running yt-dlp version **%(version)s** - [ ] I've searched the bugtracker for similar site feature requests including closed ones +- [ ] I've read the opening an issue section in CONTRIBUTING.md +- [ ] I have given an appropriate title to the issue ## Description diff --git a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md index 13b577f862..43e91b0522 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md @@ -1,8 +1,8 @@ --- name: Bug report about: Report a bug unrelated to any particular site or extractor -title: '' -labels: '' +title: '[Bug] A short description of the issue' +labels: ['triage', 'bug'] assignees: '' --- @@ -21,12 +21,12 @@ assignees: '' - [ ] I'm reporting a bug unrelated to a specific site @@ -35,7 +35,8 @@ Carefully read and work through this check list in order to prevent the most com - [ ] The provided URLs do not contain any DRM to the best of my knowledge - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar bug reports including closed ones -- [ ] I've read bugs section in 
FAQ +- [ ] I've read the opening an issue section in CONTRIBUTING.md +- [ ] I have given an appropriate title to the issue ## Verbose log diff --git a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md index 4a0209db1b..075e0b1b32 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md +++ b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md @@ -1,8 +1,8 @@ --- name: Feature request about: Request a new functionality unrelated to any particular site or extractor -title: "[Feature Request]" -labels: Request +title: "[Feature Request] A short description of your feature" +labels: ['triage', 'enhancement'] assignees: '' --- @@ -21,14 +21,17 @@ assignees: '' - [ ] I'm reporting a feature request - [ ] I've verified that I'm running yt-dlp version **%(version)s** - [ ] I've searched the bugtracker for similar feature requests including closed ones +- [ ] I've read the opening an issue section in CONTRIBUTING.md +- [ ] I have given an appropriate title to the issue ## Description diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 7ef08d68ac..684bf59e91 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -7,7 +7,7 @@ --- ### Before submitting a *pull request* make sure you have: -- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#adding-support-for-a-new-site) and [yt-dlp coding conventions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#yt-dlp-coding-conventions) sections +- [ ] At least skimmed through [contributing guidelines](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) including [yt-dlp coding conventions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#yt-dlp-coding-conventions) - [ ] [Searched](https://github.com/yt-dlp/yt-dlp/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests - [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) diff --git a/README.md b/README.md index f98fe98b6e..c0f84fcac3 100644 --- a/README.md +++ b/README.md @@ -199,7 +199,7 @@ On windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https: While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly recommended * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging seperate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. Licence [depends on the build](https://www.ffmpeg.org/legal.html) * [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. Licenced under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING) -* [**pycryptodomex**](https://github.com/Legrandin/pycryptodomex) - For decrypting AES-128 HLS streams and various other data. Licenced under [BSD2](https://github.com/Legrandin/pycryptodomex/blob/master/LICENSE.rst) +* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome) - For decrypting AES-128 HLS streams and various other data. Licenced under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) * [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licenced under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE) * [**keyring**](https://github.com/jaraco/keyring) - For decrypting cookies of chromium-based browsers on Linux. 
Licenced under [MIT](https://github.com/jaraco/keyring/blob/main/LICENSE) * [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licenced under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) @@ -287,7 +287,8 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t --flat-playlist Do not extract the videos of a playlist, only list them --no-flat-playlist Extract the videos of a playlist - --mark-watched Mark videos watched (YouTube only) + --mark-watched Mark videos watched (even with --simulate). + Currently only supported for YouTube --no-mark-watched Do not mark videos watched (default) --no-colors Do not emit color codes in output --compat-options OPTS Options that can help keep compatibility diff --git a/setup.py b/setup.py index ff23877dcc..fbd2be0aeb 100644 --- a/setup.py +++ b/setup.py @@ -119,7 +119,7 @@ setup( 'Documentation': 'https://yt-dlp.readthedocs.io', 'Source': 'https://github.com/yt-dlp/yt-dlp', 'Tracker': 'https://github.com/yt-dlp/yt-dlp/issues', - #'Funding': 'https://donate.pypi.org', + 'Funding': 'https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators', }, classifiers=[ 'Topic :: Multimedia :: Video', diff --git a/test/helper.py b/test/helper.py index 9599eab8e2..5c0e645f95 100644 --- a/test/helper.py +++ b/test/helper.py @@ -22,7 +22,7 @@ from yt_dlp.utils import ( ) -if "pytest" in sys.modules: +if 'pytest' in sys.modules: import pytest is_download_test = pytest.mark.download else: @@ -32,9 +32,9 @@ else: def get_params(override=None): PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), - "parameters.json") + 'parameters.json') LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), - "local_parameters.json") + 'local_parameters.json') with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: parameters = json.load(pf) if os.path.exists(LOCAL_PARAMETERS_FILE): diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 398fb67af1..2b3c33ce53 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -9,6 +9,7 @@ import copy import datetime import errno import fileinput +import functools import io import itertools import json @@ -330,7 +331,8 @@ class YoutubeDL(object): * when: When to run the postprocessor. Can be one of pre_process|before_dl|post_process|after_move. Assumed to be 'post_process' if not given - post_hooks: A list of functions that get called as the final step + post_hooks: Deprecated - Register a custom postprocessor instead + A list of functions that get called as the final step for each video file, after all postprocessors have been called. The filename will be passed as the only argument. progress_hooks: A list of functions that get called on download @@ -423,7 +425,7 @@ class YoutubeDL(object): use downloader suggested by extractor if None. compat_opts: Compatibility options. See "Differences in default behavior". The following options do not work when used through the API: - filename, abort-on-error, multistreams, no-live-chat, + filename, abort-on-error, multistreams, no-live-chat, format-sort no-clean-infojson, no-playlist-metafiles, no-keep-subs. Refer __init__.py for their implementation progress_template: Dictionary of templates for progress outputs. 
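 As the deprecation note above recommends, a `post_hooks` function is straightforwardly ported to a custom postprocessor. A minimal sketch of that migration, not part of this patch (the class name, the message and the target URL are illustrative assumptions; `when='after_move'` is one of the values listed in the docstring above):

    from yt_dlp import YoutubeDL
    from yt_dlp.postprocessor.common import PostProcessor

    class NotifyDonePP(PostProcessor):
        # Plays the role of a post_hooks function: runs once per video,
        # with access to the full info dict rather than just the filename
        def run(self, info):
            self.to_screen('Finished writing %s' % info.get('filepath'))
            return [], info  # (files to delete, possibly-updated info dict)

    with YoutubeDL() as ydl:
        ydl.add_post_processor(NotifyDonePP(), when='after_move')
        ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
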
@@ -434,8 +436,9 @@ class YoutubeDL(object): The following parameters are not used by YoutubeDL itself, they are used by the downloader (see yt_dlp/downloader/common.py): nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize, - max_filesize, test, noresizebuffer, retries, continuedl, noprogress, - xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size. + max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl, + noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size, + external_downloader_args. The following options are used by the post processors: prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available, @@ -541,13 +544,13 @@ class YoutubeDL(object): for msg in self.params.get('warnings', []): self.report_warning(msg) - if self.params.get('overwrites') is None: - self.params.pop('overwrites', None) - elif self.params.get('nooverwrites') is not None: + if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None: # nooverwrites was unnecessarily changed to overwrites # in 0c3d0f51778b153f65c21906031c2e091fcfb641 # This ensures compatibility with both keys self.params['overwrites'] = not self.params['nooverwrites'] + elif self.params.get('overwrites') is None: + self.params.pop('overwrites', None) else: self.params['nooverwrites'] = not self.params['overwrites'] @@ -1253,7 +1256,7 @@ class YoutubeDL(object): self.report_error('no suitable InfoExtractor for URL %s' % url) def __handle_extraction_exceptions(func): - + @functools.wraps(func) def wrapper(self, *args, **kwargs): try: return func(self, *args, **kwargs) @@ -1973,7 +1976,7 @@ class YoutubeDL(object): elif format_spec in ('mhtml', ): # storyboards extension filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none' else: - filter_f = (lambda f: f.get('format_id') == format_spec) # id + filter_f = lambda f: f.get('format_id') == format_spec # id def selector_function(ctx): formats = list(ctx['formats']) @@ -2453,8 +2456,12 @@ class YoutubeDL(object): if self.params.get('forceprint') or self.params.get('forcejson'): self.post_extract(info_dict) for tmpl in self.params.get('forceprint', []): - self.to_stdout(self.evaluate_outtmpl( - f'%({tmpl})s' if re.match(r'\w+$', tmpl) else tmpl, info_dict)) + mobj = re.match(r'\w+(=?)$', tmpl) + if mobj and mobj.group(1): + tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s' + elif mobj: + tmpl = '%({})s'.format(tmpl) + self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict)) print_mandatory('title') print_mandatory('id') diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index ade8222992..4b82efea7f 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -735,10 +735,6 @@ def _real_main(argv=None): 'geo_bypass_ip_block': opts.geo_bypass_ip_block, 'warnings': warnings, 'compat_opts': compat_opts, - # just for deprecation check - 'autonumber': opts.autonumber or None, - 'usetitle': opts.usetitle or None, - 'useid': opts.useid or None, } with YoutubeDL(ydl_opts) as ydl: diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 5d7c988c71..704ae6f5ad 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -48,8 +48,8 @@ class HttpFD(FileDownloader): is_test = self.params.get('test', False) chunk_size = self._TEST_FILE_SIZE if is_test else ( - self.params.get('http_chunk_size') or - info_dict.get('downloader_options', {}).get('http_chunk_size') + self.params.get('http_chunk_size') + or 
info_dict.get('downloader_options', {}).get('http_chunk_size') or 0) ctx.open_mode = 'wb' diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 4f940730a4..65444d3bf3 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1678,7 +1678,7 @@ class InfoExtractor(object): has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit') fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,) - limits = limit_text.split(":") if has_multiple_limits else (limit_text,) if has_limit else tuple() + limits = limit_text.split(':') if has_multiple_limits else (limit_text,) if has_limit else tuple() limit_count = len(limits) for (i, f) in enumerate(fields): add_item(f, reverse, closest, @@ -1762,9 +1762,9 @@ class InfoExtractor(object): if format.get('vbr') is not None and format.get('abr') is not None: format['tbr'] = format.get('vbr', 0) + format.get('abr', 0) else: - if format.get('vcodec') != "none" and format.get('vbr') is None: + if format.get('vcodec') != 'none' and format.get('vbr') is None: format['vbr'] = format.get('tbr') - format.get('abr', 0) - if format.get('acodec') != "none" and format.get('abr') is None: + if format.get('acodec') != 'none' and format.get('abr') is None: format['abr'] = format.get('tbr') - format.get('vbr', 0) return tuple(self._calculate_field_preference(format, field) for field in self._order) @@ -1966,13 +1966,16 @@ class InfoExtractor(object): 'format_note': 'Quality selection URL', } + def _report_ignoring_subs(self, name): + self.report_warning(bug_reports_message( + f'Ignoring subtitle tracks found in the {name} manifest; ' + 'if any subtitle tracks are missing,' + ), only_once=True) + def _extract_m3u8_formats(self, *args, **kwargs): fmts, subs = self._extract_m3u8_formats_and_subtitles(*args, **kwargs) if subs: - self.report_warning(bug_reports_message( - "Ignoring subtitle tracks found in the HLS manifest; " - "if any subtitle tracks are missing," - ), only_once=True) + self._report_ignoring_subs('HLS') return fmts def _extract_m3u8_formats_and_subtitles( @@ -2270,10 +2273,7 @@ class InfoExtractor(object): def _extract_smil_formats(self, *args, **kwargs): fmts, subs = self._extract_smil_formats_and_subtitles(*args, **kwargs) if subs: - self.report_warning(bug_reports_message( - "Ignoring subtitle tracks found in the SMIL manifest; " - "if any subtitle tracks are missing," - ), only_once=True) + self._report_ignoring_subs('SMIL') return fmts def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None): @@ -2515,10 +2515,7 @@ class InfoExtractor(object): def _extract_mpd_formats(self, *args, **kwargs): fmts, subs = self._extract_mpd_formats_and_subtitles(*args, **kwargs) if subs: - self.report_warning(bug_reports_message( - "Ignoring subtitle tracks found in the DASH manifest; " - "if any subtitle tracks are missing," - ), only_once=True) + self._report_ignoring_subs('DASH') return fmts def _extract_mpd_formats_and_subtitles( @@ -2542,10 +2539,7 @@ class InfoExtractor(object): def _parse_mpd_formats(self, *args, **kwargs): fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs) if subs: - self.report_warning(bug_reports_message( - "Ignoring subtitle tracks found in the DASH manifest; " - "if any subtitle tracks are missing," - ), only_once=True) + self._report_ignoring_subs('DASH') return fmts def _parse_mpd_formats_and_subtitles( @@ -2873,10 +2867,7 @@ class InfoExtractor(object): def _extract_ism_formats(self, *args, 
**kwargs): fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs) if subs: - self.report_warning(bug_reports_message( - "Ignoring subtitle tracks found in the ISM manifest; " - "if any subtitle tracks are missing," - )) + self._report_ignoring_subs('ISM') return fmts def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): @@ -3136,10 +3127,7 @@ class InfoExtractor(object): def _extract_akamai_formats(self, *args, **kwargs): fmts, subs = self._extract_akamai_formats_and_subtitles(*args, **kwargs) if subs: - self.report_warning(bug_reports_message( - "Ignoring subtitle tracks found in the manifests; " - "if any subtitle tracks are missing," - )) + self._report_ignoring_subs('akamai') return fmts def _extract_akamai_formats_and_subtitles(self, manifest_url, video_id, hosts={}): diff --git a/yt_dlp/extractor/hidive.py b/yt_dlp/extractor/hidive.py index 18ae4d3792..ef1ca197e2 100644 --- a/yt_dlp/extractor/hidive.py +++ b/yt_dlp/extractor/hidive.py @@ -93,7 +93,7 @@ class HiDiveIE(InfoExtractor): raise ExtractorError( '%s said: %s' % (self.IE_NAME, restriction), expected=True) - formats, subtitles, parsed_urls = [], {}, {None} + formats, parsed_urls = [], {None} for rendition_id, rendition in settings['renditions'].items(): audio, version, extra = rendition_id.split('_') m3u8_url = url_or_none(try_get(rendition, lambda x: x['bitrates']['hls'])) @@ -105,14 +105,12 @@ class HiDiveIE(InfoExtractor): f['language'] = audio f['format_note'] = f'{version}, {extra}' formats.extend(frmt) - - self._extract_subtitles_from_rendition(rendition, subtitles, parsed_urls) self._sort_formats(formats) return { 'id': video_id, 'title': video_id, - 'subtitles': self.extract_subtitles(url, video_id, title, key, subtitles, parsed_urls), + 'subtitles': self.extract_subtitles(url, video_id, title, key, parsed_urls), 'formats': formats, 'series': title, 'season_number': int_or_none( diff --git a/yt_dlp/extractor/minoto.py b/yt_dlp/extractor/minoto.py index dba82db5ff..603ce940ba 100644 --- a/yt_dlp/extractor/minoto.py +++ b/yt_dlp/extractor/minoto.py @@ -37,7 +37,7 @@ class MinotoIE(InfoExtractor): 'filesize': int_or_none(fmt.get('filesize')), 'width': int_or_none(fmt.get('width')), 'height': int_or_none(fmt.get('height')), - 'codecs': parse_codecs(fmt.get('codecs')), + **parse_codecs(fmt.get('codecs')), }) self._sort_formats(formats) diff --git a/yt_dlp/extractor/palcomp3.py b/yt_dlp/extractor/palcomp3.py index 269e67a57e..d0a62fb17e 100644 --- a/yt_dlp/extractor/palcomp3.py +++ b/yt_dlp/extractor/palcomp3.py @@ -108,7 +108,7 @@ class PalcoMP3ArtistIE(PalcoMP3BaseIE): } name''' - @ classmethod + @classmethod def suitable(cls, url): return False if PalcoMP3IE._match_valid_url(url) else super(PalcoMP3ArtistIE, cls).suitable(url) diff --git a/yt_dlp/minicurses.py b/yt_dlp/minicurses.py index a466fb4b03..0e37ed8183 100644 --- a/yt_dlp/minicurses.py +++ b/yt_dlp/minicurses.py @@ -1,3 +1,4 @@ +import functools from threading import Lock from .utils import supports_terminal_sequences, TERMINAL_SEQUENCES @@ -49,6 +50,7 @@ class MultilinePrinter(MultilinePrinterBase): self._HAVE_FULLCAP = supports_terminal_sequences(self.stream) def lock(func): + @functools.wraps(func) def wrapper(self, *args, **kwargs): with self._movelock: return func(self, *args, **kwargs) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 4652e8c589..f45c548f2a 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -971,9 +971,6 @@ def 
parseOpts(overrideArguments=None): dest='batchfile', metavar='FILE', help="File containing URLs to download ('-' for stdin), one URL per line. " "Lines starting with '#', ';' or ']' are considered as comments and ignored") - filesystem.add_option( - '--id', default=False, - action='store_true', dest='useid', help=optparse.SUPPRESS_HELP) filesystem.add_option( '-P', '--paths', metavar='[TYPES:]PATH', dest='paths', default={}, type='str', @@ -1029,18 +1026,6 @@ def parseOpts(overrideArguments=None): '--trim-filenames', '--trim-file-names', metavar='LENGTH', dest='trim_file_name', default=0, type=int, help='Limit the filename length (excluding extension) to the specified number of characters') - filesystem.add_option( - '--auto-number', - action='store_true', dest='autonumber', default=False, - help=optparse.SUPPRESS_HELP) - filesystem.add_option( - '--title', - action='store_true', dest='usetitle', default=False, - help=optparse.SUPPRESS_HELP) - filesystem.add_option( - '--literal', default=False, - action='store_true', dest='usetitle', - help=optparse.SUPPRESS_HELP) filesystem.add_option( '-w', '--no-overwrites', action='store_false', dest='overwrites', default=None, @@ -1625,7 +1610,7 @@ def parseOpts(overrideArguments=None): argv = configs['system'] + configs['user'] + configs['home'] + configs['portable'] + configs['custom'] + configs['command-line'] opts, args = parser.parse_args(argv) if opts.verbose: - for label in ('System', 'User', 'Portable', 'Home', 'Custom', 'Command-line'): + for label in ('Command-line', 'Custom', 'Portable', 'Home', 'User', 'System'): key = label.lower() if paths.get(key): write_string(f'[debug] {label} config file: {paths[key]}\n') diff --git a/ytdlp_plugins/extractor/sample.py b/ytdlp_plugins/extractor/sample.py index 986e5bb228..d99b7ca331 100644 --- a/ytdlp_plugins/extractor/sample.py +++ b/ytdlp_plugins/extractor/sample.py @@ -5,7 +5,7 @@ from yt_dlp.extractor.common import InfoExtractor # ℹ️ Instructions on making extractors can be found at: -# 🔗 https://github.com/ytdl-org/youtube-dl#adding-support-for-a-new-site +# 🔗 https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#adding-support-for-a-new-site class SamplePluginIE(InfoExtractor): _WORKING = False From 81bcd43a033e62a2663d91ac1f7f1be6a785c182 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Sat, 9 Oct 2021 23:57:08 +0530 Subject: [PATCH 224/641] [HotStarSeries] Fix cookies (#1187) Authored by: Ashish0804 --- yt_dlp/extractor/hotstar.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index 8f0c673034..f66d3e433c 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -290,7 +290,7 @@ class HotStarPlaylistIE(HotStarBaseIE): class HotStarSeriesIE(HotStarBaseIE): IE_NAME = 'hotstar:series' - _VALID_URL = r'(?:https?://)(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+)' + _VALID_URL = r'(?P<url>(?:https?://)(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))' _TESTS = [{ 'url': 'https://www.hotstar.com/in/tv/radhakrishn/1260000646', 'info_dict': { @@ -312,7 +312,7 @@ class HotStarSeriesIE(HotStarBaseIE): }] def _real_extract(self, url): - series_id = self._match_id(url) + url, series_id = self._match_valid_url(url).groups() headers = { 'x-country-code': 'IN', 'x-platform-code': 'PCTV', @@ -324,7 +324,7 @@ class HotStarSeriesIE(HotStarBaseIE): video_id=series_id, headers=headers) entries = [ self.url_result( - 'hotstar:episode:%d' % video['contentId'], + 
'%s/ignoreme/%d' % (url, video['contentId']), ie=HotStarIE.ie_key(), video_id=video['contentId']) for video in item_json['body']['results']['items'] if video.get('contentId')] From 90d55df3304b13ffbc1dbf2db5bcb4c03c086d4f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Oct 2021 00:39:23 +0530 Subject: [PATCH 225/641] Release 2021.10.09 --- CONTRIBUTORS | 5 ++++ Changelog.md | 67 +++++++++++++++++++++++++++++++++++++++++++++++ README.md | 4 +-- supportedsites.md | 14 +++++++--- 4 files changed, 84 insertions(+), 6 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index e44302d57a..a535411c6e 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -118,3 +118,8 @@ sleaux-meaux sulyi tmarki Vangelis66 +AjaxGb +ajj8 +jakubadamw +jfogelman +timethrow diff --git a/Changelog.md b/Changelog.md index 7334f87c5e..2350f67add 100644 --- a/Changelog.md +++ b/Changelog.md @@ -14,6 +14,73 @@ --> +### 2021.10.09 + +* Improved progress reporting + * Separate `--console-title` and `--no-progress` + * Add option `--progress` to show progress-bar even in quiet mode + * Fix and refactor `minicurses` and use it for all progress reporting + * Standardize use of terminal sequences and enable color support for windows 10 + * Add option `--progress-template` to customize progress-bar and console-title + * Add postprocessor hooks and progress reporting +* [postprocessor] Add plugin support with option `--use-postprocessor` +* [extractor] Extract storyboards from SMIL manifests by [fstirlitz](https://github.com/fstirlitz) +* [outtmpl] Alternate form of format type `l` for `\n` delimited list +* [outtmpl] Format type `U` for unicode normalization +* [outtmpl] Allow empty output template to skip a type of file +* Merge webm formats into mkv if thumbnails are to be embedded +* [adobepass] Add RCN as MSO by [jfogelman](https://github.com/jfogelman) +* [ciscowebex] Add extractor by [damianoamatruda](https://github.com/damianoamatruda) +* [Gettr] Add extractor by [i6t](https://github.com/i6t) +* [GoPro] Add extractor by [i6t](https://github.com/i6t) +* [N1] Add extractor by [u-spec-png](https://github.com/u-spec-png) +* [Theta] Add video extractor by [alerikaisattera](https://github.com/alerikaisattera) +* [Veo] Add extractor by [i6t](https://github.com/i6t) +* [Vupload] Add extractor by [u-spec-png](https://github.com/u-spec-png) +* [bbc] Extract better quality videos by [ajj8](https://github.com/ajj8) +* [Bilibili] Add subtitle converter by [u-spec-png](https://github.com/u-spec-png) +* [CBC] Cleanup tests by [makeworld-the-better-one](https://github.com/makeworld-the-better-one) +* [Douyin] Rewrite extractor by [MinePlayersPE](https://github.com/MinePlayersPE) +* [Funimation] Fix for /v/ urls by [pukkandan](https://github.com/pukkandan), [Jules-A](https://github.com/Jules-A) +* [Funimation] Sort formats according to the relevant extractor-args +* [Hidive] Fix duplicate and incorrect formats +* [HotStarSeries] Fix cookies by [Ashish0804](https://github.com/Ashish0804) +* [LinkedInLearning] Add subtitles by [Ashish0804](https://github.com/Ashish0804) +* [Mediaite] Relax valid url by [coletdjnz](https://github.com/coletdjnz) +* [Newgrounds] Add age_limit and fix duration by [u-spec-png](https://github.com/u-spec-png) +* [Newgrounds] Fix view count on songs by [u-spec-png](https://github.com/u-spec-png) +* [parliamentlive.tv] Fix extractor by [u-spec-png](https://github.com/u-spec-png) +* [PolskieRadio] Fix extractors by [jakubadamw](https://github.com/jakubadamw), [u-spec-png](https://github.com/u-spec-png) +* [reddit] 
Add embedded url by [u-spec-png](https://github.com/u-spec-png) +* [reddit] Fix 429 by generating a random `reddit_session` by [AjaxGb](https://github.com/AjaxGb) +* [Rumble] Add RumbleChannelIE by [Ashish0804](https://github.com/Ashish0804) +* [soundcloud:playlist] Detect last page correctly +* [SovietsCloset] Add duration from m3u8 by [ChillingPepper](https://github.com/ChillingPepper) +* [Streamable] Add codecs by [u-spec-png](https://github.com/u-spec-png) +* [vidme] Remove extractor by [alerikaisattera](https://github.com/alerikaisattera) +* [youtube:tab] Fallback to API when webpage fails to download by [coletdjnz](https://github.com/coletdjnz) +* [youtube] Fix non-fatal errors in fetching player +* Fix `--flat-playlist` when neither IE nor id is known +* Fix `-f mp4` behaving differently from youtube-dl +* Workaround for bug in `ssl.SSLContext.load_default_certs` +* [aes] Improve performance slightly by [sulyi](https://github.com/sulyi) +* [cookies] Fix keyring fallback by [mbway](https://github.com/mbway) +* [embedsubtitle] Fix error when duration is unknown +* [ffmpeg] Fix error when subtitle file is missing +* [ffmpeg] Set max probesize to workaround AAC HLS stream issues by [shirt](https://github.com/shirt-dev) +* [FixupM3u8] Remove redundant run if merged is needed +* [hls] Fix decryption issues by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan) +* [http] Respect user-provided chunk size over extractor's +* [utils] Let traverse_obj accept functions as keys +* [docs] Add note about our custom ffmpeg builds +* [docs] Write embedding and contributing documentation by [pukkandan](https://github.com/pukkandan), [timethrow](https://github.com/timethrow) +* [update] Check for new version even if not updateable +* [build] Add more files to the tarball +* [build] Allow building with py2exe (and misc fixes) +* [build] Use pycryptodomex by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan) +* [cleanup] Some minor refactoring, improve docs and misc cleanup + + ### 2021.09.25 * Add new option `--netrc-location` diff --git a/README.md b/README.md index c0f84fcac3..56755f00bb 100644 --- a/README.md +++ b/README.md @@ -92,9 +92,9 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats -* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ, BannedVideo, bilibili categories, Epicon, filmmodu, GabTV, HungamaAlbum, ManotoTV, Niconico search, Patreon User, peloton, ProjectVeritas, radiko, StarTV, tiktok user, Tokentube, voicy, TV2HuSeries, biliintl, 17live, NewgroundsUser, peertube channel/playlist, ZenYandex, CAM4, CGTN, damtomo, gotostage, Koo, Mediaite, Mediaklikk, MuseScore, nzherald, Olympics replay, radlive, SovietsCloset, Streamanity, Theta, Chingari +* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, RCN MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, 
wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ, BannedVideo, bilibili categories, Epicon, filmmodu, GabTV, HungamaAlbum, ManotoTV, Niconico search, Patreon User, peloton, ProjectVeritas, radiko, StarTV, tiktok user, Tokentube, voicy, TV2HuSeries, biliintl, 17live, NewgroundsUser, peertube channel/playlist, ZenYandex, CAM4, CGTN, damtomo, gotostage, Koo, Mediaite, Mediaklikk, MuseScore, nzherald, Olympics replay, radlive, SovietsCloset, Streamanity, Theta, Chingari, ciscowebex, Gettr, GoPro, N1, Theta, Veo, Vupload -* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll playlist, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds, SAML Verizon login, Hungama, afreecatv, aljazeera, ATV, bitchute, camtube, CDA, eroprofile, facebook, HearThisAtIE, iwara, kakao, Motherless, Nova, peertube, pornhub, reddit, tiktok, TV2, TV2Hu, tv5mondeplus, VH1, Viafree, XHamster, 9Now, AnimalPlanet, Arte, CBC, Chingari, comedycentral, DIYNetwork, niconico, dw, funimation, globo, HiDive, NDR, Nuvid, Oreilly, pbs, plutotv, reddit, redtube, soundcloud, SpankBang, VrtNU +* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll playlist, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds, SAML Verizon login, Hungama, afreecatv, aljazeera, ATV, bitchute, camtube, CDA, eroprofile, facebook, HearThisAtIE, iwara, kakao, Motherless, Nova, peertube, pornhub, reddit, tiktok, TV2, TV2Hu, tv5mondeplus, VH1, Viafree, XHamster, 9Now, AnimalPlanet, Arte, CBC, Chingari, comedycentral, DIYNetwork, niconico, dw, funimation, globo, HiDive, NDR, Nuvid, Oreilly, pbs, plutotv, reddit, redtube, soundcloud, SpankBang, VrtNU, bbc, Bilibili, LinkedInLearning, parliamentlive, PolskieRadio, Streamable, vidme * **Subtitle extraction from manifests**: Subtitles can be extracted from streaming media manifests. 
See [commit/be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details diff --git a/supportedsites.md b/supportedsites.md index e883351a97..3fe79683aa 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -199,6 +199,7 @@ - **Cinemax** - **CiscoLiveSearch** - **CiscoLiveSession** + - **ciscowebex**: Cisco Webex - **CJSW** - **cliphunter** - **Clippit** @@ -379,6 +380,7 @@ - **gem.cbc.ca:live** - **gem.cbc.ca:playlist** - **generic**: Generic downloader that works on some sites + - **Gettr** - **Gfycat** - **GiantBomb** - **Giga** @@ -392,6 +394,7 @@ - **google:podcasts** - **google:podcasts:feed** - **GoogleDrive** + - **GoPro** - **Goshgay** - **GoToStage** - **GPUTechConf** @@ -630,6 +633,8 @@ - **MyviEmbed** - **MyVisionTV** - **n-tv.de** + - **N1Info:article** + - **N1InfoAsset** - **natgeo:video** - **NationalGeographicTV** - **Naver** @@ -905,6 +910,7 @@ - **RTVNH** - **RTVS** - **RUHD** + - **RumbleChannel** - **RumbleEmbed** - **rutube**: Rutube videos - **rutube:channel**: Rutube channels @@ -1065,7 +1071,8 @@ - **TheScene** - **TheStar** - **TheSun** - - **Theta** + - **ThetaStream** + - **ThetaVideo** - **TheWeatherChannel** - **ThisAmericanLife** - **ThisAV** @@ -1174,6 +1181,7 @@ - **Varzesh3** - **Vbox7** - **VeeHD** + - **Veo** - **Veoh** - **Vesti**: Вести.Ru - **Vevo** @@ -1202,9 +1210,6 @@ - **VidioLive** - **VidioPremier** - **VidLii** - - **vidme** - - **vidme:user** - - **vidme:user:likes** - **vier**: vier.be and vijf.be - **vier:videos** - **viewlift** @@ -1256,6 +1261,7 @@ - **VTXTV** - **vube**: Vube.com - **VuClip** + - **Vupload** - **VVVVID** - **VVVVIDShow** - **VyboryMos** From a170527e1fc382dd7be214c5134f5013a5f0747f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 9 Oct 2021 19:11:24 +0000 Subject: [PATCH 226/641] [version] update :ci skip all --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- yt_dlp/version.py | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index 8a55035103..d07c33e110 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -21,7 +21,7 @@ assignees: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running yt-dlp version **2021.09.25** +- [ ] I've verified that I'm running yt-dlp version **2021.10.09** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues including closed ones @@ -47,7 +47,7 @@ Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running yt-dlp version **2021.09.25** +- [ ] I've verified that I'm running yt-dlp version **2021.10.09** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] The provided URLs do not contain any DRM to the best of my knowledge diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index 38b38c803b..dd9bc1faa8 100644 --- 
a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -21,14 +21,14 @@ assignees: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running yt-dlp version **2021.09.25** +- [ ] I've verified that I'm running yt-dlp version **2021.10.09** - [ ] I've searched the bugtracker for similar site feature requests including closed ones - [ ] I've read the opening an issue section in CONTRIBUTING.md - [ ] I have given an appropriate title to the issue diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index b2f7efcdab..8981eca0e5 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -21,7 +21,7 @@ assignees: '' - [ ] I'm reporting a bug unrelated to a specific site -- [ ] I've verified that I'm running yt-dlp version **2021.09.25** +- [ ] I've verified that I'm running yt-dlp version **2021.10.09** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] The provided URLs do not contain any DRM to the best of my knowledge - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped @@ -48,7 +48,7 @@ Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running yt-dlp version **2021.09.25** +- [ ] I've verified that I'm running yt-dlp version **2021.10.09** - [ ] I've searched the bugtracker for similar feature requests including closed ones - [ ] I've read the opening an issue section in CONTRIBUTING.md - [ ] I have given an appropriate title to the issue diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 965a89b885..34b6e9a5d8 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2021.09.25' +__version__ = '2021.10.09' From aa9a92fdbbca172689495f2990af6a135bae90d5 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Oct 2021 02:23:42 +0530 Subject: [PATCH 227/641] [downloader/ffmpeg] Fix bug in initializing `FFmpegPostProcessor` When `FFmpegFD` initializes the PP, it passes `self` as the `downloader` But it does not have a `_postprocessor_hooks` attribute Closes #1211 --- yt_dlp/postprocessor/common.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index 376a1c95ef..d2daeb0fba 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -90,9 +90,7 @@ class PostProcessor(metaclass=PostProcessorMetaClass): def set_downloader(self, downloader): """Sets the downloader for this PP.""" self._downloader = downloader - if not downloader: - return - for ph in downloader._postprocessor_hooks: + for ph in getattr(downloader, '_postprocessor_hooks', []): self.add_progress_hook(ph) @staticmethod From 28fe35b4e3da41ec78a092d06ad76f5ff67c12e8 Mon Sep 17 00:00:00 2001 From: Felix S Date: Sat, 9 Oct 2021 21:50:17 +0000 Subject: [PATCH 228/641] [francetv] Update extractor (#1096) Original PR: https://github.com/ytdl-org/youtube-dl/pull/29996 Closes: https://github.com/yt-dlp/yt-dlp/issues/970, https://github.com/ytdl-org/youtube-dl/issues/29956, https://github.com/ytdl-org/youtube-dl/issues/29957, https://github.com/ytdl-org/youtube-dl/issues/29969, https://github.com/ytdl-org/youtube-dl/issues/29990, https://github.com/ytdl-org/youtube-dl/issues/30010 Authored by: fstirlitz, sarnoud --- 
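 A note on the reworked flow before the diff itself: the rewritten `FranceTVIE` no longer calls `getInfosOeuvre/v2` but queries the player web service once per device type and merges the responses. A standalone sketch of just that request loop, not part of the patch (endpoint and query parameters are taken from the new code below; networking and error handling are deliberately simplified to the standard library):

    import json
    import urllib.parse
    import urllib.request

    def fetch_player_info(video_id):
        # One response per device type; the extractor then merges the
        # 'video' and 'meta' objects from whichever requests succeeded
        infos = []
        for device_type in ('desktop', 'mobile'):
            query = urllib.parse.urlencode({'device_type': device_type, 'browser': 'chrome'})
            url = f'https://player.webservices.francetelevisions.fr/v1/videos/{video_id}?{query}'
            try:
                with urllib.request.urlopen(url) as resp:
                    infos.append(json.load(resp))
            except (OSError, ValueError):
                continue  # stands in for fatal=False: a failed variant is skipped
        return infos
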
yt_dlp/extractor/extractors.py | 5 - yt_dlp/extractor/francetv.py | 347 +++++++++------------------------ 2 files changed, 87 insertions(+), 265 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index a224c4f9a6..d8e3cd738b 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -474,12 +474,7 @@ from .franceinter import FranceInterIE from .francetv import ( FranceTVIE, FranceTVSiteIE, - FranceTVEmbedIE, FranceTVInfoIE, - FranceTVInfoSportIE, - FranceTVJeunesseIE, - GenerationWhatIE, - CultureboxIE, ) from .freesound import FreesoundIE from .freespeech import FreespeechIE diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index 41910cefb1..3bbab69e61 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -4,19 +4,12 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import ( - compat_str, -) from ..utils import ( - clean_html, determine_ext, ExtractorError, - int_or_none, - parse_duration, + format_field, + parse_iso8601, parse_qs, - try_get, - url_or_none, - urljoin, ) from .dailymotion import DailymotionIE @@ -89,97 +82,81 @@ class FranceTVIE(InfoExtractor): # Videos are identified by idDiffusion so catalogue part is optional. # However when provided, some extra formats may be returned so we pass # it if available. - info = self._download_json( - 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/', - video_id, 'Downloading video JSON', query={ - 'idDiffusion': video_id, - 'catalogue': catalogue or '', - }) - - if info.get('status') == 'NOK': - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, info['message']), - expected=True) - allowed_countries = info['videos'][0].get('geoblocage') - if allowed_countries: - georestricted = True - geo_info = self._download_json( - 'http://geo.francetv.fr/ws/edgescape.json', video_id, - 'Downloading geo restriction info') - country = geo_info['reponse']['geo_info']['country_code'] - if country not in allowed_countries: - raise ExtractorError( - 'The video is not available from your location', - expected=True) - else: - georestricted = False - - def sign(manifest_url, manifest_id): - for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'): - signed_url = url_or_none(self._download_webpage( - 'https://%s/esi/TA' % host, video_id, - 'Downloading signed %s manifest URL' % manifest_id, - fatal=False, query={ - 'url': manifest_url, - })) - if signed_url: - return signed_url - return manifest_url - is_live = None - videos = [] + title = None + subtitle = None + image = None + duration = None + timestamp = None + spritesheets = None - for video in (info.get('videos') or []): - if video.get('statut') != 'ONLINE': + for device_type in ('desktop', 'mobile'): + dinfo = self._download_json( + 'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id, + video_id, 'Downloading %s video JSON' % device_type, query={ + 'device_type': device_type, + 'browser': 'chrome', + }, fatal=False) + + if not dinfo: continue - if not video.get('url'): - continue - videos.append(video) - if not videos: - for device_type in ['desktop', 'mobile']: - fallback_info = self._download_json( - 'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id, - video_id, 'Downloading fallback %s video JSON' % device_type, query={ - 'device_type': device_type, - 'browser': 'chrome', - }, fatal=False) + video = dinfo.get('video') + if video: + videos.append(video) + if 
duration is None: + duration = video.get('duration') + if is_live is None: + is_live = video.get('is_live') + if spritesheets is None: + spritesheets = video.get('spritesheets') - if fallback_info and fallback_info.get('video'): - videos.append(fallback_info['video']) + meta = dinfo.get('meta') + if meta: + if title is None: + title = meta.get('title') + # XXX: what is meta['pre_title']? + if subtitle is None: + subtitle = meta.get('additional_title') + if image is None: + image = meta.get('image_url') + if timestamp is None: + timestamp = parse_iso8601(meta.get('broadcasted_at')) formats = [] subtitles = {} for video in videos: - video_url = video.get('url') - if not video_url: - continue - if is_live is None: - is_live = (try_get( - video, lambda x: x['plages_ouverture'][0]['direct'], bool) is True - or video.get('is_live') is True - or '/live.francetv.fr/' in video_url) format_id = video.get('format') + + video_url = None + if video.get('workflow') == 'token-akamai': + token_url = video.get('token') + if token_url: + token_json = self._download_json( + token_url, video_id, + 'Downloading signed %s manifest URL' % format_id) + if token_json: + video_url = token_json.get('url') + if not video_url: + video_url = video.get('url') + ext = determine_ext(video_url) if ext == 'f4m': - if georestricted: - # See https://github.com/ytdl-org/youtube-dl/issues/3963 - # m3u8 urls work fine - continue formats.extend(self._extract_f4m_formats( - sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', - video_id, f4m_id=format_id, fatal=False)) + video_url, video_id, f4m_id=format_id, fatal=False)) elif ext == 'm3u8': - m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles( - sign(video_url, format_id), video_id, 'mp4', + fmts, subs = self._extract_m3u8_formats_and_subtitles( + video_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False) - formats.extend(m3u8_fmts) - subtitles = self._merge_subtitles(subtitles, m3u8_subs) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) elif ext == 'mpd': - formats.extend(self._extract_mpd_formats( - sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False)) + fmts, subs = self._extract_mpd_formats_and_subtitles( + video_url, video_id, mpd_id=format_id, fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) elif video_url.startswith('rtmp'): formats.append({ 'url': video_url, @@ -193,28 +170,43 @@ class FranceTVIE(InfoExtractor): 'format_id': format_id, }) + # XXX: what is video['captions']? 
+ + for f in formats: + if f.get('acodec') != 'none' and f.get('language') in ('qtz', 'qad'): + f['language_preference'] = -10 + f['format_note'] = 'audio description%s' % format_field(f, 'format_note', ', %s') + + if spritesheets: + formats.append({ + 'format_id': 'spritesheets', + 'format_note': 'storyboard', + 'acodec': 'none', + 'vcodec': 'none', + 'ext': 'mhtml', + 'protocol': 'mhtml', + 'url': 'about:dummy', + 'fragments': [{ + 'path': sheet, + # XXX: not entirely accurate; each spritesheet seems to be + # a 10×10 grid of thumbnails corresponding to approximately + # 2 seconds of the video; the last spritesheet may be shorter + 'duration': 200, + } for sheet in spritesheets] + }) + self._sort_formats(formats) - title = info['titre'] - subtitle = info.get('sous_titre') if subtitle: title += ' - %s' % subtitle title = title.strip() - subtitles.setdefault('fr', []).extend( - [{ - 'url': subformat['url'], - 'ext': subformat.get('format'), - } for subformat in info.get('subtitles', []) if subformat.get('url')] - ) - return { 'id': video_id, 'title': self._live_title(title) if is_live else title, - 'description': clean_html(info.get('synopsis')), - 'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')), - 'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')), - 'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])), + 'thumbnail': image, + 'duration': duration, + 'timestamp': timestamp, 'is_live': is_live, 'formats': formats, 'subtitles': subtitles, @@ -308,35 +300,6 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): return self._make_url_result(video_id, catalogue) -class FranceTVEmbedIE(FranceTVBaseInfoExtractor): - _VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P<id>[^&]+)' - - _TESTS = [{ - 'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961', - 'info_dict': { - 'id': 'NI_983319', - 'ext': 'mp4', - 'title': 'Le Pen Reims', - 'upload_date': '20170505', - 'timestamp': 1493981780, - 'duration': 16, - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': [FranceTVIE.ie_key()], - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - video = self._download_json( - 'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id, - video_id) - - return self._make_url_result(video['video_id'], video.get('catalog')) - - class FranceTVInfoIE(FranceTVBaseInfoExtractor): IE_NAME = 'francetvinfo.fr' _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&.]+)' @@ -426,139 +389,3 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor): webpage, 'video id') return self._make_url_result(video_id) - - -class FranceTVInfoSportIE(FranceTVBaseInfoExtractor): - IE_NAME = 'sport.francetvinfo.fr' - _VALID_URL = r'https?://sport\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)' - _TESTS = [{ - 'url': 'https://sport.francetvinfo.fr/les-jeux-olympiques/retour-sur-les-meilleurs-moments-de-pyeongchang-2018', - 'info_dict': { - 'id': '6e49080e-3f45-11e8-b459-000d3a2439ea', - 'ext': 'mp4', - 'title': 'Retour sur les meilleurs moments de Pyeongchang 2018', - 'timestamp': 1523639962, - 'upload_date': '20180413', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': [FranceTVIE.ie_key()], - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - video_id = self._search_regex(r'data-video="([^"]+)"', webpage, 'video_id') - return self._make_url_result(video_id, 
'Sport-web') - - -class GenerationWhatIE(InfoExtractor): - IE_NAME = 'france2.fr:generation-what' - _VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://generation-what.francetv.fr/portrait/video/present-arms', - 'info_dict': { - 'id': 'wtvKYUG45iw', - 'ext': 'mp4', - 'title': 'Generation What - Garde à vous - FRA', - 'uploader': 'Generation What', - 'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w', - 'upload_date': '20160411', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Youtube'], - }, { - 'url': 'http://generation-what.francetv.fr/europe/video/present-arms', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - youtube_id = self._search_regex( - r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';", - webpage, 'youtube id') - - return self.url_result(youtube_id, ie='Youtube', video_id=youtube_id) - - -class CultureboxIE(FranceTVBaseInfoExtractor): - _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'https://culturebox.francetvinfo.fr/opera-classique/musique-classique/c-est-baroque/concerts/cantates-bwv-4-106-et-131-de-bach-par-raphael-pichon-57-268689', - 'info_dict': { - 'id': 'EV_134885', - 'ext': 'mp4', - 'title': 'Cantates BWV 4, 106 et 131 de Bach par Raphaël Pichon 5/7', - 'description': 'md5:19c44af004b88219f4daa50fa9a351d4', - 'upload_date': '20180206', - 'timestamp': 1517945220, - 'duration': 5981, - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': [FranceTVIE.ie_key()], - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - if ">Ce live n'est plus disponible en replay<" in webpage: - raise ExtractorError( - 'Video %s is not available' % display_id, expected=True) - - video_id, catalogue = self._search_regex( - r'["\'>]https?://videos\.francetv\.fr/video/([^@]+@.+?)["\'<]', - webpage, 'video id').split('@') - - return self._make_url_result(video_id, catalogue) - - -class FranceTVJeunesseIE(FranceTVBaseInfoExtractor): - _VALID_URL = r'(?P<url>https?://(?:www\.)?(?:zouzous|ludo)\.fr/heros/(?P<id>[^/?#&]+))' - - _TESTS = [{ - 'url': 'https://www.zouzous.fr/heros/simon', - 'info_dict': { - 'id': 'simon', - }, - 'playlist_count': 9, - }, { - 'url': 'https://www.ludo.fr/heros/ninjago', - 'info_dict': { - 'id': 'ninjago', - }, - 'playlist_count': 10, - }, { - 'url': 'https://www.zouzous.fr/heros/simon?abc', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - playlist_id = mobj.group('id') - - playlist = self._download_json( - '%s/%s' % (mobj.group('url'), 'playlist'), playlist_id) - - if not playlist.get('count'): - raise ExtractorError( - '%s is not available' % playlist_id, expected=True) - - entries = [] - for item in playlist['items']: - identity = item.get('identity') - if identity and isinstance(identity, compat_str): - entries.append(self._make_url_result(identity)) - - return self.playlist_result(entries, playlist_id) From 91b6c884c9c02a2a8ffe247131d05e8e8a6021a4 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Oct 2021 02:56:30 +0530 Subject: [PATCH 229/641] Revert "[ffmpeg] Set max probesize to workaround AAC HLS stream issues (#1109)" This reverts commit 250a938de82fb6b023c09ce3d89471c5871ff830. 
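 Concretely, the reverted change made every ffmpeg command line begin with the two extra arguments shown below; the hunk that follows simply drops them again. A minimal illustration (argv prefixes reconstructed from this diff, before the `-loglevel` arguments are appended):

    with_probesize = ['ffmpeg', '-y', '-probesize', 'max']  # behaviour being reverted
    without_probesize = ['ffmpeg', '-y']                    # behaviour restored by this patch
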
This is no longer necessary since 7687c8ac6e223a725b3ef8f56f04779bebdc86c5 --- yt_dlp/postprocessor/ffmpeg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 6bb66569ae..5f6861f938 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -262,7 +262,7 @@ class FFmpegPostProcessor(PostProcessor): oldest_mtime = min( os.stat(encodeFilename(path)).st_mtime for path, _ in input_path_opts if path) - cmd = [encodeFilename(self.executable, True), encodeArgument('-y'), encodeArgument('-probesize'), encodeArgument('max')] + cmd = [encodeFilename(self.executable, True), encodeArgument('-y')] # avconv does not have repeat option if self.basename == 'ffmpeg': cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')] From c9652aa4185afa1c93aeba4e0b06a14b9bb78b5c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Oct 2021 03:23:47 +0530 Subject: [PATCH 230/641] [docs] Remove incorrect dependency on VC++10 Closes #1163 --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 56755f00bb..8acb574846 100644 --- a/README.md +++ b/README.md @@ -193,8 +193,10 @@ If you have installed using Homebrew, run `brew upgrade yt-dlp/taps/yt-dlp` ### DEPENDENCIES Python versions 3.6+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly. - + While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly recommended * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging seperate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. Licence [depends on the build](https://www.ffmpeg.org/legal.html) From 2e01ba62181fee12bf44b8f3f6cb0f46cd591e61 Mon Sep 17 00:00:00 2001 From: Bojidar Qnkov <41879217+Bojidarist@users.noreply.github.com> Date: Sun, 10 Oct 2021 03:11:10 +0300 Subject: [PATCH 231/641] [NovaPlay] Add extractor (#1209) Authored by: Bojidarist --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/novaplay.py | 63 ++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 yt_dlp/extractor/novaplay.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index d8e3cd738b..0a761135e3 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -933,6 +933,7 @@ from .nova import ( NovaEmbedIE, NovaIE, ) +from .novaplay import NovaPlayIE from .nowness import ( NownessIE, NownessPlaylistIE, diff --git a/yt_dlp/extractor/novaplay.py b/yt_dlp/extractor/novaplay.py new file mode 100644 index 0000000000..724986a060 --- /dev/null +++ b/yt_dlp/extractor/novaplay.py @@ -0,0 +1,63 @@ +# coding: utf-8 +from .common import InfoExtractor +from ..utils import int_or_none, parse_duration, parse_iso8601 + + +class NovaPlayIE(InfoExtractor): + _VALID_URL = r'https://play.nova\.bg/video/.*/(?P<id>\d+)' + _TESTS = [ + { + 'url': 'https://play.nova.bg/video/bratya/season-3/bratq-2021-10-08/548677', + 'md5': 'b1127a84e61bed1632b7c2ca9cbb4153', + 'info_dict': { + 'id': '548677', + 'ext': 'mp4', + 'title': 'Братя', + 'alt_title': 'bratya/season-3/bratq-2021-10-08', + 'duration': 1603.0, + 'timestamp': 1633724150, + 'upload_date': '20211008', + 'thumbnail': 'https://nbg-img.fite.tv/img/548677_460x260.jpg', + 'description': 'Сезон 3 Епизод 25' + }, + }, + { + 'url': 
'https://play.nova.bg/video/igri-na-volqta/season-3/igri-na-volqta-2021-09-20-1/548227', + 'md5': '5fd61b8ecbe582fc021019d570965d58', + 'info_dict': { + 'id': '548227', + 'ext': 'mp4', + 'title': 'Игри на волята: България (20.09.2021) - част 1', + 'alt_title': 'gri-na-volqta/season-3/igri-na-volqta-2021-09-20-1', + 'duration': 4060.0, + 'timestamp': 1632167564, + 'upload_date': '20210920', + 'thumbnail': 'https://nbg-img.fite.tv/img/548227_460x260.jpg', + 'description': 'Сезон 3 Епизод 13' + }, + } + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_props = self._parse_json(self._search_regex( + r'({.+})', + webpage, 'video_props'), video_id)['props']['pageProps']['video'] + m3u8_url = self._download_json( + f'https://nbg-api.fite.tv/api/v2/videos/{video_id}/streams', + video_id, headers={'x-flipps-user-agent': 'Flipps/75/9.7'})[0]['url'] + formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_props['title'], + 'alt_title': video_props.get('slug'), + 'thumbnail': self._og_search_thumbnail(webpage), + 'description': self._og_search_description(webpage), + 'formats': formats, + 'duration': parse_duration(video_props['duration']), + 'timestamp': parse_iso8601(video_props['published_at']), + 'view_count': int_or_none(video_props['view_count']), + } From d1d5c08f29b3b1d60d8b11b812029757fe3fd90a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Oct 2021 07:08:22 +0530 Subject: [PATCH 232/641] [minicurses] Fix when printing to file Closes #1215 --- yt_dlp/YoutubeDL.py | 4 ++++ yt_dlp/downloader/common.py | 5 ++--- yt_dlp/minicurses.py | 42 +++++++++++++++++++------------------ yt_dlp/utils.py | 2 +- 4 files changed, 29 insertions(+), 24 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2b3c33ce53..49d6b3779b 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -514,6 +514,7 @@ class YoutubeDL(object): self.cache = Cache(self) windows_enable_vt_mode() + # FIXME: This will break if we ever print color to stdout self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file) if sys.version_info < (3, 6): @@ -3298,6 +3299,9 @@ class YoutubeDL(object): KEYRING_AVAILABLE and 'keyring', )))) or 'none' self._write_string('[debug] Optional libraries: %s\n' % lib_str) + self._write_string('[debug] ANSI escape support: stdout = %s, stderr = %s\n' % ( + supports_terminal_sequences(self._screen_file), + supports_terminal_sequences(self._err_file))) proxy_map = {} for handler in self._opener.handlers: diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 50e674829e..89cdffd246 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -3,7 +3,6 @@ from __future__ import division, unicode_literals import copy import os import re -import sys import time import random @@ -247,9 +246,9 @@ class FileDownloader(object): elif self.ydl.params.get('logger'): self._multiline = MultilineLogger(self.ydl.params['logger'], lines) elif self.params.get('progress_with_newline'): - self._multiline = BreaklineStatusPrinter(sys.stderr, lines) + self._multiline = BreaklineStatusPrinter(self.ydl._screen_file, lines) else: - self._multiline = MultilinePrinter(sys.stderr, lines, not self.params.get('quiet')) + self._multiline = MultilinePrinter(self.ydl._screen_file, lines, not self.params.get('quiet')) def _finish_multiline_status(self): 
self._multiline.end() diff --git a/yt_dlp/minicurses.py b/yt_dlp/minicurses.py index 0e37ed8183..a6e159a143 100644 --- a/yt_dlp/minicurses.py +++ b/yt_dlp/minicurses.py @@ -1,6 +1,6 @@ import functools from threading import Lock -from .utils import supports_terminal_sequences, TERMINAL_SEQUENCES +from .utils import supports_terminal_sequences, TERMINAL_SEQUENCES, write_string class MultilinePrinterBase: @@ -25,20 +25,26 @@ class MultilinePrinterBase: return f'{line + 1}: {text}' return text + def write(self, *text): + write_string(''.join(text), self.stream) + class QuietMultilinePrinter(MultilinePrinterBase): pass class MultilineLogger(MultilinePrinterBase): + def write(self, *text): + self.stream.debug(''.join(text)) + def print_at_line(self, text, pos): # stream is the logger object, not an actual stream - self.stream.debug(self._add_line_number(text, pos)) + self.write(self._add_line_number(text, pos)) class BreaklineStatusPrinter(MultilinePrinterBase): def print_at_line(self, text, pos): - self.stream.write(self._add_line_number(text, pos) + '\n') + self.write(self._add_line_number(text, pos), '\n') class MultilinePrinter(MultilinePrinterBase): @@ -58,50 +64,46 @@ class MultilinePrinter(MultilinePrinterBase): def _move_cursor(self, dest): current = min(self._lastline, self.maximum) - self.stream.write('\r') + yield '\r' distance = dest - current if distance < 0: - self.stream.write(TERMINAL_SEQUENCES['UP'] * -distance) + yield TERMINAL_SEQUENCES['UP'] * -distance elif distance > 0: - self.stream.write(TERMINAL_SEQUENCES['DOWN'] * distance) + yield TERMINAL_SEQUENCES['DOWN'] * distance self._lastline = dest @lock def print_at_line(self, text, pos): if self._HAVE_FULLCAP: - self._move_cursor(pos) - self.stream.write(TERMINAL_SEQUENCES['ERASE_LINE']) - self.stream.write(text) - return + self.write(*self._move_cursor(pos), TERMINAL_SEQUENCES['ERASE_LINE'], text) text = self._add_line_number(text, pos) textlen = len(text) if self._lastline == pos: # move cursor at the start of progress when writing to same line - self.stream.write('\r') + prefix = '\r' if self._lastlength > textlen: text += ' ' * (self._lastlength - textlen) self._lastlength = textlen else: # otherwise, break the line - self.stream.write('\n') + prefix = '\n' self._lastlength = textlen - self.stream.write(text) + self.write(prefix, text) self._lastline = pos @lock def end(self): # move cursor to the end of the last line, and write line break # so that other to_screen calls can precede - if self._HAVE_FULLCAP: - self._move_cursor(self.maximum) + text = self._move_cursor(self.maximum) if self._HAVE_FULLCAP else [] if self.preserve_output: - self.stream.write('\n') + self.write(*text, '\n') return if self._HAVE_FULLCAP: - self.stream.write( - TERMINAL_SEQUENCES['ERASE_LINE'] - + f'{TERMINAL_SEQUENCES["UP"]}{TERMINAL_SEQUENCES["ERASE_LINE"]}' * self.maximum) + self.write( + *text, TERMINAL_SEQUENCES['ERASE_LINE'], + f'{TERMINAL_SEQUENCES["UP"]}{TERMINAL_SEQUENCES["ERASE_LINE"]}' * self.maximum) else: - self.stream.write(' ' * self._lastlength) + self.write(*text, ' ' * self._lastlength) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index db9b9de948..8e5c08ce54 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -6458,7 +6458,7 @@ def jwt_encode_hs256(payload_data, key, headers={}): def supports_terminal_sequences(stream): if compat_os_name == 'nt': - if get_windows_version() < (10, ): + if get_windows_version() < (10, 0, 10586): return False elif not os.getenv('TERM'): return False From 
84999521c89a1146feaa0e58d735155df06a6fe5 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Oct 2021 07:19:06 +0530 Subject: [PATCH 233/641] [build] Allow to release without changelog so that forks can build using GHA easily --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 324cf7eb65..5717ce8ee4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -47,7 +47,7 @@ jobs: - name: Get Changelog id: get_changelog run: | - changelog=$(cat Changelog.md | grep -oPz '(?s)(?<=### ${{ steps.bump_version.outputs.ytdlp_version }}\n{2}).+?(?=\n{2,3}###)') + changelog=$(cat Changelog.md | grep -oPz '(?s)(?<=### ${{ steps.bump_version.outputs.ytdlp_version }}\n{2}).+?(?=\n{2,3}###)') || true echo "changelog<> $GITHUB_ENV echo "$changelog" >> $GITHUB_ENV echo "EOF" >> $GITHUB_ENV From 21186af70a8809f59ad39d1d01f63203ce74da3b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Oct 2021 09:28:43 +0530 Subject: [PATCH 234/641] [downloader] Fix throttledratelimit The timer should not reset at start of each block --- yt_dlp/downloader/http.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 704ae6f5ad..3bc41e5b22 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -58,6 +58,7 @@ class HttpFD(FileDownloader): ctx.block_size = self.params.get('buffersize', 1024) ctx.start_time = time.time() ctx.chunk_size = None + throttle_start = None if self.params.get('continuedl', True): # Establish possible resume length @@ -197,6 +198,7 @@ class HttpFD(FileDownloader): raise RetryDownload(err) def download(): + nonlocal throttle_start data_len = ctx.data.info().get('Content-length', None) # Range HTTP header may be ignored/unsupported by a webserver @@ -225,7 +227,6 @@ class HttpFD(FileDownloader): # measure time over whole while-loop, so slow_down() and best_block_size() work together properly now = None # needed for slow_down() in the first loop run before = start # start measuring - throttle_start = None def retry(e): to_stdout = ctx.tmpfilename == '-' @@ -326,7 +327,7 @@ class HttpFD(FileDownloader): if ctx.stream is not None and ctx.tmpfilename != '-': ctx.stream.close() raise ThrottledDownload() - else: + elif speed: throttle_start = None if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len: From dec0d56fa9bee6a9c10ed33184a1a852e3d6180b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Oct 2021 04:59:54 +0530 Subject: [PATCH 235/641] Release 2021.10.10 --- CONTRIBUTORS | 2 ++ Changelog.md | 11 +++++++++++ README.md | 4 ++-- supportedsites.md | 6 +----- yt_dlp/extractor/trovolive.py | 0 5 files changed, 16 insertions(+), 7 deletions(-) delete mode 100644 yt_dlp/extractor/trovolive.py diff --git a/CONTRIBUTORS b/CONTRIBUTORS index a535411c6e..048d988529 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -123,3 +123,5 @@ ajj8 jakubadamw jfogelman timethrow +sarnoud +Bojidarist diff --git a/Changelog.md b/Changelog.md index 2350f67add..2e6da33fb8 100644 --- a/Changelog.md +++ b/Changelog.md @@ -14,6 +14,17 @@ --> +### 2021.10.10 + +* [downloader/ffmpeg] Fix bug in initializing `FFmpegPostProcessor` +* [minicurses] Fix when printing to file +* [downloader] Fix throttledratelimit +* [francetv] Fix extractor by [fstirlitz](https://github.com/fstirlitz), [sarnoud](https://github.com/sarnoud) +* [NovaPlay] Add extractor by 
[Bojidarist](https://github.com/Bojidarist) +* [ffmpeg] Revert "Set max probesize" - No longer needed +* [docs] Remove incorrect dependency on VC++10 +* [build] Allow to release without changelog + ### 2021.10.09 * Improved progress reporting diff --git a/README.md b/README.md index 8acb574846..1a46b25f4d 100644 --- a/README.md +++ b/README.md @@ -92,9 +92,9 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats -* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, RCN MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ, BannedVideo, bilibili categories, Epicon, filmmodu, GabTV, HungamaAlbum, ManotoTV, Niconico search, Patreon User, peloton, ProjectVeritas, radiko, StarTV, tiktok user, Tokentube, voicy, TV2HuSeries, biliintl, 17live, NewgroundsUser, peertube channel/playlist, ZenYandex, CAM4, CGTN, damtomo, gotostage, Koo, Mediaite, Mediaklikk, MuseScore, nzherald, Olympics replay, radlive, SovietsCloset, Streamanity, Theta, Chingari, ciscowebex, Gettr, GoPro, N1, Theta, Veo, Vupload +* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, RCN MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ, BannedVideo, bilibili categories, Epicon, filmmodu, GabTV, HungamaAlbum, ManotoTV, Niconico search, Patreon User, peloton, ProjectVeritas, radiko, StarTV, tiktok user, Tokentube, voicy, TV2HuSeries, biliintl, 17live, NewgroundsUser, peertube channel/playlist, ZenYandex, CAM4, CGTN, damtomo, gotostage, Koo, Mediaite, Mediaklikk, MuseScore, nzherald, Olympics replay, radlive, SovietsCloset, Streamanity, Theta, Chingari, ciscowebex, Gettr, GoPro, N1, Theta, Veo, Vupload, NovaPlay -* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll playlist, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds, SAML Verizon login, Hungama, afreecatv, aljazeera, ATV, bitchute, camtube, CDA, eroprofile, facebook, HearThisAtIE, iwara, kakao, Motherless, Nova, peertube, pornhub, reddit, tiktok, TV2, TV2Hu, tv5mondeplus, VH1, Viafree, XHamster, 9Now, AnimalPlanet, Arte, CBC, Chingari, comedycentral, DIYNetwork, niconico, dw, funimation, globo, HiDive, NDR, Nuvid, Oreilly, pbs, plutotv, reddit, redtube, soundcloud, SpankBang, VrtNU, bbc, Bilibili, LinkedInLearning, 
parliamentlive, PolskieRadio, Streamable, vidme +* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll playlist, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds, SAML Verizon login, Hungama, afreecatv, aljazeera, ATV, bitchute, camtube, CDA, eroprofile, facebook, HearThisAtIE, iwara, kakao, Motherless, Nova, peertube, pornhub, reddit, tiktok, TV2, TV2Hu, tv5mondeplus, VH1, Viafree, XHamster, 9Now, AnimalPlanet, Arte, CBC, Chingari, comedycentral, DIYNetwork, niconico, dw, funimation, globo, HiDive, NDR, Nuvid, Oreilly, pbs, plutotv, reddit, redtube, soundcloud, SpankBang, VrtNU, bbc, Bilibili, LinkedInLearning, parliamentlive, PolskieRadio, Streamable, vidme, francetv * **Subtitle extraction from manifests**: Subtitles can be extracted from streaming media manifests. See [commit/be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details diff --git a/supportedsites.md b/supportedsites.md index 3fe79683aa..02be6b918c 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -232,7 +232,6 @@ - **CTV** - **CTVNews** - **cu.ntv.co.jp**: Nippon Television Network - - **Culturebox** - **CultureUnplugged** - **curiositystream** - **curiositystream:collection** @@ -346,13 +345,10 @@ - **foxnews**: Fox News and Fox Business Video - **foxnews:article** - **FoxSports** - - **france2.fr:generation-what** - **FranceCulture** - **FranceInter** - **FranceTV** - - **FranceTVEmbed** - **francetvinfo.fr** - - **FranceTVJeunesse** - **FranceTVSite** - **Freesound** - **freespeech.org** @@ -705,6 +701,7 @@ - **NosVideo** - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz - **NovaEmbed** + - **NovaPlay** - **nowness** - **nowness:playlist** - **nowness:series** @@ -991,7 +988,6 @@ - **SpankBangPlaylist** - **Spankwire** - **Spiegel** - - **sport.francetvinfo.fr** - **Sport5** - **SportBox** - **SportDeutschland** diff --git a/yt_dlp/extractor/trovolive.py b/yt_dlp/extractor/trovolive.py deleted file mode 100644 index e69de29bb2..0000000000 From 8063de51097a7af719c7a8c95b8f7c097573046f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Oct 2021 04:03:13 +0000 Subject: [PATCH 236/641] [version] update :ci skip all --- .github/ISSUE_TEMPLATE/1_broken_site.md | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.md | 4 ++-- .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4 ++-- .github/ISSUE_TEMPLATE/4_bug_report.md | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.md | 4 ++-- yt_dlp/version.py | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md index d07c33e110..157eca91b5 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -21,7 +21,7 @@ assignees: '' - [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running yt-dlp version **2021.10.09** +- [ ] I've verified that I'm running yt-dlp version **2021.10.10** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped - [ ] I've searched the bugtracker for similar issues 
including closed ones @@ -47,7 +47,7 @@ Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v - [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running yt-dlp version **2021.10.09** +- [ ] I've verified that I'm running yt-dlp version **2021.10.10** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] I've checked that none of provided URLs violate any copyrights - [ ] The provided URLs do not contain any DRM to the best of my knowledge diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md index dd9bc1faa8..54536fce6d 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -21,14 +21,14 @@ assignees: '' - [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running yt-dlp version **2021.10.09** +- [ ] I've verified that I'm running yt-dlp version **2021.10.10** - [ ] I've searched the bugtracker for similar site feature requests including closed ones - [ ] I've read the opening an issue section in CONTRIBUTING.md - [ ] I have given an appropriate title to the issue diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md index 8981eca0e5..6413e8b7ec 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -21,7 +21,7 @@ assignees: '' - [ ] I'm reporting a bug unrelated to a specific site -- [ ] I've verified that I'm running yt-dlp version **2021.10.09** +- [ ] I've verified that I'm running yt-dlp version **2021.10.10** - [ ] I've checked that all provided URLs are alive and playable in a browser - [ ] The provided URLs do not contain any DRM to the best of my knowledge - [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped @@ -48,7 +48,7 @@ Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v - [ ] I'm reporting a feature request -- [ ] I've verified that I'm running yt-dlp version **2021.10.09** +- [ ] I've verified that I'm running yt-dlp version **2021.10.10** - [ ] I've searched the bugtracker for similar feature requests including closed ones - [ ] I've read the opening an issue section in CONTRIBUTING.md - [ ] I have given an appropriate title to the issue diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 34b6e9a5d8..83b6fea9fc 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2021.10.09' +__version__ = '2021.10.10' From e8f726a57fe144cb5a6f548e4654944ac2b2aa50 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 10 Oct 2021 11:06:23 +0530 Subject: [PATCH 237/641] [hidive] Fix typo in b5ae35ee6d3f913898770b8c74ee5f5e5cc33560 --- yt_dlp/extractor/hidive.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/hidive.py b/yt_dlp/extractor/hidive.py index ef1ca197e2..15bd444f9f 100644 --- a/yt_dlp/extractor/hidive.py +++ b/yt_dlp/extractor/hidive.py @@ -72,8 +72,9 @@ class HiDiveIE(InfoExtractor): parsed_urls.add(cc_url) subtitles.setdefault(cc_lang, []).append({'url': cc_url}) - def _get_subtitles(self, url, video_id, title, key, subtitles, parsed_urls): + def _get_subtitles(self, url, video_id, title, key, parsed_urls): webpage = self._download_webpage(url, video_id, fatal=False) or '' + subtitles = {} for caption in set(re.findall(r'data-captions=\"([^\"]+)\"', webpage)): renditions = self._call_api( video_id, 
title, key, {'Captions': caption}, fatal=False, @@ -93,7 +94,7 @@ class HiDiveIE(InfoExtractor): raise ExtractorError( '%s said: %s' % (self.IE_NAME, restriction), expected=True) - formats, parsed_urls = [], {}, {None} + formats, parsed_urls = [], {None} for rendition_id, rendition in settings['renditions'].items(): audio, version, extra = rendition_id.split('_') m3u8_url = url_or_none(try_get(rendition, lambda x: x['bitrates']['hls'])) From 2c4bba96acb64e23470ccae804c659b56ebb93b5 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Mon, 11 Oct 2021 03:36:27 +0530 Subject: [PATCH 238/641] [EUScreen] Add Extractor (#1219) Closes #1207 Authored by: Ashish0804 --- yt_dlp/extractor/euscreen.py | 64 ++++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 1 + 2 files changed, 65 insertions(+) create mode 100644 yt_dlp/extractor/euscreen.py diff --git a/yt_dlp/extractor/euscreen.py b/yt_dlp/extractor/euscreen.py new file mode 100644 index 0000000000..3980c2349f --- /dev/null +++ b/yt_dlp/extractor/euscreen.py @@ -0,0 +1,64 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..utils import ( + parse_duration, + js_to_json, +) + + +class EUScreenIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?euscreen\.eu/item.html\?id=(?P[^&?$/]+)' + + _TESTS = [{ + 'url': 'https://euscreen.eu/item.html?id=EUS_0EBCBF356BFC4E12A014023BA41BD98C', + 'info_dict': { + 'id': 'EUS_0EBCBF356BFC4E12A014023BA41BD98C', + 'ext': 'mp4', + 'title': "L'effondrement du stade du Heysel", + 'alt_title': 'Collapse of the Heysel Stadium', + 'duration': 318.0, + 'description': 'md5:f0ffffdfce6821139357a1b8359d6152', + 'series': 'JA2 DERNIERE', + 'episode': '-', + 'uploader': 'INA / France', + 'thumbnail': 'http://images3.noterik.com/domain/euscreenxl/user/eu_ina/video/EUS_0EBCBF356BFC4E12A014023BA41BD98C/image.jpg' + }, + 'params': {'skip_download': True} + }] + + _payload = b'-1Win32MozillaNetscape5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36true784758undefinedSat, 07 Oct 2021 08:56:50 GMT1633769810758' + + def _real_extract(self, url): + id = self._match_id(url) + args_for_js_request = self._download_webpage( + 'https://euscreen.eu/lou/LouServlet/domain/euscreenxl/html5application/euscreenxlitem', + id, data=self._payload, query={'actionlist': 'itempage', 'id': id}) + info_js = self._download_webpage( + 'https://euscreen.eu/lou/LouServlet/domain/euscreenxl/html5application/euscreenxlitem', + id, data=args_for_js_request.replace('screenid', 'screenId').encode()) + video_json = self._parse_json( + self._search_regex(r'setVideo\(({.+})\)\(\$end\$\)put', info_js, 'Video JSON'), + id, transform_source=js_to_json) + meta_json = self._parse_json( + self._search_regex(r'setData\(({.+})\)\(\$end\$\)', info_js, 'Metadata JSON'), + id, transform_source=js_to_json) + formats = [{ + 'url': source['src'], + } for source in video_json.get('sources', [])] + self._sort_formats(formats) + + return { + 'id': id, + 'title': meta_json.get('originalTitle'), + 'alt_title': meta_json.get('title'), + 'duration': parse_duration(meta_json.get('duration')), + 'description': '%s\n%s' % (meta_json.get('summaryOriginal', ''), meta_json.get('summaryEnglish', '')), + 'series': meta_json.get('series') or meta_json.get('seriesEnglish'), + 'episode': 
meta_json.get('episodeNumber'), + 'uploader': meta_json.get('provider'), + 'thumbnail': meta_json.get('screenshot') or video_json.get('screenshot'), + 'formats': formats, + } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 0a761135e3..adf54ca7e8 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -420,6 +420,7 @@ from .espn import ( ) from .esri import EsriVideoIE from .europa import EuropaIE +from .euscreen import EUScreenIE from .expotv import ExpoTVIE from .expressen import ExpressenIE from .extremetube import ExtremeTubeIE From 0481e266f590d835a010019a63b1821c24c8e178 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 11 Oct 2021 09:49:51 +0530 Subject: [PATCH 239/641] [tiktok] Fix typo in 943d5ab13305b6a37424e6572d10f562384ada9a and update tests Closes #1226 --- yt_dlp/extractor/tiktok.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index fc0915fb02..1db6327e24 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -208,7 +208,7 @@ class TikTokBaseIE(InfoExtractor): 'duration': int_or_none(traverse_obj(video_info, 'duration', ('download_addr', 'duration')), scale=1000) } - def _parse_aweme_video_web(self, aweme_detail, webpage, url): + def _parse_aweme_video_web(self, aweme_detail, webpage_url): video_info = aweme_detail['video'] author_info = traverse_obj(aweme_detail, 'author', 'authorInfo', default={}) music_info = aweme_detail.get('music') or {} @@ -277,7 +277,7 @@ class TikTokBaseIE(InfoExtractor): 'thumbnails': thumbnails, 'description': str_or_none(aweme_detail.get('desc')), 'http_headers': { - 'Referer': url + 'Referer': webpage_url } } @@ -287,18 +287,18 @@ class TikTokIE(TikTokBaseIE): _TESTS = [{ 'url': 'https://www.tiktok.com/@leenabhushan/video/6748451240264420610', - 'md5': '34a7543afd5a151b0840ba6736fb633b', + 'md5': '736bb7a466c6f0a6afeb597da1e6f5b7', 'info_dict': { 'id': '6748451240264420610', 'ext': 'mp4', 'title': '#jassmanak #lehanga #leenabhushan', 'description': '#jassmanak #lehanga #leenabhushan', 'duration': 13, - 'height': 1280, - 'width': 720, + 'height': 1024, + 'width': 576, 'uploader': 'leenabhushan', 'uploader_id': '6691488002098119685', - 'uploader_url': 'https://www.tiktok.com/@leenabhushan', + 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA_Eb4t1vodM1IuTy_cvp9CY22RAb59xqrO0Xtz9CYQJvgXaDvZxYnZYRzDWhhgJmy', 'creator': 'facestoriesbyleenabh', 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', 'upload_date': '20191016', @@ -310,7 +310,7 @@ class TikTokIE(TikTokBaseIE): } }, { 'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en', - 'md5': '06b9800d47d5fe51a19e322dd86e61c9', + 'md5': '6f3cf8cdd9b28cb8363fe0a9a160695b', 'info_dict': { 'id': '6742501081818877190', 'ext': 'mp4', @@ -321,7 +321,7 @@ class TikTokIE(TikTokBaseIE): 'width': 540, 'uploader': 'patrox', 'uploader_id': '18702747', - 'uploader_url': 'https://www.tiktok.com/@patrox', + 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws', 'creator': 'patroX', 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', 'upload_date': '20190930', @@ -362,7 +362,7 @@ class TikTokIE(TikTokBaseIE): # Chech statusCode for success status = props_data.get('pageProps').get('statusCode') if status == 0: - return self._parse_aweme_video_web(props_data['pageProps']['itemInfo']['itemStruct'], webpage, url) + return 
self._parse_aweme_video_web(props_data['pageProps']['itemInfo']['itemStruct'], url) elif status == 10216: raise ExtractorError('This video is private', expected=True) @@ -377,13 +377,17 @@ class TikTokUserIE(TikTokBaseIE): 'playlist_mincount': 45, 'info_dict': { 'id': '6935371178089399301', + 'title': 'corgibobaa', }, + 'expected_warnings': ['Retrying'] }, { 'url': 'https://www.tiktok.com/@meme', 'playlist_mincount': 593, 'info_dict': { 'id': '79005827461758976', + 'title': 'meme', }, + 'expected_warnings': ['Retrying'] }] r''' # TODO: Fix by adding _signature to api_url @@ -430,7 +434,7 @@ class TikTokUserIE(TikTokBaseIE): break for video in post_list.get('aweme_list', []): yield { - **self._parse_aweme_video(video), + **self._parse_aweme_video_app(video), 'ie_key': TikTokIE.ie_key(), 'extractor': 'TikTok', } @@ -439,12 +443,12 @@ class TikTokUserIE(TikTokBaseIE): query['max_cursor'] = post_list['max_cursor'] def _real_extract(self, url): - user_id = self._match_id(url) - webpage = self._download_webpage(url, user_id, headers={ + user_name = self._match_id(url) + webpage = self._download_webpage(url, user_name, headers={ 'User-Agent': 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)' }) - own_id = self._html_search_regex(r'snssdk\d*://user/profile/(\d+)', webpage, 'user ID') - return self.playlist_result(self._entries_api(webpage, own_id, user_id), user_id) + user_id = self._html_search_regex(r'snssdk\d*://user/profile/(\d+)', webpage, 'user ID') + return self.playlist_result(self._entries_api(webpage, user_id, user_name), user_id, user_name) class DouyinIE(TikTokIE): @@ -556,4 +560,4 @@ class DouyinIE(TikTokIE): render_data = self._parse_json( render_data_json, video_id, transform_source=compat_urllib_parse_unquote) return self._parse_aweme_video_web( - traverse_obj(render_data, (..., 'aweme', 'detail'), get_all=False), webpage, url) + traverse_obj(render_data, (..., 'aweme', 'detail'), get_all=False), url) From a169858f2409eefb66ac30085fddba81123f63b7 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 11 Oct 2021 03:59:55 +0530 Subject: [PATCH 240/641] Fix `check_formats` output being written to stdout when `-qv` Closes #1229 --- yt_dlp/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 49d6b3779b..2730d2e191 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2485,7 +2485,7 @@ class YoutubeDL(object): verbose = self.params.get('verbose') params = { 'test': True, - 'quiet': not verbose, + 'quiet': self.params.get('quiet') or not verbose, 'verbose': verbose, 'noprogress': not verbose, 'nopart': True, From ed39cac53d0dcb51623918a9c8abdbe18b653459 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 11 Oct 2021 04:00:52 +0530 Subject: [PATCH 241/641] Load archive only after printing verbose head If there is some issue in loading archive, the verbose head should be visible in the logs --- yt_dlp/YoutubeDL.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2730d2e191..59a3e3df1a 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -601,24 +601,6 @@ class YoutubeDL(object): self._setup_opener() - def preload_download_archive(fn): - """Preload the archive, if any is specified""" - if fn is None: - return False - self.write_debug('Loading archive file %r\n' % fn) - try: - with locked_file(fn, 'r', encoding='utf-8') as archive_file: - for line in archive_file: - 
self.archive.add(line.strip()) - except IOError as ioe: - if ioe.errno != errno.ENOENT: - raise - return False - return True - - self.archive = set() - preload_download_archive(self.params.get('download_archive')) - if auto_init: self.print_debug_header() self.add_default_info_extractors() @@ -638,6 +620,24 @@ class YoutubeDL(object): register_socks_protocols() + def preload_download_archive(fn): + """Preload the archive, if any is specified""" + if fn is None: + return False + self.write_debug('Loading archive file %r\n' % fn) + try: + with locked_file(fn, 'r', encoding='utf-8') as archive_file: + for line in archive_file: + self.archive.add(line.strip()) + except IOError as ioe: + if ioe.errno != errno.ENOENT: + raise + return False + return True + + self.archive = set() + preload_download_archive(self.params.get('download_archive')) + def warn_if_short_id(self, argv): # short YouTube ID starting with dash? idxs = [ From e6faf2be366fcebb6147739363ebd1f690b961bf Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 11 Oct 2021 09:55:30 +0530 Subject: [PATCH 242/641] [update] Clean up error reporting Closes #1224 --- yt_dlp/update.py | 135 ++++++++++++++++++++++++++--------------------- 1 file changed, 74 insertions(+), 61 deletions(-) diff --git a/yt_dlp/update.py b/yt_dlp/update.py index 4fbe7bd7e7..26f18bddab 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -48,10 +48,10 @@ def detect_variant(): _NON_UPDATEABLE_REASONS = { 'exe': None, 'zip': None, - 'dir': 'Auto-update is not supported for unpackaged windows executable. Re-download the latest release', - 'py2exe': 'There is no official release for py2exe executable. Build it again with the latest source code', - 'source': 'You cannot update when running from source code', - 'unknown': 'It looks like you installed yt-dlp with a package manager, pip, setup.py or a tarball. Use that to update', + 'dir': 'Auto-update is not supported for unpackaged windows executable; Re-download the latest release', + 'py2exe': 'There is no official release for py2exe executable; Build it again with the latest source code', + 'source': 'You cannot update when running from source code; Use git to pull the latest changes', + 'unknown': 'It looks like you installed yt-dlp with a package manager, pip, setup.py or a tarball; Use that to update', } @@ -59,40 +59,6 @@ def is_non_updateable(): return _NON_UPDATEABLE_REASONS.get(detect_variant(), _NON_UPDATEABLE_REASONS['unknown']) -def update_self(to_screen, verbose, opener): - ''' Exists for backward compatibility. 
Use run_update(ydl) instead ''' - - printfn = to_screen - - class FakeYDL(): - _opener = opener - to_screen = printfn - - @staticmethod - def report_warning(msg, *args, **kwargs): - return printfn('WARNING: %s' % msg, *args, **kwargs) - - @staticmethod - def report_error(msg, tb=None): - printfn('ERROR: %s' % msg) - if not verbose: - return - if tb is None: - # Copied from YoutubeDl.trouble - if sys.exc_info()[0]: - tb = '' - if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: - tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) - tb += encode_compat_str(traceback.format_exc()) - else: - tb_data = traceback.format_list(traceback.extract_stack()) - tb = ''.join(tb_data) - if tb: - printfn(tb) - - return run_update(FakeYDL()) - - def run_update(ydl): """ Update the program file with the latest version from the repository @@ -101,10 +67,17 @@ def run_update(ydl): JSON_URL = 'https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest' - def report_error(msg, network=False, expected=False, delim=';'): - if network: - msg += '%s Visit https://github.com/yt-dlp/yt-dlp/releases/latest' % delim - ydl.report_error(msg, tb='' if network or expected else None) + def report_error(msg, expected=False): + ydl.report_error(msg, tb='' if expected else None) + + def report_unable(action, expected=False): + report_error(f'Unable to {action}', expected) + + def report_permission_error(file): + report_unable(f'write to {file}; Try running as administrator', True) + + def report_network_error(action, delim=';'): + report_unable(f'{action}{delim} Visit https://github.com/yt-dlp/yt-dlp/releases/latest', True) def calc_sha256sum(path): h = hashlib.sha256() @@ -120,7 +93,7 @@ def run_update(ydl): version_info = ydl._opener.open(JSON_URL).read().decode('utf-8') version_info = json.loads(version_info) except Exception: - return report_error('can\'t obtain versions info. 
Please try again later ', True, delim='or') + return report_network_error('obtain version info', delim='; Please try again later or') def version_tuple(version_str): return tuple(map(int, version_str.split('.'))) @@ -133,7 +106,7 @@ def run_update(ydl): err = is_non_updateable() if err: ydl.to_screen(f'Latest version: {version_id}, Current version: {__version__}') - return report_error(err, expected=True) + return report_error(err, True) # sys.executable is set to the full pathname of the exe-file for py2exe # though symlinks are not followed so that we need to do this manually @@ -163,55 +136,57 @@ def run_update(ydl): return dict(ln.split()[::-1] for ln in hash_data.splitlines()).get(filename) if not os.access(filename, os.W_OK): - return report_error('no write permissions on %s' % filename, expected=True) + return report_permission_error(filename) # PyInstaller if hasattr(sys, 'frozen'): exe = filename directory = os.path.dirname(exe) if not os.access(directory, os.W_OK): - return report_error('no write permissions on %s' % directory, expected=True) + return report_permission_error(directory) try: if os.path.exists(filename + '.old'): os.remove(filename + '.old') except (IOError, OSError): - return report_error('unable to remove the old version') + return report_unable('remove the old version') try: arch = platform.architecture()[0][:2] url = get_bin_info('exe', arch).get('browser_download_url') if not url: - return report_error('unable to fetch updates', True) + return report_network_error('fetch updates') urlh = ydl._opener.open(url) newcontent = urlh.read() urlh.close() - except (IOError, OSError, StopIteration): - return report_error('unable to download latest version', True) + except (IOError, OSError): + return report_network_error('download latest version') + if not os.access(exe + '.new', os.W_OK): + return report_permission_error(f'{exe}.new') try: with open(exe + '.new', 'wb') as outf: outf.write(newcontent) except (IOError, OSError): - return report_error('unable to write the new version') + return report_unable('write the new version') expected_sum = get_sha256sum('exe', arch) if not expected_sum: ydl.report_warning('no hash information found for the release') elif calc_sha256sum(exe + '.new') != expected_sum: - report_error('unable to verify the new executable', True) + report_network_error('verify the new executable') try: os.remove(exe + '.new') except OSError: - return report_error('unable to remove corrupt download') + return report_unable('remove corrupt download') try: os.rename(exe, exe + '.old') except (IOError, OSError): - return report_error('unable to move current version') + return report_unable('move current version') try: os.rename(exe + '.new', exe) except (IOError, OSError): - report_error('unable to overwrite current version') + report_unable('overwrite current version') os.rename(exe + '.old', exe) return try: @@ -222,31 +197,31 @@ def run_update(ydl): ydl.to_screen('Updated yt-dlp to version %s' % version_id) return True # Exit app except OSError: - report_error('unable to delete old version') + report_unable('delete the old version') # Zip unix package elif isinstance(globals().get('__loader__'), zipimporter): try: url = get_bin_info('zip', '3').get('browser_download_url') if not url: - return report_error('unable to fetch updates', True) + return report_network_error('fetch updates') urlh = ydl._opener.open(url) newcontent = urlh.read() urlh.close() - except (IOError, OSError, StopIteration): - return report_error('unable to download latest version', 
True) + except (IOError, OSError): + return report_network_error('download the latest version') expected_sum = get_sha256sum('zip', '3') if not expected_sum: ydl.report_warning('no hash information found for the release') elif hashlib.sha256(newcontent).hexdigest() != expected_sum: - return report_error('unable to verify the new zip', True) + return report_network_error('verify the new zip') try: with open(filename, 'wb') as outf: outf.write(newcontent) except (IOError, OSError): - return report_error('unable to overwrite current version') + return report_unable('overwrite current version') ydl.to_screen('Updated yt-dlp to version %s; Restart yt-dlp to use the new version' % version_id) @@ -267,3 +242,41 @@ def print_notes(to_screen, versions, fromVersion=__version__): for note in notes: to_screen(note) ''' + + +def update_self(to_screen, verbose, opener): + ''' Exists for backward compatibility ''' + + printfn = to_screen + + printfn( + 'WARNING: "yt_dlp.update.update_self" is deprecated and may be removed in a future version. ' + 'Use "yt_dlp.update.run_update(ydl)" instead') + + class FakeYDL(): + _opener = opener + to_screen = printfn + + @staticmethod + def report_warning(msg, *args, **kwargs): + return printfn('WARNING: %s' % msg, *args, **kwargs) + + @staticmethod + def report_error(msg, tb=None): + printfn('ERROR: %s' % msg) + if not verbose: + return + if tb is None: + # Copied from YoutubeDl.trouble + if sys.exc_info()[0]: + tb = '' + if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: + tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) + tb += encode_compat_str(traceback.format_exc()) + else: + tb_data = traceback.format_list(traceback.extract_stack()) + tb = ''.join(tb_data) + if tb: + printfn(tb) + + return run_update(FakeYDL()) From ba107574128aa2bf9769819658931053449fecf9 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 11 Oct 2021 15:21:37 +0530 Subject: [PATCH 243/641] [extractor] Detect `EXT-X-KEY` Apple FairPlay --- yt_dlp/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 65444d3bf3..af0f01f37f 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2012,7 +2012,7 @@ class InfoExtractor(object): if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access return formats, subtitles - has_drm = re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc) + has_drm = re.search(r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', m3u8_doc) def format_url(url): return url if re.match(r'^https?://', url) else compat_urlparse.urljoin(m3u8_url, url) From 9dda99f2fca7342c8f19150ac8730d67fceed42d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 11 Oct 2021 15:27:00 +0530 Subject: [PATCH 244/641] [Merger] Do not add `aac_adtstoasc` to non-hls audio --- yt_dlp/postprocessor/ffmpeg.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 5f6861f938..e6aa2940a4 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -732,7 +732,8 @@ class FFmpegMergerPP(FFmpegPostProcessor): for (i, fmt) in enumerate(info['requested_formats']): if fmt.get('acodec') != 'none': args.extend(['-map', f'{i}:a:0']) - if self.get_audio_codec(fmt['filepath']) == 'aac': + aac_fixup = fmt['protocol'].startswith('m3u8') and self.get_audio_codec(fmt['filepath']) == 'aac' + if aac_fixup: args.extend([f'-bsf:a:{audio_streams}', 'aac_adtstoasc']) 
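                    # aac_adtstoasc converts ADTS-framed AAC (the framing produced by
                    # HLS/MPEG-TS downloads) into the ASC packaging an MP4 container
                    # expects; with this patch the filter is applied only to AAC that
                    # actually came from an m3u8 protocol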
audio_streams += 1 if fmt.get('vcodec') != 'none': From a903d8285c96b2c7ac7915f228a17e84cbfe3ba4 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 11 Oct 2021 17:25:37 +0530 Subject: [PATCH 245/641] Fix bug in storyboards Caused by 9359f3d4f02856128f5626e754c7f64e2232b02f --- yt_dlp/YoutubeDL.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 59a3e3df1a..8878d710f4 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3072,6 +3072,7 @@ class YoutubeDL(object): @staticmethod def format_resolution(format, default='unknown'): + is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none' if format.get('vcodec') == 'none' and format.get('acodec') != 'none': return 'audio only' if format.get('resolution') is not None: @@ -3082,11 +3083,11 @@ class YoutubeDL(object): res = '%sp' % format['height'] elif format.get('width'): res = '%dx?' % format['width'] + elif is_images: + return 'images' else: - res = default - if format.get('vcodec') == 'none' and format.get('acodec') == 'none': - res += ' (images)' - return res + return default + return f'{res} images' if is_images else res def _format_note(self, fdict): res = '' From cc16383ff36b3971064bae8106a45d38dbddc31b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 9 Oct 2021 02:09:55 +0530 Subject: [PATCH 246/641] [extractor] Simplify search extractors --- yt_dlp/extractor/common.py | 10 +++++++++- yt_dlp/extractor/googlesearch.py | 28 ++++++---------------------- yt_dlp/extractor/niconico.py | 8 +++----- yt_dlp/extractor/soundcloud.py | 21 +++++---------------- yt_dlp/extractor/yahoo.py | 22 +++------------------- yt_dlp/extractor/youtube.py | 10 +--------- 6 files changed, 27 insertions(+), 72 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index af0f01f37f..d02a808b6b 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import base64 import datetime import hashlib +import itertools import json import netrc import os @@ -3617,7 +3618,14 @@ class SearchInfoExtractor(InfoExtractor): return self._get_n_results(query, n) def _get_n_results(self, query, n): - """Get a specified number of results for a query""" + """Get a specified number of results for a query. 
+ Either this function or _search_results must be overridden by subclasses """ + return self.playlist_result( + itertools.islice(self._search_results(query), 0, None if n == float('inf') else n), + query, query) + + def _search_results(self, query): + """Returns an iterator of search results""" raise NotImplementedError('This method must be implemented by subclasses') @property diff --git a/yt_dlp/extractor/googlesearch.py b/yt_dlp/extractor/googlesearch.py index 5279fa807f..f605c0c35f 100644 --- a/yt_dlp/extractor/googlesearch.py +++ b/yt_dlp/extractor/googlesearch.py @@ -11,6 +11,7 @@ class GoogleSearchIE(SearchInfoExtractor): _MAX_RESULTS = 1000 IE_NAME = 'video.google:search' _SEARCH_KEY = 'gvsearch' + _WORKING = False _TEST = { 'url': 'gvsearch15:python language', 'info_dict': { @@ -20,16 +21,7 @@ class GoogleSearchIE(SearchInfoExtractor): 'playlist_count': 15, } - def _get_n_results(self, query, n): - """Get a specified number of results for a query""" - - entries = [] - res = { - '_type': 'playlist', - 'id': query, - 'title': query, - } - + def _search_results(self, query): for pagenum in itertools.count(): webpage = self._download_webpage( 'http://www.google.com/search', @@ -44,16 +36,8 @@ class GoogleSearchIE(SearchInfoExtractor): for hit_idx, mobj in enumerate(re.finditer( r'
<h3 class="r"><a href="([^"]+)"', webpage)):
                 # Skip playlists
                 if not re.search(r'id="vidthumb%d"' % (hit_idx + 1), webpage):
                     continue
 
-                entries.append({
-                    '_type': 'url',
-                    'url': mobj.group(1)
-                })
+                yield self.url_result(mobj.group(1))
 
-            if (len(entries) >
= n) or not re.search(r'id="pnnext"', webpage): - res['entries'] = entries[:n] - return res + if not re.search(r'id="pnnext"', webpage): + return diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index f19afa485d..76f087057a 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -709,11 +709,9 @@ class NicovideoSearchIE(SearchInfoExtractor, NicovideoSearchURLIE): _SEARCH_KEY = 'nicosearch' _TESTS = [] - def _get_n_results(self, query, n): - entries = self._entries(self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query) - if n < float('inf'): - entries = itertools.islice(entries, 0, n) - return self.playlist_result(entries, query, query) + def _search_results(self, query): + return self._entries( + self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query) class NicovideoSearchDateIE(NicovideoSearchIE): diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index ad3a32a024..e89383ff13 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -880,25 +880,14 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): }) next_url = update_url_query(self._API_V2_BASE + endpoint, query) - collected_results = 0 - for i in itertools.count(1): response = self._download_json( - next_url, collection_id, 'Downloading page {0}'.format(i), + next_url, collection_id, f'Downloading page {i}', 'Unable to download API page', headers=self._HEADERS) - collection = response.get('collection', []) - if not collection: - break - - collection = list(filter(bool, collection)) - collected_results += len(collection) - - for item in collection: - yield self.url_result(item['uri'], SoundcloudIE.ie_key()) - - if not collection or collected_results >= limit: - break + for item in response.get('collection') or []: + if item: + yield self.url_result(item['uri'], SoundcloudIE.ie_key()) next_url = response.get('next_href') if not next_url: @@ -906,4 +895,4 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): def _get_n_results(self, query, n): tracks = self._get_collection('search/tracks', query, limit=n, q=query) - return self.playlist_result(tracks, playlist_title=query) + return self.playlist_result(tracks, query, query) diff --git a/yt_dlp/extractor/yahoo.py b/yt_dlp/extractor/yahoo.py index 741efefc89..53556de007 100644 --- a/yt_dlp/extractor/yahoo.py +++ b/yt_dlp/extractor/yahoo.py @@ -334,31 +334,15 @@ class YahooSearchIE(SearchInfoExtractor): IE_NAME = 'screen.yahoo:search' _SEARCH_KEY = 'yvsearch' - def _get_n_results(self, query, n): - """Get a specified number of results for a query""" - entries = [] + def _search_results(self, query): for pagenum in itertools.count(0): result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30) info = self._download_json(result_url, query, note='Downloading results page ' + str(pagenum + 1)) - m = info['m'] - results = info['results'] - - for (i, r) in enumerate(results): - if (pagenum * 30) + i >= n: - break - mobj = re.search(r'(?Pscreen\.yahoo\.com/.*?-\d*?\.html)"', r) - e = self.url_result('http://' + mobj.group('url'), 'Yahoo') - entries.append(e) - if (pagenum * 30 + i >= n) or (m['last'] >= (m['total'] - 1)): + yield from (self.url_result(result['rurl']) for result in info['results']) + if info['m']['last'] >= info['m']['total'] - 1: break - return { - '_type': 'playlist', - 'id': query, - 'entries': entries, - } - class YahooGyaOPlayerIE(InfoExtractor): 
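    # GYAO! is Yahoo Japan's streaming service; this player extractor is only
    # surrounding context here and is not touched by the search rewrite above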
IE_NAME = 'yahoo:gyao:player' diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 97d02dc0b4..41fd0aef7e 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4615,11 +4615,10 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE): _SEARCH_PARAMS = None _TESTS = [] - def _entries(self, query, n): + def _search_results(self, query): data = {'query': query} if self._SEARCH_PARAMS: data['params'] = self._SEARCH_PARAMS - total = 0 continuation = {} for page_num in itertools.count(1): data.update(continuation) @@ -4662,17 +4661,10 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE): continue yield self._extract_video(video) - total += 1 - if total == n: - return if not continuation: break - def _get_n_results(self, query, n): - """Get a specified number of results for a query""" - return self.playlist_result(self._entries(query, n), query, query) - class YoutubeSearchDateIE(YoutubeSearchIE): IE_NAME = YoutubeSearchIE.IE_NAME + ':date' From a2160aa45f4019e02ced01c9030aa9519b40b24f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 12 Oct 2021 15:20:50 +0530 Subject: [PATCH 247/641] [extractor] Generalize `getcomments` implementation --- yt_dlp/extractor/bannedvideo.py | 17 +++++---------- yt_dlp/extractor/common.py | 26 +++++++++++++++++++++++ yt_dlp/extractor/youtube.py | 37 +++++++-------------------------- 3 files changed, 38 insertions(+), 42 deletions(-) diff --git a/yt_dlp/extractor/bannedvideo.py b/yt_dlp/extractor/bannedvideo.py index 8f8f5ef5f2..3db1151f6d 100644 --- a/yt_dlp/extractor/bannedvideo.py +++ b/yt_dlp/extractor/bannedvideo.py @@ -97,21 +97,16 @@ query GetCommentReplies($id: String!) { 'query': self._GRAPHQL_QUERIES[operation] }).encode('utf8')).get('data') - def _extract_comments(self, video_id, comments, comment_data): + def _get_comments(self, video_id, comments, comment_data): + yield from comments for comment in comment_data.copy(): comment_id = comment.get('_id') if comment.get('replyCount') > 0: reply_json = self._call_api( video_id, comment_id, 'GetCommentReplies', f'Downloading replies for comment {comment_id}') - comments.extend( - self._parse_comment(reply, comment_id) - for reply in reply_json.get('getCommentReplies')) - - return { - 'comments': comments, - 'comment_count': len(comments), - } + for reply in reply_json.get('getCommentReplies'): + yield self._parse_comment(reply, comment_id) @staticmethod def _parse_comment(comment_data, parent): @@ -159,7 +154,5 @@ query GetCommentReplies($id: String!) 
{ 'tags': [tag.get('name') for tag in video_info.get('tags')], 'availability': self._availability(is_unlisted=video_info.get('unlisted')), 'comments': comments, - '__post_extractor': ( - (lambda: self._extract_comments(video_id, comments, video_json.get('getVideoComments'))) - if self.get_param('getcomments') else None) + '__post_extractor': self.extract_comments(video_id, comments, video_json.get('getVideoComments')) } diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index d02a808b6b..5b7b8891aa 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3502,6 +3502,32 @@ class InfoExtractor(object): def _get_subtitles(self, *args, **kwargs): raise NotImplementedError('This method must be implemented by subclasses') + def extract_comments(self, *args, **kwargs): + if not self.get_param('getcomments'): + return None + generator = self._get_comments(*args, **kwargs) + + def extractor(): + comments = [] + try: + while True: + comments.append(next(generator)) + except KeyboardInterrupt: + interrupted = True + self.to_screen('Interrupted by user') + except StopIteration: + interrupted = False + comment_count = len(comments) + self.to_screen(f'Extracted {comment_count} comments') + return { + 'comments': comments, + 'comment_count': None if interrupted else comment_count + } + return extractor + + def _get_comments(self, *args, **kwargs): + raise NotImplementedError('This method must be implemented by subclasses') + @staticmethod def _merge_subtitle_items(subtitle_list1, subtitle_list2): """ Merge subtitle items for one language. Items with duplicated URLs diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 41fd0aef7e..3e93c99342 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2241,7 +2241,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None): def extract_header(contents): - _total_comments = 0 _continuation = None for content in contents: comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer']) @@ -2251,7 +2250,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if expected_comment_count: comment_counts[1] = expected_comment_count self.to_screen('Downloading ~%d comments' % expected_comment_count) - _total_comments = comment_counts[1] sort_mode_str = self._configuration_arg('comment_sort', [''])[0] comment_sort_index = int(sort_mode_str != 'top') # 1 = new, 0 = top @@ -2271,7 +2269,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): sort_text = 'top comments' if comment_sort_index == 0 else 'newest first' self.to_screen('Sorting comments by %s' % sort_text) break - return _total_comments, _continuation + return _continuation def extract_thread(contents): if not parent: @@ -2359,9 +2357,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): lambda x: x['appendContinuationItemsAction']['continuationItems']), list) or [] if is_first_continuation: - total_comments, continuation = extract_header(continuation_items) - if total_comments: - yield total_comments + continuation = extract_header(continuation_items) is_first_continuation = False if continuation: break @@ -2389,9 +2385,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): continue if is_first_continuation: header_continuation_items = [continuation_renderer.get('header') or {}] - total_comments, continuation = extract_header(header_continuation_items) - if total_comments: - yield total_comments + continuation = 
extract_header(header_continuation_items) is_first_continuation = False if continuation: break @@ -2419,35 +2413,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor): [bytes_to_intlist(base64.b64decode(part)) for part in parts])) return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8') - def _extract_comments(self, ytcfg, video_id, contents, webpage): + def _get_comments(self, ytcfg, video_id, contents, webpage): """Entry for comment extraction""" def _real_comment_extract(contents): yield from self._comment_entries( traverse_obj(contents, (..., 'itemSectionRenderer'), get_all=False), ytcfg, video_id) - comments = [] - estimated_total = 0 - max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf') + max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) # Force English regardless of account setting to prevent parsing issues # See: https://github.com/yt-dlp/yt-dlp/issues/532 ytcfg = copy.deepcopy(ytcfg) traverse_obj( ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en' - try: - for comment in _real_comment_extract(contents): - if len(comments) >= max_comments: - break - if isinstance(comment, int): - estimated_total = comment - continue - comments.append(comment) - except KeyboardInterrupt: - self.to_screen('Interrupted by user') - self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total)) - return { - 'comments': comments, - 'comment_count': len(comments), - } + return itertools.islice(_real_comment_extract(contents), 0, max_comments) @staticmethod def _get_checkok_params(): @@ -3209,8 +3187,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): needs_auth=info['age_limit'] >= 18, is_unlisted=None if is_private is None else is_unlisted) - if self.get_param('getcomments', False): - info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage) + info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage) self.mark_watched(video_id, player_responses) From e88d44c6ee66e2a1b814c2fe89fc53b3c3e029ef Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 9 Oct 2021 02:14:23 +0530 Subject: [PATCH 248/641] [cleanup] Cleanup bilibili code Closes #1169 Authored by pukkandan, u-spec-png --- yt_dlp/extractor/bilibili.py | 222 +++++++++++++---------------------- 1 file changed, 81 insertions(+), 141 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index a1be7e04b3..d6c77e4184 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -1,16 +1,13 @@ # coding: utf-8 -from __future__ import unicode_literals import hashlib import itertools -import json import functools import re import math from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( - compat_str, compat_parse_qs, compat_urlparse, compat_urllib_parse_urlparse @@ -20,6 +17,7 @@ from ..utils import ( int_or_none, float_or_none, parse_iso8601, + traverse_obj, try_get, smuggle_url, srt_subtitles_timecode, @@ -101,7 +99,7 @@ class BiliBiliIE(InfoExtractor): 'upload_date': '20170301', }, 'params': { - 'skip_download': True, # Test metadata only + 'skip_download': True, }, }, { 'info_dict': { @@ -115,7 +113,7 @@ class BiliBiliIE(InfoExtractor): 'upload_date': '20170301', }, 'params': { - 'skip_download': True, # Test metadata only + 'skip_download': True, }, }] }, { @@ -169,7 +167,7 @@ class BiliBiliIE(InfoExtractor): if 'anime/' not in url: cid = self._search_regex( - 
r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + compat_str(page_id), webpage, 'cid', + r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + str(page_id), webpage, 'cid', default=None ) or self._search_regex( r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid', @@ -259,7 +257,7 @@ class BiliBiliIE(InfoExtractor): # TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload for each video part_title = try_get( self._download_json( - "https://api.bilibili.com/x/player/pagelist?bvid=%s&jsonp=jsonp" % bv_id, + f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp', video_id, note='Extracting videos in anthology'), lambda x: x['data'][int(page_id) - 1]['part']) title = part_title or title @@ -273,7 +271,7 @@ class BiliBiliIE(InfoExtractor): # TODO 'view_count' requires deobfuscating Javascript info = { - 'id': compat_str(video_id) if page_id is None else '%s_p%s' % (video_id, page_id), + 'id': str(video_id) if page_id is None else '%s_part%s' % (video_id, page_id), 'cid': cid, 'title': title, 'description': description, @@ -295,29 +293,25 @@ class BiliBiliIE(InfoExtractor): info['uploader'] = self._html_search_meta( 'author', webpage, 'uploader', default=None) - raw_danmaku = self._get_raw_danmaku(video_id, cid) - - raw_tags = self._get_tags(video_id) - tags = list(map(lambda x: x['tag_name'], raw_tags)) - top_level_info = { - 'raw_danmaku': raw_danmaku, - 'tags': tags, - 'raw_tags': raw_tags, + 'tags': traverse_obj(self._download_json( + f'https://api.bilibili.com/x/tag/archive/tags?aid={video_id}', + video_id, fatal=False, note='Downloading tags'), ('data', ..., 'tag_name')), } - if self.get_param('getcomments', False): - def get_comments(): - comments = self._get_all_comment_pages(video_id) - return { - 'comments': comments, - 'comment_count': len(comments) - } - top_level_info['__post_extractor'] = get_comments + entries[0]['subtitles'] = { + 'danmaku': [{ + 'ext': 'xml', + 'url': f'https://comment.bilibili.com/{cid}.xml', + }] + } - ''' + r''' # Requires https://github.com/m13253/danmaku2ass which is licenced under GPL3 # See https://github.com/animelover1984/youtube-dl + + raw_danmaku = self._download_webpage( + f'https://comment.bilibili.com/{cid}.xml', video_id, fatal=False, note='Downloading danmaku comments') danmaku = NiconicoIE.CreateDanmaku(raw_danmaku, commentType='Bilibili', x=1024, y=576) entries[0]['subtitles'] = { 'danmaku': [{ @@ -327,29 +321,27 @@ class BiliBiliIE(InfoExtractor): } ''' + top_level_info['__post_extractor'] = self.extract_comments(video_id) + for entry in entries: entry.update(info) if len(entries) == 1: entries[0].update(top_level_info) return entries[0] - else: - for idx, entry in enumerate(entries): - entry['id'] = '%s_part%d' % (video_id, (idx + 1)) - global_info = { - '_type': 'multi_video', - 'id': compat_str(video_id), - 'bv_id': bv_id, - 'title': title, - 'description': description, - 'entries': entries, - } + for idx, entry in enumerate(entries): + entry['id'] = '%s_part%d' % (video_id, (idx + 1)) - global_info.update(info) - global_info.update(top_level_info) - - return global_info + return { + '_type': 'multi_video', + 'id': str(video_id), + 'bv_id': bv_id, + 'title': title, + 'description': description, + 'entries': entries, + **info, **top_level_info + } def _extract_anthology_entries(self, bv_id, video_id, webpage): title = self._html_search_regex( @@ -357,10 +349,10 @@ class BiliBiliIE(InfoExtractor): r'(?s)]*>(?P.+?)</h1>'), webpage, 'title', group='title') json_data = self._download_json( - 
"https://api.bilibili.com/x/player/pagelist?bvid=%s&jsonp=jsonp" % bv_id, + f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp', video_id, note='Extracting videos in anthology') - if len(json_data['data']) > 1: + if json_data['data']: return self.playlist_from_matches( json_data['data'], bv_id, title, ie=BiliBiliIE.ie_key(), getter=lambda entry: 'https://www.bilibili.com/video/%s?p=%d' % (bv_id, entry['page'])) @@ -375,65 +367,31 @@ class BiliBiliIE(InfoExtractor): if response['code'] == -400: raise ExtractorError('Video ID does not exist', expected=True, video_id=id) elif response['code'] != 0: - raise ExtractorError('Unknown error occurred during API check (code %s)' % response['code'], expected=True, video_id=id) - return (response['data']['aid'], response['data']['bvid']) + raise ExtractorError(f'Unknown error occurred during API check (code {response["code"]})', + expected=True, video_id=id) + return response['data']['aid'], response['data']['bvid'] - # recursive solution to getting every page of comments for the video - # we can stop when we reach a page without any comments - def _get_all_comment_pages(self, video_id, commentPageNumber=0): - comment_url = "https://api.bilibili.com/x/v2/reply?jsonp=jsonp&pn=%s&type=1&oid=%s&sort=2&_=1567227301685" % (commentPageNumber, video_id) - json_str = self._download_webpage( - comment_url, video_id, - note='Extracting comments from page %s' % (commentPageNumber)) - replies = json.loads(json_str)['data']['replies'] - if replies is None: - return [] - return self._get_all_children(replies) + self._get_all_comment_pages(video_id, commentPageNumber + 1) + def _get_comments(self, video_id, commentPageNumber=0): + for idx in itertools.count(1): + replies = traverse_obj( + self._download_json( + f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={video_id}&type=1&jsonp=jsonp&sort=2&_=1567227301685', + video_id, note=f'Extracting comments from page {idx}'), + ('data', 'replies')) or [] + for children in map(self._get_all_children, replies): + yield from children - # extracts all comments in the tree - def _get_all_children(self, replies): - if replies is None: - return [] - - ret = [] - for reply in replies: - author = reply['member']['uname'] - author_id = reply['member']['mid'] - id = reply['rpid'] - text = reply['content']['message'] - timestamp = reply['ctime'] - parent = reply['parent'] if reply['parent'] != 0 else 'root' - - comment = { - "author": author, - "author_id": author_id, - "id": id, - "text": text, - "timestamp": timestamp, - "parent": parent, - } - ret.append(comment) - - # from the JSON, the comment structure seems arbitrarily deep, but I could be wrong. - # Regardless, this should work. 
- ret += self._get_all_children(reply['replies']) - - return ret - - def _get_raw_danmaku(self, video_id, cid): - # This will be useful if I decide to scrape all pages instead of doing them individually - # cid_url = "https://www.bilibili.com/widget/getPageList?aid=%s" % (video_id) - # cid_str = self._download_webpage(cid_url, video_id, note=False) - # cid = json.loads(cid_str)[0]['cid'] - - danmaku_url = "https://comment.bilibili.com/%s.xml" % (cid) - danmaku = self._download_webpage(danmaku_url, video_id, note='Downloading danmaku comments') - return danmaku - - def _get_tags(self, video_id): - tags_url = "https://api.bilibili.com/x/tag/archive/tags?aid=%s" % (video_id) - tags_json = self._download_json(tags_url, video_id, note='Downloading tags') - return tags_json['data'] + def _get_all_children(self, reply): + yield { + 'author': traverse_obj(reply, ('member', 'uname')), + 'author_id': traverse_obj(reply, ('member', 'mid')), + 'id': reply.get('rpid'), + 'text': traverse_obj(reply, ('content', 'message')), + 'timestamp': reply.get('ctime'), + 'parent': reply.get('parent') or 'root', + } + for children in map(self._get_all_children, reply.get('replies') or []): + yield from children class BiliBiliBangumiIE(InfoExtractor): @@ -516,11 +474,8 @@ class BilibiliChannelIE(InfoExtractor): count, max_count = 0, None for page_num in itertools.count(1): - data = self._parse_json( - self._download_webpage( - self._API_URL % (list_id, page_num), list_id, - note='Downloading page %d' % page_num), - list_id)['data'] + data = self._download_json( + self._API_URL % (list_id, page_num), list_id, note=f'Downloading page {page_num}')['data'] max_count = max_count or try_get(data, lambda x: x['page']['count']) @@ -583,11 +538,11 @@ class BilibiliCategoryIE(InfoExtractor): } if category not in rid_map: - raise ExtractorError('The supplied category, %s, is not supported. List of supported categories: %s' % (category, list(rid_map.keys()))) - + raise ExtractorError( + f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}') if subcategory not in rid_map[category]: - raise ExtractorError('The subcategory, %s, isn\'t supported for this category. Supported subcategories: %s' % (subcategory, list(rid_map[category].keys()))) - + raise ExtractorError( + f'The subcategory {subcategory} isn\'t supported for this category. 
Supported subcategories: {list(rid_map[category].keys())}')
         rid_value = rid_map[category][subcategory]
 
         api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
@@ -614,41 +569,26 @@ class BiliBiliSearchIE(SearchInfoExtractor):
     IE_DESC = 'Bilibili video search, "bilisearch" keyword'
     _MAX_RESULTS = 100000
     _SEARCH_KEY = 'bilisearch'
-    MAX_NUMBER_OF_RESULTS = 1000
 
-    def _get_n_results(self, query, n):
-        """Get a specified number of results for a query"""
-
-        entries = []
-        pageNumber = 0
-        while True:
-            pageNumber += 1
-            # FIXME
-            api_url = 'https://api.bilibili.com/x/web-interface/search/type?context=&page=%s&order=pubdate&keyword=%s&duration=0&tids_2=&__refresh__=true&search_type=video&tids=0&highlight=1' % (pageNumber, query)
-            json_str = self._download_webpage(
-                api_url, "None", query={"Search_key": query},
-                note='Extracting results from page %s' % pageNumber)
-            data = json.loads(json_str)['data']
-
-            # FIXME: this is hideous
-            if "result" not in data:
-                return {
-                    '_type': 'playlist',
-                    'id': query,
-                    'entries': entries[:n]
-                }
-
-            videos = data['result']
+    def _search_results(self, query):
+        for page_num in itertools.count(1):
+            videos = self._download_json(
+                'https://api.bilibili.com/x/web-interface/search/type', query,
+                note=f'Extracting results from page {page_num}', query={
+                    'Search_key': query,
+                    'keyword': query,
+                    'page': page_num,
+                    'context': '',
+                    'order': 'pubdate',
+                    'duration': 0,
+                    'tids_2': '',
+                    '__refresh__': 'true',
+                    'search_type': 'video',
+                    'tids': 0,
+                    'highlight': 1,
+                })['data'].get('result') or []
 
             for video in videos:
-                e = self.url_result(video['arcurl'], 'BiliBili', compat_str(video['aid']))
-                entries.append(e)
-
-            if(len(entries) >= n or len(videos) >= BiliBiliSearchIE.MAX_NUMBER_OF_RESULTS):
-                return {
-                    '_type': 'playlist',
-                    'id': query,
-                    'entries': entries[:n]
-                }
+                yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
 
 
 class BilibiliAudioBaseIE(InfoExtractor):

From 7b38649845c1516e4ab4e29b6bb84b2302269663 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Mon, 11 Oct 2021 20:21:04 +0530
Subject: [PATCH 249/641] Fix verbose header not showing custom configs

---
 yt_dlp/options.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index f45c548f2a..d2dc7687b8 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -1590,7 +1590,7 @@ def parseOpts(overrideArguments=None):
                 parser.error('config-location %s does not exist.' 
% location) config = _readOptions(location, default=None) if config: - configs['custom'], paths['config'] = config, location + configs['custom'], paths['custom'] = config, location if opts.ignoreconfig: return From ecdc9049c0d8c00ad9ea5218126eefb1e7049385 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 12 Oct 2021 12:03:56 +0530 Subject: [PATCH 250/641] [YouTube] Add auto-translated subtitles Closes #1245 --- yt_dlp/extractor/youtube.py | 49 ++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 3e93c99342..1ef80445eb 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2964,15 +2964,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict) - # Converted into dicts to remove duplicates - captions = { - sub.get('baseUrl'): sub - for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])} - translation_languages = { - lang.get('languageCode'): lang.get('languageName') - for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])} - subtitles = {} if pctr: + def get_lang_code(track): + return (remove_start(track.get('vssId') or '', '.').replace('.', '-') + or track.get('languageCode')) + + # Converted into dicts to remove duplicates + captions = { + get_lang_code(sub): sub + for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])} + translation_languages = { + lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1) + for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])} + def process_language(container, base_url, lang_code, sub_name, query): lang_subs = container.setdefault(lang_code, []) for fmt in self._SUBTITLE_FORMATS: @@ -2985,30 +2989,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'name': sub_name, }) - for base_url, caption_track in captions.items(): + subtitles, automatic_captions = {}, {} + for lang_code, caption_track in captions.items(): + base_url = caption_track.get('baseUrl') if not base_url: continue + lang_name = self._get_text(caption_track, 'name', max_runs=1) if caption_track.get('kind') != 'asr': - lang_code = ( - remove_start(caption_track.get('vssId') or '', '.').replace('.', '-') - or caption_track.get('languageCode')) if not lang_code: continue process_language( - subtitles, base_url, lang_code, - traverse_obj(caption_track, ('name', 'simpleText'), ('name', 'runs', ..., 'text'), get_all=False), - {}) - continue - automatic_captions = {} + subtitles, base_url, lang_code, lang_name, {}) + if not caption_track.get('isTranslatable'): + continue for trans_code, trans_name in translation_languages.items(): if not trans_code: continue + if caption_track.get('kind') != 'asr': + trans_code += f'-{lang_code}' + trans_name += format_field(lang_name, template=' from %s') process_language( - automatic_captions, base_url, trans_code, - self._get_text(trans_name, max_runs=1), - {'tlang': trans_code}) - info['automatic_captions'] = automatic_captions - info['subtitles'] = subtitles + automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code}) + info['automatic_captions'] = automatic_captions + info['subtitles'] = subtitles parsed_url = compat_urllib_parse_urlparse(url) for component in [parsed_url.fragment, parsed_url.query]: @@ -3054,7 +3057,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): try: # This 
will error if there is no livechat initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'] - info['subtitles']['live_chat'] = [{ + info.setdefault('subtitles', {})['live_chat'] = [{ 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies 'video_id': video_id, 'ext': 'json', From a387b69a7cb55afb160d8f59df2593cb337a9db7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81kos=20S=C3=BClyi?= <sulyi.gbox@gmail.com> Date: Tue, 12 Oct 2021 20:54:27 +0200 Subject: [PATCH 251/641] [devscripts/run_tests] Use markers to filter tests (#1258) `-k` filters using a substring match on test name. `-m` checks markers for an exact match. Authored by: sulyi --- devscripts/run_tests.bat | 6 +++--- devscripts/run_tests.sh | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/devscripts/run_tests.bat b/devscripts/run_tests.bat index f12ae1c1b2..b8bb393d93 100644 --- a/devscripts/run_tests.bat +++ b/devscripts/run_tests.bat @@ -3,11 +3,11 @@ cd /d %~dp0.. if ["%~1"]==[""] ( - set "test_set=" + set "test_set="test"" ) else if ["%~1"]==["core"] ( - set "test_set=-k "not download"" + set "test_set="-m not download"" ) else if ["%~1"]==["download"] ( - set "test_set=-k download" + set "test_set="-m "download"" ) else ( echo.Invalid test type "%~1". Use "core" ^| "download" exit /b 1 diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh index fb405b5698..c9a75ba006 100755 --- a/devscripts/run_tests.sh +++ b/devscripts/run_tests.sh @@ -3,12 +3,12 @@ if [ -z $1 ]; then test_set='test' elif [ $1 = 'core' ]; then - test_set='not download' + test_set="-m not download" elif [ $1 = 'download' ]; then - test_set='download' + test_set="-m download" else echo 'Invalid test type "'$1'". 
Use "core" | "download"' exit 1 fi -python3 -m pytest -k "$test_set" +python3 -m pytest "$test_set" From 975a0d0df98a68d936c86a77175f2b0e86b576f5 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 12 Oct 2021 16:47:18 +0530 Subject: [PATCH 252/641] Calculate more fields for merged formats Closes #947 --- yt_dlp/YoutubeDL.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 8878d710f4..d9b3ce98d5 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1848,11 +1848,18 @@ class YoutubeDL(object): else: output_ext = 'mkv' + filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info)) + new_dict = { 'requested_formats': formats_info, - 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info), - 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info), + 'format': '+'.join(filtered('format')), + 'format_id': '+'.join(filtered('format_id')), 'ext': output_ext, + 'protocol': '+'.join(map(determine_protocol, formats_info)), + 'language': '+'.join(orderedSet(filtered('language'))), + 'format_note': '+'.join(orderedSet(filtered('format_note'))), + 'filesize_approx': sum(filtered('filesize', 'filesize_approx')), + 'tbr': sum(filtered('tbr', 'vbr', 'abr')), } if the_only_video: @@ -1870,6 +1877,7 @@ class YoutubeDL(object): new_dict.update({ 'acodec': the_only_audio.get('acodec'), 'abr': the_only_audio.get('abr'), + 'asr': the_only_audio.get('asr'), }) return new_dict From c111cefa5de2337fc677367ee2d727b8a56e3fd0 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 12 Oct 2021 16:50:04 +0530 Subject: [PATCH 253/641] [downloader/ffmpeg] Improve simultaneous download and merge --- README.md | 2 ++ yt_dlp/YoutubeDL.py | 9 ++------- yt_dlp/downloader/__init__.py | 15 ++++++++++----- yt_dlp/downloader/external.py | 4 ++++ 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 1a46b25f4d..dd9cbc7fc1 100644 --- a/README.md +++ b/README.md @@ -1179,6 +1179,8 @@ $ yt-dlp -o - BaW_jenozKc By default, yt-dlp tries to download the best available quality if you **don't** pass any options. This is generally equivalent to using `-f bestvideo*+bestaudio/best`. However, if multiple audiostreams is enabled (`--audio-multistreams`), the default format changes to `-f bestvideo+bestaudio/best`. Similarly, if ffmpeg is unavailable, or if you use yt-dlp to stream to `stdout` (`-o -`), the default becomes `-f best/bestvideo+bestaudio`. +**Deprecation warning**: Latest versions of yt-dlp can stream multiple formats to the stdout simultaneously using ffmpeg. So, in future versions, the default for this will be set to `-f bv*+ba/b` similar to normal downloads. If you want to preserve the `-f b/bv+ba` setting, it is recommended to explicitly specify it in the configuration options. + The general syntax for format selection is `-f FORMAT` (or `--format FORMAT`) where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download. **tl;dr:** [navigate me to examples](#format-selection-examples). 
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index d9b3ce98d5..1afe17639c 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2744,14 +2744,9 @@ class YoutubeDL(object): dl_filename = existing_file(full_filename, temp_filename) info_dict['__real_download'] = False - _protocols = set(determine_protocol(f) for f in requested_formats) - if len(_protocols) == 1: # All requested formats have same protocol - info_dict['protocol'] = _protocols.pop() - directly_mergable = FFmpegFD.can_merge_formats(info_dict, self.params) if dl_filename is not None: self.report_file_already_downloaded(dl_filename) - elif (directly_mergable and get_suitable_downloader( - info_dict, self.params, to_stdout=(temp_filename == '-')) == FFmpegFD): + elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'): info_dict['url'] = '\n'.join(f['url'] for f in requested_formats) success, real_download = self.dl(temp_filename, info_dict) info_dict['__real_download'] = real_download @@ -2769,7 +2764,7 @@ class YoutubeDL(object): 'The formats won\'t be merged.') if temp_filename == '-': - reason = ('using a downloader other than ffmpeg' if directly_mergable + reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict) else 'but the formats are incompatible for simultaneous download' if merger.available else 'but ffmpeg is not installed') self.report_warning( diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py index 739d98c2b6..2449c74117 100644 --- a/yt_dlp/downloader/__init__.py +++ b/yt_dlp/downloader/__init__.py @@ -10,10 +10,15 @@ from ..utils import ( def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=None, to_stdout=False): info_dict['protocol'] = determine_protocol(info_dict) info_copy = info_dict.copy() - if protocol: - info_copy['protocol'] = protocol info_copy['to_stdout'] = to_stdout - return _get_suitable_downloader(info_copy, params, default) + + downloaders = [_get_suitable_downloader(info_copy, proto, params, default) + for proto in (protocol or info_copy['protocol']).split('+')] + if set(downloaders) == {FFmpegFD} and FFmpegFD.can_merge_formats(info_copy, params): + return FFmpegFD + elif len(downloaders) == 1: + return downloaders[0] + return None # Some of these require get_suitable_downloader @@ -72,7 +77,7 @@ def shorten_protocol_name(proto, simplify=False): return short_protocol_names.get(proto, proto) -def _get_suitable_downloader(info_dict, params, default): +def _get_suitable_downloader(info_dict, protocol, params, default): """Get the downloader class that can handle the info dict.""" if default is NO_DEFAULT: default = HttpFD @@ -80,7 +85,7 @@ def _get_suitable_downloader(info_dict, params, default): # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict): # return FFmpegFD - protocol = info_dict['protocol'] + info_dict['protocol'] = protocol downloaders = params.get('external_downloader') external_downloader = ( downloaders if isinstance(downloaders, compat_str) or downloaders is None diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 9c1229cf6f..3c0202f228 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -327,6 +327,10 @@ class FFmpegFD(ExternalFD): # Fixme: This may be wrong when --ffmpeg-location is used return FFmpegPostProcessor().available + @classmethod + def supports(cls, info_dict): + return all(proto in 
cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+'))
+
     def on_process_started(self, proc, stdin):
         """ Override this in subclasses """
         pass

From b836dc94f2ba0d9953f61ba6bcec2a4ced504beb Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Tue, 12 Oct 2021 17:34:24 +0530
Subject: [PATCH 254/641] [outtmpl] Fix bug in expanding environment variables

---
 test/test_YoutubeDL.py | 6 ++++++
 yt_dlp/YoutubeDL.py    | 8 +++-----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 06963f7a8e..bd2d752e25 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -817,6 +817,12 @@ class TestYoutubeDL(unittest.TestCase):
         compat_setenv('__yt_dlp_var', 'expanded')
         envvar = '%__yt_dlp_var%' if compat_os_name == 'nt' else '$__yt_dlp_var'
         test(envvar, (envvar, 'expanded'))
+        if compat_os_name == 'nt':
+            test('%s%', ('%s%', '%s%'))
+            compat_setenv('s', 'expanded')
+            test('%s%', ('%s%', 'expanded'))  # %s% should be expanded before escaping %s
+            compat_setenv('(test)s', 'expanded')
+            test('%(test)s%', ('NA%', 'expanded'))  # Environment should take priority over template
 
         # Path expansion and escaping
         test('Hello %(title1)s', 'Hello $PATH')
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 1afe17639c..9d91d72ec8 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1034,7 +1034,7 @@ class YoutubeDL(object):
 
         def create_key(outer_mobj):
             if not outer_mobj.group('has_key'):
-                return f'%{outer_mobj.group(0)}'
+                return outer_mobj.group(0)
             key = outer_mobj.group('key')
             mobj = re.match(INTERNAL_FORMAT_RE, key)
             initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
@@ -1105,10 +1105,8 @@ class YoutubeDL(object):
                 compat_str(v),
                 restricted=self.params.get('restrictfilenames'),
                 is_id=(k == 'id' or k.endswith('_id')))
-        outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
-        outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
-        outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl))
-        filename = outtmpl % template_dict
+        outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
+        filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
 
         force_ext = OUTTMPL_TYPES.get(tmpl_type)
         if filename and force_ext is not None:

From fc5c8b6492d0c269191a32d7836b8a94416b804e Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 13 Oct 2021 04:11:25 +0530
Subject: [PATCH 255/641] [aria2c] Fix --skip-unavailable-fragments

---
 yt_dlp/downloader/external.py | 89 ++++++++++++++++++-----------------
 1 file changed, 45 insertions(+), 44 deletions(-)

diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
index 3c0202f228..40b9dcfe30 100644
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@@ -115,55 +115,56 @@ class ExternalFD(FragmentFD):
 
         self._debug_cmd(cmd)
 
-        if 'fragments' in info_dict:
-            fragment_retries = self.params.get('fragment_retries', 0)
-            skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
-
-            count = 0
-            while count <= fragment_retries:
-                p = subprocess.Popen(
-                    cmd, stderr=subprocess.PIPE)
-                _, stderr = process_communicate_or_kill(p)
-                if p.returncode == 0:
-                    break
-                # TODO: Decide whether to retry based on error code
-                # https://aria2.github.io/manual/en/html/aria2c.html#exit-status
-                self.to_stderr(stderr.decode('utf-8', 'replace'))
-                count += 1
-                if count <= fragment_retries:
-                    self.to_screen(
-                        '[%s] 
Got error. Retrying fragments (attempt %d of %s)...' - % (self.get_basename(), count, self.format_retries(fragment_retries))) - if count > fragment_retries: - if not skip_unavailable_fragments: - self.report_error('Giving up after %s fragment retries' % fragment_retries) - return -1 - - decrypt_fragment = self.decrypter(info_dict) - dest, _ = sanitize_open(tmpfilename, 'wb') - for frag_index, fragment in enumerate(info_dict['fragments']): - fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index) - try: - src, _ = sanitize_open(fragment_filename, 'rb') - except IOError: - if skip_unavailable_fragments and frag_index > 1: - self.to_screen('[%s] Skipping fragment %d ...' % (self.get_basename(), frag_index)) - continue - self.report_error('Unable to open fragment %d' % frag_index) - return -1 - dest.write(decrypt_fragment(fragment, src.read())) - src.close() - if not self.params.get('keep_fragments', False): - os.remove(encodeFilename(fragment_filename)) - dest.close() - os.remove(encodeFilename('%s.frag.urls' % tmpfilename)) - else: + if 'fragments' not in info_dict: p = subprocess.Popen( cmd, stderr=subprocess.PIPE) _, stderr = process_communicate_or_kill(p) if p.returncode != 0: self.to_stderr(stderr.decode('utf-8', 'replace')) - return p.returncode + return p.returncode + + fragment_retries = self.params.get('fragment_retries', 0) + skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) + + count = 0 + while count <= fragment_retries: + p = subprocess.Popen( + cmd, stderr=subprocess.PIPE) + _, stderr = process_communicate_or_kill(p) + if p.returncode == 0: + break + # TODO: Decide whether to retry based on error code + # https://aria2.github.io/manual/en/html/aria2c.html#exit-status + self.to_stderr(stderr.decode('utf-8', 'replace')) + count += 1 + if count <= fragment_retries: + self.to_screen( + '[%s] Got error. Retrying fragments (attempt %d of %s)...' + % (self.get_basename(), count, self.format_retries(fragment_retries))) + if count > fragment_retries: + if not skip_unavailable_fragments: + self.report_error('Giving up after %s fragment retries' % fragment_retries) + return -1 + + decrypt_fragment = self.decrypter(info_dict) + dest, _ = sanitize_open(tmpfilename, 'wb') + for frag_index, fragment in enumerate(info_dict['fragments']): + fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index) + try: + src, _ = sanitize_open(fragment_filename, 'rb') + except IOError: + if skip_unavailable_fragments and frag_index > 1: + self.to_screen('[%s] Skipping fragment %d ...' % (self.get_basename(), frag_index)) + continue + self.report_error('Unable to open fragment %d' % frag_index) + return -1 + dest.write(decrypt_fragment(fragment, src.read())) + src.close() + if not self.params.get('keep_fragments', False): + os.remove(encodeFilename(fragment_filename)) + dest.close() + os.remove(encodeFilename('%s.frag.urls' % tmpfilename)) + return 0 class CurlFD(ExternalFD): From 993191c0d5f711d4978c680d705ce09d957aa176 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 13 Oct 2021 04:42:31 +0530 Subject: [PATCH 256/641] Fix bug in c111cefa5de2337fc677367ee2d727b8a56e3fd0 --- yt_dlp/YoutubeDL.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 9d91d72ec8..2a8c658ebe 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2856,8 +2856,8 @@ class YoutubeDL(object): 'writing DASH m4a. 
Only some players support this container', FFmpegFixupM4aPP) - downloader = (get_suitable_downloader(info_dict, self.params).__name__ - if 'protocol' in info_dict else None) + downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None + downloader = downloader.__name__ if downloader else None ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP) ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP) From 6993f78d1bbb62b24dd77ac7fce3ead250fbe01f Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 13 Oct 2021 05:03:40 +0530 Subject: [PATCH 257/641] [extractor,utils] Detect more codecs/mimetypes Fixes: https://github.com/ytdl-org/youtube-dl/issues/29943 --- yt_dlp/extractor/common.py | 2 ++ yt_dlp/utils.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 5b7b8891aa..14201c5387 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2646,6 +2646,8 @@ class InfoExtractor(object): content_type = mime_type elif codecs.split('.')[0] == 'stpp': content_type = 'text' + elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'): + content_type = 'text' else: self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) continue diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 8e5c08ce54..7a40258cf9 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4621,7 +4621,7 @@ def parse_codecs(codecs_str): vcodec, acodec = None, None for full_codec in split_codecs: codec = full_codec.split('.')[0] - if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'): + if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora', 'dvh1', 'dvhe'): if not vcodec: vcodec = full_codec elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): From a64907d0ac89102c9380361e385fc67167595661 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 14 Oct 2021 14:44:14 +0530 Subject: [PATCH 258/641] [Hotstar] Mention Dynamic Range in format id (#1265) Authored by: Ashish0804 --- yt_dlp/extractor/hotstar.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index f66d3e433c..af679b906a 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -70,7 +70,7 @@ class HotStarBaseIE(InfoExtractor): def _call_api_v2(self, path, video_id, st=None, cookies=None): return self._call_api_impl( '%s/content/%s' % (path, video_id), video_id, st=st, cookies=cookies, query={ - 'desired-config': 'audio_channel:stereo|dynamic_range:sdr|encryption:plain|ladder:tv|package:dash|resolution:hd|subs-tag:HotstarVIP|video_codec:vp9', + 'desired-config': 'audio_channel:stereo|container:fmp4|dynamic_range:hdr|encryption:plain|ladder:tv|package:dash|resolution:fhd|subs-tag:HotstarVIP|video_codec:h265', 'device-id': cookies.get('device_id').value if cookies.get('device_id') else compat_str(uuid.uuid4()), 'os-name': 'Windows', 'os-version': '10', @@ -196,6 +196,7 @@ class HotStarIE(HotStarBaseIE): for playback_set in playback_sets: if not isinstance(playback_set, dict): continue + dr = 
re.search(r'dynamic_range:(?P<dr>[a-z]+)', playback_set.get('tagsCombination')).group('dr') format_url = url_or_none(playback_set.get('playbackUrl')) if not format_url: continue @@ -210,12 +211,12 @@ class HotStarIE(HotStarBaseIE): hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles( format_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', headers=headers) + m3u8_id=f'{dr}-hls', headers=headers) formats.extend(hls_formats) subs = self._merge_subtitles(subs, hls_subs) elif 'package:dash' in tags or ext == 'mpd': dash_formats, dash_subs = self._extract_mpd_formats_and_subtitles( - format_url, video_id, mpd_id='dash', headers=headers) + format_url, video_id, mpd_id=f'{dr}-dash', headers=headers) formats.extend(dash_formats) subs = self._merge_subtitles(subs, dash_subs) elif ext == 'f4m': From d5a39f0badbf6155eeed5c03d14489227fc9dab2 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 14 Oct 2021 14:40:37 +0530 Subject: [PATCH 259/641] [http] Show the last encountered error Closes #1262 --- yt_dlp/downloader/http.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 3bc41e5b22..2e95bb9d10 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -373,6 +373,8 @@ class HttpFD(FileDownloader): count += 1 if count <= retries: self.report_retry(e.source_error, count, retries) + else: + self.to_screen(f'[download] Got server HTTP error: {e.source_error}') continue except NextFragment: continue From a0c716bb618e525b3fbafd4ba19a8ea345db7afc Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 14 Oct 2021 14:35:10 +0530 Subject: [PATCH 260/641] [instagram] Show appropriate error when login is needed Closes #1264 --- yt_dlp/extractor/common.py | 4 ++-- yt_dlp/extractor/instagram.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 14201c5387..4f358c53bb 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -441,8 +441,8 @@ class InfoExtractor(object): _LOGIN_HINTS = { 'any': 'Use --cookies, --username and --password or --netrc to provide account credentials', 'cookies': ( - 'Use --cookies for the authentication. ' - 'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl for how to pass cookies'), + 'Use --cookies-from-browser or --cookies for the authentication. 
'
+        'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl for how to manually pass cookies'),
         'password': 'Use --username and --password or --netrc to provide account credentials',
     }
 
diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py
index 9aad804cf8..3801c7af92 100644
--- a/yt_dlp/extractor/instagram.py
+++ b/yt_dlp/extractor/instagram.py
@@ -145,7 +145,9 @@ class InstagramIE(InfoExtractor):
         video_id = mobj.group('id')
         url = mobj.group('url')
 
-        webpage = self._download_webpage(url, video_id)
+        webpage, urlh = self._download_webpage_handle(url, video_id)
+        if 'www.instagram.com/accounts/login' in urlh.geturl().rstrip('/'):
+            self.raise_login_required('You need to log in to access this content', method='cookies')
 
         (media, video_url, description, thumbnail, timestamp, uploader,
          uploader_id, like_count, comment_count, comments, height,

From 883d4b1eecca98f069e3a75fb7667a2750d4a106 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Thu, 14 Oct 2021 09:58:29 +0530
Subject: [PATCH 261/641] [YoutubeDL] Write verbose header to logger

---
 yt_dlp/YoutubeDL.py | 48 ++++++++++++++++++++++++++-------------------
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 2a8c658ebe..542a977944 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -495,7 +495,10 @@ class YoutubeDL(object):
     _screen_file = None
 
     def __init__(self, params=None, auto_init=True):
-        """Create a FileDownloader object with the given options."""
+        """Create a FileDownloader object with the given options.
+        @param auto_init    Whether to load the default extractors and print header (if verbose).
+                            Set to 'no_verbose_header' to not print the header
+        """
         if params is None:
             params = {}
         self._ies = {}
@@ -602,7 +605,8 @@ class YoutubeDL(object):
         self._setup_opener()
 
         if auto_init:
-            self.print_debug_header()
+            if auto_init != 'no_verbose_header':
+                self.print_debug_header()
             self.add_default_info_extractors()
 
         for pp_def_raw in self.params.get('postprocessors', []):
@@ -3232,28 +3236,32 @@ class YoutubeDL(object):
     def print_debug_header(self):
         if not self.params.get('verbose'):
             return
-
-        stdout_encoding = getattr(
-            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
+        get_encoding = lambda stream: getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
         encoding_str = (
-            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
+            '[debug] Encodings: locale %s, fs %s, stdout %s, stderr %s, pref %s\n' % (
                 locale.getpreferredencoding(),
                 sys.getfilesystemencoding(),
-                stdout_encoding,
+                get_encoding(self._screen_file), get_encoding(self._err_file),
                 self.get_encoding()))
-        write_string(encoding_str, encoding=None)
+
+        logger = self.params.get('logger')
+        if logger:
+            write_debug = lambda msg: logger.debug(f'[debug] {msg}')
+            write_debug(encoding_str)
+        else:
+            write_debug = lambda msg: self._write_string(f'[debug] {msg}')
+            write_string(encoding_str, encoding=None)
 
         source = detect_variant()
-        self._write_string('[debug] yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})'))
+        write_debug('yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})'))
         if _LAZY_LOADER:
-            self._write_string('[debug] Lazy loading extractors enabled\n')
+            write_debug('Lazy loading extractors enabled\n')
         if plugin_extractors or plugin_postprocessors:
-            self._write_string('[debug] Plugins: %s\n' % [
+            write_debug('Plugins: %s\n' % [
                 '%s%s' % 
(klass.__name__, '' if klass.__name__ == name else f' as {name}') for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())]) if self.params.get('compat_opts'): - self._write_string( - '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts'))) + write_debug('Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts'))) try: sp = subprocess.Popen( ['git', 'rev-parse', '--short', 'HEAD'], @@ -3262,7 +3270,7 @@ class YoutubeDL(object): out, err = process_communicate_or_kill(sp) out = out.decode().strip() if re.match('[0-9a-f]+', out): - self._write_string('[debug] Git HEAD: %s\n' % out) + write_debug('Git HEAD: %s\n' % out) except Exception: try: sys.exc_clear() @@ -3275,7 +3283,7 @@ class YoutubeDL(object): return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3] return impl_name - self._write_string('[debug] Python version %s (%s %s) - %s\n' % ( + write_debug('Python version %s (%s %s) - %s\n' % ( platform.python_version(), python_implementation(), platform.architecture()[0], @@ -3287,7 +3295,7 @@ class YoutubeDL(object): exe_str = ', '.join( f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v ) or 'none' - self._write_string('[debug] exe versions: %s\n' % exe_str) + write_debug('exe versions: %s\n' % exe_str) from .downloader.websocket import has_websockets from .postprocessor.embedthumbnail import has_mutagen @@ -3300,8 +3308,8 @@ class YoutubeDL(object): SQLITE_AVAILABLE and 'sqlite', KEYRING_AVAILABLE and 'keyring', )))) or 'none' - self._write_string('[debug] Optional libraries: %s\n' % lib_str) - self._write_string('[debug] ANSI escape support: stdout = %s, stderr = %s\n' % ( + write_debug('Optional libraries: %s\n' % lib_str) + write_debug('ANSI escape support: stdout = %s, stderr = %s\n' % ( supports_terminal_sequences(self._screen_file), supports_terminal_sequences(self._err_file))) @@ -3309,11 +3317,11 @@ class YoutubeDL(object): for handler in self._opener.handlers: if hasattr(handler, 'proxies'): proxy_map.update(handler.proxies) - self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n') + write_debug('Proxy map: ' + compat_str(proxy_map) + '\n') if self.params.get('call_home', False): ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8') - self._write_string('[debug] Public IP address: %s\n' % ipaddr) + write_debug('Public IP address: %s\n' % ipaddr) return latest_version = self.urlopen( 'https://yt-dl.org/latest/version').read().decode('utf-8') From 974208e15105b6bd467f1ab59ba7173ac3d0ede5 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 14 Oct 2021 17:32:48 +0530 Subject: [PATCH 262/641] [trovo] Support channel clips and VODs (#1246) Closes #229 Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 2 + yt_dlp/extractor/trovo.py | 67 ++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index adf54ca7e8..6bc9a2b1eb 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1470,6 +1470,8 @@ from .trilulilu import TriluliluIE from .trovo import ( TrovoIE, TrovoVodIE, + TrovoChannelVodIE, + TrovoChannelClipIE, ) from .trunews import TruNewsIE from .trutv import TruTVIE diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py index 7d6b2b88e4..ec55f41f20 100644 --- a/yt_dlp/extractor/trovo.py +++ b/yt_dlp/extractor/trovo.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ 
import unicode_literals +import itertools import json from .common import InfoExtractor @@ -194,3 +195,69 @@ class TrovoVodIE(TrovoBaseIE): } info.update(self._extract_streamer_info(vod_detail_info)) return info + + +class TrovoChannelBaseIE(InfoExtractor): + def _get_vod_json(self, page, uid): + raise NotImplementedError('This method must be implemented by subclasses') + + def _entries(self, uid): + for page in itertools.count(1): + vod_json = self._get_vod_json(page, uid) + vods = vod_json.get('vodInfos', []) + for vod in vods: + yield self.url_result( + 'https://trovo.live/%s/%s' % (self._TYPE, vod.get('vid')), + ie=TrovoVodIE.ie_key()) + has_more = vod_json['hasMore'] + if not has_more: + break + + def _real_extract(self, url): + id = self._match_id(url) + uid = str(self._download_json('https://gql.trovo.live/', id, query={ + 'query': '{getLiveInfo(params:{userName:"%s"}){streamerInfo{uid}}}' % id + })['data']['getLiveInfo']['streamerInfo']['uid']) + return self.playlist_result(self._entries(uid), playlist_id=uid) + + +class TrovoChannelVodIE(TrovoChannelBaseIE): + _VALID_URL = r'trovovod:(?P<id>[^\s]+)' + IE_DESC = 'All VODs of a trovo.live channel, "trovovod" keyword' + + _TESTS = [{ + 'url': 'trovovod:OneTappedYou', + 'playlist_mincount': 24, + 'info_dict': { + 'id': '100719456', + }, + }] + + _QUERY = '{getChannelLtvVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s}){hasMore,vodInfos{vid}}}' + _TYPE = 'video' + + def _get_vod_json(self, page, uid): + return self._download_json('https://gql.trovo.live/', uid, query={ + 'query': self._QUERY % (page, uid) + })['data']['getChannelLtvVideoInfos'] + + +class TrovoChannelClipIE(TrovoChannelBaseIE): + _VALID_URL = r'trovoclip:(?P<id>[^\s]+)' + IE_DESC = 'All Clips of a trovo.live channel, "trovoclip" keyword' + + _TESTS = [{ + 'url': 'trovoclip:OneTappedYou', + 'playlist_mincount': 29, + 'info_dict': { + 'id': '100719456', + }, + }] + + _QUERY = '{getChannelClipVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s,albumType:VOD_CLIP_ALBUM_TYPE_LATEST}){hasMore,vodInfos{vid}}}' + _TYPE = 'clip' + + def _get_vod_json(self, page, uid): + return self._download_json('https://gql.trovo.live/', uid, query={ + 'query': self._QUERY % (page, uid) + })['data']['getChannelClipVideoInfos'] From e3950399e4d471b987a2d693f8a6a476568e7c8a Mon Sep 17 00:00:00 2001 From: gustaf <86112802+18928172992817182@users.noreply.github.com> Date: Thu, 14 Oct 2021 14:04:40 +0200 Subject: [PATCH 263/641] [Viafree] add support for Finland (#1253) Authored by: 18928172992817182 (gustaf) --- yt_dlp/extractor/tvplay.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/tvplay.py b/yt_dlp/extractor/tvplay.py index 9b6d17f619..fbafb41f87 100644 --- a/yt_dlp/extractor/tvplay.py +++ b/yt_dlp/extractor/tvplay.py @@ -336,8 +336,8 @@ class ViafreeIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?:www\.)? 
- viafree\.(?P<country>dk|no|se) - /(?P<id>program(?:mer)?/(?:[^/]+/)+[^/?#&]+) + viafree\.(?P<country>dk|no|se|fi) + /(?P<id>(?:program(?:mer)?|ohjelmat)?/(?:[^/]+/)+[^/?#&]+) ''' _TESTS = [{ 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1', @@ -389,6 +389,9 @@ class ViafreeIE(InfoExtractor): }, { 'url': 'http://www.viafree.se/program/underhallning/i-like-radio-live/sasong-1/676869', 'only_matching': True, + }, { + 'url': 'https://www.viafree.fi/ohjelmat/entertainment/amazing-makeovers/kausi-7/jakso-2', + 'only_matching': True, }] _GEO_BYPASS = False From 6ff34542d2ddfe3369f7e1b321891f155690ae80 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <Ashish08@protonmail.com> Date: Sat, 16 Oct 2021 13:21:59 +0530 Subject: [PATCH 264/641] [Hotstar] Raise appropriate error for DRM --- yt_dlp/extractor/hotstar.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index af679b906a..12e6c53d49 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -203,35 +203,35 @@ class HotStarIE(HotStarBaseIE): format_url = re.sub( r'(?<=//staragvod)(\d)', r'web\1', format_url) tags = str_or_none(playback_set.get('tagsCombination')) or '' - if tags and 'encryption:plain' not in tags: - continue ext = determine_ext(format_url) + current_formats, current_subs = [], {} try: if 'package:hls' in tags or ext == 'm3u8': - hls_formats, hls_subs = self._extract_m3u8_formats_and_subtitles( + current_formats, current_subs = self._extract_m3u8_formats_and_subtitles( format_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=f'{dr}-hls', headers=headers) - formats.extend(hls_formats) - subs = self._merge_subtitles(subs, hls_subs) elif 'package:dash' in tags or ext == 'mpd': - dash_formats, dash_subs = self._extract_mpd_formats_and_subtitles( + current_formats, current_subs = self._extract_mpd_formats_and_subtitles( format_url, video_id, mpd_id=f'{dr}-dash', headers=headers) - formats.extend(dash_formats) - subs = self._merge_subtitles(subs, dash_subs) elif ext == 'f4m': # produce broken files pass else: - formats.append({ + current_formats = [{ 'url': format_url, 'width': int_or_none(playback_set.get('width')), 'height': int_or_none(playback_set.get('height')), - }) + }] except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: geo_restricted = True continue + if tags and 'encryption:plain' not in tags: + for f in current_formats: + f['has_drm'] = True + formats.extend(current_formats) + subs = self._merge_subtitles(subs, current_subs) if not formats and geo_restricted: self.raise_geo_restricted(countries=['IN'], metadata_available=True) self._sort_formats(formats) From 48ee10ee8adcf61e1136a252462670ec230e9439 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 15 Oct 2021 18:50:28 +0530 Subject: [PATCH 265/641] Fix conflict b/w id and ext in format selection Closes #1282 --- yt_dlp/YoutubeDL.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 542a977944..aff7d6ddb7 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -483,6 +483,12 @@ class YoutubeDL(object): 'track_number', 'disc_number', 'release_year', )) + _format_selection_exts = { + 'audio': {'m4a', 'mp3', 'ogg', 'aac'}, + 'video': {'mp4', 'flv', 'webm', '3gp'}, + 'storyboards': {'mhtml'}, + } + params = None _ies = {} _pps = 
{'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []} @@ -1980,11 +1986,11 @@ class YoutubeDL(object): filter_f = lambda f: _filter_f(f) and ( f.get('vcodec') != 'none' or f.get('acodec') != 'none') else: - if format_spec in ('m4a', 'mp3', 'ogg', 'aac'): # audio extension + if format_spec in self._format_selection_exts['audio']: filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' - elif format_spec in ('mp4', 'flv', 'webm', '3gp'): # video extension + elif format_spec in self._format_selection_exts['video']: filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none' - elif format_spec in ('mhtml', ): # storyboards extension + elif format_spec in self._format_selection_exts['storyboards']: filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none' else: filter_f = lambda f: f.get('format_id') == format_spec # id @@ -2259,10 +2265,18 @@ class YoutubeDL(object): formats_dict[format_id].append(format) # Make sure all formats have unique format_id + common_exts = set(ext for exts in self._format_selection_exts.values() for ext in exts) for format_id, ambiguous_formats in formats_dict.items(): - if len(ambiguous_formats) > 1: - for i, format in enumerate(ambiguous_formats): + ambigious_id = len(ambiguous_formats) > 1 + for i, format in enumerate(ambiguous_formats): + if ambigious_id: format['format_id'] = '%s-%d' % (format_id, i) + if format.get('ext') is None: + format['ext'] = determine_ext(format['url']).lower() + # Ensure there is no conflict between id and ext in format selection + # See https://github.com/yt-dlp/yt-dlp/issues/1282 + if format['format_id'] != format['ext'] and format['format_id'] in common_exts: + format['format_id'] = 'f%s' % format['format_id'] for i, format in enumerate(formats): if format.get('format') is None: @@ -2271,9 +2285,6 @@ class YoutubeDL(object): res=self.format_resolution(format), note=format_field(format, 'format_note', ' (%s)'), ) - # Automatically determine file extension if missing - if format.get('ext') is None: - format['ext'] = determine_ext(format['url']).lower() # Automatically determine protocol if missing (useful for format # selection purposes) if format.get('protocol') is None: From 03b4de722a6cf86dbcc6d17a63145ec59a573bf6 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 16 Oct 2021 18:31:00 +0530 Subject: [PATCH 266/641] [downloader] Fix slow progress hooks Closes #1301 --- yt_dlp/YoutubeDL.py | 16 +++++++++++----- yt_dlp/downloader/common.py | 5 +---- yt_dlp/downloader/dash.py | 5 ++--- yt_dlp/downloader/hls.py | 5 ++--- yt_dlp/postprocessor/common.py | 13 +++++++------ 5 files changed, 23 insertions(+), 21 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index aff7d6ddb7..fd8ad0f983 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -950,13 +950,18 @@ class YoutubeDL(object): except ValueError as err: return err + @staticmethod + def _copy_infodict(info_dict): + info_dict = dict(info_dict) + for key in ('__original_infodict', '__postprocessors'): + info_dict.pop(key, None) + return info_dict + def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None): """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """ info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set - info_dict = dict(info_dict) # Do not sanitize so as not to consume LazyList - for key in 
('__original_infodict', '__postprocessors'): - info_dict.pop(key, None) + info_dict = self._copy_infodict(info_dict) info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs formatSeconds(info_dict['duration'], '-' if sanitize else ':') if info_dict.get('duration', None) is not None @@ -2265,7 +2270,7 @@ class YoutubeDL(object): formats_dict[format_id].append(format) # Make sure all formats have unique format_id - common_exts = set(ext for exts in self._format_selection_exts.values() for ext in exts) + common_exts = set(itertools.chain(*self._format_selection_exts.values())) for format_id, ambiguous_formats in formats_dict.items(): ambigious_id = len(ambiguous_formats) > 1 for i, format in enumerate(ambiguous_formats): @@ -2523,7 +2528,8 @@ class YoutubeDL(object): fd.add_progress_hook(ph) urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']]) self.write_debug('Invoking downloader on "%s"' % urls) - new_info = dict(info) + + new_info = copy.deepcopy(self._copy_infodict(info)) if new_info.get('http_headers') is None: new_info['http_headers'] = self._calc_headers(new_info) return fd.download(name, new_info, subtitle) diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 89cdffd246..96b78a968c 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -405,13 +405,10 @@ class FileDownloader(object): def _hook_progress(self, status, info_dict): if not self._progress_hooks: return - info_dict = dict(info_dict) - for key in ('__original_infodict', '__postprocessors'): - info_dict.pop(key, None) + status['info_dict'] = info_dict # youtube-dl passes the same status object to all the hooks. # Some third party scripts seems to be relying on this. # So keep this behavior if possible - status['info_dict'] = copy.deepcopy(info_dict) for ph in self._progress_hooks: ph(status) diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py index 734eab3ef2..6444ad6928 100644 --- a/yt_dlp/downloader/dash.py +++ b/yt_dlp/downloader/dash.py @@ -55,9 +55,8 @@ class DashSegmentsFD(FragmentFD): if real_downloader: self.to_screen( '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename())) - info_copy = info_dict.copy() - info_copy['fragments'] = fragments_to_download + info_dict['fragments'] = fragments_to_download fd = real_downloader(self.ydl, self.params) - return fd.real_download(filename, info_copy) + return fd.real_download(filename, info_dict) return self.download_and_append_fragments(ctx, fragments_to_download, info_dict) diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 3c5a2617d0..61312c5ba5 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -245,13 +245,12 @@ class HlsFD(FragmentFD): fragments = [fragments[0] if fragments else None] if real_downloader: - info_copy = info_dict.copy() - info_copy['fragments'] = fragments + info_dict['fragments'] = fragments fd = real_downloader(self.ydl, self.params) # TODO: Make progress updates work without hooking twice # for ph in self._progress_hooks: # fd.add_progress_hook(ph) - return fd.real_download(filename, info_copy) + return fd.real_download(filename, info_dict) if is_webvtt: def pack_fragment(frag_content, frag_index): diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index d2daeb0fba..b367167432 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -17,11 +17,12 @@ class PostProcessorMetaClass(type): def 
run_wrapper(func):
         @functools.wraps(func)
         def run(self, info, *args, **kwargs):
-            self._hook_progress({'status': 'started'}, info)
+            info_copy = copy.deepcopy(self._copy_infodict(info))
+            self._hook_progress({'status': 'started'}, info_copy)
             ret = func(self, info, *args, **kwargs)
             if ret is not None:
                 _, info = ret
-            self._hook_progress({'status': 'finished'}, info)
+            self._hook_progress({'status': 'finished'}, info_copy)
             return ret
         return run
 
@@ -93,6 +94,9 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
             for ph in getattr(downloader, '_postprocessor_hooks', []):
                 self.add_progress_hook(ph)
 
+    def _copy_infodict(self, info_dict):
+        return getattr(self._downloader, '_copy_infodict', dict)(info_dict)
+
     @staticmethod
     def _restrict_to(*, video=True, audio=True, images=True):
         allowed = {'video': video, 'audio': audio, 'images': images}
@@ -142,11 +146,8 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
     def _hook_progress(self, status, info_dict):
         if not self._progress_hooks:
             return
-        info_dict = dict(info_dict)
-        for key in ('__original_infodict', '__postprocessors'):
-            info_dict.pop(key, None)
         status.update({
-            'info_dict': copy.deepcopy(info_dict),
+            'info_dict': info_dict,
             'postprocessor': self.pp_key(),
         })
         for ph in self._progress_hooks:

From 580d3274e50d9cca79189689ba53db7295ea267c Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Sat, 16 Oct 2021 20:13:23 +0530
Subject: [PATCH 267/641] [youtube] Expose different formats with same itag

---
 yt_dlp/downloader/common.py | 1 -
 yt_dlp/extractor/youtube.py | 9 +++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py
index 96b78a968c..9081794dbc 100644
--- a/yt_dlp/downloader/common.py
+++ b/yt_dlp/downloader/common.py
@@ -1,6 +1,5 @@
 from __future__ import division, unicode_literals
 
-import copy
 import os
 import re
 import time
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 1ef80445eb..dc9aa8ab70 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2692,7 +2692,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 itag = self._search_regex(
                     r'/itag/(\d+)', f['url'], 'itag', default=None)
                 if itag in itags:
-                    continue
+                    itag += '-hls'
+                    if itag in itags:
+                        continue
                 if itag:
                     f['format_id'] = itag
                     itags.append(itag)
@@ -2704,8 +2706,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                 itag = f['format_id']
                 if itag in itags:
-                    continue
+                    itag += '-dash'
+                    if itag in itags:
+                        continue
                 if itag:
+                    f['format_id'] = itag
                     itags.append(itag)
                 f['quality'] = guess_quality(f)
                 filesize = int_or_none(self._search_regex(

From 71ce444a3fece1f7de779b358943de4ac14aa0f4 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Sun, 17 Oct 2021 01:03:04 +0530
Subject: [PATCH 268/641] Fix --restrict-filenames when used with default template

---
 yt_dlp/YoutubeDL.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index fd8ad0f983..8cfb18e036 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -889,8 +889,13 @@ class YoutubeDL(object):
         outtmpl_dict = self.params.get('outtmpl', {})
         if not isinstance(outtmpl_dict, dict):
             outtmpl_dict = {'default': outtmpl_dict}
+        # Remove spaces in the default template
+        if self.params.get('restrictfilenames'):
+            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
+        else:
+            sanitize = lambda x: x 
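+        # With restrictfilenames: sanitize('%(title)s - %(id)s.%(ext)s') == '%(title)s-%(id)s.%(ext)s'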
outtmpl_dict.update({ - k: v for k, v in DEFAULT_OUTTMPL.items() + k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl_dict.get(k) is None}) for key, val in outtmpl_dict.items(): if isinstance(val, bytes): From dd078970ba1739cfd4fcc798a4b5026cb11c427a Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sun, 17 Oct 2021 17:16:05 +0530 Subject: [PATCH 269/641] [crunchyroll] Add support for `beta.crunchyroll` URLs and fix series URLs with language code --- yt_dlp/extractor/crunchyroll.py | 56 ++++++++++++++++++++++++++++++++- yt_dlp/extractor/extractors.py | 4 ++- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index 256c6943f2..fb05415fce 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -650,7 +650,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): IE_NAME = 'crunchyroll:playlist' - _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)' + _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?:\w{1,2}/)?(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)' _TESTS = [{ 'url': 'https://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi', @@ -672,6 +672,9 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): # geo-restricted (US), 18+ maturity wall, non-premium will be available since 2015.11.14 'url': 'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1', 'only_matching': True, + }, { + 'url': 'http://www.crunchyroll.com/fr/ladies-versus-butlers', + 'only_matching': True, }] def _real_extract(self, url): @@ -698,3 +701,54 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): 'title': title, 'entries': entries, } + + +class CrunchyrollBetaIE(CrunchyrollBaseIE): + IE_NAME = 'crunchyroll:beta' + _VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{1,2}/)?)watch/(?P<internal_id>\w+)/(?P<id>[\w\-]+)/?(?:\?|$)' + _TESTS = [{ + 'url': 'https://beta.crunchyroll.com/watch/GY2P1Q98Y/to-the-future', + 'info_dict': { + 'id': '696363', + 'ext': 'mp4', + 'timestamp': 1459610100, + 'description': 'md5:a022fbec4fbb023d43631032c91ed64b', + 'uploader': 'Toei Animation', + 'title': 'World Trigger Episode 73 – To the Future', + 'upload_date': '20160402', + }, + 'params': {'skip_download': 'm3u8'}, + 'expected_warnings': ['Unable to download XML'] + }] + + def _real_extract(self, url): + lang, internal_id, display_id = self._match_valid_url(url).group('lang', 'internal_id', 'id') + webpage = self._download_webpage(url, display_id) + episode_data = self._parse_json( + self._search_regex(r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'episode data'), + display_id)['content']['byId'][internal_id] + video_id = episode_data['external_id'].split('.')[1] + series_id = episode_data['episode_metadata']['series_slug_title'] + return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id}/{display_id}-{video_id}', + CrunchyrollIE.ie_key(), video_id) + + +class CrunchyrollBetaShowIE(CrunchyrollBaseIE): + IE_NAME = 'crunchyroll:playlist:beta' + _VALID_URL = r'https?://beta\.crunchyroll\.com/(?P<lang>(?:\w{1,2}/)?)series/\w+/(?P<id>[\w\-]+)/?(?:\?|$)' + _TESTS = [{ + 'url': 
'https://beta.crunchyroll.com/series/GY19NQ2QR/Girl-Friend-BETA', + 'info_dict': { + 'id': 'girl-friend-beta', + 'title': 'Girl Friend BETA', + }, + 'playlist_mincount': 10, + }, { + 'url': 'https://beta.crunchyroll.com/it/series/GY19NQ2QR/Girl-Friend-BETA', + 'only_matching': True, + }] + + def _real_extract(self, url): + lang, series_id = self._match_valid_url(url).group('lang', 'id') + return self.url_result(f'https://www.crunchyroll.com/{lang}{series_id.lower()}', + CrunchyrollShowPlaylistIE.ie_key(), series_id) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 6bc9a2b1eb..4c89c5a185 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -298,7 +298,9 @@ from .crackle import CrackleIE from .crooksandliars import CrooksAndLiarsIE from .crunchyroll import ( CrunchyrollIE, - CrunchyrollShowPlaylistIE + CrunchyrollShowPlaylistIE, + CrunchyrollBetaIE, + CrunchyrollBetaShowIE, ) from .cspan import CSpanIE from .ctsnews import CtsNewsIE From ec3f6640c1a5391380ff7d47769fb710cf817638 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sun, 17 Oct 2021 17:34:03 +0530 Subject: [PATCH 270/641] [crunchyroll] Add season to flat-playlist Closes #1319 --- yt_dlp/extractor/common.py | 3 ++- yt_dlp/extractor/crunchyroll.py | 21 ++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 4f358c53bb..dbe7dfcbf1 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1087,12 +1087,13 @@ class InfoExtractor(object): # Methods for following #608 @staticmethod - def url_result(url, ie=None, video_id=None, video_title=None): + def url_result(url, ie=None, video_id=None, video_title=None, **kwargs): """Returns a URL that points to a page that should be processed""" # TODO: ie should be the class used for getting the info video_info = {'_type': 'url', 'url': url, 'ie_key': ie} + video_info.update(kwargs) if video_id is not None: video_info['id'] = video_id if video_title is not None: diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index fb05415fce..511ac1b2ce 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -686,20 +686,23 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): headers=self.geo_verification_headers()) title = self._html_search_meta('name', webpage, default=None) - episode_paths = re.findall( - r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"', - webpage) - entries = [ - self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll', ep_id) - for ep_id, ep in episode_paths - ] - entries.reverse() + episode_re = r'<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"' + season_re = r'<a [^>]+season-dropdown[^>]+>([^<]+)' + paths = re.findall(f'(?s){episode_re}|{season_re}', webpage) + + entries, current_season = [], None + for ep_id, ep, season in paths: + if season: + current_season = season + continue + entries.append(self.url_result( + f'http://www.crunchyroll.com{ep}', CrunchyrollIE.ie_key(), ep_id, season=current_season)) return { '_type': 'playlist', 'id': show_id, 'title': title, - 'entries': entries, + 'entries': reversed(entries), } From 18f96d129b24200debf257153bcc762125d2a1f7 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sun, 17 Oct 2021 01:04:00 +0530 Subject: [PATCH 271/641] [utils] Allow duration strings in filter Closes #1309 --- test/test_utils.py | 1 + 
yt_dlp/utils.py | 46 ++++++++++++++++++++-------------------------- 2 files changed, 21 insertions(+), 26 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index d20bca7950..7fc431505f 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1231,6 +1231,7 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') self.assertFalse(match_str('x>2K', {'x': 1200})) self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200})) self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200})) + self.assertTrue(match_str('x > 1:0:0', {'x': 3700})) # String self.assertFalse(match_str('y=a212', {'y': 'foobar42'})) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 7a40258cf9..15bee0c470 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4756,7 +4756,6 @@ def _match_one(filter_part, dct, incomplete): (?P<key>[a-z_]+) \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* (?: - (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)| (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)| (?P<strval>.+?) ) @@ -4764,40 +4763,35 @@ def _match_one(filter_part, dct, incomplete): ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys()))) m = operator_rex.search(filter_part) if m: - unnegated_op = COMPARISON_OPERATORS[m.group('op')] - if m.group('negation'): + m = m.groupdict() + unnegated_op = COMPARISON_OPERATORS[m['op']] + if m['negation']: op = lambda attr, value: not unnegated_op(attr, value) else: op = unnegated_op - actual_value = dct.get(m.group('key')) - if (m.group('quotedstrval') is not None - or m.group('strval') is not None + comparison_value = m['quotedstrval'] or m['strval'] or m['intval'] + if m['quote']: + comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote']) + actual_value = dct.get(m['key']) + numeric_comparison = None + if isinstance(actual_value, compat_numeric_types): # If the original field is a string and matching comparisonvalue is # a number we should respect the origin of the original field # and process comparison value as a string (see - # https://github.com/ytdl-org/youtube-dl/issues/11082). - or actual_value is not None and m.group('intval') is not None - and isinstance(actual_value, compat_str)): - comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval') - quote = m.group('quote') - if quote is not None: - comparison_value = comparison_value.replace(r'\%s' % quote, quote) - else: - if m.group('op') in STRING_OPERATORS: - raise ValueError('Operator %s only supports string values!' % m.group('op')) + # https://github.com/ytdl-org/youtube-dl/issues/11082) try: - comparison_value = int(m.group('intval')) + numeric_comparison = int(comparison_value) except ValueError: - comparison_value = parse_filesize(m.group('intval')) - if comparison_value is None: - comparison_value = parse_filesize(m.group('intval') + 'B') - if comparison_value is None: - raise ValueError( - 'Invalid integer value %r in filter part %r' % ( - m.group('intval'), filter_part)) + numeric_comparison = parse_filesize(comparison_value) + if numeric_comparison is None: + numeric_comparison = parse_filesize(f'{comparison_value}B') + if numeric_comparison is None: + numeric_comparison = parse_duration(comparison_value) + if numeric_comparison is not None and m['op'] in STRING_OPERATORS: + raise ValueError('Operator %s only supports string values!' 
% m['op']) if actual_value is None: - return incomplete or m.group('none_inclusive') - return op(actual_value, comparison_value) + return incomplete or m['none_inclusive'] + return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison) UNARY_OPERATORS = { '': lambda v: (v is True) if isinstance(v, bool) else (v is not None), From 239df021037447f71ac8b7cf3c58edc9c6abe3a6 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sun, 17 Oct 2021 01:05:16 +0530 Subject: [PATCH 272/641] Make `duration_string` and `resolution` available in --match-filter Related: #1309 --- yt_dlp/YoutubeDL.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 8cfb18e036..4a7712cb63 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2177,6 +2177,9 @@ class YoutubeDL(object): if info_dict.get('display_id') is None and 'id' in info_dict: info_dict['display_id'] = info_dict['id'] + if info_dict.get('duration') is not None: + info_dict['duration_string'] = formatSeconds(info_dict['duration']) + for ts_key, date_key in ( ('timestamp', 'upload_date'), ('release_timestamp', 'release_date'), @@ -2295,10 +2298,10 @@ class YoutubeDL(object): res=self.format_resolution(format), note=format_field(format, 'format_note', ' (%s)'), ) - # Automatically determine protocol if missing (useful for format - # selection purposes) if format.get('protocol') is None: format['protocol'] = determine_protocol(format) + if format.get('resolution') is None: + format['resolution'] = self.format_resolution(format, default=None) # Add HTTP headers, so that external programs can use them from the # json output full_format_info = info_dict.copy() From 693ec74401fa8d42b0cfd5f1ef24aabade5cc275 Mon Sep 17 00:00:00 2001 From: Damiano Amatruda <damiano.amatruda@outlook.com> Date: Mon, 18 Oct 2021 03:32:46 +0200 Subject: [PATCH 273/641] [on24] Add extractor (#1200) Authored by: damianoamatruda --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/on24.py | 91 ++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 yt_dlp/extractor/on24.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 4c89c5a185..03d4a67f54 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -983,6 +983,7 @@ from .odatv import OdaTVIE from .odnoklassniki import OdnoklassnikiIE from .oktoberfesttv import OktoberfestTVIE from .olympics import OlympicsReplayIE +from .on24 import On24IE from .ondemandkorea import OnDemandKoreaIE from .onet import ( OnetIE, diff --git a/yt_dlp/extractor/on24.py b/yt_dlp/extractor/on24.py new file mode 100644 index 0000000000..d4d824430f --- /dev/null +++ b/yt_dlp/extractor/on24.py @@ -0,0 +1,91 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + strip_or_none, + try_get, + urljoin, +) + + +class On24IE(InfoExtractor): + IE_NAME = 'on24' + IE_DESC = 'ON24' + + _VALID_URL = r'''(?x) + https?://event\.on24\.com/(?: + wcc/r/(?P<id_1>\d{7})/(?P<key_1>[0-9A-F]{32})| + eventRegistration/(?:console/EventConsoleApollo|EventLobbyServlet\?target=lobby30) + \.jsp\?(?:[^/#?]*&)?eventid=(?P<id_2>\d{7})[^/#?]*&key=(?P<key_2>[0-9A-F]{32}) + )''' + + _TESTS = [{ + 'url': 
'https://event.on24.com/eventRegistration/console/EventConsoleApollo.jsp?uimode=nextgeneration&eventid=2197467&sessionid=1&key=5DF57BE53237F36A43B478DD36277A84&contenttype=A&eventuserid=305999&playerwidth=1000&playerheight=650&caller=previewLobby&text_language_id=en&format=fhaudio&newConsole=false', + 'info_dict': { + 'id': '2197467', + 'ext': 'wav', + 'title': 'Pearson Test of English General/Pearson English International Certificate Teacher Training Guide', + 'upload_date': '20200219', + 'timestamp': 1582149600.0, + 'view_count': int, + } + }, { + 'url': 'https://event.on24.com/wcc/r/2639291/82829018E813065A122363877975752E?mode=login&email=johnsmith@gmail.com', + 'only_matching': True, + }, { + 'url': 'https://event.on24.com/eventRegistration/console/EventConsoleApollo.jsp?&eventid=2639291&sessionid=1&username=&partnerref=&format=fhvideo1&mobile=&flashsupportedmobiledevice=&helpcenter=&key=82829018E813065A122363877975752E&newConsole=true&nxChe=true&newTabCon=true&text_language_id=en&playerwidth=748&playerheight=526&eventuserid=338788762&contenttype=A&mediametricsessionid=384764716&mediametricid=3558192&usercd=369267058&mode=launch', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + event_id = mobj.group('id_1') or mobj.group('id_2') + event_key = mobj.group('key_1') or mobj.group('key_2') + + event_data = self._download_json( + 'https://event.on24.com/apic/utilApp/EventConsoleCachedServlet', + event_id, query={ + 'eventId': event_id, + 'displayProfile': 'player', + 'key': event_key, + 'contentType': 'A' + }) + event_id = str(try_get(event_data, lambda x: x['presentationLogInfo']['eventid'])) or event_id + language = event_data.get('localelanguagecode') + + formats = [] + for media in event_data.get('mediaUrlInfo', []): + media_url = urljoin('https://event.on24.com/media/news/corporatevideo/events/', str(media.get('url'))) + if not media_url: + continue + media_type = media.get('code') + if media_type == 'fhvideo1': + formats.append({ + 'format_id': 'video', + 'url': media_url, + 'language': language, + 'ext': 'mp4', + 'vcodec': 'avc1.640020', + 'acodec': 'mp4a.40.2', + }) + elif media_type == 'audio': + formats.append({ + 'format_id': 'audio', + 'url': media_url, + 'language': language, + 'ext': 'wav', + 'vcodec': 'none', + 'acodec': 'wav' + }) + self._sort_formats(formats) + + return { + 'id': event_id, + 'title': strip_or_none(event_data.get('description')), + 'timestamp': int_or_none(try_get(event_data, lambda x: x['session']['startdate']), 1000), + 'webpage_url': f'https://event.on24.com/wcc/r/{event_id}/{event_key}', + 'view_count': event_data.get('registrantcount'), + 'formats': formats, + } From e69585f8c620926d29477bc68ba9b97298646348 Mon Sep 17 00:00:00 2001 From: nyuszika7h <nyuszika7h@gmail.com> Date: Mon, 18 Oct 2021 03:34:56 +0200 Subject: [PATCH 274/641] [7plus] Add cookie based authentication (#1202) Closes #1103 Authored by: nyuszika7h --- yt_dlp/extractor/sevenplus.py | 46 ++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/sevenplus.py b/yt_dlp/extractor/sevenplus.py index 9f15bd7ccc..210c44ab20 100644 --- a/yt_dlp/extractor/sevenplus.py +++ b/yt_dlp/extractor/sevenplus.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import json import re from .brightcove import BrightcoveNewIE @@ -42,9 +43,52 @@ class SevenPlusIE(BrightcoveNewIE): 'only_matching': True, }] + def _real_initialize(self): + self.token = None + + cookies = 
self._get_cookies('https://7plus.com.au') + api_key = next((x for x in cookies if x.startswith('glt_')), '')[4:] + if not api_key: # Cookies are signed out, skip login + return + + login_resp = self._download_json( + 'https://login.7plus.com.au/accounts.getJWT', None, 'Logging in', fatal=False, + query={ + 'APIKey': api_key, + 'sdk': 'js_latest', + 'login_token': cookies[f'glt_{api_key}'].value, + 'authMode': 'cookie', + 'pageURL': 'https://7plus.com.au/', + 'sdkBuild': '12471', + 'format': 'json', + }) or {} + + if 'errorMessage' in login_resp: + self.report_warning(f'Unable to login: 7plus said: {login_resp["errorMessage"]}') + return + id_token = login_resp.get('id_token') + if not id_token: + self.report_warning('Unable to login: Could not extract id token') + return + + token_resp = self._download_json( + 'https://7plus.com.au/auth/token', None, 'Getting auth token', fatal=False, + headers={'Content-Type': 'application/json'}, data=json.dumps({ + 'idToken': id_token, + 'platformId': 'web', + 'regSource': '7plus', + }).encode('utf-8')) or {} + self.token = token_resp.get('token') + if not self.token: + self.report_warning('Unable to log in: Could not extract auth token') + def _real_extract(self, url): path, episode_id = self._match_valid_url(url).groups() + headers = {} + if self.token: + headers['Authorization'] = f'Bearer {self.token}' + try: media = self._download_json( 'https://videoservice.swm.digital/playback', episode_id, query={ @@ -55,7 +99,7 @@ class SevenPlusIE(BrightcoveNewIE): 'referenceId': 'ref:' + episode_id, 'deliveryId': 'csai', 'videoType': 'vod', - })['media'] + }, headers=headers)['media'] except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: raise ExtractorError(self._parse_json( From 019a94f7d62cf9fb482ebf28e1c153486a49f319 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81kos=20S=C3=BClyi?= <sulyi.gbox@gmail.com> Date: Mon, 18 Oct 2021 03:46:49 +0200 Subject: [PATCH 275/641] [utils] Use `importlib` to load plugins (#1277) Authored by: sulyi --- yt_dlp/utils.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 15bee0c470..3ac2fbc4be 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -18,7 +18,7 @@ import functools import gzip import hashlib import hmac -import imp +import importlib.util import io import itertools import json @@ -6302,12 +6302,13 @@ def get_executable_path(): def load_plugins(name, suffix, namespace): - plugin_info = [None] classes = {} try: - plugin_info = imp.find_module( - name, [os.path.join(get_executable_path(), 'ytdlp_plugins')]) - plugins = imp.load_module(name, *plugin_info) + plugins_spec = importlib.util.spec_from_file_location( + name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py')) + plugins = importlib.util.module_from_spec(plugins_spec) + sys.modules[plugins_spec.name] = plugins + plugins_spec.loader.exec_module(plugins) for name in dir(plugins): if name in namespace: continue @@ -6315,11 +6316,8 @@ def load_plugins(name, suffix, namespace): continue klass = getattr(plugins, name) classes[name] = namespace[name] = klass - except ImportError: + except FileNotFoundError: pass - finally: - if plugin_info[0] is not None: - plugin_info[0].close() return classes From 01b052b2b19609a5b0f54db8fa2989562dedbdc4 Mon Sep 17 00:00:00 2001 From: LE <llacb47@users.noreply.github.com> Date: Sun, 17 Oct 2021 22:28:20 -0400 Subject: [PATCH 276/641] [tbs] Add tbs live streams (#1326) Authored by: llacb47 --- 
yt_dlp/extractor/tbs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/tbs.py b/yt_dlp/extractor/tbs.py index cad3f2f46f..c7d62ff4e4 100644 --- a/yt_dlp/extractor/tbs.py +++ b/yt_dlp/extractor/tbs.py @@ -16,7 +16,7 @@ from ..utils import ( class TBSIE(TurnerBaseIE): - _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com(?P<path>/(?:movies|watchtnt|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+))' + _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com(?P<path>/(?:movies|watchtnt|watchtbs|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+))' _TESTS = [{ 'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster', 'info_dict': { @@ -45,7 +45,7 @@ class TBSIE(TurnerBaseIE): drupal_settings = self._parse_json(self._search_regex( r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>', webpage, 'drupal setting'), display_id) - isLive = 'watchtnt' in path + isLive = 'watchtnt' in path or 'watchtbs' in path video_data = next(v for v in drupal_settings['turner_playlist'] if isLive or v.get('url') == path) media_id = video_data['mediaID'] From 72ab7687194f353079b4f6e6ac9a59f586c9a9ef Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Mon, 18 Oct 2021 08:09:50 +0530 Subject: [PATCH 277/641] [SkyNewsAU] Add extractor (#1308) Closes #1287 Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/skynewsau.py | 46 ++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 yt_dlp/extractor/skynewsau.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 03d4a67f54..ffd26ca0bb 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1284,6 +1284,7 @@ from .skynewsarabia import ( SkyNewsArabiaIE, SkyNewsArabiaArticleIE, ) +from .skynewsau import SkyNewsAUIE from .sky import ( SkyNewsIE, SkySportsIE, diff --git a/yt_dlp/extractor/skynewsau.py b/yt_dlp/extractor/skynewsau.py new file mode 100644 index 0000000000..b1d77951e7 --- /dev/null +++ b/yt_dlp/extractor/skynewsau.py @@ -0,0 +1,46 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + try_get, + unified_strdate, +) + + +class SkyNewsAUIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?skynews\.com\.au/[^/]+/[^/]+/[^/]+/video/(?P<id>[a-z0-9]+)' + + _TESTS = [{ + 'url': 'https://www.skynews.com.au/world-news/united-states/incredible-vision-shows-lava-overflowing-from-spains-la-palma-volcano/video/0f4c6243d6903502c01251f228b91a71', + 'info_dict': { + 'id': '6277184925001', + 'ext': 'mp4', + 'title': 'md5:60594f1ea6d5ae93e292900f4d34e9ae', + 'description': 'md5:60594f1ea6d5ae93e292900f4d34e9ae', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 76.394, + 'timestamp': 1634271300, + 'uploader_id': '5348771529001', + 'tags': ['fblink', 'msn', 'usa', 'world', 'yt'], + 'upload_date': '20211015', + }, + 'params': {'skip_download': True, 'format': 'bv'} + }] + + _API_KEY = '6krsj3w249nk779d8fukqx9f' + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + embedcode = self._search_regex(r'embedcode\s?=\s?\"([^\"]+)\"', webpage, 'embedcode') + data_json = self._download_json( + f'https://content.api.news/v3/videos/brightcove/{embedcode}?api_key={self._API_KEY}', id)['content'] + return { + 'id': id, + '_type': 'url_transparent', + 'url': 
'https://players.brightcove.net/%s/default_default/index.html?videoId=%s' % tuple(embedcode.split('-')), + 'ie_key': 'BrightcoveNew', + 'title': data_json.get('caption'), + 'upload_date': unified_strdate(try_get(data_json, lambda x: x['date']['created'])), + } From 920134b2e526ccb39a368add5547788361c78fb3 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Mon, 18 Oct 2021 08:11:31 +0530 Subject: [PATCH 278/641] [Gronkh] Add extractor (#1299) Closes #1293 Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/gronkh.py | 43 ++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 yt_dlp/extractor/gronkh.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index ffd26ca0bb..f4f817fcb5 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -527,6 +527,7 @@ from .gopro import GoProIE from .goshgay import GoshgayIE from .gotostage import GoToStageIE from .gputechconf import GPUTechConfIE +from .gronkh import GronkhIE from .groupon import GrouponIE from .hbo import HBOIE from .hearthisat import HearThisAtIE diff --git a/yt_dlp/extractor/gronkh.py b/yt_dlp/extractor/gronkh.py new file mode 100644 index 0000000000..a7792a5e0e --- /dev/null +++ b/yt_dlp/extractor/gronkh.py @@ -0,0 +1,43 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import unified_strdate + + +class GronkhIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?gronkh\.tv/stream/(?P<id>\d+)' + + _TESTS = [{ + 'url': 'https://gronkh.tv/stream/536', + 'info_dict': { + 'id': '536', + 'ext': 'mp4', + 'title': 'GTV0536, 2021-10-01 - MARTHA IS DEAD #FREiAB1830 !FF7 !horde !archiv', + 'view_count': 19491, + 'thumbnail': 'https://01.cdn.vod.farm/preview/6436746cce14e25f751260a692872b9b.jpg', + 'upload_date': '20211001' + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + data_json = self._download_json(f'https://api.gronkh.tv/v1/video/info?episode={id}', id) + m3u8_url = self._download_json(f'https://api.gronkh.tv/v1/video/playlist?episode={id}', id)['playlist_url'] + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id) + if data_json.get('vtt_url'): + subtitles.setdefault('en', []).append({ + 'url': data_json['vtt_url'], + 'ext': 'vtt', + }) + self._sort_formats(formats) + return { + 'id': id, + 'title': data_json.get('title'), + 'view_count': data_json.get('views'), + 'thumbnail': data_json.get('preview_url'), + 'upload_date': unified_strdate(data_json.get('created_at')), + 'formats': formats, + 'subtitles': subtitles, + } From 373475f03553a7fff2d20df878755bfad2fab8e5 Mon Sep 17 00:00:00 2001 From: shirt <2660574+shirt-dev@users.noreply.github.com> Date: Sun, 17 Oct 2021 22:44:20 -0400 Subject: [PATCH 279/641] [fragments] Pad fragments before decrypting (#1298) Closes #197, #1297, #1007 Authored by: shirt-dev --- yt_dlp/downloader/fragment.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 6a490131b1..d0eaede7ee 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -355,7 +355,8 @@ class FragmentFD(FileDownloader): # not what it decrypts to. 
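            # The input is padded up to a whole 16-byte AES block (PKCS#7 style:
            # N bytes, each of value N) so that the CBC decrypt routine always
            # receives complete blocks; decrypted_data[:-decrypted_data[-1]]
            # then strips the trailing padding from the plaintext.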
if self.params.get('test', False): return frag_content - decrypted_data = aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv) + padding_len = 16 - (len(frag_content) % 16) + decrypted_data = aes_cbc_decrypt_bytes(frag_content + bytes([padding_len] * padding_len), decrypt_info['KEY'], iv) return decrypted_data[:-decrypted_data[-1]] return decrypt_fragment From aae16f6ed9ba1fc6943a8461d0a9aa8be6e5561d Mon Sep 17 00:00:00 2001 From: coletdjnz <colethedj@protonmail.com> Date: Mon, 18 Oct 2021 15:58:42 +1300 Subject: [PATCH 280/641] [youtube:comments] Fix comment section not being extracted in new layouts (#1324) Co-authored-by: coletdjnz, pukkandan --- yt_dlp/extractor/youtube.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index dc9aa8ab70..892993c9bb 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2314,6 +2314,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): continuation_token = self._generate_comment_continuation(video_id) continuation = self._build_api_continuation_query(continuation_token, None) + message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1) + if message and not parent: + self.report_warning(message, video_id=video_id) + visitor_data = None is_first_continuation = parent is None @@ -2416,8 +2420,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _get_comments(self, ytcfg, video_id, contents, webpage): """Entry for comment extraction""" def _real_comment_extract(contents): - yield from self._comment_entries( - traverse_obj(contents, (..., 'itemSectionRenderer'), get_all=False), ytcfg, video_id) + renderer = next(( + item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={}) + if item.get('sectionIdentifier') == 'comment-item-section'), None) + yield from self._comment_entries(renderer, ytcfg, video_id) max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) # Force English regardless of account setting to prevent parsing issues From 24b0a72b302a8ba67eb7301911d8fedfa90f0ecc Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 18 Oct 2021 07:55:34 +0530 Subject: [PATCH 281/641] [cleanup] Remove broken youtube login code --- yt_dlp/extractor/youtube.py | 200 +----------------------------------- 1 file changed, 2 insertions(+), 198 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 892993c9bb..b71cd4292f 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -258,28 +258,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor): # If True it will raise an error if no login info is provided _LOGIN_REQUIRED = False - r''' # Unused since login is broken - _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' - _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge' - - _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup' - _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge' - _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}' - ''' - def _login(self): """ Attempt to log in to YouTube. - True is returned if successful or skipped. - False is returned if login failed. - If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised. 
""" - def warn(message): - self.report_warning(message) - - # username+password login is broken if (self._LOGIN_REQUIRED and self.get_param('cookiefile') is None and self.get_param('cookiesfrombrowser') is None): @@ -287,184 +271,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'Login details are needed to download this content', method='cookies') username, password = self._get_login_info() if username: - warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies']) - return - - # Everything below this is broken! - r''' - # No authentication to be performed - if username is None: - if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None: - raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) - # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them. - # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!') - return True - - login_page = self._download_webpage( - self._LOGIN_URL, None, - note='Downloading login page', - errnote='unable to fetch login page', fatal=False) - if login_page is False: - return - - login_form = self._hidden_inputs(login_page) - - def req(url, f_req, note, errnote): - data = login_form.copy() - data.update({ - 'pstMsg': 1, - 'checkConnection': 'youtube', - 'checkedDomains': 'youtube', - 'hl': 'en', - 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]', - 'f.req': json.dumps(f_req), - 'flowName': 'GlifWebSignIn', - 'flowEntry': 'ServiceLogin', - # TODO: reverse actual botguard identifier generation algo - 'bgRequest': '["identifier",""]', - }) - return self._download_json( - url, None, note=note, errnote=errnote, - transform_source=lambda s: re.sub(r'^[^[]*', '', s), - fatal=False, - data=urlencode_postdata(data), headers={ - 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8', - 'Google-Accounts-XSRF': 1, - }) - - lookup_req = [ - username, - None, [], None, 'US', None, None, 2, False, True, - [ - None, None, - [2, 1, None, 1, - 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', - None, [], 4], - 1, [None, None, []], None, None, None, True - ], - username, - ] - - lookup_results = req( - self._LOOKUP_URL, lookup_req, - 'Looking up account info', 'Unable to look up account info') - - if lookup_results is False: - return False - - user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str) - if not user_hash: - warn('Unable to extract user hash') - return False - - challenge_req = [ - user_hash, - None, 1, None, [1, None, None, None, [password, None, True]], - [ - None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4], - 1, [None, None, []], None, None, None, True - ]] - - challenge_results = req( - self._CHALLENGE_URL, challenge_req, - 'Logging in', 'Unable to log in') - - if challenge_results is False: - return - - login_res = try_get(challenge_results, lambda x: x[0][5], list) - if login_res: - 
login_msg = try_get(login_res, lambda x: x[5], compat_str) - warn( - 'Unable to login: %s' % 'Invalid password' - if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg) - return False - - res = try_get(challenge_results, lambda x: x[0][-1], list) - if not res: - warn('Unable to extract result entry') - return False - - login_challenge = try_get(res, lambda x: x[0][0], list) - if login_challenge: - challenge_str = try_get(login_challenge, lambda x: x[2], compat_str) - if challenge_str == 'TWO_STEP_VERIFICATION': - # SEND_SUCCESS - TFA code has been successfully sent to phone - # QUOTA_EXCEEDED - reached the limit of TFA codes - status = try_get(login_challenge, lambda x: x[5], compat_str) - if status == 'QUOTA_EXCEEDED': - warn('Exceeded the limit of TFA codes, try later') - return False - - tl = try_get(challenge_results, lambda x: x[1][2], compat_str) - if not tl: - warn('Unable to extract TL') - return False - - tfa_code = self._get_tfa_info('2-step verification code') - - if not tfa_code: - warn( - 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>' - '(Note that only TOTP (Google Authenticator App) codes work at this time.)') - return False - - tfa_code = remove_start(tfa_code, 'G-') - - tfa_req = [ - user_hash, None, 2, None, - [ - 9, None, None, None, None, None, None, None, - [None, tfa_code, True, 2] - ]] - - tfa_results = req( - self._TFA_URL.format(tl), tfa_req, - 'Submitting TFA code', 'Unable to submit TFA code') - - if tfa_results is False: - return False - - tfa_res = try_get(tfa_results, lambda x: x[0][5], list) - if tfa_res: - tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str) - warn( - 'Unable to finish TFA: %s' % 'Invalid TFA code' - if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg) - return False - - check_cookie_url = try_get( - tfa_results, lambda x: x[0][-1][2], compat_str) - else: - CHALLENGES = { - 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.", - 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.', - 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.", - } - challenge = CHALLENGES.get( - challenge_str, - '%s returned error %s.' % (self.IE_NAME, challenge_str)) - warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge) - return False - else: - check_cookie_url = try_get(res, lambda x: x[2], compat_str) - - if not check_cookie_url: - warn('Unable to extract CheckCookie URL') - return False - - check_cookie_results = self._download_webpage( - check_cookie_url, None, 'Checking cookie', fatal=False) - - if check_cookie_results is False: - return False - - if 'https://myaccount.google.com/' not in check_cookie_results: - warn('Unable to log in') - return False - - return True - ''' + self.report_warning(f'Cannot login to YouTube using username and password. 
{self._LOGIN_HINTS["cookies"]}') def _initialize_consent(self): cookies = self._get_cookies('https://www.youtube.com/') @@ -483,10 +290,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): def _real_initialize(self): self._initialize_consent() - if self._downloader is None: - return - if not self._login(): - return + self._login() _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;' _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;' From b11d210156f083f23e1bce284192314e54e4047a Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 18 Oct 2021 09:19:25 +0530 Subject: [PATCH 282/641] [EmbedMetadata] Allow overwriting all default metadata with `meta_default` key --- README.md | 2 +- yt_dlp/postprocessor/ffmpeg.py | 21 ++++++++++----------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index dd9cbc7fc1..cbd3f337d2 100644 --- a/README.md +++ b/README.md @@ -1433,7 +1433,7 @@ Note that any field created by this can be used in the [output template](#output This option also has a few special uses: * You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. Eg: `--parse-metadata "description:(?P<additional_urls>https?://www\.vimeo\.com/\d+)` will download the first vimeo video found in the description -* You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file. For example, you can use this to set a different "description" and "synopsis" +* You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file. For example, you can use this to set a different "description" and "synopsis". Any value set to the `meta_` field will overwrite all default values. For reference, these are the fields yt-dlp adds by default to the file metadata: diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index e6aa2940a4..e5595341d1 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -10,7 +10,7 @@ import json from .common import AudioConversionError, PostProcessor -from ..compat import compat_str, compat_numeric_types +from ..compat import compat_str from ..utils import ( dfxp2srt, encodeArgument, @@ -664,15 +664,14 @@ class FFmpegMetadataPP(FFmpegPostProcessor): def _get_metadata_opts(self, info): metadata = {} + meta_prefix = 'meta_' def add(meta_list, info_list=None): - if not meta_list: - return - for info_f in variadic(info_list or meta_list): - if isinstance(info.get(info_f), (compat_str, compat_numeric_types)): - for meta_f in variadic(meta_list): - metadata[meta_f] = info[info_f] - break + value = next(( + str(info[key]) for key in [meta_prefix] + list(variadic(info_list or meta_list)) + if info.get(key) is not None), None) + if value not in ('', None): + metadata.update({meta_f: value for meta_f in variadic(meta_list)}) # See [1-4] for some info on media metadata/metadata supported # by ffmpeg. 
@@ -695,9 +694,9 @@ class FFmpegMetadataPP(FFmpegPostProcessor): add('episode_id', ('episode', 'episode_id')) add('episode_sort', 'episode_number') - prefix = 'meta_' - for key in filter(lambda k: k.startswith(prefix), info.keys()): - add(key[len(prefix):], key) + for key, value in info.items(): + if value is not None and key != meta_prefix and key.startswith(meta_prefix): + metadata[key[len(meta_prefix):]] = value for name, value in metadata.items(): yield ('-metadata', f'{name}={value}') From e820fbaa6ff41625b6f4d8453253883b86bf9ca4 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 18 Oct 2021 15:23:42 +0530 Subject: [PATCH 283/641] Do not verify thumbnail URLs by default Partially reverts cca80fe6110653582e8c8a8d06490b4028ffd755 and 0ba692acc8feffd46b6e1085fb4a2849b685945c Unless `--check-formats` is specified, this causes yt-dlp to return incorrect thumbnail urls. See https://github.com/yt-dlp/yt-dlp/issues/340#issuecomment-877909966, #402 But the overhead in general use is not worth it Closes #694, #725 --- yt_dlp/YoutubeDL.py | 17 +++-------------- yt_dlp/extractor/common.py | 1 - yt_dlp/extractor/youtube.py | 7 ++----- 3 files changed, 5 insertions(+), 20 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 4a7712cb63..cf97ff21cf 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2095,25 +2095,14 @@ class YoutubeDL(object): t.get('url'))) def thumbnail_tester(): - if self.params.get('check_formats'): - test_all = True - to_screen = lambda msg: self.to_screen(f'[info] {msg}') - else: - test_all = False - to_screen = self.write_debug - def test_thumbnail(t): - if not test_all and not t.get('_test_url'): - return True - to_screen('Testing thumbnail %s' % t['id']) + self.to_screen(f'[info] Testing thumbnail {t["id"]}') try: self.urlopen(HEADRequest(t['url'])) except network_exceptions as err: - to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % ( - t['id'], t['url'], error_to_compat_str(err))) + self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...') return False return True - return test_thumbnail for i, t in enumerate(thumbnails): @@ -2123,7 +2112,7 @@ class YoutubeDL(object): t['resolution'] = '%dx%d' % (t['width'], t['height']) t['url'] = sanitize_url(t['url']) - if self.params.get('check_formats') is not False: + if self.params.get('check_formats'): info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse() else: info_dict['thumbnails'] = thumbnails diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index dbe7dfcbf1..0a14f7c0d3 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -233,7 +233,6 @@ class InfoExtractor(object): * "resolution" (optional, string "{width}x{height}", deprecated) * "filesize" (optional, int) - * "_test_url" (optional, bool) - If true, test the URL thumbnail: Full URL to a video thumbnail image. description: Full video description. uploader: Full name of the video uploader. 
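For context, the retained `thumbnail_tester` above only probes thumbnail URLs when
`check_formats` is enabled, dropping any URL whose HEAD request fails. A minimal
standalone sketch of the same probe (using plain urllib instead of yt-dlp's
configured opener; the helper name is illustrative):

    import urllib.request

    def thumbnail_is_reachable(url, timeout=20):
        # HEAD request only - any network/HTTP error marks the thumbnail as unusable
        try:
            urllib.request.urlopen(
                urllib.request.Request(url, method='HEAD'), timeout=timeout)
            return True
        except OSError:
            return False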
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index b71cd4292f..b9566a0a7e 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2699,21 +2699,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # The best resolution thumbnails sometimes does not appear in the webpage # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029> - hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3'] - # TODO: Test them also? - For some videos, even these don't exist - guaranteed_thumbnail_names = [ + thumbnail_names = [ + 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3', 'hqdefault', 'hq1', 'hq2', 'hq3', '0', 'mqdefault', 'mq1', 'mq2', 'mq3', 'default', '1', '2', '3' ] - thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names n_thumbnail_names = len(thumbnail_names) thumbnails.extend({ 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format( video_id=video_id, name=name, ext=ext, webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''), - '_test_url': name in hq_thumbnail_names, } for name in thumbnail_names for ext in ('webp', 'jpg')) for thumb in thumbnails: i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names) From 2d9ec70423121dbf280475769690f19b0034ee8b Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 18 Oct 2021 16:03:05 +0530 Subject: [PATCH 284/641] [ModifyChapters] Allow removing sections by timestamp Eg: --remove-chapters "*10:15-15:00". The `*` prefix is used so as to avoid any conflicts with other valid regex --- README.md | 6 +++++- yt_dlp/__init__.py | 10 +++++++++- yt_dlp/options.py | 6 +++++- yt_dlp/postprocessor/modify_chapters.py | 13 +++++++++++-- 4 files changed, 30 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index cbd3f337d2..141be3315d 100644 --- a/README.md +++ b/README.md @@ -847,7 +847,11 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t --no-split-chapters Do not split video based on chapters (default) --remove-chapters REGEX Remove chapters whose title matches the - given regular expression. This option can + given regular expression. Time ranges + prefixed by a "*" can also be used in place + of chapters to remove the specified range. + Eg: --remove-chapters "*10:15-15:00" + --remove-chapters "intro". This option can be used multiple times --no-remove-chapters Do not remove any chapters from the file (default) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 4b82efea7f..b952cc0625 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -31,6 +31,7 @@ from .utils import ( expand_path, match_filter_func, MaxDownloadsReached, + parse_duration, preferredencoding, read_batch_urls, RejectedVideoReached, @@ -490,8 +491,14 @@ def _real_main(argv=None): if opts.allsubtitles and not opts.writeautomaticsub: opts.writesubtitles = True # ModifyChapters must run before FFmpegMetadataPP - remove_chapters_patterns = [] + remove_chapters_patterns, remove_ranges = [], [] for regex in opts.remove_chapters: + if regex.startswith('*'): + dur = list(map(parse_duration, regex[1:].split('-'))) + if len(dur) == 2 and all(t is not None for t in dur): + remove_ranges.append(tuple(dur)) + continue + parser.error(f'invalid --remove-chapters time range {regex!r}. 
Must be of the form ?start-end') try: remove_chapters_patterns.append(re.compile(regex)) except re.error as err: @@ -501,6 +508,7 @@ def _real_main(argv=None): 'key': 'ModifyChapters', 'remove_chapters_patterns': remove_chapters_patterns, 'remove_sponsor_segments': opts.sponsorblock_remove, + 'remove_ranges': remove_ranges, 'sponsorblock_chapter_title': opts.sponsorblock_chapter_title, 'force_keyframes': opts.force_keyframes_at_cuts }) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index d2dc7687b8..1c99e7e7c3 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1378,7 +1378,11 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--remove-chapters', metavar='REGEX', dest='remove_chapters', action='append', - help='Remove chapters whose title matches the given regular expression. This option can be used multiple times') + help=( + 'Remove chapters whose title matches the given regular expression. ' + 'Time ranges prefixed by a "*" can also be used in place of chapters to remove the specified range. ' + 'Eg: --remove-chapters "*10:15-15:00" --remove-chapters "intro". ' + 'This option can be used multiple times')) postproc.add_option( '--no-remove-chapters', dest='remove_chapters', action='store_const', const=None, help='Do not remove any chapters from the file (default)') diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py index 72a705fc55..a0818c41ba 100644 --- a/yt_dlp/postprocessor/modify_chapters.py +++ b/yt_dlp/postprocessor/modify_chapters.py @@ -20,11 +20,12 @@ DEFAULT_SPONSORBLOCK_CHAPTER_TITLE = '[SponsorBlock]: %(category_names)l' class ModifyChaptersPP(FFmpegPostProcessor): - def __init__(self, downloader, remove_chapters_patterns=None, remove_sponsor_segments=None, - sponsorblock_chapter_title=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, force_keyframes=False): + def __init__(self, downloader, remove_chapters_patterns=None, remove_sponsor_segments=None, remove_ranges=None, + *, sponsorblock_chapter_title=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, force_keyframes=False): FFmpegPostProcessor.__init__(self, downloader) self._remove_chapters_patterns = set(remove_chapters_patterns or []) self._remove_sponsor_segments = set(remove_sponsor_segments or []) + self._ranges_to_remove = set(remove_ranges or []) self._sponsorblock_chapter_title = sponsorblock_chapter_title self._force_keyframes = force_keyframes @@ -97,6 +98,14 @@ class ModifyChaptersPP(FFmpegPostProcessor): if warn_no_chapter_to_remove: self.to_screen('There are no matching SponsorBlock chapters') + sponsor_chapters.extend({ + 'start_time': start, + 'end_time': end, + 'category': 'manually_removed', + '_categories': [('manually_removed', start, end)], + 'remove': True, + } for start, end in self._ranges_to_remove) + return chapters, sponsor_chapters def _get_supported_subs(self, info): From 17bddf3e95873230d85723e306641b2b3fcb87a9 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 18 Oct 2021 16:10:27 +0530 Subject: [PATCH 285/641] Reduce default `--socket-timeout` --- test/parameters.json | 1 - yt_dlp/YoutubeDL.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/test/parameters.json b/test/parameters.json index 9ca7d2ca9a..8544f1ab29 100644 --- a/test/parameters.json +++ b/test/parameters.json @@ -44,6 +44,5 @@ "writesubtitles": false, "allsubtitles": false, "listsubtitles": false, - "socket_timeout": 20, "fixup": "never" } diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index cf97ff21cf..50cb11d494 100644 --- 
a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3347,7 +3347,7 @@ class YoutubeDL(object): def _setup_opener(self): timeout_val = self.params.get('socket_timeout') - self._socket_timeout = 600 if timeout_val is None else float(timeout_val) + self._socket_timeout = 20 if timeout_val is None else float(timeout_val) opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser') opts_cookiefile = self.params.get('cookiefile') From 176f1866cb437dd59cf8f600638cfd7ba2a8525e Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 18 Oct 2021 18:34:21 +0530 Subject: [PATCH 286/641] Add HDR information to formats --- README.md | 6 ++++-- test/test_utils.py | 22 ++++++++++++++++++++++ yt_dlp/YoutubeDL.py | 5 ++++- yt_dlp/extractor/common.py | 6 +++++- yt_dlp/extractor/youtube.py | 2 +- yt_dlp/utils.py | 12 +++++++++++- 6 files changed, 47 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 141be3315d..ce76474a2c 100644 --- a/README.md +++ b/README.md @@ -1060,6 +1060,7 @@ The available fields are: - `asr` (numeric): Audio sampling rate in Hertz - `vbr` (numeric): Average video bitrate in KBit/s - `fps` (numeric): Frame rate + - `dynamic_range` (string): The dynamic range of the video - `vcodec` (string): Name of the video codec in use - `container` (string): Name of the container format - `filesize` (numeric): The number of bytes, if known in advance @@ -1283,6 +1284,7 @@ The available fields are: - `width`: Width of video - `res`: Video resolution, calculated as the smallest dimension. - `fps`: Framerate of video + - `hdr`: The dynamic range of the video (`DV` > `HDR12` > `HDR10+` > `HDR10` > `SDR`) - `tbr`: Total average bitrate in KBit/s - `vbr`: Average video bitrate in KBit/s - `abr`: Average audio bitrate in KBit/s @@ -1293,9 +1295,9 @@ The available fields are: All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. Eg: `+res` prefers format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. Eg: `res:720` prefers larger videos, but no larger than 720p and the smallest video if there are no videos less than 720p. For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. Eg: `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`. You can also make the sorting prefer the nearest values to the provided by using `~` as the delimiter. Eg: `filesize~1G` prefers the format with filesize closest to 1 GiB. -The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,codec:vp9.2,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order. +The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behaviour can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,codec:vp9.2,size,br,asr,proto,ext,hasaud,source,id`. 
The extractors may override this default order, but they cannot override the user-provided order. -Note that the default has `codec:vp9.2`; i.e. `av1` is not prefered +Note that the default has `codec:vp9.2`; i.e. `av1` is not prefered. Similarly, the default for hdr is `hdr:12`; i.e. dolby vision is not prefered. These choices are made since DV and AV1 formats are not yet fully compatible with most devices. This may be changed in the future as more devices become capable of smoothly playing back these formats. If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`. diff --git a/test/test_utils.py b/test/test_utils.py index 7fc431505f..9a5e3f0f0d 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -848,30 +848,52 @@ class TestUtil(unittest.TestCase): self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { 'vcodec': 'avc1.77.30', 'acodec': 'mp4a.40.2', + 'dynamic_range': None, }) self.assertEqual(parse_codecs('mp4a.40.2'), { 'vcodec': 'none', 'acodec': 'mp4a.40.2', + 'dynamic_range': None, }) self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), { 'vcodec': 'avc1.42001e', 'acodec': 'mp4a.40.5', + 'dynamic_range': None, }) self.assertEqual(parse_codecs('avc3.640028'), { 'vcodec': 'avc3.640028', 'acodec': 'none', + 'dynamic_range': None, }) self.assertEqual(parse_codecs(', h264,,newcodec,aac'), { 'vcodec': 'h264', 'acodec': 'aac', + 'dynamic_range': None, }) self.assertEqual(parse_codecs('av01.0.05M.08'), { 'vcodec': 'av01.0.05M.08', 'acodec': 'none', + 'dynamic_range': None, + }) + self.assertEqual(parse_codecs('vp9.2'), { + 'vcodec': 'vp9.2', + 'acodec': 'none', + 'dynamic_range': 'HDR10', + }) + self.assertEqual(parse_codecs('av01.0.12M.10.0.110.09.16.09.0'), { + 'vcodec': 'av01.0.12M.10', + 'acodec': 'none', + 'dynamic_range': 'HDR10', + }) + self.assertEqual(parse_codecs('dvhe'), { + 'vcodec': 'dvhe', + 'acodec': 'none', + 'dynamic_range': 'DV', }) self.assertEqual(parse_codecs('theora, vorbis'), { 'vcodec': 'theora', 'acodec': 'vorbis', + 'dynamic_range': None, }) self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), { 'vcodec': 'unknownvcodec', diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 50cb11d494..5d8e0bded0 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2291,6 +2291,8 @@ class YoutubeDL(object): format['protocol'] = determine_protocol(format) if format.get('resolution') is None: format['resolution'] = self.format_resolution(format, default=None) + if format.get('dynamic_range') is None and format.get('vcodec') != 'none': + format['dynamic_range'] = 'SDR' # Add HTTP headers, so that external programs can use them from the # json output full_format_info = info_dict.copy() @@ -3176,6 +3178,7 @@ class YoutubeDL(object): format_field(f, 'ext'), self.format_resolution(f), format_field(f, 'fps', '%d'), + format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), '|', format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes), format_field(f, 'tbr', '%4dk'), @@ -3193,7 +3196,7 @@ class YoutubeDL(object): format_field(f, 'container', ignore=(None, f.get('ext'))), ))), ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] - header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' 
TBR', 'PROTO', + header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', '|', ' FILESIZE', ' TBR', 'PROTO', '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO'] else: table = [ diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 0a14f7c0d3..e00d8c42b5 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -147,6 +147,8 @@ class InfoExtractor(object): * width Width of the video, if known * height Height of the video, if known * resolution Textual description of width and height + * dynamic_range The dynamic range of the video. One of: + "SDR" (None), "HDR10", "HDR10+, "HDR12", "HLG, "DV" * tbr Average bitrate of audio and video in KBit/s * abr Average audio bitrate in KBit/s * acodec Name of the audio codec in use @@ -1507,7 +1509,7 @@ class InfoExtractor(object): regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$' default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality', - 'res', 'fps', 'codec:vp9.2', 'size', 'br', 'asr', + 'res', 'fps', 'hdr:12', 'codec:vp9.2', 'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'format_id') # These must not be aliases ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr', 'height', 'width', 'proto', 'vext', 'abr', 'aext', @@ -1518,6 +1520,8 @@ class InfoExtractor(object): 'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']}, 'acodec': {'type': 'ordered', 'regex': True, 'order': ['opus', 'vorbis', 'aac', 'mp?4a?', 'mp3', 'e?a?c-?3', 'dts', '', None, 'none']}, + 'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range', + 'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]}, 'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol', 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.+', '.*dash', 'ws|websocket', '', 'mms|rtsp', 'none', 'f4']}, 'vext': {'type': 'ordered', 'field': 'video_ext', diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index b9566a0a7e..aa58a22bff 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2653,7 +2653,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Source is given priority since formats that throttle are given lower source_preference # When throttling issue is fully fixed, remove this - self._sort_formats(formats, ('quality', 'res', 'fps', 'source', 'codec:vp9.2', 'lang')) + self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang')) keywords = get_first(video_details, 'keywords', expected_type=list) or [] if not keywords and webpage: diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 3ac2fbc4be..28431ac733 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4618,12 +4618,21 @@ def parse_codecs(codecs_str): return {} split_codecs = list(filter(None, map( str.strip, codecs_str.strip().strip(',').split(',')))) - vcodec, acodec = None, None + vcodec, acodec, hdr = None, None, None for full_codec in split_codecs: codec = full_codec.split('.')[0] if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora', 'dvh1', 'dvhe'): if not vcodec: vcodec = full_codec + if codec in ('dvh1', 'dvhe'): + hdr = 'DV' + elif codec == 'vp9' and vcodec.startswith('vp9.2'): + hdr = 'HDR10' + elif codec == 'av01': + parts = full_codec.split('.') + if len(parts) > 3 and parts[3] == '10': + hdr = 'HDR10' + vcodec = '.'.join(parts[:4]) elif codec in ('mp4a', 
'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): if not acodec: acodec = full_codec @@ -4639,6 +4648,7 @@ def parse_codecs(codecs_str): return { 'vcodec': vcodec or 'none', 'acodec': acodec or 'none', + 'dynamic_range': hdr, } return {} From 1e520b5535fbd870f46981fc0de228dc781bc361 Mon Sep 17 00:00:00 2001 From: Zirro <code@zirro.se> Date: Mon, 18 Oct 2021 21:11:07 +0200 Subject: [PATCH 287/641] Add option `--no-batch-file` (#1335) Authored by: Zirro --- README.md | 1 + yt_dlp/options.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/README.md b/README.md index ce76474a2c..d410d04d18 100644 --- a/README.md +++ b/README.md @@ -465,6 +465,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t stdin), one URL per line. Lines starting with '#', ';' or ']' are considered as comments and ignored + --no-batch-file Do not read URLs from batch file (default) -P, --paths [TYPES:]PATH The paths where the files should be downloaded. Specify the type of file and the path separated by a colon ":". All the diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 1c99e7e7c3..0638e86429 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -971,6 +971,10 @@ def parseOpts(overrideArguments=None): dest='batchfile', metavar='FILE', help="File containing URLs to download ('-' for stdin), one URL per line. " "Lines starting with '#', ';' or ']' are considered as comments and ignored") + filesystem.add_option( + '--no-batch-file', + dest='batchfile', action='store_const', const=None, + help='Do not read URLs from batch file (default)') filesystem.add_option( '-P', '--paths', metavar='[TYPES:]PATH', dest='paths', default={}, type='str', From e619d8a752d00aa9394e41b9b0c3c0d8f348eea6 Mon Sep 17 00:00:00 2001 From: Nil Admirari <50202386+nihil-admirari@users.noreply.github.com> Date: Tue, 19 Oct 2021 08:51:05 +0000 Subject: [PATCH 288/641] [ModifyChapters] Do not mutate original chapters (#1322) Closes #1295 Authored by: nihil-admirari --- yt_dlp/postprocessor/modify_chapters.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py index a0818c41ba..dca8762003 100644 --- a/yt_dlp/postprocessor/modify_chapters.py +++ b/yt_dlp/postprocessor/modify_chapters.py @@ -31,8 +31,10 @@ class ModifyChaptersPP(FFmpegPostProcessor): @PostProcessor._restrict_to(images=False) def run(self, info): + # Chapters must be preserved intact when downloading multiple formats of the same video. chapters, sponsor_chapters = self._mark_chapters_to_remove( - info.get('chapters') or [], info.get('sponsorblock_chapters') or []) + copy.deepcopy(info.get('chapters')) or [], + copy.deepcopy(info.get('sponsorblock_chapters')) or []) if not chapters and not sponsor_chapters: return [], info @@ -126,7 +128,7 @@ class ModifyChaptersPP(FFmpegPostProcessor): cuts = [] def append_cut(c): - assert 'remove' in c + assert 'remove' in c, 'Not a cut is appended to cuts' last_to_cut = cuts[-1] if cuts else None if last_to_cut and last_to_cut['end_time'] >= c['start_time']: last_to_cut['end_time'] = max(last_to_cut['end_time'], c['end_time']) @@ -154,7 +156,7 @@ class ModifyChaptersPP(FFmpegPostProcessor): new_chapters = [] def append_chapter(c): - assert 'remove' not in c + assert 'remove' not in c, 'Cut is appended to chapters' length = c['end_time'] - c['start_time'] - excess_duration(c) # Chapter is completely covered by cuts or sponsors. 
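             # (its computed length, after subtracting the duration of all
             # overlapping cuts via excess_duration(), is zero or negative,
             # so nothing of the chapter remains to keep)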
if length <= 0: @@ -237,7 +239,7 @@ class ModifyChaptersPP(FFmpegPostProcessor): heapq.heappush(chapters, (c['start_time'], i, c)) # (normal, sponsor) and (sponsor, sponsor) else: - assert '_categories' in c + assert '_categories' in c, 'Normal chapters overlap' cur_chapter['_was_cut'] = True c['_was_cut'] = True # Push the part after the sponsor to PQ. From 9fab498fbf38dca24ef215d4789b13dd24d7952d Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 19 Oct 2021 18:52:17 +0530 Subject: [PATCH 289/641] [http] Retry on socket timeout Closes #1222 --- yt_dlp/downloader/http.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index 2e95bb9d10..6290884a8e 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -191,11 +191,13 @@ class HttpFD(FileDownloader): # Unexpected HTTP error raise raise RetryDownload(err) - except socket.error as err: - if err.errno != errno.ECONNRESET: - # Connection reset is no problem, just retry - raise + except socket.timeout as err: raise RetryDownload(err) + except socket.error as err: + if err.errno in (errno.ECONNRESET, errno.ETIMEDOUT): + # Connection reset is no problem, just retry + raise RetryDownload(err) + raise def download(): nonlocal throttle_start From aa7785f860be0bae7135ee32fe0ef4f0ab00bbc1 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 19 Oct 2021 22:58:14 +0530 Subject: [PATCH 290/641] [utils] Standardize timestamp formatting code Closes #1285 --- test/test_utils.py | 8 ++++---- yt_dlp/downloader/common.py | 13 ++++++------- yt_dlp/extractor/adn.py | 9 +++------ yt_dlp/utils.py | 30 +++++++++++++++++++++++------- yt_dlp/webvtt.py | 8 ++------ 5 files changed, 38 insertions(+), 30 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 9a5e3f0f0d..d84c3d3eef 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1390,21 +1390,21 @@ The first line </body> </tt>'''.encode('utf-8') srt_data = '''1 -00:00:02,080 --> 00:00:05,839 +00:00:02,080 --> 00:00:05,840 <font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font> 2 -00:00:02,080 --> 00:00:05,839 +00:00:02,080 --> 00:00:05,840 <b><font color="cyan" face="sansSerif" size="16"><font color="lime">part 1 </font>part 2</font></b> 3 -00:00:05,839 --> 00:00:09,560 +00:00:05,840 --> 00:00:09,560 <u><font color="lime">line 3 part 3</font></u> 4 -00:00:09,560 --> 00:00:12,359 +00:00:09,560 --> 00:00:12,360 <i><u><font color="yellow"><font color="lime">inner </font>style</font></u></i> diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 9081794dbc..6cfbb6657a 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -12,6 +12,7 @@ from ..utils import ( format_bytes, shell_quote, timeconvert, + timetuple_from_msec, ) from ..minicurses import ( MultilineLogger, @@ -75,14 +76,12 @@ class FileDownloader(object): @staticmethod def format_seconds(seconds): - (mins, secs) = divmod(seconds, 60) - (hours, mins) = divmod(mins, 60) - if hours > 99: + time = timetuple_from_msec(seconds * 1000) + if time.hours > 99: return '--:--:--' - if hours == 0: - return '%02d:%02d' % (mins, secs) - else: - return '%02d:%02d:%02d' % (hours, mins, secs) + if not time.hours: + return '%02d:%02d' % time[1:-1] + return '%02d:%02d:%02d' % time[:-1] @staticmethod def calc_percent(byte_counter, data_len): diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index 
a55ebbcbd6..5a1283baa5 100644 --- a/yt_dlp/extractor/adn.py +++ b/yt_dlp/extractor/adn.py @@ -15,6 +15,7 @@ from ..compat import ( compat_ord, ) from ..utils import ( + ass_subtitles_timecode, bytes_to_intlist, bytes_to_long, ExtractorError, @@ -68,10 +69,6 @@ class ADNIE(InfoExtractor): 'end': 4, } - @staticmethod - def _ass_subtitles_timecode(seconds): - return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100) - def _get_subtitles(self, sub_url, video_id): if not sub_url: return None @@ -117,8 +114,8 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' continue alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0) ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % ( - self._ass_subtitles_timecode(start), - self._ass_subtitles_timecode(end), + ass_subtitles_timecode(start), + ass_subtitles_timecode(end), '{\\a%d}' % alignment if alignment != 2 else '', text.replace('\n', '\\N').replace('<i>', '{\\i1}').replace('</i>', '{\\i0}')) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 28431ac733..b88257bc27 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2342,14 +2342,25 @@ def decodeOption(optval): return optval +_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds')) + + +def timetuple_from_msec(msec): + secs, msec = divmod(msec, 1000) + mins, secs = divmod(secs, 60) + hrs, mins = divmod(mins, 60) + return _timetuple(hrs, mins, secs, msec) + + def formatSeconds(secs, delim=':', msec=False): - if secs > 3600: - ret = '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60) - elif secs > 60: - ret = '%d%s%02d' % (secs // 60, delim, secs % 60) + time = timetuple_from_msec(secs * 1000) + if time.hours: + ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds) + elif time.minutes: + ret = '%d%s%02d' % (time.minutes, delim, time.seconds) else: - ret = '%d' % secs - return '%s.%03d' % (ret, secs % 1) if msec else ret + ret = '%d' % time.seconds + return '%s.%03d' % (ret, time.milliseconds) if msec else ret def _ssl_load_windows_store_certs(ssl_context, storename): @@ -4855,7 +4866,12 @@ def parse_dfxp_time_expr(time_expr): def srt_subtitles_timecode(seconds): - return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000) + return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000) + + +def ass_subtitles_timecode(seconds): + time = timetuple_from_msec(seconds * 1000) + return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10) def dfxp2srt(dfxp_data): diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py index cd936e7e5f..962aa57ad6 100644 --- a/yt_dlp/webvtt.py +++ b/yt_dlp/webvtt.py @@ -13,7 +13,7 @@ in RFC 8216 §3.5 <https://tools.ietf.org/html/rfc8216#section-3.5>. import re import io -from .utils import int_or_none +from .utils import int_or_none, timetuple_from_msec from .compat import ( compat_str as str, compat_Pattern, @@ -124,11 +124,7 @@ def _format_ts(ts): Convert an MPEG PES timestamp into a WebVTT timestamp. This will lose sub-millisecond precision. 
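 
     A rough worked example (illustrative only; PES time runs at 90 kHz,
     i.e. 90 ticks per millisecond, and the `+ 45` makes the integer
     division round to the nearest millisecond):
 
         _format_ts(180090)  # int((180090 + 45) // 90) -> 2001 ms -> '00:00:02.001'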
""" - msec = int((ts + 45) // 90) - secs, msec = divmod(msec, 1000) - mins, secs = divmod(secs, 60) - hrs, mins = divmod(mins, 60) - return '%02u:%02u:%02u.%03u' % (hrs, mins, secs, msec) + return '%02u:%02u:%02u.%03u' % timetuple_from_msec(int((ts + 45) // 90)) class Block(object): From 2cda6b401d4b9af36a2db71c71e1872ab7e4a6b6 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 19 Oct 2021 22:13:45 +0530 Subject: [PATCH 291/641] Revert "[fragments] Pad fragments before decrypting (#1298)" This reverts commit 373475f03553a7fff2d20df878755bfad2fab8e5. --- yt_dlp/downloader/fragment.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index d0eaede7ee..6a490131b1 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -355,8 +355,7 @@ class FragmentFD(FileDownloader): # not what it decrypts to. if self.params.get('test', False): return frag_content - padding_len = 16 - (len(frag_content) % 16) - decrypted_data = aes_cbc_decrypt_bytes(frag_content + bytes([padding_len] * padding_len), decrypt_info['KEY'], iv) + decrypted_data = aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv) return decrypted_data[:-decrypted_data[-1]] return decrypt_fragment From b4b855ebc7fac536a85f087f6921df69dec4e470 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 19 Oct 2021 21:51:33 +0530 Subject: [PATCH 292/641] [fragment] Print error message when skipping fragment --- yt_dlp/downloader/external.py | 6 +++--- yt_dlp/downloader/fragment.py | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 40b9dcfe30..e30efb0576 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -152,11 +152,11 @@ class ExternalFD(FragmentFD): fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index) try: src, _ = sanitize_open(fragment_filename, 'rb') - except IOError: + except IOError as err: if skip_unavailable_fragments and frag_index > 1: - self.to_screen('[%s] Skipping fragment %d ...' % (self.get_basename(), frag_index)) + self.report_skip_fragment(frag_index, err) continue - self.report_error('Unable to open fragment %d' % frag_index) + self.report_error(f'Unable to open fragment {frag_index}; {err}') return -1 dest.write(decrypt_fragment(fragment, src.read())) src.close() diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 6a490131b1..c345f3148b 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -72,8 +72,9 @@ class FragmentFD(FileDownloader): '\r[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s) ...' % (error_to_compat_str(err), frag_index, count, self.format_retries(retries))) - def report_skip_fragment(self, frag_index): - self.to_screen('[download] Skipping fragment %d ...' 
% frag_index) + def report_skip_fragment(self, frag_index, err=None): + err = f' {err};' if err else '' + self.to_screen(f'[download]{err} Skipping fragment {frag_index:d} ...') def _prepare_url(self, info_dict, url): headers = info_dict.get('http_headers') @@ -443,7 +444,7 @@ class FragmentFD(FileDownloader): def append_fragment(frag_content, frag_index, ctx): if not frag_content: if not is_fatal(frag_index - 1): - self.report_skip_fragment(frag_index) + self.report_skip_fragment(frag_index, 'fragment not found') return True else: ctx['dest_stream'].close() From d3c93ec2b7f5bcb872b0afb169efaa2f1abdf6e2 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 20 Oct 2021 21:49:40 +0530 Subject: [PATCH 293/641] Don't create console for subprocesses on Windows (#1261) Closes #1251 --- yt_dlp/YoutubeDL.py | 13 +++++------- yt_dlp/cookies.py | 16 +++++++-------- yt_dlp/downloader/external.py | 20 +++++++++--------- yt_dlp/downloader/rtmp.py | 3 ++- yt_dlp/extractor/openload.py | 11 +++++----- yt_dlp/postprocessor/embedthumbnail.py | 6 +++--- yt_dlp/postprocessor/ffmpeg.py | 14 ++++++------- yt_dlp/postprocessor/sponskrub.py | 6 +++--- yt_dlp/update.py | 4 ++-- yt_dlp/utils.py | 28 ++++++++++++++++++-------- 10 files changed, 63 insertions(+), 58 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 5d8e0bded0..79f0b274d2 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -87,10 +87,10 @@ from .utils import ( parse_filesize, PerRequestProxyHandler, platform_name, + Popen, PostProcessingError, preferredencoding, prepend_extension, - process_communicate_or_kill, register_socks_protocols, RejectedVideoReached, render_table, @@ -578,12 +578,9 @@ class YoutubeDL(object): stdout=slave, stderr=self._err_file) try: - self._output_process = subprocess.Popen( - ['bidiv'] + width_args, **sp_kwargs - ) + self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs) except OSError: - self._output_process = subprocess.Popen( - ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) + self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs) self._output_channel = os.fdopen(master, 'rb') except OSError as ose: if ose.errno == errno.ENOENT: @@ -3280,11 +3277,11 @@ class YoutubeDL(object): if self.params.get('compat_opts'): write_debug('Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts'))) try: - sp = subprocess.Popen( + sp = Popen( ['git', 'rev-parse', '--short', 'HEAD'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=os.path.dirname(os.path.abspath(__file__))) - out, err = process_communicate_or_kill(sp) + out, err = sp.communicate_or_kill() out = out.decode().strip() if re.match('[0-9a-f]+', out): write_debug('Git HEAD: %s\n' % out) diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 049ec9fb1f..5f7fdf5843 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -17,7 +17,7 @@ from .compat import ( from .utils import ( bug_reports_message, expand_path, - process_communicate_or_kill, + Popen, YoutubeDLCookieJar, ) @@ -599,14 +599,14 @@ def _get_mac_keyring_password(browser_keyring_name, logger): return password.encode('utf-8') else: logger.debug('using find-generic-password to obtain password') - proc = subprocess.Popen(['security', 'find-generic-password', - '-w', # write password to stdout - '-a', browser_keyring_name, # match 'account' - '-s', '{} Safe Storage'.format(browser_keyring_name)], # match 'service' - stdout=subprocess.PIPE, - stderr=subprocess.DEVNULL) + proc = Popen( + ['security', 
'find-generic-password', + '-w', # write password to stdout + '-a', browser_keyring_name, # match 'account' + '-s', '{} Safe Storage'.format(browser_keyring_name)], # match 'service' + stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) try: - stdout, stderr = process_communicate_or_kill(proc) + stdout, stderr = proc.communicate_or_kill() if stdout[-1:] == b'\n': stdout = stdout[:-1] return stdout diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index e30efb0576..ce3370fb77 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -22,7 +22,7 @@ from ..utils import ( handle_youtubedl_headers, check_executable, is_outdated_version, - process_communicate_or_kill, + Popen, sanitize_open, ) @@ -116,9 +116,8 @@ class ExternalFD(FragmentFD): self._debug_cmd(cmd) if 'fragments' not in info_dict: - p = subprocess.Popen( - cmd, stderr=subprocess.PIPE) - _, stderr = process_communicate_or_kill(p) + p = Popen(cmd, stderr=subprocess.PIPE) + _, stderr = p.communicate_or_kill() if p.returncode != 0: self.to_stderr(stderr.decode('utf-8', 'replace')) return p.returncode @@ -128,9 +127,8 @@ class ExternalFD(FragmentFD): count = 0 while count <= fragment_retries: - p = subprocess.Popen( - cmd, stderr=subprocess.PIPE) - _, stderr = process_communicate_or_kill(p) + p = Popen(cmd, stderr=subprocess.PIPE) + _, stderr = p.communicate_or_kill() if p.returncode == 0: break # TODO: Decide whether to retry based on error code @@ -199,8 +197,8 @@ class CurlFD(ExternalFD): self._debug_cmd(cmd) # curl writes the progress to stderr so don't capture it. - p = subprocess.Popen(cmd) - process_communicate_or_kill(p) + p = Popen(cmd) + p.communicate_or_kill() return p.returncode @@ -476,7 +474,7 @@ class FFmpegFD(ExternalFD): args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) self._debug_cmd(args) - proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env) + proc = Popen(args, stdin=subprocess.PIPE, env=env) if url in ('-', 'pipe:'): self.on_process_started(proc, proc.stdin) try: @@ -488,7 +486,7 @@ class FFmpegFD(ExternalFD): # streams). Note that Windows is not affected and produces playable # files (see https://github.com/ytdl-org/youtube-dl/issues/8300). 
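                # (ffmpeg treats 'q' on stdin as a request to stop encoding and
                # finalize the output container, rather than dying mid-write)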
if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'): - process_communicate_or_kill(proc, b'q') + proc.communicate_or_kill(b'q') else: proc.kill() proc.wait() diff --git a/yt_dlp/downloader/rtmp.py b/yt_dlp/downloader/rtmp.py index 6dca64725d..90f1acfd44 100644 --- a/yt_dlp/downloader/rtmp.py +++ b/yt_dlp/downloader/rtmp.py @@ -12,6 +12,7 @@ from ..utils import ( encodeFilename, encodeArgument, get_exe_version, + Popen, ) @@ -26,7 +27,7 @@ class RtmpFD(FileDownloader): start = time.time() resume_percent = None resume_downloaded_data_len = None - proc = subprocess.Popen(args, stderr=subprocess.PIPE) + proc = Popen(args, stderr=subprocess.PIPE) cursor_in_new_line = True proc_stderr_closed = False try: diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py index dfdd0e526e..6ec54509b6 100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/extractor/openload.py @@ -17,7 +17,7 @@ from ..utils import ( get_exe_version, is_outdated_version, std_headers, - process_communicate_or_kill, + Popen, ) @@ -223,11 +223,10 @@ class PhantomJSwrapper(object): else: self.extractor.to_screen('%s: %s' % (video_id, note2)) - p = subprocess.Popen([ - self.exe, '--ssl-protocol=any', - self._TMP_FILES['script'].name - ], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = process_communicate_or_kill(p) + p = Popen( + [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = p.communicate_or_kill() if p.returncode != 0: raise ExtractorError( 'Executing JS failed\n:' + encodeArgument(err)) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index 3139a63388..918d3e7887 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -26,9 +26,9 @@ from ..utils import ( encodeArgument, encodeFilename, error_to_compat_str, + Popen, PostProcessingError, prepend_extension, - process_communicate_or_kill, shell_quote, ) @@ -183,8 +183,8 @@ class EmbedThumbnailPP(FFmpegPostProcessor): self._report_run('atomicparsley', filename) self.write_debug('AtomicParsley command line: %s' % shell_quote(cmd)) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = process_communicate_or_kill(p) + p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = p.communicate_or_kill() if p.returncode != 0: msg = stderr.decode('utf-8', 'replace').strip() raise EmbedThumbnailPPError(msg) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index e5595341d1..4a0a96427e 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -20,9 +20,9 @@ from ..utils import ( is_outdated_version, ISO639Utils, orderedSet, + Popen, PostProcessingError, prepend_extension, - process_communicate_or_kill, replace_extension, shell_quote, traverse_obj, @@ -178,10 +178,8 @@ class FFmpegPostProcessor(PostProcessor): encodeArgument('-i')] cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True)) self.write_debug('%s command line: %s' % (self.basename, shell_quote(cmd))) - handle = subprocess.Popen( - cmd, stderr=subprocess.PIPE, - stdout=subprocess.PIPE, stdin=subprocess.PIPE) - stdout_data, stderr_data = process_communicate_or_kill(handle) + handle = Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout_data, stderr_data = handle.communicate_or_kill() expected_ret = 0 if self.probe_available else 1 
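        # ffprobe exits 0 on success, while plain `ffmpeg -i <file>` (with no
        # output specified) exits 1 even when it probed the input successfully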
if handle.wait() != expected_ret: return None @@ -223,7 +221,7 @@ class FFmpegPostProcessor(PostProcessor): cmd += opts cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True)) self.write_debug('ffprobe command line: %s' % shell_quote(cmd)) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) stdout, stderr = p.communicate() return json.loads(stdout.decode('utf-8', 'replace')) @@ -284,8 +282,8 @@ class FFmpegPostProcessor(PostProcessor): for i, (path, opts) in enumerate(path_opts) if path) self.write_debug('ffmpeg command line: %s' % shell_quote(cmd)) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) - stdout, stderr = process_communicate_or_kill(p) + p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + stdout, stderr = p.communicate_or_kill() if p.returncode not in variadic(expected_retcodes): stderr = stderr.decode('utf-8', 'replace').strip() self.write_debug(stderr) diff --git a/yt_dlp/postprocessor/sponskrub.py b/yt_dlp/postprocessor/sponskrub.py index 932555a0ee..37e7411e44 100644 --- a/yt_dlp/postprocessor/sponskrub.py +++ b/yt_dlp/postprocessor/sponskrub.py @@ -11,9 +11,9 @@ from ..utils import ( encodeFilename, shell_quote, str_or_none, + Popen, PostProcessingError, prepend_extension, - process_communicate_or_kill, ) @@ -81,8 +81,8 @@ class SponSkrubPP(PostProcessor): self.write_debug('sponskrub command line: %s' % shell_quote(cmd)) pipe = None if self.get_param('verbose') else subprocess.PIPE - p = subprocess.Popen(cmd, stdout=pipe) - stdout = process_communicate_or_kill(p)[0] + p = Popen(cmd, stdout=pipe) + stdout = p.communicate_or_kill()[0] if p.returncode == 0: os.replace(temp_filename, filename) diff --git a/yt_dlp/update.py b/yt_dlp/update.py index 26f18bddab..e4b1280be6 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -10,7 +10,7 @@ import traceback from zipimport import zipimporter from .compat import compat_realpath -from .utils import encode_compat_str +from .utils import encode_compat_str, Popen from .version import __version__ @@ -191,7 +191,7 @@ def run_update(ydl): return try: # Continues to run in the background - subprocess.Popen( + Popen( 'ping 127.0.0.1 -n 5 -w 1000 & del /F "%s.old"' % exe, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) ydl.to_screen('Updated yt-dlp to version %s' % version_id) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index b88257bc27..319f6979ba 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2272,6 +2272,20 @@ def process_communicate_or_kill(p, *args, **kwargs): raise +class Popen(subprocess.Popen): + if sys.platform == 'win32': + _startupinfo = subprocess.STARTUPINFO() + _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + else: + _startupinfo = None + + def __init__(self, *args, **kwargs): + super(Popen, self).__init__(*args, **kwargs, startupinfo=self._startupinfo) + + def communicate_or_kill(self, *args, **kwargs): + return process_communicate_or_kill(self, *args, **kwargs) + + def get_subprocess_encoding(): if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5: # For subprocess calls, encode with locale encoding @@ -3977,8 +3991,7 @@ def check_executable(exe, args=[]): """ Checks if the given binary is installed somewhere in PATH, and returns its name. 
args can be a list of arguments for a short output (like -version) """ try: - process_communicate_or_kill(subprocess.Popen( - [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)) + Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill() except OSError: return False return exe @@ -3992,10 +4005,9 @@ def get_exe_version(exe, args=['--version'], # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers # SIGTTOU if yt-dlp is run in the background. # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656 - out, _ = process_communicate_or_kill(subprocess.Popen( - [encodeArgument(exe)] + args, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, stderr=subprocess.STDOUT)) + out, _ = Popen( + [encodeArgument(exe)] + args, stdin=subprocess.PIPE, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill() except OSError: return False if isinstance(out, bytes): # Python 2.x @@ -6155,11 +6167,11 @@ def write_xattr(path, key, value): + [encodeFilename(path, True)]) try: - p = subprocess.Popen( + p = Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) except EnvironmentError as e: raise XAttrMetadataError(e.errno, e.strerror) - stdout, stderr = process_communicate_or_kill(p) + stdout, stderr = p.communicate_or_kill() stderr = stderr.decode('utf-8', 'replace') if p.returncode != 0: raise XAttrMetadataError(p.returncode, stderr) From 27f817a84b8be5896caf7df2aeffbcc4904ecb75 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 21 Oct 2021 15:26:36 +0530 Subject: [PATCH 294/641] [docs] Migrate issues to use forms (#1302) Authored by: Ashish0804 --- .github/ISSUE_TEMPLATE/1_broken_site.md | 73 ------------------ .github/ISSUE_TEMPLATE/1_broken_site.yml | 63 ++++++++++++++++ .../ISSUE_TEMPLATE/2_site_support_request.md | 60 --------------- .../ISSUE_TEMPLATE/2_site_support_request.yml | 74 +++++++++++++++++++ .../ISSUE_TEMPLATE/3_site_feature_request.md | 43 ----------- .../ISSUE_TEMPLATE/3_site_feature_request.yml | 37 ++++++++++ .github/ISSUE_TEMPLATE/4_bug_report.md | 74 ------------------- .github/ISSUE_TEMPLATE/4_bug_report.yml | 57 ++++++++++++++ .github/ISSUE_TEMPLATE/5_feature_request.md | 43 ----------- .github/ISSUE_TEMPLATE/5_feature_request.yml | 30 ++++++++ .github/ISSUE_TEMPLATE/6_question.md | 43 ----------- .github/ISSUE_TEMPLATE/6_question.yml | 30 ++++++++ .github/ISSUE_TEMPLATE/config.yml | 5 ++ .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md | 73 ------------------ .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml | 63 ++++++++++++++++ .../2_site_support_request.md | 60 --------------- .../2_site_support_request.yml | 74 +++++++++++++++++++ .../3_site_feature_request.md | 43 ----------- .../3_site_feature_request.yml | 37 ++++++++++ .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md | 74 ------------------- .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml | 57 ++++++++++++++ .../ISSUE_TEMPLATE_tmpl/5_feature_request.md | 43 ----------- .../ISSUE_TEMPLATE_tmpl/5_feature_request.yml | 30 ++++++++ .github/ISSUE_TEMPLATE_tmpl/6_question.yml | 30 ++++++++ CONTRIBUTING.md | 12 +++ Makefile | 13 ++-- 26 files changed, 606 insertions(+), 635 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE/1_broken_site.md create mode 100644 .github/ISSUE_TEMPLATE/1_broken_site.yml delete mode 100644 .github/ISSUE_TEMPLATE/2_site_support_request.md create mode 100644 .github/ISSUE_TEMPLATE/2_site_support_request.yml delete mode 100644 
.github/ISSUE_TEMPLATE/3_site_feature_request.md create mode 100644 .github/ISSUE_TEMPLATE/3_site_feature_request.yml delete mode 100644 .github/ISSUE_TEMPLATE/4_bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/4_bug_report.yml delete mode 100644 .github/ISSUE_TEMPLATE/5_feature_request.md create mode 100644 .github/ISSUE_TEMPLATE/5_feature_request.yml delete mode 100644 .github/ISSUE_TEMPLATE/6_question.md create mode 100644 .github/ISSUE_TEMPLATE/6_question.yml create mode 100644 .github/ISSUE_TEMPLATE/config.yml delete mode 100644 .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md create mode 100644 .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml delete mode 100644 .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md create mode 100644 .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml delete mode 100644 .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md create mode 100644 .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml delete mode 100644 .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md create mode 100644 .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml delete mode 100644 .github/ISSUE_TEMPLATE_tmpl/5_feature_request.md create mode 100644 .github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml create mode 100644 .github/ISSUE_TEMPLATE_tmpl/6_question.yml diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md deleted file mode 100644 index 157eca91b5..0000000000 --- a/.github/ISSUE_TEMPLATE/1_broken_site.md +++ /dev/null @@ -1,73 +0,0 @@ ---- -name: Broken site support -about: Report broken or misfunctioning site -title: "[Broken] Website Name: A short description of the issue" -labels: ['triage', 'extractor-bug'] -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! - IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.10.10. If it's not, see https://github.com/yt-dlp/yt-dlp#update on how to update. Issues with outdated version will be REJECTED. -- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. -- Make sure that all URLs and arguments with special characters are properly quoted or escaped. -- Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp/issues. DO NOT post duplicates. -- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Finally, confirm all RELEVANT tasks from the following by putting x into all the boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running yt-dlp version **2021.10.10** -- [ ] I've checked that all provided URLs are alive and playable in a browser -- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped -- [ ] I've searched the bugtracker for similar issues including closed ones -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I have given an appropriate title to the issue - - -## Verbose log - -<!-- -Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. 
-Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your command line>`), copy the WHOLE output and insert it below. It should look similar to this: - [debug] System config: [] - [debug] User config: [] - [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKc'] - [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] yt-dlp version 2021.10.10 - [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 - [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 - [debug] Proxy map: {} - <more lines> ---> - -``` -PASTE VERBOSE LOG HERE - -``` -<!-- -Do not remove the above ``` ---> - - -## Description - -<!-- -Provide an explanation of your issue in an arbitrary form. Provide any additional information, suggested solution and as much context and examples as possible. -If work on your issue requires account credentials please provide them or explain how one can obtain them. ---> - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml new file mode 100644 index 0000000000..2a492d132d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -0,0 +1,63 @@ +name: Broken site support +description: Report broken or misfunctioning site +labels: [triage, extractor-bug] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a broken site + required: true + - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + required: true + - label: I've checked that all provided URLs are alive and playable in a browser + required: true + - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/ytdl-org/youtube-dl#video-url-contains-an-ampersand-and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required + - type: input + id: region + attributes: + label: Region + description: "Enter the region the site is accessible from" + placeholder: "India" + - type: textarea + id: description + attributes: + label: Description + description: | + Provide an explanation of your issue in an arbitrary form. + Provide any additional information, any suggested solutions, and as much context and examples as possible + placeholder: WRITE DESCRIPTION HERE + validations: + required: true + - type: textarea + id: log + attributes: + label: Verbose log + description: | + Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. + Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below. 
+ It should look similar to this: + placeholder: | + [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc'] + [debug] Portable config file: yt-dlp.conf + [debug] Portable config: ['-i'] + [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 + [debug] yt-dlp version 2021.10.10 (exe) + [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 + [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 + [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets + [debug] Proxy map: {} + yt-dlp is up to date (2021.10.10) + <more lines> + render: shell + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md deleted file mode 100644 index 1220344722..0000000000 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -name: Site support request -about: Request support for a new site -title: "[Site Request] Website Name" -labels: ['triage', 'site-request'] -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! - IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.10.10. If it's not, see https://github.com/yt-dlp/yt-dlp#update on how to update. Issues with outdated version will be REJECTED. -- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. -- Make sure that site you are requesting is not dedicated to copyright infringement. yt-dlp does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. -- Search the bugtracker for similar site support requests: https://github.com/yt-dlp/yt-dlp/issues. DO NOT post duplicates. -- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Finally, confirm all RELEVANT tasks from the following by putting x into all the boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running yt-dlp version **2021.10.10** -- [ ] I've checked that all provided URLs are alive and playable in a browser -- [ ] I've checked that none of provided URLs violate any copyrights -- [ ] The provided URLs do not contain any DRM to the best of my knowledge -- [ ] I've searched the bugtracker for similar site support requests including closed ones -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I have given an appropriate title to the issue - - -## Example URLs - -<!-- -Provide all kinds of example URLs support for which should be included. Replace following example URLs by yours. ---> - -- Single video: https://www.youtube.com/watch?v=BaW_jenozKc -- Single video: https://youtu.be/BaW_jenozKc -- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc - - -## Description - -<!-- -Provide any additional information. -If work on your issue requires account credentials please provide them or explain how one can obtain them. 
---> - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml new file mode 100644 index 0000000000..c0a22ac2b5 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -0,0 +1,74 @@ +name: Site support request +description: Request support for a new site +labels: [triage, site-request] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a new site support request + required: true + - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + required: true + - label: I've checked that all provided URLs are alive and playable in a browser + required: true + - label: I've checked that none of provided URLs [violate any copyrights](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) or contain any [DRM](https://en.wikipedia.org/wiki/Digital_rights_management) to the best of my knowledge + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and am willing to share it if required + - type: input + id: region + attributes: + label: Region + description: "Enter the region the site is accessible from" + placeholder: "India" + - type: textarea + id: example-urls + attributes: + label: Example URLs + description: | + Provide all kinds of example URLs, support for which should be included. Replace following example URLs by yours + value: | + - Single video: https://www.youtube.com/watch?v=BaW_jenozKc + - Single video: https://youtu.be/BaW_jenozKc + - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc + validations: + required: true + - type: textarea + id: description + attributes: + label: Description + description: | + Provide any additional information + placeholder: WRITE DESCRIPTION HERE + validations: + required: true + - type: textarea + id: log + attributes: + label: Verbose log + description: | + Provide the complete verbose output using one of the example URLs provided above. + Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below. 
+ It should look similar to this: + placeholder: | + [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc'] + [debug] Portable config file: yt-dlp.conf + [debug] Portable config: ['-i'] + [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 + [debug] yt-dlp version 2021.10.10 (exe) + [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 + [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 + [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets + [debug] Proxy map: {} + yt-dlp is up to date (2021.10.10) + <more lines> + render: shell + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md deleted file mode 100644 index 54536fce6d..0000000000 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -name: Site feature request -about: Request a new functionality for a site -title: "[Site Feature] Website Name: A short description of the feature" -labels: ['triage', 'site-enhancement'] -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! - IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.10.10. If it's not, see https://github.com/yt-dlp/yt-dlp#update on how to update. Issues with outdated version will be REJECTED. -- Search the bugtracker for similar site feature requests: https://github.com/yt-dlp/yt-dlp/issues. DO NOT post duplicates. -- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Finally, confirm all RELEVANT tasks from the following by putting x into all the boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running yt-dlp version **2021.10.10** -- [ ] I've searched the bugtracker for similar site feature requests including closed ones -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I have given an appropriate title to the issue - - -## Description - -<!-- -Provide an explanation of your site feature request in an arbitrary form. Please make sure the description is worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient. Provide any additional information, suggested solution and as much context and examples as possible. 
---> - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml new file mode 100644 index 0000000000..44c8a0816c --- /dev/null +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -0,0 +1,37 @@ +name: Site feature request +description: Request a new functionality for a site +labels: [triage, site-enhancement] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a site feature request + required: true + - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required + - type: input + id: region + attributes: + label: Region + description: "Enter the region the site is accessible from" + placeholder: "India" + - type: textarea + id: description + attributes: + label: Description + description: | + Provide an explanation of your site feature request in an arbitrary form. + Please make sure the description is worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). + Provide any additional information, any suggested solutions, and as much context and examples as possible + placeholder: WRITE DESCRIPTION HERE + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md deleted file mode 100644 index 6413e8b7ec..0000000000 --- a/.github/ISSUE_TEMPLATE/4_bug_report.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -name: Bug report -about: Report a bug unrelated to any particular site or extractor -title: '[Bug] A short description of the issue' -labels: ['triage', 'bug'] -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! - IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.10.10. If it's not, see https://github.com/yt-dlp/yt-dlp#update on how to update. Issues with outdated version will be REJECTED. -- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. -- Make sure that all URLs and arguments with special characters are properly quoted or escaped. -- Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp/issues. DO NOT post duplicates. 
-- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Finally, confirm all RELEVANT tasks from the following by putting x into all the boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm reporting a bug unrelated to a specific site -- [ ] I've verified that I'm running yt-dlp version **2021.10.10** -- [ ] I've checked that all provided URLs are alive and playable in a browser -- [ ] The provided URLs do not contain any DRM to the best of my knowledge -- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped -- [ ] I've searched the bugtracker for similar bug reports including closed ones -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I have given an appropriate title to the issue - - -## Verbose log - -<!-- -Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. -Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your command line>`), copy the WHOLE output and insert it below. It should look similar to this: - [debug] System config: [] - [debug] User config: [] - [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKc'] - [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] yt-dlp version 2021.10.10 - [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 - [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 - [debug] Proxy map: {} - <more lines> ---> - -``` -PASTE VERBOSE LOG HERE - -``` -<!-- -Do not remove the above ``` ---> - - -## Description - -<!-- -Provide an explanation of your issue in an arbitrary form. Please make sure the description is worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient. Provide any additional information, suggested solution and as much context and examples as possible. -If work on your issue requires account credentials please provide them or explain how one can obtain them. ---> - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml new file mode 100644 index 0000000000..1c609cab18 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -0,0 +1,57 @@ +name: Bug report +description: Report a bug unrelated to any particular site or extractor +labels: [triage,bug] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a bug unrelated to a specific site + required: true + - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + required: true + - label: I've checked that all provided URLs are alive and playable in a browser + required: true + - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/ytdl-org/youtube-dl#video-url-contains-an-ampersand-and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. 
DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - type: textarea + id: description + attributes: + label: Description + description: | + Provide an explanation of your issue in an arbitrary form. + Please make sure the description is worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). + Provide any additional information, any suggested solutions, and as much context and examples as possible + placeholder: WRITE DESCRIPTION HERE + validations: + required: true + - type: textarea + id: log + attributes: + label: Verbose log + description: | + Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. + Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below. + It should look similar to this: + placeholder: | + [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc'] + [debug] Portable config file: yt-dlp.conf + [debug] Portable config: ['-i'] + [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 + [debug] yt-dlp version 2021.10.10 (exe) + [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 + [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 + [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets + [debug] Proxy map: {} + yt-dlp is up to date (2021.10.10) + <more lines> + render: shell + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md deleted file mode 100644 index b04dbf9819..0000000000 --- a/.github/ISSUE_TEMPLATE/5_feature_request.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -name: Feature request -about: Request a new functionality unrelated to any particular site or extractor -title: "[Feature Request] A short description of your feature" -labels: ['triage', 'enhancement'] -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! - IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is 2021.10.10. If it's not, see https://github.com/yt-dlp/yt-dlp#update on how to update. Issues with outdated version will be REJECTED. -- Search the bugtracker for similar feature requests: https://github.com/yt-dlp/yt-dlp/issues. DO NOT post duplicates. 
-- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Finally, put x into all relevant boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm reporting a feature request -- [ ] I've verified that I'm running yt-dlp version **2021.10.10** -- [ ] I've searched the bugtracker for similar feature requests including closed ones -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I have given an appropriate title to the issue - - -## Description - -<!-- -Provide an explanation of your issue in an arbitrary form. Please make sure the description is worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient. Provide any additional information, suggested solution and as much context and examples as possible. ---> - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml new file mode 100644 index 0000000000..d839df95df --- /dev/null +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -0,0 +1,30 @@ +name: Feature request request +description: Request a new functionality unrelated to any particular site or extractor +labels: [triage, enhancement] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a feature request + required: true + - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - type: textarea + id: description + attributes: + label: Description + description: | + Provide an explanation of your site feature request in an arbitrary form. + Please make sure the description is worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). + Provide any additional information, any suggested solutions, and as much context and examples as possible + placeholder: WRITE DESCRIPTION HERE + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/6_question.md b/.github/ISSUE_TEMPLATE/6_question.md deleted file mode 100644 index 5ab17802a7..0000000000 --- a/.github/ISSUE_TEMPLATE/6_question.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -name: Ask question -about: Ask yt-dlp related question -title: "[Question] A short description of your question" -labels: question -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! 
- IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- Look through the README (https://github.com/yt-dlp/yt-dlp) -- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Search the bugtracker for similar questions: https://github.com/yt-dlp/yt-dlp/issues -- Finally, put x into all relevant boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm asking a question -- [ ] I've looked through the README -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I've searched the bugtracker for similar questions including closed ones -- [ ] I have given an appropriate title to the issue - - -## Question - -<!-- -Ask your question in an arbitrary form. Please make sure it's worded well enough to be understood, see https://github.com/yt-dlp/yt-dlp. ---> - -WRITE QUESTION HERE diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml new file mode 100644 index 0000000000..c101c2286d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -0,0 +1,30 @@ +name: Ask question +description: Ask yt-dlp related question +labels: [question] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm asking a question and not reporting a bug/feature request + required: true + - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp) + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues) for similar questions including closed ones + required: true + - type: textarea + id: question + attributes: + label: Question + description: | + Ask your question in an arbitrary form. + Please make sure it's worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). + Provide any additional information and as much context and examples as possible + placeholder: WRITE QUESTION HERE + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000000..61127d6828 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,5 @@ +blank_issues_enabled: false +contact_links: + - name: Get help from the community on Discord + url: https://discord.gg/H5MNcFW63r + about: Join the yt-dlp Discord for community-powered support! 
\ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md deleted file mode 100644 index 9ee0022964..0000000000 --- a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md +++ /dev/null @@ -1,73 +0,0 @@ ---- -name: Broken site support -about: Report broken or misfunctioning site -title: "[Broken] Website Name: A short description of the issue" -labels: ['triage', 'extractor-bug'] -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! - IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is %(version)s. If it's not, see https://github.com/yt-dlp/yt-dlp#update on how to update. Issues with outdated version will be REJECTED. -- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. -- Make sure that all URLs and arguments with special characters are properly quoted or escaped. -- Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp/issues. DO NOT post duplicates. -- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Finally, confirm all RELEVANT tasks from the following by putting x into all the boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm reporting a broken site support -- [ ] I've verified that I'm running yt-dlp version **%(version)s** -- [ ] I've checked that all provided URLs are alive and playable in a browser -- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped -- [ ] I've searched the bugtracker for similar issues including closed ones -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I have given an appropriate title to the issue - - -## Verbose log - -<!-- -Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. -Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your command line>`), copy the WHOLE output and insert it below. It should look similar to this: - [debug] System config: [] - [debug] User config: [] - [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKc'] - [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] yt-dlp version %(version)s - [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 - [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 - [debug] Proxy map: {} - <more lines> ---> - -``` -PASTE VERBOSE LOG HERE - -``` -<!-- -Do not remove the above ``` ---> - - -## Description - -<!-- -Provide an explanation of your issue in an arbitrary form. Provide any additional information, suggested solution and as much context and examples as possible. -If work on your issue requires account credentials please provide them or explain how one can obtain them. 
---> - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml new file mode 100644 index 0000000000..fdca0e53a8 --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml @@ -0,0 +1,63 @@ +name: Broken site support +description: Report broken or misfunctioning site +labels: [triage, extractor-bug] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a broken site + required: true + - label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + required: true + - label: I've checked that all provided URLs are alive and playable in a browser + required: true + - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/ytdl-org/youtube-dl#video-url-contains-an-ampersand-and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required + - type: input + id: region + attributes: + label: Region + description: "Enter the region the site is accessible from" + placeholder: "India" + - type: textarea + id: description + attributes: + label: Description + description: | + Provide an explanation of your issue in an arbitrary form. + Provide any additional information, any suggested solutions, and as much context and examples as possible + placeholder: WRITE DESCRIPTION HERE + validations: + required: true + - type: textarea + id: log + attributes: + label: Verbose log + description: | + Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. + Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below. 
+ It should look similar to this: + placeholder: | + [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc'] + [debug] Portable config file: yt-dlp.conf + [debug] Portable config: ['-i'] + [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 + [debug] yt-dlp version %(version)s (exe) + [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 + [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 + [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets + [debug] Proxy map: {} + yt-dlp is up to date (%(version)s) + <more lines> + render: shell + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md deleted file mode 100644 index e71abbab29..0000000000 --- a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -name: Site support request -about: Request support for a new site -title: "[Site Request] Website Name" -labels: ['triage', 'site-request'] -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! - IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is %(version)s. If it's not, see https://github.com/yt-dlp/yt-dlp#update on how to update. Issues with outdated version will be REJECTED. -- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. -- Make sure that site you are requesting is not dedicated to copyright infringement. yt-dlp does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights. -- Search the bugtracker for similar site support requests: https://github.com/yt-dlp/yt-dlp/issues. DO NOT post duplicates. -- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Finally, confirm all RELEVANT tasks from the following by putting x into all the boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm reporting a new site support request -- [ ] I've verified that I'm running yt-dlp version **%(version)s** -- [ ] I've checked that all provided URLs are alive and playable in a browser -- [ ] I've checked that none of provided URLs violate any copyrights -- [ ] The provided URLs do not contain any DRM to the best of my knowledge -- [ ] I've searched the bugtracker for similar site support requests including closed ones -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I have given an appropriate title to the issue - - -## Example URLs - -<!-- -Provide all kinds of example URLs support for which should be included. Replace following example URLs by yours. ---> - -- Single video: https://www.youtube.com/watch?v=BaW_jenozKc -- Single video: https://youtu.be/BaW_jenozKc -- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc - - -## Description - -<!-- -Provide any additional information. 
-If work on your issue requires account credentials please provide them or explain how one can obtain them. ---> - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml new file mode 100644 index 0000000000..be6427ce1a --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml @@ -0,0 +1,74 @@ +name: Site support request +description: Request support for a new site +labels: [triage, site-request] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a new site support request + required: true + - label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + required: true + - label: I've checked that all provided URLs are alive and playable in a browser + required: true + - label: I've checked that none of provided URLs [violate any copyrights](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) or contain any [DRM](https://en.wikipedia.org/wiki/Digital_rights_management) to the best of my knowledge + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and am willing to share it if required + - type: input + id: region + attributes: + label: Region + description: "Enter the region the site is accessible from" + placeholder: "India" + - type: textarea + id: example-urls + attributes: + label: Example URLs + description: | + Provide all kinds of example URLs, support for which should be included. Replace following example URLs by yours + value: | + - Single video: https://www.youtube.com/watch?v=BaW_jenozKc + - Single video: https://youtu.be/BaW_jenozKc + - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc + validations: + required: true + - type: textarea + id: description + attributes: + label: Description + description: | + Provide any additional information + placeholder: WRITE DESCRIPTION HERE + validations: + required: true + - type: textarea + id: log + attributes: + label: Verbose log + description: | + Provide the complete verbose output using one of the example URLs provided above. + Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below. 
+ It should look similar to this: + placeholder: | + [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc'] + [debug] Portable config file: yt-dlp.conf + [debug] Portable config: ['-i'] + [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 + [debug] yt-dlp version %(version)s (exe) + [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 + [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 + [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets + [debug] Proxy map: {} + yt-dlp is up to date (%(version)s) + <more lines> + render: shell + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md deleted file mode 100644 index e0ccd54161..0000000000 --- a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -name: Site feature request -about: Request a new functionality for a site -title: "[Site Feature] Website Name: A short description of the feature" -labels: ['triage', 'site-enhancement'] -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! - IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is %(version)s. If it's not, see https://github.com/yt-dlp/yt-dlp#update on how to update. Issues with outdated version will be REJECTED. -- Search the bugtracker for similar site feature requests: https://github.com/yt-dlp/yt-dlp/issues. DO NOT post duplicates. -- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Finally, confirm all RELEVANT tasks from the following by putting x into all the boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm reporting a site feature request -- [ ] I've verified that I'm running yt-dlp version **%(version)s** -- [ ] I've searched the bugtracker for similar site feature requests including closed ones -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I have given an appropriate title to the issue - - -## Description - -<!-- -Provide an explanation of your site feature request in an arbitrary form. Please make sure the description is worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient. Provide any additional information, suggested solution and as much context and examples as possible. 
---> - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml new file mode 100644 index 0000000000..f19d958c63 --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml @@ -0,0 +1,37 @@ +name: Site feature request +description: Request a new functionality for a site +labels: [triage, site-enhancement] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a site feature request + required: true + - label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required + - type: input + id: region + attributes: + label: Region + description: "Enter the region the site is accessible from" + placeholder: "India" + - type: textarea + id: description + attributes: + label: Description + description: | + Provide an explanation of your site feature request in an arbitrary form. + Please make sure the description is worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). + Provide any additional information, any suggested solutions, and as much context and examples as possible + placeholder: WRITE DESCRIPTION HERE + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md deleted file mode 100644 index 43e91b0522..0000000000 --- a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -name: Bug report -about: Report a bug unrelated to any particular site or extractor -title: '[Bug] A short description of the issue' -labels: ['triage', 'bug'] -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! - IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is %(version)s. If it's not, see https://github.com/yt-dlp/yt-dlp#update on how to update. Issues with outdated version will be REJECTED. -- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser. -- Make sure that all URLs and arguments with special characters are properly quoted or escaped. -- Search the bugtracker for similar issues: https://github.com/yt-dlp/yt-dlp/issues. DO NOT post duplicates. 
-- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Finally, confirm all RELEVANT tasks from the following by putting x into all the boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm reporting a bug unrelated to a specific site -- [ ] I've verified that I'm running yt-dlp version **%(version)s** -- [ ] I've checked that all provided URLs are alive and playable in a browser -- [ ] The provided URLs do not contain any DRM to the best of my knowledge -- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped -- [ ] I've searched the bugtracker for similar bug reports including closed ones -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I have given an appropriate title to the issue - - -## Verbose log - -<!-- -Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. -Add the `-v` flag to your command line you run yt-dlp with (`yt-dlp -v <your command line>`), copy the WHOLE output and insert it below. It should look similar to this: - [debug] System config: [] - [debug] User config: [] - [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKc'] - [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 - [debug] yt-dlp version %(version)s - [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 - [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 - [debug] Proxy map: {} - <more lines> ---> - -``` -PASTE VERBOSE LOG HERE - -``` -<!-- -Do not remove the above ``` ---> - - -## Description - -<!-- -Provide an explanation of your issue in an arbitrary form. Please make sure the description is worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient. Provide any additional information, suggested solution and as much context and examples as possible. -If work on your issue requires account credentials please provide them or explain how one can obtain them. ---> - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml new file mode 100644 index 0000000000..e4d669bb7b --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml @@ -0,0 +1,57 @@ +name: Bug report +description: Report a bug unrelated to any particular site or extractor +labels: [triage,bug] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a bug unrelated to a specific site + required: true + - label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + required: true + - label: I've checked that all provided URLs are alive and playable in a browser + required: true + - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/ytdl-org/youtube-dl#video-url-contains-an-ampersand-and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command) + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. 
DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - type: textarea + id: description + attributes: + label: Description + description: | + Provide an explanation of your issue in an arbitrary form. + Please make sure the description is worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). + Provide any additional information, any suggested solutions, and as much context and examples as possible + placeholder: WRITE DESCRIPTION HERE + validations: + required: true + - type: textarea + id: log + attributes: + label: Verbose log + description: | + Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. + Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below. + It should look similar to this: + placeholder: | + [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc'] + [debug] Portable config file: yt-dlp.conf + [debug] Portable config: ['-i'] + [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 + [debug] yt-dlp version %(version)s (exe) + [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 + [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 + [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets + [debug] Proxy map: {} + yt-dlp is up to date (%(version)s) + <more lines> + render: shell + validations: + required: true diff --git a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md deleted file mode 100644 index 075e0b1b32..0000000000 --- a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md +++ /dev/null @@ -1,43 +0,0 @@ ---- -name: Feature request -about: Request a new functionality unrelated to any particular site or extractor -title: "[Feature Request] A short description of your feature" -labels: ['triage', 'enhancement'] -assignees: '' - ---- - -<!-- - -###################################################################### - WARNING! - IGNORING THE FOLLOWING TEMPLATE WILL RESULT IN ISSUE CLOSED AS INCOMPLETE -###################################################################### - ---> - - -## Checklist - -<!-- -Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: -- First of, make sure you are using the latest version of yt-dlp. Run `yt-dlp --version` and ensure your version is %(version)s. If it's not, see https://github.com/yt-dlp/yt-dlp#update on how to update. Issues with outdated version will be REJECTED. -- Search the bugtracker for similar feature requests: https://github.com/yt-dlp/yt-dlp/issues. DO NOT post duplicates. 
-- Read "opening an issue" section in CONTRIBUTING.md: https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue -- Finally, put x into all relevant boxes like this [x] (Dont forget to delete the empty space) ---> - -- [ ] I'm reporting a feature request -- [ ] I've verified that I'm running yt-dlp version **%(version)s** -- [ ] I've searched the bugtracker for similar feature requests including closed ones -- [ ] I've read the opening an issue section in CONTRIBUTING.md -- [ ] I have given an appropriate title to the issue - - -## Description - -<!-- -Provide an explanation of your issue in an arbitrary form. Please make sure the description is worded well enough to be understood, see https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient. Provide any additional information, suggested solution and as much context and examples as possible. ---> - -WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml new file mode 100644 index 0000000000..27e2e773b4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml @@ -0,0 +1,30 @@ +name: Feature request request +description: Request a new functionality unrelated to any particular site or extractor +labels: [triage, enhancement] +body: + - type: checkboxes + id: checklist + attributes: + label: Checklist + description: | + Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: + options: + - label: I'm reporting a feature request + required: true + - label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + required: true + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates + required: true + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + required: true + - type: textarea + id: description + attributes: + label: Description + description: | + Provide an explanation of your site feature request in an arbitrary form. + Please make sure the description is worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). 
+        Provide any additional information, any suggested solutions, and as much context and examples as possible
+      placeholder: WRITE DESCRIPTION HERE
+    validations:
+      required: true
diff --git a/.github/ISSUE_TEMPLATE_tmpl/6_question.yml b/.github/ISSUE_TEMPLATE_tmpl/6_question.yml
new file mode 100644
index 0000000000..c101c2286d
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE_tmpl/6_question.yml
@@ -0,0 +1,30 @@
+name: Ask question
+description: Ask yt-dlp related question
+labels: [question]
+body:
+  - type: checkboxes
+    id: checklist
+    attributes:
+      label: Checklist
+      description: |
+        Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
+      options:
+        - label: I'm asking a question and not reporting a bug/feature request
+          required: true
+        - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp)
+          required: true
+        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
+          required: true
+        - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues) for similar questions including closed ones
+          required: true
+  - type: textarea
+    id: question
+    attributes:
+      label: Question
+      description: |
+        Ask your question in an arbitrary form.
+        Please make sure it's worded well enough to be understood, see [is-the-description-of-the-issue-itself-sufficient](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient).
+        Provide any additional information and as much context and examples as possible
+      placeholder: WRITE QUESTION HERE
+    validations:
+      required: true
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 7aaf6a52ba..fb539ec0da 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -109,6 +109,18 @@ Some bug reports are completely unrelated to yt-dlp and relate to a different, o
 If the issue is with `youtube-dl` (the upstream fork of yt-dlp) and not with yt-dlp, the issue should be raised in the youtube-dl project.
 
+### Are you willing to share account details if needed?
+
+The maintainers and potential contributors of the project often do not have an account for the website you are asking support for. So any developer interested in solving your issue may ask you for account details. It is your personal discretion whether you are willing to share the account in order for the developer to try and solve your issue. However, if you are unwilling or unable to provide details, they obviously cannot work on the issue and it cannot be solved unless some developer who both has an account and is willing/able to contribute decides to solve it.
+
+By sharing an account with anyone, you agree to bear all risks associated with it. The maintainers and yt-dlp can't be held responsible for any misuse of the credentials.
+
+While these steps won't necessarily ensure that no misuse of the account takes place, these are still some good practices to follow.
+
+- Look for people with `Member` or `Contributor` tag on their messages.
+- Change the password before sharing the account to something random (use [this](https://passwordsgenerator.net/) if you don't have a random password generator; a local alternative is sketched just after this list).
+- Change the password after receiving the account back.
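
Aside (not part of the patches): since Python is a prerequisite for running yt-dlp anyway, the throwaway password suggested in the list above can also be generated locally with the standard-library `secrets` module. This is only an illustrative sketch.

    import secrets

    # Prints roughly 22 URL-safe characters derived from 16 random bytes,
    # suitable as a throwaway password for a temporarily shared account
    print(secrets.token_urlsafe(16))
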
+ diff --git a/Makefile b/Makefile index 9ce975ea2b..e7b854a9d3 100644 --- a/Makefile +++ b/Makefile @@ -78,12 +78,13 @@ README.md: yt_dlp/*.py yt_dlp/*/*.py CONTRIBUTING.md: README.md $(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md -issuetemplates: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md .github/ISSUE_TEMPLATE_tmpl/5_feature_request.md yt_dlp/version.py - $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md .github/ISSUE_TEMPLATE/1_broken_site.md - $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md .github/ISSUE_TEMPLATE/2_site_support_request.md - $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md - $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md .github/ISSUE_TEMPLATE/4_bug_report.md - $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/5_feature_request.md .github/ISSUE_TEMPLATE/5_feature_request.md +issuetemplates: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml .github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml yt_dlp/version.py + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml .github/ISSUE_TEMPLATE/1_broken_site.yml + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml .github/ISSUE_TEMPLATE/2_site_support_request.yml + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml .github/ISSUE_TEMPLATE/3_site_feature_request.yml + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml .github/ISSUE_TEMPLATE/4_bug_report.yml + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml .github/ISSUE_TEMPLATE/5_feature_request.yml + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/6_question.yml .github/ISSUE_TEMPLATE/6_question.yml supportedsites: $(PYTHON) devscripts/make_supportedsites.py supportedsites.md From 0e5927eebfcd02a4815fcb29319a1dd3f05fd1b3 Mon Sep 17 00:00:00 2001 From: Ricardo <10128951+smplayer-dev@users.noreply.github.com> Date: Thu, 21 Oct 2021 12:48:46 +0200 Subject: [PATCH 295/641] [build] Build standalone MacOS packages (#1221) Closes #1075 Authored by: smplayer-dev --- .github/workflows/build.yml | 108 ++++++++++++++++++++++++++++++------ README.md | 15 +++++ pyinst.py | 89 ++++++++++++++++------------- yt_dlp/update.py | 34 +++++++----- 4 files changed, 175 insertions(+), 71 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5717ce8ee4..2963805961 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -133,6 +133,70 @@ jobs: asset_name: yt-dlp.tar.gz asset_content_type: application/gzip + build_macos: + runs-on: macos-11 + needs: build_unix + + outputs: + sha256_macos: ${{ steps.sha256_macos.outputs.sha256_macos }} + sha512_macos: ${{ steps.sha512_macos.outputs.sha512_macos }} + sha256_macos_zip: ${{ steps.sha256_macos_zip.outputs.sha256_macos_zip }} + 
sha512_macos_zip: ${{ steps.sha512_macos_zip.outputs.sha512_macos_zip }} + + steps: + - uses: actions/checkout@v2 + # In order to create a universal2 application, the version of python3 in /usr/bin has to be used + - name: Install Requirements + run: | + brew install coreutils + /usr/bin/pip3 install --user Pyinstaller mutagen pycryptodomex websockets + - name: Bump version + id: bump_version + run: python devscripts/update-version.py + - name: Print version + run: echo "${{ steps.bump_version.outputs.ytdlp_version }}" + - name: Run PyInstaller Script + run: /usr/bin/python3 ./pyinst.py --target-architecture universal2 --onefile + - name: Upload yt-dlp MacOS binary + id: upload-release-macos + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ needs.build_unix.outputs.upload_url }} + asset_path: ./dist/yt-dlp_macos + asset_name: yt-dlp_macos + asset_content_type: application/octet-stream + - name: Get SHA2-256SUMS for yt-dlp_macos + id: sha256_macos + run: echo "::set-output name=sha256_macos::$(sha256sum dist/yt-dlp_macos | awk '{print $1}')" + - name: Get SHA2-512SUMS for yt-dlp_macos + id: sha512_macos + run: echo "::set-output name=sha512_macos::$(sha512sum dist/yt-dlp_macos | awk '{print $1}')" + + - name: Run PyInstaller Script with --onedir + run: /usr/bin/python3 ./pyinst.py --target-architecture universal2 --onedir + - uses: papeloto/action-zip@v1 + with: + files: ./dist/yt-dlp_macos + dest: ./dist/yt-dlp_macos.zip + - name: Upload yt-dlp MacOS onedir + id: upload-release-macos-zip + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ needs.build_unix.outputs.upload_url }} + asset_path: ./dist/yt-dlp_macos.zip + asset_name: yt-dlp_macos.zip + asset_content_type: application/zip + - name: Get SHA2-256SUMS for yt-dlp_macos.zip + id: sha256_macos_zip + run: echo "::set-output name=sha256_macos_zip::$(sha256sum dist/yt-dlp_macos.zip | awk '{print $1}')" + - name: Get SHA2-512SUMS for yt-dlp_macos + id: sha512_macos_zip + run: echo "::set-output name=sha512_macos_zip::$(sha512sum dist/yt-dlp_macos.zip | awk '{print $1}')" + build_windows: runs-on: windows-latest needs: build_unix @@ -150,11 +214,11 @@ jobs: uses: actions/setup-python@v2 with: python-version: '3.8' - - name: Upgrade pip and enable wheel support - run: python -m pip install --upgrade pip setuptools wheel - name: Install Requirements # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds - run: pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodomex websockets + run: | + python -m pip install --upgrade pip setuptools wheel + pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodomex websockets - name: Bump version id: bump_version run: python devscripts/update-version.py @@ -183,27 +247,27 @@ jobs: - uses: papeloto/action-zip@v1 with: files: ./dist/yt-dlp - dest: ./dist/yt-dlp.zip - - name: Upload yt-dlp.zip Windows onedir + dest: ./dist/yt-dlp_win.zip + - name: Upload yt-dlp Windows onedir id: upload-release-windows-zip uses: actions/upload-release-asset@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: upload_url: ${{ needs.build_unix.outputs.upload_url }} - asset_path: ./dist/yt-dlp.zip - asset_name: yt-dlp.zip + asset_path: ./dist/yt-dlp_win.zip + asset_name: yt-dlp_win.zip asset_content_type: application/zip - - name: Get SHA2-256SUMS for 
yt-dlp.zip + - name: Get SHA2-256SUMS for yt-dlp_win.zip id: sha256_win_zip - run: echo "::set-output name=sha256_win_zip::$((Get-FileHash dist\yt-dlp.zip -Algorithm SHA256).Hash.ToLower())" - - name: Get SHA2-512SUMS for yt-dlp.zip + run: echo "::set-output name=sha256_win_zip::$((Get-FileHash dist\yt-dlp_win.zip -Algorithm SHA256).Hash.ToLower())" + - name: Get SHA2-512SUMS for yt-dlp_win.zip id: sha512_win_zip - run: echo "::set-output name=sha512_win_zip::$((Get-FileHash dist\yt-dlp.zip -Algorithm SHA512).Hash.ToLower())" + run: echo "::set-output name=sha512_win_zip::$((Get-FileHash dist\yt-dlp_win.zip -Algorithm SHA512).Hash.ToLower())" build_windows32: runs-on: windows-latest - needs: [build_unix, build_windows] + needs: build_unix outputs: sha256_win32: ${{ steps.sha256_win32.outputs.sha256_win32 }} @@ -217,10 +281,10 @@ jobs: with: python-version: '3.7' architecture: 'x86' - - name: Upgrade pip and enable wheel support - run: python -m pip install --upgrade pip setuptools wheel - name: Install Requirements - run: pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodomex websockets + run: | + python -m pip install --upgrade pip setuptools wheel + pip install "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodomex websockets - name: Bump version id: bump_version run: python devscripts/update-version.py @@ -247,7 +311,7 @@ jobs: finish: runs-on: ubuntu-latest - needs: [build_unix, build_windows, build_windows32] + needs: [build_unix, build_windows, build_windows32, build_macos] steps: - name: Make SHA2-256SUMS file @@ -255,14 +319,18 @@ jobs: SHA256_WIN: ${{ needs.build_windows.outputs.sha256_win }} SHA256_WIN_ZIP: ${{ needs.build_windows.outputs.sha256_win_zip }} SHA256_WIN32: ${{ needs.build_windows32.outputs.sha256_win32 }} + SHA256_MACOS: ${{ needs.build_macos.outputs.sha256_macos }} + SHA256_MACOS_ZIP: ${{ needs.build_macos.outputs.sha256_macos_zip }} SHA256_BIN: ${{ needs.build_unix.outputs.sha256_bin }} SHA256_TAR: ${{ needs.build_unix.outputs.sha256_tar }} run: | echo "${{ env.SHA256_WIN }} yt-dlp.exe" >> SHA2-256SUMS echo "${{ env.SHA256_WIN32 }} yt-dlp_x86.exe" >> SHA2-256SUMS + echo "${{ env.SHA256_MACOS }} yt-dlp_macos" >> SHA2-256SUMS + echo "${{ env.SHA256_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-256SUMS echo "${{ env.SHA256_BIN }} yt-dlp" >> SHA2-256SUMS echo "${{ env.SHA256_TAR }} yt-dlp.tar.gz" >> SHA2-256SUMS - echo "${{ env.SHA256_WIN_ZIP }} yt-dlp.zip" >> SHA2-256SUMS + echo "${{ env.SHA256_WIN_ZIP }} yt-dlp_win.zip" >> SHA2-256SUMS - name: Upload 256SUMS file id: upload-sums uses: actions/upload-release-asset@v1 @@ -278,14 +346,18 @@ jobs: SHA512_WIN: ${{ needs.build_windows.outputs.sha512_win }} SHA512_WIN_ZIP: ${{ needs.build_windows.outputs.sha512_win_zip }} SHA512_WIN32: ${{ needs.build_windows32.outputs.sha512_win32 }} + SHA512_MACOS: ${{ needs.build_macos.outputs.sha512_macos }} + SHA512_MACOS_ZIP: ${{ needs.build_macos.outputs.sha512_macos_zip }} SHA512_BIN: ${{ needs.build_unix.outputs.sha512_bin }} SHA512_TAR: ${{ needs.build_unix.outputs.sha512_tar }} run: | echo "${{ env.SHA512_WIN }} yt-dlp.exe" >> SHA2-512SUMS echo "${{ env.SHA512_WIN32 }} yt-dlp_x86.exe" >> SHA2-512SUMS + echo "${{ env.SHA512_MACOS }} yt-dlp_macos" >> SHA2-512SUMS + echo "${{ env.SHA512_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-512SUMS echo "${{ env.SHA512_BIN }} yt-dlp" >> SHA2-512SUMS echo "${{ env.SHA512_TAR }} yt-dlp.tar.gz" >> SHA2-512SUMS - echo "${{ env.SHA512_WIN_ZIP 
}} yt-dlp.zip" >> SHA2-512SUMS + echo "${{ env.SHA512_WIN_ZIP }} yt-dlp_win.zip" >> SHA2-512SUMS - name: Upload 512SUMS file id: upload-512sums uses: actions/upload-release-asset@v1 diff --git a/README.md b/README.md index d410d04d18..edd7d298af 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t * [Differences in default behavior](#differences-in-default-behavior) * [INSTALLATION](#installation) * [Update](#update) + * [Release Files](#release-files) * [Dependencies](#dependencies) * [Compile](#compile) * [USAGE AND OPTIONS](#usage-and-options) @@ -190,6 +191,20 @@ You can use `yt-dlp -U` to update if you are using the provided release. If you are using `pip`, simply re-run the same command that was used to install the program. If you have installed using Homebrew, run `brew upgrade yt-dlp/taps/yt-dlp` +### RELEASE FILES + +File|Description +:---|:--- +[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform independant binary. Needs Python (Recommended for UNIX like OSes) +[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows standalone x64 binary (Recommended for Windows) +[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows standalone x86 (32bit) binary +[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged windows executable +[yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|MacOS standalone executable +[yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS executable +[yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)|Source tarball. Also contains manpages, completions, etc +[SHA2-512SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS)|GNU-style SHA512 sums +[SHA2-256SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS)|GNU-style SHA256 sums + ### DEPENDENCIES Python versions 3.6+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly. 
diff --git a/pyinst.py b/pyinst.py index ed410e0f2e..5aa83f9dab 100644 --- a/pyinst.py +++ b/pyinst.py @@ -6,16 +6,24 @@ import sys import platform from PyInstaller.utils.hooks import collect_submodules -from PyInstaller.utils.win32.versioninfo import ( - VarStruct, VarFileInfo, StringStruct, StringTable, - StringFileInfo, FixedFileInfo, VSVersionInfo, SetVersion, -) + +if platform.system() == 'Windows': + from PyInstaller.utils.win32.versioninfo import ( + VarStruct, VarFileInfo, StringStruct, StringTable, + StringFileInfo, FixedFileInfo, VSVersionInfo, SetVersion, + ) import PyInstaller.__main__ +suffix = '' arch = platform.architecture()[0][:2] assert arch in ('32', '64') _x86 = '_x86' if arch == '32' else '' +if platform.system() == 'Windows': + suffix = _x86 +if platform.system() == 'Darwin': + suffix = '_macos' + # Compatability with older arguments opts = sys.argv[1:] if opts[0:1] in (['32'], ['64']): @@ -37,39 +45,40 @@ VERSION_LIST = list(map(int, VERSION_LIST)) + [0] * (4 - len(VERSION_LIST)) print('Version: %s%s' % (VERSION, _x86)) print('Remember to update the version using devscipts\\update-version.py') -VERSION_FILE = VSVersionInfo( - ffi=FixedFileInfo( - filevers=VERSION_LIST, - prodvers=VERSION_LIST, - mask=0x3F, - flags=0x0, - OS=0x4, - fileType=0x1, - subtype=0x0, - date=(0, 0), - ), - kids=[ - StringFileInfo([ - StringTable( - '040904B0', [ - StringStruct('Comments', 'yt-dlp%s Command Line Interface.' % _x86), - StringStruct('CompanyName', 'https://github.com/yt-dlp'), - StringStruct('FileDescription', FILE_DESCRIPTION), - StringStruct('FileVersion', VERSION), - StringStruct('InternalName', 'yt-dlp%s' % _x86), - StringStruct( - 'LegalCopyright', - 'pukkandan.ytdlp@gmail.com | UNLICENSE', - ), - StringStruct('OriginalFilename', 'yt-dlp%s.exe' % _x86), - StringStruct('ProductName', 'yt-dlp%s' % _x86), - StringStruct( - 'ProductVersion', - '%s%s on Python %s' % (VERSION, _x86, platform.python_version())), - ])]), - VarFileInfo([VarStruct('Translation', [0, 1200])]) - ] -) +if platform.system() == 'Windows': + VERSION_FILE = VSVersionInfo( + ffi=FixedFileInfo( + filevers=VERSION_LIST, + prodvers=VERSION_LIST, + mask=0x3F, + flags=0x0, + OS=0x4, + fileType=0x1, + subtype=0x0, + date=(0, 0), + ), + kids=[ + StringFileInfo([ + StringTable( + '040904B0', [ + StringStruct('Comments', 'yt-dlp%s Command Line Interface.' 
% _x86), + StringStruct('CompanyName', 'https://github.com/yt-dlp'), + StringStruct('FileDescription', FILE_DESCRIPTION), + StringStruct('FileVersion', VERSION), + StringStruct('InternalName', 'yt-dlp%s' % _x86), + StringStruct( + 'LegalCopyright', + 'pukkandan.ytdlp@gmail.com | UNLICENSE', + ), + StringStruct('OriginalFilename', 'yt-dlp%s.exe' % _x86), + StringStruct('ProductName', 'yt-dlp%s' % _x86), + StringStruct( + 'ProductVersion', + '%s%s on Python %s' % (VERSION, _x86, platform.python_version())), + ])]), + VarFileInfo([VarStruct('Translation', [0, 1200])]) + ] + ) def pycryptodome_module(): @@ -90,7 +99,7 @@ dependancies = [pycryptodome_module(), 'mutagen'] + collect_submodules('websocke excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc'] PyInstaller.__main__.run([ - '--name=yt-dlp%s' % _x86, + '--name=yt-dlp%s' % suffix, '--icon=devscripts/logo.ico', *[f'--exclude-module={module}' for module in excluded_modules], *[f'--hidden-import={module}' for module in dependancies], @@ -99,4 +108,6 @@ PyInstaller.__main__.run([ *opts, 'yt_dlp/__main__.py', ]) -SetVersion('dist/%syt-dlp%s.exe' % ('yt-dlp/' if '--onedir' in opts else '', _x86), VERSION_FILE) + +if platform.system() == 'Windows': + SetVersion('dist/%syt-dlp%s.exe' % ('yt-dlp/' if '--onedir' in opts else '', _x86), VERSION_FILE) diff --git a/yt_dlp/update.py b/yt_dlp/update.py index e4b1280be6..127b2cbc84 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -33,10 +33,11 @@ def rsa_verify(message, signature, key): def detect_variant(): if hasattr(sys, 'frozen'): + prefix = 'mac' if sys.platform == 'darwin' else 'win' if getattr(sys, '_MEIPASS', None): if sys._MEIPASS == os.path.dirname(sys.executable): - return 'dir' - return 'exe' + return f'{prefix}_dir' + return f'{prefix}_exe' return 'py2exe' elif isinstance(globals().get('__loader__'), zipimporter): return 'zip' @@ -46,9 +47,11 @@ def detect_variant(): _NON_UPDATEABLE_REASONS = { - 'exe': None, + 'win_exe': None, 'zip': None, - 'dir': 'Auto-update is not supported for unpackaged windows executable; Re-download the latest release', + 'mac_exe': None, + 'win_dir': 'Auto-update is not supported for unpackaged windows executable; Re-download the latest release', + 'mac_dir': 'Auto-update is not supported for unpackaged MacOS executable; Re-download the latest release', 'py2exe': 'There is no official release for py2exe executable; Build it again with the latest source code', 'source': 'You cannot update when running from source code; Use git to pull the latest changes', 'unknown': 'It looks like you installed yt-dlp with a package manager, pip, setup.py or a tarball; Use that to update', @@ -119,6 +122,7 @@ def run_update(ydl): 'zip_3': '', 'exe_64': '.exe', 'exe_32': '_x86.exe', + 'mac_64': '_macos', } def get_bin_info(bin_or_exe, version): @@ -139,7 +143,8 @@ def run_update(ydl): return report_permission_error(filename) # PyInstaller - if hasattr(sys, 'frozen'): + variant = detect_variant() + if variant == 'win_exe': exe = filename directory = os.path.dirname(exe) if not os.access(directory, os.W_OK): @@ -161,13 +166,11 @@ def run_update(ydl): except (IOError, OSError): return report_network_error('download latest version') - if not os.access(exe + '.new', os.W_OK): - return report_permission_error(f'{exe}.new') try: with open(exe + '.new', 'wb') as outf: outf.write(newcontent) except (IOError, OSError): - return report_unable('write the new version') + return report_permission_error(f'{exe}.new') expected_sum = get_sha256sum('exe', arch) if not 
expected_sum: @@ -199,10 +202,10 @@ def run_update(ydl): except OSError: report_unable('delete the old version') - # Zip unix package - elif isinstance(globals().get('__loader__'), zipimporter): + elif variant in ('zip', 'mac_exe'): + pack_type = ('mac', '64') if variant == 'mac_exe' else ('zip', '3') try: - url = get_bin_info('zip', '3').get('browser_download_url') + url = get_bin_info(*pack_type).get('browser_download_url') if not url: return report_network_error('fetch updates') urlh = ydl._opener.open(url) @@ -211,11 +214,11 @@ def run_update(ydl): except (IOError, OSError): return report_network_error('download the latest version') - expected_sum = get_sha256sum('zip', '3') + expected_sum = get_sha256sum(*pack_type) if not expected_sum: ydl.report_warning('no hash information found for the release') elif hashlib.sha256(newcontent).hexdigest() != expected_sum: - return report_network_error('verify the new zip') + return report_network_error('verify the new package') try: with open(filename, 'wb') as outf: @@ -223,7 +226,10 @@ def run_update(ydl): except (IOError, OSError): return report_unable('overwrite current version') - ydl.to_screen('Updated yt-dlp to version %s; Restart yt-dlp to use the new version' % version_id) + ydl.to_screen('Updated yt-dlp to version %s; Restart yt-dlp to use the new version' % version_id) + return + + assert False, f'Unhandled variant: {variant}' ''' # UNUSED From 6e21fdd27902efa6ad7fb12b570e4b2dd0bfde8d Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 21 Oct 2021 18:24:05 +0530 Subject: [PATCH 296/641] [build] Enable lazy-extractors in releases Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS` to forcefully disable lazy extractor loading --- .github/workflows/build.yml | 13 +++++++++++++ Makefile | 4 ++-- yt_dlp/YoutubeDL.py | 7 +++++-- yt_dlp/extractor/__init__.py | 21 +++++++++++---------- 4 files changed, 31 insertions(+), 14 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2963805961..9bcdc4f94c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -51,6 +51,10 @@ jobs: echo "changelog<<EOF" >> $GITHUB_ENV echo "$changelog" >> $GITHUB_ENV echo "EOF" >> $GITHUB_ENV + + - name: Build lazy extractors + id: lazy_extractors + run: python devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py - name: Run Make run: make all tar - name: Get SHA2-256SUMS for yt-dlp @@ -155,6 +159,9 @@ jobs: run: python devscripts/update-version.py - name: Print version run: echo "${{ steps.bump_version.outputs.ytdlp_version }}" + - name: Build lazy extractors + id: lazy_extractors + run: /usr/bin/python3 devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py - name: Run PyInstaller Script run: /usr/bin/python3 ./pyinst.py --target-architecture universal2 --onefile - name: Upload yt-dlp MacOS binary @@ -224,6 +231,9 @@ jobs: run: python devscripts/update-version.py - name: Print version run: echo "${{ steps.bump_version.outputs.ytdlp_version }}" + - name: Build lazy extractors + id: lazy_extractors + run: python devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py - name: Run PyInstaller Script run: python pyinst.py - name: Upload yt-dlp.exe Windows binary @@ -290,6 +300,9 @@ jobs: run: python devscripts/update-version.py - name: Print version run: echo "${{ steps.bump_version.outputs.ytdlp_version }}" + - name: Build lazy extractors + id: lazy_extractors + run: python devscripts/make_lazy_extractors.py 
yt_dlp/extractor/lazy_extractors.py
      - name: Run PyInstaller Script for 32 Bit
        run: python pyinst.py
      - name: Upload Executable yt-dlp_x86.exe
diff --git a/Makefile b/Makefile
index e7b854a9d3..ee199e4486 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-all: yt-dlp doc pypi-files
+all: lazy-extractors yt-dlp doc pypi-files
 clean: clean-test clean-dist clean-cache
 completions: completion-bash completion-fish completion-zsh
 doc: README.md CONTRIBUTING.md issuetemplates supportedsites
@@ -40,7 +40,7 @@ SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then ech
 # set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2
 MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi)
 
-install: yt-dlp yt-dlp.1 completions
+install: lazy-extractors yt-dlp yt-dlp.1 completions
 	install -Dm755 yt-dlp $(DESTDIR)$(BINDIR)
 	install -Dm644 yt-dlp.1 $(DESTDIR)$(MANDIR)/man1
 	install -Dm644 completions/bash/yt-dlp $(DESTDIR)$(SHAREDIR)/bash-completion/completions/yt-dlp
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 79f0b274d2..f95bbea81f 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -3268,8 +3268,11 @@ class YoutubeDL(object):
         source = detect_variant()
         write_debug('yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})'))
-        if _LAZY_LOADER:
-            write_debug('Lazy loading extractors enabled\n')
+        if not _LAZY_LOADER:
+            if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
+                write_debug('Lazy loading extractors is forcibly disabled\n')
+            else:
+                write_debug('Lazy loading extractors is disabled\n')
         if plugin_extractors or plugin_postprocessors:
             write_debug('Plugins: %s\n' % [
                 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
diff --git a/yt_dlp/extractor/__init__.py b/yt_dlp/extractor/__init__.py
index 198c4ae17f..b35484246a 100644
--- a/yt_dlp/extractor/__init__.py
+++ b/yt_dlp/extractor/__init__.py
@@ -1,14 +1,15 @@
-from __future__ import unicode_literals
+import os
 
 from ..utils import load_plugins
 
-try:
-    from .lazy_extractors import *
-    from .lazy_extractors import _ALL_CLASSES
-    _LAZY_LOADER = True
-    _PLUGIN_CLASSES = {}
-except ImportError:
-    _LAZY_LOADER = False
+_LAZY_LOADER = False
+if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
+    try:
+        from .lazy_extractors import *
+        from .lazy_extractors import _ALL_CLASSES
+        _LAZY_LOADER = True
+    except ImportError:
+        pass
 
 if not _LAZY_LOADER:
     from .extractors import *
@@ -19,8 +20,8 @@ if not _LAZY_LOADER:
     ]
     _ALL_CLASSES.append(GenericIE)
 
-    _PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
-    _ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES
+_PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
+_ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES
 
 
 def gen_extractor_classes():

From 386cdfdb5b9ff90c7e7b716e9db6ccdd776feb77 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Thu, 21 Oct 2021 18:26:56 +0530
Subject: [PATCH 297/641] [build] Release windows exe built with py2exe

Closes: #855
Related: #661, #705, #890, #1024, #1160
---
 .github/workflows/build.yml | 27 ++++++++++++++++++++++++++-
 README.md                   |  1 +
 setup.py                    |  2 +-
 yt_dlp/update.py            | 22 +++++++++++-----------
 4 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 9bcdc4f94c..b2da4063b9 100644
--- a/.github/workflows/build.yml
+++ 
b/.github/workflows/build.yml @@ -211,6 +211,8 @@ jobs: outputs: sha256_win: ${{ steps.sha256_win.outputs.sha256_win }} sha512_win: ${{ steps.sha512_win.outputs.sha512_win }} + sha256_py2exe: ${{ steps.sha256_py2exe.outputs.sha256_py2exe }} + sha512_py2exe: ${{ steps.sha512_py2exe.outputs.sha512_py2exe }} sha256_win_zip: ${{ steps.sha256_win_zip.outputs.sha256_win_zip }} sha512_win_zip: ${{ steps.sha512_win_zip.outputs.sha512_win_zip }} @@ -224,7 +226,7 @@ jobs: - name: Install Requirements # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds run: | - python -m pip install --upgrade pip setuptools wheel + python -m pip install --upgrade pip setuptools wheel py2exe pip install "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-4.5.1-py3-none-any.whl" mutagen pycryptodomex websockets - name: Bump version id: bump_version @@ -275,6 +277,25 @@ jobs: id: sha512_win_zip run: echo "::set-output name=sha512_win_zip::$((Get-FileHash dist\yt-dlp_win.zip -Algorithm SHA512).Hash.ToLower())" + - name: Run py2exe Script + run: python setup.py py2exe + - name: Upload yt-dlp_min.exe Windows binary + id: upload-release-windows-py2exe + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ needs.build_unix.outputs.upload_url }} + asset_path: ./dist/yt-dlp.exe + asset_name: yt-dlp_min.exe + asset_content_type: application/vnd.microsoft.portable-executable + - name: Get SHA2-256SUMS for yt-dlp_min.exe + id: sha256_py2exe + run: echo "::set-output name=sha256_py2exe::$((Get-FileHash dist\yt-dlp.exe -Algorithm SHA256).Hash.ToLower())" + - name: Get SHA2-512SUMS for yt-dlp_min.exe + id: sha512_py2exe + run: echo "::set-output name=sha512_py2exe::$((Get-FileHash dist\yt-dlp.exe -Algorithm SHA512).Hash.ToLower())" + build_windows32: runs-on: windows-latest needs: build_unix @@ -330,6 +351,7 @@ jobs: - name: Make SHA2-256SUMS file env: SHA256_WIN: ${{ needs.build_windows.outputs.sha256_win }} + SHA256_PY2EXE: ${{ needs.build_windows.outputs.sha256_py2exe }} SHA256_WIN_ZIP: ${{ needs.build_windows.outputs.sha256_win_zip }} SHA256_WIN32: ${{ needs.build_windows32.outputs.sha256_win32 }} SHA256_MACOS: ${{ needs.build_macos.outputs.sha256_macos }} @@ -338,6 +360,7 @@ jobs: SHA256_TAR: ${{ needs.build_unix.outputs.sha256_tar }} run: | echo "${{ env.SHA256_WIN }} yt-dlp.exe" >> SHA2-256SUMS + echo "${{ env.SHA256_PY2EXE }} yt-dlp_min.exe" >> SHA2-256SUMS echo "${{ env.SHA256_WIN32 }} yt-dlp_x86.exe" >> SHA2-256SUMS echo "${{ env.SHA256_MACOS }} yt-dlp_macos" >> SHA2-256SUMS echo "${{ env.SHA256_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-256SUMS @@ -357,6 +380,7 @@ jobs: - name: Make SHA2-512SUMS file env: SHA512_WIN: ${{ needs.build_windows.outputs.sha512_win }} + SHA512_PY2EXE: ${{ needs.build_windows.outputs.sha512_py2exe }} SHA512_WIN_ZIP: ${{ needs.build_windows.outputs.sha512_win_zip }} SHA512_WIN32: ${{ needs.build_windows32.outputs.sha512_win32 }} SHA512_MACOS: ${{ needs.build_macos.outputs.sha512_macos }} @@ -365,6 +389,7 @@ jobs: SHA512_TAR: ${{ needs.build_unix.outputs.sha512_tar }} run: | echo "${{ env.SHA512_WIN }} yt-dlp.exe" >> SHA2-512SUMS + echo "${{ env.SHA512_PY2EXE }} yt-dlp_min.exe" >> SHA2-512SUMS echo "${{ env.SHA512_WIN32 }} yt-dlp_x86.exe" >> SHA2-512SUMS echo "${{ env.SHA512_MACOS }} yt-dlp_macos" >> SHA2-512SUMS echo "${{ env.SHA512_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-512SUMS diff --git a/README.md b/README.md index edd7d298af..25dd290020 100644 --- a/README.md +++ b/README.md @@ -201,6 +201,7 @@ 
File|Description [yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged windows executable [yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|MacOS standalone executable [yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS executable +[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows standalone x64 binary built with `py2exe`. Does not contain `pycryptodomex`, needs VC++14 [yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)|Source tarball. Also contains manpages, completions, etc [SHA2-512SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS)|GNU-style SHA512 sums [SHA2-256SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS)|GNU-style SHA256 sums diff --git a/setup.py b/setup.py index fbd2be0aeb..e1c585be4a 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ REQUIREMENTS = ['mutagen', 'pycryptodomex', 'websockets'] if sys.argv[1:2] == ['py2exe']: import py2exe warnings.warn( - 'Building with py2exe is not officially supported. ' + 'py2exe builds do not support pycryptodomex and needs VC++14 to run. ' 'The recommended way is to use "pyinst.py" to build using pyinstaller') params = { 'console': [{ diff --git a/yt_dlp/update.py b/yt_dlp/update.py index 127b2cbc84..e880cbd8dc 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -50,9 +50,9 @@ _NON_UPDATEABLE_REASONS = { 'win_exe': None, 'zip': None, 'mac_exe': None, + 'py2exe': None, 'win_dir': 'Auto-update is not supported for unpackaged windows executable; Re-download the latest release', 'mac_dir': 'Auto-update is not supported for unpackaged MacOS executable; Re-download the latest release', - 'py2exe': 'There is no official release for py2exe executable; Build it again with the latest source code', 'source': 'You cannot update when running from source code; Use git to pull the latest changes', 'unknown': 'It looks like you installed yt-dlp with a package manager, pip, setup.py or a tarball; Use that to update', } @@ -120,9 +120,10 @@ def run_update(ydl): version_labels = { 'zip_3': '', - 'exe_64': '.exe', - 'exe_32': '_x86.exe', - 'mac_64': '_macos', + 'win_exe_64': '.exe', + 'py2exe_64': '_min.exe', + 'win_exe_32': '_x86.exe', + 'mac_exe_64': '_macos', } def get_bin_info(bin_or_exe, version): @@ -144,9 +145,8 @@ def run_update(ydl): # PyInstaller variant = detect_variant() - if variant == 'win_exe': - exe = filename - directory = os.path.dirname(exe) + if variant in ('win_exe', 'py2exe'): + directory = os.path.dirname(filename) if not os.access(directory, os.W_OK): return report_permission_error(directory) try: @@ -157,7 +157,7 @@ def run_update(ydl): try: arch = platform.architecture()[0][:2] - url = get_bin_info('exe', arch).get('browser_download_url') + url = get_bin_info(variant, arch).get('browser_download_url') if not url: return report_network_error('fetch updates') urlh = ydl._opener.open(url) @@ -203,9 +203,9 @@ def run_update(ydl): report_unable('delete the old version') elif variant in ('zip', 'mac_exe'): - pack_type = ('mac', '64') if variant == 'mac_exe' else ('zip', '3') + pack_type = '3' if variant == 'zip' else '64' try: - url = get_bin_info(*pack_type).get('browser_download_url') + url = get_bin_info(variant, pack_type).get('browser_download_url') if not url: return report_network_error('fetch updates') urlh = ydl._opener.open(url) @@ -214,7 
+214,7 @@ def run_update(ydl): except (IOError, OSError): return report_network_error('download the latest version') - expected_sum = get_sha256sum(*pack_type) + expected_sum = get_sha256sum(variant, pack_type) if not expected_sum: ydl.report_warning('no hash information found for the release') elif hashlib.sha256(newcontent).hexdigest() != expected_sum: From 733d8e8f9935534742408318274912704c5fae09 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 21 Oct 2021 18:27:58 +0530 Subject: [PATCH 298/641] [build] Refactor `pyinst.py` and misc cleanup Closes #1361 --- .github/workflows/build.yml | 45 +++++---- Makefile | 6 +- README.md | 38 +++++--- pyinst.py | 177 ++++++++++++++++++++---------------- yt_dlp/update.py | 18 ++-- 5 files changed, 156 insertions(+), 128 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b2da4063b9..3082884aa0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -8,7 +8,6 @@ on: jobs: build_unix: runs-on: ubuntu-latest - outputs: ytdlp_version: ${{ steps.bump_version.outputs.ytdlp_version }} upload_url: ${{ steps.create_release.outputs.upload_url }} @@ -69,6 +68,7 @@ jobs: - name: Get SHA2-512SUMS for yt-dlp.tar.gz id: sha512_tar run: echo "::set-output name=sha512_tar::$(sha512sum yt-dlp.tar.gz | awk '{print $1}')" + - name: Install dependencies for pypi env: PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} @@ -85,6 +85,7 @@ jobs: rm -rf dist/* python setup.py sdist bdist_wheel twine upload dist/* + - name: Install SSH private key env: BREW_TOKEN: ${{ secrets.BREW_TOKEN }} @@ -103,6 +104,7 @@ jobs: git -C taps/ config user.email github-actions@example.com git -C taps/ commit -am 'yt-dlp: ${{ steps.bump_version.outputs.ytdlp_version }}' git -C taps/ push + - name: Create Release id: create_release uses: actions/create-release@v1 @@ -113,7 +115,9 @@ jobs: release_name: yt-dlp ${{ steps.bump_version.outputs.ytdlp_version }} commitish: ${{ steps.push_update.outputs.head_sha }} body: | - Changelog: + See [this](https://github.com/yt-dlp/yt-dlp#release-files) for a description of the files + + #### Changelog: ${{ env.changelog }} draft: false prerelease: false @@ -140,7 +144,6 @@ jobs: build_macos: runs-on: macos-11 needs: build_unix - outputs: sha256_macos: ${{ steps.sha256_macos.outputs.sha256_macos }} sha512_macos: ${{ steps.sha512_macos.outputs.sha512_macos }} @@ -153,17 +156,15 @@ jobs: - name: Install Requirements run: | brew install coreutils - /usr/bin/pip3 install --user Pyinstaller mutagen pycryptodomex websockets + /usr/bin/python3 -m pip install -U --user pip Pyinstaller mutagen pycryptodomex websockets - name: Bump version id: bump_version - run: python devscripts/update-version.py - - name: Print version - run: echo "${{ steps.bump_version.outputs.ytdlp_version }}" + run: /usr/bin/python3 devscripts/update-version.py - name: Build lazy extractors id: lazy_extractors run: /usr/bin/python3 devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py - name: Run PyInstaller Script - run: /usr/bin/python3 ./pyinst.py --target-architecture universal2 --onefile + run: /usr/bin/python3 pyinst.py --target-architecture universal2 --onefile - name: Upload yt-dlp MacOS binary id: upload-release-macos uses: actions/upload-release-asset@v1 @@ -182,7 +183,7 @@ jobs: run: echo "::set-output name=sha512_macos::$(sha512sum dist/yt-dlp_macos | awk '{print $1}')" - name: Run PyInstaller Script with --onedir - run: /usr/bin/python3 ./pyinst.py --target-architecture universal2 --onedir + 
run: /usr/bin/python3 pyinst.py --target-architecture universal2 --onedir - uses: papeloto/action-zip@v1 with: files: ./dist/yt-dlp_macos @@ -207,7 +208,6 @@ jobs: build_windows: runs-on: windows-latest needs: build_unix - outputs: sha256_win: ${{ steps.sha256_win.outputs.sha256_win }} sha512_win: ${{ steps.sha512_win.outputs.sha512_win }} @@ -231,8 +231,6 @@ jobs: - name: Bump version id: bump_version run: python devscripts/update-version.py - - name: Print version - run: echo "${{ steps.bump_version.outputs.ytdlp_version }}" - name: Build lazy extractors id: lazy_extractors run: python devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py @@ -254,6 +252,7 @@ jobs: - name: Get SHA2-512SUMS for yt-dlp.exe id: sha512_win run: echo "::set-output name=sha512_win::$((Get-FileHash dist\yt-dlp.exe -Algorithm SHA512).Hash.ToLower())" + - name: Run PyInstaller Script with --onedir run: python pyinst.py --onedir - uses: papeloto/action-zip@v1 @@ -319,8 +318,6 @@ jobs: - name: Bump version id: bump_version run: python devscripts/update-version.py - - name: Print version - run: echo "${{ steps.bump_version.outputs.ytdlp_version }}" - name: Build lazy extractors id: lazy_extractors run: python devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py @@ -350,23 +347,23 @@ jobs: steps: - name: Make SHA2-256SUMS file env: + SHA256_BIN: ${{ needs.build_unix.outputs.sha256_bin }} + SHA256_TAR: ${{ needs.build_unix.outputs.sha256_tar }} SHA256_WIN: ${{ needs.build_windows.outputs.sha256_win }} SHA256_PY2EXE: ${{ needs.build_windows.outputs.sha256_py2exe }} SHA256_WIN_ZIP: ${{ needs.build_windows.outputs.sha256_win_zip }} SHA256_WIN32: ${{ needs.build_windows32.outputs.sha256_win32 }} SHA256_MACOS: ${{ needs.build_macos.outputs.sha256_macos }} SHA256_MACOS_ZIP: ${{ needs.build_macos.outputs.sha256_macos_zip }} - SHA256_BIN: ${{ needs.build_unix.outputs.sha256_bin }} - SHA256_TAR: ${{ needs.build_unix.outputs.sha256_tar }} run: | + echo "${{ env.SHA256_BIN }} yt-dlp" >> SHA2-256SUMS + echo "${{ env.SHA256_TAR }} yt-dlp.tar.gz" >> SHA2-256SUMS echo "${{ env.SHA256_WIN }} yt-dlp.exe" >> SHA2-256SUMS echo "${{ env.SHA256_PY2EXE }} yt-dlp_min.exe" >> SHA2-256SUMS echo "${{ env.SHA256_WIN32 }} yt-dlp_x86.exe" >> SHA2-256SUMS + echo "${{ env.SHA256_WIN_ZIP }} yt-dlp_win.zip" >> SHA2-256SUMS echo "${{ env.SHA256_MACOS }} yt-dlp_macos" >> SHA2-256SUMS echo "${{ env.SHA256_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-256SUMS - echo "${{ env.SHA256_BIN }} yt-dlp" >> SHA2-256SUMS - echo "${{ env.SHA256_TAR }} yt-dlp.tar.gz" >> SHA2-256SUMS - echo "${{ env.SHA256_WIN_ZIP }} yt-dlp_win.zip" >> SHA2-256SUMS - name: Upload 256SUMS file id: upload-sums uses: actions/upload-release-asset@v1 @@ -379,23 +376,23 @@ jobs: asset_content_type: text/plain - name: Make SHA2-512SUMS file env: + SHA512_BIN: ${{ needs.build_unix.outputs.sha512_bin }} + SHA512_TAR: ${{ needs.build_unix.outputs.sha512_tar }} SHA512_WIN: ${{ needs.build_windows.outputs.sha512_win }} SHA512_PY2EXE: ${{ needs.build_windows.outputs.sha512_py2exe }} SHA512_WIN_ZIP: ${{ needs.build_windows.outputs.sha512_win_zip }} SHA512_WIN32: ${{ needs.build_windows32.outputs.sha512_win32 }} SHA512_MACOS: ${{ needs.build_macos.outputs.sha512_macos }} SHA512_MACOS_ZIP: ${{ needs.build_macos.outputs.sha512_macos_zip }} - SHA512_BIN: ${{ needs.build_unix.outputs.sha512_bin }} - SHA512_TAR: ${{ needs.build_unix.outputs.sha512_tar }} run: | + echo "${{ env.SHA512_BIN }} yt-dlp" >> SHA2-512SUMS + echo "${{ env.SHA512_TAR }} yt-dlp.tar.gz" >> SHA2-512SUMS 
echo "${{ env.SHA512_WIN }} yt-dlp.exe" >> SHA2-512SUMS + echo "${{ env.SHA512_WIN_ZIP }} yt-dlp_win.zip" >> SHA2-512SUMS echo "${{ env.SHA512_PY2EXE }} yt-dlp_min.exe" >> SHA2-512SUMS echo "${{ env.SHA512_WIN32 }} yt-dlp_x86.exe" >> SHA2-512SUMS echo "${{ env.SHA512_MACOS }} yt-dlp_macos" >> SHA2-512SUMS echo "${{ env.SHA512_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-512SUMS - echo "${{ env.SHA512_BIN }} yt-dlp" >> SHA2-512SUMS - echo "${{ env.SHA512_TAR }} yt-dlp.tar.gz" >> SHA2-512SUMS - echo "${{ env.SHA512_WIN_ZIP }} yt-dlp_win.zip" >> SHA2-512SUMS - name: Upload 512SUMS file id: upload-512sums uses: actions/upload-release-asset@v1 diff --git a/Makefile b/Makefile index ee199e4486..10d6ab8563 100644 --- a/Makefile +++ b/Makefile @@ -40,9 +40,9 @@ SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then ech # set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2 MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi) -install: lazy_extractors yt-dlp yt-dlp.1 completions - install -Dm755 yt-dlp $(DESTDIR)$(BINDIR) - install -Dm644 yt-dlp.1 $(DESTDIR)$(MANDIR)/man1 +install: lazy-extractors yt-dlp yt-dlp.1 completions + install -Dm755 yt-dlp $(DESTDIR)$(BINDIR)/yt-dlp + install -Dm644 yt-dlp.1 $(DESTDIR)$(MANDIR)/man1/yt-dlp.1 install -Dm644 completions/bash/yt-dlp $(DESTDIR)$(SHAREDIR)/bash-completion/completions/yt-dlp install -Dm644 completions/zsh/_yt-dlp $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_yt-dlp install -Dm644 completions/fish/yt-dlp.fish $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d/yt-dlp.fish diff --git a/README.md b/README.md index 25dd290020..cfdcadd0d5 100644 --- a/README.md +++ b/README.md @@ -155,11 +155,10 @@ For ease of use, a few more compat options are available: yt-dlp is not platform specific. So it should work on your Unix box, on Windows or on macOS You can install yt-dlp using one of the following methods: -* Download the binary from the [latest release](https://github.com/yt-dlp/yt-dlp/releases/latest) +* Download [the binary](#release-files) from the [latest release](https://github.com/yt-dlp/yt-dlp/releases/latest) * With Homebrew, `brew install yt-dlp/taps/yt-dlp` * Use [PyPI package](https://pypi.org/project/yt-dlp): `python3 -m pip install --upgrade yt-dlp` -* Use pip+git: `python3 -m pip install --upgrade git+https://github.com/yt-dlp/yt-dlp.git@release` -* Install master branch: `python3 -m pip install --upgrade git+https://github.com/yt-dlp/yt-dlp` +* Install master branch: `python3 -m pip3 install -U https://github.com/yt-dlp/yt-dlp/archive/master.zip` Note that on some systems, you may need to use `py` or `python` instead of `python3` @@ -193,15 +192,27 @@ If you have installed using Homebrew, run `brew upgrade yt-dlp/taps/yt-dlp` ### RELEASE FILES +#### Recommended + +File|Description +:---|:--- +[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform independant binary. Needs Python (Recommended for **UNIX-like systems**) +[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows standalone x64 binary (Recommended for **Windows**) + +#### Alternatives + File|Description :---|:--- -[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform independant binary. 
Needs Python (Recommended for UNIX like OSes) -[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows standalone x64 binary (Recommended for Windows) -[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows standalone x86 (32bit) binary -[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged windows executable [yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|MacOS standalone executable -[yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS executable -[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows standalone x64 binary built with `py2exe`. Does not contain `pycryptodomex`, needs VC++14 +[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows standalone x86 (32bit) binary +[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows standalone x64 binary built with `py2exe`.<br/> Does not contain `pycryptodomex`, needs VC++14 +[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged windows executable (No auto-update) +[yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS executable (No auto-update) + +#### Misc + +File|Description +:---|:--- [yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)|Source tarball. Also contains manpages, completions, etc [SHA2-512SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS)|GNU-style SHA512 sums [SHA2-256SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS)|GNU-style SHA256 sums @@ -239,13 +250,10 @@ The windows releases are already built with the python interpreter, mutagen, pyc **For Windows**: To build the Windows executable, you must have pyinstaller (and optionally mutagen, pycryptodomex, websockets) - python3 -m pip install -U -r requirements.txt +Once you have all the necessary dependencies installed, just run `pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it. -Once you have all the necessary dependencies installed, just run `py pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it. 
 
-You can also build the executable without any version info or metadata by using:
-
-    pyinstaller.exe yt_dlp\__main__.py --onefile --name yt-dlp
+    py -m pip install -U pyinstaller -r requirements.txt
+    py pyinst.py
 
 Note that pyinstaller [does not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment
diff --git a/pyinst.py b/pyinst.py
index 5aa83f9dab..0a695289b7 100644
--- a/pyinst.py
+++ b/pyinst.py
@@ -1,84 +1,85 @@
 #!/usr/bin/env python3
 # coding: utf-8
-
-from __future__ import unicode_literals
-import sys
+import os
 import platform
-
+import sys
 from PyInstaller.utils.hooks import collect_submodules
 
-if platform.system() == 'Windows':
+
+OS_NAME = platform.system()
+if OS_NAME == 'Windows':
     from PyInstaller.utils.win32.versioninfo import (
         VarStruct, VarFileInfo, StringStruct, StringTable,
         StringFileInfo, FixedFileInfo, VSVersionInfo, SetVersion,
     )
-import PyInstaller.__main__
+elif OS_NAME == 'Darwin':
+    pass
+else:
+    raise Exception(f'{OS_NAME} is not supported')
 
-suffix = ''
-arch = platform.architecture()[0][:2]
-assert arch in ('32', '64')
-_x86 = '_x86' if arch == '32' else ''
+ARCH = platform.architecture()[0][:2]
 
-if platform.system() == 'Windows':
-    suffix = _x86
-if platform.system() == 'Darwin':
-    suffix = '_macos'
 
-# Compatability with older arguments
-opts = sys.argv[1:]
-if opts[0:1] in (['32'], ['64']):
-    if arch != opts[0]:
-        raise Exception(f'{opts[0]}bit executable cannot be built on a {arch}bit system')
-    opts = opts[1:]
-opts = opts or ['--onefile']
+def main():
+    opts = parse_options()
+    version = read_version()
 
-print(f'Building {arch}bit version with options {opts}')
+    suffix = '_x86' if ARCH == '32' else '_macos' if OS_NAME == 'Darwin' else ''
+    final_file = 'dist/%syt-dlp%s%s' % (
+        'yt-dlp/' if '--onedir' in opts else '', suffix, '.exe' if OS_NAME == 'Windows' else '')
 
-FILE_DESCRIPTION = 'yt-dlp%s' % (' (32 Bit)' if _x86 else '')
+    print(f'Building yt-dlp v{version} {ARCH}bit for {OS_NAME} with options {opts}')
+    print('Remember to update the version using "devscripts/update-version.py"')
+    if not os.path.isfile('yt_dlp/extractor/lazy_extractors.py'):
+        print('WARNING: Building without lazy_extractors. Run '
+              '"devscripts/make_lazy_extractors.py" "yt_dlp/extractor/lazy_extractors.py" '
+              'to build lazy extractors', file=sys.stderr)
+    print(f'Destination: {final_file}\n')
 
-exec(compile(open('yt_dlp/version.py').read(), 'yt_dlp/version.py', 'exec'))
-VERSION = locals()['__version__']
+    opts = [
+        f'--name=yt-dlp{suffix}',
+        '--icon=devscripts/logo.ico',
+        '--upx-exclude=vcruntime140.dll',
+        '--noconfirm',
+        *dependancy_options(),
+        *opts,
+        'yt_dlp/__main__.py',
+    ]
+    print(f'Running PyInstaller with {opts}')
 
-VERSION_LIST = VERSION.split('.')
-VERSION_LIST = list(map(int, VERSION_LIST)) + [0] * (4 - len(VERSION_LIST))
+    import PyInstaller.__main__
 
-print('Version: %s%s' % (VERSION, _x86))
-print('Remember to update the version using devscipts\\update-version.py')
+    PyInstaller.__main__.run(opts)
 
-if platform.system() == 'Windows':
-    VERSION_FILE = VSVersionInfo(
-        ffi=FixedFileInfo(
-            filevers=VERSION_LIST,
-            prodvers=VERSION_LIST,
-            mask=0x3F,
-            flags=0x0,
-            OS=0x4,
-            fileType=0x1,
-            subtype=0x0,
-            date=(0, 0),
-        ),
-        kids=[
-            StringFileInfo([
-                StringTable(
-                    '040904B0', [
-                        StringStruct('Comments', 'yt-dlp%s Command Line Interface.' % _x86),
-                        StringStruct('CompanyName', 'https://github.com/yt-dlp'),
-                        StringStruct('FileDescription', FILE_DESCRIPTION),
-                        StringStruct('FileVersion', VERSION),
-                        StringStruct('InternalName', 'yt-dlp%s' % _x86),
-                        StringStruct(
-                            'LegalCopyright',
-                            'pukkandan.ytdlp@gmail.com | UNLICENSE',
-                        ),
-                        StringStruct('OriginalFilename', 'yt-dlp%s.exe' % _x86),
-                        StringStruct('ProductName', 'yt-dlp%s' % _x86),
-                        StringStruct(
-                            'ProductVersion',
-                            '%s%s on Python %s' % (VERSION, _x86, platform.python_version())),
-                        ])]),
-            VarFileInfo([VarStruct('Translation', [0, 1200])])
-        ]
-    )
+    set_version_info(final_file, version)
+
+
+def parse_options():
+    # Compatibility with older arguments
+    opts = sys.argv[1:]
+    if opts[0:1] in (['32'], ['64']):
+        if ARCH != opts[0]:
+            raise Exception(f'{opts[0]}bit executable cannot be built on a {ARCH}bit system')
+        opts = opts[1:]
+    return opts or ['--onefile']
+
+
+def read_version():
+    exec(compile(open('yt_dlp/version.py').read(), 'yt_dlp/version.py', 'exec'))
+    return locals()['__version__']
+
+
+def version_to_list(version):
+    version_list = version.split('.')
+    return list(map(int, version_list)) + [0] * (4 - len(version_list))
+
+
+def dependancy_options():
+    dependancies = [pycryptodome_module(), 'mutagen'] + collect_submodules('websockets')
+    excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc']
+
+    yield from (f'--hidden-import={module}' for module in dependancies)
+    yield from (f'--exclude-module={module}' for module in excluded_modules)
 
 
 def pycryptodome_module():
@@ -95,19 +96,41 @@ def pycryptodome_module():
     return 'Cryptodome'
 
 
-dependancies = [pycryptodome_module(), 'mutagen'] + collect_submodules('websockets')
-excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc']
+def set_version_info(exe, version):
+    if OS_NAME == 'Windows':
+        windows_set_version(exe, version)
 
-PyInstaller.__main__.run([
-    '--name=yt-dlp%s' % suffix,
-    '--icon=devscripts/logo.ico',
-    *[f'--exclude-module={module}' for module in excluded_modules],
-    *[f'--hidden-import={module}' for module in dependancies],
-    '--upx-exclude=vcruntime140.dll',
-    '--noconfirm',
-    *opts,
-    'yt_dlp/__main__.py',
-])
 
-if platform.system() == 'Windows':
-    SetVersion('dist/%syt-dlp%s.exe' % ('yt-dlp/' if '--onedir' in opts else '', _x86), VERSION_FILE)
+def windows_set_version(exe, version):
+    version_list = version_to_list(version)
+    suffix = '_x86' if ARCH == '32' else ''
+    SetVersion(exe, VSVersionInfo(
+        ffi=FixedFileInfo(
+            filevers=version_list,
+            prodvers=version_list,
+            mask=0x3F,
+            flags=0x0,
+            OS=0x4,
+            fileType=0x1,
+            subtype=0x0,
+            date=(0, 0),
+        ),
+        kids=[
+            StringFileInfo([StringTable('040904B0', [
+                StringStruct('Comments', 'yt-dlp%s Command Line Interface.' 
% suffix), + StringStruct('CompanyName', 'https://github.com/yt-dlp'), + StringStruct('FileDescription', 'yt-dlp%s' % (' (32 Bit)' if ARCH == '32' else '')), + StringStruct('FileVersion', version), + StringStruct('InternalName', f'yt-dlp{suffix}'), + StringStruct('LegalCopyright', 'pukkandan.ytdlp@gmail.com | UNLICENSE'), + StringStruct('OriginalFilename', f'yt-dlp{suffix}.exe'), + StringStruct('ProductName', f'yt-dlp{suffix}'), + StringStruct( + 'ProductVersion', f'{version}{suffix} on Python {platform.python_version()}'), + ])]), VarFileInfo([VarStruct('Translation', [0, 1200])]) + ] + )) + + +if __name__ == '__main__': + main() diff --git a/yt_dlp/update.py b/yt_dlp/update.py index e880cbd8dc..9fadae90c5 100644 --- a/yt_dlp/update.py +++ b/yt_dlp/update.py @@ -167,35 +167,35 @@ def run_update(ydl): return report_network_error('download latest version') try: - with open(exe + '.new', 'wb') as outf: + with open(filename + '.new', 'wb') as outf: outf.write(newcontent) except (IOError, OSError): - return report_permission_error(f'{exe}.new') + return report_permission_error(f'{filename}.new') - expected_sum = get_sha256sum('exe', arch) + expected_sum = get_sha256sum(variant, arch) if not expected_sum: ydl.report_warning('no hash information found for the release') - elif calc_sha256sum(exe + '.new') != expected_sum: + elif calc_sha256sum(filename + '.new') != expected_sum: report_network_error('verify the new executable') try: - os.remove(exe + '.new') + os.remove(filename + '.new') except OSError: return report_unable('remove corrupt download') try: - os.rename(exe, exe + '.old') + os.rename(filename, filename + '.old') except (IOError, OSError): return report_unable('move current version') try: - os.rename(exe + '.new', exe) + os.rename(filename + '.new', filename) except (IOError, OSError): report_unable('overwrite current version') - os.rename(exe + '.old', exe) + os.rename(filename + '.old', filename) return try: # Continues to run in the background Popen( - 'ping 127.0.0.1 -n 5 -w 1000 & del /F "%s.old"' % exe, + 'ping 127.0.0.1 -n 5 -w 1000 & del /F "%s.old"' % filename, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) ydl.to_screen('Updated yt-dlp to version %s' % version_id) return True # Exit app From f0ded3dad3d751e697d2938d60f369b4cd409170 Mon Sep 17 00:00:00 2001 From: jfogelman <jfogelman@users.noreply.github.com> Date: Thu, 21 Oct 2021 15:36:03 -0400 Subject: [PATCH 299/641] [AdobePass] Fix RCN MSO (#1349) Authored by: jfogelman --- yt_dlp/extractor/adobepass.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index 9378c33cd3..bebcafa6b7 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -39,8 +39,8 @@ MSO_INFO = { }, 'RCN': { 'name': 'RCN', - 'username_field': 'UserName', - 'password_field': 'UserPassword', + 'username_field': 'username', + 'password_field': 'password', }, 'Rogers': { 'name': 'Rogers', From 19b824f6939b0c13c6de1297faee2e70206ce6c4 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 22 Oct 2021 04:34:22 +0530 Subject: [PATCH 300/641] Re-implement deprecated option `--id` Despite `--title`, `--literal` etc being deprecated, `--id` is still documented in youtube-dl and so should be kept --- README.md | 2 +- yt_dlp/__init__.py | 14 +++++++++----- yt_dlp/options.py | 3 +++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index cfdcadd0d5..3ca308f876 100644 --- 
a/README.md +++ b/README.md @@ -1658,6 +1658,7 @@ While these options still work, their use is not recommended since there are oth --print-json -j --no-simulate --autonumber-size NUMBER Use string formatting. Eg: %(autonumber)03d --autonumber-start NUMBER Use internal field formatting like %(autonumber+NUMBER)s + --id -o "%(id)s.%(ext)s" --metadata-from-title FORMAT --parse-metadata "%(title)s:FORMAT" --hls-prefer-native --downloader "m3u8:native" --hls-prefer-ffmpeg --downloader "m3u8:ffmpeg" @@ -1724,7 +1725,6 @@ These options may no longer work as intended #### Removed These options were deprecated since 2014 and have now been entirely removed - --id -o "%(id)s.%(ext)s" -A, --auto-number -o "%(autonumber)s-%(id)s.%(ext)s" -t, --title -o "%(title)s-%(id)s.%(ext)s" -l, --literal -o accepts literal names diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index b952cc0625..d97d4af648 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -259,6 +259,9 @@ def _real_main(argv=None): compat_opts = opts.compat_opts + def report_conflict(arg1, arg2): + warnings.append(f'{arg2} is ignored since {arg1} was given') + def _unused_compat_opt(name): if name not in compat_opts: return False @@ -290,10 +293,14 @@ def _real_main(argv=None): if _video_multistreams_set is False and _audio_multistreams_set is False: _unused_compat_opt('multistreams') outtmpl_default = opts.outtmpl.get('default') + if opts.useid: + if outtmpl_default is None: + outtmpl_default = opts.outtmpl['default'] = '%(id)s.%(ext)s' + else: + report_conflict('--output', '--id') if 'filename' in compat_opts: if outtmpl_default is None: - outtmpl_default = '%(title)s-%(id)s.%(ext)s' - opts.outtmpl.update({'default': outtmpl_default}) + outtmpl_default = opts.outtmpl['default'] = '%(title)s-%(id)s.%(ext)s' else: _unused_compat_opt('filename') @@ -366,9 +373,6 @@ def _real_main(argv=None): opts.addchapters = True opts.remove_chapters = opts.remove_chapters or [] - def report_conflict(arg1, arg2): - warnings.append('%s is ignored since %s was given' % (arg2, arg1)) - if (opts.remove_chapters or sponsorblock_query) and opts.sponskrub is not False: if opts.sponskrub: if opts.remove_chapters: diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 0638e86429..719a1bce45 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -975,6 +975,9 @@ def parseOpts(overrideArguments=None): '--no-batch-file', dest='batchfile', action='store_const', const=None, help='Do not read URLs from batch file (default)') + filesystem.add_option( + '--id', default=False, + action='store_true', dest='useid', help=optparse.SUPPRESS_HELP) filesystem.add_option( '-P', '--paths', metavar='[TYPES:]PATH', dest='paths', default={}, type='str', From ef58c47637625089cc7dc7326e7ce67a9c15f5e0 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 18 Oct 2021 09:19:37 +0530 Subject: [PATCH 301/641] [SponsorBlock] Obey `extractor-retries` and `sleep-requests` --- yt_dlp/postprocessor/sponsorblock.py | 33 +++++++++++++++++++--------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py index 7265a9de7c..70c5462d14 100644 --- a/yt_dlp/postprocessor/sponsorblock.py +++ b/yt_dlp/postprocessor/sponsorblock.py @@ -1,6 +1,8 @@ +from hashlib import sha256 +import itertools import json import re -from hashlib import sha256 +import time from .ffmpeg import FFmpegPostProcessor from ..compat import compat_urllib_parse_urlencode, compat_HTTPError @@ -33,6 +35,7 @@ class 
SponsorBlockPP(FFmpegPostProcessor): self.to_screen(f'SponsorBlock is not supported for {extractor}') return [], info + self.to_screen('Fetching SponsorBlock segments') info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info['duration']) return [], info @@ -79,18 +82,28 @@ class SponsorBlockPP(FFmpegPostProcessor): 'service': service, 'categories': json.dumps(self._categories), }) + self.write_debug(f'SponsorBlock query: {url}') for d in self._get_json(url): if d['videoID'] == video_id: return d['segments'] return [] def _get_json(self, url): - self.write_debug(f'SponsorBlock query: {url}') - try: - rsp = self._downloader.urlopen(sanitized_Request(url)) - except network_exceptions as e: - if isinstance(e, compat_HTTPError) and e.code == 404: - return [] - raise PostProcessingError(f'Unable to communicate with SponsorBlock API - {e}') - - return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8')) + # While this is not an extractor, it behaves similar to one and + # so obey extractor_retries and sleep_interval_requests + max_retries = self.get_param('extractor_retries', 3) + sleep_interval = self.get_param('sleep_interval_requests') or 0 + for retries in itertools.count(): + try: + rsp = self._downloader.urlopen(sanitized_Request(url)) + return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8')) + except network_exceptions as e: + if isinstance(e, compat_HTTPError) and e.code == 404: + return [] + if retries < max_retries: + self.report_warning(f'{e}. Retrying...') + if sleep_interval > 0: + self.to_screen(f'Sleeping {sleep_interval} seconds ...') + time.sleep(sleep_interval) + continue + raise PostProcessingError(f'Unable to communicate with SponsorBlock API: {e}') From 0f6e60bb5722f03c6b64712f70aaf9b0b6915795 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Thu, 21 Oct 2021 23:39:50 +0000 Subject: [PATCH 302/641] [tagesschau] Fix extractor (#1227) Closes #1124 Authored by: u-spec-png --- yt_dlp/extractor/extractors.py | 5 +- yt_dlp/extractor/tagesschau.py | 277 +++++++-------------------------- 2 files changed, 61 insertions(+), 221 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index f4f817fcb5..8ea7d2ed8e 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1387,10 +1387,7 @@ from .svt import ( from .swrmediathek import SWRMediathekIE from .syfy import SyfyIE from .sztvhu import SztvHuIE -from .tagesschau import ( - TagesschauPlayerIE, - TagesschauIE, -) +from .tagesschau import TagesschauIE from .tass import TassIE from .tbs import TBSIE from .tdslifeway import TDSLifewayIE diff --git a/yt_dlp/extractor/tagesschau.py b/yt_dlp/extractor/tagesschau.py index 25c2004556..6e03d0a7d1 100644 --- a/yt_dlp/extractor/tagesschau.py +++ b/yt_dlp/extractor/tagesschau.py @@ -5,177 +5,63 @@ import re from .common import InfoExtractor from ..utils import ( - determine_ext, js_to_json, - parse_iso8601, - parse_filesize, + extract_attributes, + try_get, + int_or_none, ) -class TagesschauPlayerIE(InfoExtractor): - IE_NAME = 'tagesschau:player' - _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/(?P<kind>audio|video)/(?P=kind)-(?P<id>\d+)~player(?:_[^/?#&]+)?\.html' - - _TESTS = [{ - 'url': 'http://www.tagesschau.de/multimedia/video/video-179517~player.html', - 'md5': '8d09548d5c15debad38bee3a4d15ca21', - 'info_dict': { - 'id': '179517', - 'ext': 'mp4', - 'title': 'Marie Kristin Boese, ARD Berlin, über den zukünftigen 
Kurs der AfD', - 'thumbnail': r're:^https?:.*\.jpg$', - 'formats': 'mincount:6', - }, - }, { - 'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html', - 'md5': '76e6eec6ebd40740671cf0a2c88617e5', - 'info_dict': { - 'id': '29417', - 'ext': 'mp3', - 'title': 'Trabi - Bye, bye Rennpappe', - 'thumbnail': r're:^https?:.*\.jpg$', - 'formats': 'mincount:2', - }, - }, { - 'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417~player_autoplay-true.html', - 'only_matching': True, - }] - - _FORMATS = { - 'xs': {'quality': 0}, - 's': {'width': 320, 'height': 180, 'quality': 1}, - 'm': {'width': 512, 'height': 288, 'quality': 2}, - 'l': {'width': 960, 'height': 540, 'quality': 3}, - 'xl': {'width': 1280, 'height': 720, 'quality': 4}, - 'xxl': {'quality': 5}, - } - - def _extract_via_api(self, kind, video_id): - info = self._download_json( - 'https://www.tagesschau.de/api/multimedia/{0}/{0}-{1}.json'.format(kind, video_id), - video_id) - title = info['headline'] - formats = [] - for media in info['mediadata']: - for format_id, format_url in media.items(): - if determine_ext(format_url) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls')) - else: - formats.append({ - 'url': format_url, - 'format_id': format_id, - 'vcodec': 'none' if kind == 'audio' else None, - }) - self._sort_formats(formats) - timestamp = parse_iso8601(info.get('date')) - return { - 'id': video_id, - 'title': title, - 'timestamp': timestamp, - 'formats': formats, - } - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - - # kind = mobj.group('kind').lower() - # if kind == 'video': - # return self._extract_via_api(kind, video_id) - - # JSON api does not provide some audio formats (e.g. 
ogg) thus - # extracting audio via webpage - - webpage = self._download_webpage(url, video_id) - - title = self._og_search_title(webpage).strip() - formats = [] - - for media_json in re.findall(r'({src\s*:\s*["\']http[^}]+type\s*:[^}]+})', webpage): - media = self._parse_json(js_to_json(media_json), video_id, fatal=False) - if not media: - continue - src = media.get('src') - if not src: - return - quality = media.get('quality') - kind = media.get('type', '').split('/')[0] - ext = determine_ext(src) - f = { - 'url': src, - 'format_id': '%s_%s' % (quality, ext) if quality else ext, - 'ext': ext, - 'vcodec': 'none' if kind == 'audio' else None, - } - f.update(self._FORMATS.get(quality, {})) - formats.append(f) - - self._sort_formats(formats) - - thumbnail = self._og_search_thumbnail(webpage) - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - } - - class TagesschauIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P<path>[^/]+/(?:[^/]+/)*?(?P<id>[^/#?]+?(?:-?[0-9]+)?))(?:~_?[^/#?]+?)?\.html' _TESTS = [{ 'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html', - 'md5': 'f7c27a0eff3bfe8c7727e65f8fe1b1e6', + 'md5': '7a7287612fa881a1ae1d087df45c2fd6', 'info_dict': { - 'id': 'video-102143', + 'id': 'video-102143-1', 'ext': 'mp4', 'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt', - 'description': '18.07.2015 20:10 Uhr', - 'thumbnail': r're:^https?:.*\.jpg$', }, }, { 'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html', 'md5': '3c54c1f6243d279b706bde660ceec633', 'info_dict': { - 'id': 'ts-5727', + 'id': 'ts-5727-1', 'ext': 'mp4', - 'title': 'Sendung: tagesschau \t04.12.2014 20:00 Uhr', - 'description': 'md5:695c01bfd98b7e313c501386327aea59', - 'thumbnail': r're:^https?:.*\.jpg$', + 'title': 'Ganze Sendung', }, }, { # exclusive audio 'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417.html', - 'md5': '76e6eec6ebd40740671cf0a2c88617e5', + 'md5': '4cf22023c285f35e99c24d290ba58cc9', 'info_dict': { - 'id': 'audio-29417', + 'id': 'audio-29417-1', 'ext': 'mp3', - 'title': 'Trabi - Bye, bye Rennpappe', - 'description': 'md5:8687dda862cbbe2cfb2df09b56341317', - 'thumbnail': r're:^https?:.*\.jpg$', + 'title': 'Brasilianischer Präsident Bolsonaro unter Druck: Corona-Bericht wird vorgestellt', }, }, { - # audio in article 'url': 'http://www.tagesschau.de/inland/bnd-303.html', - 'md5': 'e0916c623e85fc1d2b26b78f299d3958', + 'md5': '12cfb212d9325b5ba0d52b625f1aa61c', 'info_dict': { - 'id': 'bnd-303', - 'ext': 'mp3', - 'title': 'Viele Baustellen für neuen BND-Chef', - 'description': 'md5:1e69a54be3e1255b2b07cdbce5bcd8b4', - 'thumbnail': r're:^https?:.*\.jpg$', + 'id': 'bnd-303-1', + 'ext': 'mp4', + 'title': 'SPD-Gruppenbild mit Bärbel Bas nach der Fraktionssitzung | dpa', }, }, { 'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html', 'info_dict': { 'id': 'afd-parteitag-135', - 'title': 'Möchtegern-Underdog mit Machtanspruch', + 'title': 'AfD', + }, + 'playlist_count': 20, + }, { + 'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html', + 'info_dict': { + 'id': 'audio-29417-1', + 'ext': 'mp3', + 'title': 'Brasilianischer Präsident Bolsonaro unter Druck: Corona-Bericht wird vorgestellt', }, - 'playlist_count': 2, }, { 'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html', 'only_matching': True, @@ -206,62 +92,6 @@ class TagesschauIE(InfoExtractor): 'only_matching': True, }] - @classmethod - def suitable(cls, url): - return False if 
TagesschauPlayerIE.suitable(url) else super(TagesschauIE, cls).suitable(url)
-
-    def _extract_formats(self, download_text, media_kind):
-        links = re.finditer(
-            r'<div class="button" title="(?P<title>[^"]*)"><a href="(?P<url>[^"]+)">(?P<name>.+?)</a></div>',
-            download_text)
-        formats = []
-        for l in links:
-            link_url = l.group('url')
-            if not link_url:
-                continue
-            format_id = self._search_regex(
-                r'.*/[^/.]+\.([^/]+)\.[^/.]+$', link_url, 'format ID',
-                default=determine_ext(link_url))
-            format = {
-                'format_id': format_id,
-                'url': l.group('url'),
-                'format_name': l.group('name'),
-            }
-            title = l.group('title')
-            if title:
-                if media_kind.lower() == 'video':
-                    m = re.match(
-                        r'''(?x)
-                            Video:\s*(?P<vcodec>[a-zA-Z0-9/._-]+)\s*&#10;
-                            (?P<width>[0-9]+)x(?P<height>[0-9]+)px&#10;
-                            (?P<vbr>[0-9]+)kbps&#10;
-                            Audio:\s*(?P<abr>[0-9]+)kbps,\s*(?P<audio_desc>[A-Za-z\.0-9]+)&#10;
-                            Größe:\s*(?P<filesize_approx>[0-9.,]+\s+[a-zA-Z]*B)''',
-                        title)
-                    if m:
-                        format.update({
-                            'format_note': m.group('audio_desc'),
-                            'vcodec': m.group('vcodec'),
-                            'width': int(m.group('width')),
-                            'height': int(m.group('height')),
-                            'abr': int(m.group('abr')),
-                            'vbr': int(m.group('vbr')),
-                            'filesize_approx': parse_filesize(m.group('filesize_approx')),
-                        })
-                    else:
-                        m = re.match(
-                            r'(?P<format>.+?)-Format\s*:\s*(?P<abr>\d+)kbps\s*,\s*(?P<note>.+)',
-                            title)
-                        if m:
-                            format.update({
-                                'format_note': '%s, %s' % (m.group('format'), m.group('note')),
-                                'vcodec': 'none',
-                                'abr': int(m.group('abr')),
-                            })
-            formats.append(format)
-        self._sort_formats(formats)
-        return formats
-
     def _real_extract(self, url):
         mobj = self._match_valid_url(url)
         video_id = mobj.group('id') or mobj.group('path')
@@ -271,34 +101,46 @@ class TagesschauIE(InfoExtractor):
 
         title = self._html_search_regex(
             r'<span[^>]*class="headline"[^>]*>(.+?)</span>',
-            webpage, 'title', default=None) or self._og_search_title(webpage)
+            webpage, 'title', default=None) or self._og_search_title(webpage, fatal=False)
 
-        DOWNLOAD_REGEX = r'(?s)<p>Wir bieten dieses (?P<kind>Video|Audio) in folgenden Formaten zum Download an:</p>\s*<div class="controls">(?P<links>.*?)</div>\s*<p>'
-
-        webpage_type = self._og_search_property('type', webpage, default=None)
-        if webpage_type == 'website':  # Article
-            entries = []
-            for num, (entry_title, media_kind, download_text) in enumerate(re.findall(
-                    r'(?s)<p[^>]+class="infotext"[^>]*>\s*(?:<a[^>]+>)?\s*<strong>(.+?)</strong>.*?</p>.*?%s' % DOWNLOAD_REGEX,
-                    webpage), 1):
+        entries = []
+        videos = re.findall(r'<div[^>]+>', webpage)
+        num = 0
+        for video in videos:
+            video = extract_attributes(video).get('data-config')
+            if not video:
+                continue
+            video = self._parse_json(video, video_id, transform_source=js_to_json, fatal=False)
+            video_formats = try_get(video, lambda x: x['mc']['_mediaArray'][0]['_mediaStreamArray'])
+            if not video_formats:
+                continue
+            num += 1
+            for video_format in video_formats:
+                media_url = video_format.get('_stream') or ''
+                formats = []
+                if media_url.endswith('master.m3u8'):
+                    formats = self._extract_m3u8_formats(media_url, video_id, 'mp4', m3u8_id='hls')
+                elif media_url.endswith('.hi.mp3') and media_url.startswith('https://download'):
+                    formats = [{
+                        'url': media_url,
+                        'vcodec': 'none',
+                    }]
+                if not formats:
+                    continue
                 entries.append({
                     'id': '%s-%d' % (display_id, num),
-                    'title': '%s' % entry_title,
-                    'formats': self._extract_formats(download_text, media_kind),
+                    'title': try_get(video, lambda x: x['mc']['_title']),
+                    'duration': int_or_none(try_get(video, lambda x: x['mc']['_duration'])),
'formats': formats }) - if len(entries) > 1: - return self.playlist_result(entries, display_id, title) - formats = entries[0]['formats'] - else: # Assume single video - download_text = self._search_regex( - DOWNLOAD_REGEX, webpage, 'download links', group='links') - media_kind = self._search_regex( - DOWNLOAD_REGEX, webpage, 'media kind', default='Video', group='kind') - formats = self._extract_formats(download_text, media_kind) - thumbnail = self._og_search_thumbnail(webpage) - description = self._html_search_regex( - r'(?s)<p class="teasertext">(.*?)</p>', - webpage, 'description', default=None) + if len(entries) > 1: + return self.playlist_result(entries, display_id, title) + formats = entries[0]['formats'] + video_info = self._search_json_ld(webpage, video_id) + description = video_info.get('description') + thumbnail = self._og_search_thumbnail(webpage) or video_info.get('thumbnail') + timestamp = video_info.get('timestamp') + title = title or video_info.get('description') self._sort_formats(formats) @@ -307,5 +149,6 @@ class TagesschauIE(InfoExtractor): 'title': title, 'thumbnail': thumbnail, 'formats': formats, + 'timestamp': timestamp, 'description': description, } From 17ec8bcfa90b80913072fdcb0cafc09c1ad79849 Mon Sep 17 00:00:00 2001 From: Damiano Amatruda <damiano.amatruda@outlook.com> Date: Fri, 22 Oct 2021 02:04:00 +0200 Subject: [PATCH 303/641] [microsoftstream] Add extractor (#1201) Based on: https://github.com/ytdl-org/youtube-dl/pull/24649 Fixes: https://github.com/ytdl-org/youtube-dl/issues/24440 Authored by: damianoamatruda, nixklai --- test/test_utils.py | 7 +- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/microsoftstream.py | 125 ++++++++++++++++++++++++++++ yt_dlp/utils.py | 4 +- 4 files changed, 133 insertions(+), 4 deletions(-) create mode 100644 yt_dlp/extractor/microsoftstream.py diff --git a/test/test_utils.py b/test/test_utils.py index d84c3d3eef..810ed3de4c 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1163,12 +1163,15 @@ class TestUtil(unittest.TestCase): def test_parse_resolution(self): self.assertEqual(parse_resolution(None), {}) self.assertEqual(parse_resolution(''), {}) - self.assertEqual(parse_resolution('1920x1080'), {'width': 1920, 'height': 1080}) - self.assertEqual(parse_resolution('1920×1080'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution(' 1920x1080'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('1920×1080 '), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('1920 x 1080'), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('720p'), {'height': 720}) self.assertEqual(parse_resolution('4k'), {'height': 2160}) self.assertEqual(parse_resolution('8K'), {'height': 4320}) + self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080}) + self.assertEqual(parse_resolution('ep1x2'), {}) + self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080}) def test_parse_bitrate(self): self.assertEqual(parse_bitrate(None), None) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 8ea7d2ed8e..ef2b25c930 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -760,6 +760,7 @@ from .metacritic import MetacriticIE from .mgoon import MgoonIE from .mgtv import MGTVIE from .miaopai import MiaoPaiIE +from .microsoftstream import MicrosoftStreamIE from .microsoftvirtualacademy import ( MicrosoftVirtualAcademyIE, MicrosoftVirtualAcademyCourseIE, diff --git 
a/yt_dlp/extractor/microsoftstream.py b/yt_dlp/extractor/microsoftstream.py new file mode 100644 index 0000000000..4d5a9df1ff --- /dev/null +++ b/yt_dlp/extractor/microsoftstream.py @@ -0,0 +1,125 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from base64 import b64decode + +from .common import InfoExtractor +from ..utils import ( + merge_dicts, + parse_iso8601, + parse_duration, + parse_resolution, + try_get, + url_basename, +) + + +class MicrosoftStreamIE(InfoExtractor): + IE_NAME = 'microsoftstream' + IE_DESC = 'Microsoft Stream' + _VALID_URL = r'https?://(?:web|www|msit)\.microsoftstream\.com/video/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + + _TESTS = [{ + 'url': 'https://web.microsoftstream.com/video/6e51d928-4f46-4f1c-b141-369925e37b62?list=user&userId=f5491e02-e8fe-4e34-b67c-ec2e79a6ecc0', + 'only_matching': True, + }, { + 'url': 'https://msit.microsoftstream.com/video/b60f5987-aabd-4e1c-a42f-c559d138f2ca', + 'only_matching': True, + }] + + def _get_all_subtitles(self, api_url, video_id, headers): + subtitles = {} + automatic_captions = {} + text_tracks = self._download_json( + f'{api_url}/videos/{video_id}/texttracks', video_id, + note='Downloading subtitles JSON', fatal=False, headers=headers, + query={'api-version': '1.4-private'}).get('value') or [] + for track in text_tracks: + if not track.get('language') or not track.get('url'): + continue + sub_dict = automatic_captions if track.get('autoGenerated') else subtitles + sub_dict.setdefault(track['language'], []).append({ + 'ext': 'vtt', + 'url': track.get('url') + }) + return { + 'subtitles': subtitles, + 'automatic_captions': automatic_captions + } + + def extract_all_subtitles(self, *args, **kwargs): + if (self.get_param('writesubtitles', False) + or self.get_param('writeautomaticsub', False) + or self.get_param('listsubtitles')): + return self._get_all_subtitles(*args, **kwargs) + return {} + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + if '<title>Microsoft Stream' not in webpage: + self.raise_login_required(method='cookies') + + access_token = self._html_search_regex(r'"AccessToken":"(.+?)"', webpage, 'access token') + api_url = self._html_search_regex(r'"ApiGatewayUri":"(.+?)"', webpage, 'api url') + + headers = {'Authorization': f'Bearer {access_token}'} + + video_data = self._download_json( + f'{api_url}/videos/{video_id}', video_id, + headers=headers, query={ + '$expand': 'creator,tokens,status,liveEvent,extensions', + 'api-version': '1.4-private' + }) + video_id = video_data.get('id') or video_id + language = video_data.get('language') + + thumbnails = [] + for thumbnail_id in ('extraSmall', 'small', 'medium', 'large'): + thumbnail_url = try_get(video_data, lambda x: x['posterImage'][thumbnail_id]['url'], str) + if not thumbnail_url: + continue + thumb = { + 'id': thumbnail_id, + 'url': thumbnail_url, + } + thumb_name = url_basename(thumbnail_url) + thumb_name = str(b64decode(thumb_name + '=' * (-len(thumb_name) % 4))) + thumb.update(parse_resolution(thumb_name)) + thumbnails.append(thumb) + + formats = [] + for playlist in video_data['playbackUrls']: + if playlist['mimeType'] == 'application/vnd.apple.mpegurl': + formats.extend(self._extract_m3u8_formats( + playlist['playbackUrl'], video_id, + ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False, headers=headers)) + elif playlist['mimeType'] == 'application/dash+xml': + formats.extend(self._extract_mpd_formats( + playlist['playbackUrl'], 
video_id, mpd_id='dash',
+                    fatal=False, headers=headers))
+            elif playlist['mimeType'] == 'application/vnd.ms-sstr+xml':
+                formats.extend(self._extract_ism_formats(
+                    playlist['playbackUrl'], video_id, ism_id='mss',
+                    fatal=False, headers=headers))
+        formats = [merge_dicts(f, {'language': language}) for f in formats]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_data['name'],
+            'description': video_data.get('description'),
+            'uploader': try_get(video_data, lambda x: x['creator']['name'], str),
+            'uploader_id': try_get(video_data, (lambda x: x['creator']['mail'],
+                                                lambda x: x['creator']['id']), str),
+            'thumbnails': thumbnails,
+            **self.extract_all_subtitles(api_url, video_id, headers),
+            'timestamp': parse_iso8601(video_data.get('created')),
+            'duration': parse_duration(try_get(video_data, lambda x: x['media']['duration'])),
+            'webpage_url': f'https://web.microsoftstream.com/video/{video_id}',
+            'view_count': try_get(video_data, lambda x: x['metrics']['views'], int),
+            'like_count': try_get(video_data, lambda x: x['metrics']['likes'], int),
+            'comment_count': try_get(video_data, lambda x: x['metrics']['comments'], int),
+            'formats': formats,
+        }
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 319f6979ba..e05677d08e 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -3714,14 +3714,14 @@ def parse_resolution(s):
     if s is None:
         return {}
 
-    mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
+    mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
     if mobj:
         return {
            'width': int(mobj.group('w')),
            'height': int(mobj.group('h')),
        }
 
-    mobj = re.search(r'\b(\d+)[pPiI]\b', s)
+    mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
     if mobj:
         return {'height': int(mobj.group(1))}
 
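The effect of the regex change above is easiest to see on the strings from the updated `test_parse_resolution` cases: replacing the `\b` word boundaries with explicit look-arounds keeps separators such as `_`, spaces and `, ` working while rejecting matches glued to letters or digits (`ep1x2`). A minimal standalone sketch of the new behaviour (illustration only, using the pattern from the hunk above and inputs from the updated test_utils.py):

    import re

    # Pattern copied from the parse_resolution hunk above; the sample
    # inputs come from the updated test_utils.py expectations.
    RES_RE = re.compile(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])')

    for s in (' 1920x1080', 'pre_1920x1080_post', 'ep1x2', '1920, 1080'):
        m = RES_RE.search(s)
        print(repr(s), '->', m.groupdict() if m else {})
    # 'ep1x2' prints {} because its '1' is preceded by a letter; the
    # other three inputs all yield {'w': '1920', 'h': '1080'}.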
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Fri, 22 Oct 2021 05:57:15 +0530
Subject: [PATCH 304/641] [vimeo] Fix embedded `player.vimeo` URL

Closes #1138, partially fixes #1323
Cherry-picked from upstream commit 3ae9c0f410b1d4f63e8bada67dd62a8d2852be32
---
 yt_dlp/extractor/vimeo.py | 230 +++++++++++++++++---------------------
 1 file changed, 101 insertions(+), 129 deletions(-)
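The core of the change for embedded players: instead of probing a chain of fallback patterns (`%s=({...});`, `var r = {...};`, ...), the rewritten `_real_extract` below pulls the player config out of the page with a single `config = {...};` lookup and hands it to `_parse_config`. A rough self-contained illustration of that lookup on a toy page string (the real code goes through `_search_regex`/`_parse_json` with proper error handling):

    import json
    import re

    # Toy page; the pattern is the one used in the new
    # player.vimeo.com branch of _real_extract below.
    page = '<script>var config = {"video": {"id": 76979871}};</script>'
    config = json.loads(re.search(r'\bconfig\s*=\s*({.+?})\s*;', page).group(1))
    print(config['video']['id'])  # -> 76979871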
'description': video.get('description'),
-            'license': video.get('license'),
-            'release_timestamp': get_timestamp('release'),
-            'timestamp': get_timestamp('created'),
-            'view_count': int_or_none(try_get(video, lambda x: x['stats']['plays'])),
-        })
-        connections = try_get(
-            video, lambda x: x['metadata']['connections'], dict) or {}
-        for k in ('comment', 'like'):
-            info[k + '_count'] = int_or_none(try_get(connections, lambda x: x[k + 's']['total']))
-        return info
+            return self._extract_from_api(video_id, unlisted_hash)
 
         orig_url = url
         is_pro = 'vimeopro.com/' in url
-        is_player = '://player.vimeo.com/video/' in url
         if is_pro:
             # some videos require portfolio_id to be present in player url
             # https://github.com/ytdl-org/youtube-dl/issues/20070
             url = self._extract_url(url, self._download_webpage(url, video_id))
             if not url:
                 url = 'https://vimeo.com/' + video_id
-        elif is_player:
-            url = 'https://player.vimeo.com/video/' + video_id
         elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
             url = 'https://vimeo.com/' + video_id
 
@@ -734,14 +740,25 @@
                     expected=True)
                 raise
 
-        # Now we begin extracting as much information as we can from what we
-        # retrieved. First we extract the information common to all extractors,
-        # and latter we extract those that are Vimeo specific.
-        self.report_extraction(video_id)
+        if '://player.vimeo.com/video/' in url:
+            config = self._parse_json(self._search_regex(
+                r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
+            if config.get('view') == 4:
+                config = self._verify_player_video_password(
+                    redirect_url, video_id, headers)
+            info = self._parse_config(config, video_id)
+            self._vimeo_sort_formats(info['formats'])
+            return info
+
+        if re.search(r'<form[^>]+?id="pw_form"', webpage):
+            video_password = self._get_video_password()
+            token, vuid = self._extract_xsrft_and_vuid(webpage)
+            webpage = self._verify_video_password(
+                redirect_url, video_id, video_password, token, vuid)
 
         vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
         if vimeo_config:
-            seed_status = vimeo_config.get('seed_status', {})
+            seed_status = vimeo_config.get('seed_status') or {}
             if seed_status.get('state') == 'failed':
                 raise ExtractorError(
                     '%s said: %s' % (self.IE_NAME, seed_status['title']),
@@ -750,70 +767,40 @@
         cc_license = None
         timestamp = None
         video_description = None
+        info_dict = {}
 
-        # Extract the config JSON
-        try:
-            try:
-                config_url = self._html_search_regex(
-                    r' data-config-url="(.+?)"', webpage,
-                    'config URL', default=None)
-                if not config_url:
-                    # Sometimes new react-based page is served instead of old one that require
-                    # different config URL extraction approach (see
-                    # https://github.com/ytdl-org/youtube-dl/pull/7209)
-                    page_config = self._parse_json(self._search_regex(
-                        r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});',
-                        webpage, 'page config'), video_id)
-                    config_url = page_config['player']['config_url']
-                    cc_license = page_config.get('cc_license')
-                    timestamp = try_get(
-                        page_config, lambda x: x['clip']['uploaded_on'],
-                        compat_str)
-                    video_description = clean_html(dict_get(
-                        page_config, ('description', 'description_html_escaped')))
-                config = self._download_json(config_url, video_id)
-            except RegexNotFoundError:
-                # For pro videos or player.vimeo.com urls
-                # We try to find out to which variable is assigned the config dic
-                m_variable_name = re.search(r'(\w)\.video\.id', webpage)
-                if m_variable_name is not None:
-                    config_re = [r'%s=({[^}].+?});' % 
re.escape(m_variable_name.group(1))]
-                else:
-                    config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
-                config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;')
-                config_re.append(r'\bconfig\s*=\s*({.+?})\s*;')
-                config = self._search_regex(config_re, webpage, 'info section',
-                                            flags=re.DOTALL)
-                config = json.loads(config)
-        except Exception as e:
-            if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
-                raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
-
-            if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
-                if '_video_password_verified' in data:
-                    raise ExtractorError('video password verification failed!')
-                video_password = self._get_video_password()
-                token, vuid = self._extract_xsrft_and_vuid(webpage)
-                self._verify_video_password(
-                    redirect_url, video_id, video_password, token, vuid)
-                return self._real_extract(
-                    smuggle_url(redirect_url, {'_video_password_verified': 'verified'}))
-            else:
-                raise ExtractorError('Unable to extract info section',
-                                     cause=e)
+        channel_id = self._search_regex(
+            r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
+        if channel_id:
+            config_url = self._html_search_regex(
+                r'\bdata-config-url="([^"]+)"', webpage, 'config URL')
+            video_description = clean_html(get_element_by_class('description', webpage))
+            info_dict.update({
+                'channel_id': channel_id,
+                'channel_url': 'https://vimeo.com/channels/' + channel_id,
+            })
         else:
-            if config.get('view') == 4:
-                config = self._verify_player_video_password(redirect_url, video_id, headers)
-
+            page_config = self._parse_json(self._search_regex(
+                r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});',
+                webpage, 'page config', default='{}'), video_id, fatal=False)
+            if not page_config:
+                return self._extract_from_api(video_id)
+            config_url = page_config['player']['config_url']
+            cc_license = page_config.get('cc_license')
+            clip = page_config.get('clip') or {}
+            timestamp = clip.get('uploaded_on')
+            video_description = clean_html(
+                clip.get('description') or page_config.get('description_html_escaped'))
+        config = self._download_json(config_url, video_id)
         video = config.get('video') or {}
         vod = video.get('vod') or {}
 
         def is_rented():
             if '>You rented this title.<' in webpage:
                 return True
-            if config.get('user', {}).get('purchased'):
+            if try_get(config, lambda x: x['user']['purchased']):
                 return True
-            for purchase_option in vod.get('purchase_options', []):
+            for purchase_option in (vod.get('purchase_options') or []):
                 if purchase_option.get('purchased'):
                     return True
                 label = purchase_option.get('label_string')
@@ -828,14 +815,14 @@
                         'https://player.vimeo.com/player/%s' % feature_id,
                         {'force_feature_id': True}), 'Vimeo')
 
-        # Extract video description
         if not video_description:
             video_description = self._html_search_regex(
                 r'(?s)<div\s+class="[^"]*description[^"]*"[^>]*>(.*?)</div>',
                 webpage, 'description', default=None)
         if not video_description:
             video_description = self._html_search_meta(
-                'description', webpage, default=None)
+                ['description', 'og:description', 'twitter:description'],
+                webpage, default=None)
         if not video_description and is_pro:
             orig_webpage = self._download_webpage(
                 orig_url, video_id,
                 note='Downloading webpage for description', fatal=False)
             if orig_webpage:
                 video_description = self._html_search_meta(
                     'description', orig_webpage, default=None)
-        if not video_description and not is_player:
+        if not video_description:
             self.report_warning('Cannot find video description')
 
-        # 
Extract upload date
         if not timestamp:
             timestamp = self._search_regex(
                 r'<time[^>]+datetime="([^"]+)"', webpage,
                 'timestamp', default=None)
 
-        try:
-            view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count'))
-            like_count = int(self._search_regex(r'UserLikes:(\d+)', webpage, 'like count'))
-            comment_count = int(self._search_regex(r'UserComments:(\d+)', webpage, 'comment count'))
-        except RegexNotFoundError:
-            # This info is only available in vimeo.com/{id} urls
-            view_count = None
-            like_count = None
-            comment_count = None
+        view_count = int_or_none(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count', default=None))
+        like_count = int_or_none(self._search_regex(r'UserLikes:(\d+)', webpage, 'like count', default=None))
+        comment_count = int_or_none(self._search_regex(r'UserComments:(\d+)', webpage, 'comment count', default=None))
 
         formats = []
 
@@ -881,11 +861,7 @@
             r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
             webpage, 'license', default=None, group='license')
 
-        channel_id = self._search_regex(
-            r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
-        channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None
-
-        info_dict = {
+        info_dict.update({
             'formats': formats,
             'timestamp': unified_timestamp(timestamp),
             'description': video_description,
@@ -894,18 +870,14 @@
             'like_count': like_count,
             'comment_count': comment_count,
             'license': cc_license,
-            'channel_id': channel_id,
-            'channel_url': channel_url,
-        }
+        })
 
-        info_dict = merge_dicts(info_dict, info_dict_config, json_ld)
-
-        return info_dict
+        return merge_dicts(info_dict, info_dict_config, json_ld)
 
 
 class VimeoOndemandIE(VimeoIE):
     IE_NAME = 'vimeo:ondemand'
-    _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/([^/]+/)?(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?:[^/]+/)?(?P<id>[^/?#&]+)'
     _TESTS = [{
         # ondemand video not available via https://vimeo.com/id
         'url': 'https://vimeo.com/ondemand/20704',

From f656a23cb116980b0eed5cad02e707249b75701a Mon Sep 17 00:00:00 2001
From: zenerdi0de <83358565+zenerdi0de@users.noreply.github.com>
Date: Fri, 22 Oct 2021 06:20:49 +0530
Subject: [PATCH 305/641] [patreon] Fix vimeo player regex (#1332)

Closes #1323
Authored by: zenerdi0de
---
 yt_dlp/extractor/patreon.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py
index a189c0237d..c7d316efce 100644
--- a/yt_dlp/extractor/patreon.py
+++ b/yt_dlp/extractor/patreon.py
@@ -161,7 +161,7 @@ class PatreonIE(InfoExtractor):
         if try_get(attributes, lambda x: x['embed']['provider']) == 'Vimeo':
             embed_html = try_get(attributes, lambda x: x['embed']['html'])
             v_url = url_or_none(compat_urllib_parse_unquote(
-                self._search_regex(r'src=(https%3A%2F%2Fplayer\.vimeo\.com.+)%3F', embed_html, 'vimeo url', fatal=False)))
+                self._search_regex(r'(https(?:%3A%2F%2F|://)player\.vimeo\.com.+app_id(?:=|%3D)+\d+)', embed_html, 'vimeo url', fatal=False)))
             if v_url:
                 info.update({
                     '_type': 'url_transparent',

From ab2ffab22d02d530e0b46f9e361ff53a2139898b Mon Sep 17 00:00:00 2001
From: u-spec-png <54671367+u-spec-png@users.noreply.github.com>
Date: Fri, 22 Oct 2021 00:53:45 +0000
Subject: [PATCH 306/641] [Instagram] Add login (#1288)

Authored by: u-spec-png
---
 yt_dlp/extractor/instagram.py | 44 ++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/instagram.py 
b/yt_dlp/extractor/instagram.py
index 3801c7af92..24f47f3a82 100644
--- a/yt_dlp/extractor/instagram.py
+++ b/yt_dlp/extractor/instagram.py
@@ -4,6 +4,7 @@ import itertools
 import hashlib
 import json
 import re
+import time
 
 from .common import InfoExtractor
 from ..compat import (
@@ -20,11 +21,13 @@ from ..utils import (
     try_get,
     url_or_none,
     variadic,
+    urlencode_postdata,
 )
 
 
 class InstagramIE(InfoExtractor):
     _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
+    _NETRC_MACHINE = 'instagram'
     _TESTS = [{
         'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
         'md5': '0d2da106a9d2631273e192b372806516',
@@ -140,6 +143,45 @@ class InstagramIE(InfoExtractor):
         if mobj:
             return mobj.group('link')
 
+    def _login(self):
+        username, password = self._get_login_info()
+
+        login_webpage = self._download_webpage(
+            'https://www.instagram.com/accounts/login/', None,
+            note='Downloading login webpage', errnote='Failed to download login webpage')
+
+        shared_data = self._parse_json(
+            self._search_regex(
+                r'window\._sharedData\s*=\s*({.+?});',
+                login_webpage, 'shared data', default='{}'),
+            None)
+
+        login = self._download_json('https://www.instagram.com/accounts/login/ajax/', None, note='Logging in', headers={
+            'Accept': '*/*',
+            'X-IG-App-ID': '936619743392459',
+            'X-ASBD-ID': '198387',
+            'X-IG-WWW-Claim': '0',
+            'X-Requested-With': 'XMLHttpRequest',
+            'X-CSRFToken': shared_data['config']['csrf_token'],
+            'X-Instagram-AJAX': shared_data['rollout_hash'],
+            'Referer': 'https://www.instagram.com/',
+        }, data=urlencode_postdata({
+            'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}',
+            'username': username,
+            'queryParams': '{}',
+            'optIntoOneTap': 'false',
+            'stopDeletionNonce': '',
+            'trustedDeviceRecords': '{}',
+        }))
+
+        if not login.get('authenticated'):
+            if login.get('message'):
+                raise ExtractorError(f'Unable to login: {login["message"]}')
+            raise ExtractorError('Unable to login')
+
+    def _real_initialize(self):
+        self._login()
+
     def _real_extract(self, url):
         mobj = self._match_valid_url(url)
         video_id = mobj.group('id')
 
         webpage, urlh = self._download_webpage_handle(url, video_id)
         if 'www.instagram.com/accounts/login' in urlh.geturl().rstrip('/'):
-            self.raise_login_required('You need to log in to access this content', method='cookies')
+            self.raise_login_required('You need to log in to access this content')
 
         (media, video_url, description, thumbnail, timestamp, uploader,
          uploader_id, like_count, comment_count, comments, height,

From 3c239332b0df3b22a5cbd66930ad240d2398fb44 Mon Sep 17 00:00:00 2001
From: makeworld <25111343+makeworld-the-better-one@users.noreply.github.com>
Date: Thu, 21 Oct 2021 20:56:29 -0400
Subject: [PATCH 307/641] [CBC] Fix Gem livestream (#1289)

Authored by: makeworld-the-better-one
---
 yt_dlp/extractor/cbc.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py
index 5e4526c535..61fe4074cb 100644
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@@ -377,7 +377,7 @@ class CBCGemPlaylistIE(InfoExtractor):
 
 class CBCGemLiveIE(InfoExtractor):
     IE_NAME = 'gem.cbc.ca:live'
-    _VALID_URL = r'https?://gem\.cbc\.ca/live/(?P<id>[0-9]{12})'
+    _VALID_URL = r'https?://gem\.cbc\.ca/live/(?P<id>\d+)'
     _TEST = {
         'url': 'https://gem.cbc.ca/live/920604739687',
         'info_dict': {
@@ -396,21 +396,21 @@
 
     # It's unclear where the chars at the end come from, but they appear to be
    # 
constant. Might need updating in the future. - _API = 'https://tpfeed.cbc.ca/f/ExhSPC/t_t3UKJR6MAT' + # There are two URLs, some livestreams are in one, and some + # in the other. The JSON schema is the same for both. + _API_URLS = ['https://tpfeed.cbc.ca/f/ExhSPC/t_t3UKJR6MAT', 'https://tpfeed.cbc.ca/f/ExhSPC/FNiv9xQx_BnT'] def _real_extract(self, url): video_id = self._match_id(url) - live_info = self._download_json(self._API, video_id)['entries'] - video_info = None - for stream in live_info: - if stream.get('guid') == video_id: - video_info = stream - - if video_info is None: - raise ExtractorError( - 'Couldn\'t find video metadata, maybe this livestream is now offline', - expected=True) + for api_url in self._API_URLS: + video_info = next(( + stream for stream in self._download_json(api_url, video_id)['entries'] + if stream.get('guid') == video_id), None) + if video_info: + break + else: + raise ExtractorError('Couldn\'t find video metadata, maybe this livestream is now offline', expected=True) return { '_type': 'url_transparent', From d183af3cc1dbb98d2e2f89dbc7cff2901bd10408 Mon Sep 17 00:00:00 2001 From: makeworld <25111343+makeworld-the-better-one@users.noreply.github.com> Date: Thu, 21 Oct 2021 20:58:32 -0400 Subject: [PATCH 308/641] [CBC] Support CBC Gem member content (#1294) Authored by: makeworld-the-better-one --- yt_dlp/extractor/cbc.py | 108 +++++++++++++++++++++++++++++++--------- 1 file changed, 85 insertions(+), 23 deletions(-) diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index 61fe4074cb..4fcf2a9c1b 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -2,6 +2,9 @@ from __future__ import unicode_literals import re +import json +import base64 +import time from .common import InfoExtractor from ..compat import ( @@ -244,37 +247,96 @@ class CBCGemIE(InfoExtractor): 'params': {'format': 'bv'}, 'skip': 'Geo-restricted to Canada', }] - _API_BASE = 'https://services.radio-canada.ca/ott/cbc-api/v2/assets/' + + _GEO_COUNTRIES = ['CA'] + _TOKEN_API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37' + _NETRC_MACHINE = 'cbcgem' + _claims_token = None + + def _new_claims_token(self, email, password): + data = json.dumps({ + 'email': email, + 'password': password, + }).encode() + headers = {'content-type': 'application/json'} + query = {'apikey': self._TOKEN_API_KEY} + resp = self._download_json('https://api.loginradius.com/identity/v2/auth/login', + None, data=data, headers=headers, query=query) + access_token = resp['access_token'] + + query = { + 'access_token': access_token, + 'apikey': self._TOKEN_API_KEY, + 'jwtapp': 'jwt', + } + resp = self._download_json('https://cloud-api.loginradius.com/sso/jwt/api/token', + None, headers=headers, query=query) + sig = resp['signature'] + + data = json.dumps({'jwt': sig}).encode() + headers = {'content-type': 'application/json', 'ott-device-type': 'web'} + resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/token', + None, data=data, headers=headers) + cbc_access_token = resp['accessToken'] + + headers = {'content-type': 'application/json', 'ott-device-type': 'web', 'ott-access-token': cbc_access_token} + resp = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/profile', + None, headers=headers) + return resp['claimsToken'] + + def _get_claims_token_expiry(self): + # Token is a JWT + # JWT is decoded here and 'exp' field is extracted + # It is a Unix timestamp for when the token expires + b64_data = self._claims_token.split('.')[1] + data = 
base64.urlsafe_b64decode(b64_data + "==") + return json.loads(data)['exp'] + + def claims_token_expired(self): + exp = self._get_claims_token_expiry() + if exp - time.time() < 10: + # It will expire in less than 10 seconds, or has already expired + return True + return False + + def claims_token_valid(self): + return self._claims_token is not None and not self.claims_token_expired() + + def _get_claims_token(self, email, password): + if not self.claims_token_valid(): + self._claims_token = self._new_claims_token(email, password) + self._downloader.cache.store(self._NETRC_MACHINE, 'claims_token', self._claims_token) + return self._claims_token + + def _real_initialize(self): + if self.claims_token_valid(): + return + self._claims_token = self._downloader.cache.load(self._NETRC_MACHINE, 'claims_token') def _real_extract(self, url): video_id = self._match_id(url) - video_info = self._download_json(self._API_BASE + video_id, video_id) + video_info = self._download_json('https://services.radio-canada.ca/ott/cbc-api/v2/assets/' + video_id, video_id) - last_error = None - attempt = -1 - retries = self.get_param('extractor_retries', 15) - while attempt < retries: - attempt += 1 - if last_error: - self.report_warning('%s. Retrying ...' % last_error) - m3u8_info = self._download_json( - video_info['playSession']['url'], video_id, - note='Downloading JSON metadata%s' % f' (attempt {attempt})') - m3u8_url = m3u8_info.get('url') - if m3u8_url: - break - elif m3u8_info.get('errorCode') == 1: - self.raise_geo_restricted(countries=['CA']) - else: - last_error = f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}' - # 35 means media unavailable, but retries work - if m3u8_info.get('errorCode') != 35 or attempt >= retries: - raise ExtractorError(last_error) + email, password = self._get_login_info() + if email and password: + claims_token = self._get_claims_token(email, password) + headers = {'x-claims-token': claims_token} + else: + headers = {} + m3u8_info = self._download_json(video_info['playSession']['url'], video_id, headers=headers) + m3u8_url = m3u8_info.get('url') + + if m3u8_info.get('errorCode') == 1: + self.raise_geo_restricted(countries=['CA']) + elif m3u8_info.get('errorCode') == 35: + self.raise_login_required(method='password') + elif m3u8_info.get('errorCode') != 0: + raise ExtractorError(f'{self.IE_NAME} said: {m3u8_info.get("errorCode")} - {m3u8_info.get("message")}') formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls') self._remove_duplicate_formats(formats) - for i, format in enumerate(formats): + for format in formats: if format.get('vcodec') == 'none': if format.get('ext') is None: format['ext'] = 'm4a' From ad0090d0d23e938e8a2107777a83e6c6b92494d3 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 22 Oct 2021 21:58:06 +0530 Subject: [PATCH 309/641] [cookies] Local State should be opened as utf-8 Closes #1276 --- yt_dlp/cookies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 5f7fdf5843..c9ae9b6dbe 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -620,7 +620,7 @@ def _get_windows_v10_key(browser_root, logger): if path is None: logger.error('could not find local state file') return None - with open(path, 'r') as f: + with open(path, 'r', encoding='utf8') as f: data = json.load(f) try: base64_key = data['os_crypt']['encrypted_key'] From 457f6d68668704c20debc40ca77768796656d98b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 22 Oct 2021 23:13:06 +0530 Subject: 
[PATCH 310/641] [vlive:channel] Fix extraction

Based on https://github.com/ytdl-org/youtube-dl/pull/29866
Closes #749, #927, https://github.com/ytdl-org/youtube-dl/issues/29837
Authored by kikuyan, pukkandan
---
 yt_dlp/extractor/vlive.py | 217 +++++++++++++++++---------------------
 1 file changed, 98 insertions(+), 119 deletions(-)

diff --git a/yt_dlp/extractor/vlive.py b/yt_dlp/extractor/vlive.py
index 84f51a544c..681d959027 100644
--- a/yt_dlp/extractor/vlive.py
+++ b/yt_dlp/extractor/vlive.py
@@ -17,17 +17,65 @@ from ..utils import (
     strip_or_none,
     try_get,
     urlencode_postdata,
+    url_or_none,
 )
 
 
 class VLiveBaseIE(NaverBaseIE):
-    _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
+    _NETRC_MACHINE = 'vlive'
+    _logged_in = False
+
+    def _real_initialize(self):
+        if not self._logged_in:
+            VLiveBaseIE._logged_in = self._login()
+
+    def _login(self):
+        email, password = self._get_login_info()
+        if email is None:
+            return False
+
+        LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
+        self._request_webpage(
+            LOGIN_URL, None, note='Downloading login cookies')
+
+        self._download_webpage(
+            LOGIN_URL, None, note='Logging in',
+            data=urlencode_postdata({'email': email, 'pwd': password}),
+            headers={
+                'Referer': LOGIN_URL,
+                'Content-Type': 'application/x-www-form-urlencoded'
+            })
+
+        login_info = self._download_json(
+            'https://www.vlive.tv/auth/loginInfo', None,
+            note='Checking login status',
+            headers={'Referer': 'https://www.vlive.tv/home'})
+
+        if not try_get(login_info, lambda x: x['message']['login'], bool):
+            raise ExtractorError('Unable to log in', expected=True)
+        return True
+
+    def _call_api(self, path_template, video_id, fields=None, query_add={}, note=None):
+        if note is None:
+            note = 'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0]
+        query = {'appId': '8c6cc7b45d2568fb668be6e05b6e5a3b', 'gcc': 'KR', 'platformType': 'PC'}
+        if fields:
+            query['fields'] = fields
+        if query_add:
+            query.update(query_add)
+        try:
+            return self._download_json(
+                'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
+                note, headers={'Referer': 'https://www.vlive.tv/'}, query=query)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
+            raise
 
 
 class VLiveIE(VLiveBaseIE):
     IE_NAME = 'vlive'
     _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)'
-    _NETRC_MACHINE = 'vlive'
     _TESTS = [{
         'url': 'http://www.vlive.tv/video/1326',
         'md5': 'cc7314812855ce56de70a06a27314983',
         'info_dict': {
@@ -81,53 +129,6 @@ class VLiveIE(VLiveBaseIE):
         'playlist_mincount': 120
     }]
 
-    def _real_initialize(self):
-        self._login()
-
-    def _login(self):
-        email, password = self._get_login_info()
-        if None in (email, password):
-            return
-
-        def is_logged_in():
-            login_info = self._download_json(
-                'https://www.vlive.tv/auth/loginInfo', None,
-                note='Downloading login info',
-                headers={'Referer': 'https://www.vlive.tv/home'})
-            return try_get(
-                login_info, lambda x: x['message']['login'], bool) or False
-
-        LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
-        self._request_webpage(
-            LOGIN_URL, None, note='Downloading login cookies')
-
-        self._download_webpage(
-            LOGIN_URL, None, note='Logging in',
-            data=urlencode_postdata({'email': email, 'pwd': password}),
-            headers={
-                'Referer': LOGIN_URL,
-                'Content-Type': 'application/x-www-form-urlencoded'
-            })
-
-        if not is_logged_in():
-            raise ExtractorError('Unable to log in', expected=True)
-
-    def _call_api(self, 
path_template, video_id, fields=None, limit=None):
-        query = {'appId': self._APP_ID, 'gcc': 'KR', 'platformType': 'PC'}
-        if fields:
-            query['fields'] = fields
-        if limit:
-            query['limit'] = limit
-        try:
-            return self._download_json(
-                'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
-                'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0],
-                headers={'Referer': 'https://www.vlive.tv/'}, query=query)
-        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
-                self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
-            raise
-
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
@@ -150,7 +151,7 @@ class VLiveIE(VLiveBaseIE):
             playlist_count = str_or_none(playlist.get('totalCount'))
 
             playlist = self._call_api(
-                'playlist/v1.0/playlist-%s/posts', playlist_id, 'data', limit=playlist_count)
+                'playlist/v1.0/playlist-%s/posts', playlist_id, 'data', {'limit': playlist_count})
 
             entries = []
             for video_data in playlist['data']:
@@ -216,7 +217,7 @@ class VLiveIE(VLiveBaseIE):
             raise ExtractorError('Unknown status ' + status)
 
 
-class VLivePostIE(VLiveIE):
+class VLivePostIE(VLiveBaseIE):
     IE_NAME = 'vlive:post'
     _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P<id>\d-\d+)'
     _TESTS = [{
@@ -238,8 +239,6 @@ class VLivePostIE(VLiveIE):
         'playlist_count': 1,
     }]
     _FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s'
-    _SOS_TMPL = _FVIDEO_TMPL % 'sosPlayInfo'
-    _INKEY_TMPL = _FVIDEO_TMPL % 'inKey'
 
     def _real_extract(self, url):
         post_id = self._match_id(url)
@@ -266,7 +265,7 @@ class VLivePostIE(VLiveIE):
         entry = None
         if upload_type == 'SOS':
             download = self._call_api(
-                self._SOS_TMPL, video_id)['videoUrl']['download']
+                self._FVIDEO_TMPL % 'sosPlayInfo', video_id)['videoUrl']['download']
             formats = []
             for f_id, f_url in download.items():
                 formats.append({
@@ -284,7 +283,7 @@ class VLivePostIE(VLiveIE):
                 vod_id = upload_info.get('videoId')
                 if not vod_id:
                     continue
-                inkey = self._call_api(self._INKEY_TMPL, video_id)['inKey']
+                inkey = self._call_api(self._FVIDEO_TMPL % 'inKey', video_id)['inKey']
                 entry = self._extract_video_info(video_id, vod_id, inkey)
             if entry:
                 entry['title'] = '%s_part%s' % (title, idx)
@@ -295,7 +294,7 @@
 
 class VLiveChannelIE(VLiveBaseIE):
     IE_NAME = 'vlive:channel'
-    _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)'
+    _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)(?:/board/(?P<posts_id>\d+))?'
 
_TESTS = [{ 'url': 'http://channels.vlive.tv/FCD4B', 'info_dict': { @@ -306,78 +305,58 @@ class VLiveChannelIE(VLiveBaseIE): }, { 'url': 'https://www.vlive.tv/channel/FCD4B', 'only_matching': True, + }, { + 'url': 'https://www.vlive.tv/channel/FCD4B/board/3546', + 'info_dict': { + 'id': 'FCD4B-3546', + 'title': 'MAMAMOO - Star Board', + }, + 'playlist_mincount': 880 }] - def _call_api(self, path, channel_key_suffix, channel_value, note, query): - q = { - 'app_id': self._APP_ID, - 'channel' + channel_key_suffix: channel_value, - } - q.update(query) - return self._download_json( - 'http://api.vfan.vlive.tv/vproxy/channelplus/' + path, - channel_value, note='Downloading ' + note, query=q)['result'] - - def _real_extract(self, url): - channel_code = self._match_id(url) - - channel_seq = self._call_api( - 'decodeChannelCode', 'Code', channel_code, - 'decode channel code', {})['channelSeq'] - - channel_name = None - entries = [] + def _entries(self, posts_id, board_name): + if board_name: + posts_path = 'post/v1.0/board-%s/posts' + query_add = {'limit': 100, 'sortType': 'LATEST'} + else: + posts_path = 'post/v1.0/channel-%s/starPosts' + query_add = {'limit': 100} for page_num in itertools.count(1): video_list = self._call_api( - 'getChannelVideoList', 'Seq', channel_seq, - 'channel list page #%d' % page_num, { - # Large values of maxNumOfRows (~300 or above) may cause - # empty responses (see [1]), e.g. this happens for [2] that - # has more than 300 videos. - # 1. https://github.com/ytdl-org/youtube-dl/issues/13830 - # 2. http://channels.vlive.tv/EDBF. - 'maxNumOfRows': 100, - 'pageNo': page_num - } - ) + posts_path, posts_id, 'channel{channelName},contentType,postId,title,url', query_add, + note=f'Downloading playlist page {page_num}') - if not channel_name: - channel_name = try_get( - video_list, - lambda x: x['channelInfo']['channelName'], - compat_str) - - videos = try_get( - video_list, lambda x: x['videoList'], list) - if not videos: - break - - for video in videos: - video_id = video.get('videoSeq') - video_type = video.get('videoType') - - if not video_id or not video_type: + for video in try_get(video_list, lambda x: x['data'], list) or []: + video_id = str(video.get('postId')) + video_title = str_or_none(video.get('title')) + video_url = url_or_none(video.get('url')) + if not all((video_id, video_title, video_url)) or video.get('contentType') != 'VIDEO': continue - video_id = compat_str(video_id) + channel_name = try_get(video, lambda x: x['channel']['channelName'], compat_str) + yield self.url_result(video_url, VLivePostIE.ie_key(), video_id, video_title, channel=channel_name) - if video_type in ('PLAYLIST'): - first_video_id = try_get( - video, - lambda x: x['videoPlaylist']['videoList'][0]['videoSeq'], int) + after = try_get(video_list, lambda x: x['paging']['nextParams']['after'], compat_str) + if not after: + break + query_add['after'] = after - if not first_video_id: - continue + def _real_extract(self, url): + channel_id, posts_id = self._match_valid_url(url).groups() - entries.append( - self.url_result( - 'http://www.vlive.tv/video/%s' % first_video_id, - ie=VLiveIE.ie_key(), video_id=first_video_id)) - else: - entries.append( - self.url_result( - 'http://www.vlive.tv/video/%s' % video_id, - ie=VLiveIE.ie_key(), video_id=video_id)) + board_name = None + if posts_id: + board = self._call_api( + 'board/v1.0/board-%s', posts_id, 'title,boardType') + board_name = board.get('title') or 'Unknown' + if board.get('boardType') not in ('STAR', 'VLIVE_PLUS'): + raise 
ExtractorError(f'Board {board_name!r} is not supported', expected=True) + + entries = self._entries(posts_id or channel_id, board_name) + first_video = next(entries) + channel_name = first_video['channel'] return self.playlist_result( - entries, channel_code, channel_name) + itertools.chain([first_video], entries), + f'{channel_id}-{posts_id}' if posts_id else channel_id, + f'{channel_name} - {board_name}' if channel_name and board_name else channel_name) From 49a57e70a9105dfe1671e96bef24663bce5b563d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 23 Oct 2021 02:07:20 +0530 Subject: [PATCH 311/641] [cleanup] misc --- .../ISSUE_TEMPLATE/2_site_support_request.yml | 2 +- .../ISSUE_TEMPLATE/3_site_feature_request.yml | 12 ++++ .github/ISSUE_TEMPLATE/6_question.yml | 4 +- .../2_site_support_request.yml | 2 +- .../3_site_feature_request.yml | 12 ++++ .github/ISSUE_TEMPLATE_tmpl/6_question.yml | 4 +- .github/workflows/build.yml | 16 +++-- .github/workflows/quick-test.yml | 2 +- README.md | 21 +++--- devscripts/make_lazy_extractors.py | 2 +- pyinst.py | 7 +- yt_dlp/YoutubeDL.py | 72 ++++++++++--------- yt_dlp/__init__.py | 2 +- yt_dlp/downloader/fragment.py | 3 +- yt_dlp/extractor/common.py | 6 +- yt_dlp/extractor/soundcloud.py | 2 +- yt_dlp/extractor/youtube.py | 4 +- 17 files changed, 104 insertions(+), 69 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index c0a22ac2b5..f8ca606c7a 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -33,7 +33,7 @@ body: attributes: label: Example URLs description: | - Provide all kinds of example URLs, support for which should be included. Replace following example URLs by yours + Provide all kinds of example URLs for which support should be added value: | - Single video: https://www.youtube.com/watch?v=BaW_jenozKc - Single video: https://youtu.be/BaW_jenozKc diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index 44c8a0816c..a986df363d 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -13,6 +13,8 @@ body: required: true - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true + - label: I've checked that all provided URLs are alive and playable in a browser + required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. 
DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) @@ -24,6 +26,16 @@ body: label: Region description: "Enter the region the site is accessible from" placeholder: "India" + - type: textarea + id: example-urls + attributes: + label: Example URLs + description: | + Example URLs that can be used to demonstrate the requested feature + value: | + https://www.youtube.com/watch?v=BaW_jenozKc + validations: + required: true - type: textarea id: description attributes: diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml index c101c2286d..a6e5fa80d7 100644 --- a/.github/ISSUE_TEMPLATE/6_question.yml +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -11,11 +11,11 @@ body: options: - label: I'm asking a question and not reporting a bug/feature request required: true - - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp) + - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true - - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues) for similar questions including closed ones + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions including closed ones required: true - type: textarea id: question diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml index be6427ce1a..f7a48edc79 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml @@ -33,7 +33,7 @@ body: attributes: label: Example URLs description: | - Provide all kinds of example URLs, support for which should be included. Replace following example URLs by yours + Provide all kinds of example URLs for which support should be added value: | - Single video: https://www.youtube.com/watch?v=BaW_jenozKc - Single video: https://youtu.be/BaW_jenozKc diff --git a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml index f19d958c63..09b98a9ec1 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml @@ -13,6 +13,8 @@ body: required: true - label: I've verified that I'm running yt-dlp version **%(version)s**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true + - label: I've checked that all provided URLs are alive and playable in a browser + required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. 
DO NOT post duplicates required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) @@ -24,6 +26,16 @@ body: label: Region description: "Enter the region the site is accessible from" placeholder: "India" + - type: textarea + id: example-urls + attributes: + label: Example URLs + description: | + Example URLs that can be used to demonstrate the requested feature + value: | + https://www.youtube.com/watch?v=BaW_jenozKc + validations: + required: true - type: textarea id: description attributes: diff --git a/.github/ISSUE_TEMPLATE_tmpl/6_question.yml b/.github/ISSUE_TEMPLATE_tmpl/6_question.yml index c101c2286d..a6e5fa80d7 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/6_question.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/6_question.yml @@ -11,11 +11,11 @@ body: options: - label: I'm asking a question and not reporting a bug/feature request required: true - - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp) + - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true - - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues) for similar questions including closed ones + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions including closed ones required: true - type: textarea id: question diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3082884aa0..3329c141f0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -53,7 +53,7 @@ jobs: - name: Build lazy extractors id: lazy_extractors - run: python devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py + run: python devscripts/make_lazy_extractors.py - name: Run Make run: make all tar - name: Get SHA2-256SUMS for yt-dlp @@ -115,10 +115,12 @@ jobs: release_name: yt-dlp ${{ steps.bump_version.outputs.ytdlp_version }} commitish: ${{ steps.push_update.outputs.head_sha }} body: | - See [this](https://github.com/yt-dlp/yt-dlp#release-files) for a description of the files - - #### Changelog: + ### Changelog: ${{ env.changelog }} + + --- + + ### See [this](https://github.com/yt-dlp/yt-dlp#release-files) for a description of the release files draft: false prerelease: false - name: Upload yt-dlp Unix binary @@ -162,7 +164,7 @@ jobs: run: /usr/bin/python3 devscripts/update-version.py - name: Build lazy extractors id: lazy_extractors - run: /usr/bin/python3 devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py + run: /usr/bin/python3 devscripts/make_lazy_extractors.py - name: Run PyInstaller Script run: /usr/bin/python3 pyinst.py --target-architecture universal2 --onefile - name: Upload yt-dlp MacOS binary @@ -233,7 +235,7 @@ jobs: run: python devscripts/update-version.py - name: Build lazy extractors id: lazy_extractors - run: python devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py + run: python devscripts/make_lazy_extractors.py - name: Run PyInstaller Script run: python pyinst.py - name: Upload yt-dlp.exe Windows binary @@ -320,7 +322,7 @@ jobs: run: python devscripts/update-version.py - name: Build lazy extractors id: lazy_extractors - run: python devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py + run: python devscripts/make_lazy_extractors.py - name: Run 
PyInstaller Script for 32 Bit run: python pyinst.py - name: Upload Executable yt-dlp_x86.exe diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml index bbad209b39..d8e14f4705 100644 --- a/.github/workflows/quick-test.yml +++ b/.github/workflows/quick-test.yml @@ -28,6 +28,6 @@ jobs: - name: Install flake8 run: pip install flake8 - name: Make lazy extractors - run: python devscripts/make_lazy_extractors.py yt_dlp/extractor/lazy_extractors.py + run: python devscripts/make_lazy_extractors.py - name: Run flake8 run: flake8 . diff --git a/README.md b/README.md index 3ca308f876..713e6e5344 100644 --- a/README.md +++ b/README.md @@ -205,7 +205,7 @@ File|Description :---|:--- [yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|MacOS standalone executable [yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows standalone x86 (32bit) binary -[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows standalone x64 binary built with `py2exe`.
<br/> Does not contain `pycryptodomex`, needs VC++14
+[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows standalone x64 binary built with `py2exe`.<br/>
Does not contain `pycryptodomex`, needs VC++14 [yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged windows executable (No auto-update) [yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS executable (No auto-update) @@ -248,11 +248,10 @@ The windows releases are already built with the python interpreter, mutagen, pyc ### COMPILE **For Windows**: -To build the Windows executable, you must have pyinstaller (and optionally mutagen, pycryptodomex, websockets) - -Once you have all the necessary dependencies installed, just run `pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it. +To build the Windows executable, you must have pyinstaller (and optionally mutagen, pycryptodomex, websockets). Once you have all the necessary dependencies installed, (optionally) build lazy extractors using `devscripts/make_lazy_extractors.py`, and then just run `pyinst.py`. The executable will be built for the same architecture (32/64 bit) as the python used to build it. py -m pip install -U pyinstaller -r requirements.txt + py devscripts/make_lazy_extractors.py py pyinst.py Note that pyinstaller [does not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment @@ -261,7 +260,7 @@ Note that pyinstaller [does not support](https://github.com/pyinstaller/pyinstal You will need the required build tools: `python`, `make` (GNU), `pandoc`, `zip`, `pytest` Then simply run `make`. You can also run `make yt-dlp` instead to compile only the binary without updating any of the additional files -**Note**: In either platform, `devscripts\update-version.py` can be used to automatically update the version number +**Note**: In either platform, `devscripts/update-version.py` can be used to automatically update the version number # USAGE AND OPTIONS @@ -1156,11 +1155,13 @@ Available only in `--sponsorblock-chapter-title`: - `category_names` (list): Friendly names of the categories - `name` (string): Friendly name of the smallest category -Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default). +Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory. -For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory. +Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default). 
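As a rough illustration of the `%`-style expansion described above, here is a minimal sketch using plain Python formatting; the actual template engine additionally applies output sanitization and the `NA` placeholder handling:

```python
# Output templates use Python %-style named formatting under the hood
info = {'title': 'yt-dlp test video', 'id': 'BaW_jenozKc', 'ext': 'mp4', 'view_count': 42}

print('%(title)s-%(id)s.%(ext)s' % info)  # yt-dlp test video-BaW_jenozKc.mp4
print('%(view_count)05d' % info)          # 00042 (zero-padded to 5 characters)
```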
-
 
**Tip**: Look at the `-j` output to identify which fields are available for the particular URL
 
-For numeric sequences you can use numeric related formatting, for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`.
+For numeric sequences you can use [numeric related formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting), for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`.
 
 Output templates can also contain arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
 
@@ -1309,7 +1310,7 @@ The available fields are:
 - `width`: Width of video
 - `res`: Video resolution, calculated as the smallest dimension.
 - `fps`: Framerate of video
-  - `hdr`: The dynamic range of the video (`DV` > `HDR12` > `HDR10+` > `HDR10` > `SDR`)
+  - `hdr`: The dynamic range of the video (`DV` > `HDR12` > `HDR10+` > `HDR10` > `HLG` > `SDR`)
 - `tbr`: Total average bitrate in KBit/s
 - `vbr`: Average video bitrate in KBit/s
 - `abr`: Average audio bitrate in KBit/s
@@ -1627,6 +1628,8 @@ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
 
 See the public functions in [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py) for other available functions. Eg: `ydl.download`, `ydl.download_with_info_file`
 
+**Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the example above
+
 
 # DEPRECATED OPTIONS
 
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index 427045b984..0411df76b9 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -9,7 +9,7 @@ import sys
 
 sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
 
-lazy_extractors_filename = sys.argv[1]
+lazy_extractors_filename = sys.argv[1] if len(sys.argv) > 1 else 'yt_dlp/extractor/lazy_extractors.py'
 if os.path.exists(lazy_extractors_filename):
     os.remove(lazy_extractors_filename)
 
diff --git a/pyinst.py b/pyinst.py
index 0a695289b7..c7ef2761bb 100644
--- a/pyinst.py
+++ b/pyinst.py
@@ -24,16 +24,15 @@ def main():
     opts = parse_options()
     version = read_version()
 
-    suffix = '_x86' if ARCH == '32' else '_macos' if OS_NAME == 'Darwin' else ''
+    suffix = '_macos' if OS_NAME == 'Darwin' else '_x86' if ARCH == '32' else ''
     final_file = 'dist/%syt-dlp%s%s' % (
         'yt-dlp/' if '--onedir' in opts else '', suffix, '.exe' if OS_NAME == 'Windows' else '')
 
     print(f'Building yt-dlp v{version} {ARCH}bit for {OS_NAME} with options {opts}')
     print('Remember to update the version using "devscripts/update-version.py"')
     if not os.path.isfile('yt_dlp/extractor/lazy_extractors.py'):
         print('WARNING: Building without lazy_extractors. 
Run ' - '"devscripts/make_lazy_extractors.py" "yt_dlp/extractor/lazy_extractors.py" ' - 'to build lazy extractors', file=sys.stderr) + '"devscripts/make_lazy_extractors.py" to build lazy extractors', file=sys.stderr) print(f'Destination: {final_file}\n') opts = [ diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index f95bbea81f..0ac1f1c61c 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -307,7 +307,7 @@ class YoutubeDL(object): cookiefile: File name where cookies should be read from and dumped to cookiesfrombrowser: A tuple containing the name of the browser and the profile name/path from where cookies are loaded. - Eg: ('chrome', ) or (vivaldi, 'default') + Eg: ('chrome', ) or ('vivaldi', 'default') nocheckcertificate:Do not verify SSL certificates prefer_insecure: Use HTTP instead of HTTPS to retrieve information. At the moment, this is only supported by YouTube. @@ -503,7 +503,7 @@ class YoutubeDL(object): def __init__(self, params=None, auto_init=True): """Create a FileDownloader object with the given options. @param auto_init Whether to load the default extractors and print header (if verbose). - Set to 'no_verbose_header' to not ptint the header + Set to 'no_verbose_header' to not print the header """ if params is None: params = {} @@ -551,7 +551,7 @@ class YoutubeDL(object): check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"') check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"') - for msg in self.params.get('warnings', []): + for msg in self.params.get('_warnings', []): self.report_warning(msg) if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None: @@ -584,7 +584,9 @@ class YoutubeDL(object): self._output_channel = os.fdopen(master, 'rb') except OSError as ose: if ose.errno == errno.ENOENT: - self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.') + self.report_warning( + 'Could not find fribidi executable, ignoring --bidi-workaround. ' + 'Make sure that fribidi is an executable file in one of the directories in your $PATH.') else: raise @@ -631,7 +633,7 @@ class YoutubeDL(object): """Preload the archive, if any is specified""" if fn is None: return False - self.write_debug('Loading archive file %r\n' % fn) + self.write_debug(f'Loading archive file {fn!r}') try: with locked_file(fn, 'r', encoding='utf-8') as archive_file: for line in archive_file: @@ -658,7 +660,7 @@ class YoutubeDL(object): ) self.report_warning( 'Long argument string detected. 
' - 'Use -- to separate parameters and URLs, like this:\n%s\n' % + 'Use -- to separate parameters and URLs, like this:\n%s' % args_to_str(correct_argv)) def add_info_extractor(self, ie): @@ -1550,7 +1552,7 @@ class YoutubeDL(object): playlistitems = list(range(playliststart, playliststart + n_entries)) ie_result['requested_entries'] = playlistitems - if self.params.get('allow_playlist_files', True): + if not self.params.get('simulate') and self.params.get('allow_playlist_files', True): ie_copy = { 'playlist': playlist, 'playlist_id': ie_result.get('id'), @@ -1558,6 +1560,7 @@ class YoutubeDL(object): 'playlist_uploader': ie_result.get('uploader'), 'playlist_uploader_id': ie_result.get('uploader_id'), 'playlist_index': 0, + 'n_entries': n_entries, } ie_copy.update(dict(ie_result)) @@ -1883,6 +1886,7 @@ class YoutubeDL(object): 'height': the_only_video.get('height'), 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video), 'fps': the_only_video.get('fps'), + 'dynamic_range': the_only_video.get('dynamic_range'), 'vcodec': the_only_video.get('vcodec'), 'vbr': the_only_video.get('vbr'), 'stretched_ratio': the_only_video.get('stretched_ratio'), @@ -2381,7 +2385,7 @@ class YoutubeDL(object): new_info['__original_infodict'] = info_dict new_info.update(fmt) self.process_info(new_info) - # We update the info dict with the best quality format (backwards compatibility) + # We update the info dict with the selected best quality format (backwards compatibility) if formats_to_download: info_dict.update(formats_to_download[-1]) return info_dict @@ -3250,35 +3254,40 @@ class YoutubeDL(object): def print_debug_header(self): if not self.params.get('verbose'): return - get_encoding = lambda stream: getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__) - encoding_str = ( - '[debug] Encodings: locale %s, fs %s, stdout %s, stderr %s, pref %s\n' % ( - locale.getpreferredencoding(), - sys.getfilesystemencoding(), - get_encoding(self._screen_file), get_encoding(self._err_file), - self.get_encoding())) + + def get_encoding(stream): + ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__) + if not supports_terminal_sequences(stream): + ret += ' (No ANSI)' + return ret + + encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % ( + locale.getpreferredencoding(), + sys.getfilesystemencoding(), + get_encoding(self._screen_file), get_encoding(self._err_file), + self.get_encoding()) logger = self.params.get('logger') if logger: write_debug = lambda msg: logger.debug(f'[debug] {msg}') write_debug(encoding_str) else: - write_debug = lambda msg: self._write_string(f'[debug] {msg}') - write_string(encoding_str, encoding=None) + write_string(f'[debug] {encoding_str}', encoding=None) + write_debug = lambda msg: self._write_string(f'[debug] {msg}\n') source = detect_variant() - write_debug('yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})')) + write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})')) if not _LAZY_LOADER: if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): - write_debug('Lazy loading extractors is forcibly disabled\n') + write_debug('Lazy loading extractors is forcibly disabled') else: - write_debug('Lazy loading extractors is disabled\n') + write_debug('Lazy loading extractors is disabled') if plugin_extractors or plugin_postprocessors: - write_debug('Plugins: %s\n' % [ + write_debug('Plugins: %s' % [ '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' 
as {name}') for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())]) if self.params.get('compat_opts'): - write_debug('Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts'))) + write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts'))) try: sp = Popen( ['git', 'rev-parse', '--short', 'HEAD'], @@ -3287,7 +3296,7 @@ class YoutubeDL(object): out, err = sp.communicate_or_kill() out = out.decode().strip() if re.match('[0-9a-f]+', out): - write_debug('Git HEAD: %s\n' % out) + write_debug('Git HEAD: %s' % out) except Exception: try: sys.exc_clear() @@ -3300,7 +3309,7 @@ class YoutubeDL(object): return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3] return impl_name - write_debug('Python version %s (%s %s) - %s\n' % ( + write_debug('Python version %s (%s %s) - %s' % ( platform.python_version(), python_implementation(), platform.architecture()[0], @@ -3312,7 +3321,7 @@ class YoutubeDL(object): exe_str = ', '.join( f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v ) or 'none' - write_debug('exe versions: %s\n' % exe_str) + write_debug('exe versions: %s' % exe_str) from .downloader.websocket import has_websockets from .postprocessor.embedthumbnail import has_mutagen @@ -3325,21 +3334,18 @@ class YoutubeDL(object): SQLITE_AVAILABLE and 'sqlite', KEYRING_AVAILABLE and 'keyring', )))) or 'none' - write_debug('Optional libraries: %s\n' % lib_str) - write_debug('ANSI escape support: stdout = %s, stderr = %s\n' % ( - supports_terminal_sequences(self._screen_file), - supports_terminal_sequences(self._err_file))) + write_debug('Optional libraries: %s' % lib_str) proxy_map = {} for handler in self._opener.handlers: if hasattr(handler, 'proxies'): proxy_map.update(handler.proxies) - write_debug('Proxy map: ' + compat_str(proxy_map) + '\n') + write_debug(f'Proxy map: {proxy_map}') - if self.params.get('call_home', False): + # Not implemented + if False and self.params.get('call_home'): ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8') - write_debug('Public IP address: %s\n' % ipaddr) - return + write_debug('Public IP address: %s' % ipaddr) latest_version = self.urlopen( 'https://yt-dl.org/latest/version').read().decode('utf-8') if version_tuple(latest_version) > version_tuple(__version__): diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index d97d4af648..e1c45441ab 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -745,7 +745,7 @@ def _real_main(argv=None): 'geo_bypass': opts.geo_bypass, 'geo_bypass_country': opts.geo_bypass_country, 'geo_bypass_ip_block': opts.geo_bypass_ip_block, - 'warnings': warnings, + '_warnings': warnings, 'compat_opts': compat_opts, } diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index c345f3148b..a9d1471f8c 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -370,7 +370,8 @@ class FragmentFD(FileDownloader): if max_progress == 1: return self.download_and_append_fragments(*args[0], pack_func=pack_func, finish_func=finish_func) max_workers = self.params.get('concurrent_fragment_downloads', max_progress) - self._prepare_multiline_status(max_progress) + if max_progress > 1: + self._prepare_multiline_status(max_progress) def thread_func(idx, ctx, fragments, info_dict, tpe): ctx['max_progress'] = max_progress diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index e00d8c42b5..22b1ed69ab 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -448,7 +448,9 @@ 
class InfoExtractor(object): } def __init__(self, downloader=None): - """Constructor. Receives an optional downloader.""" + """Constructor. Receives an optional downloader (a YoutubeDL instance). + If a downloader is not passed during initialization, + it must be set using "set_downloader()" before "extract()" is called""" self._ready = False self._x_forwarded_for_ip = None self._printed_messages = set() @@ -664,7 +666,7 @@ class InfoExtractor(object): See _download_webpage docstring for arguments specification. """ if not self._downloader._first_webpage_request: - sleep_interval = float_or_none(self.get_param('sleep_interval_requests')) or 0 + sleep_interval = self.get_param('sleep_interval_requests') or 0 if sleep_interval > 0: self.to_screen('Sleeping %s seconds ...' % sleep_interval) time.sleep(sleep_interval) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index e89383ff13..412331e17c 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -855,7 +855,7 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): IE_NAME = 'soundcloud:search' - IE_DESC = 'Soundcloud search' + IE_DESC = 'Soundcloud search, "scsearch" keyword' _MAX_RESULTS = float('inf') _TESTS = [{ 'url': 'scsearch15:post-avant jazzcore', diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index aa58a22bff..54f5ef15cc 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -4304,9 +4304,7 @@ class YoutubePlaylistIE(InfoExtractor): def suitable(cls, url): if YoutubeTabIE.suitable(url): return False - # Hack for lazy extractors until more generic solution is implemented - # (see #28780) - from .youtube import parse_qs + from ..utils import parse_qs qs = parse_qs(url) if qs.get('v', [None])[0]: return False From 0676afb12609b4d457b9626215eea38bab40f2dc Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 23 Oct 2021 02:09:15 +0530 Subject: [PATCH 312/641] Release 2021.10.22 --- CONTRIBUTORS | 4 +++ Changelog.md | 78 +++++++++++++++++++++++++++++++++++++++++++++++ README.md | 4 +-- supportedsites.md | 14 +++++++-- 4 files changed, 95 insertions(+), 5 deletions(-) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 048d988529..2bf96affe4 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -125,3 +125,7 @@ jfogelman timethrow sarnoud Bojidarist +18928172992817182/gustaf +nixklai +smplayer-dev +Zirro diff --git a/Changelog.md b/Changelog.md index 2e6da33fb8..6dbc13bd72 100644 --- a/Changelog.md +++ b/Changelog.md @@ -14,6 +14,84 @@ --> +### 2021.10.22 + +* [build] Improvements + * Build standalone MacOS packages by [smplayer-dev](https://github.com/smplayer-dev) + * Release windows exe built with `py2exe` + * Enable lazy-extractors in releases. 
+ * Set env var `YTDLP_NO_LAZY_EXTRACTORS` to forcefully disable this (experimental) + * Clean up error reporting in update + * Refactor `pyinst.py`, misc cleanup and improve docs +* [docs] Migrate issues to use forms by [Ashish0804](https://github.com/Ashish0804) +* [downloader] **Fix slow progress hooks** + * This was causing HLS/DASH downloads to be extremely slow in some situations +* [downloader/ffmpeg] Improve simultaneous download and merge +* [EmbedMetadata] Allow overwriting all default metadata with `meta_default` key +* [ModifyChapters] Add ability for `--remove-chapters` to remove sections by timestamp +* [utils] Allow duration strings in `--match-filter` +* Add HDR information to formats +* Add negative option `--no-batch-file` by [Zirro](https://github.com/Zirro) +* Calculate more fields for merged formats +* Do not verify thumbnail URLs unless `--check-formats` is specified +* Don't create console for subprocesses on Windows +* Fix `--restrict-filename` when used with default template +* Fix `check_formats` output being written to stdout when `-qv` +* Fix bug in storyboards +* Fix conflict b/w id and ext in format selection +* Fix verbose head not showing custom configs +* Load archive only after printing verbose head +* Make `duration_string` and `resolution` available in --match-filter +* Re-implement deprecated option `--id` +* Reduce default `--socket-timeout` +* Write verbose header to logger +* [outtmpl] Fix bug in expanding environment variables +* [cookies] Local State should be opened as utf-8 +* [extractor,utils] Detect more codecs/mimetypes +* [extractor] Detect `EXT-X-KEY` Apple FairPlay +* [utils] Use `importlib` to load plugins by [sulyi](https://github.com/sulyi) +* [http] Retry on socket timeout and show the last encountered error +* [fragment] Print error message when skipping fragment +* [aria2c] Fix `--skip-unavailable-fragment` +* [SponsorBlock] Obey `extractor-retries` and `sleep-requests` +* [Merger] Do not add `aac_adtstoasc` to non-hls audio +* [ModifyChapters] Do not mutate original chapters by [nihil-admirari](https://github.com/nihil-admirari) +* [devscripts/run_tests] Use markers to filter tests by [sulyi](https://github.com/sulyi) +* [7plus] Add cookie based authentication by [nyuszika7h](https://github.com/nyuszika7h) +* [AdobePass] Fix RCN MSO by [jfogelman](https://github.com/jfogelman) +* [CBC] Fix Gem livestream by [makeworld-the-better-one](https://github.com/makeworld-the-better-one) +* [CBC] Support CBC Gem member content by [makeworld-the-better-one](https://github.com/makeworld-the-better-one) +* [crunchyroll] Add season to flat-playlist Closes #1319 +* [crunchyroll] Add support for `beta.crunchyroll` URLs and fix series URLs with language code +* [EUScreen] Add Extractor by [Ashish0804](https://github.com/Ashish0804) +* [Gronkh] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [hidive] Fix typo +* [Hotstar] Mention Dynamic Range in `format_id` by [Ashish0804](https://github.com/Ashish0804) +* [Hotstar] Raise appropriate error for DRM +* [instagram] Add login by [u-spec-png](https://github.com/u-spec-png) +* [instagram] Show appropriate error when login is needed +* [microsoftstream] Add extractor by [damianoamatruda](https://github.com/damianoamatruda), [nixklai](https://github.com/nixklai) +* [on24] Add extractor by [damianoamatruda](https://github.com/damianoamatruda) +* [patreon] Fix vimeo player regex by [zenerdi0de](https://github.com/zenerdi0de) +* [SkyNewsAU] Add extractor by 
[Ashish0804](https://github.com/Ashish0804) +* [tagesschau] Fix extractor by [u-spec-png](https://github.com/u-spec-png) +* [tbs] Add tbs live streams by [llacb47](https://github.com/llacb47) +* [tiktok] Fix typo and update tests +* [trovo] Support channel clips and VODs by [Ashish0804](https://github.com/Ashish0804) +* [Viafree] Add support for Finland by [18928172992817182](https://github.com/18928172992817182) +* [vimeo] Fix embedded `player.vimeo` +* [vlive:channel] Fix extraction by [kikuyan](https://github.com/kikuyan), [pukkandan](https://github.com/pukkandan) +* [youtube] Add auto-translated subtitles +* [youtube] Expose different formats with same itag +* [youtube:comments] Fix for new layout by [coletdjnz](https://github.com/coletdjnz) +* [cleanup] Cleanup bilibili code by [pukkandan](https://github.com/pukkandan), [u-spec-png](https://github.com/u-spec-png) +* [cleanup] Remove broken youtube login code +* [cleanup] Standardize timestamp formatting code +* [cleanup] Generalize `getcomments` implementation for extractors +* [cleanup] Simplify search extractors code +* [cleanup] misc + + ### 2021.10.10 * [downloader/ffmpeg] Fix bug in initializing `FFmpegPostProcessor` diff --git a/README.md b/README.md index 713e6e5344..6e773412d8 100644 --- a/README.md +++ b/README.md @@ -93,9 +93,9 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats -* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, RCN MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ, BannedVideo, bilibili categories, Epicon, filmmodu, GabTV, HungamaAlbum, ManotoTV, Niconico search, Patreon User, peloton, ProjectVeritas, radiko, StarTV, tiktok user, Tokentube, voicy, TV2HuSeries, biliintl, 17live, NewgroundsUser, peertube channel/playlist, ZenYandex, CAM4, CGTN, damtomo, gotostage, Koo, Mediaite, Mediaklikk, MuseScore, nzherald, Olympics replay, radlive, SovietsCloset, Streamanity, Theta, Chingari, ciscowebex, Gettr, GoPro, N1, Theta, Veo, Vupload, NovaPlay +* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, RCN MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ, BannedVideo, bilibili categories, Epicon, filmmodu, GabTV, HungamaAlbum, ManotoTV, Niconico search, Patreon User, peloton, ProjectVeritas, radiko, StarTV, tiktok user, Tokentube, voicy, TV2HuSeries, biliintl, 17live, NewgroundsUser, peertube channel/playlist, ZenYandex, CAM4, CGTN, damtomo, gotostage, Koo, Mediaite, Mediaklikk, MuseScore, nzherald, Olympics replay, radlive, SovietsCloset, Streamanity, Theta, Chingari, 
ciscowebex, Gettr, GoPro, N1, Theta, Veo, Vupload, NovaPlay, SkyNewsAU, EUScreen, Gronkh, microsoftstream, on24, trovo channels -* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll playlist, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds, SAML Verizon login, Hungama, afreecatv, aljazeera, ATV, bitchute, camtube, CDA, eroprofile, facebook, HearThisAtIE, iwara, kakao, Motherless, Nova, peertube, pornhub, reddit, tiktok, TV2, TV2Hu, tv5mondeplus, VH1, Viafree, XHamster, 9Now, AnimalPlanet, Arte, CBC, Chingari, comedycentral, DIYNetwork, niconico, dw, funimation, globo, HiDive, NDR, Nuvid, Oreilly, pbs, plutotv, reddit, redtube, soundcloud, SpankBang, VrtNU, bbc, Bilibili, LinkedInLearning, parliamentlive, PolskieRadio, Streamable, vidme, francetv +* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds, SAML Verizon login, Hungama, afreecatv, aljazeera, ATV, bitchute, camtube, CDA, eroprofile, facebook, HearThisAtIE, iwara, kakao, Motherless, Nova, peertube, pornhub, reddit, tiktok, TV2, TV2Hu, tv5mondeplus, VH1, Viafree, XHamster, 9Now, AnimalPlanet, Arte, CBC, Chingari, comedycentral, DIYNetwork, niconico, dw, funimation, globo, HiDive, NDR, Nuvid, Oreilly, pbs, plutotv, reddit, redtube, soundcloud, SpankBang, VrtNU, bbc, Bilibili, LinkedInLearning, parliamentlive, PolskieRadio, Streamable, vidme, francetv, 7plus, tagesschau * **Subtitle extraction from manifests**: Subtitles can be extracted from streaming media manifests. 
See [commit/be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details diff --git a/supportedsites.md b/supportedsites.md index 02be6b918c..616151db8d 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -226,7 +226,9 @@ - **Crackle** - **CrooksAndLiars** - **crunchyroll** + - **crunchyroll:beta** - **crunchyroll:playlist** + - **crunchyroll:playlist:beta** - **CSpan**: C-SPAN - **CtsNews**: 華視新聞 - **CTV** @@ -315,6 +317,7 @@ - **ESPNArticle** - **EsriVideo** - **Europa** + - **EUScreen** - **EWETV** - **ExpoTV** - **Expressen** @@ -394,6 +397,7 @@ - **Goshgay** - **GoToStage** - **GPUTechConf** + - **Gronkh** - **Groupon** - **hbo** - **HearThisAt** @@ -570,6 +574,7 @@ - **Mgoon** - **MGTV**: 芒果TV - **MiaoPai** + - **microsoftstream**: Microsoft Stream - **mildom**: Record ongoing live by specific user in Mildom - **mildom:user:vod**: Download all VODs from specific user in Mildom - **mildom:vod**: Download a VOD in Mildom @@ -734,6 +739,7 @@ - **Odnoklassniki** - **OktoberfestTV** - **OlympicsReplay** + - **on24**: ON24 - **OnDemandKorea** - **onet.pl** - **onet.tv** @@ -961,6 +967,7 @@ - **SkylineWebcams** - **skynewsarabia:article** - **skynewsarabia:video** + - **SkyNewsAU** - **Slideshare** - **SlidesLive** - **Slutload** @@ -970,7 +977,7 @@ - **SonyLIVSeries** - **soundcloud** - **soundcloud:playlist** - - **soundcloud:search**: Soundcloud search + - **soundcloud:search**: Soundcloud search, "scsearch" keyword - **soundcloud:set** - **soundcloud:trackstation** - **soundcloud:user** @@ -1029,7 +1036,6 @@ - **SztvHu** - **t-online.de** - **Tagesschau** - - **tagesschau:player** - **Tass** - **TBS** - **TDSLifeway** @@ -1089,6 +1095,8 @@ - **TrailerAddict** (Currently broken) - **Trilulilu** - **Trovo** + - **TrovoChannelClip**: All Clips of a trovo.live channel, "trovoclip" keyword + - **TrovoChannelVod**: All VODs of a trovo.live channel, "trovovod" keyword - **TrovoVod** - **TruNews** - **TruTV** @@ -1193,7 +1201,7 @@ - **Viddler** - **Videa** - **video.arnes.si**: Arnes Video - - **video.google:search**: Google Video search + - **video.google:search**: Google Video search (Currently broken) - **video.sky.it** - **video.sky.it:live** - **VideoDetective** From 1117579b9457f8fbf7a4d7433a92b67ac802bdea Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 22 Oct 2021 20:47:18 +0000 Subject: [PATCH 313/641] [version] update :ci skip all --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.yml | 6 +++--- .github/ISSUE_TEMPLATE/3_site_feature_request.yml | 2 +- .github/ISSUE_TEMPLATE/4_bug_report.yml | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.yml | 2 +- yt_dlp/version.py | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index 2a492d132d..862e7235fd 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a broken site required: true - - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.10.22**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -51,12 +51,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.10.10 (exe) + [debug] yt-dlp version 2021.10.22 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.10.10) + yt-dlp is up to date (2021.10.22) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index f8ca606c7a..aa00b8ad7b 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.10.22**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -62,12 +62,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.10.10 (exe) + [debug] yt-dlp version 2021.10.22 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.10.10) + yt-dlp is up to date (2021.10.22) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index a986df363d..59578b7122 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a site feature request required: true - - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.10.22**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index 1c609cab18..9003bb19ae 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.10.22**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -45,12 +45,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.10.10 (exe) + [debug] yt-dlp version 2021.10.22 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.10.10) + yt-dlp is up to date (2021.10.22) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index d839df95df..134416f4e1 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a feature request required: true - - label: I've verified that I'm running yt-dlp version **2021.10.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.10.22**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates required: true diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 83b6fea9fc..e7203be6b6 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2021.10.10' +__version__ = '2021.10.22' From 93c7f3398dd2e45fdb2c32b49ff169c46eadfbda Mon Sep 17 00:00:00 2001 From: Alf Marius Date: Sat, 23 Oct 2021 00:52:01 +0200 Subject: [PATCH 314/641] [Nrk] See desc (#1382) * Endpoint has changed. Currently the old one redirects to the new one, but this may change * Descriptions use \r instead of \n. 
So translate it Authored by: fractalf --- yt_dlp/extractor/nrk.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py index b556bc6aa4..49d58a685b 100644 --- a/yt_dlp/extractor/nrk.py +++ b/yt_dlp/extractor/nrk.py @@ -147,7 +147,7 @@ class NRKIE(NRKBaseIE): def _real_extract(self, url): video_id = self._match_id(url).split('/')[-1] - path_templ = 'playback/%s/' + video_id + path_templ = 'playback/%s/program/' + video_id def call_playback_api(item, query=None): return self._call_api(path_templ % item, video_id, item, query=query) @@ -188,7 +188,7 @@ class NRKIE(NRKBaseIE): title = titles['title'] alt_title = titles.get('subtitle') - description = preplay.get('description') + description = try_get(preplay, lambda x: x['description'].replace('\r', '\n')) duration = parse_duration(playable.get('duration')) or parse_duration(data.get('duration')) thumbnails = [] From ec11a9f4a26e8225b195e5f91bd0b72b008d0c3a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 20 Oct 2021 22:07:32 +0530 Subject: [PATCH 315/641] [minicurses] Add more colors --- yt_dlp/YoutubeDL.py | 95 +++++++++++++++++++++++++++----------- yt_dlp/extractor/common.py | 2 +- yt_dlp/minicurses.py | 78 ++++++++++++++++++++++++++++--- yt_dlp/utils.py | 33 +++++++------ 4 files changed, 161 insertions(+), 47 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 0ac1f1c61c..a3fb3faeb5 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -28,6 +28,7 @@ import traceback import random import unicodedata +from enum import Enum from string import ascii_letters from .compat import ( @@ -81,6 +82,7 @@ from .utils import ( make_HTTPS_handler, MaxDownloadsReached, network_exceptions, + number_of_digits, orderedSet, OUTTMPL_TYPES, PagedList, @@ -107,7 +109,6 @@ from .utils import ( strftime_or_none, subtitles_filename, supports_terminal_sequences, - TERMINAL_SEQUENCES, ThrottledDownload, to_high_limit_path, traverse_obj, @@ -123,6 +124,7 @@ from .utils import ( YoutubeDLRedirectHandler, ) from .cache import Cache +from .minicurses import format_text from .extractor import ( gen_extractor_classes, get_info_extractor, @@ -524,7 +526,10 @@ class YoutubeDL(object): windows_enable_vt_mode() # FIXME: This will break if we ever print color to stdout - self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file) + self._allow_colors = { + 'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file), + 'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file), + } if sys.version_info < (3, 6): self.report_warning( @@ -532,10 +537,10 @@ class YoutubeDL(object): if self.params.get('allow_unplayable_formats'): self.report_warning( - f'You have asked for {self._color_text("unplayable formats", "blue")} to be listed/downloaded. ' + f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. ' 'This is a developer option intended for debugging. 
\n' ' If you experience any issues while using this option, ' - f'{self._color_text("DO NOT", "red")} open a bug report') + f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report') def check_deprecated(param, option, suggestion): if self.params.get(param) is not None: @@ -554,6 +559,9 @@ class YoutubeDL(object): for msg in self.params.get('_warnings', []): self.report_warning(msg) + if 'list-formats' in self.params.get('compat_opts', []): + self.params['listformats_table'] = False + if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None: # nooverwrites was unnecessarily changed to overwrites # in 0c3d0f51778b153f65c21906031c2e091fcfb641 @@ -826,10 +834,32 @@ class YoutubeDL(object): self.to_stdout( message, skip_eol, quiet=self.params.get('quiet', False)) - def _color_text(self, text, color): - if self.params.get('no_color'): - return text - return f'{TERMINAL_SEQUENCES[color.upper()]}{text}{TERMINAL_SEQUENCES["RESET_STYLE"]}' + class Styles(Enum): + HEADERS = 'yellow' + EMPHASIS = 'blue' + ID = 'green' + DELIM = 'blue' + ERROR = 'red' + WARNING = 'yellow' + + def __format_text(self, out, text, f, fallback=None, *, test_encoding=False): + assert out in ('screen', 'err') + if test_encoding: + original_text = text + handle = self._screen_file if out == 'screen' else self._err_file + encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii') + text = text.encode(encoding, 'ignore').decode(encoding) + if fallback is not None and text != original_text: + text = fallback + if isinstance(f, self.Styles): + f = f._value_ + return format_text(text, f) if self._allow_colors[out] else text if fallback is None else fallback + + def _format_screen(self, *args, **kwargs): + return self.__format_text('screen', *args, **kwargs) + + def _format_err(self, *args, **kwargs): + return self.__format_text('err', *args, **kwargs) def report_warning(self, message, only_once=False): ''' @@ -841,14 +871,14 @@ class YoutubeDL(object): else: if self.params.get('no_warnings'): return - self.to_stderr(f'{self._color_text("WARNING:", "yellow")} {message}', only_once) + self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once) def report_error(self, message, tb=None): ''' Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. 
''' - self.trouble(f'{self._color_text("ERROR:", "red")} {message}', tb) + self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', tb) def write_debug(self, message, only_once=False): '''Log debug message or Print message to stderr''' @@ -977,8 +1007,8 @@ class YoutubeDL(object): # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences # of %(field)s to %(field)0Nd for backward compatibility field_size_compat_map = { - 'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')), - 'playlist_autonumber': len(str(info_dict.get('n_entries') or '')), + 'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0), + 'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0), 'autonumber': self.params.get('autonumber_size') or 5, } @@ -3167,38 +3197,46 @@ class YoutubeDL(object): res += '~' + format_bytes(fdict['filesize_approx']) return res + def _list_format_headers(self, *headers): + if self.params.get('listformats_table', True) is not False: + return [self._format_screen(header, self.Styles.HEADERS) for header in headers] + return headers + def list_formats(self, info_dict): formats = info_dict.get('formats', [info_dict]) - new_format = ( - 'list-formats' not in self.params.get('compat_opts', []) - and self.params.get('listformats_table', True) is not False) + new_format = self.params.get('listformats_table', True) is not False if new_format: + tbr_digits = number_of_digits(max(f.get('tbr') or 0 for f in formats)) + vbr_digits = number_of_digits(max(f.get('vbr') or 0 for f in formats)) + abr_digits = number_of_digits(max(f.get('abr') or 0 for f in formats)) + delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True) table = [ [ - format_field(f, 'format_id'), + self._format_screen(format_field(f, 'format_id'), self.Styles.ID), format_field(f, 'ext'), self.format_resolution(f), format_field(f, 'fps', '%d'), format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), - '|', + delim, format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes), - format_field(f, 'tbr', '%4dk'), + format_field(f, 'tbr', f'%{tbr_digits}dk'), shorten_protocol_name(f.get('protocol', '').replace("native", "n")), - '|', + delim, format_field(f, 'vcodec', default='unknown').replace('none', ''), - format_field(f, 'vbr', '%4dk'), + format_field(f, 'vbr', f'%{vbr_digits}dk'), format_field(f, 'acodec', default='unknown').replace('none', ''), - format_field(f, 'abr', '%3dk'), + format_field(f, 'abr', f'%{abr_digits}dk'), format_field(f, 'asr', '%5dHz'), ', '.join(filter(None, ( - 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '', + self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else '', format_field(f, 'language', '[%s]'), format_field(f, 'format_note'), format_field(f, 'container', ignore=(None, f.get('ext'))), ))), ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] - header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', '|', ' FILESIZE', ' TBR', 'PROTO', - '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO'] + header_line = self._list_format_headers( + 'ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', delim, ' FILESIZE', ' TBR', 'PROTO', + delim, 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO') else: table = [ [ @@ -3213,7 +3251,10 @@ class YoutubeDL(object): self.to_screen( '[info] Available formats for %s:' % info_dict['id']) 
self.to_stdout(render_table( - header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format)) + header_line, table, + extraGap=(0 if new_format else 1), + hideEmpty=new_format, + delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))) def list_thumbnails(self, info_dict): thumbnails = list(info_dict.get('thumbnails')) @@ -3224,7 +3265,7 @@ class YoutubeDL(object): self.to_screen( '[info] Thumbnails for %s:' % info_dict['id']) self.to_stdout(render_table( - ['ID', 'width', 'height', 'URL'], + self._list_format_headers('ID', 'Width', 'Height', 'URL'), [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])) def list_subtitles(self, video_id, subtitles, name='subtitles'): @@ -3241,7 +3282,7 @@ class YoutubeDL(object): return [lang, ', '.join(names), ', '.join(exts)] self.to_stdout(render_table( - ['Language', 'Name', 'Formats'], + self._list_format_headers('Language', 'Name', 'Formats'), [_row(lang, formats) for lang, formats in subtitles.items()], hideEmpty=True)) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 22b1ed69ab..d1d1b46fce 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1139,7 +1139,7 @@ class InfoExtractor(object): if mobj: break - _name = self._downloader._color_text(name, 'blue') + _name = self._downloader._format_err(name, self._downloader.Styles.EMPHASIS) if mobj: if group is None: diff --git a/yt_dlp/minicurses.py b/yt_dlp/minicurses.py index a6e159a143..38fdb5bc6e 100644 --- a/yt_dlp/minicurses.py +++ b/yt_dlp/minicurses.py @@ -1,6 +1,72 @@ import functools from threading import Lock -from .utils import supports_terminal_sequences, TERMINAL_SEQUENCES, write_string +from .utils import supports_terminal_sequences, write_string + + +CONTROL_SEQUENCES = { + 'DOWN': '\n', + 'UP': '\033[A', + 'ERASE_LINE': '\033[K', + 'RESET': '\033[0m', +} + + +_COLORS = { + 'BLACK': '0', + 'RED': '1', + 'GREEN': '2', + 'YELLOW': '3', + 'BLUE': '4', + 'PURPLE': '5', + 'CYAN': '6', + 'WHITE': '7', +} + + +_TEXT_STYLES = { + 'NORMAL': '0', + 'BOLD': '1', + 'UNDERLINED': '4', +} + + +def format_text(text, f): + f = f.upper() + tokens = f.strip().split() + + bg_color = '' + if 'ON' in tokens: + if tokens[-1] == 'ON': + raise SyntaxError(f'Empty background format specified in {f!r}') + if tokens[-1] not in _COLORS: + raise SyntaxError(f'{tokens[-1]} in {f!r} must be a color') + bg_color = f'4{_COLORS[tokens.pop()]}' + if tokens[-1] == 'LIGHT': + bg_color = f'0;10{bg_color[1:]}' + tokens.pop() + if tokens[-1] != 'ON': + raise SyntaxError(f'Invalid format {f.split(" ON ", 1)[1]!r} in {f!r}') + bg_color = f'\033[{bg_color}m' + tokens.pop() + + if not tokens: + fg_color = '' + elif tokens[-1] not in _COLORS: + raise SyntaxError(f'{tokens[-1]} in {f!r} must be a color') + else: + fg_color = f'3{_COLORS[tokens.pop()]}' + if tokens and tokens[-1] == 'LIGHT': + fg_color = f'9{fg_color[1:]}' + tokens.pop() + fg_style = tokens.pop() if tokens and tokens[-1] in _TEXT_STYLES else 'NORMAL' + fg_color = f'\033[{_TEXT_STYLES[fg_style]};{fg_color}m' + if tokens: + raise SyntaxError(f'Invalid format {" ".join(tokens)!r} in {f!r}') + + if fg_color or bg_color: + return f'{fg_color}{bg_color}{text}{CONTROL_SEQUENCES["RESET"]}' + else: + return text class MultilinePrinterBase: @@ -67,15 +133,15 @@ class MultilinePrinter(MultilinePrinterBase): yield '\r' distance = dest - current if distance < 0: - yield TERMINAL_SEQUENCES['UP'] * -distance + yield 
CONTROL_SEQUENCES['UP'] * -distance
             elif distance > 0:
-                yield TERMINAL_SEQUENCES['DOWN'] * distance
+                yield CONTROL_SEQUENCES['DOWN'] * distance
             self._lastline = dest
 
     @lock
     def print_at_line(self, text, pos):
         if self._HAVE_FULLCAP:
-            self.write(*self._move_cursor(pos), TERMINAL_SEQUENCES['ERASE_LINE'], text)
+            self.write(*self._move_cursor(pos), CONTROL_SEQUENCES['ERASE_LINE'], text)
 
         text = self._add_line_number(text, pos)
         textlen = len(text)
@@ -103,7 +169,7 @@
 
         if self._HAVE_FULLCAP:
             self.write(
-                *text, TERMINAL_SEQUENCES['ERASE_LINE'],
-                f'{TERMINAL_SEQUENCES["UP"]}{TERMINAL_SEQUENCES["ERASE_LINE"]}' * self.maximum)
+                *text, CONTROL_SEQUENCES['ERASE_LINE'],
+                f'{CONTROL_SEQUENCES["UP"]}{CONTROL_SEQUENCES["ERASE_LINE"]}' * self.maximum)
         else:
             self.write(*text, ' ' * self._lastlength)
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index e05677d08e..08f9a5dc99 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -4748,9 +4748,11 @@ def determine_protocol(info_dict):
 
 def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
     """ Render a list of rows, each as a list of values """
+    def width(string):
+        return len(remove_terminal_sequences(string))
 
     def get_max_lens(table):
-        return [max(len(compat_str(v)) for v in col) for col in zip(*table)]
+        return [max(width(str(v)) for v in col) for col in zip(*table)]
 
     def filter_using_list(row, filterArray):
         return [col for (take, col) in zip(filterArray, row) if take]
@@ -4762,10 +4764,15 @@
 
     table = [header_row] + data
     max_lens = get_max_lens(table)
+    extraGap += 1
     if delim:
-        table = [header_row] + [['-' * ml for ml in max_lens]] + data
-    format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s'
-    return '\n'.join(format_str % tuple(row) for row in table)
+        table = [header_row] + [[delim * (ml + extraGap) for ml in max_lens]] + data
+        max_lens[-1] = 0
+    for row in table:
+        for pos, text in enumerate(map(str, row)):
+            row[pos] = text + (' ' * (max_lens[pos] - width(text) + extraGap))
+    ret = '\n'.join(''.join(row) for row in table)
+    return ret
 
 
 def _match_one(filter_part, dct, incomplete):
@@ -6498,12 +6505,12 @@ def supports_terminal_sequences(stream):
     return False
 
 
-TERMINAL_SEQUENCES = {
-    'DOWN': '\n',
-    'UP': '\x1b[A',
-    'ERASE_LINE': '\x1b[K',
-    'RED': '\033[0;31m',
-    'YELLOW': '\033[0;33m',
-    'BLUE': '\033[0;34m',
-    'RESET_STYLE': '\033[0m',
-}
+_terminal_sequences_re = re.compile('\033\\[[^m]+m')
+
+
+def remove_terminal_sequences(string):
+    return _terminal_sequences_re.sub('', string)
+
+
+def number_of_digits(number):
+    return len('%d' % number)
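To make the formatting mini-language introduced by this patch concrete, here is a rough sketch of how the new `format_text` helper from `yt_dlp/minicurses.py` can be exercised (illustrative only; the rendered colors depend on the terminal actually supporting ANSI escape sequences):

```python
from yt_dlp.minicurses import format_text

# Format spec: [style] [light] font_color [on [light] bg_color]
print(format_text('Downloading', 'bold green on light blue'))
print(format_text('ERROR:', 'light red'))   # "light" selects the bright variant of the color
print(format_text('WARNING:', 'yellow'))    # plain colors, as used by the YoutubeDL.Styles values
```

An invalid spec such as `'bold on'` raises `SyntaxError`, per the validation in `format_text` above.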
From 96565c7e55bc3d97a1d4232fe974091dd45f5fe9 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sat, 23 Oct 2021 19:59:52 +0530
Subject: [PATCH 316/641] [cleanup] Add keyword automatically to SearchIE
 descriptions and some minor cleanup of docs

---
 Changelog.md                      |  2 +-
 README.md                         | 47 ++++++++++++++++---------------
 devscripts/make_supportedsites.py |  3 ++
 setup.py                          |  2 +-
 supportedsites.md                 | 40 +++++++++++++-------------
 yt_dlp/YoutubeDL.py               |  2 +-
 yt_dlp/__init__.py                |  4 +--
 yt_dlp/extractor/bilibili.py      |  2 +-
 yt_dlp/extractor/common.py        |  4 ++-
 yt_dlp/extractor/niconico.py      |  1 -
 yt_dlp/extractor/soundcloud.py    |  5 ++--
 yt_dlp/extractor/trovo.py         |  4 +--
 yt_dlp/extractor/youtube.py       | 28 +++++++++---------
 yt_dlp/minicurses.py              |  5 ++++
 14 files changed, 78 insertions(+), 71 deletions(-)

diff --git a/Changelog.md b/Changelog.md
index 6dbc13bd72..d74237dd42 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -61,7 +61,7 @@
 * [AdobePass] Fix RCN MSO by [jfogelman](https://github.com/jfogelman)
 * [CBC] Fix Gem livestream by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
 * [CBC] Support CBC Gem member content by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
-* [crunchyroll] Add season to flat-playlist Closes #1319
+* [crunchyroll] Add season to flat-playlist
 * [crunchyroll] Add support for `beta.crunchyroll` URLs and fix series URLs with language code
 * [EUScreen] Add Extractor by [Ashish0804](https://github.com/Ashish0804)
 * [Gronkh] Add extractor by [Ashish0804](https://github.com/Ashish0804)
diff --git a/README.md b/README.md
index 6e773412d8..f9695aec5a 100644
--- a/README.md
+++ b/README.md
@@ -125,9 +125,9 @@ If you are coming from [youtube-dl](https://github.com/ytdl-org/youtube-dl), the

 Some of yt-dlp's default options are different from that of youtube-dl and youtube-dlc.

-* The options `--id`, `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`), no longer work. See [removed options](#Removed) for details
+* The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`), no longer work. See [removed options](#Removed) for details
 * `avconv` is not supported as an alternative to `ffmpeg`
-* The default [output template](#output-template) is `%(title)s [%(id)s].%(ext)s`. There is no real reason for this change. This was changed before yt-dlp was ever made public and now there are no plans to change it back to `%(title)s.%(id)s.%(ext)s`. Instead, you may use `--compat-options filename`
+* The default [output template](#output-template) is `%(title)s [%(id)s].%(ext)s`. There is no real reason for this change. This was changed before yt-dlp was ever made public and now there are no plans to change it back to `%(title)s-%(id)s.%(ext)s`. Instead, you may use `--compat-options filename`
 * The default [format sorting](#sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order
 * The default format selector is `bv*+ba/b`. This means that if a combined video + audio format that is better than the best video-only format is found, the former will be preferred. Use `-f bv+ba/b` or `--compat-options format-spec` to revert this
 * Unlike youtube-dlc, yt-dlp does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. You can also use `--compat-options multistreams` to enable both
@@ -197,17 +197,17 @@ If you have installed using Homebrew, run `brew upgrade yt-dlp/taps/yt-dlp`

 File|Description
 :---|:---
 [yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform independent binary.
Needs Python (Recommended for **UNIX-like systems**) -[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows standalone x64 binary (Recommended for **Windows**) +[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win7 SP1+) standalone x64 binary (Recommended for **Windows**) #### Alternatives File|Description :---|:--- -[yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|MacOS standalone executable -[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows standalone x86 (32bit) binary -[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows standalone x64 binary built with `py2exe`.
Does not contain `pycryptodomex`, needs VC++14 +[yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|MacOS (10.15+) standalone executable +[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Vista SP2+) standalone x86 (32bit) binary +[yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`.
Does not contain `pycryptodomex`, needs VC++14
[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged windows executable (No auto-update)
-[yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS executable (No auto-update)
+[yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS (10.15+) executable (No auto-update)

 #### Misc

@@ -1516,24 +1516,25 @@ $ yt-dlp --replace-in-metadata 'title,uploader' '[ _]' '-'

 Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. Eg: `--extractor-args "youtube:player_client=android_agegate,web;include_live_dash" --extractor-args "funimation:version=uncut"`

 The following extractors use this feature:
-* **youtube**
-    * `skip`: `hls` or `dash` (or both) to skip download of the respective manifests
-    * `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (Eg: `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can also use `all` to use all the clients
-    * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
-    * `include_live_dash`: Include live dash formats (These formats don't download properly)
-    * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side).
-    * `max_comments`: Maximum amount of comments to download (default all).
-    * `max_comment_depth`: Maximum depth for nested comments. YouTube supports depths 1 or 2 (default).
-* **youtubetab** - (YouTube playlists, channels, feeds, etc.)
-    * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
-* **funimation**
-    * `language`: Languages to extract. Eg: `funimation:language=english,japanese`
-    * `version`: The video version to extract - `uncut` or `simulcast`
+#### youtube
+* `skip`: `hls` or `dash` (or both) to skip download of the respective manifests
+* `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (Eg: `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can also use `all` to use all the clients
+* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player).
While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details +* `include_live_dash`: Include live dash formats (These formats don't download properly) +* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) +* `max_comments`: Maximum amount of comments to download (default all) +* `max_comment_depth`: Maximum depth for nested comments. YouTube supports depths 1 or 2 (default) -* **vikiChannel** - * `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers` +#### youtubetab (YouTube playlists, channels, feeds, etc.) +* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details) + +#### funimation +* `language`: Languages to extract. Eg: `funimation:language=english,japanese` +* `version`: The video version to extract - `uncut` or `simulcast` + +#### vikichannel +* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers` NOTE: These options may be changed/removed in the future without concern for backward compatibility diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py index 17a34843fd..4c11e25f28 100644 --- a/devscripts/make_supportedsites.py +++ b/devscripts/make_supportedsites.py @@ -29,6 +29,9 @@ def main(): continue if ie_desc is not None: ie_md += ': {0}'.format(ie.IE_DESC) + search_key = getattr(ie, 'SEARCH_KEY', None) + if search_key is not None: + ie_md += f'; "{ie.SEARCH_KEY}:" prefix' if not ie.working(): ie_md += ' (Currently broken)' yield ie_md diff --git a/setup.py b/setup.py index e1c585be4a..f08ae2309d 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ from distutils.spawn import spawn exec(compile(open('yt_dlp/version.py').read(), 'yt_dlp/version.py', 'exec')) -DESCRIPTION = 'Command-line program to download videos from YouTube.com and many other other video platforms.' 
+DESCRIPTION = 'A youtube-dl fork with additional features and patches' LONG_DESCRIPTION = '\n\n'.join(( 'Official repository: ', diff --git a/supportedsites.md b/supportedsites.md index 616151db8d..01c3f43a97 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -127,7 +127,7 @@ - **BilibiliAudioAlbum** - **BilibiliChannel** - **BiliBiliPlayer** - - **BiliBiliSearch**: Bilibili video search, "bilisearch" keyword + - **BiliBiliSearch**: Bilibili video search; "bilisearch:" prefix - **BiliIntl** - **BiliIntlSeries** - **BioBioChileTV** @@ -691,8 +691,8 @@ - **niconico**: ニコニコ動画 - **NiconicoPlaylist** - **NiconicoUser** - - **nicovideo:search**: Nico video searches - - **nicovideo:search:date**: Nico video searches, newest first + - **nicovideo:search**: Nico video searches; "nicosearch:" prefix + - **nicovideo:search:date**: Nico video searches, newest first; "nicosearchdate:" prefix - **nicovideo:search_url**: Nico video search URLs - **Nintendo** - **Nitter** @@ -936,7 +936,7 @@ - **SBS**: sbs.com.au - **schooltv** - **ScienceChannel** - - **screen.yahoo:search**: Yahoo screen search + - **screen.yahoo:search**: Yahoo screen search; "yvsearch:" prefix - **Screencast** - **ScreencastOMatic** - **ScrippsNetworks** @@ -977,7 +977,7 @@ - **SonyLIVSeries** - **soundcloud** - **soundcloud:playlist** - - **soundcloud:search**: Soundcloud search, "scsearch" keyword + - **soundcloud:search**: Soundcloud search; "scsearch:" prefix - **soundcloud:set** - **soundcloud:trackstation** - **soundcloud:user** @@ -1095,8 +1095,8 @@ - **TrailerAddict** (Currently broken) - **Trilulilu** - **Trovo** - - **TrovoChannelClip**: All Clips of a trovo.live channel, "trovoclip" keyword - - **TrovoChannelVod**: All VODs of a trovo.live channel, "trovovod" keyword + - **TrovoChannelClip**: All Clips of a trovo.live channel; "trovoclip:" prefix + - **TrovoChannelVod**: All VODs of a trovo.live channel; "trovovod:" prefix - **TrovoVod** - **TruNews** - **TruTV** @@ -1201,7 +1201,7 @@ - **Viddler** - **Videa** - **video.arnes.si**: Arnes Video - - **video.google:search**: Google Video search (Currently broken) + - **video.google:search**: Google Video search; "gvsearch:" prefix (Currently broken) - **video.sky.it** - **video.sky.it:live** - **VideoDetective** @@ -1343,19 +1343,19 @@ - **YouPorn** - **YourPorn** - **YourUpload** - - **youtube**: YouTube.com - - **youtube:favorites**: YouTube.com liked videos, ":ytfav" for short (requires authentication) - - **youtube:history**: Youtube watch history, ":ythis" for short (requires authentication) - - **youtube:playlist**: YouTube.com playlists - - **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication) - - **youtube:search**: YouTube.com searches, "ytsearch" keyword - - **youtube:search:date**: YouTube.com searches, newest videos first, "ytsearchdate" keyword - - **youtube:search_url**: YouTube.com search URLs - - **youtube:subscriptions**: YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication) - - **youtube:tab**: YouTube.com tab - - **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication) + - **youtube**: YouTube + - **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies) + - **youtube:history**: Youtube watch history; ":ythis" keyword (requires cookies) + - **youtube:playlist**: YouTube playlists + - **youtube:recommended**: YouTube recommended videos; ":ytrec" keyword + - **youtube:search**: YouTube searches; "ytsearch:" prefix 
+ - **youtube:search:date**: YouTube searches, newest videos first; "ytsearchdate:" prefix + - **youtube:search_url**: YouTube search URLs with sorting and filter support + - **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies) + - **youtube:tab**: YouTube Tabs + - **youtube:watchlater**: Youtube watch later list; ":ytwatchlater" keyword (requires cookies) - **YoutubeYtBe**: youtu.be - - **YoutubeYtUser**: YouTube.com user videos, URL or "ytuser" keyword + - **YoutubeYtUser**: YouTube user videos; "ytuser:" prefix - **Zapiks** - **Zattoo** - **ZattooLive** diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index a3fb3faeb5..27fac62638 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3313,7 +3313,7 @@ class YoutubeDL(object): write_debug = lambda msg: logger.debug(f'[debug] {msg}') write_debug(encoding_str) else: - write_string(f'[debug] {encoding_str}', encoding=None) + write_string(f'[debug] {encoding_str}\n', encoding=None) write_debug = lambda msg: self._write_string(f'[debug] {msg}\n') source = detect_variant() diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index e1c45441ab..5c3d33df06 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -122,10 +122,10 @@ def _real_main(argv=None): desc = getattr(ie, 'IE_DESC', ie.IE_NAME) if desc is False: continue - if hasattr(ie, 'SEARCH_KEY'): + if getattr(ie, 'SEARCH_KEY', None) is not None: _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow') _COUNTS = ('', '5', '10', 'all') - desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) + desc += f'; "{ie.SEARCH_KEY}:" prefix (Example: "{ie.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(_SEARCHES)}")' write_string(desc + '\n', out=sys.stdout) sys.exit(0) if opts.ap_list_mso: diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index d6c77e4184..ee1722e941 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -566,7 +566,7 @@ class BilibiliCategoryIE(InfoExtractor): class BiliBiliSearchIE(SearchInfoExtractor): - IE_DESC = 'Bilibili video search, "bilisearch" keyword' + IE_DESC = 'Bilibili video search' _MAX_RESULTS = 100000 _SEARCH_KEY = 'bilisearch' diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index d1d1b46fce..c0d7142496 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3620,9 +3620,11 @@ class SearchInfoExtractor(InfoExtractor): """ Base class for paged search queries extractors. They accept URLs in the format _SEARCH_KEY(|all|[0-9]):{query} - Instances should define _SEARCH_KEY and _MAX_RESULTS. 
+    Instances should define _SEARCH_KEY and optionally _MAX_RESULTS
     """

+    _MAX_RESULTS = float('inf')
+
     @classmethod
     def _make_valid_url(cls):
         return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY
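As a quick sketch of the URL scheme these search extractors accept (here instantiating the pattern `_make_valid_url()` builds above for `_SEARCH_KEY = 'ytsearch'`; an empty `prefix` conventionally means a single result, a number limits the count, and `all` requests everything up to `_MAX_RESULTS`):

```python
import re

# Pattern as built by _make_valid_url() for _SEARCH_KEY = 'ytsearch'
pattern = r'ytsearch(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)'

for url in ('ytsearch:cute kittens', 'ytsearch5:cute kittens', 'ytsearchall:cute kittens'):
    m = re.match(pattern, url)
    print(repr(m.group('prefix')), '->', m.group('query'))
```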
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index 76f087057a..4bcea33d58 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -704,7 +704,6 @@ class NicovideoSearchURLIE(InfoExtractor):

 class NicovideoSearchIE(SearchInfoExtractor, NicovideoSearchURLIE):
     IE_DESC = 'Nico video searches'
-    _MAX_RESULTS = float('inf')
     IE_NAME = NicovideoSearchIE_NAME
     _SEARCH_KEY = 'nicosearch'
     _TESTS = []
diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py
index 412331e17c..8245284748 100644
--- a/yt_dlp/extractor/soundcloud.py
+++ b/yt_dlp/extractor/soundcloud.py
@@ -855,8 +855,8 @@ class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):

 class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
     IE_NAME = 'soundcloud:search'
-    IE_DESC = 'Soundcloud search, "scsearch" keyword'
-    _MAX_RESULTS = float('inf')
+    IE_DESC = 'Soundcloud search'
+    _SEARCH_KEY = 'scsearch'
     _TESTS = [{
         'url': 'scsearch15:post-avant jazzcore',
         'info_dict': {
@@ -865,7 +865,6 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
         'playlist_count': 15,
     }]

-    _SEARCH_KEY = 'scsearch'
     _MAX_RESULTS_PER_PAGE = 200
     _DEFAULT_RESULTS_PER_PAGE = 50
diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py
index ec55f41f20..a0f0cc31ca 100644
--- a/yt_dlp/extractor/trovo.py
+++ b/yt_dlp/extractor/trovo.py
@@ -223,7 +223,7 @@ class TrovoChannelBaseIE(InfoExtractor):

 class TrovoChannelVodIE(TrovoChannelBaseIE):
     _VALID_URL = r'trovovod:(?P<id>[^\s]+)'
-    IE_DESC = 'All VODs of a trovo.live channel, "trovovod" keyword'
+    IE_DESC = 'All VODs of a trovo.live channel; "trovovod:" prefix'

     _TESTS = [{
         'url': 'trovovod:OneTappedYou',
@@ -244,7 +244,7 @@ class TrovoChannelClipIE(TrovoChannelBaseIE):
     _VALID_URL = r'trovoclip:(?P<id>[^\s]+)'
-    IE_DESC = 'All Clips of a trovo.live channel, "trovoclip" keyword'
+    IE_DESC = 'All Clips of a trovo.live channel; "trovoclip:" prefix'

     _TESTS = [{
         'url': 'trovoclip:OneTappedYou',
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 54f5ef15cc..6a7a2ce1a4 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -695,7 +695,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):

 class YoutubeIE(YoutubeBaseInfoExtractor):
-    IE_DESC = 'YouTube.com'
+    IE_DESC = 'YouTube'
     _INVIDIOUS_SITES = (
         # invidious-redirect websites
         r'(?:www\.)?redirect\.invidious\.io',
@@ -3010,7 +3010,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

 class YoutubeTabIE(YoutubeBaseInfoExtractor):
-    IE_DESC = 'YouTube.com tab'
+    IE_DESC = 'YouTube Tabs'
     _VALID_URL = r'''(?x)
                     https?://
                         (?:\w+\.)?
@@ -4238,7 +4238,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):

 class YoutubePlaylistIE(InfoExtractor):
-    IE_DESC = 'YouTube.com playlists'
+    IE_DESC = 'YouTube playlists'
     _VALID_URL = r'''(?x)(?:
                         (?:https?://)?
                         (?:\w+\.)?
@@ -4362,7 +4362,7 @@ class YoutubeYtBeIE(InfoExtractor):

 class YoutubeYtUserIE(InfoExtractor):
-    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
+    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
     _VALID_URL = r'ytuser:(?P<id>.+)'
     _TESTS = [{
         'url': 'ytuser:phihag',
@@ -4378,7 +4378,7 @@ class YoutubeYtUserIE(InfoExtractor):

 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
     IE_NAME = 'youtube:favorites'
-    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
+    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
     _VALID_URL = r':ytfav(?:ou?rite)?s?'
     _LOGIN_REQUIRED = True
     _TESTS = [{
@@ -4396,10 +4396,7 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):

 class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
-    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
-    # there doesn't appear to be a real limit, for example if you search for
-    # 'python' you get more than 8.000.000 results
-    _MAX_RESULTS = float('inf')
+    IE_DESC = 'YouTube searches'
     IE_NAME = 'youtube:search'
     _SEARCH_KEY = 'ytsearch'
     _SEARCH_PARAMS = None
@@ -4459,13 +4456,14 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
 class YoutubeSearchDateIE(YoutubeSearchIE):
     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
     _SEARCH_KEY = 'ytsearchdate'
-    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
+    IE_DESC = 'YouTube searches, newest videos first'
     _SEARCH_PARAMS = 'CAI%3D'


 class YoutubeSearchURLIE(YoutubeSearchIE):
-    IE_DESC = 'YouTube.com search URLs'
+    IE_DESC = 'YouTube search URLs with sorting and filter support'
     IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
+    _SEARCH_KEY = None
     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
     # _MAX_RESULTS = 100
     _TESTS = [{
@@ -4511,7 +4509,7 @@ class YoutubeFeedsInfoExtractor(YoutubeTabIE):

 class YoutubeWatchLaterIE(InfoExtractor):
     IE_NAME = 'youtube:watchlater'
-    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
+    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
     _VALID_URL = r':ytwatchlater'
     _TESTS = [{
         'url': ':ytwatchlater',
@@ -4524,7 +4522,7 @@ class YoutubeWatchLaterIE(InfoExtractor):

 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
+    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
     _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
     _FEED_NAME = 'recommended'
     _LOGIN_REQUIRED = False
@@ -4541,7 +4539,7 @@ class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):

 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
+    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
     _VALID_URL = r':ytsub(?:scription)?s?'
     _FEED_NAME = 'subscriptions'
     _TESTS = [{
@@ -4554,7 +4552,7 @@ class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):

 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
+    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
     _VALID_URL = r':ythis(?:tory)?'
_FEED_NAME = 'history' _TESTS = [{ diff --git a/yt_dlp/minicurses.py b/yt_dlp/minicurses.py index 38fdb5bc6e..699b1158ab 100644 --- a/yt_dlp/minicurses.py +++ b/yt_dlp/minicurses.py @@ -31,6 +31,11 @@ _TEXT_STYLES = { def format_text(text, f): + ''' + @param f String representation of formatting to apply in the form: + [style] [light] font_color [on [light] bg_color] + Eg: "red", "bold green on light blue" + ''' f = f.upper() tokens = f.strip().split() From 9f1a1c36e60b14f9ff47d83234b4ea61c5f5e2f7 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 24 Oct 2021 14:46:07 +0530 Subject: [PATCH 317/641] Separate `--check-all-formats` from `--check-formats` Previously, `--check-formats` tested only the selected video formats, but ALL thumbnails --- yt_dlp/YoutubeDL.py | 109 ++++++++++++++++++++++++-------------------- yt_dlp/options.py | 10 ++-- yt_dlp/utils.py | 2 + 3 files changed, 68 insertions(+), 53 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 27fac62638..071f2e9438 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -224,7 +224,8 @@ class YoutubeDL(object): allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file check_formats Whether to test if the formats are downloadable. - Can be True (check all), False (check none) + Can be True (check all), False (check none), + 'selected' (check selected formats), or None (check only if requested by extractor) paths: Dictionary of output paths. The allowed keys are 'home' 'temp' and the keys of OUTTMPL_TYPES (in utils.py) @@ -1720,6 +1721,28 @@ class YoutubeDL(object): return op(actual_value, comparison_value) return _filter + def _check_formats(self, formats): + for f in formats: + self.to_screen('[info] Testing format %s' % f['format_id']) + temp_file = tempfile.NamedTemporaryFile( + suffix='.tmp', delete=False, + dir=self.get_output_path('temp') or None) + temp_file.close() + try: + success, _ = self.dl(temp_file.name, f, test=True) + except (DownloadError, IOError, OSError, ValueError) + network_exceptions: + success = False + finally: + if os.path.exists(temp_file.name): + try: + os.remove(temp_file.name) + except OSError: + self.report_warning('Unable to delete temporary file "%s"' % temp_file.name) + if success: + yield f + else: + self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id']) + def _default_format_spec(self, info_dict, download=True): def can_merge(): @@ -1759,7 +1782,7 @@ class YoutubeDL(object): allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False), 'video': self.params.get('allow_multiple_video_streams', False)} - check_formats = self.params.get('check_formats') + check_formats = self.params.get('check_formats') == 'selected' def _parse_filter(tokens): filter_parts = [] @@ -1935,26 +1958,7 @@ class YoutubeDL(object): if not check_formats: yield from formats return - for f in formats: - self.to_screen('[info] Testing format %s' % f['format_id']) - temp_file = tempfile.NamedTemporaryFile( - suffix='.tmp', delete=False, - dir=self.get_output_path('temp') or None) - temp_file.close() - try: - success, _ = self.dl(temp_file.name, f, test=True) - except (DownloadError, IOError, OSError, ValueError) + network_exceptions: - success = False - finally: - if os.path.exists(temp_file.name): - try: - os.remove(temp_file.name) - except OSError: - self.report_warning('Unable to delete temporary file "%s"' % temp_file.name) - if success: - yield f - else: - self.to_screen('[info] Unable to download format %s. 
Skipping...' % f['format_id']) + yield from self._check_formats(formats) def _build_selector_function(selector): if isinstance(selector, list): # , @@ -2111,42 +2115,45 @@ class YoutubeDL(object): self.cookiejar.add_cookie_header(pr) return pr.get_header('Cookie') + def _sort_thumbnails(self, thumbnails): + thumbnails.sort(key=lambda t: ( + t.get('preference') if t.get('preference') is not None else -1, + t.get('width') if t.get('width') is not None else -1, + t.get('height') if t.get('height') is not None else -1, + t.get('id') if t.get('id') is not None else '', + t.get('url'))) + def _sanitize_thumbnails(self, info_dict): thumbnails = info_dict.get('thumbnails') if thumbnails is None: thumbnail = info_dict.get('thumbnail') if thumbnail: info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] - if thumbnails: - thumbnails.sort(key=lambda t: ( - t.get('preference') if t.get('preference') is not None else -1, - t.get('width') if t.get('width') is not None else -1, - t.get('height') if t.get('height') is not None else -1, - t.get('id') if t.get('id') is not None else '', - t.get('url'))) + if not thumbnails: + return - def thumbnail_tester(): - def test_thumbnail(t): - self.to_screen(f'[info] Testing thumbnail {t["id"]}') - try: - self.urlopen(HEADRequest(t['url'])) - except network_exceptions as err: - self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...') - return False - return True - return test_thumbnail + def check_thumbnails(thumbnails): + for t in thumbnails: + self.to_screen(f'[info] Testing thumbnail {t["id"]}') + try: + self.urlopen(HEADRequest(t['url'])) + except network_exceptions as err: + self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...') + continue + yield t - for i, t in enumerate(thumbnails): - if t.get('id') is None: - t['id'] = '%d' % i - if t.get('width') and t.get('height'): - t['resolution'] = '%dx%d' % (t['width'], t['height']) - t['url'] = sanitize_url(t['url']) + self._sort_thumbnails(thumbnails) + for i, t in enumerate(thumbnails): + if t.get('id') is None: + t['id'] = '%d' % i + if t.get('width') and t.get('height'): + t['resolution'] = '%dx%d' % (t['width'], t['height']) + t['url'] = sanitize_url(t['url']) - if self.params.get('check_formats'): - info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse() - else: - info_dict['thumbnails'] = thumbnails + if self.params.get('check_formats') is True: + info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1])).reverse() + else: + info_dict['thumbnails'] = thumbnails def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' @@ -2252,7 +2259,6 @@ class YoutubeDL(object): info_dict['requested_subtitles'] = self.process_subtitles( info_dict['id'], subtitles, automatic_captions) - # We now pick which formats have to be downloaded if info_dict.get('formats') is None: # There's only one format available formats = [info_dict] @@ -2335,6 +2341,9 @@ class YoutubeDL(object): # TODO Central sorting goes here + if self.params.get('check_formats') is True: + formats = LazyList(self._check_formats(formats[::-1])).reverse() + if not formats or formats[0] is not info_dict: # only set the 'formats' fields if the original info_dict list them # otherwise we end up with a circular reference, the first (and unique) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 719a1bce45..5499ab13e9 100644 --- a/yt_dlp/options.py +++ 
b/yt_dlp/options.py @@ -562,12 +562,16 @@ def parseOpts(overrideArguments=None): help="Don't give any special preference to free containers (default)") video_format.add_option( '--check-formats', - action='store_true', dest='check_formats', default=None, - help='Check that the formats selected are actually downloadable') + action='store_const', const='selected', dest='check_formats', default=None, + help='Check that the selected formats are actually downloadable') + video_format.add_option( + '--check-all-formats', + action='store_true', dest='check_formats', + help='Check all formats for whether they are actually downloadable') video_format.add_option( '--no-check-formats', action='store_false', dest='check_formats', - help='Do not check that the formats selected are actually downloadable') + help='Do not check that the formats are actually downloadable') video_format.add_option( '-F', '--list-formats', action='store_true', dest='listformats', diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 08f9a5dc99..2c3ab00dc7 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4050,6 +4050,8 @@ class LazyList(collections.abc.Sequence): def __exhaust(self): self.__cache.extend(self.__iterable) + # Discard the emptied iterable to make it pickle-able + self.__iterable = [] return self.__cache def exhaust(self): From fccf502118466bbfde7c5c6dd0279f0dfdb1311c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 24 Oct 2021 14:55:28 +0530 Subject: [PATCH 318/641] [youtube] Populate `thumbnail` with the best "known" thumbnail Closes #402, Related: https://github.com/yt-dlp/yt-dlp/issues/340#issuecomment-950290624 --- yt_dlp/extractor/youtube.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 6a7a2ce1a4..658b45fe14 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2696,6 +2696,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): thumbnails.append({ 'url': thumbnail_url, }) + original_thumbnails = thumbnails.copy() + # The best resolution thumbnails sometimes does not appear in the webpage # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340 # List of possible thumbnails - Ref: @@ -2706,7 +2708,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'default', '1', '2', '3' ] n_thumbnail_names = len(thumbnail_names) - thumbnails.extend({ 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format( video_id=video_id, name=name, ext=ext, @@ -2716,6 +2717,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names) thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i) self._remove_duplicate_formats(thumbnails) + self._downloader._sort_thumbnails(original_thumbnails) category = get_first(microformats, 'category') or search_meta('genre') channel_id = str_or_none( @@ -2745,6 +2747,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': self._live_title(video_title) if is_live else video_title, 'formats': formats, 'thumbnails': thumbnails, + # The best thumbnail that we are sure exists. 
Prevents unnecessary + URL checking if the user doesn't care about getting the best possible thumbnail + 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')), 'description': video_description, 'upload_date': unified_strdate( get_first(microformats, 'uploadDate') From f2fe69c7b0d208bdb1f6292b4ae92bc1e1a7444a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 24 Oct 2021 18:02:00 +0530 Subject: [PATCH 319/641] Approximate filesize from bitrate Closes #1400 --- yt_dlp/YoutubeDL.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 071f2e9438..8c8cf7ecb6 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2330,6 +2330,10 @@ class YoutubeDL(object): format['resolution'] = self.format_resolution(format, default=None) if format.get('dynamic_range') is None and format.get('vcodec') != 'none': format['dynamic_range'] = 'SDR' + if (info_dict.get('duration') and format.get('tbr') + and not format.get('filesize') and not format.get('filesize_approx')): + format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8) + # Add HTTP headers, so that external programs can use them from the # json output full_format_info = info_dict.copy() From ad64a2323f1ce0f8aeb07e4ead46630edec2bf2d Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sun, 24 Oct 2021 16:31:33 +0000 Subject: [PATCH 320/641] [instagram] Fix bug in ab2ffab22d02d530e0b46f9e361ff53a2139898b (#1403) Authored by: u-spec-png --- yt_dlp/extractor/instagram.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index 24f47f3a82..ccfcddd5bc 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -145,6 +145,8 @@ class InstagramIE(InfoExtractor): def _login(self): username, password = self._get_login_info() + if username is None: + return login_webpage = self._download_webpage( 'https://www.instagram.com/accounts/login/', None, From 8e7ab2cf08970dbeedef304cd25bcd6abf36966b Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Mon, 25 Oct 2021 19:33:01 +0000 Subject: [PATCH 321/641] [Bilibili:comments] Fix infinite loop (#1423) Closes #1412 Authored by: u-spec-png --- yt_dlp/extractor/bilibili.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index ee1722e941..483f93d679 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -376,8 +376,10 @@ class BiliBiliIE(InfoExtractor): replies = traverse_obj( self._download_json( f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={video_id}&type=1&jsonp=jsonp&sort=2&_=1567227301685', - video_id, note=f'Extracting comments from page {idx}'), - ('data', 'replies')) or [] + video_id, note=f'Extracting comments from page {idx}', fatal=False), + ('data', 'replies')) + if not replies: + return for children in map(self._get_all_children, replies): yield from children From 7e59ca440a9351aac0a99b505587698b912e500e Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 26 Oct 2021 19:31:00 +0530 Subject: [PATCH 322/641] [DiscoveryPlus] Allow language codes in URL Closes #1425 --- yt_dlp/extractor/dplay.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py index e0e446b873..d624808108 100644 --- a/yt_dlp/extractor/dplay.py +++ b/yt_dlp/extractor/dplay.py @@ -325,7 +325,7 @@ class
HGTVDeIE(DPlayIE): class DiscoveryPlusIE(DPlayIE): - _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/video' + DPlayIE._PATH_REGEX + _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?:\w{2}/)?video' + DPlayIE._PATH_REGEX _TESTS = [{ 'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family', 'info_dict': { @@ -343,6 +343,9 @@ class DiscoveryPlusIE(DPlayIE): 'episode_number': 1, }, 'skip': 'Available for Premium users', + }, { + 'url': 'https://discoveryplus.com/ca/video/bering-sea-gold-discovery-ca/goldslingers', + 'only_matching': True, }] _PRODUCT = 'dplus_us' From 7de837a5e3e5eae92a77d07e66eda49c0e949b8d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 26 Oct 2021 19:31:56 +0530 Subject: [PATCH 323/641] [utils] Sanitize URL when determining protocol Closes #1406 --- yt_dlp/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 2c3ab00dc7..be93b0ef27 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4731,7 +4731,7 @@ def determine_protocol(info_dict): if protocol is not None: return protocol - url = info_dict['url'] + url = sanitize_url(info_dict['url']) if url.startswith('rtmp'): return 'rtmp' elif url.startswith('mms'): From 08438d2ca59fddd4147f4f957473af78d56be732 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 26 Oct 2021 20:11:59 +0530 Subject: [PATCH 324/641] [outtmpl] Add type `link` for internet shortcut files and refactor related code Closes #1405 --- README.md | 2 +- yt_dlp/YoutubeDL.py | 76 ++++++++++++++++++--------------------------- yt_dlp/utils.py | 7 +++++ 3 files changed, 39 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index f9695aec5a..e2fbbf2ae6 100644 --- a/README.md +++ b/README.md @@ -1034,7 +1034,7 @@ To summarize, the general syntax for a field is: %(name[.keys][addition][>strf][,alternate][|default])[flags][width][.precision][length]type ``` -Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video. If any of the templates (except default) is empty, that type of file will not be written. Eg: `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for video. +Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `link`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video. If any of the templates (except default) is empty, that type of file will not be written. Eg: `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for video. 
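For instance (an illustrative invocation, not part of this patch; the directory name is arbitrary), the new `link` type can be combined with `--write-link` so that all internet-shortcut files land in their own directory:

```
yt-dlp --write-link -o "link:shortcuts/%(title)s.%(ext)s" URL
```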
The available fields are: diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 8c8cf7ecb6..ced7d12028 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -56,9 +56,6 @@ from .utils import ( DEFAULT_OUTTMPL, determine_ext, determine_protocol, - DOT_DESKTOP_LINK_TEMPLATE, - DOT_URL_LINK_TEMPLATE, - DOT_WEBLOC_LINK_TEMPLATE, DownloadError, encode_compat_str, encodeFilename, @@ -77,6 +74,7 @@ from .utils import ( iri_to_uri, ISO3166Utils, LazyList, + LINK_TEMPLATES, locked_file, make_dir, make_HTTPS_handler, @@ -2665,53 +2663,41 @@ class YoutubeDL(object): return # Write internet shortcut files - url_link = webloc_link = desktop_link = False - if self.params.get('writelink', False): - if sys.platform == "darwin": # macOS. - webloc_link = True - elif sys.platform.startswith("linux"): - desktop_link = True - else: # if sys.platform in ['win32', 'cygwin']: - url_link = True - if self.params.get('writeurllink', False): - url_link = True - if self.params.get('writewebloclink', False): - webloc_link = True - if self.params.get('writedesktoplink', False): - desktop_link = True - - if url_link or webloc_link or desktop_link: + def _write_link_file(link_type): if 'webpage_url' not in info_dict: self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information') - return - ascii_url = iri_to_uri(info_dict['webpage_url']) - - def _write_link_file(extension, template, newline, embed_filename): - linkfn = replace_extension(full_filename, extension, info_dict.get('ext')) + return False + linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext')) if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)): - self.to_screen('[info] Internet shortcut is already present') - else: - try: - self.to_screen('[info] Writing internet shortcut to: ' + linkfn) - with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile: - template_vars = {'url': ascii_url} - if embed_filename: - template_vars['filename'] = linkfn[:-(len(extension) + 1)] - linkfile.write(template % template_vars) - except (OSError, IOError): - self.report_error('Cannot write internet shortcut ' + linkfn) - return False + self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present') + return True + try: + self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}') + with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', + newline='\r\n' if link_type == 'url' else '\n') as linkfile: + template_vars = {'url': iri_to_uri(info_dict['webpage_url'])} + if link_type == 'desktop': + template_vars['filename'] = linkfn[:-(len(link_type) + 1)] + linkfile.write(LINK_TEMPLATES[link_type] % template_vars) + except (OSError, IOError): + self.report_error(f'Cannot write internet shortcut {linkfn}') + return False return True - if url_link: - if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False): - return - if webloc_link: - if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False): - return - if desktop_link: - if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True): - return + write_links = { + 'url': self.params.get('writeurllink'), + 'webloc': self.params.get('writewebloclink'), + 'desktop': self.params.get('writedesktoplink'), + } + if self.params.get('writelink'): + link_type = ('webloc' if sys.platform == 'darwin' + else 
'desktop' if sys.platform.startswith('linux') + else 'url') + write_links[link_type] = True + + if any(should_write and not _write_link_file(link_type) + for link_type, should_write in write_links.items()): + return try: info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index be93b0ef27..9d90eca5e8 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4503,6 +4503,7 @@ OUTTMPL_TYPES = { 'description': 'description', 'annotation': 'annotations.xml', 'infojson': 'info.json', + 'link': None, 'pl_thumbnail': None, 'pl_description': 'description', 'pl_infojson': 'info.json', @@ -6238,6 +6239,12 @@ URL=%(url)s Icon=text-html '''.lstrip() +LINK_TEMPLATES = { + 'url': DOT_URL_LINK_TEMPLATE, + 'desktop': DOT_DESKTOP_LINK_TEMPLATE, + 'webloc': DOT_WEBLOC_LINK_TEMPLATE, +} + def iri_to_uri(iri): """ From abad800058180da93f482915070aef12f8f63564 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 26 Oct 2021 20:12:30 +0530 Subject: [PATCH 325/641] [downloader/ffmpeg] Fix vtt download with ffmpeg --- yt_dlp/postprocessor/ffmpeg.py | 1 + 1 file changed, 1 insertion(+) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 4a0a96427e..b7fcc569ba 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -41,6 +41,7 @@ EXT_TO_OUT_FORMATS = { 'ts': 'mpegts', 'wma': 'asf', 'wmv': 'asf', + 'vtt': 'webvtt', } ACODECS = { 'mp3': 'libmp3lame', From 48f796874d78ad3d1849d0639893667f6cdf30d2 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 26 Oct 2021 20:15:12 +0530 Subject: [PATCH 326/641] [utils] Create `DownloadCancelled` exception as super-class of ExistingVideoReached, RejectedVideoReached, MaxDownloadsReached Third parties can also sub-class this to cancel the download queue from a hook --- yt_dlp/YoutubeDL.py | 13 ++++--------- yt_dlp/utils.py | 30 ++++++++++++++++++++---------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index ced7d12028..2c2b17b200 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -56,6 +56,7 @@ from .utils import ( DEFAULT_OUTTMPL, determine_ext, determine_protocol, + DownloadCancelled, DownloadError, encode_compat_str, encodeFilename, @@ -1320,7 +1321,7 @@ class YoutubeDL(object): self.to_stderr('\r') self.report_warning('The download speed is below throttle limit. 
Re-extracting data') return wrapper(self, *args, **kwargs) - except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError): + except (DownloadCancelled, LazyList.IndexError): raise except Exception as e: if self.params.get('ignoreerrors'): @@ -2949,14 +2950,8 @@ class YoutubeDL(object): url, force_generic_extractor=self.params.get('force_generic_extractor', False)) except UnavailableVideoError: self.report_error('unable to download video') - except MaxDownloadsReached: - self.to_screen('[info] Maximum number of downloads reached') - raise - except ExistingVideoReached: - self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing') - raise - except RejectedVideoReached: - self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject') + except DownloadCancelled as e: + self.to_screen(f'[info] {e.msg}') raise else: if self.params.get('dump_single_json', False): diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 9d90eca5e8..a8755a1b97 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2542,14 +2542,29 @@ class PostProcessingError(YoutubeDLError): self.msg = msg -class ExistingVideoReached(YoutubeDLError): - """ --max-downloads limit has been reached. """ - pass +class DownloadCancelled(YoutubeDLError): + """ Exception raised when the download queue should be interrupted """ + msg = 'The download was cancelled' + + def __init__(self, msg=None): + if msg is not None: + self.msg = msg + YoutubeDLError.__init__(self, self.msg) -class RejectedVideoReached(YoutubeDLError): +class ExistingVideoReached(DownloadCancelled): + """ --break-on-existing triggered """ + msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing' + + +class RejectedVideoReached(DownloadCancelled): + """ --break-on-reject triggered """ + msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject' + + +class MaxDownloadsReached(DownloadCancelled): """ --max-downloads limit has been reached. """ - pass + msg = 'Maximum number of downloads reached, stopping due to --max-downloads' class ThrottledDownload(YoutubeDLError): @@ -2557,11 +2572,6 @@ class ThrottledDownload(YoutubeDLError): pass -class MaxDownloadsReached(YoutubeDLError): - """ --max-downloads limit has been reached. """ - pass - - class UnavailableVideoError(YoutubeDLError): """Unavailable Format exception. 
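Since `DownloadCancelled` is now the common base class that `YoutubeDL.download` catches, an embedding application can stop the whole queue from a hook, as the commit message above notes. A minimal sketch of such a sub-class (the hook, its threshold and the URL are illustrative assumptions, not part of the patch):

```python
from yt_dlp import YoutubeDL
from yt_dlp.utils import DownloadCancelled


class QuotaExceeded(DownloadCancelled):
    # A third-party cancellation; handled the same way as MaxDownloadsReached
    msg = 'Stopping the queue: download quota exceeded'


def hook(d):
    # Illustrative condition: abort the remaining queue after ~1 GB transferred
    if d.get('status') == 'downloading' and (d.get('downloaded_bytes') or 0) > 1024 ** 3:
        raise QuotaExceeded()


with YoutubeDL({'progress_hooks': [hook]}) as ydl:
    ydl.download(['URL'])  # QuotaExceeded propagates to the caller like any DownloadCancelled
```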
From 0db3bae879d57ff400f8c61261534b6e3659c470 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 26 Oct 2021 20:17:29 +0530 Subject: [PATCH 327/641] [extractor] Fix some errors being converted to `ExtractorError` --- yt_dlp/extractor/common.py | 16 +++++++++++++--- yt_dlp/utils.py | 6 +++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index c0d7142496..369cff418e 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -74,6 +74,7 @@ from ..utils import ( strip_or_none, traverse_obj, unescapeHTML, + UnsupportedError, unified_strdate, unified_timestamp, update_Request, @@ -604,10 +605,19 @@ class InfoExtractor(object): if self.__maybe_fake_ip_and_retry(e.countries): continue raise + except UnsupportedError: + raise except ExtractorError as e: - video_id = e.video_id or self.get_temp_id(url) - raise ExtractorError( - e.msg, video_id=video_id, ie=self.IE_NAME, tb=e.traceback, expected=e.expected, cause=e.cause) + kwargs = { + 'video_id': e.video_id or self.get_temp_id(url), + 'ie': self.IE_NAME, + 'tb': e.traceback, + 'expected': e.expected, + 'cause': e.cause + } + if hasattr(e, 'countries'): + kwargs['countries'] = e.countries + raise type(e)(e.msg, **kwargs) except compat_http_client.IncompleteRead as e: raise ExtractorError('A network error has occurred.', cause=e, expected=True, video_id=self.get_temp_id(url)) except (KeyError, StopIteration) as e: diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index a8755a1b97..48baa6503c 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2492,9 +2492,9 @@ class GeoRestrictedError(ExtractorError): geographic location due to geographic restrictions imposed by a website. """ - def __init__(self, msg, countries=None): - super(GeoRestrictedError, self).__init__(msg, expected=True) - self.msg = msg + def __init__(self, msg, countries=None, **kwargs): + kwargs['expected'] = True + super(GeoRestrictedError, self).__init__(msg, **kwargs) self.countries = countries From c35ada33604b820a6f2b3c6a2d4045b6c9c7dedf Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 26 Oct 2021 21:14:13 +0530 Subject: [PATCH 328/641] [twitter] Do not sort by codec Closes #1431 --- yt_dlp/extractor/twitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index 485b781ca1..0749263d9b 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -485,7 +485,7 @@ class TwitterIE(TwitterBaseIE): fmts, subs = self._extract_variant_formats(variant, twid) subtitles = self._merge_subtitles(subtitles, subs) formats.extend(fmts) - self._sort_formats(formats) + self._sort_formats(formats, ('res', 'br', 'size', 'proto')) # The codec of http formats are unknown thumbnails = [] media_url = media.get('media_url_https') or media.get('media_url') From 0c873df3a84e6269dff03fd91ce4f23a38bd8f27 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Tue, 26 Oct 2021 21:17:39 +0530 Subject: [PATCH 329/641] [3speak] Add extractors (#1430) Closes #1421 Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 4 ++ yt_dlp/extractor/threespeak.py | 97 ++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 yt_dlp/extractor/threespeak.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index ef2b25c930..035c159c24 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1442,6 +1442,10 @@ 
from .theweatherchannel import TheWeatherChannelIE from .thisamericanlife import ThisAmericanLifeIE from .thisav import ThisAVIE from .thisoldhouse import ThisOldHouseIE +from .threespeak import ( + ThreeSpeakIE, + ThreeSpeakUserIE, +) from .threeqsdn import ThreeQSDNIE from .tiktok import ( TikTokIE, diff --git a/yt_dlp/extractor/threespeak.py b/yt_dlp/extractor/threespeak.py new file mode 100644 index 0000000000..60e84529d8 --- /dev/null +++ b/yt_dlp/extractor/threespeak.py @@ -0,0 +1,97 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + try_get, + unified_strdate, +) + + +class ThreeSpeakIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?3speak\.tv/watch\?v\=[^/]+/(?P<id>[^/$&#?]+)' + + _TESTS = [{ + 'url': 'https://3speak.tv/watch?v=dannyshine/wjgoxyfy', + 'info_dict': { + 'id': 'wjgoxyfy', + 'ext': 'mp4', + 'title': 'Can People who took the Vax think Critically', + 'uploader': 'dannyshine', + 'description': 'md5:181aa7ccb304afafa089b5af3bca7a10', + 'tags': ['sex', 'covid', 'antinatalism', 'comedy', 'vaccines'], + 'thumbnail': 'https://img.3speakcontent.co/wjgoxyfy/thumbnails/default.png', + 'upload_date': '20211021', + 'duration': 2703.867833, + 'filesize': 1620054781, + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + json_str = self._html_search_regex(r'JSON\.parse\(\'([^\']+)\'\)', webpage, 'json') + # The json string itself is escaped. Hence the double parsing + data_json = self._parse_json(self._parse_json(f'"{json_str}"', id), id) + video_json = self._parse_json(data_json['json_metadata'], id) + formats, subtitles = [], {} + og_m3u8 = self._html_search_regex(r'<meta\s?property=\"og:video\"\s?content=\"([^\"]+)\">', webpage, 'og m3u8', fatal=False) + if og_m3u8: + https_frmts, https_subs = self._extract_m3u8_formats_and_subtitles(og_m3u8, id, fatal=False, m3u8_id='https') + formats.extend(https_frmts) + subtitles = self._merge_subtitles(subtitles, https_subs) + ipfs_m3u8 = try_get(video_json, lambda x: x['video']['info']['ipfs']) + if ipfs_m3u8: + ipfs_frmts, ipfs_subs = self._extract_m3u8_formats_and_subtitles(f'https://ipfs.3speak.tv/ipfs/{ipfs_m3u8}', + id, fatal=False, m3u8_id='ipfs') + formats.extend(ipfs_frmts) + subtitles = self._merge_subtitles(subtitles, ipfs_subs) + mp4_file = try_get(video_json, lambda x: x['video']['info']['file']) + if mp4_file: + formats.append({ + 'url': f'https://threespeakvideo.b-cdn.net/{id}/{mp4_file}', + 'ext': 'mp4', + 'format_id': 'https-mp4', + 'duration': try_get(video_json, lambda x: x['video']['info']['duration']), + 'filesize': try_get(video_json, lambda x: x['video']['info']['filesize']), + 'quality': 11, + 'format_note': 'Original file', + }) + self._sort_formats(formats) + return { + 'id': id, + 'title': data_json.get('title') or data_json.get('root_title'), + 'uploader': data_json.get('author'), + 'description': try_get(video_json, lambda x: x['video']['content']['description']), + 'tags': try_get(video_json, lambda x: x['video']['content']['tags']), + 'thumbnail': try_get(video_json, lambda x: x['image'][0]), + 'upload_date': unified_strdate(data_json.get('created')), + 'formats': formats, + 'subtitles': subtitles, + } + + +class ThreeSpeakUserIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?3speak\.tv/user/(?P<id>[^/$&?#]+)' + + _TESTS = [{ + 'url': 'https://3speak.tv/user/theycallmedan', + 'info_dict': { + 'id': 'theycallmedan', + }, + 'playlist_mincount': 115, + }] + + def
_real_extract(self, url): id = self._match_id(url) webpage = self._download_webpage(url, id) entries = [ self.url_result( 'https://3speak.tv/watch?v=%s' % video, ie=ThreeSpeakIE.ie_key()) for video in re.findall(r'data-payout\s?\=\s?\"([^\"]+)\"', webpage) if video ] return self.playlist_result(entries, id) From 673944b001447adb0de88c12fa22577a770d771a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 26 Oct 2021 20:47:30 +0530 Subject: [PATCH 330/641] [compat] Don't create console in `windows_enable_vt_mode` Closes #1420 --- yt_dlp/compat.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py index b107b21142..8508f1465c 100644 --- a/yt_dlp/compat.py +++ b/yt_dlp/compat.py @@ -19,6 +19,7 @@ import shlex import shutil import socket import struct +import subprocess import sys import tokenize import urllib @@ -162,7 +163,9 @@ except ImportError: def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075 if compat_os_name != 'nt': return - os.system('') + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + subprocess.Popen('', shell=True, startupinfo=startupinfo) # Deprecated From dc88e9be03ea0974760725d1ad089b91a7fefe52 Mon Sep 17 00:00:00 2001 From: nyuszika7h Date: Tue, 26 Oct 2021 18:33:43 +0200 Subject: [PATCH 331/641] [wakanim] Add support for MPD manifests (#1428) Closes #1426 Authored by: nyuszika7h --- yt_dlp/extractor/wakanim.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/wakanim.py b/yt_dlp/extractor/wakanim.py index c956d616ed..22441c38ff 100644 --- a/yt_dlp/extractor/wakanim.py +++ b/yt_dlp/extractor/wakanim.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +from urllib.parse import unquote + from .common import InfoExtractor from ..utils import ( merge_dicts, @@ -37,20 +39,24 @@ class WakanimIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - m3u8_url = urljoin(url, self._search_regex( - r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'm3u8 url', + manifest_url = urljoin(url, self._search_regex( + r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'manifest url', group='url')) if not self.get_param('allow_unplayable_formats'): # https://docs.microsoft.com/en-us/azure/media-services/previous/media-services-content-protection-overview#streaming-urls encryption = self._search_regex( r'encryption%3D(c(?:enc|bc(?:s-aapl)?))', - m3u8_url, 'encryption', default=None) + manifest_url, 'encryption', default=None) if encryption in ('cenc', 'cbcs-aapl'): self.report_drm(video_id) - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls') + if 'format=mpd-time-cmaf' in unquote(manifest_url): + formats = self._extract_mpd_formats( + manifest_url, video_id, mpd_id='dash') + else: + formats = self._extract_m3u8_formats( + manifest_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') info = self._search_json_ld(webpage, video_id, default={}) From bd1c7923274962e3027acf63111ccb0d766b9725 Mon Sep 17 00:00:00 2001 From: nyuszika7h Date: Tue, 26 Oct 2021 18:35:20 +0200 Subject: [PATCH 332/641] [wakanim] Detect geo-restriction (#1429) Authored by: nyuszika7h --- yt_dlp/extractor/wakanim.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/yt_dlp/extractor/wakanim.py b/yt_dlp/extractor/wakanim.py index 22441c38ff..a61a630e26 100644 --- a/yt_dlp/extractor/wakanim.py +++
b/yt_dlp/extractor/wakanim.py @@ -33,12 +33,19 @@ class WakanimIE(InfoExtractor): 'url': 'https://www.wakanim.tv/de/v2/catalogue/episode/7843/sword-art-online-alicization-omu-arc-2-folge-15-omu', 'only_matching': True, }] + _GEO_BYPASS = False def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + if 'Geoblocking' in webpage: + if '/de/' in url: + self.raise_geo_restricted(countries=['DE', 'AT', 'CH']) + else: + self.raise_geo_restricted(countries=['RU']) + manifest_url = urljoin(url, self._search_regex( r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'manifest url', group='url')) From b7b186e7decc1236576086d9ced3579af57b07c3 Mon Sep 17 00:00:00 2001 From: ajj8 <35781586+ajj8@users.noreply.github.com> Date: Wed, 27 Oct 2021 17:08:48 +0100 Subject: [PATCH 333/641] [sky] Add `SkyNewsStoryIE` (#1443) Authored by: ajj8 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/sky.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 035c159c24..21c71a835e 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1289,6 +1289,7 @@ from .skynewsarabia import ( from .skynewsau import SkyNewsAUIE from .sky import ( SkyNewsIE, + SkyNewsStoryIE, SkySportsIE, SkySportsNewsIE, ) diff --git a/yt_dlp/extractor/sky.py b/yt_dlp/extractor/sky.py index ff2c977a02..ad1e62d88f 100644 --- a/yt_dlp/extractor/sky.py +++ b/yt_dlp/extractor/sky.py @@ -105,6 +105,34 @@ class SkyNewsIE(SkyBaseIE): } +class SkyNewsStoryIE(SkyBaseIE): + IE_NAME = 'sky:news:story' + _VALID_URL = r'https?://news\.sky\.com/story/[0-9a-z-]+-(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://news.sky.com/story/budget-2021-chancellor-rishi-sunak-vows-address-will-deliver-strong-economy-fit-for-a-new-age-of-optimism-12445425', + 'info_dict': { + 'id': 'ref:0714acb9-123d-42c8-91b8-5c1bc6c73f20', + 'title': 'md5:e408dd7aad63f31a1817bbe40c7d276f', + 'description': 'md5:a881e12f49212f92be2befe4a09d288a', + 'ext': 'mp4', + 'upload_date': '20211027', + 'timestamp': 1635317494, + 'uploader_id': '6058004172001', + } + } + + def _real_extract(self, url): + article_id = self._match_id(url) + webpage = self._download_webpage(url, article_id) + + entries = [self._process_ooyala_element(webpage, sdc_el, url) + for sdc_el in re.findall(self._SDC_EL_REGEX, webpage)] + + return self.playlist_result( + entries, article_id, self._og_search_title(webpage), + self._html_search_meta(['og:description', 'description'], webpage)) + + class SkySportsNewsIE(SkyBaseIE): IE_NAME = 'sky:sports:news' _VALID_URL = r'https?://(?:www\.)?skysports\.com/([^/]+/)*news/\d+/(?P<id>\d+)' From 5be76d1ab7fed65a5894b221c7b7f896a18fc820 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 28 Oct 2021 02:01:26 +0530 Subject: [PATCH 334/641] [viewlift] Add cookie-based login and series support Closes #1340, #1316 Authored by: Ashish0804, pukkandan --- yt_dlp/extractor/viewlift.py | 192 ++++++++++++++++++++++++++--------- 1 file changed, 146 insertions(+), 46 deletions(-) diff --git a/yt_dlp/extractor/viewlift.py b/yt_dlp/extractor/viewlift.py index c3b2e863d7..ca53a1736d 100644 --- a/yt_dlp/extractor/viewlift.py +++ b/yt_dlp/extractor/viewlift.py @@ -9,6 +9,7 @@ from ..utils import ( ExtractorError, int_or_none, parse_age_limit, + traverse_obj, ) @@ -32,26 +33,36 @@ class ViewLiftBaseIE(InfoExtractor): } _TOKENS = {} - def _call_api(self, site, path, video_id, query): - token = self._TOKENS.get(site) - if not token:
token_query = {'site': site} - email, password = self._get_login_info(netrc_machine=site) - if email: - resp = self._download_json( - self._API_BASE + 'identity/signin', video_id, - 'Logging in', query=token_query, data=json.dumps({ - 'email': email, - 'password': password, - }).encode()) - else: - resp = self._download_json( - self._API_BASE + 'identity/anonymous-token', video_id, - 'Downloading authorization token', query=token_query) - self._TOKENS[site] = token = resp['authorizationToken'] - return self._download_json( - self._API_BASE + path, video_id, - headers={'Authorization': token}, query=query) + def _fetch_token(self, site, url): + if self._TOKENS.get(site): + return + email, password = self._get_login_info(netrc_machine=site) + if email: + self.report_warning('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies']) + + cookies = self._get_cookies(url) + if cookies and cookies.get('token'): + self._TOKENS[site] = self._search_regex(r'22authorizationToken\%22:\%22([^\%]+)\%22', cookies['token'].value, 'token') + if not self._TOKENS.get(site): + self.raise_login_required('Cookies (not necessarily logged in) are needed to download from this website', method='cookies') + + def _call_api(self, site, path, video_id, url, query): + self._fetch_token(site, url) + try: + return self._download_json( + self._API_BASE + path, video_id, headers={'Authorization': self._TOKENS.get(site)}, query=query) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + webpage = e.cause.read().decode() + try: + error_message = traverse_obj(json.loads(webpage), 'errorMessage', 'message') + except json.JSONDecodeError: + raise ExtractorError(f'{site} said: {webpage}', cause=e.cause) + if error_message: + if 'has not purchased' in error_message: + self.raise_login_required(method='cookies') + raise ExtractorError(error_message, expected=True) + raise class ViewLiftEmbedIE(ViewLiftBaseIE): @@ -81,6 +92,81 @@ class ViewLiftEmbedIE(ViewLiftBaseIE): }, { 'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017', 'only_matching': True, + }, { # Free film with language code + 'url': 'https://www.hoichoi.tv/bn/films/title/shuyopoka', + 'info_dict': { + 'id': '7a7a9d33-1f4c-4771-9173-ee4fb6dbf196', + 'ext': 'mp4', + 'title': 'Shuyopoka', + 'description': 'md5:e28f2fb8680096a69c944d37c1fa5ffc', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20211006', + 'series': None + }, + 'params': {'skip_download': True}, + }, { # Free film + 'url': 'https://www.hoichoi.tv/films/title/dadu-no1', + 'info_dict': { + 'id': '0000015b-b009-d126-a1db-b81ff3780000', + 'ext': 'mp4', + 'title': 'Dadu No.1', + 'description': 'md5:605cba408e51a79dafcb824bdeded51e', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20210827', + 'series': None + }, + 'params': {'skip_download': True}, + }, { # Free episode + 'url': 'https://www.hoichoi.tv/webseries/case-jaundice-s01-e01', + 'info_dict': { + 'id': 'f779e07c-30c8-459c-8612-5a834ab5e5ba', + 'ext': 'mp4', + 'title': 'Humans Vs.
Corona', + 'description': 'md5:ca30a682b4528d02a3eb6d0427dd0f87', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20210830', + 'series': 'Case Jaundice' + }, + 'params': {'skip_download': True}, + }, { # Free video + 'url': 'https://www.hoichoi.tv/videos/1549072415320-six-episode-02-hindi', + 'info_dict': { + 'id': 'b41fa1ce-aca6-47b6-b208-283ff0a2de30', + 'ext': 'mp4', + 'title': 'Woman in red - Hindi', + 'description': 'md5:9d21edc1827d32f8633eb67c2054fc31', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20211006', + 'series': 'Six (Hindi)' + }, + 'params': {'skip_download': True}, + }, { # Free episode + 'url': 'https://www.hoichoi.tv/shows/watch-asian-paints-moner-thikana-online-season-1-episode-1', + 'info_dict': { + 'id': '1f45d185-8500-455c-b88d-13252307c3eb', + 'ext': 'mp4', + 'title': 'Jisshu Sengupta', + 'description': 'md5:ef6ffae01a3d83438597367400f824ed', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20211004', + 'series': 'Asian Paints Moner Thikana' + }, + 'params': {'skip_download': True}, + }, { # Free series + 'url': 'https://www.hoichoi.tv/shows/watch-moner-thikana-bengali-web-series-online', + 'playlist_mincount': 5, + 'info_dict': { + 'id': 'watch-moner-thikana-bengali-web-series-online', + }, + }, { # Premium series + 'url': 'https://www.hoichoi.tv/shows/watch-byomkesh-bengali-web-series-online', + 'playlist_mincount': 14, + 'info_dict': { + 'id': 'watch-byomkesh-bengali-web-series-online', + }, + }, { # Premium movie + 'url': 'https://www.hoichoi.tv/movies/detective-2020', + 'only_matching': True }] @staticmethod @@ -96,27 +182,24 @@ class ViewLiftEmbedIE(ViewLiftBaseIE): site = domain.split('.')[-2] if site in self._SITE_MAP: site = self._SITE_MAP[site] - try: - content_data = self._call_api( - site, 'entitlement/video/status', film_id, { - 'id': film_id - })['video'] - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - error_message = self._parse_json(e.cause.read().decode(), film_id).get('errorMessage') - if error_message == 'User does not have a valid subscription or has not purchased this content.': - self.raise_login_required() - raise ExtractorError(error_message, expected=True) - raise + + content_data = self._call_api( + site, 'entitlement/video/status', film_id, url, { + 'id': film_id + })['video'] gist = content_data['gist'] title = gist['title'] video_assets = content_data['streamingInfo']['videoAssets'] - formats = [] - mpeg_video_assets = video_assets.get('mpeg') or [] - for video_asset in mpeg_video_assets: + hls_url = video_assets.get('hls') + formats, subtitles = [], {} + if hls_url: + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) + + for video_asset in video_assets.get('mpeg') or []: video_asset_url = video_asset.get('url') - if not video_asset: + if not video_asset_url: continue bitrate = int_or_none(video_asset.get('bitrate')) height = int_or_none(self._search_regex( @@ -130,13 +213,17 @@ class ViewLiftEmbedIE(ViewLiftBaseIE): 'vcodec': video_asset.get('codec'), }) - hls_url = video_assets.get('hls') - if hls_url: - formats.extend(self._extract_m3u8_formats( - hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - self._sort_formats(formats) + subs = {} + for sub in traverse_obj(content_data, ('contentDetails', 'closedCaptions')) or []: + sub_url = sub.get('url') + if not sub_url: + continue + subs.setdefault(sub.get('language', 'English'), []).append({ + 'url': 
sub_url, }) - info = { + self._sort_formats(formats) + return { 'id': film_id, 'title': title, 'description': gist.get('description'), @@ -145,14 +232,15 @@ class ViewLiftEmbedIE(ViewLiftBaseIE): 'age_limit': parse_age_limit(content_data.get('parentalRating')), 'timestamp': int_or_none(gist.get('publishDate'), 1000), 'formats': formats, + 'subtitles': self._merge_subtitles(subs, subtitles), + 'categories': traverse_obj(content_data, ('categories', ..., 'title')), + 'tags': traverse_obj(content_data, ('tags', ..., 'title')), } - for k in ('categories', 'tags'): - info[k] = [v['title'] for v in content_data.get(k, []) if v.get('title')] - return info class ViewLiftIE(ViewLiftBaseIE): IE_NAME = 'viewlift' + _API_BASE = 'https://prod-api-cached-2.viewlift.com/' _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)(?P<path>(?:/(?:films/title|show|(?:news/)?videos?|watch))?/(?P<id>[^?#]+))' % ViewLiftBaseIE._DOMAINS_REGEX _TESTS = [{ 'url': 'http://www.snagfilms.com/films/title/lost_for_life', @@ -228,18 +316,30 @@ class ViewLiftIE(ViewLiftBaseIE): def suitable(cls, url): return False if ViewLiftEmbedIE.suitable(url) else super(ViewLiftIE, cls).suitable(url) + def _show_entries(self, domain, seasons): + for season in seasons: + for episode in season.get('episodes') or []: + path = traverse_obj(episode, ('gist', 'permalink')) + if path: + yield self.url_result(f'https://www.{domain}{path}', ie=self.ie_key()) + def _real_extract(self, url): domain, path, display_id = self._match_valid_url(url).groups() site = domain.split('.')[-2] if site in self._SITE_MAP: site = self._SITE_MAP[site] modules = self._call_api( - site, 'content/pages', display_id, { + site, 'content/pages', display_id, url, { 'includeContent': 'true', 'moduleOffset': 1, 'path': path, 'site': site, })['modules'] + + seasons = next((m['contentData'][0]['seasons'] for m in modules if m.get('moduleType') == 'ShowDetailModule'), None) + if seasons: + return self.playlist_result(self._show_entries(domain, seasons), display_id) + film_id = next(m['contentData'][0]['gist']['id'] for m in modules if m.get('moduleType') == 'VideoDetailModule') return { '_type': 'url_transparent', From 16b0d7e621c2fb4dc23e88f9b3e1a7b61cf5c60e Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 28 Oct 2021 02:07:15 +0530 Subject: [PATCH 335/641] [utils] Add `jwt_decode_hs256` Code from #1340 Authored by: Ashish0804 --- yt_dlp/utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 48baa6503c..080bf260a2 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -6512,6 +6512,13 @@ def jwt_encode_hs256(payload_data, key, headers={}): return token +# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256 +def jwt_decode_hs256(jwt): + header_b64, payload_b64, signature_b64 = jwt.split('.') + payload_data = json.loads(base64.urlsafe_b64decode(payload_b64)) + return payload_data + + def supports_terminal_sequences(stream): if compat_os_name == 'nt': if get_windows_version() < (10, 0, 10586): From ab630a57b9df229fa150a7eaa19ff51750597fbc Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 28 Oct 2021 02:14:33 +0530 Subject: [PATCH 336/641] [viewlift] Fix typo in 5be76d1ab7fed65a5894b221c7b7f896a18fc820 --- yt_dlp/extractor/viewlift.py | 150 +++++++++++++++++------------------ 1 file changed, 75 insertions(+), 75 deletions(-) diff --git a/yt_dlp/extractor/viewlift.py b/yt_dlp/extractor/viewlift.py index ca53a1736d..5b558d8904 100644 --- a/yt_dlp/extractor/viewlift.py +++ b/yt_dlp/extractor/viewlift.py @@ -92,81 +92,6 @@ class ViewLiftEmbedIE(ViewLiftBaseIE): }, { 'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017', 'only_matching': True, - }, { # Free film with language code - 'url': 'https://www.hoichoi.tv/bn/films/title/shuyopoka', - 'info_dict': { - 'id': '7a7a9d33-1f4c-4771-9173-ee4fb6dbf196', - 'ext': 'mp4', - 'title': 'Shuyopoka', - 'description': 'md5:e28f2fb8680096a69c944d37c1fa5ffc', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20211006', - 'series': None - }, - 'params': {'skip_download': True}, - }, { # Free film - 'url': 'https://www.hoichoi.tv/films/title/dadu-no1', - 'info_dict': { - 'id': '0000015b-b009-d126-a1db-b81ff3780000', - 'ext': 'mp4', - 'title': 'Dadu No.1', - 'description': 'md5:605cba408e51a79dafcb824bdeded51e', - 'thumbnail':
b/yt_dlp/extractor/viewlift.py @@ -92,81 +92,6 @@ class ViewLiftEmbedIE(ViewLiftBaseIE): }, { 'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017', 'only_matching': True, - }, { # Free film with langauge code - 'url': 'https://www.hoichoi.tv/bn/films/title/shuyopoka', - 'info_dict': { - 'id': '7a7a9d33-1f4c-4771-9173-ee4fb6dbf196', - 'ext': 'mp4', - 'title': 'Shuyopoka', - 'description': 'md5:e28f2fb8680096a69c944d37c1fa5ffc', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20211006', - 'series': None - }, - 'params': {'skip_download': True}, - }, { # Free film - 'url': 'https://www.hoichoi.tv/films/title/dadu-no1', - 'info_dict': { - 'id': '0000015b-b009-d126-a1db-b81ff3780000', - 'ext': 'mp4', - 'title': 'Dadu No.1', - 'description': 'md5:605cba408e51a79dafcb824bdeded51e', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20210827', - 'series': None - }, - 'params': {'skip_download': True}, - }, { # Free episode - 'url': 'https://www.hoichoi.tv/webseries/case-jaundice-s01-e01', - 'info_dict': { - 'id': 'f779e07c-30c8-459c-8612-5a834ab5e5ba', - 'ext': 'mp4', - 'title': 'Humans Vs. Corona', - 'description': 'md5:ca30a682b4528d02a3eb6d0427dd0f87', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20210830', - 'series': 'Case Jaundice' - }, - 'params': {'skip_download': True}, - }, { # Free video - 'url': 'https://www.hoichoi.tv/videos/1549072415320-six-episode-02-hindi', - 'info_dict': { - 'id': 'b41fa1ce-aca6-47b6-b208-283ff0a2de30', - 'ext': 'mp4', - 'title': 'Woman in red - Hindi', - 'description': 'md5:9d21edc1827d32f8633eb67c2054fc31', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20211006', - 'series': 'Six (Hindi)' - }, - 'params': {'skip_download': True}, - }, { # Free episode - 'url': 'https://www.hoichoi.tv/shows/watch-asian-paints-moner-thikana-online-season-1-episode-1', - 'info_dict': { - 'id': '1f45d185-8500-455c-b88d-13252307c3eb', - 'ext': 'mp4', - 'title': 'Jisshu Sengupta', - 'description': 'md5:ef6ffae01a3d83438597367400f824ed', - 'thumbnail': r're:^https?://.*\.jpg$', - 'upload_date': '20211004', - 'series': 'Asian Paints Moner Thikana' - }, - 'params': {'skip_download': True}, - }, { # Free series - 'url': 'https://www.hoichoi.tv/shows/watch-moner-thikana-bengali-web-series-online', - 'playlist_mincount': 5, - 'info_dict': { - 'id': 'watch-moner-thikana-bengali-web-series-online', - }, - }, { # Premium series - 'url': 'https://www.hoichoi.tv/shows/watch-byomkesh-bengali-web-series-online', - 'playlist_mincount': 14, - 'info_dict': { - 'id': 'watch-byomkesh-bengali-web-series-online', - }, - }, { # Premium movie - 'url': 'https://www.hoichoi.tv/movies/detective-2020', - 'only_matching': True }] @staticmethod @@ -310,6 +235,81 @@ class ViewLiftIE(ViewLiftBaseIE): }, { 'url': 'https://www.marquee.tv/watch/sadlerswells-sacredmonsters', 'only_matching': True, + }, { # Free film with langauge code + 'url': 'https://www.hoichoi.tv/bn/films/title/shuyopoka', + 'info_dict': { + 'id': '7a7a9d33-1f4c-4771-9173-ee4fb6dbf196', + 'ext': 'mp4', + 'title': 'Shuyopoka', + 'description': 'md5:e28f2fb8680096a69c944d37c1fa5ffc', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20211006', + 'series': None + }, + 'params': {'skip_download': True}, + }, { # Free film + 'url': 'https://www.hoichoi.tv/films/title/dadu-no1', + 'info_dict': { + 'id': '0000015b-b009-d126-a1db-b81ff3780000', + 'ext': 'mp4', + 'title': 'Dadu No.1', + 'description': 'md5:605cba408e51a79dafcb824bdeded51e', + 'thumbnail': 
r're:^https?://.*\.jpg$', + 'upload_date': '20210827', + 'series': None + }, + 'params': {'skip_download': True}, + }, { # Free episode + 'url': 'https://www.hoichoi.tv/webseries/case-jaundice-s01-e01', + 'info_dict': { + 'id': 'f779e07c-30c8-459c-8612-5a834ab5e5ba', + 'ext': 'mp4', + 'title': 'Humans Vs. Corona', + 'description': 'md5:ca30a682b4528d02a3eb6d0427dd0f87', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20210830', + 'series': 'Case Jaundice' + }, + 'params': {'skip_download': True}, + }, { # Free video + 'url': 'https://www.hoichoi.tv/videos/1549072415320-six-episode-02-hindi', + 'info_dict': { + 'id': 'b41fa1ce-aca6-47b6-b208-283ff0a2de30', + 'ext': 'mp4', + 'title': 'Woman in red - Hindi', + 'description': 'md5:9d21edc1827d32f8633eb67c2054fc31', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20211006', + 'series': 'Six (Hindi)' + }, + 'params': {'skip_download': True}, + }, { # Free episode + 'url': 'https://www.hoichoi.tv/shows/watch-asian-paints-moner-thikana-online-season-1-episode-1', + 'info_dict': { + 'id': '1f45d185-8500-455c-b88d-13252307c3eb', + 'ext': 'mp4', + 'title': 'Jisshu Sengupta', + 'description': 'md5:ef6ffae01a3d83438597367400f824ed', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20211004', + 'series': 'Asian Paints Moner Thikana' + }, + 'params': {'skip_download': True}, + }, { # Free series + 'url': 'https://www.hoichoi.tv/shows/watch-moner-thikana-bengali-web-series-online', + 'playlist_mincount': 5, + 'info_dict': { + 'id': 'watch-moner-thikana-bengali-web-series-online', + }, + }, { # Premium series + 'url': 'https://www.hoichoi.tv/shows/watch-byomkesh-bengali-web-series-online', + 'playlist_mincount': 14, + 'info_dict': { + 'id': 'watch-byomkesh-bengali-web-series-online', + }, + }, { # Premium movie + 'url': 'https://www.hoichoi.tv/movies/detective-2020', + 'only_matching': True }] @classmethod From 3783b5f1d13380f9472bcbdca192aff349c01b17 Mon Sep 17 00:00:00 2001 From: ajj8 <35781586+ajj8@users.noreply.github.com> Date: Thu, 28 Oct 2021 11:57:09 +0100 Subject: [PATCH 337/641] [itv] Add support for ITV News (#1456) Authored by: ajj8 --- yt_dlp/extractor/itv.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py index d69782b782..6e6a3673cd 100644 --- a/yt_dlp/extractor/itv.py +++ b/yt_dlp/extractor/itv.py @@ -220,16 +220,23 @@ class ITVIE(InfoExtractor): class ITVBTCCIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?itv\.com/(?:news|btcc)/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _TESTS = [{ 'url': 'https://www.itv.com/btcc/articles/btcc-2019-brands-hatch-gp-race-action', 'info_dict': { 'id': 'btcc-2019-brands-hatch-gp-race-action', 'title': 'BTCC 2019: Brands Hatch GP race action', }, 'playlist_count': 12, - } + }, { + 'url': 'https://www.itv.com/news/2021-10-27/i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike', + 'info_dict': { + 'id': 'i-have-to-protect-the-country-says-rishi-sunak-as-uk-faces-interest-rate-hike', + 'title': 'md5:6ef054dd9f069330db3dcc66cb772d32' + }, + 'playlist_count': 4 + }] - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s' + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
@@ -240,15 +247,15 @@ class ITVBTCCIE(InfoExtractor):
             '(?s)<script[^>]+id=[\'"]__NEXT_DATA__[^>]*>([^<]+)</script>', webpage, 'json_map'), playlist_id),
             lambda x: x['props']['pageProps']['article']['body']['content']) or []
 
-        # Discard empty objects
-        video_ids = []
+        entries = []
         for video in json_map:
-            if video['data'].get('id'):
-                video_ids.append(video['data']['id'])
-
-        entries = [
-            self.url_result(
-                smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {
+            if not any(video['data'].get(attr) == 'Brightcove' for attr in ('name', 'type')):
+                continue
+            video_id = video['data']['id']
+            account_id = video['data']['accountId']
+            player_id = video['data']['playerId']
+            entries.append(self.url_result(
+                smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id), {
                     # ITV does not like some GB IP ranges, so here are some
                     # IP blocks it accepts
                     'geo_ip_blocks': [
@@ -256,8 +263,7 @@ class ITVBTCCIE(InfoExtractor):
                 ],
                 'referrer': url,
             }),
-                ie=BrightcoveNewIE.ie_key(), video_id=video_id)
-            for video_id in video_ids]
+                ie=BrightcoveNewIE.ie_key(), video_id=video_id))
 
         title = self._og_search_title(webpage, fatal=False)
 
From 7b5f3f7c3d87d1bb711f6a76007a352a851e80ca Mon Sep 17 00:00:00 2001
From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com>
Date: Thu, 28 Oct 2021 23:48:09 +0530
Subject: [PATCH 338/641] [MLSSoccer] Add extractor (#1452)

Authored by: Ashish0804
Closes #1451
---
 yt_dlp/extractor/extractors.py |   1 +
 yt_dlp/extractor/mlssoccer.py  | 118 +++++++++++++++++++++++++++++++++
 2 files changed, 119 insertions(+)
 create mode 100644 yt_dlp/extractor/mlssoccer.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 21c71a835e..1c5743604d 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -793,6 +793,7 @@ from .mlb import (
     MLBIE,
     MLBVideoIE,
 )
+from .mlssoccer import MLSSoccerIE
 from .mnet import MnetIE
 from .moevideo import MoeVideoIE
 from .mofosex import (
diff --git a/yt_dlp/extractor/mlssoccer.py b/yt_dlp/extractor/mlssoccer.py
new file mode 100644
index 0000000000..2d65787e20
--- /dev/null
+++ b/yt_dlp/extractor/mlssoccer.py
@@ -0,0 +1,118 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class MLSSoccerIE(InfoExtractor):
+    _VALID_DOMAINS = r'(?:(?:cfmontreal|intermiamicf|lagalaxy|lafc|houstondynamofc|dcunited|atlutd|mlssoccer|fcdallas|columbuscrew|coloradorapids|fccincinnati|chicagofirefc|austinfc|nashvillesc|whitecapsfc|sportingkc|soundersfc|sjearthquakes|rsl|timbers|philadelphiaunion|orlandocitysc|newyorkredbulls|nycfc)\.com|(?:torontofc)\.ca|(?:revolutionsoccer)\.net)'
+    _VALID_URL = r'(?:https?://)(?:www\.)?%s/video/#?(?P<id>[^/&$#?]+)' % _VALID_DOMAINS
+
+    _TESTS = [{
+        'url': 'https://www.mlssoccer.com/video/the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986#the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986',
+        'info_dict': {
+            'id': '6276033198001',
+            'ext': 'mp4',
+            'title': 'The Octagon | Can Alphonso Davies lead Canada to first World Cup since 1986?',
+            'description': 'md5:f0a883ee33592a0221798f451a98be8f',
+            'thumbnail': 'https://cf-images.us-east-1.prod.boltdns.net/v1/static/5530036772001/1bbc44f6-c63c-4981-82fa-46b0c1f891e0/5c1ca44a-a033-4e98-b531-ff24c4947608/160x90/match/image.jpg',
+            'duration': 350.165,
+            'timestamp': 1633627291,
+            'uploader_id': '5530036772001',
+            'tags': ['club/canada'],
+            'is_live': False,
+            'duration_string': '5:50',
+            'upload_date': '20211007',
+            'filesize_approx': 255193528.83200002
+        },
+        'params': 
{'skip_download': True} + }, { + 'url': 'https://www.whitecapsfc.com/video/highlights-san-jose-earthquakes-vs-vancouver-whitecaps-fc-october-23-2021#highlights-san-jose-earthquakes-vs-vancouver-whitecaps-fc-october-23-2021', + 'only_matching': True + }, { + 'url': 'https://www.torontofc.ca/video/highlights-toronto-fc-vs-cf-montreal-october-23-2021-x6733#highlights-toronto-fc-vs-cf-montreal-october-23-2021-x6733', + 'only_matching': True + }, { + 'url': 'https://www.sportingkc.com/video/post-match-press-conference-john-pulskamp-oct-27-2021#post-match-press-conference-john-pulskamp-oct-27-2021', + 'only_matching': True + }, { + 'url': 'https://www.soundersfc.com/video/highlights-seattle-sounders-fc-vs-sporting-kansas-city-october-23-2021', + 'only_matching': True + }, { + 'url': 'https://www.sjearthquakes.com/video/#highlights-austin-fc-vs-san-jose-earthquakes-june-19-2021', + 'only_matching': True + }, { + 'url': 'https://www.rsl.com/video/2021-u-of-u-health-mic-d-up-vs-colorado-10-16-21#2021-u-of-u-health-mic-d-up-vs-colorado-10-16-21', + 'only_matching': True + }, { + 'url': 'https://www.timbers.com/video/highlights-d-chara-asprilla-with-goals-in-portland-timbers-2-0-win-over-san-jose#highlights-d-chara-asprilla-with-goals-in-portland-timbers-2-0-win-over-san-jose', + 'only_matching': True + }, { + 'url': 'https://www.philadelphiaunion.com/video/highlights-torvphi', + 'only_matching': True + }, { + 'url': 'https://www.orlandocitysc.com/video/highlight-columbus-crew-vs-orlando-city-sc', + 'only_matching': True + }, { + 'url': 'https://www.newyorkredbulls.com/video/all-access-matchday-double-derby-week#all-access-matchday-double-derby-week', + 'only_matching': True + }, { + 'url': 'https://www.nycfc.com/video/highlights-nycfc-1-0-chicago-fire-fc#highlights-nycfc-1-0-chicago-fire-fc', + 'only_matching': True + }, { + 'url': 'https://www.revolutionsoccer.net/video/two-minute-highlights-revs-1-rapids-0-october-27-2021#two-minute-highlights-revs-1-rapids-0-october-27-2021', + 'only_matching': True + }, { + 'url': 'https://www.nashvillesc.com/video/goal-c-j-sapong-nashville-sc-92nd-minute', + 'only_matching': True + }, { + 'url': 'https://www.cfmontreal.com/video/faits-saillants-tor-v-mtl#faits-saillants-orl-v-mtl-x5645', + 'only_matching': True + }, { + 'url': 'https://www.intermiamicf.com/video/all-access-victory-vs-nashville-sc-by-ukg#all-access-victory-vs-nashville-sc-by-ukg', + 'only_matching': True + }, { + 'url': 'https://www.lagalaxy.com/video/#moment-of-the-month-presented-by-san-manuel-casino-rayan-raveloson-scores-his-se', + 'only_matching': True + }, { + 'url': 'https://www.lafc.com/video/breaking-down-lafc-s-final-6-matches-of-the-2021-mls-regular-season#breaking-down-lafc-s-final-6-matches-of-the-2021-mls-regular-season', + 'only_matching': True + }, { + 'url': 'https://www.houstondynamofc.com/video/postgame-press-conference-michael-nelson-presented-by-coushatta-casino-res-x9660#postgame-press-conference-michael-nelson-presented-by-coushatta-casino-res-x9660', + 'only_matching': True + }, { + 'url': 'https://www.dcunited.com/video/tony-alfaro-my-family-pushed-me-to-believe-everything-was-possible', + 'only_matching': True + }, { + 'url': 'https://www.fcdallas.com/video/highlights-fc-dallas-vs-minnesota-united-fc-october-02-2021#highlights-fc-dallas-vs-minnesota-united-fc-october-02-2021', + 'only_matching': True + }, { + 'url': 'https://www.columbuscrew.com/video/match-rewind-columbus-crew-vs-new-york-red-bulls-october-23-2021', + 'only_matching': True + }, { + 'url': 
'https://www.coloradorapids.com/video/postgame-reaction-robin-fraser-october-27#postgame-reaction-robin-fraser-october-27', + 'only_matching': True + }, { + 'url': 'https://www.fccincinnati.com/video/#keeping-cincy-chill-presented-by-coors-lite', + 'only_matching': True + }, { + 'url': 'https://www.chicagofirefc.com/video/all-access-fire-score-dramatic-road-win-in-cincy#all-access-fire-score-dramatic-road-win-in-cincy', + 'only_matching': True + }, { + 'url': 'https://www.austinfc.com/video/highlights-colorado-rapids-vs-austin-fc-september-29-2021#highlights-colorado-rapids-vs-austin-fc-september-29-2021', + 'only_matching': True + }, { + 'url': 'https://www.atlutd.com/video/goal-josef-martinez-scores-in-the-73rd-minute#goal-josef-martinez-scores-in-the-73rd-minute', + 'only_matching': True + }] + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + data_json = self._parse_json(self._html_search_regex(r'data-options\=\"([^\"]+)\"', webpage, 'json'), id)['videoList'][0] + return { + 'id': id, + '_type': 'url', + 'url': 'https://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (data_json['accountId'], data_json['videoId']), + 'ie_key': 'BrightcoveNew', + } From aeaf3b2b92bc4ab8b6f5d90c053aa43d93ab64e1 Mon Sep 17 00:00:00 2001 From: Luc Ritchie Date: Fri, 29 Oct 2021 14:17:10 -0400 Subject: [PATCH 339/641] [Coub] Fix media format identification (#1469) Authored by: wlritchi --- yt_dlp/extractor/coub.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/coub.py b/yt_dlp/extractor/coub.py index eba6b73baa..e90aa1954f 100644 --- a/yt_dlp/extractor/coub.py +++ b/yt_dlp/extractor/coub.py @@ -57,7 +57,7 @@ class CoubIE(InfoExtractor): file_versions = coub['file_versions'] - QUALITIES = ('low', 'med', 'high') + QUALITIES = ('low', 'med', 'high', 'higher') MOBILE = 'mobile' IPHONE = 'iphone' @@ -86,6 +86,7 @@ class CoubIE(InfoExtractor): 'format_id': '%s-%s-%s' % (HTML5, kind, quality), 'filesize': int_or_none(item.get('size')), 'vcodec': 'none' if kind == 'audio' else None, + 'acodec': 'none' if kind == 'video' else None, 'quality': quality_key(quality), 'source_preference': preference_key(HTML5), }) From e6ff66efc0dcacbfbca4402e717a182c8f6b4e85 Mon Sep 17 00:00:00 2001 From: nixxo Date: Fri, 29 Oct 2021 21:39:55 +0200 Subject: [PATCH 340/641] [mediaset] Add playlist support (#1463) Closes #1372 Authored by: nixxo --- yt_dlp/extractor/extractors.py | 5 ++- yt_dlp/extractor/mediaset.py | 82 ++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 1c5743604d..9d963ee46e 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -744,7 +744,10 @@ from .mdr import MDRIE from .medaltv import MedalTVIE from .mediaite import MediaiteIE from .mediaklikk import MediaKlikkIE -from .mediaset import MediasetIE +from .mediaset import ( + MediasetIE, + MediasetShowIE, +) from .mediasite import ( MediasiteIE, MediasiteCatalogIE, diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py index 26e7abc493..119b39997a 100644 --- a/yt_dlp/extractor/mediaset.py +++ b/yt_dlp/extractor/mediaset.py @@ -1,13 +1,17 @@ # coding: utf-8 from __future__ import unicode_literals +import functools import re from .theplatform import ThePlatformBaseIE from ..utils import ( ExtractorError, int_or_none, + OnDemandPagedList, parse_qs, + try_get, + urljoin, update_url_query, ) @@ -212,3 
+216,81 @@ class MediasetIE(ThePlatformBaseIE):
             'subtitles': subtitles,
         })
         return info
+
+
+class MediasetShowIE(MediasetIE):
+    _VALID_URL = r'''(?x)
+                    (?:
+                        https?://
+                            (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
+                            (?:
+                                (?:fiction|programmi-tv|serie-tv)/(?:.+?/)?
+                                    (?:[a-z]+)_SE(?P<id>\d{12})
+                                    (?:,ST(?P<st>\d{12}))?
+                                    (?:,sb(?P<sb>\d{9}))?$
+                            )
+                    )
+                    '''
+    _TESTS = [{
+        # TV Show webpage (with a single playlist)
+        'url': 'https://www.mediasetplay.mediaset.it/serie-tv/fireforce/episodi_SE000000001556',
+        'info_dict': {
+            'id': '000000001556',
+            'title': 'Fire Force',
+        },
+        'playlist_count': 1,
+    }, {
+        # TV Show webpage (with multiple playlists)
+        'url': 'https://www.mediasetplay.mediaset.it/programmi-tv/leiene/leiene_SE000000000061,ST000000002763',
+        'info_dict': {
+            'id': '000000002763',
+            'title': 'Le Iene',
+        },
+        'playlist_count': 7,
+    }, {
+        # TV Show specific playlist (single page)
+        'url': 'https://www.mediasetplay.mediaset.it/serie-tv/fireforce/episodi_SE000000001556,ST000000002738,sb100013107',
+        'info_dict': {
+            'id': '100013107',
+            'title': 'Episodi',
+        },
+        'playlist_count': 4,
+    }, {
+        # TV Show specific playlist (with multiple pages)
+        'url': 'https://www.mediasetplay.mediaset.it/programmi-tv/leiene/iservizi_SE000000000061,ST000000002763,sb100013375',
+        'info_dict': {
+            'id': '100013375',
+            'title': 'I servizi',
+        },
+        'playlist_count': 53,
+    }]
+
+    _BY_SUBBRAND = 'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs-v2?byCustomValue={subBrandId}{%s}&sort=:publishInfo_lastPublished|desc,tvSeasonEpisodeNumber|desc&range=%d-%d'
+    _PAGE_SIZE = 25
+
+    def _fetch_page(self, sb, page):
+        lower_limit = page * self._PAGE_SIZE + 1
+        upper_limit = lower_limit + self._PAGE_SIZE - 1
+        content = self._download_json(
+            self._BY_SUBBRAND % (sb, lower_limit, upper_limit), sb)
+        for entry in content.get('entries') or []:
+            yield self.url_result(
+                'mediaset:' + entry['guid'],
+                playlist_title=entry['mediasetprogram$subBrandDescription'])
+
+    def _real_extract(self, url):
+        playlist_id, st, sb = self._match_valid_url(url).group('id', 'st', 'sb')
+        if not sb:
+            page = self._download_webpage(url, playlist_id)
+            entries = [self.url_result(urljoin('https://www.mediasetplay.mediaset.it', url))
+                       for url in re.findall(r'href="([^<>=]+SE\d{12},ST\d{12},sb\d{9})">[^<]+<', page)]
+            title = (self._html_search_regex(r'(?s)<h1[^>]*>(.+?)</h1>
', page, 'title', default=None) + or self._og_search_title(page)) + return self.playlist_result(entries, st or playlist_id, title) + + entries = OnDemandPagedList( + functools.partial(self._fetch_page, sb), + self._PAGE_SIZE) + title = try_get(entries, lambda x: x[0]['playlist_title']) + + return self.playlist_result(entries, sb, title) From 10beccc980ea04913603b802d06ffaebc011cfc8 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 30 Oct 2021 01:13:51 +0530 Subject: [PATCH 341/641] [FormatSort] Fix some fields' defaults Closes #1479 --- yt_dlp/extractor/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 369cff418e..aa98c0cc9f 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1549,8 +1549,8 @@ class InfoExtractor(object): 'ie_pref': {'priority': True, 'type': 'extractor'}, 'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)}, 'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)}, - 'lang': {'convert': 'ignore', 'field': 'language_preference'}, - 'quality': {'convert': 'float_none', 'default': -1}, + 'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1}, + 'quality': {'convert': 'float', 'default': -1}, 'filesize': {'convert': 'bytes'}, 'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'}, 'id': {'convert': 'string', 'field': 'format_id'}, @@ -1561,7 +1561,7 @@ class InfoExtractor(object): 'vbr': {'convert': 'float_none'}, 'abr': {'convert': 'float_none'}, 'asr': {'convert': 'float_none'}, - 'source': {'convert': 'ignore', 'field': 'source_preference'}, + 'source': {'convert': 'float', 'field': 'source_preference', 'default': -1}, 'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')}, 'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True}, From 5e7bbac3057e06bb0d5d8cb3cfd5f607d5cf8459 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 30 Oct 2021 01:53:28 +0530 Subject: [PATCH 342/641] [generic] parse jwplayer with only the json URL Closes #1476 --- yt_dlp/extractor/generic.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 5918c8c562..ffcf9b303a 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -1188,6 +1188,21 @@ class GenericIE(InfoExtractor): }, 'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/', }, + # jwplayer with only the json URL + { + 'url': 'https://www.hollywoodreporter.com/news/general-news/dunkirk-team-reveals-what-christopher-nolan-said-oscar-win-meet-your-oscar-winner-1092454', + 'info_dict': { + 'id': 'TljWkvWH', + 'ext': 'mp4', + 'upload_date': '20180306', + 'title': 'md5:91eb1862f6526415214f62c00b453936', + 'description': 'md5:73048ae50ae953da10549d1d2fe9b3aa', + 'timestamp': 1520367225, + }, + 'params': { + 'skip_download': True, + }, + }, # Complex jwplayer { 'url': 'http://www.indiedb.com/games/king-machine/videos', @@ -3503,6 +3518,13 @@ class GenericIE(InfoExtractor): jwplayer_data = self._find_jwplayer_data( webpage, video_id, transform_source=js_to_json) if jwplayer_data: + if isinstance(jwplayer_data.get('playlist'), str): + return { + **info_dict, + '_type': 'url', + 'ie_key': JWPlatformIE.ie_key(), + 'url': jwplayer_data['playlist'], + } try: info = self._parse_jwplayer_data( jwplayer_data, video_id, require_title=False, base_url=url) From 
fa0b816e379b79abc3f4e64bd8d750fc99e40775 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 30 Oct 2021 02:03:53 +0530 Subject: [PATCH 343/641] [generic] Detect more json_ld Closes #1475 --- yt_dlp/extractor/generic.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index ffcf9b303a..0d279016b2 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -3583,8 +3583,7 @@ class GenericIE(InfoExtractor): return info_dict # Looking for http://schema.org/VideoObject - json_ld = self._search_json_ld( - webpage, video_id, default={}, expected_type='VideoObject') + json_ld = self._search_json_ld(webpage, video_id, default={}) if json_ld.get('url'): return merge_dicts(json_ld, info_dict) From 6b301aaa34545b217fdcc276a65f683de518cbf4 Mon Sep 17 00:00:00 2001 From: Sipherdrakon <64430430+Sipherdrakon@users.noreply.github.com> Date: Fri, 29 Oct 2021 21:18:59 -0400 Subject: [PATCH 344/641] [mtv] Fix some videos (#1453) Partial fix for #713 Authored by: Sipherdrakon --- yt_dlp/extractor/mtv.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py index e0608845dd..141dd7deb3 100644 --- a/yt_dlp/extractor/mtv.py +++ b/yt_dlp/extractor/mtv.py @@ -305,6 +305,14 @@ class MTVServicesInfoExtractor(InfoExtractor): if not mgid: mgid = self._extract_triforce_mgid(webpage) + if not mgid: + mgid = self._search_regex( + r'"videoConfig":{"videoId":"(mgid:.*?)"', webpage, 'mgid', default=None) + + if not mgid: + mgid = self._search_regex( + r'"media":{"video":{"config":{"uri":"(mgid:.*?)"', webpage, 'mgid', default=None) + if not mgid: data = self._parse_json(self._search_regex( r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None) @@ -313,10 +321,6 @@ class MTVServicesInfoExtractor(InfoExtractor): video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer') mgid = video_player['props']['media']['video']['config']['uri'] - if not mgid: - mgid = self._search_regex( - r'"media":{"video":{"config":{"uri":"(mgid:.*?)"', webpage, 'mgid', default=None) - return mgid def _real_extract(self, url): From 652fb0d446524af4b783276babd55f5fc6a3afeb Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Sat, 30 Oct 2021 23:26:00 +0530 Subject: [PATCH 345/641] [VLive] Add upload_date and thumbnail (#1486) Closes #1472 Authored by: Ashish0804 --- yt_dlp/extractor/naver.py | 7 ++++--- yt_dlp/extractor/vlive.py | 11 +++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py index acf53c1ff2..a6821ba86d 100644 --- a/yt_dlp/extractor/naver.py +++ b/yt_dlp/extractor/naver.py @@ -40,6 +40,7 @@ class NaverBaseIE(InfoExtractor): formats.append({ 'format_id': '%s_%s' % (stream.get('type') or stream_type, dict_get(encoding_option, ('name', 'id'))), 'url': stream_url, + 'ext': 'mp4', 'width': int_or_none(encoding_option.get('width')), 'height': int_or_none(encoding_option.get('height')), 'vbr': int_or_none(bitrate.get('video')), @@ -174,7 +175,7 @@ class NaverLiveIE(InfoExtractor): 'url': 'https://tv.naver.com/l/52010', 'info_dict': { 'id': '52010', - 'ext': 'm3u8', + 'ext': 'mp4', 'title': '[LIVE] 뉴스특보 : "수도권 거리두기, 2주간 2단계로 조정"', 'description': 'md5:df7f0c237a5ed5e786ce5c91efbeaab3', 'channel_id': 'NTV-ytnnews24-0', @@ -184,7 +185,7 @@ class NaverLiveIE(InfoExtractor): 'url': 'https://tv.naver.com/l/51549', 'info_dict': { 'id': '51549', 
- 'ext': 'm3u8', + 'ext': 'mp4', 'title': '연합뉴스TV - 코로나19 뉴스특보', 'description': 'md5:c655e82091bc21e413f549c0eaccc481', 'channel_id': 'NTV-yonhapnewstv-0', @@ -233,7 +234,7 @@ class NaverLiveIE(InfoExtractor): continue formats.extend(self._extract_m3u8_formats( - quality.get('url'), video_id, 'm3u8', + quality.get('url'), video_id, 'mp4', m3u8_id=quality.get('qualityId'), live=True )) self._sort_formats(formats) diff --git a/yt_dlp/extractor/vlive.py b/yt_dlp/extractor/vlive.py index 681d959027..4340b1d4c9 100644 --- a/yt_dlp/extractor/vlive.py +++ b/yt_dlp/extractor/vlive.py @@ -86,6 +86,12 @@ class VLiveIE(VLiveBaseIE): 'creator': "Girl's Day", 'view_count': int, 'uploader_id': 'muploader_a', + 'upload_date': '20150817', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', + 'timestamp': 1439816449, + }, + 'params': { + 'skip_download': True, }, }, { 'url': 'http://www.vlive.tv/video/16937', @@ -97,6 +103,9 @@ class VLiveIE(VLiveBaseIE): 'view_count': int, 'subtitles': 'mincount:12', 'uploader_id': 'muploader_j', + 'upload_date': '20161112', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', + 'timestamp': 1478923074, }, 'params': { 'skip_download': True, @@ -173,6 +182,8 @@ class VLiveIE(VLiveBaseIE): 'view_count': int_or_none(video.get('playCount')), 'like_count': int_or_none(video.get('likeCount')), 'comment_count': int_or_none(video.get('commentCount')), + 'timestamp': int_or_none(video.get('createdAt'), scale=1000), + 'thumbnail': video.get('thumb'), } video_type = video.get('type') From cd9ea4104b8b5075ea4bfe92c76130e267686805 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sun, 31 Oct 2021 02:54:39 +0000 Subject: [PATCH 346/641] [instagram] Add more formats when logged in (#1487) Authored by: u-spec-png --- yt_dlp/extractor/instagram.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index ccfcddd5bc..8c935c2514 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -222,8 +222,8 @@ class InstagramIE(InfoExtractor): dict) if media: video_url = media.get('video_url') - height = int_or_none(media.get('dimensions', {}).get('height')) - width = int_or_none(media.get('dimensions', {}).get('width')) + height = try_get(media, lambda x: x['dimensions']['height']) + width = try_get(media, lambda x: x['dimensions']['width']) description = try_get( media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'], compat_str) or media.get('caption') @@ -231,8 +231,8 @@ class InstagramIE(InfoExtractor): thumbnail = media.get('display_src') or media.get('display_url') duration = float_or_none(media.get('video_duration')) timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date')) - uploader = media.get('owner', {}).get('full_name') - uploader_id = media.get('owner', {}).get('username') + uploader = try_get(media, lambda x: x['owner']['full_name']) + uploader_id = try_get(media, lambda x: x['owner']['username']) def get_count(keys, kind): for key in variadic(keys): @@ -294,6 +294,10 @@ class InstagramIE(InfoExtractor): 'width': width, 'height': height, }] + dash = try_get(media, lambda x: x['dash_info']['video_dash_manifest']) + if dash: + formats.extend(self._parse_mpd_formats(self._parse_xml(dash, video_id), mpd_id='dash')) + self._sort_formats(formats) if not uploader_id: uploader_id = self._search_regex( From 404f611f1c4aa516fbc4301aa7b8f734ee4bc67b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 31 Oct 
2021 09:53:58 +0530 Subject: [PATCH 347/641] [youtube] Fix throttling by decrypting n-sig (#1437) --- .gitignore | 1 + test/test_jsinterp.py | 50 ++++ test/test_youtube_signature.py | 72 +++-- yt_dlp/extractor/youtube.py | 91 +++++-- yt_dlp/jsinterp.py | 480 ++++++++++++++++++++++++++------- 5 files changed, 547 insertions(+), 147 deletions(-) diff --git a/.gitignore b/.gitignore index bf06c81f06..790989b3ca 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,7 @@ cookies *.webp *.annotations.xml *.description +.cache/ # Allow config/media files in testdata !test/** diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 8b2b60403c..380e52c333 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -112,6 +112,56 @@ class TestJSInterpreter(unittest.TestCase): ''') self.assertEqual(jsi.call_function('z'), 5) + def test_for_loop(self): + jsi = JSInterpreter(''' + function x() { a=0; for (i=0; i-10; i++) {a++} a } + ''') + self.assertEqual(jsi.call_function('x'), 10) + + def test_switch(self): + jsi = JSInterpreter(''' + function x(f) { switch(f){ + case 1:f+=1; + case 2:f+=2; + case 3:f+=3;break; + case 4:f+=4; + default:f=0; + } return f } + ''') + self.assertEqual(jsi.call_function('x', 1), 7) + self.assertEqual(jsi.call_function('x', 3), 6) + self.assertEqual(jsi.call_function('x', 5), 0) + + def test_try(self): + jsi = JSInterpreter(''' + function x() { try{return 10} catch(e){return 5} } + ''') + self.assertEqual(jsi.call_function('x'), 10) + + def test_for_loop_continue(self): + jsi = JSInterpreter(''' + function x() { a=0; for (i=0; i-10; i++) { continue; a++ } a } + ''') + self.assertEqual(jsi.call_function('x'), 0) + + def test_for_loop_break(self): + jsi = JSInterpreter(''' + function x() { a=0; for (i=0; i-10; i++) { break; a++ } a } + ''') + self.assertEqual(jsi.call_function('x'), 0) + + def test_literal_list(self): + jsi = JSInterpreter(''' + function x() { [1, 2, "asdf", [5, 6, 7]][3] } + ''') + self.assertEqual(jsi.call_function('x'), [5, 6, 7]) + + def test_comma(self): + jsi = JSInterpreter(''' + function x() { a=5; a -= 1, a+=3; return a } + ''') + self.assertEqual(jsi.call_function('x'), 7) + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index dcf6ab60d6..f40a069526 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -14,9 +14,10 @@ import string from test.helper import FakeYDL, is_download_test from yt_dlp.extractor import YoutubeIE +from yt_dlp.jsinterp import JSInterpreter from yt_dlp.compat import compat_str, compat_urlretrieve -_TESTS = [ +_SIG_TESTS = [ ( 'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js', 86, @@ -64,6 +65,13 @@ _TESTS = [ ) ] +_NSIG_TESTS = [ + ( + 'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js', + 'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w', + ), # TODO: Add more tests +] + @is_download_test class TestPlayerInfo(unittest.TestCase): @@ -97,35 +105,49 @@ class TestSignature(unittest.TestCase): os.mkdir(self.TESTDATA_DIR) -def make_tfunc(url, sig_input, expected_sig): - m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url) - assert m, '%r should follow URL format' % url - test_id = m.group(1) +def t_factory(name, sig_func, url_pattern): + def make_tfunc(url, sig_input, expected_sig): + m = url_pattern.match(url) + assert m, '%r should follow URL format' % url + test_id = m.group('id') - def test_func(self): - basename = 'player-%s.js' % test_id - fn = 
os.path.join(self.TESTDATA_DIR, basename)
 
-        if not os.path.exists(fn):
-            compat_urlretrieve(url, fn)
+            if not os.path.exists(fn):
+                compat_urlretrieve(url, fn)
+            with io.open(fn, encoding='utf-8') as testf:
+                jscode = testf.read()
+            self.assertEqual(sig_func(jscode, sig_input), expected_sig)
 
-        ydl = FakeYDL()
-        ie = YoutubeIE(ydl)
-        with io.open(fn, encoding='utf-8') as testf:
-            jscode = testf.read()
-        func = ie._parse_sig_js(jscode)
-        src_sig = (
-            compat_str(string.printable[:sig_input])
-            if isinstance(sig_input, int) else sig_input)
-        got_sig = func(src_sig)
-        self.assertEqual(got_sig, expected_sig)
-
-    test_func.__name__ = str('test_signature_js_' + test_id)
-    setattr(TestSignature, test_func.__name__, test_func)
+        test_func.__name__ = f'test_{name}_js_{test_id}'
+        setattr(TestSignature, test_func.__name__, test_func)
+        return make_tfunc
 
-for test_spec in _TESTS:
-    make_tfunc(*test_spec)
+def signature(jscode, sig_input):
+    func = YoutubeIE(FakeYDL())._parse_sig_js(jscode)
+    src_sig = (
+        compat_str(string.printable[:sig_input])
+        if isinstance(sig_input, int) else sig_input)
+    return func(src_sig)
+
+
+def n_sig(jscode, sig_input):
+    funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
+    return JSInterpreter(jscode).call_function(funcname, sig_input)
+
+
+make_sig_test = t_factory(
+    'signature', signature, re.compile(r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$'))
+for test_spec in _SIG_TESTS:
+    make_sig_test(*test_spec)
+
+make_nsig_test = t_factory(
+    'nsig', n_sig, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_-]+)/.+.js$'))
+for test_spec in _NSIG_TESTS:
+    make_nsig_test(*test_spec)
 
 
 if __name__ == '__main__':
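The extractor side of the fix threads the decrypted `n` value back into each progressive format URL. Roughly, the transformation is as below; this is a stdlib-only sketch, where `apply_nsig` and the `decrypt_nsig` callable are illustrative names (the real code uses yt_dlp's own `parse_qs`/`update_url_query` helpers and the JSInterpreter-derived function):

    from urllib.parse import parse_qs, urlencode, urlparse, urlunparse

    def apply_nsig(fmt_url, decrypt_nsig):
        # Sketch only: rewrite the throttling parameter n -> decrypt_nsig(n),
        # skipping URLs that already bypass rate limiting.
        parts = urlparse(fmt_url)
        query = parse_qs(parts.query)
        if query.get('ratebypass') != ['yes'] and query.get('n'):
            query['n'] = [decrypt_nsig(query['n'][0])]
        return urlunparse(parts._replace(query=urlencode(query, doseq=True)))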
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 658b45fe14..56cd2ed8d3 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -1720,7 +1720,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             raise ExtractorError('Cannot identify player %r' % player_url)
         return id_m.group('id')
 
-    def _load_player(self, video_id, player_url, fatal=True) -> bool:
+    def _load_player(self, video_id, player_url, fatal=True):
         player_id = self._extract_player_info(player_url)
         if player_id not in self._code_cache:
             code = self._download_webpage(
@@ -1729,7 +1729,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 errnote='Download of %s failed' % player_url)
             if code:
                 self._code_cache[player_id] = code
-        return player_id in self._code_cache
+        return self._code_cache.get(player_id)
 
     def _extract_signature_function(self, video_id, player_url, example_sig):
         player_id = self._extract_player_info(player_url)
@@ -1743,8 +1743,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         if cache_spec is not None:
             return lambda s: ''.join(s[i] for i in cache_spec)
 
-        if self._load_player(video_id, player_url):
-            code = self._code_cache[player_id]
+        code = self._load_player(video_id, player_url)
+        if code:
             res = self._parse_sig_js(code)
 
             test_string = ''.join(map(compat_chr, range(len(example_sig))))
@@ -1755,6 +1755,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         return res
 
     def _print_sig_code(self, func, example_sig):
+        if not self.get_param('youtube_print_sig_code'):
+            return
+
         def gen_sig_code(idxs):
             def _genslice(start, end, step):
                 starts = '' if start == 0 else str(start)
@@ -1831,13 +1834,58 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             )
             self._player_cache[player_id] = func
         func = self._player_cache[player_id]
-        if self.get_param('youtube_print_sig_code'):
-            self._print_sig_code(func, s)
+        self._print_sig_code(func, s)
         return func(s)
     except Exception as e:
-            tb = traceback.format_exc()
-            raise ExtractorError(
-                'Signature extraction failed: ' + tb, cause=e)
+            raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
+
+    def _decrypt_nsig(self, s, video_id, player_url):
+        """Turn the encrypted n field into a working signature"""
+        if player_url is None:
+            raise ExtractorError('Cannot decrypt nsig without player_url')
+        if player_url.startswith('//'):
+            player_url = 'https:' + player_url
+        elif not re.match(r'https?://', player_url):
+            player_url = compat_urlparse.urljoin(
+                'https://www.youtube.com', player_url)
+
+        sig_id = ('nsig_value', s)
+        if sig_id in self._player_cache:
+            return self._player_cache[sig_id]
+
+        try:
+            player_id = ('nsig', player_url)
+            if player_id not in self._player_cache:
+                self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
+            func = self._player_cache[player_id]
+            self._player_cache[sig_id] = func(s)
+            self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
+            return self._player_cache[sig_id]
+        except Exception as e:
+            raise ExtractorError(traceback.format_exc(), cause=e)
+
+    def _extract_n_function_name(self, jscode):
+        return self._search_regex(
+            (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
+            jscode, 'Initial JS player n function name', group='nfunc')
+
+    def _extract_n_function(self, video_id, player_url):
+        player_id = self._extract_player_info(player_url)
+        func_code = self._downloader.cache.load('youtube-nsig', player_id)
+
+        if func_code:
+            jsi = JSInterpreter(func_code)
+        else:
+            jscode = self._load_player(video_id, player_url)
+            funcname = self._extract_n_function_name(jscode)
+            jsi = JSInterpreter(jscode)
+            func_code = jsi.extract_function_code(funcname)
+            self._downloader.cache.store('youtube-nsig', player_id, func_code)
+
+        if self.get_param('youtube_print_sig_code'):
+            self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
+
+        return lambda s: jsi.extract_function_from_code(*func_code)([s])
 
     def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
         """
@@ -1856,9 +1904,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 raise ExtractorError(error_msg)
             self.report_warning(error_msg)
             return
-        if self._load_player(video_id, player_url, fatal=fatal):
-            player_id = self._extract_player_info(player_url)
-            code = self._code_cache[player_id]
+        code = self._load_player(video_id, player_url, fatal=fatal)
+        if code:
             sts = int_or_none(self._search_regex(
                 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
                 'JS player signature timestamp', group='sts', fatal=fatal))
@@ -2440,6 +2487,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
                     fmt_url += '&' + sp + '=' + signature
 
+            query = parse_qs(fmt_url)
+            throttled = False
+            if query.get('ratebypass') != ['yes'] and query.get('n'):
+                try:
+                    fmt_url = update_url_query(fmt_url, {
+                        'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
+                except ExtractorError as e:
+                    self.report_warning(f'nsig extraction failed: You may experience throttling for some formats\n{e}', only_once=True)
+                    throttled = True
+
             if itag:
                 itags.append(itag)
                 stream_ids.append(stream_id)
@@ -2453,7 +2510,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'format_note': ', '.join(filter(None, (
                     '%s%s' % (audio_track.get('displayName') or '',
                               ' (default)' if audio_track.get('audioIsDefault') else ''),
-                    fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
+                    fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
+                    throttled and 'THROTTLED'))),
+                'source_preference': -10 if not throttled else -1,
                 'fps': int_or_none(fmt.get('fps')),
                 'height': height,
                 'quality': q(quality),
@@ -2645,12 +2704,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         if reason:
             self.raise_no_formats(reason, expected=True)
 
-        for f in formats:
-            if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']:  # throttled
-                f['source_preference'] = -10
-                # TODO: this method is not reliable
-                f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
-
         # Source is given priority since formats that throttle are given lower source_preference
         # When throttling issue is fully fixed, remove this
         self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang'))
diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py
index 7bda596102..5c79a8110d 100644
--- a/yt_dlp/jsinterp.py
+++ b/yt_dlp/jsinterp.py
@@ -1,5 +1,4 @@
-from __future__ import unicode_literals
-
+from collections.abc import MutableMapping
 import json
 import operator
 import re
@@ -22,11 +21,54 @@ _OPERATORS = [
     ('*', operator.mul),
 ]
 _ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
-_ASSIGN_OPERATORS.append(('=', lambda cur, right: right))
+_ASSIGN_OPERATORS.append(('=', (lambda cur, right: right)))
 
 _NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*'
 
 
+class JS_Break(ExtractorError):
+    def __init__(self):
+        ExtractorError.__init__(self, 'Invalid break')
+
+
+class JS_Continue(ExtractorError):
+    def __init__(self):
+        ExtractorError.__init__(self, 'Invalid continue')
+
+
+class LocalNameSpace(MutableMapping):
+    def __init__(self, *stack):
+        self.stack = tuple(stack)
+
+    def __getitem__(self, key):
+        for scope in self.stack:
+            if key in scope:
+                return scope[key]
+        raise KeyError(key)
+
+    def __setitem__(self, key, value):
+        for scope in self.stack:
+            if key in scope:
+                scope[key] = value
+                break
+        else:
+            self.stack[0][key] = value
+        return value
+
+    def __delitem__(self, key):
+        raise NotImplementedError('Deleting is not supported')
+
+    def __iter__(self):
+        for scope in self.stack:
+            yield from scope
+
+    def __len__(self, key):
+        return len(iter(self))
+
+    def __repr__(self):
+        return f'LocalNameSpace{self.stack}'
+
+
 class JSInterpreter(object):
     def __init__(self, code, objects=None):
         if objects is None:
@@ -34,11 +76,58 @@ class JSInterpreter(object):
         self.code = code
         self._functions = {}
         self._objects = objects
+        self.__named_object_counter = 0
+
+    def _named_object(self, namespace, obj):
+        self.__named_object_counter += 1
+        name = f'__yt_dlp_jsinterp_obj{self.__named_object_counter}'
+        namespace[name] = obj
+        return name
+
+    @staticmethod
+    def _seperate(expr, delim=',', max_split=None):
+        if not expr:
+            return
+        parens = {'(': 0, '{': 0, '[': 0, ']': 0, '}': 0, ')': 0}
+        start, splits, pos, max_pos = 0, 0, 0, len(delim) - 1
+        for idx, char in enumerate(expr):
+            if char in parens:
+                parens[char] += 1
+            is_in_parens = (parens['['] - parens[']']
+                            or parens['('] - parens[')']
+                            or parens['{'] - parens['}'])
+            if char == delim[pos] and not is_in_parens:
+                if pos == max_pos:
+                    pos = 0
+                    yield expr[start: idx - max_pos]
+                    start = idx + 1
+                    splits += 1
+                    if max_split and splits >= max_split:
+                        break
+                else:
+                    pos += 1
+            else:
+                pos = 0
+        yield expr[start:]
+
+    @staticmethod
+    def _seperate_at_paren(expr, delim):
+        seperated = 
audio_track.get('audioIsDefault') else ''), - fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))), + fmt.get('qualityLabel') or quality.replace('audio_quality_', ''), + throttled and 'THROTTLED'))), + 'source_preference': -10 if not throttled else -1, 'fps': int_or_none(fmt.get('fps')), 'height': height, 'quality': q(quality), @@ -2645,12 +2704,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if reason: self.raise_no_formats(reason, expected=True) - for f in formats: - if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled - f['source_preference'] = -10 - # TODO: this method is not reliable - f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)' - # Source is given priority since formats that throttle are given lower source_preference # When throttling issue is fully fixed, remove this self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang')) diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index 7bda596102..5c79a8110d 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -1,5 +1,4 @@ -from __future__ import unicode_literals - +from collections.abc import MutableMapping import json import operator import re @@ -22,11 +21,54 @@ _OPERATORS = [ ('*', operator.mul), ] _ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS] -_ASSIGN_OPERATORS.append(('=', lambda cur, right: right)) +_ASSIGN_OPERATORS.append(('=', (lambda cur, right: right))) _NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*' +class JS_Break(ExtractorError): + def __init__(self): + ExtractorError.__init__(self, 'Invalid break') + + +class JS_Continue(ExtractorError): + def __init__(self): + ExtractorError.__init__(self, 'Invalid continue') + + +class LocalNameSpace(MutableMapping): + def __init__(self, *stack): + self.stack = tuple(stack) + + def __getitem__(self, key): + for scope in self.stack: + if key in scope: + return scope[key] + raise KeyError(key) + + def __setitem__(self, key, value): + for scope in self.stack: + if key in scope: + scope[key] = value + break + else: + self.stack[0][key] = value + return value + + def __delitem__(self, key): + raise NotImplementedError('Deleting is not supported') + + def __iter__(self): + for scope in self.stack: + yield from scope + + def __len__(self, key): + return len(iter(self)) + + def __repr__(self): + return f'LocalNameSpace{self.stack}' + + class JSInterpreter(object): def __init__(self, code, objects=None): if objects is None: @@ -34,11 +76,58 @@ class JSInterpreter(object): self.code = code self._functions = {} self._objects = objects + self.__named_object_counter = 0 + + def _named_object(self, namespace, obj): + self.__named_object_counter += 1 + name = f'__yt_dlp_jsinterp_obj{self.__named_object_counter}' + namespace[name] = obj + return name + + @staticmethod + def _seperate(expr, delim=',', max_split=None): + if not expr: + return + parens = {'(': 0, '{': 0, '[': 0, ']': 0, '}': 0, ')': 0} + start, splits, pos, max_pos = 0, 0, 0, len(delim) - 1 + for idx, char in enumerate(expr): + if char in parens: + parens[char] += 1 + is_in_parens = (parens['['] - parens[']'] + or parens['('] - parens[')'] + or parens['{'] - parens['}']) + if char == delim[pos] and not is_in_parens: + if pos == max_pos: + pos = 0 + yield expr[start: idx - max_pos] + start = idx + 1 + splits += 1 + if max_split and splits >= max_split: + break + else: + pos += 1 + else: + pos = 0 + yield expr[start:] + + @staticmethod + def _seperate_at_paren(expr, delim): + seperated = 
list(JSInterpreter._seperate(expr, delim, 1)) + if len(seperated) < 2: + raise ExtractorError(f'No terminating paren {delim} in {expr}') + return seperated[0][1:].strip(), seperated[1].strip() def interpret_statement(self, stmt, local_vars, allow_recursion=100): if allow_recursion < 0: raise ExtractorError('Recursion limit reached') + sub_statements = list(self._seperate(stmt, ';')) + stmt = (sub_statements or ['']).pop() + for sub_stmt in sub_statements: + ret, should_abort = self.interpret_statement(sub_stmt, local_vars, allow_recursion - 1) + if should_abort: + return ret + should_abort = False stmt = stmt.lstrip() stmt_m = re.match(r'var\s', stmt) @@ -61,25 +150,118 @@ class JSInterpreter(object): if expr == '': # Empty expression return None - if expr.startswith('('): - parens_count = 0 - for m in re.finditer(r'[()]', expr): - if m.group(0) == '(': - parens_count += 1 - else: - parens_count -= 1 - if parens_count == 0: - sub_expr = expr[1:m.start()] - sub_result = self.interpret_expression( - sub_expr, local_vars, allow_recursion) - remaining_expr = expr[m.end():].strip() - if not remaining_expr: - return sub_result - else: - expr = json.dumps(sub_result) + remaining_expr - break + if expr.startswith('{'): + inner, outer = self._seperate_at_paren(expr, '}') + inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion - 1) + if not outer or should_abort: + return inner else: - raise ExtractorError('Premature end of parens in %r' % expr) + expr = json.dumps(inner) + outer + + if expr.startswith('('): + inner, outer = self._seperate_at_paren(expr, ')') + inner = self.interpret_expression(inner, local_vars, allow_recursion) + if not outer: + return inner + else: + expr = json.dumps(inner) + outer + + if expr.startswith('['): + inner, outer = self._seperate_at_paren(expr, ']') + name = self._named_object(local_vars, [ + self.interpret_expression(item, local_vars, allow_recursion) + for item in self._seperate(inner)]) + expr = name + outer + + m = re.match(r'try\s*', expr) + if m: + if expr[m.end()] == '{': + try_expr, expr = self._seperate_at_paren(expr[m.end():], '}') + else: + try_expr, expr = expr[m.end() - 1:], '' + ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion - 1) + if should_abort: + return ret + return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0] + + m = re.match(r'catch\s*\(', expr) + if m: + # We ignore the catch block + _, expr = self._seperate_at_paren(expr, '}') + return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0] + + m = re.match(r'for\s*\(', expr) + if m: + constructor, remaining = self._seperate_at_paren(expr[m.end() - 1:], ')') + if remaining.startswith('{'): + body, expr = self._seperate_at_paren(remaining, '}') + else: + m = re.match(r'switch\s*\(', remaining) # FIXME + if m: + switch_val, remaining = self._seperate_at_paren(remaining[m.end() - 1:], ')') + body, expr = self._seperate_at_paren(remaining, '}') + body = 'switch(%s){%s}' % (switch_val, body) + else: + body, expr = remaining, '' + start, cndn, increment = self._seperate(constructor, ';') + if self.interpret_statement(start, local_vars, allow_recursion - 1)[1]: + raise ExtractorError( + f'Premature return in the initialization of a for loop in {constructor!r}') + while True: + if not self.interpret_expression(cndn, local_vars, allow_recursion): + break + try: + ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion - 1) + if should_abort: + return ret + except JS_Break: + break + 
except JS_Continue: + pass + if self.interpret_statement(increment, local_vars, allow_recursion - 1)[1]: + raise ExtractorError( + f'Premature return in the initialization of a for loop in {constructor!r}') + return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0] + + m = re.match(r'switch\s*\(', expr) + if m: + switch_val, remaining = self._seperate_at_paren(expr[m.end() - 1:], ')') + switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion) + body, expr = self._seperate_at_paren(remaining, '}') + body, default = body.split('default:') if 'default:' in body else (body, None) + items = body.split('case ')[1:] + if default: + items.append(f'default:{default}') + matched = False + for item in items: + case, stmt = [i.strip() for i in self._seperate(item, ':', 1)] + matched = matched or case == 'default' or switch_val == self.interpret_expression(case, local_vars, allow_recursion) + if matched: + try: + ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion - 1) + if should_abort: + return ret + except JS_Break: + break + return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0] + + # Comma seperated statements + sub_expressions = list(self._seperate(expr)) + expr = sub_expressions.pop().strip() if sub_expressions else '' + for sub_expr in sub_expressions: + self.interpret_expression(sub_expr, local_vars, allow_recursion) + + for m in re.finditer(rf'''(?x) + (?P\+\+|--)(?P{_NAME_RE})| + (?P{_NAME_RE})(?P\+\+|--)''', expr): + var = m.group('var1') or m.group('var2') + start, end = m.span() + sign = m.group('pre_sign') or m.group('post_sign') + ret = local_vars[var] + local_vars[var] += 1 if sign[0] == '+' else -1 + if m.group('pre_sign'): + ret = local_vars[var] + expr = expr[:start] + json.dumps(ret) + expr[end:] for op, opfunc in _ASSIGN_OPERATORS: m = re.match(r'''(?x) @@ -88,14 +270,13 @@ class JSInterpreter(object): (?P.*)$''' % (_NAME_RE, re.escape(op)), expr) if not m: continue - right_val = self.interpret_expression( - m.group('expr'), local_vars, allow_recursion - 1) + right_val = self.interpret_expression(m.group('expr'), local_vars, allow_recursion) if m.groupdict().get('index'): lvar = local_vars[m.group('out')] - idx = self.interpret_expression( - m.group('index'), local_vars, allow_recursion) - assert isinstance(idx, int) + idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion) + if not isinstance(idx, int): + raise ExtractorError(f'List indices must be integers: {idx}') cur = lvar[idx] val = opfunc(cur, right_val) lvar[idx] = val @@ -109,8 +290,13 @@ class JSInterpreter(object): if expr.isdigit(): return int(expr) + if expr == 'break': + raise JS_Break() + elif expr == 'continue': + raise JS_Continue() + var_m = re.match( - r'(?!if|return|true|false)(?P%s)$' % _NAME_RE, + r'(?!if|return|true|false|null)(?P%s)$' % _NAME_RE, expr) if var_m: return local_vars[var_m.group('name')] @@ -124,91 +310,154 @@ class JSInterpreter(object): r'(?P%s)\[(?P.+)\]$' % _NAME_RE, expr) if m: val = local_vars[m.group('in')] - idx = self.interpret_expression( - m.group('idx'), local_vars, allow_recursion - 1) + idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion) return val[idx] + for op, opfunc in _OPERATORS: + seperated = list(self._seperate(expr, op)) + if len(seperated) < 2: + continue + right_val = seperated.pop() + left_val = op.join(seperated) + left_val, should_abort = self.interpret_statement( + left_val, local_vars, allow_recursion - 1) + if should_abort: + raise 
ExtractorError(f'Premature left-side return of {op} in {expr!r}') + right_val, should_abort = self.interpret_statement( + right_val, local_vars, allow_recursion - 1) + if should_abort: + raise ExtractorError(f'Premature right-side return of {op} in {expr!r}') + return opfunc(left_val or 0, right_val) + m = re.match( - r'(?P%s)(?:\.(?P[^(]+)|\[(?P[^]]+)\])\s*(?:\(+(?P[^()]*)\))?$' % _NAME_RE, + r'(?P%s)(?:\.(?P[^(]+)|\[(?P[^]]+)\])\s*' % _NAME_RE, expr) if m: variable = m.group('var') member = remove_quotes(m.group('member') or m.group('member2')) - arg_str = m.group('args') - - if variable in local_vars: - obj = local_vars[variable] + arg_str = expr[m.end():] + if arg_str.startswith('('): + arg_str, remaining = self._seperate_at_paren(arg_str, ')') else: - if variable not in self._objects: - self._objects[variable] = self.extract_object(variable) - obj = self._objects[variable] + arg_str, remaining = None, arg_str - if arg_str is None: - # Member access - if member == 'length': - return len(obj) - return obj[member] + def assertion(cndn, msg): + """ assert, but without risk of getting optimized out """ + if not cndn: + raise ExtractorError(f'{member} {msg}: {expr}') - assert expr.endswith(')') - # Function call - if arg_str == '': - argvals = tuple() - else: - argvals = tuple([ + def eval_method(): + nonlocal member + if variable == 'String': + obj = str + elif variable in local_vars: + obj = local_vars[variable] + else: + if variable not in self._objects: + self._objects[variable] = self.extract_object(variable) + obj = self._objects[variable] + + if arg_str is None: + # Member access + if member == 'length': + return len(obj) + return obj[member] + + # Function call + argvals = [ self.interpret_expression(v, local_vars, allow_recursion) - for v in arg_str.split(',')]) + for v in self._seperate(arg_str)] - if member == 'split': - assert argvals == ('',) - return list(obj) - if member == 'join': - assert len(argvals) == 1 - return argvals[0].join(obj) - if member == 'reverse': - assert len(argvals) == 0 - obj.reverse() - return obj - if member == 'slice': - assert len(argvals) == 1 - return obj[argvals[0]:] - if member == 'splice': - assert isinstance(obj, list) - index, howMany = argvals - res = [] - for i in range(index, min(index + howMany, len(obj))): - res.append(obj.pop(index)) - return res + if obj == str: + if member == 'fromCharCode': + assertion(argvals, 'takes one or more arguments') + return ''.join(map(chr, argvals)) + raise ExtractorError(f'Unsupported string method {member}') - return obj[member](argvals) + if member == 'split': + assertion(argvals, 'takes one or more arguments') + assertion(argvals == [''], 'with arguments is not implemented') + return list(obj) + elif member == 'join': + assertion(isinstance(obj, list), 'must be applied on a list') + assertion(len(argvals) == 1, 'takes exactly one argument') + return argvals[0].join(obj) + elif member == 'reverse': + assertion(not argvals, 'does not take any arguments') + obj.reverse() + return obj + elif member == 'slice': + assertion(isinstance(obj, list), 'must be applied on a list') + assertion(len(argvals) == 1, 'takes exactly one argument') + return obj[argvals[0]:] + elif member == 'splice': + assertion(isinstance(obj, list), 'must be applied on a list') + assertion(argvals, 'takes one or more arguments') + index, howMany = (argvals + [len(obj)])[:2] + if index < 0: + index += len(obj) + add_items = argvals[2:] + res = [] + for i in range(index, min(index + howMany, len(obj))): + res.append(obj.pop(index)) + for i, 
item in enumerate(add_items): + obj.insert(index + i, item) + return res + elif member == 'unshift': + assertion(isinstance(obj, list), 'must be applied on a list') + assertion(argvals, 'takes one or more arguments') + for item in reversed(argvals): + obj.insert(0, item) + return obj + elif member == 'pop': + assertion(isinstance(obj, list), 'must be applied on a list') + assertion(not argvals, 'does not take any arguments') + if not obj: + return + return obj.pop() + elif member == 'push': + assertion(argvals, 'takes one or more arguments') + obj.extend(argvals) + return obj + elif member == 'forEach': + assertion(argvals, 'takes one or more arguments') + assertion(len(argvals) <= 2, 'takes at-most 2 arguments') + f, this = (argvals + [''])[:2] + return [f((item, idx, obj), this=this) for idx, item in enumerate(obj)] + elif member == 'indexOf': + assertion(argvals, 'takes one or more arguments') + assertion(len(argvals) <= 2, 'takes at-most 2 arguments') + idx, start = (argvals + [0])[:2] + try: + return obj.index(idx, start) + except ValueError: + return -1 - for op, opfunc in _OPERATORS: - m = re.match(r'(?P.+?)%s(?P.+)' % re.escape(op), expr) - if not m: - continue - x, abort = self.interpret_statement( - m.group('x'), local_vars, allow_recursion - 1) - if abort: - raise ExtractorError( - 'Premature left-side return of %s in %r' % (op, expr)) - y, abort = self.interpret_statement( - m.group('y'), local_vars, allow_recursion - 1) - if abort: - raise ExtractorError( - 'Premature right-side return of %s in %r' % (op, expr)) - return opfunc(x, y) + if isinstance(obj, list): + member = int(member) + return obj[member](argvals) - m = re.match( - r'^(?P%s)\((?P[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr) + if remaining: + return self.interpret_expression( + self._named_object(local_vars, eval_method()) + remaining, + local_vars, allow_recursion) + else: + return eval_method() + + m = re.match(r'^(?P%s)\((?P[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr) if m: fname = m.group('func') argvals = tuple([ int(v) if v.isdigit() else local_vars[v] - for v in m.group('args').split(',')]) if len(m.group('args')) > 0 else tuple() - if fname not in self._functions: + for v in self._seperate(m.group('args'))]) + if fname in local_vars: + return local_vars[fname](argvals) + elif fname not in self._functions: self._functions[fname] = self.extract_function(fname) return self._functions[fname](argvals) - raise ExtractorError('Unsupported JS expression %r' % expr) + if expr: + raise ExtractorError('Unsupported JS expression %r' % expr) def extract_object(self, objname): _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')''' @@ -233,30 +482,55 @@ class JSInterpreter(object): return obj - def extract_function(self, funcname): + def extract_function_code(self, funcname): + """ @returns argnames, code """ func_m = re.search( r'''(?x) (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s* \((?P[^)]*)\)\s* - \{(?P[^}]+)\}''' % ( + (?P\{(?:(?!};)[^"]|"([^"]|\\")*")+\})''' % ( re.escape(funcname), re.escape(funcname), re.escape(funcname)), self.code) + code, _ = self._seperate_at_paren(func_m.group('code'), '}') # refine the match if func_m is None: raise ExtractorError('Could not find JS function %r' % funcname) - argnames = func_m.group('args').split(',') + return func_m.group('args').split(','), code - return self.build_function(argnames, func_m.group('code')) + def extract_function(self, funcname): + return self.extract_function_from_code(*self.extract_function_code(funcname)) + + def 
extract_function_from_code(self, argnames, code, *global_stack):
+        local_vars = {}
+        while True:
+            mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code)
+            if mobj is None:
+                break
+            start, body_start = mobj.span()
+            body, remaining = self._seperate_at_paren(code[body_start - 1:], '}')
+            name = self._named_object(
+                local_vars,
+                self.extract_function_from_code(
+                    [str.strip(x) for x in mobj.group('args').split(',')],
+                    body, local_vars, *global_stack))
+            code = code[:start] + name + remaining
+        return self.build_function(argnames, code, local_vars, *global_stack)
 
     def call_function(self, funcname, *args):
-        f = self.extract_function(funcname)
-        return f(args)
+        return self.extract_function(funcname)(args)
 
-    def build_function(self, argnames, code):
-        def resf(args):
-            local_vars = dict(zip(argnames, args))
-            for stmt in code.split(';'):
-                res, abort = self.interpret_statement(stmt, local_vars)
-                if abort:
+    def build_function(self, argnames, code, *global_stack):
+        global_stack = list(global_stack) or [{}]
+        local_vars = global_stack.pop(0)
+
+        def resf(args, **kwargs):
+            local_vars.update({
+                **dict(zip(argnames, args)),
+                **kwargs
+            })
+            var_stack = LocalNameSpace(local_vars, *global_stack)
+            for stmt in self._seperate(code.replace('\n', ''), ';'):
+                ret, should_abort = self.interpret_statement(stmt, var_stack)
+                if should_abort:
                     break
-            return res
+            return ret
         return resf
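With per-call scopes kept on a `LocalNameSpace` stack, the new control-flow support can be exercised directly. A minimal sketch mirroring the added tests; the JS function body is illustrative:

    from yt_dlp.jsinterp import JSInterpreter

    jsi = JSInterpreter(
        'function x(f){ switch(f){ case 1:f+=1; case 2:f+=2; break; default:f=0; } return f }')
    assert jsi.call_function('x', 1) == 4  # case 1 falls through into case 2, then breaks
    assert jsi.call_function('x', 9) == 0  # no case matches, so the default branch runs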
Připravil Peter Serge Butko', + }, + 'playlist': [{ + 'info_dict': { + 'id': '61924494877311053', + 'ext': 'mp4', + 'title': 'Queer: Bogotart (Varování 18+)', + 'duration': 11.9, + }, + }, { + 'info_dict': { + 'id': '61924494877068022', + 'ext': 'mp4', + 'title': 'Queer: Bogotart (Queer)', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 1558.3, + }, + }], + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + # iframe embed + 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/', + 'only_matching': True, }] def _real_extract(self, url): playlist_id = self._match_id(url) - + parsed_url = compat_urllib_parse_urlparse(url) webpage = self._download_webpage(url, playlist_id) + site_name = self._og_search_property('site_name', webpage, fatal=False, default=None) + playlist_title = self._og_search_title(webpage, default=None) + if site_name and playlist_title: + playlist_title = playlist_title.replace(f' — {site_name}', '', 1) + playlist_description = self._og_search_description(webpage, default=None) + if playlist_description: + playlist_description = playlist_description.replace('\xa0', ' ') + + if parsed_url.path.startswith('/porady/'): + refer_url = update_url_query(unescapeHTML(self._search_regex( + (r']*\bdata-url=(["\'])(?P(?:(?!\1).)+)\1', + r']+\bsrc=(["\'])(?P(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'), + webpage, 'iframe player url', group='url')), query={'autoStart': 'true'}) + webpage = self._download_webpage(refer_url, playlist_id) NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' if '%s
</p>
' % NOT_AVAILABLE_STRING in webpage: @@ -100,7 +132,7 @@ class CeskaTelevizeIE(InfoExtractor): data = { 'playlist[0][type]': type_, 'playlist[0][id]': episode_id, - 'requestUrl': compat_urllib_parse_urlparse(url).path, + 'requestUrl': parsed_url.path, 'requestSource': 'iVysilani', } @@ -108,7 +140,7 @@ class CeskaTelevizeIE(InfoExtractor): for user_agent in (None, USER_AGENTS['Safari']): req = sanitized_Request( - 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', + 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/', data=urlencode_postdata(data)) req.add_header('Content-type', 'application/x-www-form-urlencoded') @@ -130,9 +162,6 @@ class CeskaTelevizeIE(InfoExtractor): req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) req.add_header('Referer', url) - playlist_title = self._og_search_title(webpage, default=None) - playlist_description = self._og_search_description(webpage, default=None) - playlist = self._download_json(req, playlist_id, fatal=False) if not playlist: continue @@ -237,54 +266,3 @@ class CeskaTelevizeIE(InfoExtractor): yield line return '\r\n'.join(_fix_subtitle(subtitles)) - - -class CeskaTelevizePoradyIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P[^/#?]+)' - _TESTS = [{ - # video with 18+ caution trailer - 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', - 'info_dict': { - 'id': '215562210900007-bogotart', - 'title': 'Queer: Bogotart', - 'description': 'Alternativní průvodce současným queer světem', - }, - 'playlist': [{ - 'info_dict': { - 'id': '61924494876844842', - 'ext': 'mp4', - 'title': 'Queer: Bogotart (Varování 18+)', - 'duration': 10.2, - }, - }, { - 'info_dict': { - 'id': '61924494877068022', - 'ext': 'mp4', - 'title': 'Queer: Bogotart (Queer)', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 1558.3, - }, - }], - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - # iframe embed - 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - data_url = update_url_query(unescapeHTML(self._search_regex( - (r']*\bdata-url=(["\'])(?P(?:(?!\1).)+)\1', - r']+\bsrc=(["\'])(?P(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'), - webpage, 'iframe player url', group='url')), query={ - 'autoStart': 'true', - }) - - return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key()) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 9d963ee46e..78952d2688 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -235,10 +235,7 @@ from .ccc import ( from .ccma import CCMAIE from .cctv import CCTVIE from .cda import CDAIE -from .ceskatelevize import ( - CeskaTelevizeIE, - CeskaTelevizePoradyIE, -) +from .ceskatelevize import CeskaTelevizeIE from .cgtn import CGTNIE from .channel9 import Channel9IE from .charlierose import CharlieRoseIE From 8dcf65c92ec899a34cf57a02809520698f1d7b66 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sun, 31 Oct 2021 05:08:04 +0000 Subject: [PATCH 349/641] [Instagram] Add login to playlist (#1488) Authored by: u-spec-png --- yt_dlp/extractor/instagram.py | 108 ++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 52 deletions(-) diff --git a/yt_dlp/extractor/instagram.py 
b/yt_dlp/extractor/instagram.py index 8c935c2514..6ed20d9c6d 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import itertools @@ -25,9 +26,55 @@ from ..utils import ( ) -class InstagramIE(InfoExtractor): - _VALID_URL = r'(?Phttps?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P[^/?#&]+))' +class InstagramBaseIE(InfoExtractor): _NETRC_MACHINE = 'instagram' + _IS_LOGGED_IN = False + + def _login(self): + username, password = self._get_login_info() + if username is None or self._IS_LOGGED_IN: + return + + login_webpage = self._download_webpage( + 'https://www.instagram.com/accounts/login/', None, + note='Downloading login webpage', errnote='Failed to download login webpage') + + shared_data = self._parse_json( + self._search_regex( + r'window\._sharedData\s*=\s*({.+?});', + login_webpage, 'shared data', default='{}'), + None) + + login = self._download_json('https://www.instagram.com/accounts/login/ajax/', None, note='Logging in', headers={ + 'Accept': '*/*', + 'X-IG-App-ID': '936619743392459', + 'X-ASBD-ID': '198387', + 'X-IG-WWW-Claim': '0', + 'X-Requested-With': 'XMLHttpRequest', + 'X-CSRFToken': shared_data['config']['csrf_token'], + 'X-Instagram-AJAX': shared_data['rollout_hash'], + 'Referer': 'https://www.instagram.com/', + }, data=urlencode_postdata({ + 'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}', + 'username': username, + 'queryParams': '{}', + 'optIntoOneTap': 'false', + 'stopDeletionNonce': '', + 'trustedDeviceRecords': '{}', + })) + + if not login.get('authenticated'): + if login.get('message'): + raise ExtractorError(f'Unable to login: {login["message"]}') + raise ExtractorError('Unable to login') + InstagramBaseIE._IS_LOGGED_IN = True + + def _real_initialize(self): + self._login() + + +class InstagramIE(InstagramBaseIE): + _VALID_URL = r'(?Phttps?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P[^/?#&]+))' _TESTS = [{ 'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc', 'md5': '0d2da106a9d2631273e192b372806516', @@ -143,47 +190,6 @@ class InstagramIE(InfoExtractor): if mobj: return mobj.group('link') - def _login(self): - username, password = self._get_login_info() - if username is None: - return - - login_webpage = self._download_webpage( - 'https://www.instagram.com/accounts/login/', None, - note='Downloading login webpage', errnote='Failed to download login webpage') - - shared_data = self._parse_json( - self._search_regex( - r'window\._sharedData\s*=\s*({.+?});', - login_webpage, 'shared data', default='{}'), - None) - - login = self._download_json('https://www.instagram.com/accounts/login/ajax/', None, note='Logging in', headers={ - 'Accept': '*/*', - 'X-IG-App-ID': '936619743392459', - 'X-ASBD-ID': '198387', - 'X-IG-WWW-Claim': '0', - 'X-Requested-With': 'XMLHttpRequest', - 'X-CSRFToken': shared_data['config']['csrf_token'], - 'X-Instagram-AJAX': shared_data['rollout_hash'], - 'Referer': 'https://www.instagram.com/', - }, data=urlencode_postdata({ - 'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}', - 'username': username, - 'queryParams': '{}', - 'optIntoOneTap': 'false', - 'stopDeletionNonce': '', - 'trustedDeviceRecords': '{}', - })) - - if not login.get('authenticated'): - if login.get('message'): - raise ExtractorError(f'Unable to login: {login["message"]}') - raise ExtractorError('Unable to login') - - def _real_initialize(self): - self._login() - def _real_extract(self, url): mobj = self._match_valid_url(url) 
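         # _match_valid_url() applies this extractor's _VALID_URL regex to the
         # given URL; the pattern's named groups (such as 'id' below) are then
         # read off the returned match object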
video_id = mobj.group('id') @@ -333,9 +339,7 @@ class InstagramIE(InfoExtractor): } -class InstagramPlaylistIE(InfoExtractor): - # A superclass for handling any kind of query based on GraphQL which - # results in a playlist. +class InstagramPlaylistBaseIE(InstagramBaseIE): _gis_tmpl = None # used to cache GIS request type @@ -462,11 +466,11 @@ class InstagramPlaylistIE(InfoExtractor): self._extract_graphql(data, url), user_or_tag, user_or_tag) -class InstagramUserIE(InstagramPlaylistIE): +class InstagramUserIE(InstagramPlaylistBaseIE): _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P[^/]{2,})/?(?:$|[?#])' IE_DESC = 'Instagram user profile' IE_NAME = 'instagram:user' - _TEST = { + _TESTS = [{ 'url': 'https://instagram.com/porsche', 'info_dict': { 'id': 'porsche', @@ -478,7 +482,7 @@ class InstagramUserIE(InstagramPlaylistIE): 'skip_download': True, 'playlistend': 5, } - } + }] _QUERY_HASH = '42323d64886122307be10013ad2dcc44', @@ -496,11 +500,11 @@ class InstagramUserIE(InstagramPlaylistIE): } -class InstagramTagIE(InstagramPlaylistIE): +class InstagramTagIE(InstagramPlaylistBaseIE): _VALID_URL = r'https?://(?:www\.)?instagram\.com/explore/tags/(?P[^/]+)' IE_DESC = 'Instagram hashtag search' IE_NAME = 'instagram:tag' - _TEST = { + _TESTS = [{ 'url': 'https://instagram.com/explore/tags/lolcats', 'info_dict': { 'id': 'lolcats', @@ -512,7 +516,7 @@ class InstagramTagIE(InstagramPlaylistIE): 'skip_download': True, 'playlistend': 50, } - } + }] _QUERY_HASH = 'f92f56d47dc7a55b606908374b43a314', From 2f9e021299a451b576ce67c43135393157531991 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Sun, 31 Oct 2021 10:39:26 +0530 Subject: [PATCH 350/641] [PlanetMarathi] Add extractor (#1484) Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/planetmarathi.py | 76 +++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 yt_dlp/extractor/planetmarathi.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 78952d2688..5fc18f7a06 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1072,6 +1072,7 @@ from .pinterest import ( PinterestCollectionIE, ) from .pladform import PladformIE +from .planetmarathi import PlanetMarathiIE from .platzi import ( PlatziIE, PlatziCourseIE, diff --git a/yt_dlp/extractor/planetmarathi.py b/yt_dlp/extractor/planetmarathi.py new file mode 100644 index 0000000000..d1d9911f7d --- /dev/null +++ b/yt_dlp/extractor/planetmarathi.py @@ -0,0 +1,76 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + try_get, + unified_strdate, +) + + +class PlanetMarathiIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?planetmarathi\.com/titles/(?P[^/#&?$]+)' + _TESTS = [{ + 'url': 'https://www.planetmarathi.com/titles/ek-unad-divas', + 'playlist_mincount': 2, + 'info_dict': { + 'id': 'ek-unad-divas', + }, + 'playlist': [{ + 'info_dict': { + 'id': 'ASSETS-MOVIE-ASSET-01_ek-unad-divas', + 'ext': 'mp4', + 'title': 'ek unad divas', + 'alt_title': 'चित्रपट', + 'description': 'md5:41c7ed6b041c2fea9820a3f3125bd881', + 'season_number': None, + 'episode_number': 1, + 'duration': 5539, + 'upload_date': '20210829', + }, + }] # Trailer skipped + }, { + 'url': 'https://www.planetmarathi.com/titles/baap-beep-baap-season-1', + 'playlist_mincount': 10, + 'info_dict': { + 'id': 'baap-beep-baap-season-1', + }, + 'playlist': [{ + 'info_dict': { + 'id': 
'ASSETS-CHARACTER-PROFILE-SEASON-01-ASSET-01_baap-beep-baap-season-1', + 'ext': 'mp4', + 'title': 'Manohar Kanhere', + 'alt_title': 'मनोहर कान्हेरे', + 'description': 'md5:285ed45d5c0ab5522cac9a043354ebc6', + 'season_number': 1, + 'episode_number': 1, + 'duration': 29, + 'upload_date': '20210829', + }, + }] # Trailers, Episodes, other Character profiles skipped + }] + + def _real_extract(self, url): + id = self._match_id(url) + entries = [] + json_data = self._download_json(f'https://www.planetmarathi.com/api/v1/titles/{id}/assets', id)['assets'] + for asset in json_data: + asset_title = asset['mediaAssetName']['en'] + if asset_title == 'Movie': + asset_title = id.replace('-', ' ') + asset_id = f'{asset["sk"]}_{id}'.replace('#', '-') + formats, subtitles = self._extract_m3u8_formats_and_subtitles(asset['mediaAssetURL'], asset_id) + self._sort_formats(formats) + entries.append({ + 'id': asset_id, + 'title': asset_title, + 'alt_title': try_get(asset, lambda x: x['mediaAssetName']['mr']), + 'description': try_get(asset, lambda x: x['mediaAssetDescription']['en']), + 'season_number': asset.get('mediaAssetSeason'), + 'episode_number': asset.get('mediaAssetIndexForAssetType'), + 'duration': asset.get('mediaAssetDurationInSeconds'), + 'upload_date': unified_strdate(asset.get('created')), + 'formats': formats, + 'subtitles': subtitles, + }) + return self.playlist_result(entries, playlist_id=id) From b2f25dc242616bd9eae6d5dbbe7ff56280e7d396 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sun, 31 Oct 2021 05:10:42 +0000 Subject: [PATCH 351/641] [Olympics] Fix extractor (#1483) Authored by: u-spec-png --- yt_dlp/extractor/olympics.py | 73 ++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/yt_dlp/extractor/olympics.py b/yt_dlp/extractor/olympics.py index 0bc9206ed5..bca1f19280 100644 --- a/yt_dlp/extractor/olympics.py +++ b/yt_dlp/extractor/olympics.py @@ -2,22 +2,27 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import unified_strdate +from ..utils import ( + int_or_none, + try_get +) class OlympicsReplayIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?olympics\.com/tokyo-2020/(?:[a-z]{2}/)?replay/(?P[^/#&?]+)' + _VALID_URL = r'https?://(?:www\.)?olympics\.com(?:/tokyo-2020)?/[a-z]{2}/(?:replay|video)/(?P[^/#&?]+)' _TESTS = [{ - 'url': 'https://olympics.com/tokyo-2020/en/replay/300622eb-abc0-43ea-b03b-c5f2d429ec7b/jumping-team-qualifier', + 'url': 'https://olympics.com/fr/video/men-s-109kg-group-a-weightlifting-tokyo-2020-replays', 'info_dict': { - 'id': '300622eb-abc0-43ea-b03b-c5f2d429ec7b', + 'id': 'f6a0753c-8e6f-4b7d-a435-027054a4f8e9', 'ext': 'mp4', - 'title': 'Jumping Team Qualifier', - 'release_date': '20210806', - 'upload_date': '20210713', + 'title': '+109kg (H) Groupe A - Haltérophilie | Replay de Tokyo 2020', + 'upload_date': '20210801', + 'timestamp': 1627783200, + 'description': 'md5:c66af4a5bc7429dbcc43d15845ff03b3', }, 'params': { - 'format': 'bv', + 'format': 'bestvideo', + 'skip_download': True, }, }, { 'url': 'https://olympics.com/tokyo-2020/en/replay/bd242924-4b22-49a5-a846-f1d4c809250d/mens-bronze-medal-match-hun-esp', @@ -26,31 +31,41 @@ class OlympicsReplayIE(InfoExtractor): def _real_extract(self, url): id = self._match_id(url) - # The parameters are hardcoded in the webpage, it's not necessary to download the webpage just for these parameters. 
- # If in downloading webpage serves other functions aswell, then extract these parameters from it. - token_url = 'https://appovptok.ovpobs.tv/api/identity/app/token?api_key=OTk5NDcxOjpvY3N3LWFwaXVzZXI%3D&api_secret=ODY4ODM2MjE3ODMwYmVjNTAxMWZlMDJiMTYxZmY0MjFiMjMwMjllMjJmNDA1YWRiYzA5ODcxYTZjZTljZDkxOTo6NTM2NWIzNjRlMTM1ZmI2YWNjNmYzMGMzOGM3NzZhZTY%3D' - token = self._download_webpage(token_url, id) - headers = {'x-obs-app-token': token} - data_json = self._download_json(f'https://appocswtok.ovpobs.tv/api/schedule-sessions/{id}?include=stream', - id, headers=headers) - meta_data = data_json['data']['attributes'] - for t_dict in data_json['included']: - if t_dict.get('type') == 'Stream': - stream_data = t_dict['attributes'] + + webpage = self._download_webpage(url, id) + title = self._html_search_meta(('title', 'og:title', 'twitter:title'), webpage) + uuid = self._html_search_meta('episode_uid', webpage) + m3u8_url = self._html_search_meta('video_url', webpage) + json_ld = self._search_json_ld(webpage, uuid) + thumbnails_list = json_ld.get('image') + if not thumbnails_list: + thumbnails_list = self._html_search_regex( + r'["\']image["\']:\s*["\']([^"\']+)["\']', webpage, 'images', default='') + thumbnails_list = thumbnails_list.replace('[', '').replace(']', '').split(',') + thumbnails_list = [thumbnail.strip() for thumbnail in thumbnails_list] + thumbnails = [] + for thumbnail in thumbnails_list: + width_a, height_a, width = self._search_regex( + r'/images/image/private/t_(?P\d+)-(?P\d+)_(?P\d+)/primary/[\W\w\d]+', + thumbnail, 'thumb', group=(1, 2, 3), default=(None, None, None)) + width_a, height_a, width = int_or_none(width_a), int_or_none(height_a), int_or_none(width) + thumbnails.append({ + 'url': thumbnail, + 'width': width, + 'height': int_or_none(try_get(width, lambda x: x * height_a / width_a)) + }) m3u8_url = self._download_json( - 'https://meteringtok.ovpobs.tv/api/playback-sessions', id, headers=headers, query={ - 'alias': stream_data['alias'], - 'stream': stream_data['stream'], - 'type': 'vod' - })['data']['attributes']['url'] - formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id) + f'https://olympics.com/tokenGenerator?url={m3u8_url}', uuid, note='Downloading m3u8 url') + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, m3u8_id='hls') self._sort_formats(formats) return { - 'id': id, - 'title': meta_data['title'], - 'release_date': unified_strdate(meta_data.get('start') or meta_data.get('broadcastPublished')), - 'upload_date': unified_strdate(meta_data.get('publishedAt')), + 'id': uuid, + 'title': title, + 'timestamp': json_ld.get('timestamp'), + 'description': json_ld.get('description'), + 'thumbnails': thumbnails, + 'duration': json_ld.get('duration'), 'formats': formats, 'subtitles': subtitles, } From 5b6cb5620797e745a113cfb8118ea7def1484784 Mon Sep 17 00:00:00 2001 From: kaz-us <32769754+kaz-us@users.noreply.github.com> Date: Sun, 31 Oct 2021 09:13:49 +0400 Subject: [PATCH 352/641] [vk] Add subtitles (#1480) Authored by: kaz-us --- yt_dlp/extractor/vk.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index d8a9b9ab47..a8a980de69 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -471,6 +471,13 @@ class VKIE(VKBaseIE): }) self._sort_formats(formats) + subtitles = {} + for sub in data.get('subs') or {}: + subtitles.setdefault(sub.get('lang', 'en'), []).append({ + 'ext': sub.get('title', '.srt').split('.')[-1], + 'url': url_or_none(sub.get('url')), + }) 
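
The `setdefault` call above is the usual way to build yt-dlp's `subtitles` mapping of language code to a list of subtitle dicts: it creates the per-language list the first time a language is seen and appends to it afterwards. A minimal self-contained sketch of the same pattern, with made-up entries standing in for VK's `data['subs']`:

```python
subtitles = {}
# hypothetical data; the real entries come from the page's player JSON
subs = [{'lang': 'en', 'title': 'english.srt', 'url': 'https://example.com/english.srt'}]
for sub in subs:
    subtitles.setdefault(sub.get('lang', 'en'), []).append({
        'ext': sub.get('title', '.srt').split('.')[-1],  # extension guessed from the title, here 'srt'
        'url': sub.get('url'),
    })
assert subtitles == {'en': [{'ext': 'srt', 'url': 'https://example.com/english.srt'}]}
```
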
+ return { 'id': video_id, 'formats': formats, @@ -484,6 +491,7 @@ class VKIE(VKBaseIE): 'like_count': int_or_none(mv_data.get('likes')), 'comment_count': int_or_none(mv_data.get('commcount')), 'is_live': is_live, + 'subtitles': subtitles, } From da4832007574a60b397dff11f26cc20cace685de Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 31 Oct 2021 13:08:03 +0530 Subject: [PATCH 353/641] [linkedin] Don't login multiple times --- yt_dlp/extractor/linkedin.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py index 3ce906e2f1..c2d347efd2 100644 --- a/yt_dlp/extractor/linkedin.py +++ b/yt_dlp/extractor/linkedin.py @@ -19,6 +19,7 @@ from ..utils import ( class LinkedInLearningBaseIE(InfoExtractor): _NETRC_MACHINE = 'linkedin' _LOGIN_URL = 'https://www.linkedin.com/uas/login?trk=learning' + _logged_in = False def _call_api(self, course_slug, fields, video_slug=None, resolution=None): query = { @@ -34,6 +35,8 @@ class LinkedInLearningBaseIE(InfoExtractor): }) sub = ' %dp' % resolution api_url = 'https://www.linkedin.com/learning-api/detailedCourses' + if not self._get_cookies(api_url).get('JSESSIONID'): + self.raise_login_required() return self._download_json( api_url, video_slug, 'Downloading%s JSON metadata' % sub, headers={ 'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value, @@ -50,6 +53,8 @@ class LinkedInLearningBaseIE(InfoExtractor): return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug) def _real_initialize(self): + if self._logged_in: + return email, password = self._get_login_info() if email is None: return @@ -72,6 +77,7 @@ class LinkedInLearningBaseIE(InfoExtractor): login_submit_page, 'error', default=None) if error: raise ExtractorError(error, expected=True) + LinkedInLearningBaseIE._logged_in = True class LinkedInLearningIE(LinkedInLearningBaseIE): From a0bb6ce58db5b3124962037ca12e78cbd348f56c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 31 Oct 2021 13:26:44 +0530 Subject: [PATCH 354/641] [youtube] refactor itag processing --- yt_dlp/extractor/youtube.py | 56 +++++++++++++++---------------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 56cd2ed8d3..64475edec0 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2434,7 +2434,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return prs, player_url def _extract_formats(self, streaming_data, video_id, player_url, is_live): - itags, stream_ids = [], [] + itags, stream_ids = {}, [] itag_qualities, res_qualities = {}, {} q = qualities([ # Normally tiny is the smallest video-only formats. 
But @@ -2498,7 +2498,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): throttled = True if itag: - itags.append(itag) + itags[itag] = 'https' stream_ids.append(stream_id) tbr = float_or_none( @@ -2548,46 +2548,36 @@ class YoutubeIE(YoutubeBaseInfoExtractor): and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)) get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True) - def guess_quality(f): - for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)): - if val in qdict: - return q(qdict[val]) - return -1 + def process_manifest_format(f, proto, itag): + if itag in itags: + if itags[itag] == proto or f'{itag}-{proto}' in itags: + return False + itag = f'{itag}-{proto}' + if itag: + f['format_id'] = itag + itags[itag] = proto + + f['quality'] = next(( + q(qdict[val]) + for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)) + if val in qdict), -1) + return True for sd in streaming_data: hls_manifest_url = get_hls and sd.get('hlsManifestUrl') if hls_manifest_url: for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False): - itag = self._search_regex( - r'/itag/(\d+)', f['url'], 'itag', default=None) - if itag in itags: - itag += '-hls' - if itag in itags: - continue - if itag: - f['format_id'] = itag - itags.append(itag) - f['quality'] = guess_quality(f) - yield f + if process_manifest_format(f, 'hls', self._search_regex( + r'/itag/(\d+)', f['url'], 'itag', default=None)): + yield f dash_manifest_url = get_dash and sd.get('dashManifestUrl') if dash_manifest_url: for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False): - itag = f['format_id'] - if itag in itags: - itag += '-dash' - if itag in itags: - continue - if itag: - f['format_id'] = itag - itags.append(itag) - f['quality'] = guess_quality(f) - filesize = int_or_none(self._search_regex( - r'/clen/(\d+)', f.get('fragment_base_url') - or f['url'], 'file size', default=None)) - if filesize: - f['filesize'] = filesize - yield f + if process_manifest_format(f, 'dash', f['format_id']): + f['filesize'] = int_or_none(self._search_regex( + r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None)) + yield f def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) From 0930b11fdaff2141ad951a8ed6d90417bfde7059 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 31 Oct 2021 14:45:59 +0530 Subject: [PATCH 355/641] [docs,cleanup] Improve docs and minor cleanup Closes #1387, #1404, #1408, #1485, #1415, #1450, #1492 --- .github/workflows/build.yml | 6 +- CONTRIBUTING.md | 2 +- README.md | 116 +++++++++++++++++++++------------- yt_dlp/YoutubeDL.py | 4 +- yt_dlp/__init__.py | 1 + yt_dlp/cookies.py | 4 +- yt_dlp/extractor/common.py | 4 +- yt_dlp/extractor/telemundo.py | 2 +- yt_dlp/extractor/tiktok.py | 8 +-- yt_dlp/options.py | 12 ++-- 10 files changed, 93 insertions(+), 66 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3329c141f0..0fff6cae36 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -115,12 +115,12 @@ jobs: release_name: yt-dlp ${{ steps.bump_version.outputs.ytdlp_version }} commitish: ${{ steps.push_update.outputs.head_sha }} body: | - ### Changelog: - ${{ env.changelog }} + #### [A description of the various files]((https://github.com/yt-dlp/yt-dlp#release-files)) are in the README --- - ### See [this](https://github.com/yt-dlp/yt-dlp#release-files) 
for a description of the release files + ### Changelog: + ${{ env.changelog }} draft: false prerelease: false - name: Upload yt-dlp Unix binary diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index fb539ec0da..2490004909 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -148,7 +148,7 @@ If you want to create a build of yt-dlp yourself, you can follow the instruction Before you start writing code for implementing a new feature, open an issue explaining your feature request and atleast one use case. This allows the maintainers to decide whether such a feature is desired for the project in the first place, and will provide an avenue to discuss some implementation details. If you open a pull request for a new feature without discussing with us first, do not be surprised when we ask for large changes to the code, or even reject it outright. -The same applies for overarching changes to the architecture, documentation or code style +The same applies for changes to the documentation, code style, or overarching changes to the architecture ## Adding support for a new site diff --git a/README.md b/README.md index e2fbbf2ae6..31bfca6a8c 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,6 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t * [Opening an Issue](CONTRIBUTING.md#opening-an-issue) * [Developer Instructions](CONTRIBUTING.md#developer-instructions) * [MORE](#more) - # NEW FEATURES @@ -123,7 +122,7 @@ If you are coming from [youtube-dl](https://github.com/ytdl-org/youtube-dl), the ### Differences in default behavior -Some of yt-dlp's default options are different from that of youtube-dl and youtube-dlc. +Some of yt-dlp's default options are different from that of youtube-dl and youtube-dlc: * The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`), no longer work. See [removed options](#Removed) for details * `avconv` is not supported as as an alternative to `ffmpeg` @@ -143,7 +142,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this * Thumbnail embedding in `mp4` is done with mutagen if possible. Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead * Some private fields such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this -* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the seperate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this. +* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the seperate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. 
`--compat-options no-keep-subs` can be used to revert this For ease of use, a few more compat options are available: * `--compat-options all`: Use all compat options @@ -152,17 +151,14 @@ For ease of use, a few more compat options are available: # INSTALLATION -yt-dlp is not platform specific. So it should work on your Unix box, on Windows or on macOS You can install yt-dlp using one of the following methods: -* Download [the binary](#release-files) from the [latest release](https://github.com/yt-dlp/yt-dlp/releases/latest) -* With Homebrew, `brew install yt-dlp/taps/yt-dlp` -* Use [PyPI package](https://pypi.org/project/yt-dlp): `python3 -m pip install --upgrade yt-dlp` -* Install master branch: `python3 -m pip3 install -U https://github.com/yt-dlp/yt-dlp/archive/master.zip` -Note that on some systems, you may need to use `py` or `python` instead of `python3` +#### Using the release binary -UNIX users (Linux, macOS, BSD) can also install the [latest release](https://github.com/yt-dlp/yt-dlp/releases/latest) one of the following ways: +You can simply download the [correct binary file](#release-files) for your OS: **[[Windows](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)] [[UNIX-like](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)]** + +In UNIX-like OSes (MacOS, Linux, BSD), you can also install the same in one of the following ways: ``` sudo curl -L https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o /usr/local/bin/yt-dlp @@ -179,16 +175,41 @@ sudo aria2c https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o sudo chmod a+rx /usr/local/bin/yt-dlp ``` -macOS or Linux users that are using Homebrew (formerly known as Linuxbrew for Linux users) can also install it by: +PS: The manpages, shell completion files etc. are available in [yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) + +#### With [PIP](https://pypi.org/project/pip) + +You can install the [PyPI package](https://pypi.org/project/yt-dlp) with: +``` +python3 -m pip install -U yt-dlp +``` + +On some systems (like Termux), it is not possible to install pycryptodomex. In that case, install without dependancies: +``` +python3 -m pip install --no-deps -U yt-dlp +``` + +You can also install the master branch with: +``` +python3 -m pip3 install -U https://github.com/yt-dlp/yt-dlp/archive/master.zip +``` + +Note that on some systems, you may need to use `py` or `python` instead of `python3` + +#### With [Homebrew](https://brew.sh) + +macOS or Linux users that are using Homebrew can also install it by: ``` brew install yt-dlp/taps/yt-dlp ``` ### UPDATE -You can use `yt-dlp -U` to update if you are using the provided release. -If you are using `pip`, simply re-run the same command that was used to install the program. -If you have installed using Homebrew, run `brew upgrade yt-dlp/taps/yt-dlp` +You can use `yt-dlp -U` to update if you are [using the provided release](#using-the-release-binary) + +If you [installed with pip](#with-pip), simply re-run the same command that was used to install the program + +If you [installed using Homebrew](#with-homebrew), run `brew upgrade yt-dlp/taps/yt-dlp` ### RELEASE FILES @@ -196,18 +217,18 @@ If you have installed using Homebrew, run `brew upgrade yt-dlp/taps/yt-dlp` File|Description :---|:--- -[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform independant binary. 
Needs Python (Recommended for **UNIX-like systems**) -[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win7 SP1+) standalone x64 binary (Recommended for **Windows**) +[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform-independant binary. Needs Python (recommended for **UNIX-like systems**) +[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win7 SP1+) standalone x64 binary (recommended for **Windows**) #### Alternatives File|Description :---|:--- [yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|MacOS (10.15+) standalone executable -[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Vista SP2+) standalone x86 (32bit) binary +[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Vista SP2+) standalone x86 (32-bit) binary [yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`.
Does not contain `pycryptodomex`, needs VC++14 -[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged windows executable (No auto-update) -[yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS (10.15+) executable (No auto-update) +[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged Windows executable (no auto-update) +[yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS (10.15+) executable (no auto-update) #### Misc @@ -227,20 +248,20 @@ On windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https: While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly recommended * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging seperate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. Licence [depends on the build](https://www.ffmpeg.org/legal.html) -* [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. Licenced under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING) -* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome) - For decrypting AES-128 HLS streams and various other data. Licenced under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) -* [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licenced under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE) -* [**keyring**](https://github.com/jaraco/keyring) - For decrypting cookies of chromium-based browsers on Linux. Licenced under [MIT](https://github.com/jaraco/keyring/blob/main/LICENSE) -* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licenced under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) -* [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licenced under [GPLv2+](http://rtmpdump.mplayerhq.hu) -* [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rstp` streams. ffmpeg will be used as a fallback. Licenced under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright) -* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licenced under [BSD3](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD) -* [**sponskrub**](https://github.com/faissaloo/SponSkrub) - For using the now **deprecated** [sponskrub options](#sponskrub-options). Licenced under [GPLv3+](https://github.com/faissaloo/SponSkrub/blob/master/LICENCE.md) +* [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING) +* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome) - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) +* [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. 
Licensed under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE) +* [**keyring**](https://github.com/jaraco/keyring) - For decrypting cookies of chromium-based browsers on Linux. Licensed under [MIT](https://github.com/jaraco/keyring/blob/main/LICENSE) +* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) +* [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu) +* [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rstp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright) +* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD3](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD) +* [**sponskrub**](https://github.com/faissaloo/SponSkrub) - For using the now **deprecated** [sponskrub options](#sponskrub-options). Licensed under [GPLv3+](https://github.com/faissaloo/SponSkrub/blob/master/LICENCE.md) * Any external downloader that you want to use with `--downloader` To use or redistribute the dependencies, you must agree to their respective licensing terms. -The windows releases are already built with the python interpreter, mutagen, pycryptodomex and websockets included. +The Windows and MacOS standalone release binaries are already built with the python interpreter, mutagen, pycryptodomex and websockets included. **Note**: There are some regressions in newer ffmpeg versions that causes various issues when used alongside yt-dlp. Since ffmpeg is such an important dependancy, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds/wiki/Latest#latest-autobuilds) with patches for these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specifc issues solved by these builds @@ -276,7 +297,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t sure that you have sufficient permissions (run with sudo if needed) -i, --ignore-errors Ignore download and postprocessing errors. - The download will be considered successfull + The download will be considered successful even if the postprocessing fails --no-abort-on-error Continue with next video on download errors; e.g. to skip unavailable videos in @@ -366,7 +387,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t SIZE (e.g. 50k or 44.6m) --max-filesize SIZE Do not download any videos larger than SIZE (e.g. 50k or 44.6m) - --date DATE Download only videos uploaded in this date. + --date DATE Download only videos uploaded on this date. The date can be "YYYYMMDD" or in the format "(now|today)[+-][0-9](day|week|month|year)(s)?" --datebefore DATE Download only videos uploaded on or before @@ -510,9 +531,9 @@ Then simply run `make`. 
You can also run `make yt-dlp` instead to compile only t filenames --no-restrict-filenames Allow Unicode characters, "&" and spaces in filenames (default) - --windows-filenames Force filenames to be windows compatible - --no-windows-filenames Make filenames windows compatible only if - using windows (default) + --windows-filenames Force filenames to be Windows-compatible + --no-windows-filenames Make filenames Windows-compatible only if + using Windows (default) --trim-filenames LENGTH Limit the filename length (excluding extension) to the specified number of characters @@ -608,9 +629,9 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t anything to disk --no-simulate Download the video even if printing/listing options are used - --ignore-no-formats-error Ignore "No video formats" error. Usefull - for extracting metadata even if the videos - are not actually available for download + --ignore-no-formats-error Ignore "No video formats" error. Useful for + extracting metadata even if the videos are + not actually available for download (experimental) --no-ignore-no-formats-error Throw error when no downloadable video formats are found (default) @@ -644,7 +665,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t "postprocess:", or "postprocess-title:". The video's fields are accessible under the "info" key and the progress attributes are - accessible under "progress" key. Eg: + accessible under "progress" key. E.g.: --console-title --progress-template "download-title:%(info.id)s-%(progress.eta)s" -v, --verbose Print various debugging information @@ -657,7 +678,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t ## Workarounds: --encoding ENCODING Force the specified encoding (experimental) - --no-check-certificate Suppress HTTPS certificate validation + --no-check-certificates Suppress HTTPS certificate validation --prefer-insecure Use an unencrypted connection to retrieve information about the video (Currently supported only for YouTube) @@ -706,10 +727,12 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t containers irrespective of quality --no-prefer-free-formats Don't give any special preference to free containers (default) - --check-formats Check that the formats selected are + --check-formats Check that the selected formats are actually downloadable - --no-check-formats Do not check that the formats selected are + --check-all-formats Check all formats for whether they are actually downloadable + --no-check-formats Do not check that the formats are actually + downloadable -F, --list-formats List available formats of each video. Simulate unless --no-simulate is used --merge-output-format FORMAT If a merge is required (e.g. @@ -1018,7 +1041,7 @@ The `-o` option is used to indicate a template for the output file names while ` The simplest usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding file extension like this is _not_ recommended and could break some post-processing). -It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. 
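
Since these sequences are ordinary Python `%`-style formatting under the hood, their behavior can be previewed with a plain dict (a hypothetical metadata sample, not actual yt-dlp output):

```python
info = {'title': 'Some Video', 'view_count': 42}
print('%(title)s [%(view_count)05d]' % info)  # prints: Some Video [00042]
```
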
To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. +It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [Python string formatting operations](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. The field names themselves (the part inside the parenthesis) can also have some special formatting: 1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields @@ -1159,7 +1182,7 @@ Each aforementioned sequence when referenced in an output template will be repla Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default). -**Tip**: Look at the `-j` output to identify which fields are available for the purticular URL +**Tip**: Look at the `-j` output to identify which fields are available for the particular URL For numeric sequences you can use [numeric related formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting), for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`. @@ -1303,7 +1326,7 @@ The available fields are: - `vext`: Video Extension (`mp4` > `webm` > `flv` > other > unknown). If `--prefer-free-formats` is used, `webm` is prefered. - `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other > unknown). If `--prefer-free-formats` is used, the order changes to `opus` > `ogg` > `webm` > `m4a` > `mp3` > `aac`. - `ext`: Equivalent to `vext,aext` - - `filesize`: Exact filesize, if know in advance. This will be unavailable for mu38 and DASH formats. + - `filesize`: Exact filesize, if known in advance - `fs_approx`: Approximate filesize calculated from the manifests - `size`: Exact filesize if available, otherwise approximate filesize - `height`: Height of video @@ -1506,6 +1529,9 @@ $ yt-dlp --parse-metadata '%(series)s S%(season_number)02dE%(episode_number)02d: # Set "comment" field in video metadata using description instead of webpage_url $ yt-dlp --parse-metadata 'description:(?s)(?P.+)' --add-metadata +# Remove "formats" field from the infojson by setting it to an empty string +$ yt-dlp --parse-metadata ':(?P)' -j + # Replace all spaces and "_" in title and uploader with a `-` $ yt-dlp --replace-in-metadata 'title,uploader' '[ _]' '-' @@ -1513,7 +1539,7 @@ $ yt-dlp --replace-in-metadata 'title,uploader' '[ _]' '-' # EXTRACTOR ARGUMENTS -Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) seperated string of `ARG=VAL1,VAL2`. 
Eg: `--extractor-args "youtube:player_client=android_agegate,web;include_live_dash" --extractor-args "funimation:version=uncut"` +Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. Eg: `--extractor-args "youtube:player_client=android_agegate,web;include_live_dash" --extractor-args "funimation:version=uncut"` The following extractors use this feature: diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2c2b17b200..4a9f4775bf 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -214,8 +214,8 @@ class YoutubeDL(object): ignore_no_formats_error: Ignore "No video formats" error. Usefull for extracting metadata even if the video is not actually available for download (experimental) - format_sort: How to sort the video formats. see "Sorting Formats" - for more details. + format_sort: A list of fields by which to sort the video formats. + See "Sorting Formats" for more details. format_sort_force: Force the given format_sort. see "Sorting Formats" for more details. allow_multiple_video_streams: Allow multiple video streams to be merged diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 5c3d33df06..84628bf455 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -225,6 +225,7 @@ def _real_main(argv=None): if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart: raise ValueError('Playlist end must be greater than playlist start') if opts.extractaudio: + opts.audioformat = opts.audioformat.lower() if opts.audioformat not in ['best'] + list(FFmpegExtractAudioPP.SUPPORTED_EXTS): parser.error('invalid audio format specified') if opts.audioquality: diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index c9ae9b6dbe..ec68a809d0 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -117,7 +117,7 @@ def _extract_firefox_cookies(profile, logger): raise FileNotFoundError('could not find firefox cookies database in {}'.format(search_root)) logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path)) - with tempfile.TemporaryDirectory(prefix='youtube_dl') as tmpdir: + with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: cursor = None try: cursor = _open_database_copy(cookie_database_path, tmpdir) @@ -236,7 +236,7 @@ def _extract_chrome_cookies(browser_name, profile, logger): decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger) - with tempfile.TemporaryDirectory(prefix='youtube_dl') as tmpdir: + with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: cursor = None try: cursor = _open_database_copy(cookie_database_path, tmpdir) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index aa98c0cc9f..2bbe236997 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -441,11 +441,11 @@ class InfoExtractor(object): _WORKING = True _LOGIN_HINTS = { - 'any': 'Use --cookies, --username and --password or --netrc to provide account credentials', + 'any': 'Use --cookies, --username and --password, or --netrc to provide account credentials', 'cookies': ( 'Use --cookies-from-browser or --cookies for the authentication. 
' 'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl for how to manually pass cookies'), - 'password': 'Use --username and --password or --netrc to provide account credentials', + 'password': 'Use --username and --password, or --netrc to provide account credentials', } def __init__(self, downloader=None): diff --git a/yt_dlp/extractor/telemundo.py b/yt_dlp/extractor/telemundo.py index 18552a0efb..e326bbdd5b 100644 --- a/yt_dlp/extractor/telemundo.py +++ b/yt_dlp/extractor/telemundo.py @@ -1,4 +1,4 @@ -# coding=utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 1db6327e24..8599516377 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -38,8 +38,8 @@ class TikTokBaseIE(InfoExtractor): 'build_number': self._APP_VERSION, 'manifest_version_code': self._MANIFEST_APP_VERSION, 'update_version_code': self._MANIFEST_APP_VERSION, - 'openudid': ''.join(random.choice('0123456789abcdef') for i in range(16)), - 'uuid': ''.join([random.choice(string.digits) for num in range(16)]), + 'openudid': ''.join(random.choice('0123456789abcdef') for _ in range(16)), + 'uuid': ''.join([random.choice(string.digits) for _ in range(16)]), '_rticket': int(time.time() * 1000), 'ts': int(time.time()), 'device_brand': 'Google', @@ -66,7 +66,7 @@ class TikTokBaseIE(InfoExtractor): 'as': 'a1qwert123', 'cp': 'cbfhckdckkde1', } - self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for i in range(160))) + self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160))) return self._download_json( 'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id, fatal=fatal, note=note, errnote=errnote, headers={ @@ -416,7 +416,7 @@ class TikTokUserIE(TikTokBaseIE): 'max_cursor': 0, 'min_cursor': 0, 'retry_type': 'no_retry', - 'device_id': ''.join(random.choice(string.digits) for i in range(19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. + 'device_id': ''.join(random.choice(string.digits) for _ in range(19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. } max_retries = self.get_param('extractor_retries', 3) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 5499ab13e9..a3a6c74b3a 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -209,7 +209,7 @@ def parseOpts(overrideArguments=None): general.add_option( '-i', '--ignore-errors', action='store_true', dest='ignoreerrors', - help='Ignore download and postprocessing errors. The download will be considered successfull even if the postprocessing fails') + help='Ignore download and postprocessing errors. The download will be considered successful even if the postprocessing fails') general.add_option( '--no-abort-on-error', action='store_const', dest='ignoreerrors', const='only_download', @@ -383,7 +383,7 @@ def parseOpts(overrideArguments=None): '--date', metavar='DATE', dest='date', default=None, help=( - 'Download only videos uploaded in this date. ' + 'Download only videos uploaded on this date. ' 'The date can be "YYYYMMDD" or in the format ' '"(now|today)[+-][0-9](day|week|month|year)(s)?"')) selection.add_option( @@ -840,7 +840,7 @@ def parseOpts(overrideArguments=None): '--ignore-no-formats-error', action='store_true', dest='ignore_no_formats_error', default=False, help=( - 'Ignore "No video formats" error. 
Usefull for extracting metadata ' + 'Ignore "No video formats" error. Useful for extracting metadata ' 'even if the videos are not actually available for download (experimental)')) verbosity.add_option( '--no-ignore-no-formats-error', @@ -935,7 +935,7 @@ def parseOpts(overrideArguments=None): 'Template for progress outputs, optionally prefixed with one of "download:" (default), ' '"download-title:" (the console title), "postprocess:", or "postprocess-title:". ' 'The video\'s fields are accessible under the "info" key and ' - 'the progress attributes are accessible under "progress" key. Eg: ' + 'the progress attributes are accessible under "progress" key. E.g.: ' # TODO: Document the fields inside "progress" '--console-title --progress-template "download-title:%(info.id)s-%(progress.eta)s"')) verbosity.add_option( @@ -1028,11 +1028,11 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '--windows-filenames', action='store_true', dest='windowsfilenames', default=False, - help='Force filenames to be windows compatible') + help='Force filenames to be Windows-compatible') filesystem.add_option( '--no-windows-filenames', action='store_false', dest='windowsfilenames', - help='Make filenames windows compatible only if using windows (default)') + help='Make filenames Windows-compatible only if using Windows (default)') filesystem.add_option( '--trim-filenames', '--trim-file-names', metavar='LENGTH', dest='trim_file_name', default=0, type=int, From f0ffaa1621fc40ba033aa3c98a14aa4c93533915 Mon Sep 17 00:00:00 2001 From: kaz-us <32769754+kaz-us@users.noreply.github.com> Date: Sun, 31 Oct 2021 18:16:12 +0400 Subject: [PATCH 356/641] [vk] Fix login (#1495) Closes #1459 Authored by: kaz-us --- yt_dlp/extractor/vk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index a8a980de69..9a5c9ee6bc 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -51,7 +51,7 @@ class VKBaseIE(InfoExtractor): self._apply_first_set_cookie_header(url_handle, 'remixlhk') login_page = self._download_webpage( - 'https://login.vk.com/?act=login', None, + 'https://vk.com/login', None, note='Logging in', data=urlencode_postdata(login_form)) From c588b602d34f005dc018ae004281226741414192 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sun, 31 Oct 2021 14:20:09 +0000 Subject: [PATCH 357/641] [Instagram] Fix incorrect resolution (#1494) Authored by: u-spec-png --- yt_dlp/extractor/instagram.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index 6ed20d9c6d..4eca9eb922 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -228,8 +228,8 @@ class InstagramIE(InstagramBaseIE): dict) if media: video_url = media.get('video_url') - height = try_get(media, lambda x: x['dimensions']['height']) - width = try_get(media, lambda x: x['dimensions']['width']) + height = int_or_none(self._html_search_meta(('og:video:height', 'video:height'), webpage)) or try_get(media, lambda x: x['dimensions']['height']) + width = int_or_none(self._html_search_meta(('og:video:width', 'video:width'), webpage)) or try_get(media, lambda x: x['dimensions']['width']) description = try_get( media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'], compat_str) or media.get('caption') From a1fc7ca0743c8df06416e68ee74b64e07dfe7135 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 3 Nov 2021 16:25:48 +0530 
Subject: [PATCH 358/641] [jsinterp] Handle default in switch better --- test/test_jsinterp.py | 15 +++++++++++++++ test/test_youtube_signature.py | 6 +++++- yt_dlp/jsinterp.py | 22 +++++++++++++--------- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 380e52c333..e230b045fd 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -132,6 +132,21 @@ class TestJSInterpreter(unittest.TestCase): self.assertEqual(jsi.call_function('x', 3), 6) self.assertEqual(jsi.call_function('x', 5), 0) + def test_switch_default(self): + jsi = JSInterpreter(''' + function x(f) { switch(f){ + case 2: f+=2; + default: f-=1; + case 5: + case 6: f+=6; + case 0: break; + case 1: f+=1; + } return f } + ''') + self.assertEqual(jsi.call_function('x', 1), 2) + self.assertEqual(jsi.call_function('x', 5), 11) + self.assertEqual(jsi.call_function('x', 9), 14) + def test_try(self): jsi = JSInterpreter(''' function x() { try{return 10} catch(e){return 5} } diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index f40a069526..60d8eabf5c 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -69,7 +69,11 @@ _NSIG_TESTS = [ ( 'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js', 'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w', - ), # TODO: Add more tests + ), + ( + 'https://www.youtube.com/s/player/f8cb7a3b/player_ias.vflset/en_US/base.js', + 'oBo2h5euWy6osrUt', 'ivXHpm7qJjJN', + ), ] diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index 5c79a8110d..bb2a0ae0b9 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -228,21 +228,25 @@ class JSInterpreter(object): switch_val, remaining = self._seperate_at_paren(expr[m.end() - 1:], ')') switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion) body, expr = self._seperate_at_paren(remaining, '}') - body, default = body.split('default:') if 'default:' in body else (body, None) - items = body.split('case ')[1:] - if default: - items.append(f'default:{default}') - matched = False - for item in items: - case, stmt = [i.strip() for i in self._seperate(item, ':', 1)] - matched = matched or case == 'default' or switch_val == self.interpret_expression(case, local_vars, allow_recursion) - if matched: + items = body.replace('default:', 'case default:').split('case ')[1:] + for default in (False, True): + matched = False + for item in items: + case, stmt = [i.strip() for i in self._seperate(item, ':', 1)] + if default: + matched = matched or case == 'default' + elif not matched: + matched = case != 'default' and switch_val == self.interpret_expression(case, local_vars, allow_recursion) + if not matched: + continue try: ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion - 1) if should_abort: return ret except JS_Break: break + if matched: + break return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0] # Comma seperated statements From 9bd979ca40f4f7b1f3918386b8347e03820766b4 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 3 Nov 2021 16:26:34 +0530 Subject: [PATCH 359/641] [utils] Parse `vp09` as vp9 --- yt_dlp/utils.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 080bf260a2..2953909fce 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4656,19 +4656,18 @@ def parse_codecs(codecs_str): str.strip, codecs_str.strip().strip(',').split(',')))) vcodec, acodec, hdr = None, None, None for full_codec 
in split_codecs: - codec = full_codec.split('.')[0] - if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora', 'dvh1', 'dvhe'): + parts = full_codec.split('.') + codec = parts[0].replace('0', '') + if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', + 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'): if not vcodec: - vcodec = full_codec + vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1') else full_codec if codec in ('dvh1', 'dvhe'): hdr = 'DV' - elif codec == 'vp9' and vcodec.startswith('vp9.2'): + elif codec == 'av1' and len(parts) > 3 and parts[3] == '10': + hdr = 'HDR10' + elif full_codec.replace('0', '').startswith('vp9.2'): hdr = 'HDR10' - elif codec == 'av01': - parts = full_codec.split('.') - if len(parts) > 3 and parts[3] == '10': - hdr = 'HDR10' - vcodec = '.'.join(parts[:4]) elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): if not acodec: acodec = full_codec From d89257f398fed8a44fae7d12d849114f9f4ca2be Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 3 Nov 2021 16:27:34 +0530 Subject: [PATCH 360/641] [youtube] Remove unnecessary no-playlist warning --- yt_dlp/extractor/youtube.py | 83 ++++++++++++++++++------------------- 1 file changed, 41 insertions(+), 42 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 64475edec0..4284143839 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2623,49 +2623,48 @@ class YoutubeIE(YoutubeBaseInfoExtractor): or search_meta(['og:title', 'twitter:title', 'title'])) video_description = get_first(video_details, 'shortDescription') - if not smuggled_data.get('force_singlefeed', False): - if not self.get_param('noplaylist'): - multifeed_metadata_list = get_first( - player_responses, - ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'), - expected_type=str) - if multifeed_metadata_list: - entries = [] - feed_ids = [] - for feed in multifeed_metadata_list.split(','): - # Unquote should take place before split on comma (,) since textual - # fields may contain comma as well (see - # https://github.com/ytdl-org/youtube-dl/issues/8536) - feed_data = compat_parse_qs( - compat_urllib_parse_unquote_plus(feed)) - - def feed_entry(name): - return try_get( - feed_data, lambda x: x[name][0], compat_str) - - feed_id = feed_entry('id') - if not feed_id: - continue - feed_title = feed_entry('title') - title = video_title - if feed_title: - title += ' (%s)' % feed_title - entries.append({ - '_type': 'url_transparent', - 'ie_key': 'Youtube', - 'url': smuggle_url( - '%swatch?v=%s' % (base_url, feed_data['id'][0]), - {'force_singlefeed': True}), - 'title': title, - }) - feed_ids.append(feed_id) - self.to_screen( - 'Downloading multifeed video (%s) - add --no-playlist to just download video %s' - % (', '.join(feed_ids), video_id)) - return self.playlist_result( - entries, video_id, video_title, video_description) - else: + multifeed_metadata_list = get_first( + player_responses, + ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'), + expected_type=str) + if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'): + if self.get_param('noplaylist'): self.to_screen('Downloading just video %s because of --no-playlist' % video_id) + else: + entries = [] + feed_ids = [] + for feed in multifeed_metadata_list.split(','): + # Unquote should take place before split on comma (,) since textual + # 
fields may contain comma as well (see + # https://github.com/ytdl-org/youtube-dl/issues/8536) + feed_data = compat_parse_qs( + compat_urllib_parse_unquote_plus(feed)) + + def feed_entry(name): + return try_get( + feed_data, lambda x: x[name][0], compat_str) + + feed_id = feed_entry('id') + if not feed_id: + continue + feed_title = feed_entry('title') + title = video_title + if feed_title: + title += ' (%s)' % feed_title + entries.append({ + '_type': 'url_transparent', + 'ie_key': 'Youtube', + 'url': smuggle_url( + '%swatch?v=%s' % (base_url, feed_data['id'][0]), + {'force_singlefeed': True}), + 'title': title, + }) + feed_ids.append(feed_id) + self.to_screen( + 'Downloading multifeed video (%s) - add --no-playlist to just download video %s' + % (', '.join(feed_ids), video_id)) + return self.playlist_result( + entries, video_id, video_title, video_description) live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails')) is_live = get_first(video_details, 'isLive') From bd93fd5d45e104561bad919d4775feba869d0145 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 3 Nov 2021 16:28:45 +0530 Subject: [PATCH 361/641] [fragment] Fix progress display in fragmented downloads Closes #1517 --- yt_dlp/downloader/common.py | 2 ++ yt_dlp/downloader/fragment.py | 14 +++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 6cfbb6657a..bcf8ac9554 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -319,6 +319,8 @@ class FileDownloader(object): msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' else: msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s' + if s.get('fragment_index'): + msg_template += ' (frag %(fragment_index)s)' s['_default_template'] = msg_template % s self._report_progress_status(s) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index a9d1471f8c..d08fd52a19 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -31,6 +31,10 @@ class HttpQuietDownloader(HttpFD): def to_screen(self, *args, **kargs): pass + def report_retry(self, err, count, retries): + super().to_screen( + f'[download] Got server HTTP error: {err}. 
Retrying (attempt {count} of {self.format_retries(retries)}) ...') + class FragmentFD(FileDownloader): """ @@ -167,7 +171,7 @@ class FragmentFD(FileDownloader): self.ydl, { 'continuedl': True, - 'quiet': True, + 'quiet': self.params.get('quiet'), 'noprogress': True, 'ratelimit': self.params.get('ratelimit'), 'retries': self.params.get('retries', 0), @@ -237,6 +241,7 @@ class FragmentFD(FileDownloader): start = time.time() ctx.update({ 'started': start, + 'fragment_started': start, # Amount of fragment's bytes downloaded by the time of the previous # frag progress hook invocation 'prev_frag_downloaded_bytes': 0, @@ -267,6 +272,9 @@ class FragmentFD(FileDownloader): ctx['fragment_index'] = state['fragment_index'] state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes'] ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes'] + ctx['speed'] = state['speed'] = self.calc_speed( + ctx['fragment_started'], time_now, frag_total_bytes) + ctx['fragment_started'] = time.time() ctx['prev_frag_downloaded_bytes'] = 0 else: frag_downloaded_bytes = s['downloaded_bytes'] @@ -275,8 +283,8 @@ class FragmentFD(FileDownloader): state['eta'] = self.calc_eta( start, time_now, estimated_size - resume_len, state['downloaded_bytes'] - resume_len) - state['speed'] = s.get('speed') or ctx.get('speed') - ctx['speed'] = state['speed'] + ctx['speed'] = state['speed'] = self.calc_speed( + ctx['fragment_started'], time_now, frag_downloaded_bytes) ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes self._hook_progress(state, info_dict) From 31c49255bf647373734c2c7f917e0d24ab81ac95 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 4 Nov 2021 00:05:53 +0530 Subject: [PATCH 362/641] [ExtractAudio] Rescale --audio-quality correctly Authored by: CrypticSignal, pukkandan --- yt_dlp/__init__.py | 4 +++- yt_dlp/options.py | 2 +- yt_dlp/postprocessor/ffmpeg.py | 37 +++++++++++++++++++++------------- yt_dlp/utils.py | 2 +- 4 files changed, 28 insertions(+), 17 deletions(-) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 84628bf455..0070d50a8a 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -29,6 +29,8 @@ from .utils import ( error_to_compat_str, ExistingVideoReached, expand_path, + float_or_none, + int_or_none, match_filter_func, MaxDownloadsReached, parse_duration, @@ -230,7 +232,7 @@ def _real_main(argv=None): parser.error('invalid audio format specified') if opts.audioquality: opts.audioquality = opts.audioquality.strip('k').strip('K') - if not opts.audioquality.isdigit(): + if int_or_none(float_or_none(opts.audioquality)) is None: # int_or_none prevents inf, nan parser.error('invalid audio quality specified') if opts.recodevideo is not None: opts.recodevideo = opts.recodevideo.replace(' ', '') diff --git a/yt_dlp/options.py b/yt_dlp/options.py index a3a6c74b3a..bd9fdd37bd 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1215,7 +1215,7 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--audio-quality', metavar='QUALITY', dest='audioquality', default='5', - help='Specify ffmpeg audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)') + help='Specify ffmpeg audio quality, insert a value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K (default %default)') postproc.add_option( '--remux-video', metavar='FORMAT', dest='remuxvideo', default=None, diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index b7fcc569ba..96f7be6ff3 100644 
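The `--audio-quality` rework in the ffmpeg.py hunk that follows maps the user-facing scale, 0 (best) to 10 (worst), linearly onto each encoder's native `-q:a` range via the new `_quality_args`. A minimal standalone sketch of just that mapping (the tuple convention mirrors the patch; the helper name here is illustrative only):

```python
def rescale_vbr_quality(preferred, limits):
    # limits[0] is the encoder's -q:a value used at --audio-quality 10 (worst),
    # limits[1] the value used at --audio-quality 0 (best)
    worst, best = limits
    return best + (worst - best) * (preferred / 10)

assert rescale_vbr_quality(0, (10, 0)) == 0    # libmp3lame: -q:a 0 is its best
assert rescale_vbr_quality(5, (10, 0)) == 5.0  # the default --audio-quality 5
assert rescale_vbr_quality(0, (0, 10)) == 10   # vorbis: -q:a 10 is its best
```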
--- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -371,9 +371,29 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False): FFmpegPostProcessor.__init__(self, downloader) self._preferredcodec = preferredcodec or 'best' - self._preferredquality = preferredquality + self._preferredquality = float_or_none(preferredquality) self._nopostoverwrites = nopostoverwrites + def _quality_args(self, codec): + if self._preferredquality is None: + return [] + elif self._preferredquality > 10: + return ['-b:a', f'{self._preferredquality}k'] + + limits = { + 'libmp3lame': (10, 0), + 'aac': (0.1, 11), + 'vorbis': (0, 10), + 'opus': None, # doesn't support -q:a + 'wav': None, + 'flac': None, + }[codec] + if not limits: + return [] + + q = limits[1] + (limits[0] - limits[1]) * (self._preferredquality / 10) + return ['-q:a', f'{q}'] + def run_ffmpeg(self, path, out_path, codec, more_opts): if codec is None: acodec_opts = [] @@ -417,23 +437,12 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): # MP3 otherwise. acodec = 'libmp3lame' extension = 'mp3' - more_opts = [] - if self._preferredquality is not None: - if int(self._preferredquality) < 10: - more_opts += ['-q:a', self._preferredquality] - else: - more_opts += ['-b:a', self._preferredquality + 'k'] + more_opts = self._quality_args(acodec) else: # We convert the audio (lossy if codec is lossy) acodec = ACODECS[self._preferredcodec] extension = self._preferredcodec - more_opts = [] - if self._preferredquality is not None: - # The opus codec doesn't support the -aq option - if int(self._preferredquality) < 10 and extension != 'opus': - more_opts += ['-q:a', self._preferredquality] - else: - more_opts += ['-b:a', self._preferredquality + 'k'] + more_opts = self._quality_args(acodec) if self._preferredcodec == 'aac': more_opts += ['-f', 'adts'] if self._preferredcodec == 'm4a': diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 2953909fce..62f83c9ce2 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3871,7 +3871,7 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1): return default try: return int(v) * invscale // scale - except (ValueError, TypeError): + except (ValueError, TypeError, OverflowError): return default From 9af98e17bd2b761d304e88a359b0f7a40e6c0a67 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 4 Nov 2021 00:23:48 +0530 Subject: [PATCH 363/641] [ffmpeg] Framework for feature detection Related: #1502, #1237, https://github.com/ytdl-org/youtube-dl/pull/29581 --- yt_dlp/__init__.py | 3 +- yt_dlp/postprocessor/ffmpeg.py | 77 ++++++++++++++++++---------------- yt_dlp/utils.py | 15 ++++--- 3 files changed, 54 insertions(+), 41 deletions(-) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 0070d50a8a..3020b6e95d 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -232,7 +232,8 @@ def _real_main(argv=None): parser.error('invalid audio format specified') if opts.audioquality: opts.audioquality = opts.audioquality.strip('k').strip('K') - if int_or_none(float_or_none(opts.audioquality)) is None: # int_or_none prevents inf, nan + audioquality = int_or_none(float_or_none(opts.audioquality)) # int_or_none prevents inf, nan + if audioquality is None or audioquality < 0: parser.error('invalid audio quality specified') if opts.recodevideo is not None: opts.recodevideo = opts.recodevideo.replace(' ', '') diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 
96f7be6ff3..c2415c59a1 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -16,7 +16,8 @@ from ..utils import ( encodeArgument, encodeFilename, float_or_none, - get_exe_version, + _get_exe_version_output, + detect_exe_version, is_outdated_version, ISO639Utils, orderedSet, @@ -80,10 +81,10 @@ class FFmpegPostProcessor(PostProcessor): def _determine_executables(self): programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] - prefer_ffmpeg = True - def get_ffmpeg_version(path): - ver = get_exe_version(path, args=['-version']) + def get_ffmpeg_version(path, prog): + out = _get_exe_version_output(path, ['-bsfs']) + ver = detect_exe_version(out) if out else False if ver: regexs = [ r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$', # Ubuntu, see [1] @@ -94,42 +95,46 @@ class FFmpegPostProcessor(PostProcessor): mobj = re.match(regex, ver) if mobj: ver = mobj.group(1) - return ver + self._versions[prog] = ver + if prog != 'ffmpeg' or not out: + return + + # TODO: Feature detection self.basename = None self.probe_basename = None - self._paths = None self._versions = None - if self._downloader: - prefer_ffmpeg = self.get_param('prefer_ffmpeg', True) - location = self.get_param('ffmpeg_location') - if location is not None: - if not os.path.exists(location): - self.report_warning( - 'ffmpeg-location %s does not exist! ' - 'Continuing without ffmpeg.' % (location)) - self._versions = {} - return - elif os.path.isdir(location): - dirname, basename = location, None - else: - basename = os.path.splitext(os.path.basename(location))[0] - basename = next((p for p in programs if basename.startswith(p)), 'ffmpeg') - dirname = os.path.dirname(os.path.abspath(location)) - if basename in ('ffmpeg', 'ffprobe'): - prefer_ffmpeg = True + self._features = {} - self._paths = dict( - (p, os.path.join(dirname, p)) for p in programs) - if basename: - self._paths[basename] = location - self._versions = dict( - (p, get_ffmpeg_version(self._paths[p])) for p in programs) - if self._versions is None: - self._versions = dict( - (p, get_ffmpeg_version(p)) for p in programs) - self._paths = dict((p, p) for p in programs) + prefer_ffmpeg = self.get_param('prefer_ffmpeg', True) + location = self.get_param('ffmpeg_location') + if location is None: + self._paths = {p: p for p in programs} + else: + if not os.path.exists(location): + self.report_warning( + 'ffmpeg-location %s does not exist! ' + 'Continuing without ffmpeg.' % (location)) + self._versions = {} + return + elif os.path.isdir(location): + dirname, basename = location, None + else: + basename = os.path.splitext(os.path.basename(location))[0] + basename = next((p for p in programs if basename.startswith(p)), 'ffmpeg') + dirname = os.path.dirname(os.path.abspath(location)) + if basename in ('ffmpeg', 'ffprobe'): + prefer_ffmpeg = True + + self._paths = dict( + (p, os.path.join(dirname, p)) for p in programs) + if basename: + self._paths[basename] = location + + self._versions = {} + for p in programs: + get_ffmpeg_version(self._paths[p], p) if prefer_ffmpeg is False: prefs = ('avconv', 'ffmpeg') @@ -382,7 +387,9 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): limits = { 'libmp3lame': (10, 0), - 'aac': (0.1, 11), + # FFmpeg's AAC encoder does not have an upper limit for the value of -q:a. 
+ # Experimentally, with values over 4, bitrate changes were minimal or non-existent + 'aac': (0.1, 4), 'vorbis': (0, 10), 'opus': None, # doesn't support -q:a 'wav': None, diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 62f83c9ce2..55e452a151 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4007,10 +4007,7 @@ def check_executable(exe, args=[]): return exe -def get_exe_version(exe, args=['--version'], - version_re=None, unrecognized='present'): - """ Returns the version of the specified executable, - or False if the executable is not present """ +def _get_exe_version_output(exe, args): try: # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers # SIGTTOU if yt-dlp is run in the background. @@ -4022,7 +4019,7 @@ def get_exe_version(exe, args=['--version'], return False if isinstance(out, bytes): # Python 2.x out = out.decode('ascii', 'ignore') - return detect_exe_version(out, version_re, unrecognized) + return out def detect_exe_version(output, version_re=None, unrecognized='present'): @@ -4036,6 +4033,14 @@ def detect_exe_version(output, version_re=None, unrecognized='present'): return unrecognized +def get_exe_version(exe, args=['--version'], + version_re=None, unrecognized='present'): + """ Returns the version of the specified executable, + or False if the executable is not present """ + out = _get_exe_version_output(exe, args) + return detect_exe_version(out, version_re, unrecognized) if out else False + + class LazyList(collections.abc.Sequence): ''' Lazy immutable list from an iterable Note that slices of a LazyList are lists and not LazyList''' From 673c0057e81410b3da2b0c07ebf7abca13286eab Mon Sep 17 00:00:00 2001 From: CrypticSignal Date: Thu, 4 Nov 2021 02:23:40 +0530 Subject: [PATCH 364/641] [ExtractAudio] Use `libfdk_aac` if available Closes #1502 Authored by: CrypticSignal --- yt_dlp/postprocessor/ffmpeg.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index c2415c59a1..3f82eabf5e 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -99,7 +99,7 @@ class FFmpegPostProcessor(PostProcessor): if prog != 'ffmpeg' or not out: return - # TODO: Feature detection + self._features['fdk'] = '--enable-libfdk-aac' in out self.basename = None self.probe_basename = None @@ -391,6 +391,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): # Experimentally, with values over 4, bitrate changes were minimal or non-existent 'aac': (0.1, 4), 'vorbis': (0, 10), + 'libfdk_aac': (1, 5), 'opus': None, # doesn't support -q:a 'wav': None, 'flac': None, @@ -399,6 +400,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): return [] q = limits[1] + (limits[0] - limits[1]) * (self._preferredquality / 10) + if codec == 'libfdk_aac': + return ['-vbr', f'{int(q)}'] return ['-q:a', f'{q}'] def run_ffmpeg(self, path, out_path, codec, more_opts): @@ -448,6 +451,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): else: # We convert the audio (lossy if codec is lossy) acodec = ACODECS[self._preferredcodec] + if acodec == 'aac' and self._features.get('fdk'): + acodec = 'libfdk_aac' extension = self._preferredcodec more_opts = self._quality_args(acodec) if self._preferredcodec == 'aac': From 832e9000c71c5bbd97c93d21051044cf61a3b87f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 4 Nov 2021 02:24:12 +0530 Subject: [PATCH 365/641] [ffmpeg] Accurately detect presence of setts Closes #1237 --- yt_dlp/postprocessor/ffmpeg.py | 10 ++++++---- 1 file changed, 6 
insertions(+), 4 deletions(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 3f82eabf5e..139b97fb48 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -99,7 +99,10 @@ class FFmpegPostProcessor(PostProcessor): if prog != 'ffmpeg' or not out: return - self._features['fdk'] = '--enable-libfdk-aac' in out + self._features = { + 'fdk': '--enable-libfdk-aac' in out, + 'setts': 'setts' in out.splitlines(), + } self.basename = None self.probe_basename = None @@ -827,11 +830,10 @@ class FFmpegFixupTimestampPP(FFmpegFixupPostProcessor): @PostProcessor._restrict_to(images=False) def run(self, info): - required_version = '4.4' - if is_outdated_version(self._versions[self.basename], required_version): + if not self._features.get('setts'): self.report_warning( 'A re-encode is needed to fix timestamps in older versions of ffmpeg. ' - f'Please install ffmpeg {required_version} or later to fixup without re-encoding') + 'Please install ffmpeg 4.4 or later to fixup without re-encoding') opts = ['-vf', 'setpts=PTS-STARTPTS'] else: opts = ['-c', 'copy', '-bsf', 'setts=ts=TS-STARTPTS'] From 8913ef74d76d8e93e4aeaf9d2827ca950c17f8ce Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 4 Nov 2021 03:10:49 +0530 Subject: [PATCH 366/641] [ffmpeg] Detect libavformat version for `aac_adtstoasc` and print available features in verbose head Based on https://github.com/ytdl-org/youtube-dl/pull/29581 --- yt_dlp/YoutubeDL.py | 6 +++++- yt_dlp/downloader/external.py | 3 +-- yt_dlp/postprocessor/ffmpeg.py | 10 +++++++++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 4a9f4775bf..a866178b03 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3350,7 +3350,11 @@ class YoutubeDL(object): platform.architecture()[0], platform_name())) - exe_versions = FFmpegPostProcessor.get_versions(self) + exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self) + ffmpeg_features = {key for key, val in ffmpeg_features.items() if val} + if ffmpeg_features: + exe_versions['ffmpeg'] += f' (%s)' % ','.join(ffmpeg_features) + exe_versions['rtmpdump'] = rtmpdump_version() exe_versions['phantomjs'] = PhantomJSwrapper._version() exe_str = ', '.join( diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index ce3370fb77..1efbb2fabe 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -21,7 +21,6 @@ from ..utils import ( encodeArgument, handle_youtubedl_headers, check_executable, - is_outdated_version, Popen, sanitize_open, ) @@ -459,7 +458,7 @@ class FFmpegFD(ExternalFD): args += ['-f', 'mpegts'] else: args += ['-f', 'mp4'] - if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')): + if (ffpp.basename == 'ffmpeg' and ffpp._features.get('needs_adtstoasc')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')): args += ['-bsf:a', 'aac_adtstoasc'] elif protocol == 'rtmp': args += ['-f', 'flv'] diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 139b97fb48..46e87baebf 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -75,9 +75,14 @@ class FFmpegPostProcessor(PostProcessor): self.basename, self.basename, required_version) self.report_warning(warning) + @staticmethod + def get_versions_and_features(downloader=None): + 
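# A sketch of the pair this static helper returns, assuming a system
# ffmpeg 4.4 built with libfdk-aac (the keys are real, values illustrative):
#   >>> versions, features = FFmpegPostProcessor.get_versions_and_features()
#   >>> versions['ffmpeg'], versions['avconv']
#   ('4.4', False)
#   >>> features
#   {'fdk': True, 'setts': True, 'needs_adtstoasc': False}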
pp = FFmpegPostProcessor(downloader)
+        return pp._versions, pp._features
+
     @staticmethod
     def get_versions(downloader=None):
-        return FFmpegPostProcessor(downloader)._versions
+        return FFmpegPostProcessor.get_versions_and_features(downloader)[0]
 
     def _determine_executables(self):
         programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
@@ -99,9 +104,12 @@ class FFmpegPostProcessor(PostProcessor):
         if prog != 'ffmpeg' or not out:
             return
 
+        mobj = re.search(r'(?m)^\s+libavformat\s+(?:[0-9. ]+)\s+/\s+(?P<runtime>[0-9. ]+)', out)
+        lavf_runtime_version = mobj.group('runtime').replace(' ', '') if mobj else None
         self._features = {
             'fdk': '--enable-libfdk-aac' in out,
             'setts': 'setts' in out.splitlines(),
+            'needs_adtstoasc': is_outdated_version(lavf_runtime_version, '57.56.100', False),
         }
 
         self.basename = None

From a4211baff55f72bd1ca0649407c3d134bfcd2646 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Thu, 4 Nov 2021 03:40:35 +0530
Subject: [PATCH 367/641] [cleanup] Minor cleanup

---
 .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml | 2 +-
 .../2_site_support_request.yml | 2 +-
 .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml | 4 ++--
 README.md | 24 +++++++++----------
 yt_dlp/YoutubeDL.py | 4 ++--
 yt_dlp/downloader/common.py | 4 +++-
 yt_dlp/extractor/picarto.py | 2 +-
 yt_dlp/extractor/youtube.py | 2 +-
 yt_dlp/options.py | 2 +-
 9 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml
index fdca0e53a8..e23bc4195c 100644
--- a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml
+++ b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml
@@ -43,7 +43,7 @@ body:
     attributes:
       label: Verbose log
       description: |
-        Provide the complete verbose output of yt-dlp that clearly demonstrates the problem.
+        Provide the complete verbose output of yt-dlp **that clearly demonstrates the problem**.
         Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below.
         It should look similar to this:
       placeholder: |
diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml
index f7a48edc79..f353848214 100644
--- a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml
+++ b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml
@@ -54,7 +54,7 @@ body:
     attributes:
       label: Verbose log
       description: |
-        Provide the complete verbose output using one of the example URLs provided above.
+        Provide the complete verbose output **using one of the example URLs provided above**.
         Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below.
         It should look similar to this:
       placeholder: |
diff --git a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml
index e4d669bb7b..8219ebfd43 100644
--- a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml
+++ b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml
@@ -37,8 +37,8 @@ body:
     attributes:
       label: Verbose log
       description: |
-        Provide the complete verbose output of yt-dlp that clearly demonstrates the problem.
-        Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below.
+        Provide the complete verbose output of yt-dlp **that clearly demonstrates the problem**.
+        Add the `-Uv` flag to **your** command line you run yt-dlp with (`yt-dlp -Uv <your command line>`), copy the WHOLE output and insert it below.
It should look similar to this: placeholder: | [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc'] diff --git a/README.md b/README.md index 31bfca6a8c..ccd221bb44 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * Search (`ytsearch:`, `ytsearchdate:`), search URLs and in-channel search works * Mixes supports downloading multiple pages of content * Most (but not all) age-gated content can be downloaded without cookies - * Partial workaround for throttling issue + * Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) * Redirect channel's home URL automatically to `/video` to preserve the old behaviour * `255kbps` audio is extracted (if available) from youtube music when premium cookies are given * Youtube music Albums, channels etc can be downloaded ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723)) @@ -154,7 +154,7 @@ For ease of use, a few more compat options are available: You can install yt-dlp using one of the following methods: -#### Using the release binary +### Using the release binary You can simply download the [correct binary file](#release-files) for your OS: **[[Windows](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)] [[UNIX-like](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)]** @@ -177,7 +177,7 @@ sudo chmod a+rx /usr/local/bin/yt-dlp PS: The manpages, shell completion files etc. are available in [yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) -#### With [PIP](https://pypi.org/project/pip) +### With [PIP](https://pypi.org/project/pip) You can install the [PyPI package](https://pypi.org/project/yt-dlp) with: ``` @@ -196,7 +196,7 @@ python3 -m pip3 install -U https://github.com/yt-dlp/yt-dlp/archive/master.zip Note that on some systems, you may need to use `py` or `python` instead of `python3` -#### With [Homebrew](https://brew.sh) +### With [Homebrew](https://brew.sh) macOS or Linux users that are using Homebrew can also install it by: @@ -204,14 +204,14 @@ macOS or Linux users that are using Homebrew can also install it by: brew install yt-dlp/taps/yt-dlp ``` -### UPDATE +## UPDATE You can use `yt-dlp -U` to update if you are [using the provided release](#using-the-release-binary) If you [installed with pip](#with-pip), simply re-run the same command that was used to install the program If you [installed using Homebrew](#with-homebrew), run `brew upgrade yt-dlp/taps/yt-dlp` -### RELEASE FILES +## RELEASE FILES #### Recommended @@ -238,7 +238,7 @@ File|Description [SHA2-512SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS)|GNU-style SHA512 sums [SHA2-256SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS)|GNU-style SHA256 sums -### DEPENDENCIES +## DEPENDENCIES Python versions 3.6+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly. 
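The [polskieradio] patch that follows factors the site's `data-media` player markup into a reusable `_extract_webpage_player_entries` helper. A simplified, self-contained model of its core loop (toy HTML; the real helper additionally URL-unquotes the description, deduplicates media URLs and fills in duration/vcodec):

```python
import json
import re
from html import unescape

webpage = ('<div class="play" data-media="{&quot;id&quot;:1,'
           '&quot;file&quot;:&quot;//static.prsa.pl/a.mp3&quot;,'
           '&quot;desc&quot;:&quot;Odcinek 1&quot;}"></div>')

for raw in re.findall(r'<[^>]+data-media="?({[^>]+})"?', webpage):
    media = json.loads(unescape(raw))  # the attribute value is HTML-escaped JSON
    if media.get('file') and media.get('desc'):
        print(media['id'], 'https:' + media['file'], media['desc'])
```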
From 89fcdff5d8e62c6153763650f12ec4eb4453bdff Mon Sep 17 00:00:00 2001
From: Lauren Liberda
Date: Sat, 23 Oct 2021 03:25:09 +0200
Subject: [PATCH 399/641] [polskieradio] Add extractors (#1386)

Authored by: selfisekai
---
 yt_dlp/extractor/extractors.py | 4 +
 yt_dlp/extractor/polskieradio.py | 307 ++++++++++++++++++++++++++-----
 2 files changed, 269 insertions(+), 42 deletions(-)

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 641481d017..741b9f0210 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1108,6 +1108,10 @@ from .pokemon import (
 from .polskieradio import (
     PolskieRadioIE,
     PolskieRadioCategoryIE,
+    PolskieRadioPlayerIE,
+    PolskieRadioPodcastIE,
+    PolskieRadioPodcastListIE,
+    PolskieRadioRadioKierowcowIE,
 )
 from .popcorntimes import PopcorntimesIE
 from .popcorntv import PopcornTVIE
diff --git a/yt_dlp/extractor/polskieradio.py b/yt_dlp/extractor/polskieradio.py
index 53fe0340a0..b2b3eb29cf 100644
--- a/yt_dlp/extractor/polskieradio.py
+++ b/yt_dlp/extractor/polskieradio.py
@@ -2,6 +2,8 @@
 from __future__ import unicode_literals
 
 import itertools
+import json
+import math
 import re
 
 from .common import InfoExtractor
@@ -12,15 +14,45 @@ from ..compat import (
 )
 from ..utils import (
     extract_attributes,
+    ExtractorError,
+    InAdvancePagedList,
     int_or_none,
+    js_to_json,
+    parse_iso8601,
     strip_or_none,
     unified_timestamp,
     unescapeHTML,
+    url_or_none,
 )
 
 
-class PolskieRadioIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P<id>[0-9]+)'
+class PolskieRadioBaseExtractor(InfoExtractor):
+    def _extract_webpage_player_entries(self, webpage, playlist_id, base_data):
+        media_urls = set()
+
+        for data_media in re.findall(r'<[^>]+data-media="?({[^>]+})"?', webpage):
+            media = self._parse_json(data_media, playlist_id, transform_source=unescapeHTML, fatal=False)
+            if not media.get('file') or not media.get('desc'):
+                continue
+            media_url = self._proto_relative_url(media['file'])
+            if media_url in media_urls:
+                continue
+            media_urls.add(media_url)
+            entry = base_data.copy()
+            entry.update({
+                'id': compat_str(media['id']),
+                'url': media_url,
+                'duration': int_or_none(media.get('length')),
+                'vcodec': 'none' if media.get('provider') == 'audio' else None,
+            })
+            entry_title = compat_urllib_parse_unquote(media['desc'])
+            if entry_title:
+                entry['title'] = entry_title
+            yield entry
+
+
+class PolskieRadioIE(PolskieRadioBaseExtractor):
+    _VALID_URL = r'https?://(?:www\.)?polskieradio(?:24)?\.pl/\d+/\d+/Artykul/(?P<id>[0-9]+)'
     _TESTS = [{
         # Old-style single broadcast.
         'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943,Prof-Andrzej-Nowak-o-historii-nie-da-sie-myslec-beznamietnie',
         'info_dict': {
@@ -59,22 +91,14 @@ class PolskieRadioIE(InfoExtractor):
             'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$'
         },
     }],
-    }, {  # Old-style multiple broadcast playlist.
-        'url': 'https://www.polskieradio.pl/8/4346/Artykul/2487823,Marek-Kondrat-czyta-Mistrza-i-Malgorzate',
+    }, {
+        # PR4 audition - other frontend
+        'url': 'https://www.polskieradio.pl/10/6071/Artykul/2610977,Poglos-29-pazdziernika-godz-2301',
         'info_dict': {
-            'id': '2487823',
-            'title': 'Marek Kondrat czyta "Mistrza i Małgorzatę"',
-            'description': 'md5:8422a95cc83834f2aaeff9d82e9c8f39',
+            'id': '2610977',
+            'ext': 'mp3',
+            'title': 'Pogłos 29 października godz. 23:01',
         },
-        'playlist_mincount': 50,
-    }, {  # New-style multiple broadcast playlist.
- 'url': 'https://www.polskieradio.pl/8/4346/Artykul/2541317,Czytamy-Kalendarz-i-klepsydre-Tadeusza-Konwickiego', - 'info_dict': { - 'id': '2541317', - 'title': 'Czytamy "Kalendarz i klepsydrę" Tadeusza Konwickiego', - 'description': 'md5:0baeaa46d877f1351fb2eeed3e871f9f', - }, - 'playlist_mincount': 15, }, { 'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis', 'only_matching': True, @@ -85,6 +109,9 @@ class PolskieRadioIE(InfoExtractor): # with mp4 video 'url': 'http://www.polskieradio.pl/9/299/Artykul/1634903,Brexit-Leszek-Miller-swiat-sie-nie-zawali-Europa-bedzie-trwac-dalej', 'only_matching': True, + }, { + 'url': 'https://polskieradio24.pl/130/4503/Artykul/2621876,Narusza-nasza-suwerennosc-Publicysci-o-uzaleznieniu-funduszy-UE-od-praworzadnosci', + 'only_matching': True, }] def _real_extract(self, url): @@ -94,40 +121,38 @@ class PolskieRadioIE(InfoExtractor): content = self._search_regex( r'(?s)]+class="\s*this-article\s*"[^>]*>(.+?)]+class="tags"[^>]*>', - webpage, 'content') + webpage, 'content', default=None) timestamp = unified_timestamp(self._html_search_regex( r'(?s)]+id="datetime2"[^>]*>(.+?)', - webpage, 'timestamp', fatal=False)) + webpage, 'timestamp', default=None)) - thumbnail_url = self._og_search_thumbnail(webpage) - - entries = [] - - media_urls = set() - - for data_media in re.findall(r'<[^>]+data-media="?({[^>]+})"?', content): - media = self._parse_json(data_media, playlist_id, transform_source=unescapeHTML, fatal=False) - if not media.get('file') or not media.get('desc'): - continue - media_url = self._proto_relative_url(media['file'], 'http:') - if media_url in media_urls: - continue - media_urls.add(media_url) - entries.append({ - 'id': compat_str(media['id']), - 'url': media_url, - 'title': compat_urllib_parse_unquote(media['desc']), - 'duration': int_or_none(media.get('length')), - 'vcodec': 'none' if media.get('provider') == 'audio' else None, - 'timestamp': timestamp, - 'thumbnail': thumbnail_url - }) + thumbnail_url = self._og_search_thumbnail(webpage, default=None) title = self._og_search_title(webpage).strip() - description = strip_or_none(self._og_search_description(webpage)) + + description = strip_or_none(self._og_search_description(webpage, default=None)) description = description.replace('\xa0', ' ') if description is not None else None + if not content: + return { + 'id': playlist_id, + 'url': self._proto_relative_url( + self._search_regex( + r"source:\s*'(//static\.prsa\.pl/[^']+)'", + webpage, 'audition record url')), + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'thumbnail': thumbnail_url, + } + + entries = self._extract_webpage_player_entries(content, playlist_id, { + 'title': title, + 'timestamp': timestamp, + 'thumbnail': thumbnail_url, + }) + return self.playlist_result(entries, playlist_id, title, description) @@ -207,3 +232,201 @@ class PolskieRadioCategoryIE(InfoExtractor): return self.playlist_result( self._entries(url, webpage, category_id), category_id, title) + + +class PolskieRadioPlayerIE(InfoExtractor): + IE_NAME = 'polskieradio:player' + _VALID_URL = r'https?://player\.polskieradio\.pl/anteny/(?P[^/]+)' + + _BASE_URL = 'https://player.polskieradio.pl' + _PLAYER_URL = 'https://player.polskieradio.pl/main.bundle.js' + _STATIONS_API_URL = 'https://apipr.polskieradio.pl/api/stacje' + + _TESTS = [{ + 'url': 'https://player.polskieradio.pl/anteny/trojka', + 'info_dict': { + 'id': '3', + 'ext': 'm4a', + 'title': 'Trójka', + }, + 'params': { + 'format': 'bestaudio', + 
'skip_download': 'endless stream', + }, + }] + + def _get_channel_list(self, channel_url='no_channel'): + player_code = self._download_webpage( + self._PLAYER_URL, channel_url, + note='Downloading js player') + channel_list = js_to_json(self._search_regex( + r';var r="anteny",a=(\[.+?\])},', player_code, 'channel list')) + return self._parse_json(channel_list, channel_url) + + def _real_extract(self, url): + channel_url = self._match_id(url) + channel_list = self._get_channel_list(channel_url) + + channel = next((c for c in channel_list if c.get('url') == channel_url), None) + + if not channel: + raise ExtractorError('Channel not found') + + station_list = self._download_json(self._STATIONS_API_URL, channel_url, + note='Downloading stream url list', + headers={ + 'Accept': 'application/json', + 'Referer': url, + 'Origin': self._BASE_URL, + }) + station = next((s for s in station_list + if s.get('Name') == (channel.get('streamName') or channel.get('name'))), None) + if not station: + raise ExtractorError('Station not found even though we extracted channel') + + formats = [] + for stream_url in station['Streams']: + stream_url = self._proto_relative_url(stream_url) + if stream_url.endswith('/playlist.m3u8'): + formats.extend(self._extract_m3u8_formats(stream_url, channel_url, live=True)) + elif stream_url.endswith('/manifest.f4m'): + formats.extend(self._extract_mpd_formats(stream_url, channel_url)) + elif stream_url.endswith('/Manifest'): + formats.extend(self._extract_ism_formats(stream_url, channel_url)) + else: + formats.append({ + 'url': stream_url, + }) + + self._sort_formats(formats) + + return { + 'id': compat_str(channel['id']), + 'formats': formats, + 'title': channel.get('name') or channel.get('streamName'), + 'display_id': channel_url, + 'thumbnail': f'{self._BASE_URL}/images/{channel_url}-color-logo.png', + 'is_live': True, + } + + +class PolskieRadioPodcastBaseExtractor(InfoExtractor): + _API_BASE = 'https://apipodcasts.polskieradio.pl/api' + + def _parse_episode(self, data): + return { + 'id': data['guid'], + 'formats': [{ + 'url': data['url'], + 'filesize': int_or_none(data.get('fileSize')), + }], + 'title': data['title'], + 'description': data.get('description'), + 'duration': int_or_none(data.get('length')), + 'timestamp': parse_iso8601(data.get('publishDate')), + 'thumbnail': url_or_none(data.get('image')), + 'series': data.get('podcastTitle'), + 'episode': data['title'], + } + + +class PolskieRadioPodcastListIE(PolskieRadioPodcastBaseExtractor): + IE_NAME = 'polskieradio:podcast:list' + _VALID_URL = r'https?://podcasty\.polskieradio\.pl/podcast/(?P\d+)' + _TESTS = [{ + 'url': 'https://podcasty.polskieradio.pl/podcast/8/', + 'info_dict': { + 'id': '8', + 'title': 'Śniadanie w Trójce', + 'description': 'md5:57abcc27bc4c6a6b25baa3061975b9ef', + 'uploader': 'Beata Michniewicz', + }, + 'playlist_mincount': 714, + }] + _PAGE_SIZE = 10 + + def _call_api(self, podcast_id, page): + return self._download_json( + f'{self._API_BASE}/Podcasts/{podcast_id}/?pageSize={self._PAGE_SIZE}&page={page}', + podcast_id, f'Downloading page {page}') + + def _real_extract(self, url): + podcast_id = self._match_id(url) + data = self._call_api(podcast_id, 1) + + def get_page(page_num): + page_data = self._call_api(podcast_id, page_num + 1) if page_num else data + yield from (self._parse_episode(ep) for ep in page_data['items']) + + return { + '_type': 'playlist', + 'entries': InAdvancePagedList( + get_page, math.ceil(data['itemCount'] / self._PAGE_SIZE), self._PAGE_SIZE), + 'id': str(data['id']), + 
'title': data['title'],
+            'description': data.get('description'),
+            'duration': int_or_none(data.get('length')),
+            'timestamp': parse_iso8601(data.get('publishDate')),
+            'thumbnail': url_or_none(data.get('image')),
+            'series': data.get('podcastTitle'),
+            'episode': data['title'],
+        }
+
+
+class PolskieRadioPodcastListIE(PolskieRadioPodcastBaseExtractor):
+    IE_NAME = 'polskieradio:podcast:list'
+    _VALID_URL = r'https?://podcasty\.polskieradio\.pl/podcast/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://podcasty.polskieradio.pl/podcast/8/',
+        'info_dict': {
+            'id': '8',
+            'title': 'Śniadanie w Trójce',
+            'description': 'md5:57abcc27bc4c6a6b25baa3061975b9ef',
+            'uploader': 'Beata Michniewicz',
+        },
+        'playlist_mincount': 714,
+    }]
+    _PAGE_SIZE = 10
+
+    def _call_api(self, podcast_id, page):
+        return self._download_json(
+            f'{self._API_BASE}/Podcasts/{podcast_id}/?pageSize={self._PAGE_SIZE}&page={page}',
+            podcast_id, f'Downloading page {page}')
+
+    def _real_extract(self, url):
+        podcast_id = self._match_id(url)
+        data = self._call_api(podcast_id, 1)
+
+        def get_page(page_num):
+            page_data = self._call_api(podcast_id, page_num + 1) if page_num else data
+            yield from (self._parse_episode(ep) for ep in page_data['items'])
+
+        return {
+            '_type': 'playlist',
+            'entries': InAdvancePagedList(
+                get_page, math.ceil(data['itemCount'] / self._PAGE_SIZE), self._PAGE_SIZE),
+            'id': str(data['id']),
+            'title': data['title'],
+            'description': data.get('description'),
+            'uploader': data.get('announcer'),
+        }
+
+
+class PolskieRadioPodcastIE(PolskieRadioPodcastBaseExtractor):
+    IE_NAME = 'polskieradio:podcast'
+    _VALID_URL = r'https?://podcasty\.polskieradio\.pl/track/(?P<id>[a-f\d]{8}(?:-[a-f\d]{4}){4}[a-f\d]{8})'
+    _TESTS = [{
+        'url': 'https://podcasty.polskieradio.pl/track/6eafe403-cb8f-4756-b896-4455c3713c32',
+        'info_dict': {
+            'id': '6eafe403-cb8f-4756-b896-4455c3713c32',
+            'ext': 'mp3',
+            'title': 'Theresa May rezygnuje. Co dalej z brexitem?',
+            'description': 'md5:e41c409a29d022b70ef0faa61dbded60',
+        },
+    }]
+
+    def _real_extract(self, url):
+        podcast_id = self._match_id(url)
+        data = self._download_json(
+            f'{self._API_BASE}/audio',
+            podcast_id, 'Downloading podcast metadata',
+            data=json.dumps({
+                'guids': [podcast_id],
+            }).encode('utf-8'),
+            headers={
+                'Content-Type': 'application/json',
+            })
+        return self._parse_episode(data[0])
+
+
+class PolskieRadioRadioKierowcowIE(PolskieRadioBaseExtractor):
+    _VALID_URL = r'https?://(?:www\.)?radiokierowcow\.pl/artykul/(?P<id>[0-9]+)'
+    IE_NAME = 'polskieradio:kierowcow'
+
+    _TESTS = [{
+        'url': 'https://radiokierowcow.pl/artykul/2694529',
+        'info_dict': {
+            'id': '2694529',
+            'title': 'Zielona fala reliktem przeszłości?',
+            'description': 'md5:343950a8717c9818fdfd4bd2b8ca9ff2',
+        },
+        'playlist_count': 3,
+    }]
+
+    def _real_extract(self, url):
+        media_id = self._match_id(url)
+        webpage = self._download_webpage(url, media_id)
+        nextjs_build = self._search_nextjs_data(webpage, media_id)['buildId']
+        article = self._download_json(
+            f'https://radiokierowcow.pl/_next/data/{nextjs_build}/artykul/{media_id}.json?articleId={media_id}',
+            media_id)
+        data = article['pageProps']['data']
+        title = data['title']
+        entries = self._extract_webpage_player_entries(data['content'], media_id, {
+            'title': title,
+        })
+
+        return {
+            '_type': 'playlist',
+            'id': media_id,
+            'entries': entries,
+            'title': title,
+            'description': data.get('lead'),
+        }

From ed76230b3f61d3440da5b71170e243cd2bfe693b Mon Sep 17 00:00:00 2001
From: Lauren Liberda
Date: Sat, 23 Oct 2021 01:46:56 +0200
Subject: [PATCH 400/641] [polsatgo] Add extractor (#1386)

Authored by: selfisekai, sdomi
Co-authored-by: Dominika Liberda
---
 yt_dlp/extractor/extractors.py | 1 +
 yt_dlp/extractor/polsatgo.py | 90 ++++++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+)
 create mode 100644 yt_dlp/extractor/polsatgo.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 741b9f0210..bd0da2c387 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1105,6 +1105,7 @@ from .pokemon import (
     PokemonIE,
     PokemonWatchIE,
 )
+from .polsatgo import PolsatGoIE
 from .polskieradio import (
     PolskieRadioIE,
     PolskieRadioCategoryIE,
diff --git a/yt_dlp/extractor/polsatgo.py b/yt_dlp/extractor/polsatgo.py
new file mode 100644
index 0000000000..1e3f46c07c
--- /dev/null
+++ b/yt_dlp/extractor/polsatgo.py
@@ -0,0 +1,90 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from uuid import uuid4
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    try_get,
+    url_or_none,
+    ExtractorError,
+)
+
+
+class PolsatGoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?polsat(?:box)?go\.pl/.+/(?P<id>[0-9a-fA-F]+)(?:[/#?]|$)'
+    _TESTS = [{
+        'url': 'https://polsatgo.pl/wideo/seriale/swiat-wedlug-kiepskich/5024045/sezon-1/5028300/swiat-wedlug-kiepskich-odcinek-88/4121',
+        'info_dict': {
+            'id': '4121',
+            'ext': 'mp4',
+
'title': 'Świat według Kiepskich - Odcinek 88', + 'age_limit': 12, + }, + }] + + def _extract_formats(self, sources, video_id): + for source in sources or []: + if not source.get('id'): + continue + url = url_or_none(self._call_api( + 'drm', video_id, 'getPseudoLicense', + {'mediaId': video_id, 'sourceId': source['id']}).get('url')) + if not url: + continue + yield { + 'url': url, + 'height': int_or_none(try_get(source, lambda x: x['quality'][:-1])) + } + + def _real_extract(self, url): + video_id = self._match_id(url) + media = self._call_api('navigation', video_id, 'prePlayData', {'mediaId': video_id})['mediaItem'] + + formats = list(self._extract_formats( + try_get(media, lambda x: x['playback']['mediaSources']), video_id)) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': media['displayInfo']['title'], + 'formats': formats, + 'age_limit': int_or_none(media['displayInfo']['ageGroup']) + } + + def _call_api(self, endpoint, media_id, method, params): + rand_uuid = str(uuid4()) + res = self._download_json( + f'https://b2c-mobile.redefine.pl/rpc/{endpoint}/', media_id, + note=f'Downloading {method} JSON metadata', + data=json.dumps({ + 'method': method, + 'id': '2137', + 'jsonrpc': '2.0', + 'params': { + **params, + 'userAgentData': { + 'deviceType': 'mobile', + 'application': 'native', + 'os': 'android', + 'build': 10003, + 'widevine': False, + 'portal': 'pg', + 'player': 'cpplayer', + }, + 'deviceId': { + 'type': 'other', + 'value': rand_uuid, + }, + 'clientId': rand_uuid, + 'cpid': 1, + }, + }).encode('utf-8'), + headers={'Content-type': 'application/json'}) + if not res.get('result'): + if res['error']['code'] == 13404: + raise ExtractorError('This video is either unavailable in your region or is DRM protected', expected=True) + raise ExtractorError(f'Solorz said: {res["error"]["message"]} - {res["error"]["data"]["userMessage"]}') + return res['result'] From 3f771f75d7277e54411a6e2ae36e74d7ddb993dd Mon Sep 17 00:00:00 2001 From: Lauren Liberda Date: Sun, 31 Oct 2021 10:58:57 +0530 Subject: [PATCH 401/641] [radiokapital] Add extractors (#1401) Authored by: selfisekai --- yt_dlp/extractor/extractors.py | 4 ++ yt_dlp/extractor/radiokapital.py | 99 ++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 yt_dlp/extractor/radiokapital.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index bd0da2c387..4a06ec5787 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1159,6 +1159,10 @@ from .radiode import RadioDeIE from .radiojavan import RadioJavanIE from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE +from .radiokapital import ( + RadioKapitalIE, + RadioKapitalShowIE, +) from .radlive import ( RadLiveIE, RadLiveChannelIE, diff --git a/yt_dlp/extractor/radiokapital.py b/yt_dlp/extractor/radiokapital.py new file mode 100644 index 0000000000..2e93e034f7 --- /dev/null +++ b/yt_dlp/extractor/radiokapital.py @@ -0,0 +1,99 @@ +# coding: utf-8 + +from .common import InfoExtractor +from ..utils import ( + clean_html, + traverse_obj, + unescapeHTML, +) + +import itertools +from urllib.parse import urlencode + + +class RadioKapitalBaseIE(InfoExtractor): + def _call_api(self, resource, video_id, note='Downloading JSON metadata', qs={}): + return self._download_json( + f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{urlencode(qs)}', + video_id, note=note) + + def _parse_episode(self, data): + release = '%s%s%s' % (data['published'][6:11], 
data['published'][3:6], data['published'][:3]) + return { + '_type': 'url_transparent', + 'url': data['mixcloud_url'], + 'ie_key': 'Mixcloud', + 'title': unescapeHTML(data['title']), + 'description': clean_html(data.get('content')), + 'tags': traverse_obj(data, ('tags', ..., 'name')), + 'release_date': release, + 'series': traverse_obj(data, ('show', 'title')), + } + + +class RadioKapitalIE(RadioKapitalBaseIE): + IE_NAME = 'radiokapital' + _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/[a-z\d-]+/(?P[a-z\d-]+)' + + _TESTS = [{ + 'url': 'https://radiokapital.pl/shows/tutaj-sa-smoki/5-its-okay-to-be-immaterial', + 'info_dict': { + 'id': 'radiokapital_radio-kapitał-tutaj-są-smoki-5-its-okay-to-be-immaterial-2021-05-20', + 'ext': 'm4a', + 'title': '#5: It’s okay to\xa0be\xa0immaterial', + 'description': 'md5:2499da5fbfb0e88333b7d37ec8e9e4c4', + 'uploader': 'Radio Kapitał', + 'uploader_id': 'radiokapital', + 'timestamp': 1621640164, + 'upload_date': '20210521', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + episode = self._call_api('episodes/%s' % video_id, video_id) + return self._parse_episode(episode) + + +class RadioKapitalShowIE(RadioKapitalBaseIE): + IE_NAME = 'radiokapital:show' + _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/(?P[a-z\d-]+)/?(?:$|[?#])' + + _TESTS = [{ + 'url': 'https://radiokapital.pl/shows/wesz', + 'info_dict': { + 'id': '100', + 'title': 'WĘSZ', + 'description': 'md5:3a557a1e0f31af612b0dcc85b1e0ca5c', + }, + 'playlist_mincount': 17, + }] + + def _get_episode_list(self, series_id, page_no): + return self._call_api( + 'episodes', series_id, + f'Downloading episode list page #{page_no}', qs={ + 'show': series_id, + 'page': page_no, + }) + + def _entries(self, series_id): + for page_no in itertools.count(1): + episode_list = self._get_episode_list(series_id, page_no) + yield from (self._parse_episode(ep) for ep in episode_list['items']) + if episode_list['next'] is None: + break + + def _real_extract(self, url): + series_id = self._match_id(url) + + show = self._call_api(f'shows/{series_id}', series_id, 'Downloading show metadata') + entries = self._entries(series_id) + return { + '_type': 'playlist', + 'entries': entries, + 'id': str(show['id']), + 'title': show.get('title'), + 'description': clean_html(show.get('content')), + } From c0599d4fe493730236c7e62ed63575ea0d3f3fa2 Mon Sep 17 00:00:00 2001 From: Lauren Liberda Date: Sun, 31 Oct 2021 10:59:17 +0530 Subject: [PATCH 402/641] [wppilot] Add extractors (#1401) Authored by: selfisekai --- yt_dlp/extractor/extractors.py | 4 + yt_dlp/extractor/wppilot.py | 177 +++++++++++++++++++++++++++++++++ 2 files changed, 181 insertions(+) create mode 100644 yt_dlp/extractor/wppilot.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 4a06ec5787..d47c066476 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1788,6 +1788,10 @@ from .wistia import ( WistiaPlaylistIE, ) from .worldstarhiphop import WorldStarHipHopIE +from .wppilot import ( + WPPilotIE, + WPPilotChannelsIE, +) from .wsj import ( WSJIE, WSJArticleIE, diff --git a/yt_dlp/extractor/wppilot.py b/yt_dlp/extractor/wppilot.py new file mode 100644 index 0000000000..3003a0f108 --- /dev/null +++ b/yt_dlp/extractor/wppilot.py @@ -0,0 +1,177 @@ +# coding: utf-8 + +from .common import InfoExtractor +from ..utils import ( + try_get, + ExtractorError, +) + +import json +import random +import re + + +class WPPilotBaseIE(InfoExtractor): + _VIDEO_URL = 
'https://pilot.wp.pl/api/v1/channel/%s' + _VIDEO_GUEST_URL = 'https://pilot.wp.pl/api/v1/guest/channel/%s' + + _HEADERS_WEB = { + 'Content-Type': 'application/json; charset=UTF-8', + 'Referer': 'https://pilot.wp.pl/tv/', + } + + def _get_channel_list(self, cache=True): + if cache is True: + cache_res = self._downloader.cache.load('wppilot', 'channel-list') + if cache_res: + return cache_res, True + webpage = self._download_webpage('https://pilot.wp.pl/tv/', None, 'Downloading webpage') + page_data_base_url = self._search_regex( + r'', + ], webpage, 'video id', default=page_id) return { '_type': 'url_transparent', 'url': 'tvp:' + video_id, 'description': self._og_search_description( - webpage, default=None) or self._html_search_meta( - 'description', webpage, default=None), + webpage, default=None) or (self._html_search_meta( + 'description', webpage, default=None) + if '//s.tvp.pl/files/portal/v' in webpage else None), 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'ie_key': 'TVPEmbed', } @@ -252,18 +417,20 @@ class TVPWebsiteIE(InfoExtractor): _TESTS = [{ # series - 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312/video', + 'url': 'https://vod.tvp.pl/website/wspaniale-stulecie,17069012/video', 'info_dict': { - 'id': '38678312', + 'id': '17069012', }, - 'playlist_count': 115, + 'playlist_count': 312, }, { # film - 'url': 'https://vod.tvp.pl/website/gloria,35139666', + 'url': 'https://vod.tvp.pl/website/krzysztof-krawczyk-cale-moje-zycie,51374466', 'info_dict': { - 'id': '36637049', + 'id': '51374509', 'ext': 'mp4', - 'title': 'Gloria, Gloria', + 'title': 'Krzysztof Krawczyk – całe moje życie, Krzysztof Krawczyk – całe moje życie', + 'description': 'md5:2e80823f00f5fc263555482f76f8fa42', + 'age_limit': 12, }, 'params': { 'skip_download': True, From ebfab36fca0901f99076158f9eb4f7fc9d87589b Mon Sep 17 00:00:00 2001 From: Lauren Liberda Date: Sun, 31 Oct 2021 11:03:04 +0530 Subject: [PATCH 405/641] [tvp] Add TVPStreamIE (#1401) Authored by: selfisekai --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/tvp.py | 46 ++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index d47c066476..4f9de71e27 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1571,6 +1571,7 @@ from .tvnow import ( from .tvp import ( TVPEmbedIE, TVPIE, + TVPStreamIE, TVPWebsiteIE, ) from .tvplay import ( diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py index 22cfbd25e0..48e2c6e764 100644 --- a/yt_dlp/extractor/tvp.py +++ b/yt_dlp/extractor/tvp.py @@ -251,6 +251,52 @@ class TVPIE(InfoExtractor): } +class TVPStreamIE(InfoExtractor): + IE_NAME = 'tvp:stream' + _VALID_URL = r'(?:tvpstream:|https?://tvpstream\.vod\.tvp\.pl/(?:\?(?:[^&]+[&;])*channel_id=)?)(?P\d*)' + _TESTS = [{ + # untestable as "video" id changes many times across a day + 'url': 'https://tvpstream.vod.tvp.pl/?channel_id=1455', + 'only_matching': True, + }, { + 'url': 'tvpstream:39821455', + 'only_matching': True, + }, { + # the default stream when you provide no channel_id, most probably TVP Info + 'url': 'tvpstream:', + 'only_matching': True, + }, { + 'url': 'https://tvpstream.vod.tvp.pl/', + 'only_matching': True, + }] + + _PLAYER_BOX_RE = r']*id\s*=\s*["\']?tvp_player_box["\']?[^>]+data-%s-id\s*=\s*["\']?(\d+)' + _BUTTON_RE = r']*data-channel-id=["\']?%s["\']?[^>]*\sdata-title=(?:"([^"]*)"|\'([^\']*)\')[^>]*\sdata-stationname=(?:"([^"]*)"|\'([^\']*)\')' + + def _real_extract(self, url): + 
channel_id = self._match_id(url) + channel_url = self._proto_relative_url('//tvpstream.vod.tvp.pl/?channel_id=%s' % channel_id or 'default') + webpage = self._download_webpage(channel_url, channel_id, 'Downloading channel webpage') + if not channel_id: + channel_id = self._search_regex(self._PLAYER_BOX_RE % 'channel', + webpage, 'default channel id') + video_id = self._search_regex(self._PLAYER_BOX_RE % 'video', + webpage, 'video id') + audition_title, station_name = self._search_regex( + self._BUTTON_RE % (re.escape(channel_id)), webpage, + 'audition title and station name', + group=(1, 2)) + return { + '_type': 'url_transparent', + 'id': channel_id, + 'url': 'tvp:%s' % video_id, + 'title': audition_title, + 'alt_title': station_name, + 'is_live': True, + 'ie_key': 'TVPEmbed', + } + + class TVPEmbedIE(InfoExtractor): IE_NAME = 'tvp:embed' IE_DESC = 'Telewizja Polska' From 86c1a8aae4db4a5b720cbd7c9465de350d64edef Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 6 Nov 2021 09:30:38 +0530 Subject: [PATCH 406/641] Release 2021.11.10 --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 2 +- .../ISSUE_TEMPLATE/2_site_support_request.yml | 2 +- .github/ISSUE_TEMPLATE/4_bug_report.yml | 4 +- CONTRIBUTORS | 10 +++ Changelog.md | 85 +++++++++++++++++++ README.md | 18 ++-- supportedsites.md | 24 +++++- 7 files changed, 133 insertions(+), 12 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index 862e7235fd..67145d8b21 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -43,7 +43,7 @@ body: attributes: label: Verbose log description: | - Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. + Provide the complete verbose output of yt-dlp **that clearly demonstrates the problem**. Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv `), copy the WHOLE output and insert it below. It should look similar to this: placeholder: | diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index aa00b8ad7b..30cebec910 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -54,7 +54,7 @@ body: attributes: label: Verbose log description: | - Provide the complete verbose output using one of the example URLs provided above. + Provide the complete verbose output **using one of the example URLs provided above**. Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv `), copy the WHOLE output and insert it below. It should look similar to this: placeholder: | diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index 9003bb19ae..445945df4f 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -37,8 +37,8 @@ body: attributes: label: Verbose log description: | - Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. - Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv `), copy the WHOLE output and insert it below. + Provide the complete verbose output of yt-dlp **that clearly demonstrates the problem**. + Add the `-Uv` flag to **your** command line you run yt-dlp with (`yt-dlp -Uv `), copy the WHOLE output and insert it below. 
It should look similar to this: placeholder: | [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc'] diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 2bf96affe4..f035ce10d8 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -129,3 +129,13 @@ Bojidarist nixklai smplayer-dev Zirro +CrypticSignal +flashdagger +fractalf +frafra +kaz-us +ozburo +rhendric +sdomi +selfisekai +stanoarn diff --git a/Changelog.md b/Changelog.md index d74237dd42..6124d6bd0f 100644 --- a/Changelog.md +++ b/Changelog.md @@ -14,6 +14,91 @@ --> +### 2021.11.10 + +* [youtube] **Fix throttling by decrypting n-sig** +* Merging extractors from [haruhi-dl](https://git.sakamoto.pl/laudom/haruhi-dl) by [selfisekai](https://github.com/selfisekai) + * [extractor] Add `_search_nextjs_data` + * [tvp] Fix extractors + * [tvp] Add TVPStreamIE + * [wppilot] Add extractors + * [polskieradio] Add extractors + * [radiokapital] Add extractors + * [polsatgo] Add extractor by [selfisekai](https://github.com/selfisekai), [sdomi](https://github.com/sdomi) +* Separate `--check-all-formats` from `--check-formats` +* Approximate filesize from bitrate +* Don't create console in `windows_enable_vt_mode` +* Fix bug in `--load-infojson` of playlists +* [minicurses] Add colors to `-F` and standardize color-printing code +* [outtmpl] Add type `link` for internet shortcut files +* [outtmpl] Add alternate forms for `q` and `j` +* [outtmpl] Do not traverse `None` +* [fragment] Fix progress display in fragmented downloads +* [downloader/ffmpeg] Fix vtt download with ffmpeg +* [ffmpeg] Detect presence of setts and libavformat version +* [ExtractAudio] Rescale --audio-quality correctly by [CrypticSignal](https://github.com/CrypticSignal), [pukkandan](https://github.com/pukkandan) +* [ExtractAudio] Use `libfdk_aac` if available by [CrypticSignal](https://github.com/CrypticSignal) +* [FormatSort] `eac3` is better than `ac3` +* [FormatSort] Fix some fields' defaults +* [generic] Detect more json_ld +* [generic] parse jwplayer with only the json URL +* [extractor] Add keyword automatically to SearchIE descriptions +* [extractor] Fix some errors being converted to `ExtractorError` +* [utils] Add `join_nonempty` +* [utils] Add `jwt_decode_hs256` by [Ashish0804](https://github.com/Ashish0804) +* [utils] Create `DownloadCancelled` exception +* [utils] Parse `vp09` as vp9 +* [utils] Sanitize URL when determining protocol +* [test/download] Fallback test to `bv` +* [docs] Minor documentation improvements +* [cleanup] Improvements to error and debug messages +* [cleanup] Minor fixes and cleanup +* [3speak] Add extractors by [Ashish0804](https://github.com/Ashish0804) +* [AmazonStore] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [Gab] Add extractor by [u-spec-png](https://github.com/u-spec-png) +* [mediaset] Add playlist support by [nixxo](https://github.com/nixxo) +* [MLSScoccer] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [N1] Add support for nova.rs by [u-spec-png](https://github.com/u-spec-png) +* [PlanetMarathi] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [RaiplayRadio] Add extractors by [frafra](https://github.com/frafra) +* [roosterteeth] Add series extractor +* [sky] Add `SkyNewsStoryIE` by [ajj8](https://github.com/ajj8) +* [youtube] Fix sorting for some videos +* [youtube] Populate `thumbnail` with the best "known" thumbnail +* [youtube] Refactor itag processing +* [youtube] Remove unnecessary no-playlist warning +* [youtube:tab] Add Invidious list for playlists/channels by 
[rhendric](https://github.com/rhendric) +* [Bilibili:comments] Fix infinite loop by [u-spec-png](https://github.com/u-spec-png) +* [ceskatelevize] Fix extractor by [flashdagger](https://github.com/flashdagger) +* [Coub] Fix media format identification by [wlritchi](https://github.com/wlritchi) +* [crunchyroll] Add extractor-args `language` and `hardsub` +* [DiscoveryPlus] Allow language codes in URL +* [imdb] Fix thumbnail by [ozburo](https://github.com/ozburo) +* [instagram] Add IOS URL support by [u-spec-png](https://github.com/u-spec-png) +* [instagram] Improve login code by [u-spec-png](https://github.com/u-spec-png) +* [Instagram] Improve metadata extraction by [u-spec-png](https://github.com/u-spec-png) +* [iPrima] Fix extractor by [stanoarn](https://github.com/stanoarn) +* [itv] Add support for ITV News by [ajj8](https://github.com/ajj8) +* [la7] Fix extractor by [nixxo](https://github.com/nixxo) +* [linkedin] Don't login multiple times +* [mtv] Fix some videos by [Sipherdrakon](https://github.com/Sipherdrakon) +* [Newgrounds] Fix description by [u-spec-png](https://github.com/u-spec-png) +* [Nrk] Minor fixes by [fractalf](https://github.com/fractalf) +* [Olympics] Fix extractor by [u-spec-png](https://github.com/u-spec-png) +* [piksel] Fix sorting +* [twitter] Do not sort by codec +* [viewlift] Add cookie-based login and series support by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan) +* [vimeo] Detect source extension and misc cleanup by [flashdagger](https://github.com/flashdagger) +* [vimeo] Fix ondemand videos and direct URLs with hash +* [vk] Fix login and add subtitles by [kaz-us](https://github.com/kaz-us) +* [VLive] Add upload_date and thumbnail by [Ashish0804](https://github.com/Ashish0804) +* [VRT] Fix login by [pgaig](https://github.com/pgaig) +* [Vupload] Fix extractor by [u-spec-png](https://github.com/u-spec-png) +* [wakanim] Add support for MPD manifests by [nyuszika7h](https://github.com/nyuszika7h) +* [wakanim] Detect geo-restriction by [nyuszika7h](https://github.com/nyuszika7h) +* [ZenYandex] Fix extractor by [u-spec-png](https://github.com/u-spec-png) + + ### 2021.10.22 * [build] Improvements diff --git a/README.md b/README.md index 713c2c4a01..24975ad6ff 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * All Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`) and private playlists supports downloading multiple pages of content * Search (`ytsearch:`, `ytsearchdate:`), search URLs and in-channel search works * Mixes supports downloading multiple pages of content - * Most (but not all) age-gated content can be downloaded without cookies + * Some (but not all) age-gated content can be downloaded without cookies * Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) * Redirect channel's home URL automatically to `/video` to preserve the old behaviour * `255kbps` audio is extracted (if available) from youtube music when premium cookies are given @@ -92,9 +92,13 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats -* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, RCN MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, 
whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ, BannedVideo, bilibili categories, Epicon, filmmodu, GabTV, HungamaAlbum, ManotoTV, Niconico search, Patreon User, peloton, ProjectVeritas, radiko, StarTV, tiktok user, Tokentube, voicy, TV2HuSeries, biliintl, 17live, NewgroundsUser, peertube channel/playlist, ZenYandex, CAM4, CGTN, damtomo, gotostage, Koo, Mediaite, Mediaklikk, MuseScore, nzherald, Olympics replay, radlive, SovietsCloset, Streamanity, Theta, Chingari, ciscowebex, Gettr, GoPro, N1, Theta, Veo, Vupload, NovaPlay, SkyNewsAU, EUScreen, Gronkh, microsoftstream, on24, trovo channels +* **New extractors**: 17live, 3speak, amazonstore, animelab, audius, bandcampmusic, bannedvideo, biliintl, bitwave.tv, blackboardcollaborate, cam4, cgtn, chingari, ciscowebex, damtomo, discoveryplus.in, douyin, epicon, euscreen, fancode, filmmodu, gab, gedi, gettr, gopro, gotostage, gronkh, koo, manototv, mediaite, mediaklikk, mediasetshow, mediathek, microsoftstream, mildom, mirrativ, mlsscoccer, mtv.it, musescore, mxplayershow, n1, nebula, nfhsnetwork, novaplay, nzherald, olympics replay, on24, openrec, parlview-AU, peloton, planetmarathi, pluto.tv, polsatgo, polskieradio, pornflip, projectveritas, radiko, radiokapital, radlive, raiplayradio, rcs, rctiplus, saitosan, sciencechannel, shemaroome, skynews-AU, skynews-story, sovietscloset, startv, streamanity, telemundo, theta, theta, tokentube, tv2huseries, ukcolumn, utreon, veo, vidiolive, vidiopremier, voicy, vupload, whowatch, wim.tv, wppilot, youtube webarchive, zee5, zen.yandex -* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds, SAML Verizon login, Hungama, afreecatv, aljazeera, ATV, bitchute, camtube, CDA, eroprofile, facebook, HearThisAtIE, iwara, kakao, Motherless, Nova, peertube, pornhub, reddit, tiktok, TV2, TV2Hu, tv5mondeplus, VH1, Viafree, XHamster, 9Now, AnimalPlanet, Arte, CBC, Chingari, comedycentral, DIYNetwork, niconico, dw, funimation, globo, HiDive, NDR, Nuvid, Oreilly, pbs, plutotv, reddit, redtube, soundcloud, SpankBang, VrtNU, bbc, Bilibili, LinkedInLearning, parliamentlive, PolskieRadio, Streamable, vidme, francetv, 7plus, tagesschau +* **New playlist extractors**: bilibili categories, eroprofile albums, hotstar series, hungama albums, newgrounds user, niconico search/users, paramountplus series, patreon user, peertube playlist/channels, roosterteeth series, sonyliv series, tiktok user, trovo channels, voot series + +* **Fixed/improved extractors**: 7plus, 9now, afreecatv, akamai, aljazeera, amcnetworks, animalplanet, archive.org, arte, atv, bbc, bilibili, bitchute, bravotv, camtube, cbc, cda, ceskatelevize, chingari, comedycentral, coub, crackle, crunchyroll, curiositystream, diynetwork, dw, eroprofile, facebook, francetv, funimation, globo, hearthisatie, hidive, hotstar, hungama, imdb, ina, instagram, iprima, itv, iwara, kakao, la7, linkedinlearning, linuxacadamy, mediaset, 
mediasite, motherless, mxplayer, nbcolympics, ndr, newgrounds, niconico, nitter, nova, nrk, nuvid, oreilly, paramountplus, parliamentlive, patreon, pbs, peertube, plutotv, polskieradio, pornhub, reddit, reddit, redtube, rmcdecouverte, roosterteeth, rtp, rumble, saml verizon login, skyit, sonyliv, soundcloud, southparkde, spankbang, spreaker, streamable, tagesschau, tbs, tennistv, tenplay, tiktok, tubi, tv2, tv2hu, tv5mondeplus, tvp, twitcasting, vh1, viafree, videa, vidio, vidme, viewlift, viki, vimeo, viu, vk, vlive, vrt, wakanim, xhamster, yahoo + +* **New MSOs**: Philo, Spectrum, SlingTV, Cablevision, RCN * **Subtitle extraction from manifests**: Subtitles can be extracted from streaming media manifests. See [commit/be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details @@ -108,7 +112,7 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * **Improvements**: Regex and other operators in `--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection) etc -* **Plugin extractors**: Extractors can be loaded from an external file. See [plugins](#plugins) for details +* **Plugins**: Extractors and PostProcessors can be loaded from an external file. See [plugins](#plugins) for details * **Self-updater**: The releases can be updated using `yt-dlp -U` @@ -184,12 +188,12 @@ You can install the [PyPI package](https://pypi.org/project/yt-dlp) with: python3 -m pip install -U yt-dlp ``` -You can also install without any dependencies using: +You can install without any of the optional dependencies using: ``` python3 -m pip install --no-deps -U yt-dlp ``` -You can also install the master branch with: +If you want to be on the cutting edge, you can also install the master branch with: ``` python3 -m pip3 install --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.zip ``` @@ -790,7 +794,7 @@ You can also fork the project on github and push it to a release branch in your formats are: best (default) or one of best|aac|flac|mp3|m4a|opus|vorbis|wav --audio-quality QUALITY Specify ffmpeg audio quality, insert a - value between 0 (better) and 9 (worse) for + value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K (default 5) --remux-video FORMAT Remux the video into another container if diff --git a/supportedsites.md b/supportedsites.md index 01c3f43a97..50fa7f9f13 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -48,6 +48,7 @@ - **Alura** - **AluraCourse** - **Amara** + - **AmazonStore** - **AMCNetworks** - **AmericasTestKitchen** - **AmericasTestKitchenSeason** @@ -184,7 +185,6 @@ - **CCTV**: 央视网 - **CDA** - **CeskaTelevize** - - **CeskaTelevizePorady** - **CGTN** - **channel9**: Channel 9 - **CharlieRose** @@ -366,6 +366,7 @@ - **Funk** - **Fusion** - **Fux** + - **Gab** - **GabTV** - **Gaia** - **GameInformer** @@ -449,9 +450,11 @@ - **Instagram** - **instagram:tag**: Instagram hashtag search - **instagram:user**: Instagram user profile + - **InstagramIOS**: IOS instagram:// URL - **Internazionale** - **InternetVideoArchive** - **IPrima** + - **IPrimaCNN** - **iqiyi**: 爱奇艺 - **Ir90Tv** - **ITTF** @@ -560,6 +563,7 @@ - **MediaKlikk** - **Medialaan** - **Mediaset** + - **MediasetShow** - **Mediasite** - **MediasiteCatalog** - **MediasiteNamedCatalog** @@ -592,6 +596,7 @@ - **mixcloud:user** - **MLB** - **MLBVideo** + - **MLSSoccer** - **Mnet** - **MNetTV** - **MoeVideo**: LetitBit video services: 
moevideo.net, playreplay.net and videochart.net @@ -801,6 +806,7 @@ - **Pinterest** - **PinterestCollection** - **Pladform** + - **PlanetMarathi** - **Platzi** - **PlatziCourse** - **play.fm** @@ -817,7 +823,12 @@ - **podomatic** - **Pokemon** - **PokemonWatch** + - **PolsatGo** - **PolskieRadio** + - **polskieradio:kierowcow** + - **polskieradio:player** + - **polskieradio:podcast** + - **polskieradio:podcast:list** - **PolskieRadioCategory** - **Popcorntimes** - **PopcornTV** @@ -860,6 +871,8 @@ - **radiocanada:audiovideo** - **radiofrance** - **RadioJavan** + - **radiokapital** + - **radiokapital:show** - **radlive** - **radlive:channel** - **radlive:season** @@ -867,6 +880,8 @@ - **RaiPlay** - **RaiPlayLive** - **RaiPlayPlaylist** + - **RaiPlayRadio** + - **RaiPlayRadioPlaylist** - **RayWenderlich** - **RayWenderlichCourse** - **RBMARadio** @@ -894,6 +909,7 @@ - **RMCDecouverte** - **RockstarGames** - **RoosterTeeth** + - **RoosterTeethSeries** - **RottenTomatoes** - **Roxwel** - **Rozhlas** @@ -961,6 +977,7 @@ - **Sina** - **sky.it** - **sky:news** + - **sky:news:story** - **sky:sports** - **sky:sports:news** - **skyacademy.it** @@ -1079,6 +1096,8 @@ - **ThisAmericanLife** - **ThisAV** - **ThisOldHouse** + - **ThreeSpeak** + - **ThreeSpeakUser** - **TikTok** - **tiktok:user** - **tinypic**: tinypic.com videos @@ -1142,6 +1161,7 @@ - **tvp**: Telewizja Polska - **tvp:embed**: Telewizja Polska - **tvp:series** + - **tvp:stream** - **TVPlayer** - **TVPlayHome** - **Tweakers** @@ -1296,6 +1316,8 @@ - **WistiaPlaylist** - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **WorldStarHipHop** + - **wppilot** + - **wppilot:channels** - **WSJ**: Wall Street Journal - **WSJArticle** - **WWE** From 2e9a445bc34e79182f900909d727ba87f8487522 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 10 Nov 2021 01:14:33 +0000 Subject: [PATCH 407/641] [version] update :ci skip all --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.yml | 6 +++--- .github/ISSUE_TEMPLATE/3_site_feature_request.yml | 2 +- .github/ISSUE_TEMPLATE/4_bug_report.yml | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.yml | 2 +- yt_dlp/version.py | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index 67145d8b21..8200bdeb43 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a broken site required: true - - label: I've verified that I'm running yt-dlp version **2021.10.22**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -51,12 +51,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.10.22 (exe) + [debug] yt-dlp version 2021.11.10 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.10.22) + yt-dlp is up to date (2021.11.10) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 30cebec910..8736184a3f 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2021.10.22**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -62,12 +62,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.10.22 (exe) + [debug] yt-dlp version 2021.11.10 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.10.22) + yt-dlp is up to date (2021.11.10) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index 59578b7122..a8576e21cb 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a site feature request required: true - - label: I've verified that I'm running yt-dlp version **2021.10.22**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index 445945df4f..56b233ce73 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2021.10.22**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -45,12 +45,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.10.22 (exe) + [debug] yt-dlp version 2021.11.10 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.10.22) + yt-dlp is up to date (2021.11.10) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index 134416f4e1..0937f09ce3 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a feature request required: true - - label: I've verified that I'm running yt-dlp version **2021.10.22**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates required: true diff --git a/yt_dlp/version.py b/yt_dlp/version.py index e7203be6b6..197e7389cf 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2021.10.22' +__version__ = '2021.11.10' From 7144b697fc20d6615690e5ec63e6c134ddb7aa5e Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 10 Nov 2021 06:58:42 +0530 Subject: [PATCH 408/641] Release 2021.11.10.1 :ci skip all --- .github/workflows/build.yml | 11 ++++++----- Changelog.md | 4 ++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0fff6cae36..f75b11700c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -146,6 +146,7 @@ jobs: build_macos: runs-on: macos-11 needs: build_unix + if: False outputs: sha256_macos: ${{ steps.sha256_macos.outputs.sha256_macos }} sha512_macos: ${{ steps.sha512_macos.outputs.sha512_macos }} @@ -344,7 +345,7 @@ jobs: finish: runs-on: ubuntu-latest - needs: [build_unix, build_windows, build_windows32, build_macos] + needs: [build_unix, build_windows, build_windows32] steps: - name: Make SHA2-256SUMS file @@ -364,8 +365,8 @@ jobs: echo "${{ env.SHA256_PY2EXE }} yt-dlp_min.exe" >> SHA2-256SUMS echo "${{ env.SHA256_WIN32 }} yt-dlp_x86.exe" >> SHA2-256SUMS echo "${{ env.SHA256_WIN_ZIP }} yt-dlp_win.zip" >> SHA2-256SUMS - echo "${{ env.SHA256_MACOS }} yt-dlp_macos" >> SHA2-256SUMS - echo "${{ env.SHA256_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-256SUMS + # echo "${{ env.SHA256_MACOS }} yt-dlp_macos" >> SHA2-256SUMS + # echo "${{ env.SHA256_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-256SUMS - name: Upload 256SUMS file id: upload-sums uses: actions/upload-release-asset@v1 @@ -393,8 +394,8 @@ jobs: echo "${{ env.SHA512_WIN_ZIP }} yt-dlp_win.zip" >> SHA2-512SUMS echo "${{ env.SHA512_PY2EXE }} yt-dlp_min.exe" >> SHA2-512SUMS echo "${{ env.SHA512_WIN32 }} yt-dlp_x86.exe" >> SHA2-512SUMS - echo "${{ env.SHA512_MACOS }} yt-dlp_macos" >> SHA2-512SUMS 
- echo "${{ env.SHA512_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-512SUMS + # echo "${{ env.SHA512_MACOS }} yt-dlp_macos" >> SHA2-512SUMS + # echo "${{ env.SHA512_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-512SUMS - name: Upload 512SUMS file id: upload-512sums uses: actions/upload-release-asset@v1 diff --git a/Changelog.md b/Changelog.md index 6124d6bd0f..5ac2aa6157 100644 --- a/Changelog.md +++ b/Changelog.md @@ -14,6 +14,10 @@ --> +### 2021.11.10.1 + +* Temporarily disable MacOS Build + ### 2021.11.10 * [youtube] **Fix throttling by decrypting n-sig** From 9ebf3c6ab97c29b2d5872122e532bc98b93ad8b3 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 10 Nov 2021 01:47:10 +0000 Subject: [PATCH 409/641] [version] update :ci skip all --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.yml | 6 +++--- .github/ISSUE_TEMPLATE/3_site_feature_request.yml | 2 +- .github/ISSUE_TEMPLATE/4_bug_report.yml | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.yml | 2 +- yt_dlp/version.py | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index 8200bdeb43..27e07fb186 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a broken site required: true - - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -51,12 +51,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.11.10 (exe) + [debug] yt-dlp version 2021.11.10.1 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.11.10) + yt-dlp is up to date (2021.11.10.1) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 8736184a3f..b274185440 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10.1**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -62,12 +62,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.11.10 (exe) + [debug] yt-dlp version 2021.11.10.1 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.11.10) + yt-dlp is up to date (2021.11.10.1) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index a8576e21cb..9df0902f48 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a site feature request required: true - - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index 56b233ce73..14cc17ac91 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -45,12 +45,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.11.10 (exe) + [debug] yt-dlp version 2021.11.10.1 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.11.10) + yt-dlp is up to date (2021.11.10.1) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index 0937f09ce3..ae0c277b34 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a feature request required: true - - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10.1**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. DO NOT post duplicates required: true diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 197e7389cf..5290afa2db 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2021.11.10' +__version__ = '2021.11.10.1' From b47d236d724f7a129c7ed0792fb847eb12e6f8a5 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Wed, 10 Nov 2021 15:28:38 +0000 Subject: [PATCH 410/641] [Tokentube] Fix description (#1578) Authored by: u-spec-png --- yt_dlp/extractor/tokentube.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/tokentube.py b/yt_dlp/extractor/tokentube.py index d6362117f7..579623fed4 100644 --- a/yt_dlp/extractor/tokentube.py +++ b/yt_dlp/extractor/tokentube.py @@ -6,7 +6,10 @@ import re from .common import InfoExtractor from ..utils import ( + clean_html, + get_element_by_class, parse_count, + remove_end, unified_strdate, js_to_json, OnDemandPagedList, @@ -35,7 +38,7 @@ class TokentubeIE(InfoExtractor): 'id': '3950239124', 'ext': 'mp4', 'title': 'Linux Ubuntu Studio perus käyttö', - 'description': 'md5:854ff1dc732ff708976de2880ea32050', + 'description': 'md5:46077d0daaba1974f2dc381257f9d64c', 'uploader': 'jyrilehtonen', 'upload_date': '20210825', }, @@ -45,7 +48,7 @@ class TokentubeIE(InfoExtractor): 'id': '3582463289', 'ext': 'mp4', 'title': 'Police for Freedom - toiminta aloitetaan Suomessa ❤️??', - 'description': 'md5:cd92e620d7f5fa162e8410d0fc9a08be', + 'description': 'md5:37ebf1cb44264e0bf23ed98b337ee63e', 'uploader': 'Voitontie', 'upload_date': '20210428', } @@ -90,7 +93,10 @@ class TokentubeIE(InfoExtractor): r']+>(.+?)
', webpage, 'uploader', fatal=False) - description = self._html_search_meta('description', webpage) + description = (clean_html(get_element_by_class('p-d-txt', webpage)) + or self._html_search_meta(('og:description', 'description', 'twitter:description'), webpage)) + + description = remove_end(description, 'Category') self._sort_formats(formats) From 013ae2e5038178420966fa7e029908b37ecda821 Mon Sep 17 00:00:00 2001 From: makeworld <25111343+makeworld-the-better-one@users.noreply.github.com> Date: Wed, 10 Nov 2021 14:37:05 -0500 Subject: [PATCH 411/641] [CBC Gem] Fix for shows that don't have all seasons (#1621) Closes #1594 Authored by: makeworld-the-better-one --- yt_dlp/extractor/cbc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index 4fcf2a9c1b..413053499b 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -390,7 +390,8 @@ class CBCGemPlaylistIE(InfoExtractor): show = match.group('show') show_info = self._download_json(self._API_BASE + show, season_id) season = int(match.group('season')) - season_info = try_get(show_info, lambda x: x['seasons'][season - 1]) + + season_info = next((s for s in show_info['seasons'] if s.get('season') == season), None) if season_info is None: raise ExtractorError(f'Couldn\'t find season {season} of {show}') From 44bcb8d1225c2fcfb9b1814282b74f0563ee26d1 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 10 Nov 2021 18:33:37 +0530 Subject: [PATCH 412/641] Fix bug in parsing `--add-header` Closes #1614 --- yt_dlp/options.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 89a1a8637e..89401910e0 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -151,25 +151,25 @@ def parseOpts(overrideArguments=None): def _dict_from_options_callback( option, opt_str, value, parser, - allowed_keys=r'[\w-]+', delimiter=':', default_key=None, process=None, multiple_keys=True): + allowed_keys=r'[\w-]+', delimiter=':', default_key=None, process=None, multiple_keys=True, + process_key=str.lower): out_dict = getattr(parser.values, option.dest) if multiple_keys: allowed_keys = r'(%s)(,(%s))*' % (allowed_keys, allowed_keys) mobj = re.match(r'(?i)(?P%s)%s(?P.*)$' % (allowed_keys, delimiter), value) if mobj is not None: - keys = [k.strip() for k in mobj.group('keys').lower().split(',')] - val = mobj.group('val') + keys, val = mobj.group('keys').split(','), mobj.group('val') elif default_key is not None: keys, val = [default_key], value else: raise optparse.OptionValueError( 'wrong %s formatting; it should be %s, not "%s"' % (opt_str, option.metavar, value)) try: + keys = map(process_key, keys) if process_key else keys val = process(val) if process else val except Exception as err: - raise optparse.OptionValueError( - 'wrong %s formatting; %s' % (opt_str, err)) + raise optparse.OptionValueError(f'wrong {opt_str} formatting; {err}') for key in keys: out_dict[key] = val @@ -792,7 +792,7 @@ def parseOpts(overrideArguments=None): '--add-header', metavar='FIELD:VALUE', dest='headers', default={}, type='str', action='callback', callback=_dict_from_options_callback, - callback_kwargs={'multiple_keys': False}, + callback_kwargs={'multiple_keys': False, 'process_key': None}, help='Specify a custom HTTP header and its value, separated by a colon ":". 
You can use this option multiple times', ) workarounds.add_option( From 093a17107ea5e375ba606ed1c31d1c259f93e0df Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 10 Nov 2021 21:41:41 +0530 Subject: [PATCH 413/641] Allow using a custom format selector through API Closes #1619, #1464 --- README.md | 51 ++++++++++++++++++++++++++++++++++++++------- yt_dlp/YoutubeDL.py | 13 +++++++++--- 2 files changed, 53 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 24975ad6ff..7a4ec55bb2 100644 --- a/README.md +++ b/README.md @@ -1600,14 +1600,14 @@ From a Python program, you can embed yt-dlp in a more powerful fashion, like thi ```python from yt_dlp import YoutubeDL -ydl_opts = {} +ydl_opts = {'format': 'bestaudio'} with YoutubeDL(ydl_opts) as ydl: ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc']) ``` Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L154-L452). -Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), converts the video to an mp3 file, implements a custom postprocessor and prints the final info_dict as json: +Here's a more complete example demonstrating various functionality: ```python import json @@ -1633,23 +1633,56 @@ class MyLogger: print(msg) +# ℹ️ See the docstring of yt_dlp.postprocessor.common.PostProcessor class MyCustomPP(yt_dlp.postprocessor.PostProcessor): + # ℹ️ See docstring of yt_dlp.postprocessor.common.PostProcessor.run def run(self, info): self.to_screen('Doing stuff') return [], info +# ℹ️ See "progress_hooks" in the docstring of yt_dlp.YoutubeDL def my_hook(d): if d['status'] == 'finished': print('Done downloading, now converting ...') +def format_selector(ctx): + """ Select the best video and the best audio that won't result in an mkv. + This is just an example and does not handle all cases """ + + # formats are already sorted worst to best + formats = ctx.get('formats')[::-1] + + # acodec='none' means there is no audio + best_video = next(f for f in formats + if f['vcodec'] != 'none' and f['acodec'] == 'none') + + # find compatible audio extension + audio_ext = {'mp4': 'm4a', 'webm': 'webm'}[best_video['ext']] + # vcodec='none' means there is no video + best_audio = next(f for f in formats if ( + f['acodec'] != 'none' and f['vcodec'] == 'none' and f['ext'] == audio_ext)) + + yield { + # These are the minimum required fields for a merged format + 'format_id': f'{best_video["format_id"]}+{best_audio["format_id"]}', + 'ext': best_video['ext'], + 'requested_formats': [best_video, best_audio], + # Must be + seperated list of protocols + 'protocol': f'{best_video["protocol"]}+{best_audio["protocol"]}' + } + + +# ℹ️ See docstring of yt_dlp.YoutubeDL for a description of the options ydl_opts = { - 'format': 'bestaudio/best', + 'format': format_selector, 'postprocessors': [{ - 'key': 'FFmpegExtractAudio', - 'preferredcodec': 'mp3', - 'preferredquality': '192', + # Embed metadata in video using ffmpeg. + # ℹ️ See yt_dlp.postprocessor.FFmpegMetadataPP for the arguments it accepts + 'key': 'FFmpegMetadata', + 'add_chapters': True, + 'add_metadata': True, }], 'logger': MyLogger(), 'progress_hooks': [my_hook], @@ -1659,14 +1692,16 @@ ydl_opts = { # Add custom headers yt_dlp.utils.std_headers.update({'Referer': 'https://www.google.com'}) +# ℹ️ See the public functions in yt_dlp.YoutubeDL for for other available functions. 
+# Eg: "ydl.download", "ydl.download_with_info_file" with yt_dlp.YoutubeDL(ydl_opts) as ydl: ydl.add_post_processor(MyCustomPP()) info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc') + + # ℹ️ ydl.sanitize_info makes the info json-serializable print(json.dumps(ydl.sanitize_info(info))) ``` -See the public functions in [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py) for other available functions. Eg: `ydl.download`, `ydl.download_with_info_file` - **Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the example above diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2439fc82bd..5d6b1d5b2a 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -211,6 +211,9 @@ class YoutubeDL(object): simulate: Do not download the video files. If unset (or None), simulate only if listsubtitles, listformats or list_thumbnails is used format: Video format code. see "FORMAT SELECTION" for more details. + You can also pass a function. The function takes 'ctx' as + argument and returns the formats to download. + See "build_format_selector" for an implementation allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded. ignore_no_formats_error: Ignore "No video formats" error. Usefull for extracting metadata even if the video is not actually @@ -613,6 +616,7 @@ class YoutubeDL(object): # Creating format selector here allows us to catch syntax errors before the extraction self.format_selector = ( None if self.params.get('format') is None + else self.params['format'] if callable(self.params['format']) else self.build_format_selector(self.params['format'])) self._setup_opener() @@ -1927,9 +1931,9 @@ class YoutubeDL(object): 'format_id': '+'.join(filtered('format_id')), 'ext': output_ext, 'protocol': '+'.join(map(determine_protocol, formats_info)), - 'language': '+'.join(orderedSet(filtered('language'))), - 'format_note': '+'.join(orderedSet(filtered('format_note'))), - 'filesize_approx': sum(filtered('filesize', 'filesize_approx')), + 'language': '+'.join(orderedSet(filtered('language'))) or None, + 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None, + 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None, 'tbr': sum(filtered('tbr', 'vbr', 'abr')), } @@ -2357,6 +2361,9 @@ class YoutubeDL(object): info_dict, _ = self.pre_process(info_dict) + # The pre-processors may have modified the formats + formats = info_dict.get('formats', [info_dict]) + if self.params.get('list_thumbnails'): self.list_thumbnails(info_dict) if self.params.get('listformats'): From e08a85d86595705126d1304eafd3829e6f3811d0 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 11 Nov 2021 08:00:43 +0530 Subject: [PATCH 414/641] Fix writing playlist infojson with `--no-clean-infojson` --- yt_dlp/YoutubeDL.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 5d6b1d5b2a..4699e58b16 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1583,10 +1583,11 @@ class YoutubeDL(object): if entry is not None] n_entries = len(entries) - if not playlistitems and (playliststart or playlistend): + if not playlistitems and (playliststart != 1 or playlistend): playlistitems = 
list(range(playliststart, playliststart + n_entries)) ie_result['requested_entries'] = playlistitems + _infojson_written = False if not self.params.get('simulate') and self.params.get('allow_playlist_files', True): ie_copy = { 'playlist': playlist, @@ -1599,8 +1600,9 @@ class YoutubeDL(object): } ie_copy.update(dict(ie_result)) - if self._write_info_json('playlist', ie_result, - self.prepare_filename(ie_copy, 'pl_infojson')) is None: + _infojson_written = self._write_info_json( + 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson')) + if _infojson_written is None: return if self._write_description('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_description')) is None: @@ -1656,6 +1658,12 @@ class YoutubeDL(object): # TODO: skip failed (empty) entries? playlist_results.append(entry_result) ie_result['entries'] = playlist_results + + # Write the updated info to json + if _infojson_written and self._write_info_json( + 'updated playlist', ie_result, + self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None: + return self.to_screen('[download] Finished downloading playlist: %s' % playlist) return ie_result @@ -3472,8 +3480,10 @@ class YoutubeDL(object): encoding = preferredencoding() return encoding - def _write_info_json(self, label, ie_result, infofn): + def _write_info_json(self, label, ie_result, infofn, overwrite=None): ''' Write infojson and returns True = written, False = skip, None = error ''' + if overwrite is None: + overwrite = self.params.get('overwrites', True) if not self.params.get('writeinfojson'): return False elif not infofn: @@ -3481,7 +3491,7 @@ class YoutubeDL(object): return False elif not self._ensure_dir_exists(infofn): return None - elif not self.params.get('overwrites', True) and os.path.exists(infofn): + elif not overwrite and os.path.exists(infofn): self.to_screen(f'[info] {label.title()} metadata is already present') else: self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}') From bf5f605e7674c96d752aabb102cf627f5d7258ae Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 11 Nov 2021 08:44:54 +0530 Subject: [PATCH 415/641] bugfix for e08a85d86595705126d1304eafd3829e6f3811d0 --- yt_dlp/YoutubeDL.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 4699e58b16..1b3873254f 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1506,9 +1506,9 @@ class YoutubeDL(object): raise EntryNotInPlaylist('There are no entries') incomplete_entries = bool(ie_result.get('requested_entries')) if incomplete_entries: - def fill_missing_entries(entries, indexes): - ret = [None] * max(*indexes) - for i, entry in zip(indexes, entries): + def fill_missing_entries(entries, indices): + ret = [None] * max(indices) + for i, entry in zip(indices, entries): ret[i - 1] = entry return ret ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries']) @@ -2991,7 +2991,8 @@ class YoutubeDL(object): try: self.__download_wrapper(self.process_ie_result)(info, download=True) except (DownloadError, EntryNotInPlaylist, ThrottledDownload) as e: - self.to_stderr('\r') + if not isinstance(e, EntryNotInPlaylist): + self.to_stderr('\r') webpage_url = info.get('webpage_url') if webpage_url is not None: self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}') From c1dc0ee56e0d29cefe6948621d253385fff3e20f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 12 Nov 2021 03:12:53 +0530 Subject: [PATCH 416/641] 
[NovaEmbed] Fix extractor Closes #1570 --- yt_dlp/extractor/nova.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/nova.py b/yt_dlp/extractor/nova.py index 3acb881217..0007b6b12a 100644 --- a/yt_dlp/extractor/nova.py +++ b/yt_dlp/extractor/nova.py @@ -10,6 +10,7 @@ from ..utils import ( int_or_none, js_to_json, qualities, + traverse_obj, unified_strdate, url_or_none, ) @@ -17,30 +18,44 @@ from ..utils import ( class NovaEmbedIE(InfoExtractor): _VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P[^/?#&]+)' - _TEST = { + _TESTS = [{ 'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1', - 'md5': 'ee009bafcc794541570edd44b71cbea3', 'info_dict': { 'id': '8o0n0r', - 'ext': 'mp4', 'title': '2180. díl', 'thumbnail': r're:^https?://.*\.jpg', 'duration': 2578, }, - } + 'params': { + 'skip_download': True, + 'ignore_no_formats_error': True, + }, + 'expected_warnings': ['DRM protected', 'Requested format is not available'], + }, { + 'url': 'https://media.cms.nova.cz/embed/KybpWYvcgOa', + 'info_dict': { + 'id': 'KybpWYvcgOa', + 'ext': 'mp4', + 'title': 'Borhyová oslavila 60? Soutěžící z pořadu odboural moderátora Ondřeje Sokola', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 114, + }, + 'params': {'skip_download': 'm3u8'}, + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + has_drm = False duration = None formats = [] player = self._parse_json( self._search_regex( - r'Player\.init\s*\([^,]+,\s*(?:\w+\s*\?\s*{.+?}\s*:\s*)?({.+})\s*,\s*{.+?}\s*\)\s*;', - webpage, 'player', default='{}'), video_id, fatal=False) + r'Player\.init\s*\([^,]+,(?P\s*\w+\s*\?)?\s*(?P{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)', + webpage, 'player', default='{}', group='json'), video_id, fatal=False) if player: for format_id, format_list in player['tracks'].items(): if not isinstance(format_list, list): @@ -48,6 +63,10 @@ class NovaEmbedIE(InfoExtractor): for format_dict in format_list: if not isinstance(format_dict, dict): continue + if (not self.get_param('allow_unplayable_formats') + and traverse_obj(format_dict, ('drm', 'keySystem'))): + has_drm = True + continue format_url = url_or_none(format_dict.get('src')) format_type = format_dict.get('type') ext = determine_ext(format_url) @@ -104,6 +123,8 @@ class NovaEmbedIE(InfoExtractor): f['format_id'] = f_id formats.append(f) + if not formats and has_drm: + self.report_drm(video_id) self._sort_formats(formats) title = self._og_search_title( From 48e931066091fba7af1c447787685bbf7c889a25 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 12 Nov 2021 03:59:32 +0530 Subject: [PATCH 417/641] [nexx] Better error message for unsupported format Related: #1637 --- yt_dlp/extractor/nexx.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/yt_dlp/extractor/nexx.py b/yt_dlp/extractor/nexx.py index a30108483a..8aceebd492 100644 --- a/yt_dlp/extractor/nexx.py +++ b/yt_dlp/extractor/nexx.py @@ -385,8 +385,7 @@ class NexxIE(InfoExtractor): elif cdn == 'free': formats = self._extract_free_formats(video, video_id) else: - # TODO: reverse more cdns - assert False + self.raise_no_formats(f'{cdn} formats are currently not supported', video_id) self._sort_formats(formats) From df03de2c02192e43e5b51c8708619179a268b4cf Mon Sep 17 00:00:00 2001 From: MinePlayersPE Date: Fri, 12 Nov 2021 20:46:19 +0700 Subject: [PATCH 418/641] [RoosterTeethSeries] Fix for multiple pages (#1642) Authored by: MinePlayersPE --- 
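The change below raises the API page size so that a whole season comes back in a single response. A rough sketch of the resulting request URL, where both the API base and the episodes path are assumptions for illustration and may differ from the live service:

```python
from yt_dlp.utils import update_url_query, urljoin

# Both values are assumed for illustration: the extractor's API base and
# the shape of a season's "episodes" link as returned by the API.
API_BASE = 'https://svod-be.roosterteeth.com'
episodes_link = '/api/v1/seasons/lets-play-minecraft-9/episodes'

# Without per_page the API answers with its default page size, which
# truncated long seasons (50+ episodes) to a single page of results.
season_url = update_url_query(urljoin(API_BASE, episodes_link), {'per_page': 1000})
print(season_url)
# https://svod-be.roosterteeth.com/api/v1/seasons/lets-play-minecraft-9/episodes?per_page=1000
```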
yt_dlp/extractor/roosterteeth.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/roosterteeth.py b/yt_dlp/extractor/roosterteeth.py index be796804cc..18672b2e3b 100644 --- a/yt_dlp/extractor/roosterteeth.py +++ b/yt_dlp/extractor/roosterteeth.py @@ -12,6 +12,7 @@ from ..utils import ( url_or_none, urlencode_postdata, urljoin, + update_url_query, ) @@ -182,6 +183,13 @@ class RoosterTeethSeriesIE(RoosterTeethBaseIE): 'id': 'role-initiative', 'title': 'Role Initiative', } + }, { + 'url': 'https://roosterteeth.com/series/let-s-play-minecraft?season=9', + 'playlist_mincount': 50, + 'info_dict': { + 'id': 'let-s-play-minecraft-9', + 'title': 'Let\'s Play Minecraft - Season 9', + } }] def _entries(self, series_id, season_number): @@ -192,7 +200,7 @@ class RoosterTeethSeriesIE(RoosterTeethBaseIE): idx = traverse_obj(data, ('attributes', 'number')) if season_number and idx != season_number: continue - season_url = urljoin(self._API_BASE, data['links']['episodes']) + season_url = update_url_query(urljoin(self._API_BASE, data['links']['episodes']), {'per_page': 1000}) season = self._download_json(season_url, display_id, f'Downloading season {idx} JSON metadata')['data'] for episode in season: yield self.url_result( From 92775d8a40728fe045af000755f1c3eeffb2089d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 13 Nov 2021 15:07:48 +0530 Subject: [PATCH 419/641] [CuriosityStream] Fix series Bug indroduced in ed807c18376ecb61c2219b506040bc3e9464bde9 --- yt_dlp/extractor/curiositystream.py | 56 +++++++++++++++++------------ yt_dlp/extractor/extractors.py | 3 +- 2 files changed, 35 insertions(+), 24 deletions(-) diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py index 41c0f845a7..628c836319 100644 --- a/yt_dlp/extractor/curiositystream.py +++ b/yt_dlp/extractor/curiositystream.py @@ -44,7 +44,7 @@ class CuriosityStreamBaseIE(InfoExtractor): 'password': password, })) self._handle_errors(result) - self._auth_token = result['message']['auth_token'] + CuriosityStreamBaseIE._auth_token = result['message']['auth_token'] class CuriosityStreamIE(CuriosityStreamBaseIE): @@ -142,9 +142,26 @@ class CuriosityStreamIE(CuriosityStreamBaseIE): } -class CuriosityStreamCollectionIE(CuriosityStreamBaseIE): - IE_NAME = 'curiositystream:collection' - _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collections?|series)/(?P\d+)' +class CuriosityStreamCollectionBaseIE(CuriosityStreamBaseIE): + + def _real_extract(self, url): + collection_id = self._match_id(url) + collection = self._call_api(collection_id, collection_id) + entries = [] + for media in collection.get('media', []): + media_id = compat_str(media.get('id')) + media_type, ie = ('series', CuriosityStreamSeriesIE) if media.get('is_collection') else ('video', CuriosityStreamIE) + entries.append(self.url_result( + 'https://curiositystream.com/%s/%s' % (media_type, media_id), + ie=ie.ie_key(), video_id=media_id)) + return self.playlist_result( + entries, collection_id, + collection.get('title'), collection.get('description')) + + +class CuriosityStreamCollectionsIE(CuriosityStreamCollectionBaseIE): + IE_NAME = 'curiositystream:collections' + _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/collections/(?P\d+)' _API_BASE_URL = 'https://api.curiositystream.com/v2/collections/' _TESTS = [{ 'url': 'https://curiositystream.com/collections/86', @@ -155,7 +172,17 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE): }, 'playlist_mincount': 7, }, { - 'url': 
'https://app.curiositystream.com/collection/2', + 'url': 'https://curiositystream.com/collections/36', + 'only_matching': True, + }] + + +class CuriosityStreamSeriesIE(CuriosityStreamCollectionBaseIE): + IE_NAME = 'curiositystream:series' + _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:series|collection)/(?P\d+)' + _API_BASE_URL = 'https://api.curiositystream.com/v2/series/' + _TESTS = [{ + 'url': 'https://curiositystream.com/series/2', 'info_dict': { 'id': '2', 'title': 'Curious Minds: The Internet', @@ -163,23 +190,6 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE): }, 'playlist_mincount': 16, }, { - 'url': 'https://curiositystream.com/series/2', - 'only_matching': True, - }, { - 'url': 'https://curiositystream.com/collections/36', + 'url': 'https://curiositystream.com/collection/2', 'only_matching': True, }] - - def _real_extract(self, url): - collection_id = self._match_id(url) - collection = self._call_api(collection_id, collection_id) - entries = [] - for media in collection.get('media', []): - media_id = compat_str(media.get('id')) - media_type, ie = ('series', CuriosityStreamCollectionIE) if media.get('is_collection') else ('video', CuriosityStreamIE) - entries.append(self.url_result( - 'https://curiositystream.com/%s/%s' % (media_type, media_id), - ie=ie.ie_key(), video_id=media_id)) - return self.playlist_result( - entries, collection_id, - collection.get('title'), collection.get('description')) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 4f9de71e27..2eee2a864e 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -307,7 +307,8 @@ from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE from .curiositystream import ( CuriosityStreamIE, - CuriosityStreamCollectionIE, + CuriosityStreamCollectionsIE, + CuriosityStreamSeriesIE, ) from .cwtv import CWTVIE from .dailymail import DailyMailIE From 39c04074e7e108bc6e36f3a34ef08a163663144a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 13 Nov 2021 15:11:33 +0530 Subject: [PATCH 420/641] [ExtractAudio] Fix conversion to `wav` Closes #1645 --- yt_dlp/postprocessor/ffmpeg.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 46e87baebf..b2f28d6589 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -403,10 +403,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): 'aac': (0.1, 4), 'vorbis': (0, 10), 'libfdk_aac': (1, 5), - 'opus': None, # doesn't support -q:a - 'wav': None, - 'flac': None, - }[codec] + }.get(codec) if not limits: return [] From e339d25a0d0d5de7e237e6ff8c7676aaa2cbb8a8 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 13 Nov 2021 15:11:59 +0530 Subject: [PATCH 421/641] [youtube] Minor improvement to format sorting --- yt_dlp/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 7bcd6e7dc6..3ae0f5a270 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2563,7 +2563,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): f['quality'] = next(( q(qdict[val]) - for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)) + for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities)) if val in qdict), -1) return True From 7c7f7161fc0d778cd74d8b89162ba9df3d4e5da8 Mon Sep 17 00:00:00 2001 From: pukkandan 
Date: Sat, 13 Nov 2021 17:30:33 +0530 Subject: [PATCH 422/641] Fix `--load-info-json` of playlists with failed entries --- yt_dlp/YoutubeDL.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 1b3873254f..70106db7e1 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1504,10 +1504,12 @@ class YoutubeDL(object): if 'entries' not in ie_result: raise EntryNotInPlaylist('There are no entries') + + MissingEntry = object() incomplete_entries = bool(ie_result.get('requested_entries')) if incomplete_entries: def fill_missing_entries(entries, indices): - ret = [None] * max(indices) + ret = [MissingEntry] * max(indices) for i, entry in zip(indices, entries): ret[i - 1] = entry return ret @@ -1561,7 +1563,7 @@ class YoutubeDL(object): entry = None try: entry = get_entry(i) - if entry is None: + if entry is MissingEntry: raise EntryNotInPlaylist() except (IndexError, EntryNotInPlaylist): if incomplete_entries: @@ -1655,7 +1657,6 @@ class YoutubeDL(object): self.report_error( 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures)) break - # TODO: skip failed (empty) entries? playlist_results.append(entry_result) ie_result['entries'] = playlist_results From 9ac24e235ea9ef91c711c35b0f793d17ea284a54 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 13 Nov 2021 23:49:14 +0530 Subject: [PATCH 423/641] [curiositystream] Add more metadata Closes #1568 --- yt_dlp/extractor/common.py | 1 + yt_dlp/extractor/curiositystream.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 5c6e599017..6f06502961 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -342,6 +342,7 @@ class InfoExtractor(object): series, programme or podcast: series: Title of the series or programme the video episode belongs to. + series_id: Id of the series or programme the video episode belongs to, as a unicode string. season: Title of the season the video episode belongs to. season_number: Number of the season the video episode belongs to, as an integer. season_id: Id of the season the video episode belongs to, as a unicode string. 
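The `series_id` field documented in the common schema above is populated by the extractor changes below. A minimal sketch of reading the new metadata back, using the URL from this patch's test; the credentials are placeholders, since CuriosityStream requires an account for extraction to succeed:

```python
import json
from yt_dlp import YoutubeDL

# Placeholder credentials: replace with a real CuriosityStream login.
opts = {'username': 'user@example.com', 'password': 'correct-horse'}

with YoutubeDL(opts) as ydl:
    # URL taken from the test added in this patch; download=False builds
    # the info dict only, without fetching any media.
    info = ydl.extract_info('https://app.curiositystream.com/video/2', download=False)

print(json.dumps({field: info.get(field) for field in (
    'series_id', 'categories', 'average_rating', 'channel')}, indent=2))
```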
From 9ac24e235ea9ef91c711c35b0f793d17ea284a54 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 13 Nov 2021 23:49:14 +0530 Subject: [PATCH 423/641] [curiositystream] Add more metadata Closes #1568 --- yt_dlp/extractor/common.py | 1 + yt_dlp/extractor/curiositystream.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 5c6e599017..6f06502961 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -342,6 +342,7 @@ class InfoExtractor(object): series, programme or podcast: series: Title of the series or programme the video episode belongs to. + series_id: Id of the series or programme the video episode belongs to, as a unicode string. season: Title of the season the video episode belongs to. season_number: Number of the season the video episode belongs to, as an integer. season_id: Id of the season the video episode belongs to, as a unicode string. diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py index 628c836319..286a4c6af4 100644 --- a/yt_dlp/extractor/curiositystream.py +++ b/yt_dlp/extractor/curiositystream.py @@ -50,19 +50,23 @@ class CuriosityStreamBaseIE(InfoExtractor): class CuriosityStreamIE(CuriosityStreamBaseIE): IE_NAME = 'curiositystream' _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)' - _TEST = { + _TESTS = [{ 'url': 'https://app.curiositystream.com/video/2', 'info_dict': { 'id': '2', 'ext': 'mp4', 'title': 'How Did You Develop The Internet?', 'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.', + 'channel': 'Curiosity Stream', + 'categories': ['Technology', 'Interview'], + 'average_rating': 96.79, + 'series_id': '2', }, 'params': { # m3u8 download 'skip_download': True, }, - } + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -139,6 +143,10 @@ class CuriosityStreamIE(CuriosityStreamBaseIE): 'duration': int_or_none(media.get('duration')), 'tags': media.get('tags'), 'subtitles': subtitles, + 'channel': media.get('producer'), + 'categories': [media.get('primary_category'), media.get('type')], + 'average_rating': media.get('rating_percentage'), + 'series_id': str(media.get('collection_id') or '') or None, } From d0e6121adf4f82b266c82d7e632f7fe79f05096c Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 13 Nov 2021 23:55:12 +0530 Subject: [PATCH 424/641] [curiositystream] Fix login Bug from 92775d8a40728fe045af000755f1c3eeffb2089d --- yt_dlp/extractor/curiositystream.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py index 286a4c6af4..485b6031fc 100644 --- a/yt_dlp/extractor/curiositystream.py +++ b/yt_dlp/extractor/curiositystream.py @@ -15,7 +15,6 @@ class CuriosityStreamBaseIE(InfoExtractor): _NETRC_MACHINE = 'curiositystream' _auth_token = None - _API_BASE_URL = 'https://api.curiositystream.com/v1/' def _handle_errors(self, result): error = result.get('error', {}).get('message') @@ -39,7 +38,8 @@ class CuriosityStreamBaseIE(InfoExtractor): if email is None: return result = self._download_json( - self._API_BASE_URL + 'login', None, data=urlencode_postdata({ + 'https://api.curiositystream.com/v1/login', None, + note='Logging in', data=urlencode_postdata({ 'email': email, 'password': password, })) @@ -68,12 +68,14 @@ class CuriosityStreamIE(CuriosityStreamBaseIE): }, }] + _API_BASE_URL = 'https://api.curiositystream.com/v1/media/' + def _real_extract(self, url): video_id = self._match_id(url) formats = [] for encoding_format in ('m3u8', 'mpd'): - media = self._call_api('media/' + video_id, video_id, query={ + media = self._call_api(video_id, video_id, query={ 'encodingsNew': 'true', 'encodingsFormat': encoding_format, }) From f279aaee8e246f510e56fe35b163520f35085338 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 15 Nov 2021 01:25:47 +0530 Subject: [PATCH 425/641] Add compat-option embed-metadata --- README.md | 1 + yt_dlp/options.py | 2 +- yt_dlp/postprocessor/ffmpeg.py | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7a4ec55bb2..1612bda5ae 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * `--ignore-errors` is enabled by default.
Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead * When writing metadata files such as thumbnails, description or infojson, the same information (if available) is also written for playlists. Use `--no-write-playlist-metafiles` or `--compat-options no-playlist-metafiles` to not write these files * `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-infojson`. Use `--compat-options no-attach-info-json` to revert this +* Some metadata are embedded into different fields when using `--add-metadata` as compared to youtube-dl. Most notably, `comment` field contains the `webpage_url` and `synopsis` contains the `description`. You can [use `--parse-metadata`](https://github.com/yt-dlp/yt-dlp#modifying-metadata) to modify this to your liking or use `--compat-options embed-metadata` to revert this * `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior * The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this * All *experiences* of a funimation episode are considered as a single video. This behavior breaks existing archives. Use `--compat-options seperate-video-versions` to extract information from only the default player diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 89401910e0..209f199bd6 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -278,7 +278,7 @@ def parseOpts(overrideArguments=None): 'allowed_values': { 'filename', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles', 'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', - 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-attach-info-json', + 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-attach-info-json', 'embed-metadata', 'embed-thumbnail-atomicparsley', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', }, 'aliases': { 'youtube-dl': ['-multistreams', 'all'], diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index b2f28d6589..d6734e8d96 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -721,6 +721,9 @@ class FFmpegMetadataPP(FFmpegPostProcessor): add('season_number') add('episode_id', ('episode', 'episode_id')) add('episode_sort', 'episode_number') + if 'embed-metadata' in self.get_param('compat_opts', []): + add('comment', 'description') + metadata.pop('synopsis', None) for key, value in info.items(): if value is not None and key != meta_prefix and key.startswith(meta_prefix): From dac5df5a988a75ed12343e4ee8fcafbc76ae847d Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com>
The following options do not work when used through the API: filename, abort-on-error, multistreams, no-live-chat, format-sort - no-clean-infojson, no-playlist-metafiles, no-keep-subs. + no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json. Refer __init__.py for their implementation progress_template: Dictionary of templates for progress outputs. Allowed keys are 'download', 'postprocess', @@ -2654,6 +2654,8 @@ class YoutubeDL(object): infofn = self.prepare_filename(info_dict, 'infojson') _infojson_written = self._write_info_json('video', info_dict, infofn) if _infojson_written: + info_dict['infojson_filename'] = infofn + # For backward compatability, even though it was a private field info_dict['__infojson_filename'] = infofn elif _infojson_written is None: return @@ -3012,8 +3014,8 @@ class YoutubeDL(object): keep_keys = ['_type'] # Always keep this to facilitate load-info-json if remove_private_keys: remove_keys |= { - 'requested_formats', 'requested_subtitles', 'requested_entries', - 'filepath', 'entries', 'original_url', 'playlist_autonumber', + 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries', + 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber', } empty_values = (None, {}, [], set(), tuple()) reject = lambda k, v: k not in keep_keys and ( diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index d72e08b353..63b9b6e2f9 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -290,6 +290,11 @@ def _real_main(argv=None): set_default_compat('abort-on-error', 'ignoreerrors', 'only_download') set_default_compat('no-playlist-metafiles', 'allow_playlist_files') set_default_compat('no-clean-infojson', 'clean_infojson') + if 'no-attach-info-json' in compat_opts: + if opts.embed_infojson: + _unused_compat_opt('no-attach-info-json') + else: + opts.embed_infojson = False if 'format-sort' in compat_opts: opts.format_sort.extend(InfoExtractor.FormatSort.ytdl_default) _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False) @@ -526,11 +531,14 @@ def _real_main(argv=None): # By default ffmpeg preserves metadata applicable for both # source and target containers. From this point the container won't change, # so metadata can be added here. - if opts.addmetadata or opts.addchapters: + if opts.addmetadata or opts.addchapters or opts.embed_infojson: + if opts.embed_infojson is None: + opts.embed_infojson = 'if_exists' postprocessors.append({ 'key': 'FFmpegMetadata', 'add_chapters': opts.addchapters, 'add_metadata': opts.addmetadata, + 'add_infojson': opts.embed_infojson, }) # Note: Deprecated # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 209f199bd6..0843d5ff76 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1287,7 +1287,9 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--embed-metadata', '--add-metadata', action='store_true', dest='addmetadata', default=False, - help='Embed metadata to the video file. Also adds chapters to file unless --no-add-chapters is used (Alias: --add-metadata)') + help=( + 'Embed metadata to the video file. 
Also embeds chapters/infojson if present ' + 'unless --no-embed-chapters/--no-embed-info-json are used (Alias: --add-metadata)')) postproc.add_option( '--no-embed-metadata', '--no-add-metadata', action='store_false', dest='addmetadata', @@ -1300,6 +1302,14 @@ def parseOpts(overrideArguments=None): '--no-embed-chapters', '--no-add-chapters', action='store_false', dest='addchapters', help='Do not add chapter markers (default) (Alias: --no-add-chapters)') + postproc.add_option( + '--embed-info-json', + action='store_true', dest='embed_infojson', default=None, + help='Embed the infojson as an attachment to mkv/mka video files') + postproc.add_option( + '--no-embed-info-json', + action='store_false', dest='embed_infojson', + help='Do not embed the infojson as an attachment to the video file') postproc.add_option( '--metadata-from-title', metavar='FORMAT', dest='metafromtitle', diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index d6734e8d96..eacee8ee9d 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -28,6 +28,7 @@ from ..utils import ( shell_quote, traverse_obj, variadic, + write_json_file, ) @@ -636,10 +637,11 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): class FFmpegMetadataPP(FFmpegPostProcessor): - def __init__(self, downloader, add_metadata=True, add_chapters=True): + def __init__(self, downloader, add_metadata=True, add_chapters=True, add_infojson='if_exists'): FFmpegPostProcessor.__init__(self, downloader) self._add_metadata = add_metadata self._add_chapters = add_chapters + self._add_infojson = add_infojson @staticmethod def _options(target_ext): @@ -652,13 +654,23 @@ class FFmpegMetadataPP(FFmpegPostProcessor): @PostProcessor._restrict_to(images=False) def run(self, info): filename, metadata_filename = info['filepath'], None - options = [] + files_to_delete, options = [], [] if self._add_chapters and info.get('chapters'): metadata_filename = replace_extension(filename, 'meta') options.extend(self._get_chapter_opts(info['chapters'], metadata_filename)) + files_to_delete.append(metadata_filename) if self._add_metadata: options.extend(self._get_metadata_opts(info)) + if self._add_infojson: + if info['ext'] in ('mkv', 'mka'): + infojson_filename = info.get('infojson_filename') + options.extend(self._get_infojson_opts(info, infojson_filename)) + if not infojson_filename: + files_to_delete.append(info.get('infojson_filename')) + elif self._add_infojson is True: + self.to_screen('The info-json can only be attached to mkv/mka files') + if not options: self.to_screen('There isn\'t any metadata to add') return [], info @@ -668,8 +680,8 @@ class FFmpegMetadataPP(FFmpegPostProcessor): self.run_ffmpeg_multiple_files( (filename, metadata_filename), temp_filename, itertools.chain(self._options(info['ext']), *options)) - if metadata_filename: - os.remove(metadata_filename) + for file in filter(None, files_to_delete): + os.remove(file) # Don't obey --keep-files os.replace(temp_filename, filename) return [], info @@ -741,15 +753,26 @@ class FFmpegMetadataPP(FFmpegPostProcessor): yield ('-metadata:s:%d' % (stream_idx + i), 'language=%s' % lang) stream_idx += stream_count - if ('no-attach-info-json' not in self.get_param('compat_opts', []) - and '__infojson_filename' in info and info['ext'] in ('mkv', 'mka')): - old_stream, new_stream = self.get_stream_number(info['filepath'], ('tags', 'mimetype'), 'application/json') - if old_stream is not None: - yield ('-map', '-0:%d' % old_stream) - new_stream -= 1 + def _get_infojson_opts(self, 
info, infofn): + if not infofn or not os.path.exists(infofn): + if self._add_infojson is not True: + return + infofn = infofn or '%s.temp' % ( + self._downloader.prepare_filename(info, 'infojson') + or replace_extension(self._downloader.prepare_filename(info), 'info.json', info['ext'])) + if not self._downloader._ensure_dir_exists(infofn): + return + self.write_debug(f'Writing info-json to: {infofn}') + write_json_file(self._downloader.sanitize_info(info, self.get_param('clean_infojson', True)), infofn) + info['infojson_filename'] = infofn - yield ('-attach', info['__infojson_filename'], - '-metadata:s:%d' % new_stream, 'mimetype=application/json') + old_stream, new_stream = self.get_stream_number(info['filepath'], ('tags', 'mimetype'), 'application/json') + if old_stream is not None: + yield ('-map', '-0:%d' % old_stream) + new_stream -= 1 + + yield ('-attach', infofn, + '-metadata:s:%d' % new_stream, 'mimetype=application/json') class FFmpegMergerPP(FFmpegPostProcessor):
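[Editor's note — not part of the patch above] For API users, the new behaviour maps onto the `FFmpegMetadata` postprocessor arguments added in this commit. A minimal sketch (the option keys are taken from the diff itself; the URL is only a placeholder):

    import yt_dlp

    ydl_opts = {
        'merge_output_format': 'mkv',  # per the diff, the attachment only works for mkv/mka
        'postprocessors': [{
            'key': 'FFmpegMetadata',
            'add_chapters': True,
            'add_metadata': True,
            'add_infojson': 'if_exists',  # True would force-write a temporary info.json first
        }],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])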
From 013b50b7949563e445936302d6e486bab7100018 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 15 Nov 2021 04:50:11 +0530 Subject: [PATCH 427/641] Fix `postprocessor_hooks` Closes #1650 --- yt_dlp/YoutubeDL.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index a102ecc321..197ec11e6c 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -633,11 +633,14 @@ class YoutubeDL(object): pp = pp_class(self, **compat_kwargs(pp_def)) self.add_post_processor(pp, when=when) - for ph in self.params.get('post_hooks', []): - self.add_post_hook(ph) - - for ph in self.params.get('progress_hooks', []): - self.add_progress_hook(ph) + hooks = { + 'post_hooks': self.add_post_hook, + 'progress_hooks': self.add_progress_hook, + 'postprocessor_hooks': self.add_postprocessor_hook, + } + for opt, fn in hooks.items(): + for ph in self.params.get(opt, []): + fn(ph) register_socks_protocols() From d0d012d4e79cd1420e96ce5c3d509771110d3ea1 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Tue, 16 Nov 2021 14:22:01 +1300 Subject: [PATCH 428/641] [youtube] Add `default` player client (#1685) Authored-by: coletdjnz --- README.md | 2 +- yt_dlp/extractor/youtube.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1612bda5ae..96f5d7ecb2 100644 --- a/README.md +++ b/README.md @@ -1552,7 +1552,7 @@ The following extractors use this feature: #### youtube * `skip`: `hls` or `dash` (or both) to skip download of the respective manifests -* `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (Eg: `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can also use `all` to use all the clients +* `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (Eg: `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can also use `all` to use all the clients, and `default` for the default clients. * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `include_live_dash`: Include live dash formats (These formats don't download properly) * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 3ae0f5a270..203f4a92ad 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2339,18 +2339,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _get_requested_clients(self, url, smuggled_data): requested_clients = [] + default = ['android', 'web'] allowed_clients = sorted( [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'], key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True) for client in self._configuration_arg('player_client'): if client in allowed_clients: requested_clients.append(client) + elif client == 'default': + requested_clients.extend(default) elif client == 'all': requested_clients.extend(allowed_clients) else: self.report_warning(f'Skipping unsupported client {client}') if not requested_clients: - requested_clients = ['android', 'web'] + requested_clients = default if smuggled_data.get('is_music_url') or self.is_music_url(url): requested_clients.extend(
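[Editor's note — not part of the patch above] The new `default` keyword simply expands to the built-in default list (`android,web` at this point in the history), so it can be combined with extra clients without restating the defaults. A sketch of both interfaces, using client names from the README text above:

    # Python API: 'extractor_args' mirrors the --extractor-args CLI option
    import yt_dlp

    ydl_opts = {
        'extractor_args': {'youtube': {'player_client': ['default', 'web_embedded']}},
    }
    # Assumed CLI equivalent:
    #   yt-dlp --extractor-args "youtube:player_client=default,web_embedded" URL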
From d8cf8d97a8dbc9602556de474af133b5ab0e0a29 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 16 Nov 2021 21:14:02 +0530 Subject: [PATCH 429/641] [utils] Fix `PagedList` --- yt_dlp/utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index f07eef61f0..a9e066257d 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4179,7 +4179,9 @@ class PagedList: self._cache = {} def getpage(self, pagenum): - page_results = self._cache.get(pagenum) or list(self._pagefunc(pagenum)) + page_results = self._cache.get(pagenum) + if page_results is None: + page_results = list(self._pagefunc(pagenum)) if self._use_cache: self._cache[pagenum] = page_results return page_results @@ -4195,7 +4197,9 @@ class PagedList: if not isinstance(idx, int) or idx < 0: raise TypeError('indices must be non-negative integers') entries = self.getslice(idx, idx + 1) - return entries[0] if entries else None + if not entries: + raise IndexError() + return entries[0] class OnDemandPagedList(PagedList): From 720c309932ea6724223d0a6b7781a0e92a74262c Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 17 Nov 2021 01:26:23 +0530 Subject: [PATCH 430/641] [youtube] Add storyboard formats Closes: #1553, https://github.com/ytdl-org/youtube-dl/issues/9868 Related: https://github.com/ytdl-org/youtube-dl/pull/14951 --- yt_dlp/extractor/youtube.py | 53 ++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 203f4a92ad..41e7fce101 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -9,6 +9,7 @@ import datetime import hashlib import itertools import json +import math import os.path import random import re @@ -28,6 +29,7 @@ from ..compat import ( ) from ..jsinterp import JSInterpreter from ..utils import ( + bug_reports_message, bytes_to_intlist, clean_html, datetime_from_str, @@ -66,6 +68,10 @@ from ..utils import ( ) +def get_first(obj, keys, **kwargs): + return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False) + + # any clients starting with _ cannot be explicity requested by the user INNERTUBE_CLIENTS = { 'web': { @@ -2586,6 +2592,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None)) yield f + def _extract_storyboard(self, player_responses, duration): + spec = get_first( + player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1] + if not spec: + return + base_url = spec.pop() + L = len(spec) - 1 + for i, args in enumerate(spec): + args = args.split('#') + counts = list(map(int_or_none, args[:5])) + if len(args) != 8 or not all(counts): + self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}') + continue + width, height, frame_count, cols, rows = counts + N, sigh = args[6:] + + url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}' + fragment_count = frame_count / (cols * rows) + fragment_duration = duration / fragment_count + yield { + 'format_id': f'sb{i}', + 'format_note': 'storyboard', + 'ext': 'mhtml', + 'protocol': 'mhtml', + 'acodec': 'none', + 'vcodec': 'none', + 'url': url, + 'width': width, + 'height': height, + 'fragments': [{ + 'path': url.replace('$M', str(j)), + 'duration': min(fragment_duration, duration - (j * fragment_duration)), + } for j in range(math.ceil(fragment_count))], + } + def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) @@ -2603,8 +2644,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self._get_requested_clients(url, smuggled_data), video_id, webpage, master_ytcfg) - get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False) - playability_statuses = traverse_obj( player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[]) @@ -2700,10 +2739,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if reason: self.raise_no_formats(reason, expected=True) - # Source is given priority since formats that throttle are given lower source_preference - # When throttling issue is fully fixed, remove this - self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto')) keywords = get_first(video_details, 'keywords', expected_type=list) or [] if not keywords and webpage: keywords = [ @@ -2791,6 +2826,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not duration and live_endtime and live_starttime: duration = live_endtime - live_starttime + formats.extend(self._extract_storyboard(player_responses, duration)) + + # Source is given priority since formats that throttle are given lower source_preference + # When throttling issue is fully fixed, remove this + self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto')) + info = { 'id': video_id, 'title': self._live_title(video_title) if is_live else video_title,
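[Editor's note — not part of the patch above] The fragment arithmetic in `_extract_storyboard` is easiest to verify with concrete numbers. Suppose a hypothetical spec entry advertises 48x27 frames tiled into 5x5 mosaics, with 117 frames for a 234-second video:

    import math

    width, height, frame_count, cols, rows = 48, 27, 117, 5, 5
    duration = 234

    fragment_count = frame_count / (cols * rows)   # 117 / 25 = 4.68 mosaic images
    fragment_duration = duration / fragment_count  # 234 / 4.68 = 50 seconds per image
    assert math.ceil(fragment_count) == 5          # $M takes the values 0..4
    # The last mosaic is only partially filled, so its fragment covers just
    # 234 - 4 * 50 = 34 seconds -- which is what the min() in the patch computes.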
From 450bdf69bc080d882cb4db26cde8c2f9681b7e18 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:27:50 +0530 Subject: [PATCH 431/641] [OneFootball] Add extractor (#1613) Closes: #1598 Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/onefootball.py | 51 +++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 yt_dlp/extractor/onefootball.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 2eee2a864e..a60e271868 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1000,6 +1000,7 @@ from .oktoberfesttv import OktoberfestTVIE from .olympics import OlympicsReplayIE from .on24 import On24IE from .ondemandkorea import OnDemandKoreaIE +from .onefootball import OneFootballIE from .onet import ( OnetIE, OnetChannelIE, diff --git a/yt_dlp/extractor/onefootball.py b/yt_dlp/extractor/onefootball.py new file mode 100644 index 0000000000..79501003db --- /dev/null +++ b/yt_dlp/extractor/onefootball.py @@ -0,0 +1,51 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class OneFootballIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?onefootball\.com/[a-z]{2}/video/[^/&?#]+-(?P<id>\d+)' + + _TESTS = [{ + 'url': 'https://onefootball.com/en/video/highlights-fc-zuerich-3-3-fc-basel-34012334', + 'info_dict': { + 'id': '34012334', + 'ext': 'mp4', + 'title': 'Highlights: FC Zürich 3-3 FC Basel', + 'description': 'md5:33d9855cb790702c4fe42a513700aba8', + 'thumbnail': 'https://photobooth-api.onefootball.com/api/screenshot/https:%2F%2Fperegrine-api.onefootball.com%2Fv2%2Fphotobooth%2Fcms%2Fen%2F34012334', + 'timestamp': 1635874604, + 'upload_date': '20211102' + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://onefootball.com/en/video/klopp-fumes-at-var-decisions-in-west-ham-defeat-34041020', + 'info_dict': { + 'id': '34041020', + 'ext': 'mp4', + 'title': 'Klopp fumes at VAR decisions in West Ham defeat', + 'description': 'md5:9c50371095a01ad3f63311c73d8f51a5', + 'thumbnail': 'https://photobooth-api.onefootball.com/api/screenshot/https:%2F%2Fperegrine-api.onefootball.com%2Fv2%2Fphotobooth%2Fcms%2Fen%2F34041020', + 'timestamp': 1636314103, + 'upload_date': '20211107' + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + data_json = self._search_json_ld(webpage, id) + m3u8_url = self._html_search_regex(r'(https://cdn\.jwplayer\.com/manifests/.+\.m3u8)', webpage, 'm3u8_url') + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id) + self._sort_formats(formats) + return { + 'id': id, + 'title': data_json.get('title'), + 'description': data_json.get('description'), + 'thumbnail': data_json.get('thumbnail'), + 'timestamp': data_json.get('timestamp'), + 'formats': formats, + 'subtitles': subtitles, + } From 266a1b5d52d4a48a966d0a0b6286ca2740482409 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:28:51 +0530 Subject: [PATCH 432/641] [ESPNCricInfo] Add extractor (#1652) Closes: #1635 Authored by: Ashish0804 --- yt_dlp/extractor/espn.py | 43 ++++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 1 + 2 files changed, 44 insertions(+) diff --git a/yt_dlp/extractor/espn.py b/yt_dlp/extractor/espn.py index d4a66c29ff..dc50f3b8b5 100644 --- a/yt_dlp/extractor/espn.py +++ b/yt_dlp/extractor/espn.py @@ -7,7 +7,9 @@ from .once import OnceIE from ..compat import compat_str from ..utils import ( determine_ext, + dict_get, int_or_none, + unified_strdate, unified_timestamp, ) @@ -236,3 +238,44 @@ class FiveThirtyEightIE(InfoExtractor): webpage, 'embed url') return self.url_result(embed_url, 'AbcNewsVideo') + + +class ESPNCricInfoIE(InfoExtractor): + _VALID_URL =
r'https?://(?:www\.)?espncricinfo\.com/video/[^#$&?/]+-(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.espncricinfo.com/video/finch-chasing-comes-with-risks-despite-world-cup-trend-1289135', + 'info_dict': { + 'id': '1289135', + 'ext': 'mp4', + 'title': 'Finch: Chasing comes with \'risks\' despite World Cup trend', + 'description': 'md5:ea32373303e25efbb146efdfc8a37829', + 'upload_date': '20211113', + 'duration': 96, + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + data_json = self._download_json(f'https://hs-consumer-api.espncricinfo.com/v1/pages/video/video-details?videoId={id}', id)['video'] + formats, subtitles = [], {} + for item in data_json.get('playbacks') or []: + if item.get('type') == 'HLS' and item.get('url'): + m3u8_frmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(item['url'], id) + formats.extend(m3u8_frmts) + subtitles = self._merge_subtitles(subtitles, m3u8_subs) + elif item.get('type') == 'AUDIO' and item.get('url'): + formats.append({ + 'url': item['url'], + 'vcodec': 'none', + }) + self._sort_formats(formats) + return { + 'id': id, + 'title': data_json.get('title'), + 'description': data_json.get('summary'), + 'upload_date': unified_strdate(dict_get(data_json, ('publishedAt', 'recordedAt'))), + 'duration': data_json.get('duration'), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index a60e271868..a3674d8365 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -418,6 +418,7 @@ from .espn import ( ESPNIE, ESPNArticleIE, FiveThirtyEightIE, + ESPNCricInfoIE, ) from .esri import EsriVideoIE from .europa import EuropaIE From 9d63137eac4a5753dae775712599dc5c7adb0e8c Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:29:53 +0530 Subject: [PATCH 433/641] [CanalAlpha] Add extractor (#1655) Closes: #1528 Authored by: Ashish0804 --- yt_dlp/extractor/canalalpha.py | 98 ++++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 1 + 2 files changed, 99 insertions(+) create mode 100644 yt_dlp/extractor/canalalpha.py diff --git a/yt_dlp/extractor/canalalpha.py b/yt_dlp/extractor/canalalpha.py new file mode 100644 index 0000000000..7287677c11 --- /dev/null +++ b/yt_dlp/extractor/canalalpha.py @@ -0,0 +1,98 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + clean_html, + dict_get, + try_get, + unified_strdate, +) + + +class CanalAlphaIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?canalalpha\.ch/play/[^/]+/[^/]+/(?P<id>\d+)/?.*' + + _TESTS = [{ + 'url': 'https://www.canalalpha.ch/play/le-journal/episode/24520/jeudi-28-octobre-2021', + 'info_dict': { + 'id': '24520', + 'ext': 'mp4', + 'title': 'Jeudi 28 octobre 2021', + 'description': 'md5:d30c6c3e53f8ad40d405379601973b30', + 'thumbnail': 'https://static.canalalpha.ch/poster/journal/journal_20211028.jpg', + 'upload_date': '20211028', + 'duration': 1125, + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://www.canalalpha.ch/play/le-journal/topic/24512/la-poste-fait-de-neuchatel-un-pole-cryptographique', + 'info_dict': { + 'id': '24512', + 'ext': 'mp4', + 'title': 'La Poste fait de Neuchâtel un pôle cryptographique', + 'description': 'md5:4ba63ae78a0974d1a53d6703b6e1dedf', + 'thumbnail': 'https://static.canalalpha.ch/poster/news/news_39712.jpg', + 'upload_date': '20211028', + 'duration': 138, + }, +
'params': {'skip_download': True} + }, { + 'url': 'https://www.canalalpha.ch/play/eureka/episode/24484/ces-innovations-qui-veulent-rendre-lagriculture-plus-durable', + 'info_dict': { + 'id': '24484', + 'ext': 'mp4', + 'title': 'Ces innovations qui veulent rendre l’agriculture plus durable', + 'description': 'md5:3de3f151180684621e85be7c10e4e613', + 'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_10236.jpg', + 'upload_date': '20211026', + 'duration': 360, + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://www.canalalpha.ch/play/avec-le-temps/episode/23516/redonner-de-leclat-grace-au-polissage', + 'info_dict': { + 'id': '23516', + 'ext': 'mp4', + 'title': 'Redonner de l\'éclat grâce au polissage', + 'description': 'md5:0d8fbcda1a5a4d6f6daa3165402177e1', + 'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_9990.png', + 'upload_date': '20210726', + 'duration': 360, + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + data_json = self._parse_json(self._search_regex( + r'window\.__SERVER_STATE__\s?=\s?({(?:(?!};)[^"]|"([^"]|\\")*")+})\s?;', + webpage, 'data_json'), id)['1']['data']['data'] + manifests = try_get(data_json, lambda x: x['video']['manifests'], expected_type=dict) or {} + subtitles = {} + formats = [{ + 'url': video['$url'], + 'ext': 'mp4', + 'width': try_get(video, lambda x: x['res']['width'], expected_type=int), + 'height': try_get(video, lambda x: x['res']['height'], expected_type=int), + } for video in try_get(data_json, lambda x: x['video']['mp4'], expected_type=list) or [] if video.get('$url')] + if manifests.get('hls'): + m3u8_frmts, m3u8_subs = self._parse_m3u8_formats_and_subtitles(manifests['hls'], id) + formats.extend(m3u8_frmts) + subtitles = self._merge_subtitles(subtitles, m3u8_subs) + if manifests.get('dash'): + dash_frmts, dash_subs = self._parse_mpd_formats_and_subtitles(manifests['dash'], id) + formats.extend(dash_frmts) + subtitles = self._merge_subtitles(subtitles, dash_subs) + self._sort_formats(formats) + return { + 'id': id, + 'title': data_json.get('title').strip(), + 'description': clean_html(dict_get(data_json, ('longDesc', 'shortDesc'))), + 'thumbnail': data_json.get('poster'), + 'upload_date': unified_strdate(dict_get(data_json, ('webPublishAt', 'featuredAt', 'diffusionDate'))), + 'duration': try_get(data_json, lambda x: x['video']['duration'], expected_type=int), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index a3674d8365..2c0a885b9e 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -193,6 +193,7 @@ from .camdemy import ( ) from .cammodels import CamModelsIE from .camwithher import CamWithHerIE +from .canalalpha import CanalAlphaIE from .canalplus import CanalplusIE from .canalc2 import Canalc2IE from .canvas import ( From 525d9e0c7d4e8e1ad121d75f14ae40e8ee023079 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:30:48 +0530 Subject: [PATCH 434/641] [HotStar] Set language field from tags (#1700) Authored by: Ashish0804 --- yt_dlp/extractor/hotstar.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index 12e6c53d49..0bdf772a19 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -230,6 +230,11 @@ class HotStarIE(HotStarBaseIE): if tags 
and 'encryption:plain' not in tags: for f in current_formats: f['has_drm'] = True + if tags and 'language' in tags: + lang = re.search(r'language:(?P<lang>[a-z]+)', tags).group('lang') + for f in current_formats: + if not f.get('langauge'): + f['language'] = lang formats.extend(current_formats) subs = self._merge_subtitles(subs, current_subs) if not formats and geo_restricted: From 11852843e738bfdb01e1c65d3466629dc9645813 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:43:39 +0530 Subject: [PATCH 435/641] [AmazonStoreIE] Fix regex to not match vdp urls (#1699) Closes: #1698 Authored by: Ashish0804 --- yt_dlp/extractor/amazon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py index 01d6f2a54b..7c5d35f473 100644 --- a/yt_dlp/extractor/amazon.py +++ b/yt_dlp/extractor/amazon.py @@ -4,7 +4,7 @@ from ..utils import int_or_none class AmazonStoreIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/[^/]*/?(?:dp|gp/product)/(?P<id>[^/&#$?]+)' + _VALID_URL = r'(?:https?://)(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/(?:[^/]+/)?(?:dp|gp/product)/(?P<id>[^/&#$?]+)' _TESTS = [{ 'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/',
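[Editor's note — not part of the patch above] The old pattern `/[^/]*/?(?:dp|gp/product)/` could split a single path segment: `[^/]*` was free to consume only the `v` of `/vdp/`, after which `dp` matched inside that same segment. Requiring one complete optional segment, `(?:[^/]+/)?`, rules that out. A quick check (the `/vdp/` URL is a made-up stand-in for the kind of video-detail-page URL reported in #1698):

    import re

    NEW = (r'(?:https?://)(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?'
           r'/(?:[^/]+/)?(?:dp|gp/product)/(?P<id>[^/&#$?]+)')
    assert re.match(NEW, 'https://www.amazon.co.uk/dp/B098XNCHLD/')
    assert re.match(NEW, 'https://www.amazon.com/some-product/dp/B098XNCHLD')
    assert not re.match(NEW, 'https://www.amazon.in/vdp/0fcb8a0eba8d4e3a9e37a55c5fbbd379')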
From 61be785a6700be8b9e064572ddfb6546b20cb8f9 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:20:45 +0000 Subject: [PATCH 436/641] [peer.tv] Add extractor (#1499) Closes #1388 Authored by: u-spec-png --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/peertv.py | 57 ++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 yt_dlp/extractor/peertv.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 2c0a885b9e..458e6e2c8c 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1063,6 +1063,7 @@ from .peertube import ( PeerTubeIE, PeerTubePlaylistIE, ) +from .peertv import PeerTVIE from .peloton import ( PelotonIE, PelotonLiveIE diff --git a/yt_dlp/extractor/peertv.py b/yt_dlp/extractor/peertv.py new file mode 100644 index 0000000000..002d33a880 --- /dev/null +++ b/yt_dlp/extractor/peertv.py @@ -0,0 +1,57 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import js_to_json + + +class PeerTVIE(InfoExtractor): + IE_NAME = 'peer.tv' + _VALID_URL = r'https?://(?:www\.)?peer\.tv/(?:de|it|en)/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.peer.tv/de/841', + 'info_dict': { + 'id': '841', + 'ext': 'mp4', + 'title': 'Die Brunnenburg', + 'description': 'md5:4395f6142b090338340ab88a3aae24ed', + }, + }, { + 'url': 'https://www.peer.tv/it/404', + 'info_dict': { + 'id': '404', + 'ext': 'mp4', + 'title': 'Cascate di ghiaccio in Val Gardena', + 'description': 'md5:e8e5907f236171842674e8090e3577b8', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_key = self._html_search_regex(r'player\.peer\.tv/js/([a-zA-Z0-9]+)', webpage, 'video key') + + js = self._download_webpage(f'https://player.peer.tv/js/{video_key}/', video_id, + headers={'Referer': 'https://www.peer.tv/'}, note='Downloading session id') + + session_id = self._search_regex(r'["\']session_id["\']:\s*["\']([a-zA-Z0-9]+)["\']', js, 'session id') + + player_webpage = self._download_webpage( f'https://player.peer.tv/jsc/{video_key}/{session_id}?jsr=aHR0cHM6Ly93d3cucGVlci50di9kZS84NDE=&cs=UTF-8&mq=2&ua=0&webm=p&mp4=p&hls=1', + video_id, note='Downloading player webpage') + + m3u8_url = self._search_regex(r'["\']playlist_url["\']:\s*(["\'][^"\']+["\'])', player_webpage, 'm3u8 url') + m3u8_url = self._parse_json(m3u8_url, video_id, transform_source=js_to_json) + + formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls') + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title').replace('\xa0', ' '), + 'formats': formats, + 'description': self._html_search_meta(('og:description', 'description'), webpage), + 'thumbnail': self._html_search_meta(('og:image', 'image'), webpage) + } From 22a510ff447a5d0e4c023b810d434611521b777c Mon Sep 17 00:00:00 2001 From: The Hatsune Daishi Date: Fri, 19 Nov 2021 06:43:22 +0900 Subject: [PATCH 437/641] [mixch] add support for mixch.tv (#1586) Authored by: nao20010128nao --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/mixch.py | 55 ++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 yt_dlp/extractor/mixch.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 458e6e2c8c..200c59bbed 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -795,6 +795,7 @@ from .mirrativ import ( ) from .mit import TechTVMITIE, OCWMITIE from .mitele import MiTeleIE +from .mixch import MixchIE from .mixcloud import ( MixcloudIE, MixcloudUserIE, diff --git a/yt_dlp/extractor/mixch.py b/yt_dlp/extractor/mixch.py new file mode 100644 index 0000000000..a99ddd172e --- /dev/null +++ b/yt_dlp/extractor/mixch.py @@ -0,0 +1,55 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + traverse_obj, +) + + +class MixchIE(InfoExtractor): + IE_NAME = 'mixch' + _VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P<id>\d+)' + + TESTS = [{ + 'url': 'https://mixch.tv/u/16236849/live', + 'skip': 'don\'t know if this live persists', + 'info_dict': { + 'id': '16236849', + 'title': '24配信シェア⭕️投票🙏💦', + 'comment_count': 13145, + 'view_count': 28348, + 'timestamp': 1636189377, + 'uploader': '🦥伊咲👶🏻#フレアワ', + 'uploader_id': '16236849', + } + }, { + 'url': 'https://mixch.tv/u/16137876/live', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(f'https://mixch.tv/u/{video_id}/live', video_id) + + initial_js_state = self._parse_json(self._search_regex( + r'(?m)^\s*window\.__INITIAL_JS_STATE__\s*=\s*(\{.+?\});\s*$', webpage, 'initial JS state'), video_id) + if not initial_js_state.get('liveInfo'): + raise ExtractorError('Livestream has ended.', expected=True) + + return { + 'id': video_id, + 'title': traverse_obj(initial_js_state, ('liveInfo', 'title')), + 'comment_count': traverse_obj(initial_js_state, ('liveInfo', 'comments')), + 'view_count': traverse_obj(initial_js_state, ('liveInfo', 'visitor')), + 'timestamp': traverse_obj(initial_js_state, ('liveInfo', 'created')), + 'uploader': traverse_obj(initial_js_state, ('broadcasterInfo', 'name')), + 'uploader_id': video_id, + 'formats': [{ + 'format_id': 'hls', + 'url': traverse_obj(initial_js_state, ('liveInfo', 'hls')) or 'https://d1hd0ww6piyb43.cloudfront.net/hls/torte_%s.m3u8' % video_id, + 'ext': 'mp4', + 'protocol': 'm3u8', + }], + 'is_live': True, + } From 402cd603a40c2115413f914ebb4dd43d9bf2449a Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:57:40 +0000 Subject: [PATCH 438/641] [LinkedIn] Add extractor (#1597) Closes #1206 Authored by: u-spec-png --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/linkedin.py | 105 ++++++++++++++++++++++++--------- 2 files changed, 78 insertions(+), 28 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 200c59bbed..1060066712 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py
@@ -704,6 +704,7 @@ from .line import ( LineLiveChannelIE, ) from .linkedin import ( + LinkedInIE, LinkedInLearningIE, LinkedInLearningCourseIE, ) diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py index c2d347efd2..9255b33012 100644 --- a/yt_dlp/extractor/linkedin.py +++ b/yt_dlp/extractor/linkedin.py @@ -6,21 +6,56 @@ import re from .common import InfoExtractor from ..utils import ( + clean_html, + extract_attributes, ExtractorError, float_or_none, + get_element_by_class, int_or_none, srt_subtitles_timecode, + strip_or_none, + mimetype2ext, try_get, urlencode_postdata, urljoin, ) -class LinkedInLearningBaseIE(InfoExtractor): +class LinkedInBaseIE(InfoExtractor): _NETRC_MACHINE = 'linkedin' - _LOGIN_URL = 'https://www.linkedin.com/uas/login?trk=learning' _logged_in = False + def _real_initialize(self): + if self._logged_in: + return + email, password = self._get_login_info() + if email is None: + return + + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') + action_url = urljoin(self._LOGIN_URL, self._search_regex( + r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, 'post url', + default='https://www.linkedin.com/uas/login-submit', group='url')) + data = self._hidden_inputs(login_page) + data.update({ + 'session_key': email, + 'session_password': password, + }) + login_submit_page = self._download_webpage( + action_url, None, 'Logging in', + data=urlencode_postdata(data)) + error = self._search_regex( + r'<span[^>]+class="error"[^>]*>\s*(.+?)\s*</span>', + login_submit_page, 'error', default=None) + if error: + raise ExtractorError(error, expected=True) + LinkedInBaseIE._logged_in = True + + +class LinkedInLearningBaseIE(LinkedInBaseIE): + _LOGIN_URL = 'https://www.linkedin.com/uas/login?trk=learning' + def _call_api(self, course_slug, fields, video_slug=None, resolution=None): query = { 'courseSlug': course_slug, @@ -52,32 +87,47 @@ class LinkedInLearningBaseIE(InfoExtractor): def _get_video_id(self, video_data, course_slug, video_slug): return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug) - def _real_initialize(self): - if self._logged_in: - return - email, password = self._get_login_info() - if email is None: - return - login_page = self._download_webpage( - self._LOGIN_URL, None, 'Downloading login page') - action_url = urljoin(self._LOGIN_URL, self._search_regex( - r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, 'post url', - default='https://www.linkedin.com/uas/login-submit', group='url')) - data = self._hidden_inputs(login_page) - data.update({ - 'session_key': email, - 'session_password': password, - }) - login_submit_page = self._download_webpage( - action_url, None, 'Logging in', - data=urlencode_postdata(data)) - error = self._search_regex( - r'<span[^>]+class="error"[^>]*>\s*(.+?)\s*</span>', - login_submit_page, 'error', default=None) - if error: - raise ExtractorError(error, expected=True) - LinkedInLearningBaseIE._logged_in = True +class LinkedInIE(LinkedInBaseIE): + _VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/.+?(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20', + 'info_dict': { + 'id': '6850898786781339649', + 'ext': 'mp4', + 'title': 'Mishal K. on LinkedIn: #sendinblueviews #toronto #digitalmarketing', + 'description': 'md5:be125430bab1c574f16aeb186a4d5b19', + 'creator': 'Mishal K.'
+ }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title') + description = clean_html(get_element_by_class('share-update-card__update-text', webpage)) + like_count = int_or_none(get_element_by_class('social-counts-reactions__social-counts-numRections', webpage)) + creator = strip_or_none(clean_html(get_element_by_class('comment__actor-name', webpage))) + + sources = self._parse_json(extract_attributes(self._search_regex(r'(<video[^>]+>)', webpage, 'video'))['data-sources'], video_id) + formats = [{ + 'url': source['src'], + 'ext': mimetype2ext(source.get('type')), + 'tbr': float_or_none(source.get('data-bitrate'), scale=1000), + } for source in sources] + + self._sort_formats(formats) + + return { + 'id': video_id, + 'formats': formats, + 'title': title, + 'like_count': like_count, + 'creator': creator, + 'thumbnail': self._og_search_thumbnail(webpage), + 'description': description, + } class LinkedInLearningIE(LinkedInLearningBaseIE): @@ -108,7 +158,6 @@ class LinkedInLearningIE(LinkedInLearningBaseIE): def _real_extract(self, url): course_slug, video_slug = self._match_valid_url(url).groups() - video_data = None formats = [] for width, height in ((640, 360), (960, 540), (1280, 720)): video_data = self._call_api( From cfcaf64a4b10400964606804085eb975cfd2a401 Mon Sep 17 00:00:00 2001 From: Paul Wise <pabs3@bonedaddy.net> Date: Fri, 19 Nov 2021 06:14:38 +0800 Subject: [PATCH 439/641] [rtrfm] Add extractor (#1628) Authored by: pabs3 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/rtrfm.py | 67 ++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 yt_dlp/extractor/rtrfm.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 1060066712..89c61312d4 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1235,6 +1235,7 @@ from .rtl2 import ( RTL2YouSeriesIE, ) from .rtp import RTPIE +from .rtrfm import RTRFMIE from .rts import RTSIE from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE from .rtvnh import RTVNHIE diff --git a/yt_dlp/extractor/rtrfm.py b/yt_dlp/extractor/rtrfm.py new file mode 100644 index 0000000000..93d51e8ed7 --- /dev/null +++ b/yt_dlp/extractor/rtrfm.py @@ -0,0 +1,67 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class RTRFMIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rtrfm\.com\.au/(?:shows|show-episode)/(?P<id>[^/?\#&]+)' + _TESTS = [ + { + 'url': 'https://rtrfm.com.au/shows/breakfast/', + 'md5': '46168394d3a5ce237cf47e85d0745413', + 'info_dict': { + 'id': 'breakfast-2021-11-16', + 'ext': 'mp3', + 'series': 'Breakfast with Taylah', + 'title': r're:^Breakfast with Taylah \d{4}-\d{2}-\d{2}$', + 'description': 'md5:0979c3ab1febfbec3f1ccb743633c611', + }, + 'skip': 'ID and md5 changes daily', + }, + { + 'url': 'https://rtrfm.com.au/show-episode/breakfast-2021-11-11/', + 'md5': '396bedf1e40f96c62b30d4999202a790', + 'info_dict': { + 'id': 'breakfast-2021-11-11', + 'ext': 'mp3', + 'series': 'Breakfast with Taylah', + 'title': 'Breakfast with Taylah 2021-11-11', + 'description': 'md5:0979c3ab1febfbec3f1ccb743633c611', + }, + }, + { + 'url': 'https://rtrfm.com.au/show-episode/breakfast-2020-06-01/', + 'md5': '594027f513ec36a24b15d65007a24dff', + 'info_dict': { + 'id': 'breakfast-2020-06-01', + 'ext': 'mp3', + 'series': 'Breakfast with Taylah', + 'title': 'Breakfast with Taylah 2020-06-01', +
'description': r're:^Breakfast with Taylah ', + }, + 'skip': 'This audio has expired', + }, + ] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + show, date, title = self._search_regex( + r'''\.playShow(?:From)?\(['"](?P<show>[^'"]+)['"],\s*['"](?P<date>[0-9]{4}-[0-9]{2}-[0-9]{2})['"],\s*['"](?P<title>[^'"]+)['"]''', + webpage, 'details', group=('show', 'date', 'title')) + url = self._download_json( + 'https://restreams.rtrfm.com.au/rzz', + show, 'Downloading MP3 URL', query={'n': show, 'd': date})['u'] + # This is the only indicator of an error until trying to download the URL and + # downloads of mp4 URLs always fail (403 for current episodes, 404 for missing). + if '.mp4' in url: + url = None + self.raise_no_formats('Expired or no episode on this date', expected=True) + return { + 'id': '%s-%s' % (show, date), + 'title': '%s %s' % (title, date), + 'series': title, + 'url': url, + 'release_date': date, + 'description': self._og_search_description(webpage), + } From 764f5de2f48a523394558b10006b97cd0b6c7acf Mon Sep 17 00:00:00 2001 From: Paul Wise <pabs3@bonedaddy.net> Date: Fri, 19 Nov 2021 06:15:41 +0800 Subject: [PATCH 440/641] [blogger] Add extractor (#1629) Authored by: pabs3 --- yt_dlp/extractor/blogger.py | 54 ++++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/generic.py | 17 +++++++++++ 3 files changed, 72 insertions(+) create mode 100644 yt_dlp/extractor/blogger.py diff --git a/yt_dlp/extractor/blogger.py b/yt_dlp/extractor/blogger.py new file mode 100644 index 0000000000..dba131cb05 --- /dev/null +++ b/yt_dlp/extractor/blogger.py @@ -0,0 +1,54 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from ..utils import ( + mimetype2ext, + parse_duration, + parse_qs, + str_or_none, + traverse_obj, +) +from .common import InfoExtractor + + +class BloggerIE(InfoExtractor): + IE_NAME = 'blogger.com' + _VALID_URL = r'https?://(?:www\.)?blogger\.com/video\.g\?token=(?P<id>.+)' + _VALID_EMBED = r'''<iframe[^>]+src=["']((?:https?:)?//(?:www\.)?blogger\.com/video\.g\?token=[^"']+)["']''' + _TESTS = [{ + 'url': 'https://www.blogger.com/video.g?token=AD6v5dzEe9hfcARr5Hlq1WTkYy6t-fXH3BBahVhGvVHe5szdEUBEloSEDSTA8-b111089KbfWuBvTN7fnbxMtymsHhXAXwVvyzHH4Qch2cfLQdGxKQrrEuFpC1amSl_9GuLWODjPgw', + 'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac', + 'info_dict': { + 'id': 'BLOGGER-video-3c740e3a49197e16-796', + 'title': 'BLOGGER-video-3c740e3a49197e16-796', + 'ext': 'mp4', + 'thumbnail': r're:^https?://.*', + 'duration': 76.068, + } + }] + + @staticmethod + def _extract_urls(webpage): + return re.findall(BloggerIE._VALID_EMBED, webpage) + + def _real_extract(self, url): + token_id = self._match_id(url) + webpage = self._download_webpage(url, token_id) + data_json = self._search_regex(r'var\s+VIDEO_CONFIG\s*=\s*(\{.*)', webpage, 'JSON data') + data = self._parse_json(data_json.encode('utf-8').decode('unicode_escape'), token_id) + streams = data['streams'] + formats = [{ + 'ext': mimetype2ext(traverse_obj(parse_qs(stream['play_url']), ('mime', 0))), + 'url': stream['play_url'], + 'format_id': str_or_none(stream.get('format_id')), + } for stream in streams] + + return { + 'id': data.get('iframe_id', token_id), + 'title': data.get('iframe_id', token_id), + 'formats': formats, + 'thumbnail': data.get('thumbnail'), + 'duration': parse_duration(traverse_obj(parse_qs(streams[0]['play_url']), ('dur', 0))), + } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index
89c61312d4..75cb0b2ab1 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -166,6 +166,7 @@ from .bleacherreport import ( BleacherReportIE, BleacherReportCMSIE, ) +from .blogger import BloggerIE from .bloomberg import BloombergIE from .bokecc import BokeCCIE from .bongacams import BongaCamsIE diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 3374c1c200..d6631e2f3b 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -136,6 +136,7 @@ from .medialaan import MedialaanIE from .simplecast import SimplecastIE from .wimtv import WimTVIE from .tvp import TVPEmbedIE +from .blogger import BloggerIE class GenericIE(InfoExtractor): @@ -2173,6 +2174,17 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + { + # blogger embed + 'url': 'https://blog.tomeuvizoso.net/2019/01/a-panfrost-milestone.html', + 'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac', + 'info_dict': { + 'id': 'BLOGGER-video-3c740e3a49197e16-796', + 'ext': 'mp4', + 'title': 'Blogger', + 'thumbnail': r're:^https?://.*', + }, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -3216,6 +3228,11 @@ class GenericIE(InfoExtractor): if onionstudios_url: return self.url_result(onionstudios_url) + # Look for Blogger embeds + blogger_urls = BloggerIE._extract_urls(webpage) + if blogger_urls: + return self.playlist_from_matches(blogger_urls, video_id, video_title, ie=BloggerIE.ie_key()) + # Look for ViewLift embeds viewlift_url = ViewLiftEmbedIE._extract_url(webpage) if viewlift_url: From c6118ca2ccf41663e14f353a6f7e6a306525e190 Mon Sep 17 00:00:00 2001 From: zulaport <70630440+zulaport@users.noreply.github.com> Date: Thu, 18 Nov 2021 14:45:13 -0800 Subject: [PATCH 441/641] [Stripchat] Add extractor (#1668) Authored by: zulaport --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/stripchat.py | 66 ++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 yt_dlp/extractor/stripchat.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 75cb0b2ab1..6bad1f40cd 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1407,6 +1407,7 @@ from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE from .streetvoice import StreetVoiceIE from .stretchinternet import StretchInternetIE +from .stripchat import StripchatIE from .stv import STVPlayerIE from .sunporno import SunPornoIE from .sverigesradio import ( diff --git a/yt_dlp/extractor/stripchat.py b/yt_dlp/extractor/stripchat.py new file mode 100644 index 0000000000..efd0afc75e --- /dev/null +++ b/yt_dlp/extractor/stripchat.py @@ -0,0 +1,66 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_str, +) +from ..utils import ( + ExtractorError, + lowercase_escape, + try_get, +) + + +class StripchatIE(InfoExtractor): + _VALID_URL = r'https?://stripchat\.com/(?P<id>[0-9A-Za-z-_]+)' + _TESTS = [{ + 'url': 'https://stripchat.com/feel_me', + 'info_dict': { + 'id': 'feel_me', + 'ext': 'mp4', + 'title': 're:^feel_me [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': str, + 'is_live': True, + 'age_limit': 18, + }, + 'skip': 'Room is offline', + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + 'https://stripchat.com/%s/' % video_id, video_id, + headers=self.geo_verification_headers()) + + data = self._parse_json( + self._search_regex( + 
r'<script\b[^>]*>\s*window\.__PRELOADED_STATE__\s*=(?P<value>.*?)<\/script>', + webpage, 'data', default='{}', group='value'), + video_id, transform_source=lowercase_escape, fatal=False) + if not data: + raise ExtractorError('Unable to find configuration for stream.') + + if try_get(data, lambda x: x['viewCam']['show'], dict): + raise ExtractorError('Model is in private show', expected=True) + elif not try_get(data, lambda x: x['viewCam']['model']['isLive'], bool): + raise ExtractorError('Model is offline', expected=True) + + server = try_get(data, lambda x: x['viewCam']['viewServers']['flashphoner-hls'], compat_str) + host = try_get(data, lambda x: x['config']['data']['hlsStreamHost'], compat_str) + model_id = try_get(data, lambda x: x['viewCam']['model']['id'], int) + + formats = self._extract_m3u8_formats( + 'https://b-%s.%s/hls/%d/%d.m3u8' % (server, host, model_id, model_id), + video_id, ext='mp4', m3u8_id='hls', fatal=False, live=True) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._live_title(video_id), + 'description': self._og_search_description(webpage), + 'is_live': True, + 'formats': formats, + # Stripchat declares the RTA meta-tag, but in an non-standard format so _rta_search() can't be used + 'age_limit': 18, + } From e16fefd8699c56d7a565e933ed1f55112ad399b4 Mon Sep 17 00:00:00 2001 From: Joshua Lochner <admin@xenova.com> Date: Fri, 19 Nov 2021 00:48:48 +0200 Subject: [PATCH 442/641] [Reddit] Add support for 1080p videos (#1682) Fixes: https://github.com/ytdl-org/youtube-dl/issues/29565 Authored by: xenova --- yt_dlp/extractor/extractors.py | 5 +-- yt_dlp/extractor/generic.py | 28 ++++++++++++ yt_dlp/extractor/reddit.py | 82 +++++++++++++++++----------------- 3 files changed, 71 insertions(+), 44 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 6bad1f40cd..d19c67243e 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1206,10 +1206,7 @@ from .redbulltv import ( RedBullTVRrnContentIE, RedBullIE, ) -from .reddit import ( - RedditIE, - RedditRIE, -) +from .reddit import RedditIE from .redtube import RedTubeIE from .regiotv import RegioTVIE from .rentv import ( diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index d6631e2f3b..9c7fa4a217 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2344,6 +2344,34 @@ class GenericIE(InfoExtractor): 'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg', } }, + { + # Reddit-hosted video that will redirect and be processed by RedditIE + # Redirects to https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/ + 'url': 'https://v.redd.it/zv89llsvexdz', + 'md5': '87f5f02f6c1582654146f830f21f8662', + 'info_dict': { + 'id': 'zv89llsvexdz', + 'ext': 'mp4', + 'timestamp': 1501941939.0, + 'title': 'That small heart attack.', + 'upload_date': '20170805', + 'uploader': 'Antw87' + } + }, + { + # 1080p Reddit-hosted video that will redirect and be processed by RedditIE + 'url': 'https://v.redd.it/33hgok7dfbz71/', + 'md5': '7a1d587940242c9bb3bd6eb320b39258', + 'info_dict': { + 'id': '33hgok7dfbz71', + 'ext': 'mp4', + 'title': "The game Didn't want me to Knife that Guy I guess", + 'uploader': 'paraf1ve', + 'timestamp': 1636788683.0, + 'upload_date': '20211113' + } + } + # ] def report_following_redirect(self, new_url): diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index 3ea750aeb1..a042a59cc4 100644 --- 
a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -8,43 +8,11 @@ from ..utils import ( try_get, unescapeHTML, url_or_none, + traverse_obj ) class RedditIE(InfoExtractor): - _VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)' - _TEST = { - # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/ - 'url': 'https://v.redd.it/zv89llsvexdz', - 'md5': '0a070c53eba7ec4534d95a5a1259e253', - 'info_dict': { - 'id': 'zv89llsvexdz', - 'ext': 'mp4', - 'title': 'zv89llsvexdz', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - formats = self._extract_m3u8_formats( - 'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id, - 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) - - formats.extend(self._extract_mpd_formats( - 'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id, - mpd_id='dash', fatal=False)) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': video_id, - 'formats': formats, - } - - -class RedditRIE(InfoExtractor): _VALID_URL = r'https?://(?P<subdomain>[^/]+\.)?reddit(?:media)?\.com/r/(?P<slug>[^/]+/comments/(?P<id>[^/?#&]+))' _TESTS = [{ 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/', @@ -147,19 +115,53 @@ class RedditRIE(InfoExtractor): for resolution in resolutions: add_thumbnail(resolution) - return { - '_type': 'url_transparent', - 'url': video_url, + info = { 'title': data.get('title'), 'thumbnails': thumbnails, 'timestamp': float_or_none(data.get('created_utc')), 'uploader': data.get('author'), - 'duration': int_or_none(try_get( - data, - (lambda x: x['media']['reddit_video']['duration'], - lambda x: x['secure_media']['reddit_video']['duration']))), 'like_count': int_or_none(data.get('ups')), 'dislike_count': int_or_none(data.get('downs')), 'comment_count': int_or_none(data.get('num_comments')), 'age_limit': age_limit, } + + # Check if media is hosted on reddit: + reddit_video = traverse_obj(data, (('media', 'secure_media'), 'reddit_video'), get_all=False) + if reddit_video: + playlist_urls = [ + try_get(reddit_video, lambda x: unescapeHTML(x[y])) + for y in ('dash_url', 'hls_url') + ] + + # Update video_id + display_id = video_id + video_id = self._search_regex( + r'https?://v\.redd\.it/(?P<id>[^/?#&]+)', reddit_video['fallback_url'], + 'video_id', default=display_id) + + dash_playlist_url = playlist_urls[0] or f'https://v.redd.it/{video_id}/DASHPlaylist.mpd' + hls_playlist_url = playlist_urls[1] or f'https://v.redd.it/{video_id}/HLSPlaylist.m3u8' + + formats = self._extract_m3u8_formats( + hls_playlist_url, display_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) + formats.extend(self._extract_mpd_formats( + dash_playlist_url, display_id, mpd_id='dash', fatal=False)) + self._sort_formats(formats) + + return { + **info, + 'id': video_id, + 'display_id': display_id, + 'formats': formats, + 'duration': int_or_none(reddit_video.get('duration')), + } + + # Not hosted on reddit, must continue extraction + return { + **info, + 'display_id': video_id, + '_type': 'url_transparent', + 'url': video_url, + } From 8863c8f09ee0bf36a83f428adca58b373d2c8358 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 18 Nov 2021 22:38:00 +0530 Subject: [PATCH 443/641] [soundcloud:search] Fix pagination --- yt_dlp/extractor/soundcloud.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 8245284748..2bb449220b 100644 --- 
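
For reference, the hosted-media check in the rewritten extractor above probes `media` and `secure_media` in a single `traverse_obj` call; a minimal standalone sketch (the sample data is made up, not from the patch):

```python
from yt_dlp.utils import traverse_obj

# Only one of the two keys is normally populated in the API response
data = {'media': None, 'secure_media': {'reddit_video': {'duration': 17}}}

# The leading tuple branches over both keys; get_all=False stops at the
# first branch that yields a value
reddit_video = traverse_obj(data, (('media', 'secure_media'), 'reddit_video'), get_all=False)
assert reddit_video == {'duration': 17}
```
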
a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -893,5 +893,6 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): break def _get_n_results(self, query, n): - tracks = self._get_collection('search/tracks', query, limit=n, q=query) - return self.playlist_result(tracks, query, query) + return self.playlist_result(itertools.islice( + self._get_collection('search/tracks', query, limit=n, q=query), + 0, None if n == float('inf') else n), query, query) From 467b6b838737c0907bbc331f96352dda3019afb7 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 19 Nov 2021 05:20:13 +0530 Subject: [PATCH 444/641] [ExtractAudio] Support `alac` Closes #1707 --- yt_dlp/postprocessor/ffmpeg.py | 44 +++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index eacee8ee9d..1bde170ce6 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -53,6 +53,7 @@ ACODECS = { 'opus': 'libopus', 'vorbis': 'libvorbis', 'wav': None, + 'alac': None, } @@ -383,7 +384,7 @@ class FFmpegPostProcessor(PostProcessor): class FFmpegExtractAudioPP(FFmpegPostProcessor): COMMON_AUDIO_EXTS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma') - SUPPORTED_EXTS = ('best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav') + SUPPORTED_EXTS = ('best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav', 'alac') def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False): FFmpegPostProcessor.__init__(self, downloader) @@ -399,10 +400,10 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): limits = { 'libmp3lame': (10, 0), + 'libvorbis': (0, 10), # FFmpeg's AAC encoder does not have an upper limit for the value of -q:a. # Experimentally, with values over 4, bitrate changes were minimal or non-existent 'aac': (0.1, 4), - 'vorbis': (0, 10), 'libfdk_aac': (1, 5), }.get(codec) if not limits: @@ -426,7 +427,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): @PostProcessor._restrict_to(images=False) def run(self, information): - path = information['filepath'] + orig_path = path = information['filepath'] orig_ext = information['ext'] if self._preferredcodec == 'best' and orig_ext in self.COMMON_AUDIO_EXTS: @@ -452,6 +453,10 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): more_opts = ['-f', 'adts'] if filecodec == 'vorbis': extension = 'ogg' + elif filecodec == 'alac': + acodec = None + extension = 'm4a' + more_opts += ['-acodec', 'alac'] else: # MP3 otherwise. acodec = 'libmp3lame' @@ -466,42 +471,49 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): more_opts = self._quality_args(acodec) if self._preferredcodec == 'aac': more_opts += ['-f', 'adts'] - if self._preferredcodec == 'm4a': + elif self._preferredcodec == 'm4a': more_opts += ['-bsf:a', 'aac_adtstoasc'] - if self._preferredcodec == 'vorbis': + elif self._preferredcodec == 'vorbis': extension = 'ogg' - if self._preferredcodec == 'wav': + elif self._preferredcodec == 'wav': extension = 'wav' more_opts += ['-f', 'wav'] + elif self._preferredcodec == 'alac': + extension = 'm4a' + more_opts += ['-acodec', 'alac'] prefix, sep, ext = path.rpartition('.') # not os.path.splitext, since the latter does not work on unicode in all setups - new_path = prefix + sep + extension + temp_path = new_path = prefix + sep + extension - information['filepath'] = new_path - information['ext'] = extension - - # If we download foo.mp3 and convert it to... 
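
The replacement logic introduced here routes same-name conversions through temp/orig files instead of skipping them; in essence (a sketch using the real `prepend_extension` helper, with a made-up filename):

```python
from yt_dlp.utils import prepend_extension

path = new_path = 'foo.m4a'                    # same-name conversion, e.g. m4a -> alac in m4a
orig_path = prepend_extension(path, 'orig')    # 'foo.orig.m4a' keeps the source safe
temp_path = prepend_extension(path, 'temp')    # 'foo.temp.m4a' receives the ffmpeg output
# After a successful run: os.replace(path, orig_path); os.replace(temp_path, new_path).
# The untouched original is what gets returned for the usual keep-video cleanup.
```
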
foo.mp3, then don't delete foo.mp3, silly. - if (new_path == path - or (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))): + if new_path == path: + orig_path = prepend_extension(path, 'orig') + temp_path = prepend_extension(path, 'temp') + if (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)) + and os.path.exists(encodeFilename(orig_path))): self.to_screen('Post-process file %s exists, skipping' % new_path) return [], information try: - self.to_screen('Destination: ' + new_path) - self.run_ffmpeg(path, new_path, acodec, more_opts) + self.to_screen(f'Destination: {new_path}') + self.run_ffmpeg(path, temp_path, acodec, more_opts) except AudioConversionError as e: raise PostProcessingError( 'audio conversion failed: ' + e.msg) except Exception: raise PostProcessingError('error running ' + self.basename) + os.replace(path, orig_path) + os.replace(temp_path, new_path) + information['filepath'] = new_path + information['ext'] = extension + # Try to update the date time for extracted audio file. if information.get('filetime') is not None: self.try_utime( new_path, time.time(), information['filetime'], errnote='Cannot update utime of audio file') - return [path], information + return [orig_path], information class FFmpegVideoConvertorPP(FFmpegPostProcessor): From 9222c38182604d0a9826291509e0719b45b3faac Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 19 Nov 2021 05:36:28 +0530 Subject: [PATCH 445/641] [cleanup] Minor cleanup Closes #1696, Closes #1673 --- CONTRIBUTING.md | 2 +- Changelog.md | 2 +- README.md | 26 ++++++++++------ test/test_youtube_signature.py | 4 +++ yt_dlp/YoutubeDL.py | 7 ++--- yt_dlp/__init__.py | 56 ++++++++++++++-------------------- yt_dlp/extractor/francetv.py | 2 +- yt_dlp/extractor/funimation.py | 2 +- yt_dlp/extractor/linkedin.py | 2 +- yt_dlp/extractor/pbs.py | 2 +- yt_dlp/extractor/tenplay.py | 2 +- yt_dlp/extractor/youtube.py | 27 +++++++++++----- 12 files changed, 74 insertions(+), 60 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cd22afed98..8a0178d944 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -209,7 +209,7 @@ After you have ensured this site is distributing its content legally, you can fo ``` 1. Add an import in [`yt_dlp/extractor/extractors.py`](yt_dlp/extractor/extractors.py). 1. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, the tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. You can also run all the tests in one go with `TestDownload.test_YourExtractor_all` -1. Make sure you have atleast one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the purticular test is disabled from running. +1. Make sure you have atleast one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running. 1. 
Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L91-L426). Add tests and code for as many as you want. 1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart): diff --git a/Changelog.md b/Changelog.md index 5ac2aa6157..7bb8c7888f 100644 --- a/Changelog.md +++ b/Changelog.md @@ -40,7 +40,7 @@ * [fragment] Fix progress display in fragmented downloads * [downloader/ffmpeg] Fix vtt download with ffmpeg * [ffmpeg] Detect presence of setts and libavformat version -* [ExtractAudio] Rescale --audio-quality correctly by [CrypticSignal](https://github.com/CrypticSignal), [pukkandan](https://github.com/pukkandan) +* [ExtractAudio] Rescale `--audio-quality` correctly by [CrypticSignal](https://github.com/CrypticSignal), [pukkandan](https://github.com/pukkandan) * [ExtractAudio] Use `libfdk_aac` if available by [CrypticSignal](https://github.com/CrypticSignal) * [FormatSort] `eac3` is better than `ac3` * [FormatSort] Fix some fields' defaults diff --git a/README.md b/README.md index 96f5d7ecb2..1a5f84cc98 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,7 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * **New playlist extractors**: bilibili categories, eroprofile albums, hotstar series, hungama albums, newgrounds user, niconico search/users, paramountplus series, patreon user, peertube playlist/channels, roosterteeth series, sonyliv series, tiktok user, trovo channels, voot series -* **Fixed/improved extractors**: 7plus, 9now, afreecatv, akamai, aljazeera, amcnetworks, animalplanet, archive.org, arte, atv, bbc, bilibili, bitchute, bravotv, camtube, cbc, cda, ceskatelevize, chingari, comedycentral, coub, crackle, crunchyroll, curiositystream, diynetwork, dw, eroprofile, facebook, francetv, funimation, globo, hearthisatie, hidive, hotstar, hungama, imdb, ina, instagram, iprima, itv, iwara, kakao, la7, linkedinlearning, linuxacadamy, mediaset, mediasite, motherless, mxplayer, nbcolympics, ndr, newgrounds, niconico, nitter, nova, nrk, nuvid, oreilly, paramountplus, parliamentlive, patreon, pbs, peertube, plutotv, polskieradio, pornhub, reddit, reddit, redtube, rmcdecouverte, roosterteeth, rtp, rumble, saml verizon login, skyit, sonyliv, soundcloud, southparkde, spankbang, spreaker, streamable, tagesschau, tbs, tennistv, tenplay, tiktok, tubi, tv2, tv2hu, tv5mondeplus, tvp, twitcasting, vh1, viafree, videa, vidio, vidme, viewlift, viki, vimeo, viu, vk, vlive, vrt, wakanim, xhamster, yahoo +* **Fixed/improved extractors**: 7plus, 9now, afreecatv, akamai, aljazeera, amcnetworks, animalplanet, archive.org, arte, atv, bbc, bilibili, bitchute, bravotv, camtube, cbc, cda, ceskatelevize, chingari, comedycentral, coub, crackle, crunchyroll, curiositystream, diynetwork, dw, eroprofile, facebook, francetv, funimation, globo, hearthisatie, hidive, hotstar, hungama, imdb, ina, instagram, iprima, itv, iwara, kakao, la7, linkedinlearning, linuxacadamy, mediaset, mediasite, motherless, mxplayer, nbcolympics, ndr, newgrounds, niconico, nitter, nova, nrk, nuvid, oreilly, paramountplus, parliamentlive, patreon, pbs, peertube, plutotv, polskieradio, pornhub, reddit, redtube, rmcdecouverte, roosterteeth, rtp, rumble, saml verizon login, skyit, sonyliv, soundcloud, southparkde, spankbang, spreaker, streamable, 
tagesschau, tbs, tennistv, tenplay, tiktok, tubi, tv2, tv2hu, tv5mondeplus, tvp, twitcasting, vh1, viafree, videa, vidio, vidme, viewlift, viki, vimeo, viu, vk, vlive, vrt, wakanim, xhamster, yahoo * **New MSOs**: Philo, Spectrum, SlingTV, Cablevision, RCN @@ -136,7 +136,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * Unlike youtube-dlc, yt-dlp does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. You can also use `--compat-options multistreams` to enable both * `--ignore-errors` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead * When writing metadata files such as thumbnails, description or infojson, the same information (if available) is also written for playlists. Use `--no-write-playlist-metafiles` or `--compat-options no-playlist-metafiles` to not write these files -* `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-infojson`. Use `--compat-options no-attach-info-json` to revert this +* `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-info-json`. Use `--no-embed-info-json` or `--compat-options no-attach-info-json` to revert this * Some metadata are embedded into different fields when using `--add-metadata` as compared to youtube-dl. Most notably, `comment` field contains the `webpage_url` and `synopsis` contains the `description`. You can [use `--parse-metadata`](https://github.com/yt-dlp/yt-dlp#modifying-metadata) to modify this to your liking or use `--compat-options embed-metadata` to revert this * `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior * The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this @@ -196,7 +196,7 @@ python3 -m pip install --no-deps -U yt-dlp If you want to be on the cutting edge, you can also install the master branch with: ``` -python3 -m pip3 install --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.zip +python3 -m pip install --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.zip ``` Note that on some systems, you may need to use `py` or `python` instead of `python3` @@ -793,7 +793,7 @@ You can also fork the project on github and push it to a release branch in your --audio-format FORMAT Specify audio format to convert the audio to when -x is used. Currently supported formats are: best (default) or one of - best|aac|flac|mp3|m4a|opus|vorbis|wav + best|aac|flac|mp3|m4a|opus|vorbis|wav|alac --audio-quality QUALITY Specify ffmpeg audio quality, insert a value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K @@ -844,15 +844,20 @@ You can also fork the project on github and push it to a release branch in your --no-embed-subs Do not embed subtitles (default) --embed-thumbnail Embed thumbnail in the video as cover art --no-embed-thumbnail Do not embed thumbnail (default) - --embed-metadata Embed metadata to the video file. 
Also adds - chapters to file unless --no-add-chapters - is used (Alias: --add-metadata) + --embed-metadata Embed metadata to the video file. Also + embeds chapters/infojson if present unless + --no-embed-chapters/--no-embed-info-json + are used (Alias: --add-metadata) --no-embed-metadata Do not add metadata to file (default) (Alias: --no-add-metadata) --embed-chapters Add chapter markers to the video file (Alias: --add-chapters) --no-embed-chapters Do not add chapter markers (default) (Alias: --no-add-chapters) + --embed-info-json Embed the infojson as an attachment to + mkv/mka video files + --no-embed-info-json Do not embed the infojson as an attachment + to the video file --parse-metadata FROM:TO Parse additional metadata like title/artist from other fields; see "MODIFYING METADATA" for details @@ -1210,11 +1215,14 @@ If you are using an output template inside a Windows batch file then you must es Note that on Windows you need to use double quotes instead of single. ```bash +$ yt-dlp --get-filename -o 'test video.%(ext)s' BaW_jenozKc +test video.webm # Literal name with correct extension + $ yt-dlp --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc -youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters +youtube-dl test video ''_ä↭𝕐.webm # All kinds of weird characters $ yt-dlp --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc --restrict-filenames -youtube-dl_test_video_.mp4 # A simple file name +youtube-dl_test_video_.webm # Restricted file name # Download YouTube playlist videos in separate directory indexed by video order in a playlist $ yt-dlp -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 60d8eabf5c..df4c360473 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -74,6 +74,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/f8cb7a3b/player_ias.vflset/en_US/base.js', 'oBo2h5euWy6osrUt', 'ivXHpm7qJjJN', ), + ( + 'https://www.youtube.com/s/player/2dfe380c/player_ias.vflset/en_US/base.js', + 'oBo2h5euWy6osrUt', '3DIBbn3qdQ', + ), ] diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 197ec11e6c..e078e62ef6 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -528,7 +528,6 @@ class YoutubeDL(object): self.cache = Cache(self) windows_enable_vt_mode() - # FIXME: This will break if we ever print color to stdout self._allow_colors = { 'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file), 'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file), @@ -2012,10 +2011,10 @@ class YoutubeDL(object): # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector if format_spec == 'all': def selector_function(ctx): - yield from _check_formats(ctx['formats']) + yield from _check_formats(ctx['formats'][::-1]) elif format_spec == 'mergeall': def selector_function(ctx): - formats = list(_check_formats(ctx['formats'])) + formats = list(_check_formats(ctx['formats'][::-1])) if not formats: return merged_format = formats[-1] @@ -3163,7 +3162,7 @@ class YoutubeDL(object): return 'images' else: return default - return f'{res} images' if is_images else res + return f'img {res}' if is_images else res def _format_note(self, fdict): res = '' diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 63b9b6e2f9..7960d3b039 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -378,8 +378,6 @@ 
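
The `__init__.py` changes below fold a dozen repeated conflict checks into one helper; its guard boils down to this predicate (a sketch; the helper name here is made up):

```python
def conflicts_with_unplayable(val, allowed=None):
    # With no validator, any truthy value conflicts with
    # --allow-unplayable-formats; with a validator, any value
    # the validator rejects conflicts
    return bool(val) if allowed is None else not allowed(val)

assert conflicts_with_unplayable(True)
assert not conflicts_with_unplayable('never', allowed=lambda x: x in (None, 'never', 'ignore'))
```
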
def _real_main(argv=None): opts.sponsorblock_remove = set() sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove - if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None: - opts.addchapters = True opts.remove_chapters = opts.remove_chapters or [] if (opts.remove_chapters or sponsorblock_query) and opts.sponskrub is not False: @@ -400,40 +398,32 @@ def _real_main(argv=None): opts.remuxvideo = False if opts.allow_unplayable_formats: - if opts.extractaudio: - report_conflict('--allow-unplayable-formats', '--extract-audio') - opts.extractaudio = False - if opts.remuxvideo: - report_conflict('--allow-unplayable-formats', '--remux-video') - opts.remuxvideo = False - if opts.recodevideo: - report_conflict('--allow-unplayable-formats', '--recode-video') - opts.recodevideo = False - if opts.addmetadata: - report_conflict('--allow-unplayable-formats', '--add-metadata') - opts.addmetadata = False - if opts.embedsubtitles: - report_conflict('--allow-unplayable-formats', '--embed-subs') - opts.embedsubtitles = False - if opts.embedthumbnail: - report_conflict('--allow-unplayable-formats', '--embed-thumbnail') - opts.embedthumbnail = False - if opts.xattrs: - report_conflict('--allow-unplayable-formats', '--xattrs') - opts.xattrs = False - if opts.fixup and opts.fixup.lower() not in ('never', 'ignore'): - report_conflict('--allow-unplayable-formats', '--fixup') + def report_unplayable_conflict(opt_name, arg, default=False, allowed=None): + val = getattr(opts, opt_name) + if (not allowed and val) or not allowed(val): + report_conflict('--allow-unplayable-formats', arg) + setattr(opts, opt_name, default) + + report_unplayable_conflict('extractaudio', '--extract-audio') + report_unplayable_conflict('remuxvideo', '--remux-video') + report_unplayable_conflict('recodevideo', '--recode-video') + report_unplayable_conflict('addmetadata', '--embed-metadata') + report_unplayable_conflict('addchapters', '--embed-chapters') + report_unplayable_conflict('embed_infojson', '--embed-info-json') + opts.embed_infojson = False + report_unplayable_conflict('embedsubtitles', '--embed-subs') + report_unplayable_conflict('embedthumbnail', '--embed-thumbnail') + report_unplayable_conflict('xattrs', '--xattrs') + report_unplayable_conflict('fixup', '--fixup', default='never', allowed=lambda x: x in (None, 'never', 'ignore')) opts.fixup = 'never' - if opts.remove_chapters: - report_conflict('--allow-unplayable-formats', '--remove-chapters') - opts.remove_chapters = [] - if opts.sponsorblock_remove: - report_conflict('--allow-unplayable-formats', '--sponsorblock-remove') - opts.sponsorblock_remove = set() - if opts.sponskrub: - report_conflict('--allow-unplayable-formats', '--sponskrub') + report_unplayable_conflict('remove_chapters', '--remove-chapters', default=[]) + report_unplayable_conflict('sponsorblock_remove', '--sponsorblock-remove', default=set()) + report_unplayable_conflict('sponskrub', '--sponskrub', default=set()) opts.sponskrub = False + if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None: + opts.addchapters = True + # PostProcessors postprocessors = list(opts.add_postprocessors) if sponsorblock_query: diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index 3bbab69e61..bc5ef4df97 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -185,7 +185,7 @@ class FranceTVIE(InfoExtractor): 'vcodec': 'none', 'ext': 'mhtml', 'protocol': 'mhtml', - 'url': 'about:dummy', + 'url': 'about:invalid', 'fragments': [{ 
'path': sheet, # XXX: not entirely accurate; each spritesheet seems to be diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py index 42711083e8..96dad2ca34 100644 --- a/yt_dlp/extractor/funimation.py +++ b/yt_dlp/extractor/funimation.py @@ -276,7 +276,7 @@ class FunimationIE(FunimationBaseIE): def _get_subtitles(self, subtitles, experience_id, episode, display_id, format_name): if isinstance(episode, str): webpage = self._download_webpage( - f'https://www.funimation.com/player/{experience_id}', display_id, + f'https://www.funimation.com/player/{experience_id}/', display_id, fatal=False, note=f'Downloading player webpage for {format_name}') episode, _, _ = self._get_episode(webpage, episode_id=episode, fatal=False) diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py index 9255b33012..bd76ae1664 100644 --- a/yt_dlp/extractor/linkedin.py +++ b/yt_dlp/extractor/linkedin.py @@ -109,7 +109,7 @@ class LinkedInIE(LinkedInBaseIE): description = clean_html(get_element_by_class('share-update-card__update-text', webpage)) like_count = int_or_none(get_element_by_class('social-counts-reactions__social-counts-numRections', webpage)) creator = strip_or_none(clean_html(get_element_by_class('comment__actor-name', webpage))) - + sources = self._parse_json(extract_attributes(self._search_regex(r'(<video[^>]+>)', webpage, 'video'))['data-sources'], video_id) formats = [{ 'url': source['src'], diff --git a/yt_dlp/extractor/pbs.py b/yt_dlp/extractor/pbs.py index 0eabf9beee..ffaa6bf929 100644 --- a/yt_dlp/extractor/pbs.py +++ b/yt_dlp/extractor/pbs.py @@ -193,7 +193,7 @@ class PBSIE(InfoExtractor): # Article with embedded player (or direct video) (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) | # Player - (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/ + (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+) ) ''' % '|'.join(list(zip(*_STATIONS))[0]) diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py index c810cfd0d5..5b3222ecf8 100644 --- a/yt_dlp/extractor/tenplay.py +++ b/yt_dlp/extractor/tenplay.py @@ -58,7 +58,7 @@ class TenPlayIE(InfoExtractor): 'email': username, 'password': password, })) - return "Bearer " + data['jwt']['accessToken'] + return 'Bearer ' + data['jwt']['accessToken'] def _real_extract(self, url): content_id = self._match_id(url) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 41e7fce101..1fbdcd98b6 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -508,9 +508,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor): Extracts visitorData from an API response or ytcfg Appears to be used to track session state """ - return traverse_obj( - args, (..., ('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))), - expected_type=compat_str, get_all=False) + return get_first( + args, (('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))), + expected_type=str) @property def is_authenticated(self): @@ -1674,7 +1674,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # shorts 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY', 'only_matching': True, - }, + }, { + 'note': 'Storyboards', + 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8', + 'info_dict': { + 'id': '5KLPxDtMqe8', + 'ext': 'mhtml', + 'format_id': 'sb0', + 'title': 'Your Brain is Plastic', + 'uploader_id': 'scishow', + 'description': 
'md5:89cd86034bdb5466cd87c6ba206cd2bc', + 'upload_date': '20140324', + 'uploader': 'SciShow', + }, 'params': {'format': 'mhtml', 'skip_download': True} + } ] @classmethod @@ -1920,9 +1933,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return sts def _mark_watched(self, video_id, player_responses): - playback_url = traverse_obj( - player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'), - expected_type=url_or_none, get_all=False) + playback_url = get_first( + player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'), + expected_type=url_or_none) if not playback_url: self.report_warning('Unable to mark watched') return From dd2a987d3f412dc61422ad13cf7b60920be8af6e Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 19 Nov 2021 06:30:25 +0530 Subject: [PATCH 446/641] [tests] Fix tests --- test/test_YoutubeDL.py | 4 ++-- test/test_all_urls.py | 1 - test/test_youtube_lists.py | 22 ++++++++++++---------- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/extractor/youtube.py | 3 --- 5 files changed, 15 insertions(+), 17 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 5a0dabeb6e..63ef50e1a6 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -137,7 +137,7 @@ class TestFormatSelection(unittest.TestCase): test('webm/mp4', '47') test('3gp/40/mp4', '35') test('example-with-dashes', 'example-with-dashes') - test('all', '35', 'example-with-dashes', '45', '47', '2') # Order doesn't actually matter for this + test('all', '2', '47', '45', 'example-with-dashes', '35') test('mergeall', '2+47+45+example-with-dashes+35', multi=True) def test_format_selection_audio(self): @@ -520,7 +520,7 @@ class TestFormatSelection(unittest.TestCase): ydl = YDL({'format': 'all[width>=400][width<=600]'}) ydl.process_ie_result(info_dict) downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] - self.assertEqual(downloaded_ids, ['B', 'C', 'D']) + self.assertEqual(downloaded_ids, ['D', 'C', 'B']) ydl = YDL({'format': 'best[height<40]'}) try: diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 68c1c68d3f..2d89366d45 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -38,7 +38,6 @@ class TestAllURLsMatching(unittest.TestCase): assertTab('https://www.youtube.com/AsapSCIENCE') assertTab('https://www.youtube.com/embedded') assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') - assertTab('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668 self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M')) diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index e831393e49..d9638658dd 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -26,29 +26,31 @@ class TestYoutubeLists(unittest.TestCase): def test_youtube_playlist_noplaylist(self): dl = FakeYDL() dl.params['noplaylist'] = True - ie = YoutubePlaylistIE(dl) + ie = YoutubeTabIE(dl) result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') self.assertEqual(result['_type'], 'url') - self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg') + self.assertEqual(YoutubeIE.extract_id(result['url']), 'FXxLjLQi3Fg') def test_youtube_course(self): + print('Skipping: Course URLs no longer exists') + return dl = 
FakeYDL() ie = YoutubePlaylistIE(dl) # TODO find a > 100 (paginating?) videos course result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') entries = list(result['entries']) - self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs') + self.assertEqual(YoutubeIE.extract_id(entries[0]['url']), 'j9WZyLZCBzs') self.assertEqual(len(entries), 25) - self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0') + self.assertEqual(YoutubeIE.extract_id(entries[-1]['url']), 'rYefUsYuEp0') def test_youtube_mix(self): dl = FakeYDL() - ie = YoutubePlaylistIE(dl) - result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w') - entries = result['entries'] + ie = YoutubeTabIE(dl) + result = ie.extract('https://www.youtube.com/watch?v=tyITL_exICo&list=RDCLAK5uy_kLWIr9gv1XLlPbaDS965-Db4TrBoUTxQ8') + entries = list(result['entries']) self.assertTrue(len(entries) >= 50) original_video = entries[0] - self.assertEqual(original_video['id'], 'OQpdSVF_k_w') + self.assertEqual(original_video['id'], 'tyITL_exICo') def test_youtube_toptracks(self): print('Skipping: The playlist page gives error 500') @@ -68,10 +70,10 @@ class TestYoutubeLists(unittest.TestCase): entries = list(result['entries']) self.assertTrue(len(entries) == 1) video = entries[0] - self.assertEqual(video['_type'], 'url_transparent') + self.assertEqual(video['_type'], 'url') self.assertEqual(video['ie_key'], 'Youtube') self.assertEqual(video['id'], 'BaW_jenozKc') - self.assertEqual(video['url'], 'BaW_jenozKc') + self.assertEqual(video['url'], 'https://www.youtube.com/watch?v=BaW_jenozKc') self.assertEqual(video['title'], 'youtube-dl test video "\'/\\ä↭𝕐') self.assertEqual(video['duration'], 10) self.assertEqual(video['uploader'], 'Philipp Hagemeister') diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index e078e62ef6..1f1b4ccd45 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2014,7 +2014,7 @@ class YoutubeDL(object): yield from _check_formats(ctx['formats'][::-1]) elif format_spec == 'mergeall': def selector_function(ctx): - formats = list(_check_formats(ctx['formats'][::-1])) + formats = list(_check_formats(ctx['formats'])) if not formats: return merged_format = formats[-1] diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 1fbdcd98b6..632129bc67 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3467,9 +3467,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): 'note': 'inline playlist with not always working continuations', 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C', 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8', - 'only_matching': True, }, { 'url': 'https://www.youtube.com/course', 'only_matching': True, From 6b993ca765753e0b04d65ec70cf787a2e9f94639 Mon Sep 17 00:00:00 2001 From: nyuszika7h <nyuszika7h@gmail.com> Date: Fri, 19 Nov 2021 02:49:51 +0100 Subject: [PATCH 447/641] [hls] Better FairPlay DRM detection (#1661) Authored by: nyuszika7h --- yt_dlp/downloader/hls.py | 9 +++++++++ yt_dlp/extractor/common.py | 8 ++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 61312c5ba5..e932fd6aea 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -77,6 +77,15 @@ class HlsFD(FragmentFD): message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; ' 
'Decryption will be performed natively, but will be extremely slow') if not can_download: + has_drm = re.search('|'.join([ + r'#EXT-X-FAXS-CM:', # Adobe Flash Access + r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay + ]), s) + if has_drm and not self.params.get('allow_unplayable_formats'): + self.report_error( + 'This video is DRM protected; Try selecting another format with --format or ' + 'add --check-formats to automatically fallback to the next best format') + return False message = message or 'Unsupported features have been detected' fd = FFmpegFD(self.ydl, self.params) self.report_warning(f'{message}; extraction will be delegated to {fd.get_basename()}') diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 6f06502961..a47364d076 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2035,10 +2035,10 @@ class InfoExtractor(object): video_id=None): formats, subtitles = [], {} - if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access - return formats, subtitles - - has_drm = re.search(r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', m3u8_doc) + has_drm = re.search('|'.join([ + r'#EXT-X-FAXS-CM:', # Adobe Flash Access + r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay + ]), m3u8_doc) def format_url(url): return url if re.match(r'^https?://', url) else compat_urlparse.urljoin(m3u8_url, url) From a04e005521ecf2eb0c4979e234ff0c4f23a3caa0 Mon Sep 17 00:00:00 2001 From: The Hatsune Daishi <nao20010128@gmail.com> Date: Fri, 19 Nov 2021 10:54:10 +0900 Subject: [PATCH 448/641] [AES] Add ECB mode (#1686) Needed for #1688 Authored by: nao20010128nao --- test/test_aes.py | 18 +++++++++++++++++- yt_dlp/aes.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/test/test_aes.py b/test/test_aes.py index 46db59e57b..5c9273f8aa 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -10,6 +10,8 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from yt_dlp.aes import ( aes_decrypt, aes_encrypt, + aes_ecb_encrypt, + aes_ecb_decrypt, aes_cbc_decrypt, aes_cbc_decrypt_bytes, aes_cbc_encrypt, @@ -17,7 +19,8 @@ from yt_dlp.aes import ( aes_ctr_encrypt, aes_gcm_decrypt_and_verify, aes_gcm_decrypt_and_verify_bytes, - aes_decrypt_text + aes_decrypt_text, + BLOCK_SIZE_BYTES, ) from yt_dlp.compat import compat_pycrypto_AES from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes @@ -94,6 +97,19 @@ class TestAES(unittest.TestCase): decrypted = (aes_decrypt_text(encrypted, password, 32)) self.assertEqual(decrypted, self.secret_msg) + def test_ecb_encrypt(self): + data = bytes_to_intlist(self.secret_msg) + data += [0x08] * (BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES) + encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key, self.iv)) + self.assertEqual( + encrypted, + b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') + + def test_ecb_decrypt(self): + data = bytes_to_intlist(b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') + decrypted = intlist_to_bytes(aes_ecb_decrypt(data, self.key, self.iv)) + self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) + if __name__ == '__main__': unittest.main() diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index 60cdeb74e0..8503e3dfd6 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -28,6 +28,48 @@ else: BLOCK_SIZE_BYTES = 16 +def aes_ecb_encrypt(data, key, iv=None): + """ + Encrypt with aes in ECB mode + + @param {int[]} data 
cleartext + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv Unused for this mode + @returns {int[]} encrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + encrypted_data = [] + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + encrypted_data += aes_encrypt(block, expanded_key) + encrypted_data = encrypted_data[:len(data)] + + return encrypted_data + + +def aes_ecb_decrypt(data, key, iv=None): + """ + Decrypt with aes in ECB mode + + @param {int[]} data cleartext + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv Unused for this mode + @returns {int[]} decrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + encrypted_data = [] + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + encrypted_data += aes_decrypt(block, expanded_key) + encrypted_data = encrypted_data[:len(data)] + + return encrypted_data + + def aes_ctr_decrypt(data, key, iv): """ Decrypt with aes in counter mode From 7333296ff5386efcd13a9db780170350e1924389 Mon Sep 17 00:00:00 2001 From: Paper <37962225+mrpapersonic@users.noreply.github.com> Date: Fri, 19 Nov 2021 01:11:36 -0500 Subject: [PATCH 449/641] [VidLii] Add 720p support (#1681) Authored by: mrpapersonic --- yt_dlp/extractor/vidlii.py | 45 ++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/vidlii.py b/yt_dlp/extractor/vidlii.py index f4774256bd..ce7487ec16 100644 --- a/yt_dlp/extractor/vidlii.py +++ b/yt_dlp/extractor/vidlii.py @@ -5,9 +5,11 @@ import re from .common import InfoExtractor from ..utils import ( + HEADRequest, float_or_none, get_element_by_id, int_or_none, + str_to_int, strip_or_none, unified_strdate, urljoin, @@ -35,6 +37,25 @@ class VidLiiIE(InfoExtractor): 'categories': ['News & Politics'], 'tags': ['Vidlii', 'Jan', 'Videogames'], } + }, { + 'url': 'https://www.vidlii.com/watch?v=zTAtaAgOLKt', + 'md5': '5778f7366aa4c569b77002f8bf6b614f', + 'info_dict': { + 'id': 'zTAtaAgOLKt', + 'ext': 'mp4', + 'title': 'FULPTUBE SUCKS.', + 'description': 'md5:087b2ca355d4c8f8f77e97c43e72d711', + 'thumbnail': 'https://www.vidlii.com/usfi/thmp/zTAtaAgOLKt.jpg', + 'uploader': 'Homicide', + 'uploader_url': 'https://www.vidlii.com/user/Homicide', + 'upload_date': '20210612', + 'duration': 89, + 'view_count': int, + 'comment_count': int, + 'average_rating': float, + 'categories': ['News & Politics'], + 'tags': ['fulp', 'tube', 'sucks', 'bad', 'fulptube'], + }, }, { 'url': 'https://www.vidlii.com/embed?v=tJluaH4BJ3v&a=0', 'only_matching': True, @@ -45,10 +66,20 @@ class VidLiiIE(InfoExtractor): webpage = self._download_webpage( 'https://www.vidlii.com/watch?v=%s' % video_id, video_id) + formats = [] - video_url = self._search_regex( - r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1', webpage, - 'video url', group='url') + sources = [source[1] for source in re.findall( + r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1', + webpage) or []] + for source in sources: + height = int(self._search_regex(r'(\d+).mp4', source, 'height', default=360)) + if self._request_webpage(HEADRequest(source), video_id, f'Checking {height}p url', errnote=False): + formats.append({ + 'url': source, + 'format_id': f'{height}p', + 'height': height, + }) + self._sort_formats(formats) title = self._search_regex( (r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), 
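
The new ECB helpers round-trip as follows; a usage sketch mirroring the added tests (key and message are made up):

```python
from yt_dlp.aes import BLOCK_SIZE_BYTES, aes_ecb_decrypt, aes_ecb_encrypt
from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes

key = bytes_to_intlist(b'k' * 16)              # 16/24/32-byte key as an int list
data = bytes_to_intlist(b'secret message!')
data += [0x08] * (BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES)  # pad to a full block

ciphertext = aes_ecb_encrypt(data, key)        # the iv argument is accepted but unused
plaintext = intlist_to_bytes(aes_ecb_decrypt(ciphertext, key))
assert plaintext.rstrip(b'\x08') == b'secret message!'
```
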
webpage, @@ -82,9 +113,9 @@ class VidLiiIE(InfoExtractor): default=None) or self._search_regex( r'duration\s*:\s*(\d+)', webpage, 'duration', fatal=False)) - view_count = int_or_none(self._search_regex( - (r'<strong>(\d+)</strong> views', - r'Views\s*:\s*<strong>(\d+)</strong>'), + view_count = str_to_int(self._search_regex( + (r'<strong>([,0-9]+)</strong> views', + r'Views\s*:\s*<strong>([,0-9]+)</strong>'), webpage, 'view count', fatal=False)) comment_count = int_or_none(self._search_regex( @@ -109,11 +140,11 @@ class VidLiiIE(InfoExtractor): return { 'id': video_id, - 'url': video_url, 'title': title, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, + 'formats': formats, 'uploader_url': uploader_url, 'upload_date': upload_date, 'duration': duration, From c45b87419f86b5c513a3135ea17e93b3deea6e29 Mon Sep 17 00:00:00 2001 From: nyuszika7h <nyuszika7h@gmail.com> Date: Fri, 19 Nov 2021 15:57:01 +0100 Subject: [PATCH 450/641] [bbc] Get all available formats (#1717) Authored by: nyuszika7h --- yt_dlp/extractor/bbc.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index 4e2dcd76b8..672ed1ffe2 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -451,9 +451,10 @@ class BBCCoUkIE(InfoExtractor): playlist = self._download_json( 'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id, playlist_id, 'Downloading playlist JSON') + formats = [] + subtitles = {} - version = playlist.get('defaultAvailableVersion') - if version: + for version in playlist.get('allAvailableVersions', []): smp_config = version['smpConfig'] title = smp_config['title'] description = smp_config['summary'] @@ -463,8 +464,18 @@ class BBCCoUkIE(InfoExtractor): continue programme_id = item.get('vpid') duration = int_or_none(item.get('duration')) - formats, subtitles = self._download_media_selector(programme_id) - return programme_id, title, description, duration, formats, subtitles + version_formats, version_subtitles = self._download_media_selector(programme_id) + types = version['types'] + for f in version_formats: + f['format_note'] = ', '.join(types) + if any('AudioDescribed' in x for x in types): + f['language_preference'] = -10 + formats += version_formats + for tag, subformats in (version_subtitles or {}).items(): + subtitles.setdefault(tag, []) + subtitles[tag] += subformats + + return programme_id, title, description, duration, formats, subtitles except ExtractorError as ee: if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404): raise From c5e3f84972f19e8f5c99ca358cf30bb105294e20 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 20 Nov 2021 08:33:51 +0530 Subject: [PATCH 451/641] [utils] Allow alignment in `render_table` and add tests --- test/test_utils.py | 41 +++++++++++++++++++++++++++++++++++++++-- yt_dlp/YoutubeDL.py | 35 +++++++++++++++++------------------ yt_dlp/utils.py | 22 +++++++++++++--------- 3 files changed, 69 insertions(+), 29 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 810ed3de4c..b918ae2b63 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1222,12 +1222,49 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') def test_render_table(self): self.assertEqual( render_table( - ['a', 'bcd'], - [[123, 4], [9999, 51]]), + ['a', 'empty', 'bcd'], + [[123, '', 4], [9999, '', 51]]), + 'a empty bcd\n' + '123 4\n' + '9999 51') + + self.assertEqual( + render_table( + ['a', 
'empty', 'bcd'], + [[123, '', 4], [9999, '', 51]], + hide_empty=True), 'a bcd\n' '123 4\n' '9999 51') + self.assertEqual( + render_table( + ['\ta', 'bcd'], + [['1\t23', 4], ['\t9999', 51]]), + ' a bcd\n' + '1 23 4\n' + '9999 51') + + self.assertEqual( + render_table( + ['a', 'bcd'], + [[123, 4], [9999, 51]], + delim='-'), + 'a bcd\n' + '--------\n' + '123 4\n' + '9999 51') + + self.assertEqual( + render_table( + ['a', 'bcd'], + [[123, 4], [9999, 51]], + delim='-', extra_gap=2), + 'a bcd\n' + '----------\n' + '123 4\n' + '9999 51') + def test_match_str(self): # Unary self.assertFalse(match_str('xy', {'x': 1200})) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 1f1b4ccd45..4bd6dcc4cf 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3229,37 +3229,36 @@ class YoutubeDL(object): formats = info_dict.get('formats', [info_dict]) new_format = self.params.get('listformats_table', True) is not False if new_format: - tbr_digits = number_of_digits(max(f.get('tbr') or 0 for f in formats)) - vbr_digits = number_of_digits(max(f.get('vbr') or 0 for f in formats)) - abr_digits = number_of_digits(max(f.get('abr') or 0 for f in formats)) delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True) table = [ [ self._format_screen(format_field(f, 'format_id'), self.Styles.ID), format_field(f, 'ext'), self.format_resolution(f), - format_field(f, 'fps', '%3d'), + format_field(f, 'fps', '\t%d'), format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), delim, - format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes), - format_field(f, 'tbr', f'%{tbr_digits}dk'), - shorten_protocol_name(f.get('protocol', '').replace("native", "n")), + format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes), + format_field(f, 'tbr', '\t%dk'), + shorten_protocol_name(f.get('protocol', '').replace('native', 'n')), delim, format_field(f, 'vcodec', default='unknown').replace('none', ''), - format_field(f, 'vbr', f'%{vbr_digits}dk'), + format_field(f, 'vbr', '\t%dk'), format_field(f, 'acodec', default='unknown').replace('none', ''), - format_field(f, 'abr', f'%{abr_digits}dk'), - format_field(f, 'asr', '%5dHz'), + format_field(f, 'abr', '\t%dk'), + format_field(f, 'asr', '\t%dHz'), join_nonempty( self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None, format_field(f, 'language', '[%s]'), - format_field(f, 'format_note'), - format_field(f, 'container', ignore=(None, f.get('ext'))), - delim=', '), + join_nonempty( + format_field(f, 'format_note'), + format_field(f, 'container', ignore=(None, f.get('ext'))), + delim=', '), + delim=' '), ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] header_line = self._list_format_headers( - 'ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', delim, ' FILESIZE', ' TBR', 'PROTO', - delim, 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO') + 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO', + delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO') else: table = [ [ @@ -3275,8 +3274,8 @@ class YoutubeDL(object): '[info] Available formats for %s:' % info_dict['id']) self.to_stdout(render_table( header_line, table, - extraGap=(0 if new_format else 1), - hideEmpty=new_format, + extra_gap=(0 if new_format else 1), + hide_empty=new_format, delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', 
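
The new `\t` alignment can be exercised standalone; a sketch with made-up rows (text after a `\t` is right-aligned within its column):

```python
from yt_dlp.utils import render_table

print(render_table(
    ['ID', 'EXT', '\tTBR'],
    [['137', 'mp4', '\t4500k'],
     ['251', 'webm', '\t160k']],
    delim='-', extra_gap=1))
```
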
test_encoding=True))) def list_thumbnails(self, info_dict): @@ -3307,7 +3306,7 @@ class YoutubeDL(object): self.to_stdout(render_table( self._list_format_headers('Language', 'Name', 'Formats'), [_row(lang, formats) for lang, formats in subtitles.items()], - hideEmpty=True)) + hide_empty=True)) def urlopen(self, req): """ Start an HTTP download """ diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index a9e066257d..282ed1f933 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4805,10 +4805,11 @@ def determine_protocol(info_dict): return compat_urllib_parse_urlparse(url).scheme -def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False): - """ Render a list of rows, each as a list of values """ +def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False): + """ Render a list of rows, each as a list of values. + Text after a \t will be right aligned """ def width(string): - return len(remove_terminal_sequences(string)) + return len(remove_terminal_sequences(string).replace('\t', '')) def get_max_lens(table): return [max(width(str(v)) for v in col) for col in zip(*table)] @@ -4816,21 +4817,24 @@ def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False): def filter_using_list(row, filterArray): return [col for (take, col) in zip(filterArray, row) if take] - if hideEmpty: + if hide_empty: max_lens = get_max_lens(data) header_row = filter_using_list(header_row, max_lens) data = [filter_using_list(row, max_lens) for row in data] table = [header_row] + data max_lens = get_max_lens(table) - extraGap += 1 + extra_gap += 1 if delim: - table = [header_row] + [[delim * (ml + extraGap) for ml in max_lens]] + data - max_lens[-1] = 0 + table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data + table[1][-1] = table[1][-1][:-extra_gap] # Remove extra_gap from end of delimiter for row in table: for pos, text in enumerate(map(str, row)): - row[pos] = text + (' ' * (max_lens[pos] - width(text) + extraGap)) - ret = '\n'.join(''.join(row) for row in table) + if '\t' in text: + row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap + else: + row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap) + ret = '\n'.join(''.join(row).rstrip() for row in table) return ret From c07a39ae8e3e3b71ec8c7c0fa3e91b6908584316 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 19 Nov 2021 20:45:52 +0530 Subject: [PATCH 452/641] [utils] Fix `PagedList` Bug in d8cf8d97a8dbc9602556de474af133b5ab0e0a29 --- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/utils.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 4bd6dcc4cf..62ec087b8d 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1328,7 +1328,7 @@ class YoutubeDL(object): self.to_stderr('\r') self.report_warning(f'{e}; Re-extracting data') return wrapper(self, *args, **kwargs) - except (DownloadCancelled, LazyList.IndexError): + except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError): raise except Exception as e: if self.params.get('ignoreerrors'): diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 282ed1f933..2d5b9892dc 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4168,6 +4168,10 @@ class LazyList(collections.abc.Sequence): class PagedList: + + class IndexError(IndexError): + pass + def __len__(self): # This is only useful for tests return len(self.getslice()) @@ -4198,7 +4202,7 @@ class PagedList: raise TypeError('indices must be non-negative 
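
The dedicated `PagedList.IndexError` raised here is what the playlist loop in `YoutubeDL` now catches, so a missing entry is distinguishable from any other `IndexError`; a sketch (the page function is made up):

```python
from yt_dlp.utils import OnDemandPagedList, PagedList

# One five-entry page, then nothing
pages = OnDemandPagedList(lambda n: iter(range(5)) if n == 0 else iter([]), 5)
try:
    pages[7]                      # past the only page
except PagedList.IndexError:      # not just any IndexError
    print('no such entry')
```
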
integers') entries = self.getslice(idx, idx + 1) if not entries: - raise IndexError() + raise self.IndexError() return entries[0] From 282f570918f936a3aa9f57d4c85de4693da882c9 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 20 Nov 2021 08:05:57 +0530 Subject: [PATCH 453/641] [utils] Fix error when copying `LazyList` --- test/test_utils.py | 10 +++++----- yt_dlp/YoutubeDL.py | 4 ++-- yt_dlp/utils.py | 20 ++++++++++++++------ 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index b918ae2b63..22dda4f377 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1657,9 +1657,9 @@ Line 1 self.assertEqual(repr(LazyList(it)), repr(it)) self.assertEqual(str(LazyList(it)), str(it)) - self.assertEqual(list(LazyList(it).reverse()), it[::-1]) - self.assertEqual(list(LazyList(it).reverse()[1:3:7]), it[::-1][1:3:7]) - self.assertEqual(list(LazyList(it).reverse()[::-1]), it) + self.assertEqual(list(LazyList(it, reverse=True)), it[::-1]) + self.assertEqual(list(reversed(LazyList(it))[::-1]), it) + self.assertEqual(list(reversed(LazyList(it))[1:3:7]), it[::-1][1:3:7]) def test_LazyList_laziness(self): @@ -1672,13 +1672,13 @@ Line 1 test(ll, 5, 5, range(6)) test(ll, -3, 7, range(10)) - ll = LazyList(range(10)).reverse() + ll = LazyList(range(10), reverse=True) test(ll, -1, 0, range(1)) test(ll, 3, 6, range(10)) ll = LazyList(itertools.count()) test(ll, 10, 10, range(11)) - ll.reverse() + ll = reversed(ll) test(ll, -15, 14, range(15)) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 62ec087b8d..fb7e12624a 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2166,7 +2166,7 @@ class YoutubeDL(object): t['url'] = sanitize_url(t['url']) if self.params.get('check_formats') is True: - info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1])).reverse() + info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True) else: info_dict['thumbnails'] = thumbnails @@ -2361,7 +2361,7 @@ class YoutubeDL(object): # TODO Central sorting goes here if self.params.get('check_formats') is True: - formats = LazyList(self._check_formats(formats[::-1])).reverse() + formats = LazyList(self._check_formats(formats[::-1]), reverse=True) if not formats or formats[0] is not info_dict: # only set the 'formats' fields if the original info_dict list them diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 2d5b9892dc..ade2bbff16 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4086,10 +4086,10 @@ class LazyList(collections.abc.Sequence): class IndexError(IndexError): pass - def __init__(self, iterable): + def __init__(self, iterable, *, reverse=False, _cache=None): self.__iterable = iter(iterable) - self.__cache = [] - self.__reversed = False + self.__cache = [] if _cache is None else _cache + self.__reversed = reverse def __iter__(self): if self.__reversed: @@ -4155,9 +4155,17 @@ class LazyList(collections.abc.Sequence): self.__exhaust() return len(self.__cache) - def reverse(self): - self.__reversed = not self.__reversed - return self + def __reversed__(self): + return type(self)(self.__iterable, reverse=not self.__reversed, _cache=self.__cache) + + def __copy__(self): + return type(self)(self.__iterable, reverse=self.__reversed, _cache=self.__cache) + + def __deepcopy__(self, memo): + # FIXME: This is actually just a shallow copy + id_ = id(self) + memo[id_] = self.__copy__() + return memo[id_] def __repr__(self): # repr and str should mimic a list. 
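
The reworked reversal and copying behave as the updated tests describe; a round-trip sketch:

```python
import copy
from yt_dlp.utils import LazyList

ll = LazyList(iter(range(5)))
rev = reversed(ll)                    # a new LazyList sharing the same cache
assert list(rev) == [4, 3, 2, 1, 0]   # exhausts the shared iterable once
assert list(ll) == [0, 1, 2, 3, 4]    # served from the shared cache
assert list(copy.copy(rev)) == [4, 3, 2, 1, 0]  # copying no longer errors
```
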
So we exhaust the iterable From d76991ab0743a1e855bd44be597a40c89d5a814a Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 20 Nov 2021 08:27:47 +0530 Subject: [PATCH 454/641] Fix `--check-formats` for `mhtml` Closes #1709 --- yt_dlp/downloader/mhtml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py index b75db18a8a..1477f65a69 100644 --- a/yt_dlp/downloader/mhtml.py +++ b/yt_dlp/downloader/mhtml.py @@ -114,8 +114,8 @@ body > figure > img { fragment_base_url = info_dict.get('fragment_base_url') fragments = info_dict['fragments'][:1] if self.params.get( 'test', False) else info_dict['fragments'] - title = info_dict['title'] - origin = info_dict['webpage_url'] + title = info_dict.get('title', info_dict['format_id']) + origin = info_dict.get('webpage_url', info_dict['url']) ctx = { 'filename': filename, From 545ad64988d03b8c38e51004cd6941236f529e66 Mon Sep 17 00:00:00 2001 From: aarubui <aarubui@users.noreply.github.com> Date: Sat, 20 Nov 2021 15:03:43 +1100 Subject: [PATCH 455/641] [willow] Add extractor (#1723) Authored by: aarubui --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/willow.py | 58 ++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 yt_dlp/extractor/willow.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index d19c67243e..fdcd60e2d8 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1789,6 +1789,7 @@ from .weibo import ( WeiboMobileIE ) from .weiqitv import WeiqiTVIE +from .willow import WillowIE from .wimtv import WimTVIE from .whowatch import WhoWatchIE from .wistia import ( diff --git a/yt_dlp/extractor/willow.py b/yt_dlp/extractor/willow.py new file mode 100644 index 0000000000..4d3d62f955 --- /dev/null +++ b/yt_dlp/extractor/willow.py @@ -0,0 +1,58 @@ +# coding: utf-8 +from ..utils import ExtractorError +from .common import InfoExtractor + + +class WillowIE(InfoExtractor): + _VALID_URL = r'https?://(www\.)?willow\.tv/videos/(?P<id>[0-9a-z-_]+)' + _GEO_COUNTRIES = ['US'] + + _TESTS = [{ + 'url': 'http://willow.tv/videos/d5winning-moment-eng-vs-ind-streaming-online-4th-test-india-tour-of-england-2021', + 'info_dict': { + 'id': '169662', + 'display_id': 'd5winning-moment-eng-vs-ind-streaming-online-4th-test-india-tour-of-england-2021', + 'ext': 'mp4', + 'title': 'Winning Moment: 4th Test, England vs India', + 'thumbnail': 'https://aimages.willow.tv/ytThumbnails/6748_D5winning_moment.jpg', + 'duration': 233, + 'timestamp': 1630947954, + 'upload_date': '20210906', + 'location': 'Kennington Oval, London', + 'series': 'India tour of England 2021', + }, + 'params': { + 'skip_download': True, # AES-encrypted m3u8 + }, + }, { + 'url': 'http://willow.tv/videos/highlights-short-ind-vs-nz-streaming-online-2nd-t20i-new-zealand-tour-of-india-2021', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_data = self._parse_json(self._html_search_regex( + r'var\s+data_js\s*=\s*JSON\.parse\(\'(.+)\'\)', webpage, + 'data_js'), video_id) + + video = next((v for v in video_data.get('trending_videos') or [] + if v.get('secureurl')), None) + if not video: + raise ExtractorError('No videos found') + + formats = self._extract_m3u8_formats(video['secureurl'], video_id, 'mp4') + self._sort_formats(formats) + + return { + 'id': str(video.get('content_id')), + 'display_id': 
video.get('video_slug'), + 'title': video.get('video_name') or self._html_search_meta('twitter:title', webpage), + 'formats': formats, + 'thumbnail': video.get('yt_thumb_url') or self._html_search_meta( + 'twitter:image', webpage, default=None), + 'duration': video.get('duration_seconds'), + 'timestamp': video.get('created_date'), + 'location': video.get('venue'), + 'series': video.get('series_name'), + } From 77fcc6515852bc2e1c6960a6e010ab2ff1caf1ee Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Sat, 20 Nov 2021 14:55:14 +0530 Subject: [PATCH 456/641] [CozyTV] Add extractor (#1727) Authored by: Ashish0804 --- yt_dlp/extractor/cozytv.py | 40 ++++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 1 + 2 files changed, 41 insertions(+) create mode 100644 yt_dlp/extractor/cozytv.py diff --git a/yt_dlp/extractor/cozytv.py b/yt_dlp/extractor/cozytv.py new file mode 100644 index 0000000000..868d8d27da --- /dev/null +++ b/yt_dlp/extractor/cozytv.py @@ -0,0 +1,40 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import unified_strdate + + +class CozyTVIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?cozy\.tv/(?P<uploader>[^/]+)/replays/(?P<id>[^/$#&?]+)' + + _TESTS = [{ + 'url': 'https://cozy.tv/beardson/replays/2021-11-19_1', + 'info_dict': { + 'id': 'beardson-2021-11-19_1', + 'ext': 'mp4', + 'title': 'pokemon pt2', + 'uploader': 'beardson', + 'upload_date': '20211119', + 'was_live': True, + 'duration': 7981, + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + uploader, date = self._match_valid_url(url).groups() + id = f'{uploader}-{date}' + data_json = self._download_json(f'https://api.cozy.tv/cache/{uploader}/replay/{date}', id) + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + f'https://cozycdn.foxtrotstream.xyz/replays/{uploader}/{date}/index.m3u8', id, ext='mp4') + return { + 'id': id, + 'title': data_json.get('title'), + 'uploader': data_json.get('user') or uploader, + 'upload_date': unified_strdate(data_json.get('date')), + 'was_live': True, + 'duration': data_json.get('duration'), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index fdcd60e2d8..a0f4908f03 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -293,6 +293,7 @@ from .commonprotocols import ( from .condenast import CondeNastIE from .contv import CONtvIE from .corus import CorusIE +from .cozytv import CozyTVIE from .cracked import CrackedIE from .crackle import CrackleIE from .crooksandliars import CrooksAndLiarsIE From 849d699a8b2d36a9aab6c3a34073c9d1c5088a29 Mon Sep 17 00:00:00 2001 From: 4a1e2y5 <66421735+4a1e2y5@users.noreply.github.com> Date: Sun, 21 Nov 2021 00:24:05 +0100 Subject: [PATCH 457/641] [xvideos] Detect embed URLs (#1729) Authored by: 4a1e2y5 --- yt_dlp/extractor/xvideos.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/xvideos.py b/yt_dlp/extractor/xvideos.py index 8fc64914ca..ef45eb929e 100644 --- a/yt_dlp/extractor/xvideos.py +++ b/yt_dlp/extractor/xvideos.py @@ -19,7 +19,7 @@ class XVideosIE(InfoExtractor): (?: (?:[^/]+\.)?xvideos2?\.com/video| (?:www\.)?xvideos\.es/video| - flashservice\.xvideos\.com/embedframe/| + (?:www|flashservice)\.xvideos\.com/embedframe/| static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video= ) (?P<id>[0-9]+) @@ -37,6 +37,9 @@ class 
XVideosIE(InfoExtractor): }, { 'url': 'https://flashservice.xvideos.com/embedframe/4588838', 'only_matching': True, + }, { + 'url': 'https://www.xvideos.com/embedframe/4588838', + 'only_matching': True, }, { 'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=4588838', 'only_matching': True, From c98d4df23bfba30fc38f2614bd96db67644e7ddf Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 22 Nov 2021 13:41:57 +0530 Subject: [PATCH 458/641] [WDR] Expand valid URL Closes #1749 --- yt_dlp/extractor/wdr.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py index f54aa6ff90..d3229d8af3 100644 --- a/yt_dlp/extractor/wdr.py +++ b/yt_dlp/extractor/wdr.py @@ -22,7 +22,11 @@ from ..utils import ( class WDRIE(InfoExtractor): - _VALID_URL = r'https?://deviceids-medp\.wdr\.de/ondemand/\d+/(?P<id>\d+)\.js' + _VALID_URL = r'''(?x)https?:// + (?:deviceids-medp\.wdr\.de/ondemand/\d+/| + kinder\.wdr\.de/(?!mediathek/)[^#?]+-) + (?P<id>\d+)\.(?:js|assetjsonp) + ''' _GEO_COUNTRIES = ['DE'] _TEST = { 'url': 'http://deviceids-medp.wdr.de/ondemand/155/1557833.js', From 234416e4bf39d442e7abd036b7c59b8934a4086b Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 22 Nov 2021 23:32:14 +0530 Subject: [PATCH 459/641] [downloader/ffmpeg] Fix for direct videos inside mpd manifests Closes #1751 --- yt_dlp/downloader/external.py | 3 +-- yt_dlp/extractor/common.py | 9 +++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 1efbb2fabe..da69423f72 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -443,8 +443,7 @@ class FFmpegFD(ExternalFD): if info_dict.get('requested_formats') or protocol == 'http_dash_segments': for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]): stream_number = fmt.get('manifest_stream_number', 0) - a_or_v = 'a' if fmt.get('acodec') != 'none' else 'v' - args.extend(['-map', f'{i}:{a_or_v}:{stream_number}']) + args.extend(['-map', f'{i}:{stream_number}']) if self.params.get('test', False): args += ['-fs', compat_str(self._TEST_FILE_SIZE)] diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index a47364d076..1565ba5c37 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import base64 +import collections import datetime import hashlib import itertools @@ -2649,7 +2650,7 @@ class InfoExtractor(object): mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration')) formats, subtitles = [], {} - stream_numbers = {'audio': 0, 'video': 0} + stream_numbers = collections.defaultdict(int) for period in mpd_doc.findall(_add_ns('Period')): period_duration = parse_duration(period.get('duration')) or mpd_duration period_ms_info = extract_multisegment_info(period, { @@ -2715,10 +2716,8 @@ class InfoExtractor(object): 'format_note': 'DASH %s' % content_type, 'filesize': filesize, 'container': mimetype2ext(mime_type) + '_dash', - 'manifest_stream_number': stream_numbers[content_type] } f.update(parse_codecs(codecs)) - stream_numbers[content_type] += 1 elif content_type == 'text': f = { 'ext': mimetype2ext(mime_type), @@ -2885,7 +2884,9 @@ class InfoExtractor(object): else: # Assuming direct URL to unfragmented media. 
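                # f['url'] also serves as the stream_numbers key a few lines below,
                # giving each direct (unfragmented) URL its own 0-based
                # manifest_stream_number for FFmpegFD's `-map <input>:<stream_number>`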
f['url'] = base_url - if content_type in ('video', 'audio') or mime_type == 'image/jpeg': + if content_type in ('video', 'audio', 'image/jpeg'): + f['manifest_stream_number'] = stream_numbers[f['url']] + stream_numbers[f['url']] += 1 formats.append(f) elif content_type == 'text': subtitles.setdefault(lang or 'und', []).append(f) From 1ee34c76bb6e3a74d5a4d76475469e64dc201063 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 01:09:17 +0530 Subject: [PATCH 460/641] [vimeo] Add fallback for config URL Closes #1662 --- yt_dlp/extractor/vimeo.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 7df4116f38..e2b86662be 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -604,6 +604,20 @@ class VimeoIE(VimeoBaseInfoExtractor): 'format': 'Original', }, }, + { + 'url': 'https://vimeo.com/channels/staffpicks/143603739', + 'info_dict': { + 'id': '143603739', + 'ext': 'mp4', + 'uploader': 'Karim Huu Do', + 'timestamp': 1445846953, + 'upload_date': '20151026', + 'title': 'The Shoes - Submarine Feat. Blaine Harrison', + 'uploader_id': 'karimhd', + 'description': 'md5:8e2eea76de4504c2e8020a9bcfa1e843', + }, + 'params': {'skip_download': 'm3u8'}, + }, { # requires passing unlisted_hash(a52724358e) to load_download_config request 'url': 'https://vimeo.com/392479337/a52724358e', @@ -798,18 +812,19 @@ class VimeoIE(VimeoBaseInfoExtractor): timestamp = None video_description = None info_dict = {} + config_url = None channel_id = self._search_regex( r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None) if channel_id: config_url = self._html_search_regex( - r'\bdata-config-url="([^"]+)"', webpage, 'config URL') + r'\bdata-config-url="([^"]+)"', webpage, 'config URL', default=None) video_description = clean_html(get_element_by_class('description', webpage)) info_dict.update({ 'channel_id': channel_id, 'channel_url': 'https://vimeo.com/channels/' + channel_id, }) - else: + if not config_url: page_config = self._parse_json(self._search_regex( r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});', webpage, 'page config', default='{}'), video_id, fatal=False) From f7b558df4d76fae77a5bbac62364195891673738 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 01:14:25 +0530 Subject: [PATCH 461/641] [mediaklikk] Expand valid URL Partial fix for #1409 --- yt_dlp/extractor/mediaklikk.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py index b9b6d739f5..18ff3befae 100644 --- a/yt_dlp/extractor/mediaklikk.py +++ b/yt_dlp/extractor/mediaklikk.py @@ -12,8 +12,8 @@ from ..compat import ( class MediaKlikkIE(InfoExtractor): - _VALID_URL = r'''(?x)^https?:\/\/(?:www\.)? - (?:mediaklikk|m4sport|hirado|petofilive)\.hu\/.*?videok?\/ + _VALID_URL = r'''(?x)https?://(?:www\.)? + (?:mediaklikk|m4sport|hirado|petofilive)\.hu/.*?(?:videok?|cikk)/ (?:(?P<year>[0-9]{4})/(?P<month>[0-9]{1,2})/(?P<day>[0-9]{1,2})/)? 
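                        # verbose (?x) mode comment: optional yyyy/m/d date path, then the slug id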
(?P<id>[^/#?_]+)''' From 0e6b018a10e751bc6da59cdf5d55e61cdf975efa Mon Sep 17 00:00:00 2001 From: Zirro <code@zirro.se> Date: Tue, 23 Nov 2021 01:40:53 +0530 Subject: [PATCH 462/641] Ensure path for link files exists (#1755) Authored by: Zirro --- yt_dlp/YoutubeDL.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index fb7e12624a..5c2d645988 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2690,6 +2690,8 @@ class YoutubeDL(object): self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information') return False linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext')) + if not self._ensure_dir_exists(encodeFilename(linkfn)): + return False if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)): self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present') return True From 14a086058a30a0748b5b716e9b21481f993518f3 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 02:33:41 +0530 Subject: [PATCH 463/641] [ARDBetaMediathek] Handle new URLs Adapted from https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf Closes #1601 --- yt_dlp/extractor/ard.py | 46 +++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py index 048d30f27d..f8d57109e1 100644 --- a/yt_dlp/extractor/ard.py +++ b/yt_dlp/extractor/ard.py @@ -388,7 +388,13 @@ class ARDIE(InfoExtractor): class ARDBetaMediathekIE(ARDMediathekBaseIE): - _VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?P<mode>player|live|video|sendung|sammlung)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)' + _VALID_URL = r'''(?x)https:// + (?:(?:beta|www)\.)?ardmediathek\.de/ + (?:(?P<client>[^/]+)/)? + (?:player|live|video|(?P<playlist>sendung|sammlung))/ + (?:(?P<display_id>[^?#]+)/)? 
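+                        # video ids are base64 'crid://…' strings, hence the mandatory
+                        # 'Y3JpZDovL' prefix below unless the (?P<playlist>…) branch matched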
+ (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)''' + _TESTS = [{ 'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/', 'md5': 'a1dc75a39c61601b980648f7c9f9f71d', @@ -403,6 +409,18 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): 'upload_date': '20200805', 'ext': 'mp4', }, + 'skip': 'Error', + }, { + 'url': 'https://www.ardmediathek.de/video/tagesschau-oder-tagesschau-20-00-uhr/das-erste/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll', + 'md5': 'f1837e563323b8a642a8ddeff0131f51', + 'info_dict': { + 'id': '10049223', + 'ext': 'mp4', + 'title': 'tagesschau, 20:00 Uhr', + 'timestamp': 1636398000, + 'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b', + 'upload_date': '20211108', + }, }, { 'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', 'only_matching': True, @@ -426,6 +444,12 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): # playlist of type 'sammlung' 'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/', 'only_matching': True, + }, { + 'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/', + 'only_matching': True, + }, { + 'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet', + 'only_matching': True, }] def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber): @@ -525,20 +549,12 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): return self.playlist_result(entries, playlist_title=display_id) def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('video_id') - display_id = mobj.group('display_id') - if display_id: - display_id = display_id.rstrip('/') - if not display_id: - display_id = video_id + video_id, display_id, playlist_type, client = self._match_valid_url(url).group( + 'id', 'display_id', 'playlist', 'client') + display_id, client = display_id or video_id, client or 'ard' - if mobj.group('mode') in ('sendung', 'sammlung'): - # this is a playlist-URL - return self._ARD_extract_playlist( - url, video_id, display_id, - mobj.group('client'), - mobj.group('mode')) + if playlist_type: + return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type) player_page = self._download_json( 'https://api.ardmediathek.de/public-gateway', @@ -574,7 +590,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): } } } -}''' % (mobj.group('client'), video_id), +}''' % (client, video_id), }).encode(), headers={ 'Content-Type': 'application/json' })['data']['playerPage'] From 8f122fa070dee737077059747731896a603c9e0b Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 13:11:28 +0530 Subject: [PATCH 464/641] [extractor] Extract `average_rating` from JSON-LD Eg: Crunchyroll --- yt_dlp/extractor/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 1565ba5c37..fc28bca2e1 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1452,6 +1452,9 @@ class InfoExtractor(object): item_type = e.get('@type') if expected_type is not None and expected_type != item_type: continue + rating = traverse_obj(e, 
('aggregateRating', 'ratingValue'), expected_type=float_or_none) + if rating is not None: + info['average_rating'] = rating if item_type in ('TVEpisode', 'Episode'): episode_name = unescapeHTML(e.get('name')) info.update({ From bc8ab44ea08995bd4345c9ca149ba82591b600bb Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 13:13:27 +0530 Subject: [PATCH 465/641] [itv] Fix for Python 3.6/3.7 Closes #1758 --- yt_dlp/extractor/itv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py index 6e6a3673cd..5f1d306f61 100644 --- a/yt_dlp/extractor/itv.py +++ b/yt_dlp/extractor/itv.py @@ -117,7 +117,7 @@ class ITVIE(InfoExtractor): # See: https://github.com/yt-dlp/yt-dlp/issues/986 platform_tag_subs, featureset_subs = next( ((platform_tag, featureset) - for platform_tag, featuresets in reversed(variants.items()) for featureset in featuresets + for platform_tag, featuresets in reversed(list(variants.items())) for featureset in featuresets if try_get(featureset, lambda x: x[2]) == 'outband-webvtt'), (None, None)) @@ -146,7 +146,7 @@ class ITVIE(InfoExtractor): # See: https://github.com/yt-dlp/yt-dlp/issues/986 platform_tag_video, featureset_video = next( ((platform_tag, featureset) - for platform_tag, featuresets in reversed(variants.items()) for featureset in featuresets + for platform_tag, featuresets in reversed(list(variants.items())) for featureset in featuresets if try_get(featureset, lambda x: x[:2]) == ['hls', 'aes']), (None, None)) if not platform_tag_video or not featureset_video: From d52cd2f5cd54bd100a51fca8e4044b4f2a89fade Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 13:15:49 +0530 Subject: [PATCH 466/641] [sbs] Fix for movies and livestreams Closes #1640 --- yt_dlp/extractor/sbs.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/sbs.py b/yt_dlp/extractor/sbs.py index 0a806ee4e4..4090f6385d 100644 --- a/yt_dlp/extractor/sbs.py +++ b/yt_dlp/extractor/sbs.py @@ -10,7 +10,14 @@ from ..utils import ( class SBSIE(InfoExtractor): IE_DESC = 'sbs.com.au' - _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=|/watch/)|news/(?:embeds/)?video/)(?P<id>[0-9]+)' + _VALID_URL = r'''(?x) + https?://(?:www\.)?sbs\.com\.au/(?: + ondemand(?: + /video/(?:single/)?| + /movie/[^/]+/| + .*?\bplay=|/watch/ + )|news/(?:embeds/)?video/ + )(?P<id>[0-9]+)''' _TESTS = [{ # Original URL is handled by the generic IE which finds the iframe: @@ -46,6 +53,13 @@ class SBSIE(InfoExtractor): }, { 'url': 'https://www.sbs.com.au/ondemand/watch/1698704451971', 'only_matching': True, + }, { + 'url': 'https://www.sbs.com.au/ondemand/movie/coherence/1469404227931', + 'only_matching': True, + }, { + 'note': 'Live stream', + 'url': 'https://www.sbs.com.au/ondemand/video/1726824003663/sbs-24x7-live-stream-nsw', + 'only_matching': True, }] def _real_extract(self, url): @@ -75,4 +89,5 @@ class SBSIE(InfoExtractor): 'ie_key': 'ThePlatform', 'id': video_id, 'url': smuggle_url(self._proto_relative_url(theplatform_url), {'force_smil_url': True}), + 'is_live': player_params.get('streamType') == 'live', } From e5d731f35dce2e0eb82d7877d6e1001d5e18ced9 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 17:15:41 +0530 Subject: [PATCH 467/641] [tv2] Expand valid URL Closes #1764 --- yt_dlp/extractor/tv2.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) 
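A minimal sketch of what the broadened pattern accepts (illustrative only, not
part of the patch; the regexes are copied from the hunk below):

    import re

    OLD = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
    NEW = r'https?://(?:www\.)?tv2\.no/v\d*/(?P<id>\d+)'

    url = 'http://www.tv2.no/v2/916509'
    assert re.match(OLD, url) is None                      # rejected before this patch
    assert re.match(NEW, url).group('id') == '916509'      # accepted with `v\d*`
    assert re.match(NEW, 'http://www.tv2.no/v/916509/')    # old-style URLs still match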
diff --git a/yt_dlp/extractor/tv2.py b/yt_dlp/extractor/tv2.py index e0851531cb..da351eeb0e 100644 --- a/yt_dlp/extractor/tv2.py +++ b/yt_dlp/extractor/tv2.py @@ -19,7 +19,7 @@ from ..utils import ( class TV2IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?tv2\.no/v\d*/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.tv2.no/v/916509/', 'info_dict': { @@ -33,6 +33,9 @@ class TV2IE(InfoExtractor): 'view_count': int, 'categories': list, }, + }, { + 'url': 'http://www.tv2.no/v2/916509', + 'only_matching': True, }] _PROTOCOLS = ('HLS', 'DASH') _GEO_COUNTRIES = ['NO'] From 57dbe8077f8d00e0fffac53669f40cd7d584474f Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 20:33:55 +0530 Subject: [PATCH 468/641] [jsinterp] Fix splice to handle float Needed for new youtube js player f1ca6900 Closes #1767 --- test/test_youtube_signature.py | 4 ++++ yt_dlp/jsinterp.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index df4c360473..3359ac457b 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -78,6 +78,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/2dfe380c/player_ias.vflset/en_US/base.js', 'oBo2h5euWy6osrUt', '3DIBbn3qdQ', ), + ( + 'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js', + 'cu3wyu6LQn2hse', 'jvxetvmlI9AN9Q', + ), ] diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index bb2a0ae0b9..a6084ab821 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -397,7 +397,7 @@ class JSInterpreter(object): elif member == 'splice': assertion(isinstance(obj, list), 'must be applied on a list') assertion(argvals, 'takes one or more arguments') - index, howMany = (argvals + [len(obj)])[:2] + index, howMany = map(int, (argvals + [len(obj)])[:2]) if index < 0: index += len(obj) add_items = argvals[2:] From ff51ed588fa75256b98ead67bdef7edda08b66f0 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 20:38:30 +0530 Subject: [PATCH 469/641] Clarify video/audio-only formats in -F Related: #1759 --- yt_dlp/YoutubeDL.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 5c2d645988..b983b17752 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -847,6 +847,7 @@ class YoutubeDL(object): DELIM = 'blue' ERROR = 'red' WARNING = 'yellow' + SUPPRESS = 'light black' def __format_text(self, out, text, f, fallback=None, *, test_encoding=False): assert out in ('screen', 'err') @@ -3149,22 +3150,17 @@ class YoutubeDL(object): @staticmethod def format_resolution(format, default='unknown'): - is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none' if format.get('vcodec') == 'none' and format.get('acodec') != 'none': return 'audio only' if format.get('resolution') is not None: return format['resolution'] if format.get('width') and format.get('height'): - res = '%dx%d' % (format['width'], format['height']) + return '%dx%d' % (format['width'], format['height']) elif format.get('height'): - res = '%sp' % format['height'] + return '%sp' % format['height'] elif format.get('width'): - res = '%dx?' % format['width'] - elif is_images: - return 'images' - else: - return default - return f'img {res}' if is_images else res + return '%dx?' 
% format['width'] + return default def _format_note(self, fdict): res = '' @@ -3236,7 +3232,7 @@ class YoutubeDL(object): [ self._format_screen(format_field(f, 'format_id'), self.Styles.ID), format_field(f, 'ext'), - self.format_resolution(f), + format_field(f, func=self.format_resolution, ignore=('audio only', 'images')), format_field(f, 'fps', '\t%d'), format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), delim, @@ -3244,9 +3240,15 @@ class YoutubeDL(object): format_field(f, 'tbr', '\t%dk'), shorten_protocol_name(f.get('protocol', '').replace('native', 'n')), delim, - format_field(f, 'vcodec', default='unknown').replace('none', ''), + format_field(f, 'vcodec', default='unknown').replace( + 'none', + 'images' if f.get('acodec') == 'none' + else self._format_screen('audio only', self.Styles.SUPPRESS)), format_field(f, 'vbr', '\t%dk'), - format_field(f, 'acodec', default='unknown').replace('none', ''), + format_field(f, 'acodec', default='unknown').replace( + 'none', + '' if f.get('vcodec') == 'none' + else self._format_screen('video only', self.Styles.SUPPRESS)), format_field(f, 'abr', '\t%dk'), format_field(f, 'asr', '\t%dHz'), join_nonempty( From 9941a1e12750c3df1350c505250ee88a230a208c Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 24 Nov 2021 08:28:36 +0530 Subject: [PATCH 470/641] [PatreonUser] Do not capture RSS URLs Closes #1777 --- yt_dlp/extractor/patreon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index c7d316efce..d3ee071e0f 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -191,7 +191,7 @@ class PatreonIE(InfoExtractor): class PatreonUserIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?P<id>[-_\w\d]+)/?(?:posts/?)?' 
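+    # the (?!rss) lookahead stops patreon.com/rss/… feed URLs from being
+    # captured as user/creator pages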
+ _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?!rss)(?P<id>[-\w]+)' _TESTS = [{ 'url': 'https://www.patreon.com/dissonancepod/', From a6213a49250129f25e8f435ff3fadf4a3237f6e1 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 24 Nov 2021 08:31:52 +0530 Subject: [PATCH 471/641] [cleanup,youtube] Reorganize Tab and Search extractor inheritances --- yt_dlp/extractor/youtube.py | 1395 ++++++++++++++++++----------------- 1 file changed, 698 insertions(+), 697 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 632129bc67..a8d515f5cb 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -44,6 +44,7 @@ from ..utils import ( join_nonempty, mimetype2ext, network_exceptions, + NO_DEFAULT, orderedSet, parse_codecs, parse_count, @@ -3116,26 +3117,699 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return info +class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): -class YoutubeTabIE(YoutubeBaseInfoExtractor): + def _extract_channel_id(self, webpage): + channel_id = self._html_search_meta( + 'channelId', webpage, 'channel id', default=None) + if channel_id: + return channel_id + channel_url = self._html_search_meta( + ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url', + 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad', + 'twitter:app:url:googleplay'), webpage, 'channel url') + return self._search_regex( + r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+', + channel_url, 'channel id') + + @staticmethod + def _extract_basic_item_renderer(item): + # Modified from _extract_grid_item_renderer + known_basic_renderers = ( + 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer' + ) + for key, renderer in item.items(): + if not isinstance(renderer, dict): + continue + elif key in known_basic_renderers: + return renderer + elif key.startswith('grid') and key.endswith('Renderer'): + return renderer + + def _grid_entries(self, grid_renderer): + for item in grid_renderer['items']: + if not isinstance(item, dict): + continue + renderer = self._extract_basic_item_renderer(item) + if not isinstance(renderer, dict): + continue + title = self._get_text(renderer, 'title') + + # playlist + playlist_id = renderer.get('playlistId') + if playlist_id: + yield self.url_result( + 'https://www.youtube.com/playlist?list=%s' % playlist_id, + ie=YoutubeTabIE.ie_key(), video_id=playlist_id, + video_title=title) + continue + # video + video_id = renderer.get('videoId') + if video_id: + yield self._extract_video(renderer) + continue + # channel + channel_id = renderer.get('channelId') + if channel_id: + yield self.url_result( + 'https://www.youtube.com/channel/%s' % channel_id, + ie=YoutubeTabIE.ie_key(), video_title=title) + continue + # generic endpoint URL support + ep_url = urljoin('https://www.youtube.com/', try_get( + renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'], + compat_str)) + if ep_url: + for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE): + if ie.suitable(ep_url): + yield self.url_result( + ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title) + break + + def _shelf_entries_from_content(self, shelf_renderer): + content = shelf_renderer.get('content') + if not isinstance(content, dict): + return + renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer') + if renderer: + # TODO: add support for nested playlists so each shelf is processed + # as separate playlist + # TODO: this includes 
only first N items + for entry in self._grid_entries(renderer): + yield entry + renderer = content.get('horizontalListRenderer') + if renderer: + # TODO + pass + + def _shelf_entries(self, shelf_renderer, skip_channels=False): + ep = try_get( + shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], + compat_str) + shelf_url = urljoin('https://www.youtube.com', ep) + if shelf_url: + # Skipping links to another channels, note that checking for + # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL + # will not work + if skip_channels and '/channels?' in shelf_url: + return + title = self._get_text(shelf_renderer, 'title') + yield self.url_result(shelf_url, video_title=title) + # Shelf may not contain shelf URL, fallback to extraction from content + for entry in self._shelf_entries_from_content(shelf_renderer): + yield entry + + def _playlist_entries(self, video_list_renderer): + for content in video_list_renderer['contents']: + if not isinstance(content, dict): + continue + renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer') + if not isinstance(renderer, dict): + continue + video_id = renderer.get('videoId') + if not video_id: + continue + yield self._extract_video(renderer) + + def _rich_entries(self, rich_grid_renderer): + renderer = try_get( + rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {} + video_id = renderer.get('videoId') + if not video_id: + return + yield self._extract_video(renderer) + + def _video_entry(self, video_renderer): + video_id = video_renderer.get('videoId') + if video_id: + return self._extract_video(video_renderer) + + def _post_thread_entries(self, post_thread_renderer): + post_renderer = try_get( + post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict) + if not post_renderer: + return + # video attachment + video_renderer = try_get( + post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {} + video_id = video_renderer.get('videoId') + if video_id: + entry = self._extract_video(video_renderer) + if entry: + yield entry + # playlist attachment + playlist_id = try_get( + post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str) + if playlist_id: + yield self.url_result( + 'https://www.youtube.com/playlist?list=%s' % playlist_id, + ie=YoutubeTabIE.ie_key(), video_id=playlist_id) + # inline video links + runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or [] + for run in runs: + if not isinstance(run, dict): + continue + ep_url = try_get( + run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str) + if not ep_url: + continue + if not YoutubeIE.suitable(ep_url): + continue + ep_video_id = YoutubeIE._match_id(ep_url) + if video_id == ep_video_id: + continue + yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id) + + def _post_thread_continuation_entries(self, post_thread_continuation): + contents = post_thread_continuation.get('contents') + if not isinstance(contents, list): + return + for content in contents: + renderer = content.get('backstagePostThreadRenderer') + if not isinstance(renderer, dict): + continue + for entry in self._post_thread_entries(renderer): + yield entry + + r''' # unused + def _rich_grid_entries(self, contents): + for content in contents: + video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict) + if video_renderer: + entry = 
self._video_entry(video_renderer) + if entry: + yield entry + ''' + def _extract_entries(self, parent_renderer, continuation_list): + # continuation_list is modified in-place with continuation_list = [continuation_token] + continuation_list[:] = [None] + contents = try_get(parent_renderer, lambda x: x['contents'], list) or [] + for content in contents: + if not isinstance(content, dict): + continue + is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict) + if not is_renderer: + renderer = content.get('richItemRenderer') + if renderer: + for entry in self._rich_entries(renderer): + yield entry + continuation_list[0] = self._extract_continuation(parent_renderer) + continue + isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or [] + for isr_content in isr_contents: + if not isinstance(isr_content, dict): + continue + + known_renderers = { + 'playlistVideoListRenderer': self._playlist_entries, + 'gridRenderer': self._grid_entries, + 'shelfRenderer': lambda x: self._shelf_entries(x), + 'backstagePostThreadRenderer': self._post_thread_entries, + 'videoRenderer': lambda x: [self._video_entry(x)], + } + for key, renderer in isr_content.items(): + if key not in known_renderers: + continue + for entry in known_renderers[key](renderer): + if entry: + yield entry + continuation_list[0] = self._extract_continuation(renderer) + break + + if not continuation_list[0]: + continuation_list[0] = self._extract_continuation(is_renderer) + + if not continuation_list[0]: + continuation_list[0] = self._extract_continuation(parent_renderer) + + def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data): + continuation_list = [None] + extract_entries = lambda x: self._extract_entries(x, continuation_list) + tab_content = try_get(tab, lambda x: x['content'], dict) + if not tab_content: + return + parent_renderer = ( + try_get(tab_content, lambda x: x['sectionListRenderer'], dict) + or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {}) + for entry in extract_entries(parent_renderer): + yield entry + continuation = continuation_list[0] + + for page_num in itertools.count(1): + if not continuation: + break + headers = self.generate_api_headers( + ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data) + response = self._extract_response( + item_id='%s page %s' % (item_id, page_num), + query=continuation, headers=headers, ytcfg=ytcfg, + check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')) + + if not response: + break + # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases + # See: https://github.com/ytdl-org/youtube-dl/issues/28702 + visitor_data = self._extract_visitor_data(response) or visitor_data + + known_continuation_renderers = { + 'playlistVideoListContinuation': self._playlist_entries, + 'gridContinuation': self._grid_entries, + 'itemSectionContinuation': self._post_thread_continuation_entries, + 'sectionListContinuation': extract_entries, # for feeds + } + continuation_contents = try_get( + response, lambda x: x['continuationContents'], dict) or {} + continuation_renderer = None + for key, value in continuation_contents.items(): + if key not in known_continuation_renderers: + continue + continuation_renderer = value + continuation_list = [None] + for entry in known_continuation_renderers[key](continuation_renderer): + yield entry + continuation = continuation_list[0] or self._extract_continuation(continuation_renderer) + break + if 
continuation_renderer: + continue + + known_renderers = { + 'gridPlaylistRenderer': (self._grid_entries, 'items'), + 'gridVideoRenderer': (self._grid_entries, 'items'), + 'gridChannelRenderer': (self._grid_entries, 'items'), + 'playlistVideoRenderer': (self._playlist_entries, 'contents'), + 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds + 'richItemRenderer': (extract_entries, 'contents'), # for hashtag + 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents') + } + on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints')) + continuation_items = try_get( + on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list) + continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {} + video_items_renderer = None + for key, value in continuation_item.items(): + if key not in known_renderers: + continue + video_items_renderer = {known_renderers[key][1]: continuation_items} + continuation_list = [None] + for entry in known_renderers[key][0](video_items_renderer): + yield entry + continuation = continuation_list[0] or self._extract_continuation(video_items_renderer) + break + if video_items_renderer: + continue + break + + @staticmethod + def _extract_selected_tab(tabs): + for tab in tabs: + renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {} + if renderer.get('selected') is True: + return renderer + else: + raise ExtractorError('Unable to find selected tab') + + @classmethod + def _extract_uploader(cls, data): + uploader = {} + renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {} + owner = try_get( + renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict) + if owner: + uploader['uploader'] = owner.get('text') + uploader['uploader_id'] = try_get( + owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str) + uploader['uploader_url'] = urljoin( + 'https://www.youtube.com/', + try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)) + return {k: v for k, v in uploader.items() if v is not None} + + def _extract_from_tabs(self, item_id, ytcfg, data, tabs): + playlist_id = title = description = channel_url = channel_name = channel_id = None + thumbnails_list = [] + tags = [] + + selected_tab = self._extract_selected_tab(tabs) + renderer = try_get( + data, lambda x: x['metadata']['channelMetadataRenderer'], dict) + if renderer: + channel_name = renderer.get('title') + channel_url = renderer.get('channelUrl') + channel_id = renderer.get('externalId') + else: + renderer = try_get( + data, lambda x: x['metadata']['playlistMetadataRenderer'], dict) + + if renderer: + title = renderer.get('title') + description = renderer.get('description', '') + playlist_id = channel_id + tags = renderer.get('keywords', '').split() + thumbnails_list = ( + try_get(renderer, lambda x: x['avatar']['thumbnails'], list) + or try_get( + self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'), + lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'], + list) + or []) + + thumbnails = [] + for t in thumbnails_list: + if not isinstance(t, dict): + continue + thumbnail_url = url_or_none(t.get('url')) + if not thumbnail_url: + continue + thumbnails.append({ + 'url': thumbnail_url, + 'width': int_or_none(t.get('width')), + 'height': int_or_none(t.get('height')), + }) + if 
playlist_id is None: + playlist_id = item_id + if title is None: + title = ( + try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText']) + or playlist_id) + title += format_field(selected_tab, 'title', ' - %s') + title += format_field(selected_tab, 'expandedText', ' - %s') + metadata = { + 'playlist_id': playlist_id, + 'playlist_title': title, + 'playlist_description': description, + 'uploader': channel_name, + 'uploader_id': channel_id, + 'uploader_url': channel_url, + 'thumbnails': thumbnails, + 'tags': tags, + } + availability = self._extract_availability(data) + if availability: + metadata['availability'] = availability + if not channel_id: + metadata.update(self._extract_uploader(data)) + metadata.update({ + 'channel': metadata['uploader'], + 'channel_id': metadata['uploader_id'], + 'channel_url': metadata['uploader_url']}) + return self.playlist_result( + self._entries( + selected_tab, playlist_id, ytcfg, + self._extract_account_syncid(ytcfg, data), + self._extract_visitor_data(data, ytcfg)), + **metadata) + + def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg): + first_id = last_id = response = None + for page_num in itertools.count(1): + videos = list(self._playlist_entries(playlist)) + if not videos: + return + start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1 + if start >= len(videos): + return + for video in videos[start:]: + if video['id'] == first_id: + self.to_screen('First video %s found again; Assuming end of Mix' % first_id) + return + yield video + first_id = first_id or videos[0]['id'] + last_id = videos[-1]['id'] + watch_endpoint = try_get( + playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint']) + headers = self.generate_api_headers( + ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data), + visitor_data=self._extract_visitor_data(response, data, ytcfg)) + query = { + 'playlistId': playlist_id, + 'videoId': watch_endpoint.get('videoId') or last_id, + 'index': watch_endpoint.get('index') or len(videos), + 'params': watch_endpoint.get('params') or 'OAE%3D' + } + response = self._extract_response( + item_id='%s page %d' % (playlist_id, page_num), + query=query, ep='next', headers=headers, ytcfg=ytcfg, + check_get_keys='contents' + ) + playlist = try_get( + response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict) + + def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg): + title = playlist.get('title') or try_get( + data, lambda x: x['titleText']['simpleText'], compat_str) + playlist_id = playlist.get('playlistId') or item_id + + # Delegating everything except mix playlists to regular tab-based playlist URL + playlist_url = urljoin(url, try_get( + playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], + compat_str)) + if playlist_url and playlist_url != url: + return self.url_result( + playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, + video_title=title) + + return self.playlist_result( + self._extract_mix_playlist(playlist, playlist_id, data, ytcfg), + playlist_id=playlist_id, playlist_title=title) + + def _extract_availability(self, data): + """ + Gets the availability of a given playlist/tab. 
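+        Returns 'private', 'unlisted', 'public' or None if nothing can be inferred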
+ Note: Unless YouTube tells us explicitly, we do not assume it is public + @param data: response + """ + is_private = is_unlisted = None + renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {} + badge_labels = self._extract_badges(renderer) + + # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge + privacy_dropdown_entries = try_get( + renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or [] + for renderer_dict in privacy_dropdown_entries: + is_selected = try_get( + renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False + if not is_selected: + continue + label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label')) + if label: + badge_labels.add(label.lower()) + break + + for badge_label in badge_labels: + if badge_label == 'unlisted': + is_unlisted = True + elif badge_label == 'private': + is_private = True + elif badge_label == 'public': + is_unlisted = is_private = False + return self._availability(is_private, False, False, False, is_unlisted) + + @staticmethod + def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict): + sidebar_renderer = try_get( + data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or [] + for item in sidebar_renderer: + renderer = try_get(item, lambda x: x[info_renderer], expected_type) + if renderer: + return renderer + + def _reload_with_unavailable_videos(self, item_id, data, ytcfg): + """ + Get playlist with unavailable videos if the 'show unavailable videos' button exists. + """ + browse_id = params = None + renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') + if not renderer: + return + menu_renderer = try_get( + renderer, lambda x: x['menu']['menuRenderer']['items'], list) or [] + for menu_item in menu_renderer: + if not isinstance(menu_item, dict): + continue + nav_item_renderer = menu_item.get('menuNavigationItemRenderer') + text = try_get( + nav_item_renderer, lambda x: x['text']['simpleText'], compat_str) + if not text or text.lower() != 'show unavailable videos': + continue + browse_endpoint = try_get( + nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {} + browse_id = browse_endpoint.get('browseId') + params = browse_endpoint.get('params') + break + + headers = self.generate_api_headers( + ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data), + visitor_data=self._extract_visitor_data(data, ytcfg)) + query = { + 'params': params or 'wgYCCAA=', + 'browseId': browse_id or 'VL%s' % item_id + } + return self._extract_response( + item_id=item_id, headers=headers, query=query, + check_get_keys='contents', fatal=False, ytcfg=ytcfg, + note='Downloading API JSON with unavailable videos') + + def _extract_webpage(self, url, item_id, fatal=True): + retries = self.get_param('extractor_retries', 3) + count = -1 + webpage = data = last_error = None + while count < retries: + count += 1 + # Sometimes youtube returns a webpage with incomplete ytInitialData + # See: https://github.com/yt-dlp/yt-dlp/issues/116 + if last_error: + self.report_warning('%s. Retrying ...' 
% last_error) + try: + webpage = self._download_webpage( + url, item_id, + note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',)) + data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {} + except ExtractorError as e: + if isinstance(e.cause, network_exceptions): + if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429): + last_error = error_to_compat_str(e.cause or e.msg) + if count < retries: + continue + if fatal: + raise + self.report_warning(error_to_compat_str(e)) + break + else: + try: + self._extract_and_report_alerts(data) + except ExtractorError as e: + if fatal: + raise + self.report_warning(error_to_compat_str(e)) + break + + if dict_get(data, ('contents', 'currentVideoEndpoint')): + break + + last_error = 'Incomplete yt initial data received' + if count >= retries: + if fatal: + raise ExtractorError(last_error) + self.report_warning(last_error) + break + + return webpage, data + + def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'): + data = None + if 'webpage' not in self._configuration_arg('skip'): + webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal) + ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage) + if not data: + if not ytcfg and self.is_authenticated: + msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.' + if 'authcheck' not in self._configuration_arg('skip') and fatal: + raise ExtractorError( + msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,' + ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check', + expected=True) + self.report_warning(msg, only_once=True) + data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client) + return data, ytcfg + + def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'): + headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client) + resolve_response = self._extract_response( + item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal, + ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client) + endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'} + for ep_key, ep in endpoints.items(): + params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict) + if params: + return self._extract_response( + item_id=item_id, query=params, ep=ep, headers=headers, + ytcfg=ytcfg, fatal=fatal, default_client=default_client, + check_get_keys=('contents', 'currentVideoEndpoint')) + err_note = 'Failed to resolve url (does the playlist exist?)' + if fatal: + raise ExtractorError(err_note, expected=True) + self.report_warning(err_note, item_id) + + @staticmethod + def _smuggle_data(entries, data): + for entry in entries: + if data: + entry['url'] = smuggle_url(entry['url'], data) + yield entry + + _SEARCH_PARAMS = None + + def _search_results(self, query, params=NO_DEFAULT): + data = {'query': query} + if params is NO_DEFAULT: + params = self._SEARCH_PARAMS + if params: + data['params'] = params + continuation = {} + for page_num in itertools.count(1): + data.update(continuation) + search = self._extract_response( + item_id='query "%s" page %s' % (query, page_num), ep='search', query=data, + check_get_keys=('contents', 'onResponseReceivedCommands') + ) + if not search: + 
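                # _extract_response returns None when the request failed or the
                # check_get_keys sanity check did not pass; stop paginating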
break + slr_contents = try_get( + search, + (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'], + lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']), + list) + if not slr_contents: + break + + # Youtube sometimes adds promoted content to searches, + # changing the index location of videos and token. + # So we search through all entries till we find them. + continuation = None + for slr_content in slr_contents: + if not continuation: + continuation = self._extract_continuation({'contents': [slr_content]}) + + isr_contents = try_get( + slr_content, + lambda x: x['itemSectionRenderer']['contents'], + list) + if not isr_contents: + continue + for content in isr_contents: + if not isinstance(content, dict): + continue + video = content.get('videoRenderer') + if not isinstance(video, dict): + continue + video_id = video.get('videoId') + if not video_id: + continue + + yield self._extract_video(video) + + if not continuation: + break + + +class YoutubeTabIE(YoutubeTabBaseInfoExtractor): IE_DESC = 'YouTube Tabs' - _VALID_URL = r'''(?x) - https?:// - (?:\w+\.)? - (?: - youtube(?:kids)?\.com| - %(invidious)s - )/ - (?: - (?P<channel_type>channel|c|user|browse)/| - (?P<not_channel> - feed/|hashtag/| - (?:playlist|watch)\?.*?\blist= - )| - (?!(?:%(reserved_names)s)\b) # Direct URLs - ) - (?P<id>[^/?\#&]+) - ''' % { + _VALID_URL = r'''(?x: + https?:// + (?:\w+\.)? + (?: + youtube(?:kids)?\.com| + %(invidious)s + )/ + (?: + (?P<channel_type>channel|c|user|browse)/| + (?P<not_channel> + feed/|hashtag/| + (?:playlist|watch)\?.*?\blist= + )| + (?!(?:%(reserved_names)s)\b) # Direct URLs + ) + (?P<id>[^/?\#&]+) + )''' % { 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES, 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), } @@ -3606,621 +4280,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): return False if YoutubeIE.suitable(url) else super( YoutubeTabIE, cls).suitable(url) - def _extract_channel_id(self, webpage): - channel_id = self._html_search_meta( - 'channelId', webpage, 'channel id', default=None) - if channel_id: - return channel_id - channel_url = self._html_search_meta( - ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url', - 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad', - 'twitter:app:url:googleplay'), webpage, 'channel url') - return self._search_regex( - r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+', - channel_url, 'channel id') - - @staticmethod - def _extract_basic_item_renderer(item): - # Modified from _extract_grid_item_renderer - known_basic_renderers = ( - 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer' - ) - for key, renderer in item.items(): - if not isinstance(renderer, dict): - continue - elif key in known_basic_renderers: - return renderer - elif key.startswith('grid') and key.endswith('Renderer'): - return renderer - - def _grid_entries(self, grid_renderer): - for item in grid_renderer['items']: - if not isinstance(item, dict): - continue - renderer = self._extract_basic_item_renderer(item) - if not isinstance(renderer, dict): - continue - title = self._get_text(renderer, 'title') - - # playlist - playlist_id = renderer.get('playlistId') - if playlist_id: - yield self.url_result( - 'https://www.youtube.com/playlist?list=%s' % playlist_id, - ie=YoutubeTabIE.ie_key(), video_id=playlist_id, - video_title=title) - continue - # video - video_id = renderer.get('videoId') - if video_id: - yield 
self._extract_video(renderer)
-                continue
-            # channel
-            channel_id = renderer.get('channelId')
-            if channel_id:
-                yield self.url_result(
-                    'https://www.youtube.com/channel/%s' % channel_id,
-                    ie=YoutubeTabIE.ie_key(), video_title=title)
-                continue
-            # generic endpoint URL support
-            ep_url = urljoin('https://www.youtube.com/', try_get(
-                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
-                compat_str))
-            if ep_url:
-                for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
-                    if ie.suitable(ep_url):
-                        yield self.url_result(
-                            ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
-                        break
-
-    def _shelf_entries_from_content(self, shelf_renderer):
-        content = shelf_renderer.get('content')
-        if not isinstance(content, dict):
-            return
-        renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
-        if renderer:
-            # TODO: add support for nested playlists so each shelf is processed
-            # as separate playlist
-            # TODO: this includes only first N items
-            for entry in self._grid_entries(renderer):
-                yield entry
-        renderer = content.get('horizontalListRenderer')
-        if renderer:
-            # TODO
-            pass
-
-    def _shelf_entries(self, shelf_renderer, skip_channels=False):
-        ep = try_get(
-            shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
-            compat_str)
-        shelf_url = urljoin('https://www.youtube.com', ep)
-        if shelf_url:
-            # Skipping links to another channels, note that checking for
-            # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
-            # will not work
-            if skip_channels and '/channels?' in shelf_url:
-                return
-            title = self._get_text(shelf_renderer, 'title')
-            yield self.url_result(shelf_url, video_title=title)
-        # Shelf may not contain shelf URL, fallback to extraction from content
-        for entry in self._shelf_entries_from_content(shelf_renderer):
-            yield entry
-
-    def _playlist_entries(self, video_list_renderer):
-        for content in video_list_renderer['contents']:
-            if not isinstance(content, dict):
-                continue
-            renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
-            if not isinstance(renderer, dict):
-                continue
-            video_id = renderer.get('videoId')
-            if not video_id:
-                continue
-            yield self._extract_video(renderer)
-
-    def _rich_entries(self, rich_grid_renderer):
-        renderer = try_get(
-            rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
-        video_id = renderer.get('videoId')
-        if not video_id:
-            return
-        yield self._extract_video(renderer)
-
-    def _video_entry(self, video_renderer):
-        video_id = video_renderer.get('videoId')
-        if video_id:
-            return self._extract_video(video_renderer)
-
-    def _post_thread_entries(self, post_thread_renderer):
-        post_renderer = try_get(
-            post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
-        if not post_renderer:
-            return
-        # video attachment
-        video_renderer = try_get(
-            post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
-        video_id = video_renderer.get('videoId')
-        if video_id:
-            entry = self._extract_video(video_renderer)
-            if entry:
-                yield entry
-        # playlist attachment
-        playlist_id = try_get(
-            post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
-        if playlist_id:
-            yield self.url_result(
-                'https://www.youtube.com/playlist?list=%s' % playlist_id,
-                ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
-        # inline video links
-        runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
-        for run in runs:
-            if not isinstance(run, dict):
-                continue
-            ep_url = try_get(
-                run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
-            if not ep_url:
-                continue
-            if not YoutubeIE.suitable(ep_url):
-                continue
-            ep_video_id = YoutubeIE._match_id(ep_url)
-            if video_id == ep_video_id:
-                continue
-            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
-
-    def _post_thread_continuation_entries(self, post_thread_continuation):
-        contents = post_thread_continuation.get('contents')
-        if not isinstance(contents, list):
-            return
-        for content in contents:
-            renderer = content.get('backstagePostThreadRenderer')
-            if not isinstance(renderer, dict):
-                continue
-            for entry in self._post_thread_entries(renderer):
-                yield entry
-
-    r''' # unused
-    def _rich_grid_entries(self, contents):
-        for content in contents:
-            video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
-            if video_renderer:
-                entry = self._video_entry(video_renderer)
-                if entry:
-                    yield entry
-    '''
-
-    def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
-
-        def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
-            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
-            for content in contents:
-                if not isinstance(content, dict):
-                    continue
-                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
-                if not is_renderer:
-                    renderer = content.get('richItemRenderer')
-                    if renderer:
-                        for entry in self._rich_entries(renderer):
-                            yield entry
-                        continuation_list[0] = self._extract_continuation(parent_renderer)
-                    continue
-                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
-                for isr_content in isr_contents:
-                    if not isinstance(isr_content, dict):
-                        continue
-
-                    known_renderers = {
-                        'playlistVideoListRenderer': self._playlist_entries,
-                        'gridRenderer': self._grid_entries,
-                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
-                        'backstagePostThreadRenderer': self._post_thread_entries,
-                        'videoRenderer': lambda x: [self._video_entry(x)],
-                    }
-                    for key, renderer in isr_content.items():
-                        if key not in known_renderers:
-                            continue
-                        for entry in known_renderers[key](renderer):
-                            if entry:
-                                yield entry
-                        continuation_list[0] = self._extract_continuation(renderer)
-                        break
-
-                if not continuation_list[0]:
-                    continuation_list[0] = self._extract_continuation(is_renderer)
-
-            if not continuation_list[0]:
-                continuation_list[0] = self._extract_continuation(parent_renderer)
-
-        continuation_list = [None]  # Python 2 does not support nonlocal
-        tab_content = try_get(tab, lambda x: x['content'], dict)
-        if not tab_content:
-            return
-        parent_renderer = (
-            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
-            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
-        for entry in extract_entries(parent_renderer):
-            yield entry
-        continuation = continuation_list[0]
-
-        for page_num in itertools.count(1):
-            if not continuation:
-                break
-            headers = self.generate_api_headers(
-                ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
-            response = self._extract_response(
-                item_id='%s page %s' % (item_id, page_num),
-                query=continuation, headers=headers, ytcfg=ytcfg,
-                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
-
-            if not response:
-                break
-            # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
-            # See: https://github.com/ytdl-org/youtube-dl/issues/28702
-            visitor_data = self._extract_visitor_data(response) or visitor_data
-
-            known_continuation_renderers = {
-                'playlistVideoListContinuation': self._playlist_entries,
-                'gridContinuation': self._grid_entries,
-                'itemSectionContinuation': self._post_thread_continuation_entries,
-                'sectionListContinuation': extract_entries,  # for feeds
-            }
-            continuation_contents = try_get(
-                response, lambda x: x['continuationContents'], dict) or {}
-            continuation_renderer = None
-            for key, value in continuation_contents.items():
-                if key not in known_continuation_renderers:
-                    continue
-                continuation_renderer = value
-                continuation_list = [None]
-                for entry in known_continuation_renderers[key](continuation_renderer):
-                    yield entry
-                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
-                break
-            if continuation_renderer:
-                continue
-
-            known_renderers = {
-                'gridPlaylistRenderer': (self._grid_entries, 'items'),
-                'gridVideoRenderer': (self._grid_entries, 'items'),
-                'gridChannelRenderer': (self._grid_entries, 'items'),
-                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
-                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
-                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
-                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
-            }
-            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
-            continuation_items = try_get(
-                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
-            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
-            video_items_renderer = None
-            for key, value in continuation_item.items():
-                if key not in known_renderers:
-                    continue
-                video_items_renderer = {known_renderers[key][1]: continuation_items}
-                continuation_list = [None]
-                for entry in known_renderers[key][0](video_items_renderer):
-                    yield entry
-                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
-                break
-            if video_items_renderer:
-                continue
-            break
-
-    @staticmethod
-    def _extract_selected_tab(tabs):
-        for tab in tabs:
-            renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
-            if renderer.get('selected') is True:
-                return renderer
-        else:
-            raise ExtractorError('Unable to find selected tab')
-
-    @classmethod
-    def _extract_uploader(cls, data):
-        uploader = {}
-        renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
-        owner = try_get(
-            renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
-        if owner:
-            uploader['uploader'] = owner.get('text')
-            uploader['uploader_id'] = try_get(
-                owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
-            uploader['uploader_url'] = urljoin(
-                'https://www.youtube.com/',
-                try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
-        return {k: v for k, v in uploader.items() if v is not None}
-
-    def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
-        playlist_id = title = description = channel_url = channel_name = channel_id = None
-        thumbnails_list = []
-        tags = []
-
-        selected_tab = self._extract_selected_tab(tabs)
-        renderer = try_get(
-            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
-        if renderer:
-            channel_name = renderer.get('title')
-            channel_url = renderer.get('channelUrl')
-            channel_id = renderer.get('externalId')
-        else:
-            renderer = try_get(
-                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
-
-        if renderer:
-            title = renderer.get('title')
-            description = renderer.get('description', '')
-            playlist_id = channel_id
-            tags = renderer.get('keywords', '').split()
-            thumbnails_list = (
-                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
-                or try_get(
-                    self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
-                    lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
-                    list)
-                or [])
-
-        thumbnails = []
-        for t in thumbnails_list:
-            if not isinstance(t, dict):
-                continue
-            thumbnail_url = url_or_none(t.get('url'))
-            if not thumbnail_url:
-                continue
-            thumbnails.append({
-                'url': thumbnail_url,
-                'width': int_or_none(t.get('width')),
-                'height': int_or_none(t.get('height')),
-            })
-        if playlist_id is None:
-            playlist_id = item_id
-        if title is None:
-            title = (
-                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
-                or playlist_id)
-        title += format_field(selected_tab, 'title', ' - %s')
-        title += format_field(selected_tab, 'expandedText', ' - %s')
-        metadata = {
-            'playlist_id': playlist_id,
-            'playlist_title': title,
-            'playlist_description': description,
-            'uploader': channel_name,
-            'uploader_id': channel_id,
-            'uploader_url': channel_url,
-            'thumbnails': thumbnails,
-            'tags': tags,
-        }
-        availability = self._extract_availability(data)
-        if availability:
-            metadata['availability'] = availability
-        if not channel_id:
-            metadata.update(self._extract_uploader(data))
-        metadata.update({
-            'channel': metadata['uploader'],
-            'channel_id': metadata['uploader_id'],
-            'channel_url': metadata['uploader_url']})
-        return self.playlist_result(
-            self._entries(
-                selected_tab, playlist_id, ytcfg,
-                self._extract_account_syncid(ytcfg, data),
-                self._extract_visitor_data(data, ytcfg)),
-            **metadata)
-
-    def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
-        first_id = last_id = response = None
-        for page_num in itertools.count(1):
-            videos = list(self._playlist_entries(playlist))
-            if not videos:
-                return
-            start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
-            if start >= len(videos):
-                return
-            for video in videos[start:]:
-                if video['id'] == first_id:
-                    self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
-                    return
-                yield video
-            first_id = first_id or videos[0]['id']
-            last_id = videos[-1]['id']
-            watch_endpoint = try_get(
-                playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
-            headers = self.generate_api_headers(
-                ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
-                visitor_data=self._extract_visitor_data(response, data, ytcfg))
-            query = {
-                'playlistId': playlist_id,
-                'videoId': watch_endpoint.get('videoId') or last_id,
-                'index': watch_endpoint.get('index') or len(videos),
-                'params': watch_endpoint.get('params') or 'OAE%3D'
-            }
-            response = self._extract_response(
-                item_id='%s page %d' % (playlist_id, page_num),
-                query=query, ep='next', headers=headers, ytcfg=ytcfg,
-                check_get_keys='contents'
-            )
-            playlist = try_get(
-                response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
-
-    def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
-        title = playlist.get('title') or try_get(
-            data, lambda x: x['titleText']['simpleText'], compat_str)
-        playlist_id = playlist.get('playlistId') or item_id
-
-        # Delegating everything except mix playlists to regular tab-based playlist URL
-        playlist_url = urljoin(url, try_get(
-            playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
-            compat_str))
-        if playlist_url and playlist_url != url:
-            return self.url_result(
-                playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
-                video_title=title)
-
-        return self.playlist_result(
-            self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
-            playlist_id=playlist_id, playlist_title=title)
-
-    def _extract_availability(self, data):
-        """
-        Gets the availability of a given playlist/tab.
-        Note: Unless YouTube tells us explicitly, we do not assume it is public
-        @param data: response
-        """
-        is_private = is_unlisted = None
-        renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
-        badge_labels = self._extract_badges(renderer)
-
-        # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
-        privacy_dropdown_entries = try_get(
-            renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
-        for renderer_dict in privacy_dropdown_entries:
-            is_selected = try_get(
-                renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
-            if not is_selected:
-                continue
-            label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
-            if label:
-                badge_labels.add(label.lower())
-                break
-
-        for badge_label in badge_labels:
-            if badge_label == 'unlisted':
-                is_unlisted = True
-            elif badge_label == 'private':
-                is_private = True
-            elif badge_label == 'public':
-                is_unlisted = is_private = False
-        return self._availability(is_private, False, False, False, is_unlisted)
-
-    @staticmethod
-    def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
-        sidebar_renderer = try_get(
-            data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
-        for item in sidebar_renderer:
-            renderer = try_get(item, lambda x: x[info_renderer], expected_type)
-            if renderer:
-                return renderer
-
-    def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
-        """
-        Get playlist with unavailable videos if the 'show unavailable videos' button exists.
-        """
-        browse_id = params = None
-        renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
-        if not renderer:
-            return
-        menu_renderer = try_get(
-            renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
-        for menu_item in menu_renderer:
-            if not isinstance(menu_item, dict):
-                continue
-            nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
-            text = try_get(
-                nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
-            if not text or text.lower() != 'show unavailable videos':
-                continue
-            browse_endpoint = try_get(
-                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
-            browse_id = browse_endpoint.get('browseId')
-            params = browse_endpoint.get('params')
-            break
-
-        headers = self.generate_api_headers(
-            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
-            visitor_data=self._extract_visitor_data(data, ytcfg))
-        query = {
-            'params': params or 'wgYCCAA=',
-            'browseId': browse_id or 'VL%s' % item_id
-        }
-        return self._extract_response(
-            item_id=item_id, headers=headers, query=query,
-            check_get_keys='contents', fatal=False, ytcfg=ytcfg,
-            note='Downloading API JSON with unavailable videos')
-
-    def _extract_webpage(self, url, item_id, fatal=True):
-        retries = self.get_param('extractor_retries', 3)
-        count = -1
-        webpage = data = last_error = None
-        while count < retries:
-            count += 1
-            # Sometimes youtube returns a webpage with incomplete ytInitialData
-            # See: https://github.com/yt-dlp/yt-dlp/issues/116
-            if last_error:
-                self.report_warning('%s. Retrying ...' % last_error)
-            try:
-                webpage = self._download_webpage(
-                    url, item_id,
-                    note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
-                data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
-            except ExtractorError as e:
-                if isinstance(e.cause, network_exceptions):
-                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
-                        last_error = error_to_compat_str(e.cause or e.msg)
-                        if count < retries:
-                            continue
-                if fatal:
-                    raise
-                self.report_warning(error_to_compat_str(e))
-                break
-            else:
-                try:
-                    self._extract_and_report_alerts(data)
-                except ExtractorError as e:
-                    if fatal:
-                        raise
-                    self.report_warning(error_to_compat_str(e))
-                    break
-
-                if dict_get(data, ('contents', 'currentVideoEndpoint')):
-                    break
-
-                last_error = 'Incomplete yt initial data received'
-                if count >= retries:
-                    if fatal:
-                        raise ExtractorError(last_error)
-                    self.report_warning(last_error)
-                    break
-
-        return webpage, data
-
-    def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
-        data = None
-        if 'webpage' not in self._configuration_arg('skip'):
-            webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
-            ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
-        if not data:
-            if not ytcfg and self.is_authenticated:
-                msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
-                if 'authcheck' not in self._configuration_arg('skip') and fatal:
-                    raise ExtractorError(
-                        msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
-                              ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
-                        expected=True)
-                self.report_warning(msg, only_once=True)
-            data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
-        return data, ytcfg
-
-    def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
-        headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
-        resolve_response = self._extract_response(
-            item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
-            ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
-        endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'}
-        for ep_key, ep in endpoints.items():
-            params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
-            if params:
-                return self._extract_response(
-                    item_id=item_id, query=params, ep=ep, headers=headers,
-                    ytcfg=ytcfg, fatal=fatal, default_client=default_client,
-                    check_get_keys=('contents', 'currentVideoEndpoint'))
-        err_note = 'Failed to resolve url (does the playlist exist?)'
-        if fatal:
-            raise ExtractorError(err_note, expected=True)
-        self.report_warning(err_note, item_id)
-
-    @staticmethod
-    def _smuggle_data(entries, data):
-        for entry in entries:
-            if data:
-                entry['url'] = smuggle_url(entry['url'], data)
-            yield entry
-
     def _real_extract(self, url):
         url, smuggled_data = unsmuggle_url(url, {})
         if self.is_music_url(url):
@@ -4506,77 +4565,24 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
             ie=YoutubeTabIE.ie_key())
 
 
-class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
-    IE_DESC = 'YouTube searches'
+class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
+    IE_DESC = 'YouTube search'
     IE_NAME = 'youtube:search'
     _SEARCH_KEY = 'ytsearch'
     _SEARCH_PARAMS = None
     _TESTS = []
 
-    def _search_results(self, query):
-        data = {'query': query}
-        if self._SEARCH_PARAMS:
-            data['params'] = self._SEARCH_PARAMS
-        continuation = {}
-        for page_num in itertools.count(1):
-            data.update(continuation)
-            search = self._extract_response(
-                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
-                check_get_keys=('contents', 'onResponseReceivedCommands')
-            )
-            if not search:
-                break
-            slr_contents = try_get(
-                search,
-                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
-                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
-                list)
-            if not slr_contents:
-                break
-
-            # Youtube sometimes adds promoted content to searches,
-            # changing the index location of videos and token.
-            # So we search through all entries till we find them.
-            continuation = None
-            for slr_content in slr_contents:
-                if not continuation:
-                    continuation = self._extract_continuation({'contents': [slr_content]})
-
-                isr_contents = try_get(
-                    slr_content,
-                    lambda x: x['itemSectionRenderer']['contents'],
-                    list)
-                if not isr_contents:
-                    continue
-                for content in isr_contents:
-                    if not isinstance(content, dict):
-                        continue
-                    video = content.get('videoRenderer')
-                    if not isinstance(video, dict):
-                        continue
-                    video_id = video.get('videoId')
-                    if not video_id:
-                        continue
-
-                    yield self._extract_video(video)
-
-            if not continuation:
-                break
-
-
-class YoutubeSearchDateIE(YoutubeSearchIE):
+class YoutubeSearchDateIE(SearchInfoExtractor, YoutubeTabBaseInfoExtractor):
     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
     _SEARCH_KEY = 'ytsearchdate'
-    IE_DESC = 'YouTube searches, newest videos first'
+    IE_DESC = 'YouTube search, newest videos first'
     _SEARCH_PARAMS = 'CAI%3D'
 
 
-class YoutubeSearchURLIE(YoutubeSearchIE):
+class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
     IE_DESC = 'YouTube search URLs with sorting and filter support'
     IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
-    _SEARCH_KEY = None
     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
-    # _MAX_RESULTS = 100
     _TESTS = [{
         'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
         'playlist_mincount': 5,
@@ -4589,15 +4595,10 @@ class YoutubeSearchURLIE(YoutubeSearchIE):
         'only_matching': True,
     }]
 
-    @classmethod
-    def _make_valid_url(cls):
-        return cls._VALID_URL
-
     def _real_extract(self, url):
         qs = parse_qs(url)
         query = (qs.get('search_query') or qs.get('q'))[0]
-        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
-        return self._get_n_results(query, self._MAX_RESULTS)
+        return self.playlist_result(self._search_results(query, qs.get('sp', (None,))[0]), query, query)
 
 
 class YoutubeFeedsInfoExtractor(YoutubeTabIE):
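
A side note on the `continuation_list = [None]` idiom that recurs in `_entries` above: the comment in the removed code says it exists because Python 2 has no `nonlocal`, so a mutable one-element list is used to let the inner generator hand the continuation token back to its caller. A minimal standalone sketch of the same pattern (illustrative only; `fetch_page` and the page shape are made up, not part of the patch):

import itertools

def paginate(fetch_page):
    # One-element list: the nested generator mutates it, emulating `nonlocal`.
    continuation_list = [None]

    def extract_entries(page):
        for entry in page['entries']:
            yield entry
        continuation_list[0] = page.get('continuation')

    for page_num in itertools.count(1):
        page = fetch_page(continuation_list[0])
        yield from extract_entries(page)
        if not continuation_list[0]:
            break

pages = {None: {'entries': [1, 2], 'continuation': 'tok'},
         'tok': {'entries': [3], 'continuation': None}}
print(list(paginate(pages.get)))  # [1, 2, 3]
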
From a61fd4cf6fa23b05729396ae342a5fe9785c231f Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 24 Nov 2021 09:27:59 +0530
Subject: [PATCH 472/641] [youtube:search_url] Add playlist/channel support

Closes #1213, #1214
---
 yt_dlp/extractor/youtube.py | 57 +++++++++++++------------------------
 1 file changed, 19 insertions(+), 38 deletions(-)

diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index a8d515f5cb..ba135613bc 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -3117,6 +3117,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         return info
 
+
 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
 
     def _extract_channel_id(self, webpage):
@@ -3326,6 +3327,8 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
                         'shelfRenderer': lambda x: self._shelf_entries(x),
                         'backstagePostThreadRenderer': self._post_thread_entries,
                         'videoRenderer': lambda x: [self._video_entry(x)],
+                        'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
+                        'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
                     }
                     for key, renderer in isr_content.items():
                         if key not in known_renderers:
@@ -3744,50 +3747,19 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
         params = self._SEARCH_PARAMS
         if params:
             data['params'] = params
-        continuation = {}
+        continuation_list = [None]
         for page_num in itertools.count(1):
-            data.update(continuation)
+            data.update(continuation_list[0] or {})
             search = self._extract_response(
                 item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
-                check_get_keys=('contents', 'onResponseReceivedCommands')
-            )
-            if not search:
-                break
+                check_get_keys=('contents', 'onResponseReceivedCommands'))
             slr_contents = try_get(
                 search,
                 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                  lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                 list)
-            if not slr_contents:
-                break
-
-            # Youtube sometimes adds promoted content to searches,
-            # changing the index location of videos and token.
-            # So we search through all entries till we find them.
-            continuation = None
-            for slr_content in slr_contents:
-                if not continuation:
-                    continuation = self._extract_continuation({'contents': [slr_content]})
-
-                isr_contents = try_get(
-                    slr_content,
-                    lambda x: x['itemSectionRenderer']['contents'],
-                    list)
-                if not isr_contents:
-                    continue
-                for content in isr_contents:
-                    if not isinstance(content, dict):
-                        continue
-                    video = content.get('videoRenderer')
-                    if not isinstance(video, dict):
-                        continue
-                    video_id = video.get('videoId')
-                    if not video_id:
-                        continue
-
-                    yield self._extract_video(video)
-
-            if not continuation:
+            yield from self._extract_entries({'contents': slr_contents}, continuation_list)
+            if not continuation_list[0]:
                 break
 
@@ -4569,14 +4541,15 @@ class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
     IE_DESC = 'YouTube search'
     IE_NAME = 'youtube:search'
     _SEARCH_KEY = 'ytsearch'
-    _SEARCH_PARAMS = None
+    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
     _TESTS = []
 
+
 class YoutubeSearchDateIE(SearchInfoExtractor, YoutubeTabBaseInfoExtractor):
     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
     _SEARCH_KEY = 'ytsearchdate'
     IE_DESC = 'YouTube search, newest videos first'
-    _SEARCH_PARAMS = 'CAI%3D'
+    _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
 
 
 class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
@@ -4590,6 +4563,14 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
             'id': 'youtube-dl test video',
             'title': 'youtube-dl test video',
         }
+    }, {
+        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
+        'playlist_mincount': 5,
+        'info_dict': {
+            'id': 'python',
+            'title': 'python',
+        }
+
     }, {
         'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
         'only_matching': True,
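
The playlist/channel support above works by re-wrapping a lone `playlistRenderer` or `channelRenderer` so it looks like the grid structure an existing helper already understands, instead of writing a new entry extractor. A rough standalone illustration of that adapter trick (all names here are stand-ins, not the real yt-dlp helpers):

def grid_entries(grid):
    # Stand-in for the existing helper that only understands grid dicts.
    for item in grid['items']:
        for kind, renderer in item.items():
            yield kind, renderer.get('title')

known_renderers = {
    # Adapt single renderers to the grid shape the helper expects.
    'playlistRenderer': lambda r: grid_entries({'items': [{'playlistRenderer': r}]}),
    'channelRenderer': lambda r: grid_entries({'items': [{'channelRenderer': r}]}),
}

for key, renderer in {'playlistRenderer': {'title': 'demo playlist'}}.items():
    if key in known_renderers:
        print(list(known_renderers[key](renderer)))
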
From fec41d17a587ff18f375c9ec96ee8bc748b57236 Mon Sep 17 00:00:00 2001
From: Sipherdrakon <64430430+Sipherdrakon@users.noreply.github.com>
Date: Wed, 24 Nov 2021 03:01:49 -0500
Subject: [PATCH 473/641] [MTV] Improve mgid extraction (#1713)

Original PR: https://github.com/ytdl-org/youtube-dl/pull/30149
Fixes: #713, #1580, https://github.com/ytdl-org/youtube-dl/issues/30139

Authored by: Sipherdrakon, kikuyan
---
 yt_dlp/extractor/mtv.py       | 20 +++++++++++---------
 yt_dlp/extractor/southpark.py | 17 ++++++++---------
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py
index 4812f11cc5..be5de0a70c 100644
--- a/yt_dlp/extractor/mtv.py
+++ b/yt_dlp/extractor/mtv.py
@@ -306,21 +306,23 @@ class MTVServicesInfoExtractor(InfoExtractor):
         if not mgid:
             mgid = self._extract_triforce_mgid(webpage)
 
-        if not mgid:
-            mgid = self._search_regex(
-                r'"videoConfig":{"videoId":"(mgid:.*?)"', webpage, 'mgid', default=None)
-
-        if not mgid:
-            mgid = self._search_regex(
-                r'"media":{"video":{"config":{"uri":"(mgid:.*?)"', webpage, 'mgid', default=None)
-
         if not mgid:
             data = self._parse_json(self._search_regex(
                 r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
             main_container = self._extract_child_with_type(data, 'MainContainer')
             ab_testing = self._extract_child_with_type(main_container, 'ABTesting')
             video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer')
-            mgid = video_player['props']['media']['video']['config']['uri']
+            if video_player:
+                mgid = try_get(video_player, lambda x: x['props']['media']['video']['config']['uri'])
+            else:
+                flex_wrapper = self._extract_child_with_type(ab_testing or main_container, 'FlexWrapper')
+                auth_suite_wrapper = self._extract_child_with_type(flex_wrapper, 'AuthSuiteWrapper')
+                player = self._extract_child_with_type(auth_suite_wrapper or flex_wrapper, 'Player')
+                if player:
+                    mgid = try_get(player, lambda x: x['props']['videoDetail']['mgid'])
+
+        if not mgid:
+            raise ExtractorError('Could not extract mgid')
 
         return mgid
 
diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py
index d49749467d..942a52dcf5 100644
--- a/yt_dlp/extractor/southpark.py
+++ b/yt_dlp/extractor/southpark.py
@@ -6,19 +6,18 @@ from .mtv import MTVServicesInfoExtractor
 
 class SouthParkIE(MTVServicesInfoExtractor):
     IE_NAME = 'southpark.cc.com'
-    _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark(?:\.cc|studios)\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark(?:\.cc|studios)\.com/((?:video-)?clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
 
     _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
 
     _TESTS = [{
-        'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured',
+        'url': 'https://southpark.cc.com/video-clips/d7wr06/south-park-you-all-agreed-to-counseling',
         'info_dict': {
-            'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30',
             'ext': 'mp4',
-            'title': 'South Park|Bat Daded',
-            'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.',
-            'timestamp': 1112760000,
-            'upload_date': '20050406',
+            'title': 'You All Agreed to Counseling',
+            'description': 'Kenny, Cartman, Stan, and Kyle visit Mr. Mackey and ask for his help getting Mrs. Nelson to come back. Mr. Mackey reveals the only way to get things back to normal is to get the teachers vaccinated.',
+            'timestamp': 1615352400,
+            'upload_date': '20210310',
         },
     }, {
         'url': 'http://southpark.cc.com/collections/7758/fan-favorites/1',
@@ -40,11 +39,11 @@ class SouthParkIE(MTVServicesInfoExtractor):
 
 class SouthParkEsIE(SouthParkIE):
     IE_NAME = 'southpark.cc.com:español'
-    _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/episodios-en-espanol/(?P<id>.+?)(\?|#|$))'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/es/episodios/(?P<id>.+?)(\?|#|$))'
     _LANG = 'es'
 
     _TESTS = [{
-        'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate',
+        'url': 'http://southpark.cc.com/es/episodios/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate',
         'info_dict': {
             'title': 'Cartman Consigue Una Sonda Anal',
             'description': 'Cartman Consigue Una Sonda Anal',

From da27aeea5c4eb8e381b8cb34d3ead8c6487d1e67 Mon Sep 17 00:00:00 2001
From: Tim <staubichsauger@t-online.de>
Date: Wed, 24 Nov 2021 11:08:58 +0100
Subject: [PATCH 474/641] [ITV] Fix extractor (#1776)

Closes #1775
Authored by: staubichsauger
---
 yt_dlp/extractor/itv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py
index 5f1d306f61..bdd6af6884 100644
--- a/yt_dlp/extractor/itv.py
+++ b/yt_dlp/extractor/itv.py
@@ -147,7 +147,7 @@ class ITVIE(InfoExtractor):
         platform_tag_video, featureset_video = next(
             ((platform_tag, featureset)
              for platform_tag, featuresets in reversed(list(variants.items())) for featureset in featuresets
-             if try_get(featureset, lambda x: x[:2]) == ['hls', 'aes']),
+             if set(try_get(featureset, lambda x: x[:2]) or []) == {'aes', 'hls'}),
            (None, None))
         if not platform_tag_video or not featureset_video:
             raise ExtractorError('No downloads available', expected=True, video_id=video_id)
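
The one-line ITV fix above replaces an order-sensitive list comparison with a set comparison, so `['aes', 'hls']` and `['hls', 'aes']` both match, and the `or []` guards against `try_get` returning `None`. The logic in isolation (a sketch, with `try_get` reduced to plain slicing):

def matches(featureset):
    # Order-insensitive and None-safe, mirroring the fixed condition.
    return set(featureset[:2] if featureset else []) == {'aes', 'hls'}

assert matches(['hls', 'aes'])
assert matches(['aes', 'hls', 'extra'])
assert not matches(['hls'])
assert not matches(None)
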
From 17b454066224453b0adc795c5a990b35b97c9ffb Mon Sep 17 00:00:00 2001
From: Aurora <nyaurora@disroot.org>
Date: Wed, 24 Nov 2021 10:47:53 +0000
Subject: [PATCH 475/641] [radiozet] Add extractor (#1593)

Authored by: 0xA7404A (Aurora)
---
 yt_dlp/extractor/extractors.py |  1 +
 yt_dlp/extractor/radiozet.py   | 51 ++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 yt_dlp/extractor/radiozet.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index a0f4908f03..4dda3705a3 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1168,6 +1168,7 @@ from .radiode import RadioDeIE
 from .radiojavan import RadioJavanIE
 from .radiobremen import RadioBremenIE
 from .radiofrance import RadioFranceIE
+from .radiozet import RadioZetPodcastIE
 from .radiokapital import (
     RadioKapitalIE,
     RadioKapitalShowIE,
diff --git a/yt_dlp/extractor/radiozet.py b/yt_dlp/extractor/radiozet.py
new file mode 100644
index 0000000000..2e1ff36c2f
--- /dev/null
+++ b/yt_dlp/extractor/radiozet.py
@@ -0,0 +1,51 @@
+# coding: utf-8
+from .common import InfoExtractor
+from ..utils import (
+    traverse_obj,
+    strip_or_none,
+)
+
+
+class RadioZetPodcastIE(InfoExtractor):
+    _VALID_URL = r'https?://player\.radiozet\.pl\/Podcasty/.*?/(?P<id>.+)'
+    _TEST = {
+        'url': 'https://player.radiozet.pl/Podcasty/Nie-Ma-Za-Co/O-przedmiotach-szkolnych-ktore-przydaja-sie-w-zyciu',
+        'md5': 'e03665c316b4fbc5f6a8f232948bbba3',
+        'info_dict': {
+            'id': '42154',
+            'display_id': 'O-przedmiotach-szkolnych-ktore-przydaja-sie-w-zyciu',
+            'title': 'O przedmiotach szkolnych, które przydają się w życiu',
+            'description': 'md5:fa72bed49da334b09e5b2f79851f185c',
+            'release_timestamp': 1592985480,
+            'ext': 'mp3',
+            'thumbnail': r're:^https?://.*\.png$',
+            'duration': 83,
+            'series': 'Nie Ma Za Co',
+            'creator': 'Katarzyna Pakosińska',
+        }
+    }
+
+    def _call_api(self, podcast_id, display_id):
+        return self._download_json(
+            f'https://player.radiozet.pl/api/podcasts/getPodcast/(node)/{podcast_id}/(station)/radiozet',
+            display_id)
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        podcast_id = self._html_search_regex(r'<div.*?\sid="player".*?\sdata-id=[\'"]([^\'"]+)[\'"]',
+                                             webpage, 'podcast id')
+        data = self._call_api(podcast_id, display_id)['data'][0]
+
+        return {
+            'id': podcast_id,
+            'display_id': display_id,
+            'title': strip_or_none(data.get('title')),
+            'description': strip_or_none(traverse_obj(data, ('program', 'desc'))),
+            'release_timestamp': data.get('published_date'),
+            'url': traverse_obj(data, ('player', 'stream')),
+            'thumbnail': traverse_obj(data, ('program', 'image', 'original')),
+            'duration': traverse_obj(data, ('player', 'duration')),
+            'series': strip_or_none(traverse_obj(data, ('program', 'title'))),
+            'creator': strip_or_none(traverse_obj(data, ('presenter', 0, 'title'))),
+        }
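
The new extractor leans on yt-dlp's `traverse_obj` helper to walk nested API JSON without chains of `.get()` calls or try/except. As a rough approximation of what a call like `traverse_obj(data, ('program', 'desc'))` does (a simplified sketch, not the real implementation, which also supports multiple paths, list indices, and type filters):

def traverse_obj_sketch(obj, path, default=None):
    # Follow each key in turn; bail out to `default` on any miss.
    for key in path:
        if isinstance(obj, dict) and key in obj:
            obj = obj[key]
        elif isinstance(obj, (list, tuple)) and isinstance(key, int) and key < len(obj):
            obj = obj[key]
        else:
            return default
    return obj

data = {'program': {'desc': 'example'}, 'presenter': [{'title': 'host'}]}
assert traverse_obj_sketch(data, ('program', 'desc')) == 'example'
assert traverse_obj_sketch(data, ('presenter', 0, 'title')) == 'host'
assert traverse_obj_sketch(data, ('program', 'missing')) is None
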
From eb56d132d21752fa50e0dd2c3bfa3d983ad48655 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Wed, 24 Nov 2021 18:22:42 +0530
Subject: [PATCH 476/641] [cleanup,instagram] Refactor extractors

Closes #1561
---
 yt_dlp/extractor/instagram.py | 291 ++++++++++++++--------------------
 1 file changed, 117 insertions(+), 174 deletions(-)

diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py
index 0e726423e0..1fcf97a196 100644
--- a/yt_dlp/extractor/instagram.py
+++ b/yt_dlp/extractor/instagram.py
@@ -1,5 +1,4 @@
 # coding: utf-8
-from __future__ import unicode_literals
 
 import itertools
 import hashlib
@@ -9,7 +8,6 @@ import time
 
 from .common import InfoExtractor
 from ..compat import (
-    compat_str,
     compat_HTTPError,
 )
 from ..utils import (
@@ -19,9 +17,8 @@ from ..utils import (
     int_or_none,
     lowercase_escape,
     std_headers,
-    try_get,
+    traverse_obj,
     url_or_none,
-    variadic,
     urlencode_postdata,
 )
 
@@ -72,6 +69,58 @@ class InstagramBaseIE(InfoExtractor):
     def _real_initialize(self):
         self._login()
 
+    def _get_count(self, media, kind, *keys):
+        return traverse_obj(
+            media, (kind, 'count'), *((f'edge_media_{key}', 'count') for key in keys),
+            expected_type=int_or_none)
+
+    def _get_dimension(self, name, media, webpage=None):
+        return (
+            traverse_obj(media, ('dimensions', name), expected_type=int_or_none)
+            or int_or_none(self._html_search_meta(
+                (f'og:video:{name}', f'video:{name}'), webpage or '', default=None)))
+
+    def _extract_nodes(self, nodes, is_direct=False):
+        for idx, node in enumerate(nodes, start=1):
+            if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True:
+                continue
+
+            video_id = node.get('shortcode')
+
+            if is_direct:
+                info = {
+                    'id': video_id or node['id'],
+                    'url': node.get('video_url'),
+                    'width': self._get_dimension('width', node),
+                    'height': self._get_dimension('height', node),
+                    'http_headers': {
+                        'Referer': 'https://www.instagram.com/',
+                    }
+                }
+            elif not video_id:
+                continue
+            else:
+                info = {
+                    '_type': 'url',
+                    'ie_key': 'Instagram',
+                    'id': video_id,
+                    'url': f'https://instagram.com/p/{video_id}',
+                }
+
+            yield {
+                **info,
+                'title': node.get('title') or (f'Video {idx}' if is_direct else None),
+                'description': traverse_obj(
+                    node, ('edge_media_to_caption', 'edges', 0, 'node', 'text'), expected_type=str),
+                'thumbnail': traverse_obj(
+                    node, 'display_url', 'thumbnail_src', 'display_src', expected_type=url_or_none),
+                'duration': float_or_none(node.get('video_duration')),
+                'timestamp': int_or_none(node.get('taken_at_timestamp')),
+                'view_count': int_or_none(node.get('video_view_count')),
+                'comment_count': self._get_count(node, 'comments', 'preview_comment', 'to_comment', 'to_parent_comment'),
+                'like_count': self._get_count(node, 'likes', 'preview_like'),
+            }
+
 
 class InstagramIOSIE(InfoExtractor):
     IE_DESC = 'IOS instagram:// URL'
@@ -234,29 +283,22 @@ class InstagramIE(InstagramBaseIE):
         return mobj.group('link')
 
     def _real_extract(self, url):
-        mobj = self._match_valid_url(url)
-        video_id = mobj.group('id')
-        url = mobj.group('url')
-
+        video_id, url = self._match_valid_url(url).group('id', 'url')
         webpage, urlh = self._download_webpage_handle(url, video_id)
-        if 'www.instagram.com/accounts/login' in urlh.geturl().rstrip('/'):
+        if 'www.instagram.com/accounts/login' in urlh.geturl():
             self.raise_login_required('You need to log in to access this content')
 
-        (media, video_url, description, thumbnails, timestamp, uploader,
-         uploader_id, like_count, comment_count, comments, height,
-         width) = [None] * 12
-
         shared_data = self._parse_json(
             self._search_regex(
                 r'window\._sharedData\s*=\s*({.+?});',
                 webpage, 'shared data', default='{}'),
             video_id, fatal=False)
-        if shared_data:
-            media = try_get(
-                shared_data,
-                (lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
-                 lambda x: x['entry_data']['PostPage'][0]['media']),
-                dict)
+        media = traverse_obj(
+            shared_data,
+            ('entry_data', 'PostPage', 0, 'graphql', 'shortcode_media'),
+            ('entry_data', 'PostPage', 0, 'media'),
+            expected_type=dict)
+
         # _sharedData.entry_data.PostPage is empty when authenticated (see
         # https://github.com/ytdl-org/youtube-dl/pull/22880)
         if not media:
@@ -265,125 +307,71 @@ class InstagramIE(InstagramBaseIE):
                     r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;',
                     webpage, 'additional data', default='{}'),
                 video_id, fatal=False)
-            if additional_data:
-                media = try_get(
-                    additional_data, lambda x: x['graphql']['shortcode_media'],
-                    dict)
-        if media:
-            video_url = media.get('video_url')
-            height = int_or_none(self._html_search_meta(('og:video:height', 'video:height'), webpage)) or try_get(media, lambda x: x['dimensions']['height'])
-            width = int_or_none(self._html_search_meta(('og:video:width', 'video:width'), webpage)) or try_get(media, lambda x: x['dimensions']['width'])
-            description = try_get(
-                media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
-                compat_str) or media.get('caption')
-            title = media.get('title')
-            display_resources = media.get('display_resources')
-            if not display_resources:
-                display_resources = [{'src': media.get('display_src')}, {'src': media.get('display_url')}]
-            duration = float_or_none(media.get('video_duration'))
-            timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
-            uploader = try_get(media, lambda x: x['owner']['full_name'])
-            uploader_id = try_get(media, lambda x: x['owner']['username'])
-
-            def get_count(keys, kind):
-                for key in variadic(keys):
-                    count = int_or_none(try_get(
-                        media, (lambda x: x['edge_media_%s' % key]['count'],
                                 lambda x: x['%ss' % kind]['count'])))
-                    if count is not None:
-                        return count
-
-            like_count = get_count('preview_like', 'like')
-            comment_count = get_count(
-                ('preview_comment', 'to_comment', 'to_parent_comment'), 'comment')
-
-            thumbnails = [{
-                'url': thumbnail['src'],
-                'width': thumbnail.get('config_width'),
-                'height': thumbnail.get('config_height'),
-            } for thumbnail in display_resources if thumbnail.get('src')]
-
-            comments = []
-            for comment in try_get(media, lambda x: x['edge_media_to_parent_comment']['edges']):
-                comment_dict = comment.get('node', {})
-                comment_text = comment_dict.get('text')
-                if comment_text:
-                    comments.append({
-                        'author': try_get(comment_dict, lambda x: x['owner']['username']),
-                        'author_id': try_get(comment_dict, lambda x: x['owner']['id']),
-                        'id': comment_dict.get('id'),
-                        'text': comment_text,
-                        'timestamp': int_or_none(comment_dict.get('created_at')),
-                    })
-        if not video_url:
-            edges = try_get(
-                media, lambda x: x['edge_sidecar_to_children']['edges'],
-                list) or []
-            if edges:
-                entries = []
-                for edge_num, edge in enumerate(edges, start=1):
-                    node = try_get(edge, lambda x: x['node'], dict)
-                    if not node:
-                        continue
-                    node_video_url = url_or_none(node.get('video_url'))
-                    if not node_video_url:
-                        continue
-                    entries.append({
-                        'id': node.get('shortcode') or node['id'],
-                        'title': node.get('title') or 'Video %d' % edge_num,
-                        'url': node_video_url,
-                        'thumbnail': node.get('display_url'),
-                        'duration': float_or_none(node.get('video_duration')),
-                        'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
-                        'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
-                        'view_count': int_or_none(node.get('video_view_count')),
-                    })
-                return self.playlist_result(
-                    entries, video_id,
-                    'Post by %s' % uploader_id if uploader_id else None,
-                    description)
-
-        if not video_url:
-            video_url = self._og_search_video_url(webpage, secure=False)
-
-        formats = [{
-            'url': video_url,
-            'width': width,
-            'height': height,
-        }]
-        dash = try_get(media, lambda x: x['dash_info']['video_dash_manifest'])
-        if dash:
-            formats.extend(self._parse_mpd_formats(self._parse_xml(dash, video_id), mpd_id='dash'))
-        self._sort_formats(formats)
-
-        if not uploader_id:
-            uploader_id = self._search_regex(
-                r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"',
-                webpage, 'uploader id', fatal=False)
+            media = traverse_obj(additional_data, ('graphql', 'shortcode_media'), expected_type=dict) or {}
 
+        uploader_id = traverse_obj(media, ('owner', 'username')) or self._search_regex(
+            r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'uploader id', fatal=False)
 
+        description = (
+            traverse_obj(media, ('edge_media_to_caption', 'edges', 0, 'node', 'text'), expected_type=str)
+            or media.get('caption'))
         if not description:
             description = self._search_regex(
                 r'"caption"\s*:\s*"(.+?)"', webpage, 'description', default=None)
             if description is not None:
                 description = lowercase_escape(description)
 
-        if not thumbnails:
-            thumbnails = self._og_search_thumbnail(webpage)
+        video_url = media.get('video_url')
+        if not video_url:
+            nodes = traverse_obj(media, ('edge_sidecar_to_children', 'edges', ..., 'node'), expected_type=dict) or []
+            if nodes:
+                return self.playlist_result(
+                    self._extract_nodes(nodes, True), video_id,
+                    'Post by %s' % uploader_id if uploader_id else None, description)
+
+            video_url = self._og_search_video_url(webpage, secure=False)
+
+        formats = [{
+            'url': video_url,
+            'width': self._get_dimension('width', media, webpage),
+            'height': self._get_dimension('height', media, webpage),
+        }]
+        dash = traverse_obj(media, ('dash_info', 'video_dash_manifest'))
+        if dash:
+            formats.extend(self._parse_mpd_formats(self._parse_xml(dash, video_id), mpd_id='dash'))
+        self._sort_formats(formats)
+
+        comments = [{
+            'author': traverse_obj(comment_dict, ('node', 'owner', 'username')),
+            'author_id': traverse_obj(comment_dict, ('node', 'owner', 'id')),
+            'id': traverse_obj(comment_dict, ('node', 'id')),
+            'text': traverse_obj(comment_dict, ('node', 'text')),
+            'timestamp': traverse_obj(comment_dict, ('node', 'created_at'), expected_type=int_or_none),
+        } for comment_dict in traverse_obj(media, ('edge_media_to_parent_comment', 'edges'))]
+
+        display_resources = (
+            media.get('display_resources')
+            or [{'src': media.get(key)} for key in ('display_src', 'display_url')]
+            or [{'src': self._og_search_thumbnail(webpage)}])
+        thumbnails = [{
+            'url': thumbnail['src'],
+            'width': thumbnail.get('config_width'),
+            'height': thumbnail.get('config_height'),
+        } for thumbnail in display_resources if thumbnail.get('src')]
 
         return {
             'id': video_id,
             'formats': formats,
-            'ext': 'mp4',
-            'title': title or 'Video by %s' % uploader_id,
+            'title': media.get('title') or 'Video by %s' % uploader_id,
             'description': description,
-            'duration': duration,
-            'thumbnails': thumbnails,
-            'timestamp': timestamp,
+            'duration': float_or_none(media.get('video_duration')),
+            'timestamp': traverse_obj(media, 'taken_at_timestamp', 'date', expected_type=int_or_none),
             'uploader_id': uploader_id,
-            'uploader': uploader,
-            'like_count': like_count,
-            'comment_count': comment_count,
+            'uploader': traverse_obj(media, ('owner', 'full_name')),
+            'like_count': self._get_count(media, 'likes', 'preview_like'),
+            'comment_count': self._get_count(media, 'comments', 'preview_comment', 'to_comment', 'to_parent_comment'),
             'comments': comments,
+            'thumbnails': thumbnails,
             'http_headers': {
                 'Referer': 'https://www.instagram.com/',
             }
@@ -402,10 +390,6 @@ class InstagramPlaylistBaseIE(InstagramBaseIE):
     def _extract_graphql(self, data, url):
         # Parses GraphQL queries containing videos and generates a playlist.
-        def get_count(suffix):
-            return int_or_none(try_get(
-                node, lambda x: x['edge_media_' + suffix]['count']))
-
         uploader_id = self._match_id(url)
         csrf_token = data['config']['csrf_token']
         rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8'
@@ -454,55 +438,14 @@ class InstagramPlaylistBaseIE(InstagramBaseIE):
                     continue
                 raise
 
-            edges = media.get('edges')
-            if not edges or not isinstance(edges, list):
+            nodes = traverse_obj(media, ('edges', ..., 'node'), expected_type=dict) or []
+            if not nodes:
                 break
+            yield from self._extract_nodes(nodes)
 
-            for edge in edges:
-                node = edge.get('node')
-                if not node or not isinstance(node, dict):
-                    continue
-                if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True:
-                    continue
-                video_id = node.get('shortcode')
-                if not video_id:
-                    continue
-
-                info = self.url_result(
-                    'https://instagram.com/p/%s/' % video_id,
-                    ie=InstagramIE.ie_key(), video_id=video_id)
-
-                description = try_get(
-                    node, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
-                    compat_str)
-                thumbnail = node.get('thumbnail_src') or node.get('display_src')
-                timestamp = int_or_none(node.get('taken_at_timestamp'))
-
-                comment_count = get_count('to_comment')
-                like_count = get_count('preview_like')
-                view_count = int_or_none(node.get('video_view_count'))
-
-                info.update({
-                    'description': description,
-                    'thumbnail': thumbnail,
-                    'timestamp': timestamp,
-                    'comment_count': comment_count,
-                    'like_count': like_count,
-                    'view_count': view_count,
-                })
-
-                yield info
-
-            page_info = media.get('page_info')
-            if not page_info or not isinstance(page_info, dict):
-                break
-
-            has_next_page = page_info.get('has_next_page')
-            if not has_next_page:
-                break
-
-            cursor = page_info.get('end_cursor')
-            if not cursor or not isinstance(cursor, compat_str):
+            has_next_page = traverse_obj(media, ('page_info', 'has_next_page'))
+            cursor = traverse_obj(media, ('page_info', 'end_cursor'), expected_type=str)
+            if not has_next_page or not cursor:
                 break
 
     def _real_extract(self, url):

From 883ecd54949fa90174094628bf002f179edf6767 Mon Sep 17 00:00:00 2001
From: cntrl-s <65956966+cntrl-s@users.noreply.github.com>
Date: Sat, 27 Nov 2021 00:05:39 +0530
Subject: [PATCH 477/641] Streamff extractor (#1736)

Closes #1359
Authored by: cntrl-s
---
 yt_dlp/extractor/extractors.py |  1 +
 yt_dlp/extractor/streamff.py   | 31 +++++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+)
 create mode 100644 yt_dlp/extractor/streamff.py

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 4dda3705a3..163efc748e 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1404,6 +1404,7 @@ from .streamable import StreamableIE
 from .streamanity import StreamanityIE
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
+from .streamff import StreamFFIE
 from .streetvoice import StreetVoiceIE
 from .stretchinternet import StretchInternetIE
 from .stripchat import StripchatIE
diff --git a/yt_dlp/extractor/streamff.py b/yt_dlp/extractor/streamff.py
new file mode 100644
index 0000000000..6b190bb3b8
--- /dev/null
+++ b/yt_dlp/extractor/streamff.py
@@ -0,0 +1,31 @@
+# coding: utf-8
+from .common import InfoExtractor
+from ..utils import int_or_none, parse_iso8601
+
+
+class StreamFFIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?streamff\.com/v/(?P<id>[a-zA-Z0-9]+)'
+
+    _TESTS = [{
+        'url': 'https://streamff.com/v/55cc94',
+        'md5': '8745a67bb5e5c570738efe7983826370',
+        'info_dict': {
+            'id': '55cc94',
+            'ext': 'mp4',
+            'title': '55cc94',
+            'timestamp': 1634764643,
+            'upload_date': '20211020',
+            'view_count': int,
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        json_data = self._download_json(f'https://streamff.com/api/videos/{video_id}', video_id)
+        return {
+            'id': video_id,
+            'title': json_data.get('name') or video_id,
+            'url': 'https://streamff.com/%s' % json_data['videoLink'],
+            'view_count': int_or_none(json_data.get('views')),
+            'timestamp': parse_iso8601(json_data.get('date')),
+        }
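
For reference, the whole StreamFF flow is one JSON call followed by string assembly. A bare-bones equivalent outside yt-dlp might look like the following (an untested sketch against the same endpoint; the exact shape of `videoLink` is an assumption here, so the leading-slash handling is defensive):

import json
import urllib.request

def streamff_info(video_id):
    # Same API endpoint the extractor above queries.
    with urllib.request.urlopen(f'https://streamff.com/api/videos/{video_id}') as resp:
        data = json.load(resp)
    return {
        'title': data.get('name') or video_id,
        # Assumption: videoLink is a site-relative path, as the extractor implies.
        'url': 'https://streamff.com/' + data['videoLink'].lstrip('/'),
        'views': data.get('views'),
    }
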
From 18d6dd4e0194211c4f3238fe441ebe0c1fdbc167 Mon Sep 17 00:00:00 2001
From: Grabien <60237587+Grabien@users.noreply.github.com>
Date: Fri, 26 Nov 2021 21:00:04 +0200
Subject: [PATCH 478/641] [extractor/breitbart] Breitbart.com website support
 (#1434)

Authored by: Grabien
---
 yt_dlp/extractor/breitbart.py  | 39 ++++++++++++++++++++++++++++++++++
 yt_dlp/extractor/extractors.py |  1 +
 2 files changed, 40 insertions(+)
 create mode 100644 yt_dlp/extractor/breitbart.py

diff --git a/yt_dlp/extractor/breitbart.py b/yt_dlp/extractor/breitbart.py
new file mode 100644
index 0000000000..f50f719dc2
--- /dev/null
+++ b/yt_dlp/extractor/breitbart.py
@@ -0,0 +1,39 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class BreitBartIE(InfoExtractor):
+    _VALID_URL = r'https?:\/\/(?:www\.)breitbart.com/videos/v/(?P<id>[^/]+)'
+    _TESTS = [{
+        'url': 'https://www.breitbart.com/videos/v/5cOz1yup/?pl=Ij6NDOji',
+        'md5': '0aa6d1d6e183ac5ca09207fe49f17ade',
+        'info_dict': {
+            'id': '5cOz1yup',
+            'ext': 'mp4',
+            'title': 'Watch \u2013 Clyburn: Statues in Congress Have to Go Because they Are Honoring Slavery',
+            'description': 'md5:bac35eb0256d1cb17f517f54c79404d5',
+            'thumbnail': 'https://cdn.jwplayer.com/thumbs/5cOz1yup-1920.jpg',
+            'age_limit': 0,
+        }
+    }, {
+        'url': 'https://www.breitbart.com/videos/v/eaiZjVOn/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        formats = self._extract_m3u8_formats(f'https://cdn.jwplayer.com/manifests/{video_id}.m3u8', video_id, ext='mp4')
+        self._sort_formats(formats)
+        return {
+            'id': video_id,
+            'title': self._og_search_title(
+                webpage, default=None) or self._html_search_regex(
+                r'(?s)<title>(.*?)</title>', webpage, 'video title'),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'age_limit': self._rta_search(webpage),
+            'formats': formats
+        }
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 163efc748e..ed8a23e723 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -179,6 +179,7 @@ from .br import (
 )
 from .bravotv import BravoTVIE
 from .breakcom import BreakIE
+from .breitbart import BreitBartIE
 from .brightcove import (
     BrightcoveLegacyIE,
     BrightcoveNewIE,

From cf1f13b817d88eb7d4b449f20cbad3215030e35f Mon Sep 17 00:00:00 2001
From: shirt <2660574+shirt-dev@users.noreply.github.com>
Date: Sat, 27 Nov 2021 00:15:59 -0500
Subject: [PATCH 479/641] [generic] Support mpd manifests without extension
 (#1806)

Authored by: shirt-dev
---
 yt_dlp/extractor/generic.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 9c7fa4a217..ae0ebb14ad 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -2601,6 +2601,8 @@ class GenericIE(InfoExtractor):
                 subtitles = {}
                 if format_id.endswith('mpegurl'):
                     formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4')
+                elif format_id.endswith('mpd') or format_id.endswith('dash+xml'):
+                    formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id)
                 elif format_id == 'f4m':
                     formats = self._extract_f4m_formats(url, video_id)
                 else:
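
The generic-extractor change above extends a plain suffix test on the identifier derived from the response's Content-Type, so DASH manifests served without an `.mpd` extension are still recognized. The dispatch, paraphrased as a standalone sketch (handler names are placeholders, not yt-dlp API):

def pick_manifest_handler(format_id):
    # format_id is derived from the Content-Type header, e.g.
    # 'application/dash+xml' or 'application/x-mpegurl'.
    if format_id.endswith('mpegurl'):
        return 'hls'
    if format_id.endswith(('mpd', 'dash+xml')):
        return 'dash'
    if format_id == 'f4m':
        return 'hds'
    return 'direct'

assert pick_manifest_handler('application/x-mpegurl') == 'hls'
assert pick_manifest_handler('application/dash+xml') == 'dash'
assert pick_manifest_handler('video/vnd.mpeg.dash.mpd') == 'dash'
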
From 3938a9212c3d1aa30a7f6db12b997d94afd8b646 Mon Sep 17 00:00:00 2001
From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com>
Date: Sat, 27 Nov 2021 12:01:42 +0530
Subject: [PATCH 480/641] [CPTwentyFour] Add extractor (#1769)

Closes #1768
Authored by: Ashish0804
---
 yt_dlp/extractor/extractors.py     |  5 ++++-
 yt_dlp/extractor/ninecninemedia.py | 35 +++++++++++++++++++++++++++++-
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index ed8a23e723..a277bf7226 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -943,7 +943,10 @@ from .niconico import (
     NicovideoSearchIE,
     NicovideoSearchURLIE,
 )
-from .ninecninemedia import NineCNineMediaIE
+from .ninecninemedia import (
+    NineCNineMediaIE,
+    CPTwentyFourIE,
+)
 from .ninegag import NineGagIE
 from .ninenow import NineNowIE
 from .nintendo import NintendoIE
diff --git a/yt_dlp/extractor/ninecninemedia.py b/yt_dlp/extractor/ninecninemedia.py
index 4aaf21a120..781842721b 100644
--- a/yt_dlp/extractor/ninecninemedia.py
+++ b/yt_dlp/extractor/ninecninemedia.py
@@ -1,7 +1,6 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-
 from .common import InfoExtractor
 from ..utils import (
     float_or_none,
@@ -99,3 +98,37 @@ class NineCNineMediaIE(InfoExtractor):
             }
 
         return info
+
+
+class CPTwentyFourIE(InfoExtractor):
+    IE_NAME = 'cp24'
+    _GEO_COUNTRIES = ['CA']
+    _VALID_URL = r'https?://(?:www\.)?cp24\.com/news/(?P<id>[^?#]+)'
+
+    _TESTS = [{
+        'url': 'https://www.cp24.com/news/video-shows-atm-being-ripped-out-of-business-by-pickup-truck-driver-in-mississauga-1.5676877',
+        'info_dict': {
+            'id': '2328005',
+            'ext': 'mp4',
+            'title': 'WATCH: Truck rips ATM from Mississauga business',
+            'description': 'md5:cf7498480885f080a754389a2b2f7073',
+            'timestamp': 1637618377,
+            'episode_number': None,
+            'season': 'Season 0',
+            'season_number': 0,
+            'season_id': 57974,
+            'series': 'CTV News Toronto',
+            'duration': 26.86,
+            'thumbnail': 'http://images2.9c9media.com/image_asset/2014_11_5_2eb609a0-475b-0132-fbd6-34b52f6f1279_jpg_2000x1125.jpg',
+            'upload_date': '20211122',
+        },
+        'params': {'skip_download': True, 'format': 'bv'}
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        id, destination = self._search_regex(
+            r'getAuthStates\("(?P<id>[^"]+)",\s?"(?P<destination>[^"]+)"\);',
+            webpage, 'video id and destination', group=('id', 'destination'))
+        return self.url_result(f'9c9media:{destination}:{id}', ie=NineCNineMediaIE.ie_key(), video_id=id)
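
The CP24 extractor above pulls both values it needs from a single regex with named groups and then defers to the existing 9c9media extractor through a composed pseudo-URL. The pattern in isolation (the sample HTML below is fabricated for illustration):

import re

html = 'getAuthStates("2328005", "ctvnews_web");'  # fabricated sample
mobj = re.search(r'getAuthStates\("(?P<id>[^"]+)",\s?"(?P<destination>[^"]+)"\);', html)
video_id, destination = mobj.group('id', 'destination')
print(f'9c9media:{destination}:{video_id}')  # handed to NineCNineMediaIE
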
From 359df0fc423b4a5d5af8113d42648fdea22e81ea Mon Sep 17 00:00:00 2001
From: Henrik Heimbuerger <henrik@heimbuerger.de>
Date: Sat, 27 Nov 2021 07:51:32 +0100
Subject: [PATCH 481/641] [nebula] Add NebulaCollectionIE and rewrite
 extractor (#1694)

Closes #1690
Authored by: hheimbuerger
---
 yt_dlp/extractor/extractors.py |   5 ++++-
 yt_dlp/extractor/nebula.py     | 374 +++++++++++++++++++--------------
 2 files changed, 217 insertions(+), 162 deletions(-)

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index ed8a23e723..a277bf7226 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -889,7 +889,10 @@ from .ndr import (
     NJoyEmbedIE,
 )
 from .ndtv import NDTVIE
-from .nebula import NebulaIE
+from .nebula import (
+    NebulaIE,
+    NebulaCollectionIE,
+)
 from .nerdcubed import NerdCubedFeedIE
 from .netzkino import NetzkinoIE
 from .neteasemusic import (
diff --git a/yt_dlp/extractor/nebula.py b/yt_dlp/extractor/nebula.py
index 9698a358e1..d235805c35 100644
--- a/yt_dlp/extractor/nebula.py
+++ b/yt_dlp/extractor/nebula.py
@@ -1,22 +1,163 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import itertools
 import json
 import time
+import urllib
 
-from urllib.error import HTTPError
-from .common import InfoExtractor
-from ..compat import compat_str, compat_urllib_parse_unquote, compat_urllib_parse_quote
 from ..utils import (
     ExtractorError,
     parse_iso8601,
     try_get,
-    urljoin,
 )
+from .common import InfoExtractor
 
 
-class NebulaIE(InfoExtractor):
+class NebulaBaseIE(InfoExtractor):
     _NETRC_MACHINE = 'watchnebula'
+    _nebula_api_token = None
+    _nebula_bearer_token = None
+    _zype_access_token = None
+
+    def _perform_nebula_auth(self):
+        username, password = self._get_login_info()
+        if not (username and password):
+            self.raise_login_required()
+
+        data = json.dumps({'email': username, 'password': password}).encode('utf8')
+        response = self._download_json(
+            'https://api.watchnebula.com/api/v1/auth/login/',
+            data=data, fatal=False, video_id=None,
+            headers={
+                'content-type': 'application/json',
+                # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
+                'cookie': ''
+            },
+            note='Logging in to Nebula with supplied credentials',
+            errnote='Authentication failed or rejected')
+        if not response or not response.get('key'):
+            self.raise_login_required()
+
+        # save nebula token as cookie
+        self._set_cookie(
+            'nebula.app', 'nebula-auth',
+            urllib.parse.quote(
+                json.dumps({
+                    "apiToken": response["key"],
+                    "isLoggingIn": False,
+                    "isLoggingOut": False,
+                }, separators=(",", ":"))),
+            expire_time=int(time.time()) + 86400 * 365,
+        )
+
+        return response['key']
+
+    def _retrieve_nebula_api_token(self):
+        """
+        Check cookie jar for valid token. Try to authenticate using credentials if no valid token
+        can be found in the cookie jar.
+        """
+        nebula_cookies = self._get_cookies('https://nebula.app')
+        nebula_cookie = nebula_cookies.get('nebula-auth')
+        if nebula_cookie:
+            self.to_screen('Authenticating to Nebula with token from cookie jar')
+            nebula_cookie_value = urllib.parse.unquote(nebula_cookie.value)
+            nebula_api_token = self._parse_json(nebula_cookie_value, None).get('apiToken')
+            if nebula_api_token:
+                return nebula_api_token
+
+        return self._perform_nebula_auth()
+
+    def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
+        assert method in ('GET', 'POST',)
+        assert auth_type in ('api', 'bearer',)
+
+        def inner_call():
+            authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
+            return self._download_json(
+                url, video_id, note=note, headers={'Authorization': authorization},
+                data=b'' if method == 'POST' else None)
+
+        try:
+            return inner_call()
+        except ExtractorError as exc:
+            # if 401 or 403, attempt credential re-auth and retry
+            if exc.cause and isinstance(exc.cause, urllib.error.HTTPError) and exc.cause.code in (401, 403):
+                self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
+                self._login()
+                return inner_call()
+            else:
+                raise
+
+    def _fetch_nebula_bearer_token(self):
+        """
+        Get a Bearer token for the Nebula API. This will be required to fetch video meta data.
+        """
+        response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
+                                         method='POST',
+                                         note='Authorizing to Nebula')
+        return response['token']
+
+    def _fetch_zype_access_token(self):
+        """
+        Get a Zype access token, which is required to access video streams -- in our case: to
+        generate video URLs.
+        """
+        user_object = self._call_nebula_api('https://api.watchnebula.com/api/v1/auth/user/', note='Retrieving Zype access token')
+
+        access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], str)
+        if not access_token:
+            if try_get(user_object, lambda x: x['is_subscribed'], bool):
+                # TODO: Reimplement the same Zype token polling the Nebula frontend implements
+                # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
+                raise ExtractorError(
+                    'Unable to extract Zype access token from Nebula API authentication endpoint. '
+                    'Open an arbitrary video in a browser with this account to generate a token',
+                    expected=True)
+            raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
+        return access_token
+
+    def _build_video_info(self, episode):
+        zype_id = episode['zype_id']
+        zype_video_url = f'https://player.zype.com/embed/{zype_id}.html?access_token={self._zype_access_token}'
+        channel_slug = episode['channel_slug']
+        return {
+            'id': episode['zype_id'],
+            'display_id': episode['slug'],
+            '_type': 'url_transparent',
+            'ie_key': 'Zype',
+            'url': zype_video_url,
+            'title': episode['title'],
+            'description': episode['description'],
+            'timestamp': parse_iso8601(episode['published_at']),
+            'thumbnails': [{
+                # 'id': tn.get('name'),  # this appears to be null
+                'url': tn['original'],
+                'height': key,
+            } for key, tn in episode['assets']['thumbnail'].items()],
+            'duration': episode['duration'],
+            'channel': episode['channel_title'],
+            'channel_id': channel_slug,
+            'channel_url': f'https://nebula.app/{channel_slug}',
+            'uploader': episode['channel_title'],
+            'uploader_id': channel_slug,
+            'uploader_url': f'https://nebula.app/{channel_slug}',
+            'series': episode['channel_title'],
+            'creator': episode['channel_title'],
+        }
+
+    def _login(self):
+        self._nebula_api_token = self._retrieve_nebula_api_token()
+        self._nebula_bearer_token = self._fetch_nebula_bearer_token()
+        self._zype_access_token = self._fetch_zype_access_token()
+
+    def _real_initialize(self):
+        self._login()
+
+
+class NebulaIE(NebulaBaseIE):
     _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P<id>[-\w]+)'
     _TESTS = [
         {
@@ -30,12 +171,13 @@ class NebulaIE(InfoExtractor):
                 'upload_date': '20180731',
                 'timestamp': 1533009600,
                 'channel': 'Lindsay Ellis',
+                'channel_id': 'lindsayellis',
                 'uploader': 'Lindsay Ellis',
+                'uploader_id': 'lindsayellis',
             },
             'params': {
                 'usenetrc': True,
             },
-            'skip': 'All Nebula content requires authentication',
         },
         {
             'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
@@ -47,13 +189,14 @@ class NebulaIE(InfoExtractor):
                 'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
                 'upload_date': '20200327',
                 'timestamp': 1585348140,
-                'channel': 'The Logistics of D-Day',
-                'uploader': 'The Logistics of D-Day',
+                'channel': 'Real Engineering',
+                'channel_id': 'realengineering',
+                'uploader': 'Real Engineering',
+                'uploader_id': 'realengineering',
             },
             'params': {
                 'usenetrc': True,
             },
-            'skip': 'All Nebula content requires authentication',
         },
         {
             'url': 'https://nebula.app/videos/money-episode-1-the-draw',
@@ -66,173 +209,82 @@ class NebulaIE(InfoExtractor):
                 'upload_date': '20200323',
                 'timestamp': 1584980400,
                 'channel': 'Tom Scott Presents: Money',
+                'channel_id': 'tom-scott-presents-money',
                 'uploader': 'Tom Scott Presents: Money',
+                'uploader_id': 'tom-scott-presents-money',
             },
             'params': {
                 'usenetrc': True,
            },
-            'skip': 'All Nebula content requires authentication',
         },
         {
             'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
             'only_matching': True,
         },
     ]
-    _NETRC_MACHINE = 'watchnebula'
-    _nebula_token = None
-
-    def _retrieve_nebula_auth(self):
-        """
-        Log in to Nebula, and returns a Nebula API token
-        """
-
-        username, password = self._get_login_info()
-        if not (username and password):
-            self.raise_login_required()
-
-        self.report_login()
-        data = json.dumps({'email': username, 'password': password}).encode('utf8')
-        response = self._download_json(
-            'https://api.watchnebula.com/api/v1/auth/login/',
-            data=data, fatal=False, video_id=None,
-            headers={
-                'content-type': 'application/json',
-                # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
-                'cookie': ''
-            },
-            note='Authenticating to Nebula with supplied credentials',
-            errnote='Authentication failed or rejected')
-        if not response or not response.get('key'):
-            self.raise_login_required()
-
-        # save nebula token as cookie
-        self._set_cookie(
-            'nebula.app', 'nebula-auth',
-            compat_urllib_parse_quote(
-                json.dumps({
-                    "apiToken": response["key"],
-                    "isLoggingIn": False,
-                    "isLoggingOut": False,
-                }, separators=(",", ":"))),
-            expire_time=int(time.time()) + 86400 * 365,
-        )
-
-        return response['key']
-
-    def _retrieve_zype_api_key(self, page_url, display_id):
-        """
-        Retrieves the Zype API key
-        """
-
-        # Find the js that has the API key from the webpage and download it
-        webpage = self._download_webpage(page_url, video_id=display_id)
-        main_script_relpath = self._search_regex(
-            r'<script[^>]*src="(?P<script_relpath>[^"]*main.[0-9a-f]*.chunk.js)"[^>]*>', webpage,
-            group='script_relpath', name='script relative path', fatal=True)
-        main_script_abspath = urljoin(page_url, main_script_relpath)
-        main_script = self._download_webpage(main_script_abspath, video_id=display_id,
-                                             note='Retrieving Zype API key')
-
-        api_key = self._search_regex(
-            r'REACT_APP_ZYPE_API_KEY\s*:\s*"(?P<api_key>[\w-]*)"', main_script,
-            group='api_key', name='API key', fatal=True)
-
-        return api_key
-
-    def _call_zype_api(self, path, params, video_id, api_key, note):
-        """
-        A helper for making calls to the Zype API.
-        """
-        query = {'api_key': api_key, 'per_page': 1}
-        query.update(params)
-        return self._download_json('https://api.zype.com' + path, video_id, query=query, note=note)
-
-    def _call_nebula_api(self, path, video_id, access_token, note):
-        """
-        A helper for making calls to the Nebula API.
-        """
-        return self._download_json('https://api.watchnebula.com/api/v1' + path, video_id, headers={
-            'Authorization': 'Token {access_token}'.format(access_token=access_token)
-        }, note=note)
-
-    def _fetch_zype_access_token(self, video_id):
-        try:
-            user_object = self._call_nebula_api('/auth/user/', video_id, self._nebula_token, note='Retrieving Zype access token')
-        except ExtractorError as exc:
-            # if 401, attempt credential auth and retry
-            if exc.cause and isinstance(exc.cause, HTTPError) and exc.cause.code == 401:
-                self._nebula_token = self._retrieve_nebula_auth()
-                user_object = self._call_nebula_api('/auth/user/', video_id, self._nebula_token, note='Retrieving Zype access token')
-            else:
-                raise
-
-        access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], compat_str)
-        if not access_token:
-            if try_get(user_object, lambda x: x['is_subscribed'], bool):
-                # TODO: Reimplement the same Zype token polling the Nebula frontend implements
-                # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
-                raise ExtractorError(
-                    'Unable to extract Zype access token from Nebula API authentication endpoint. '
-                    'Open an arbitrary video in a browser with this account to generate a token',
-                    expected=True)
-            raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
-        return access_token
-
-    def _extract_channel_title(self, video_meta):
-        # TODO: Implement the API calls giving us the channel list,
-        # so that we can do the title lookup and then figure out the channel URL
-        categories = video_meta.get('categories', []) if video_meta else []
-        # the channel name is the value of the first category
-        for category in categories:
-            if category.get('value'):
-                return category['value'][0]
-
-    def _real_initialize(self):
-        # check cookie jar for valid token
-        nebula_cookies = self._get_cookies('https://nebula.app')
-        nebula_cookie = nebula_cookies.get('nebula-auth')
-        if nebula_cookie:
-            self.to_screen('Authenticating to Nebula with token from cookie jar')
-            nebula_cookie_value = compat_urllib_parse_unquote(nebula_cookie.value)
-            self._nebula_token = self._parse_json(nebula_cookie_value, None).get('apiToken')
-
-        # try to authenticate using credentials if no valid token has been found
-        if not self._nebula_token:
-            self._nebula_token = self._retrieve_nebula_auth()
+    def _fetch_video_metadata(self, slug):
+        return self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/',
+                                     video_id=slug,
+                                     auth_type='bearer',
+                                     note='Fetching video meta data')
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
-        api_key = self._retrieve_zype_api_key(url, display_id)
+        slug = self._match_id(url)
+        video = self._fetch_video_metadata(slug)
+        return self._build_video_info(video)
 
-        response = self._call_zype_api('/videos', {'friendly_title': display_id},
-                                       display_id, api_key, note='Retrieving metadata from Zype')
-        if len(response.get('response') or []) != 1:
-            raise ExtractorError('Unable to find video on Zype API')
-        video_meta = response['response'][0]
-        video_id = video_meta['_id']
-        zype_access_token = self._fetch_zype_access_token(display_id)
 
+class NebulaCollectionIE(NebulaBaseIE):
+    IE_NAME = 'nebula:collection'
+    _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/(?!videos/)(?P<id>[-\w]+)'
+    _TESTS = [
+        {
+            'url': 'https://nebula.app/tom-scott-presents-money',
+            'info_dict': {
+                'id': 'tom-scott-presents-money',
+                'title': 'Tom Scott Presents: Money',
+                'description': 'Tom Scott hosts a series
all about trust, negotiation and money.', + }, + 'playlist_count': 5, + 'params': { + 'usenetrc': True, + }, + }, { + 'url': 'https://nebula.app/lindsayellis', + 'info_dict': { + 'id': 'lindsayellis', + 'title': 'Lindsay Ellis', + 'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.', + }, + 'playlist_mincount': 100, + 'params': { + 'usenetrc': True, + }, + }, + ] - channel_title = self._extract_channel_title(video_meta) + def _generate_playlist_entries(self, collection_id, channel): + episodes = channel['episodes']['results'] + for page_num in itertools.count(2): + for episode in episodes: + yield self._build_video_info(episode) + next_url = channel['episodes']['next'] + if not next_url: + break + channel = self._call_nebula_api(next_url, collection_id, auth_type='bearer', + note=f'Retrieving channel page {page_num}') + episodes = channel['episodes']['results'] - return { - 'id': video_id, - 'display_id': display_id, - '_type': 'url_transparent', - 'ie_key': 'Zype', - 'url': 'https://player.zype.com/embed/%s.html?access_token=%s' % (video_id, zype_access_token), - 'title': video_meta.get('title'), - 'description': video_meta.get('description'), - 'timestamp': parse_iso8601(video_meta.get('published_at')), - 'thumbnails': [{ - 'id': tn.get('name'), # this appears to be null - 'url': tn['url'], - 'width': tn.get('width'), - 'height': tn.get('height'), - } for tn in video_meta.get('thumbnails', [])], - 'duration': video_meta.get('duration'), - 'channel': channel_title, - 'uploader': channel_title, # we chose uploader = channel name - # TODO: uploader_url, channel_id, channel_url - } + def _real_extract(self, url): + collection_id = self._match_id(url) + channel_url = f'https://content.watchnebula.com/video/channels/{collection_id}/' + channel = self._call_nebula_api(channel_url, collection_id, auth_type='bearer', note='Retrieving channel') + channel_details = channel['details'] + + return self.playlist_result( + entries=self._generate_playlist_entries(collection_id, channel), + playlist_id=collection_id, + playlist_title=channel_details['title'], + playlist_description=channel_details['description'] + ) From 2abf0815542dd44724b577752fb9339e76816057 Mon Sep 17 00:00:00 2001 From: Yakabuff Date: Sat, 27 Nov 2021 02:04:51 -0500 Subject: [PATCH 482/641] [xvideos] Fix extractor (#1799) Closes #1788 Authored by: Yakabuff --- yt_dlp/extractor/xvideos.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/yt_dlp/extractor/xvideos.py b/yt_dlp/extractor/xvideos.py index ef45eb929e..ab07f01afa 100644 --- a/yt_dlp/extractor/xvideos.py +++ b/yt_dlp/extractor/xvideos.py @@ -83,9 +83,7 @@ class XVideosIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - - webpage = self._download_webpage( - 'https://www.xvideos.com/video%s/' % video_id, video_id) + webpage = self._download_webpage(url, video_id) mobj = re.search(r'
<h1 class="inlineError">(.+?)</h1>
', webpage) if mobj: From 4e4ba1d75f250240725c0012edbd88cc0a7ead4b Mon Sep 17 00:00:00 2001 From: chio0hai <94094996+chio0hai@users.noreply.github.com> Date: Sat, 27 Nov 2021 02:10:29 -0500 Subject: [PATCH 483/641] [redgifs] Add extractor (#1631) Closes #1504 Authored by: chio0hai --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/redgifs.py | 94 ++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 yt_dlp/extractor/redgifs.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 2fb9515c0b..dd9edff0e7 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1216,6 +1216,7 @@ from .redbulltv import ( RedBullIE, ) from .reddit import RedditIE +from .redgifs import RedGifsIE from .redtube import RedTubeIE from .regiotv import RegioTVIE from .rentv import ( diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py new file mode 100644 index 0000000000..1257d1344a --- /dev/null +++ b/yt_dlp/extractor/redgifs.py @@ -0,0 +1,94 @@ +# coding: utf-8 + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + qualities, + try_get, +) + + +class RedGifsIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www|thumbs2?)\.)?redgifs\.com/(?:watch/)?(?P[^-/?#\.]+)' + _FORMATS = { + 'gif': 250, + 'sd': 480, + 'hd': None, + } + _TESTS = [{ + 'url': 'https://www.redgifs.com/watch/squeakyhelplesswisent', + 'info_dict': { + 'id': 'squeakyhelplesswisent', + 'ext': 'mp4', + 'title': 'Hotwife Legs Thick', + 'timestamp': 1636287915, + 'upload_date': '20211107', + 'uploader': 'ignored52', + 'duration': 16, + 'view_count': int, + 'like_count': int, + 'categories': list, + 'age_limit': 18, + } + }, { + 'url': 'https://thumbs2.redgifs.com/SqueakyHelplessWisent-mobile.mp4#t=0', + 'info_dict': { + 'id': 'squeakyhelplesswisent', + 'ext': 'mp4', + 'title': 'Hotwife Legs Thick', + 'timestamp': 1636287915, + 'upload_date': '20211107', + 'uploader': 'ignored52', + 'duration': 16, + 'view_count': int, + 'like_count': int, + 'categories': list, + 'age_limit': 18, + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url).lower() + + video_info = self._download_json( + 'https://api.redgifs.com/v2/gifs/%s' % video_id, + video_id, 'Downloading video info') + if 'error' in video_info: + raise ExtractorError(f'RedGifs said: {video_info["error"]}', expected=True) + + gif = video_info['gif'] + urls = gif['urls'] + + quality = qualities(tuple(self._FORMATS.keys())) + + orig_height = int_or_none(gif.get('height')) + aspect_ratio = try_get(gif, lambda x: orig_height / x['width']) + + formats = [] + for format_id, height in self._FORMATS.items(): + video_url = urls.get(format_id) + if not video_url: + continue + height = min(orig_height, height or orig_height) + formats.append({ + 'url': video_url, + 'format_id': format_id, + 'width': height * aspect_ratio if aspect_ratio else None, + 'height': height, + 'quality': quality(format_id), + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': ' '.join(gif.get('tags') or []) or 'RedGifs', + 'timestamp': int_or_none(gif.get('createDate')), + 'uploader': gif.get('userName'), + 'duration': int_or_none(gif.get('duration')), + 'view_count': int_or_none(gif.get('views')), + 'like_count': int_or_none(gif.get('likes')), + 'categories': gif.get('tags') or [], + 'age_limit': 18, + 'formats': formats, + } From 896a88c5c61a5431222a9b3a75c2c9c5129b1bbe Mon Sep 17 00:00:00 2001 From: gustaf 
<86112802+18928172992817182@users.noreply.github.com> Date: Sat, 27 Nov 2021 08:24:48 +0100 Subject: [PATCH 484/641] [Tvplayhome] Fix extractor (#1357) Authored by: pukkandan, 18928172992817182 (gustaf) --- yt_dlp/extractor/tvplay.py | 115 +++++++++++++++++++++---------------- 1 file changed, 67 insertions(+), 48 deletions(-) diff --git a/yt_dlp/extractor/tvplay.py b/yt_dlp/extractor/tvplay.py index 9771d9108e..b5dbc55262 100644 --- a/yt_dlp/extractor/tvplay.py +++ b/yt_dlp/extractor/tvplay.py @@ -12,9 +12,9 @@ from ..utils import ( determine_ext, ExtractorError, int_or_none, - parse_duration, parse_iso8601, qualities, + traverse_obj, try_get, update_url_query, url_or_none, @@ -431,77 +431,96 @@ class ViafreeIE(InfoExtractor): class TVPlayHomeIE(InfoExtractor): - _VALID_URL = r'https?://(?:tv3?)?play\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/(?:[^/]+/)*[^/?#&]+-(?P\d+)' + _VALID_URL = r'''(?x) + https?:// + (?:tv3?)? + play\.(?:tv3|skaties)\.(?Plv|lt|ee)/ + (?Plives/)? + [^?#&]+(?:episode|programme|clip)-(?P\d+) + ''' _TESTS = [{ - 'url': 'https://tvplay.tv3.lt/aferistai-n-7/aferistai-10047125/', + 'url': 'https://play.tv3.lt/series/gauju-karai-karveliai,serial-2343791/serija-8,episode-2343828', 'info_dict': { - 'id': '366367', + 'id': '2343828', 'ext': 'mp4', - 'title': 'Aferistai', - 'description': 'Aferistai. Kalėdinė pasaka.', - 'series': 'Aferistai [N-7]', - 'season': '1 sezonas', + 'title': 'Gaujų karai. Karveliai (2021) | S01E08: Serija 8', + 'description': 'md5:f6fcfbb236429f05531131640dfa7c81', + 'duration': 2710, + 'season': 'Gaujų karai. Karveliai', 'season_number': 1, - 'duration': 464, - 'timestamp': 1394209658, - 'upload_date': '20140307', - 'age_limit': 18, + 'release_year': 2021, + 'episode': 'Serija 8', + 'episode_number': 8, }, 'params': { - 'skip_download': True, + 'skip_download': 'm3u8', }, }, { - 'url': 'https://tvplay.skaties.lv/vinas-melo-labak/vinas-melo-labak-10280317/', + 'url': 'https://play.tv3.lt/series/moterys-meluoja-geriau-n-7,serial-2574652/serija-25,episode-3284937', + 'info_dict': { + 'id': '3284937', + 'ext': 'mp4', + 'season': 'Moterys meluoja geriau [N-7]', + 'season_number': 14, + 'release_year': 2021, + 'episode': 'Serija 25', + 'episode_number': 25, + 'title': 'Moterys meluoja geriau [N-7] (2021) | S14|E25: Serija 25', + 'description': 'md5:c6926e9710f1a126f028fbe121eddb79', + 'duration': 2440, + }, + 'skip': '404' + }, { + 'url': 'https://play.tv3.lt/lives/tv6-lt,live-2838694/optibet-a-lygos-rungtynes-marijampoles-suduva--vilniaus-riteriai,programme-3422014', 'only_matching': True, }, { - 'url': 'https://tvplay.tv3.ee/cool-d-ga-mehhikosse/cool-d-ga-mehhikosse-10044354/', + 'url': 'https://tv3play.skaties.lv/series/women-lie-better-lv,serial-1024464/women-lie-better-lv,episode-1038762', 'only_matching': True, }, { - 'url': 'https://play.tv3.lt/aferistai-10047125', + 'url': 'https://play.tv3.ee/series/_,serial-2654462/_,episode-2654474', 'only_matching': True, }, { - 'url': 'https://tv3play.skaties.lv/vinas-melo-labak-10280317', - 'only_matching': True, - }, { - 'url': 'https://play.tv3.ee/cool-d-ga-mehhikosse-10044354', + 'url': 'https://tv3play.skaties.lv/clips/tv3-zinas-valsti-lidz-15novembrim-bus-majsede,clip-3464509', 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) + country, is_live, video_id = self._match_valid_url(url).groups() - asset = self._download_json( - urljoin(url, '/sb/public/asset/' + video_id), video_id) + api_path = 'lives/programmes' if is_live else 'vods' + data = self._download_json( + 
urljoin(url, f'/api/products/{api_path}/{video_id}?platform=BROWSER&lang={country.upper()}'), + video_id) - m3u8_url = asset['movie']['contentUrl'] - video_id = asset['assetId'] - asset_title = asset['title'] - title = asset_title['title'] - - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') + video_type = 'CATCHUP' if is_live else 'MOVIE' + stream_id = data['programRecordingId'] if is_live else video_id + stream = self._download_json( + urljoin(url, f'/api/products/{stream_id}/videos/playlist?videoType={video_type}&platform=BROWSER'), video_id) + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + stream['sources']['HLS'][0]['src'], video_id, 'mp4', 'm3u8_native', m3u8_id='hls') self._sort_formats(formats) - thumbnails = None - image_url = asset.get('imageUrl') - if image_url: - thumbnails = [{ - 'url': urljoin(url, image_url), - 'ext': 'jpg', - }] - - metadata = asset.get('metadata') or {} + thumbnails = set(traverse_obj( + data, (('galary', 'images', 'artworks'), ..., ..., ('miniUrl', 'mainUrl')), expected_type=url_or_none)) return { 'id': video_id, - 'title': title, - 'description': asset_title.get('summaryLong') or asset_title.get('summaryShort'), - 'thumbnails': thumbnails, - 'duration': parse_duration(asset_title.get('runTime')), - 'series': asset.get('tvSeriesTitle'), - 'season': asset.get('tvSeasonTitle'), - 'season_number': int_or_none(metadata.get('seasonNumber')), - 'episode': asset_title.get('titleBrief'), - 'episode_number': int_or_none(metadata.get('episodeNumber')), + 'title': self._resolve_title(data), + 'description': traverse_obj(data, 'description', 'lead'), + 'duration': int_or_none(data.get('duration')), + 'season': traverse_obj(data, ('season', 'serial', 'title')), + 'season_number': int_or_none(traverse_obj(data, ('season', 'number'))), + 'episode': data.get('title'), + 'episode_number': int_or_none(data.get('episode')), + 'release_year': int_or_none(traverse_obj(data, ('season', 'serial', 'year'))), + 'thumbnails': [{'url': url, 'ext': 'jpg'} for url in thumbnails], 'formats': formats, + 'subtitles': subtitles, } + + @staticmethod + def _resolve_title(data): + return try_get(data, lambda x: ( + f'{data["season"]["serial"]["title"]} ({data["season"]["serial"]["year"]}) | ' + f'S{data["season"]["number"]:02d}E{data["episode"]:02d}: {data["title"]}' + )) or data.get('title') From 639f80c1f9feca69509ede153c28f8651213f7fc Mon Sep 17 00:00:00 2001 From: mpeter50 <83356418+mpeter50@users.noreply.github.com> Date: Sat, 27 Nov 2021 09:00:58 +0100 Subject: [PATCH 485/641] [Twitch:vod] Add chapters (#1515) Authored by: mpeter50 --- yt_dlp/extractor/twitch.py | 71 ++++++++++++++++++++++++++++++++++---- 1 file changed, 65 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index be70beed4b..c5b16f2b06 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -24,6 +24,8 @@ from ..utils import ( parse_iso8601, parse_qs, qualities, + str_or_none, + traverse_obj, try_get, unified_timestamp, update_url_query, @@ -52,6 +54,7 @@ class TwitchBaseIE(InfoExtractor): 'VideoAccessToken_Clip': '36b89d2507fce29e5ca551df756d27c1cfe079e2609642b4390aa4c35796eb11', 'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c', 'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687', + 'VideoPlayer_ChapterSelectButtonVideo': '8d2793384aac3773beab5e59bd5d6f585aedb923d292800119e03d40cd0f9b41', } def 
_real_initialize(self): @@ -249,6 +252,38 @@ class TwitchVodIE(TwitchBaseIE): }, { 'url': 'https://player.twitch.tv/?video=480452374', 'only_matching': True, + }, { + 'url': 'https://www.twitch.tv/videos/635475444', + 'info_dict': { + 'id': 'v635475444', + 'ext': 'mp4', + 'title': 'Riot Games', + 'duration': 11643, + 'uploader': 'Riot Games', + 'uploader_id': 'riotgames', + 'timestamp': 1590770569, + 'upload_date': '20200529', + 'chapters': [ + { + 'start_time': 0, + 'end_time': 573, + 'title': 'League of Legends' + }, + { + 'start_time': 573, + 'end_time': 3922, + 'title': 'Legends of Runeterra' + }, + { + 'start_time': 3922, + 'end_time': 11643, + 'title': 'Art' + } + ], + }, + 'params': { + 'skip_download': True + } }] def _download_info(self, item_id): @@ -259,16 +294,24 @@ class TwitchVodIE(TwitchBaseIE): 'channelLogin': '', 'videoID': item_id, }, + }, { + 'operationName': 'VideoPlayer_ChapterSelectButtonVideo', + 'variables': { + 'includePrivate': False, + 'videoID': item_id, + }, }], - 'Downloading stream metadata GraphQL')[0]['data'] - video = data.get('video') + 'Downloading stream metadata GraphQL') + + video = traverse_obj(data, (0, 'data', 'video')) + video['moments'] = traverse_obj(data, (1, 'data', 'video', 'moments', 'edges', ..., 'node')) + if video is None: raise ExtractorError( 'Video %s does not exist' % item_id, expected=True) return self._extract_info_gql(video, item_id) - @staticmethod - def _extract_info(info): + def _extract_info(self, info): status = info.get('status') if status == 'recording': is_live = True @@ -304,8 +347,22 @@ class TwitchVodIE(TwitchBaseIE): 'is_live': is_live, } - @staticmethod - def _extract_info_gql(info, item_id): + def _extract_moments(self, info, item_id): + for moment in info.get('moments') or []: + start_time = int_or_none(moment.get('positionMilliseconds'), 1000) + duration = int_or_none(moment.get('durationMilliseconds'), 1000) + name = str_or_none(moment.get('description')) + + if start_time is None or duration is None: + self.report_warning(f'Important chapter information missing for chapter {name}', item_id) + continue + yield { + 'start_time': start_time, + 'end_time': start_time + duration, + 'title': name, + } + + def _extract_info_gql(self, info, item_id): vod_id = info.get('id') or item_id # id backward compatibility for download archives if vod_id[0] != 'v': @@ -314,6 +371,7 @@ class TwitchVodIE(TwitchBaseIE): if thumbnail: for p in ('width', 'height'): thumbnail = thumbnail.replace('{%s}' % p, '0') + return { 'id': vod_id, 'title': info.get('title') or 'Untitled Broadcast', @@ -324,6 +382,7 @@ class TwitchVodIE(TwitchBaseIE): 'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str), 'timestamp': unified_timestamp(info.get('publishedAt')), 'view_count': int_or_none(info.get('viewCount')), + 'chapters': list(self._extract_moments(info, item_id)), } def _real_extract(self, url): From dfd78699f59d66fe7cd109c2534240ea0254426c Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sat, 27 Nov 2021 08:12:56 +0000 Subject: [PATCH 486/641] [Aljazeera] Fix extractor (#1577) Closes #1518 Authored by: u-spec-png --- yt_dlp/extractor/aljazeera.py | 87 ++++++++++++++++++++++++----------- 1 file changed, 59 insertions(+), 28 deletions(-) diff --git a/yt_dlp/extractor/aljazeera.py b/yt_dlp/extractor/aljazeera.py index e829b45e47..7bcdb7afba 100644 --- a/yt_dlp/extractor/aljazeera.py +++ b/yt_dlp/extractor/aljazeera.py @@ -1,55 +1,86 @@ +# coding: utf-8 from __future__ import 
unicode_literals import json from .common import InfoExtractor +from ..utils import ( + try_get, +) class AlJazeeraIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?Pprogram/[^/]+|(?:feature|video)s)/\d{4}/\d{1,2}/\d{1,2}/(?P[^/?&#]+)' + _VALID_URL = r'https?://(?P\w+\.aljazeera\.\w+)/(?Pprograms?/[^/]+|(?:feature|video|new)s)?/\d{4}/\d{1,2}/\d{1,2}/(?P[^/?&#]+)' _TESTS = [{ - 'url': 'https://www.aljazeera.com/program/episode/2014/9/19/deliverance', + 'url': 'https://balkans.aljazeera.net/videos/2021/11/6/pojedini-domovi-u-sarajevu-jos-pod-vodom-mjestanima-se-dostavlja-hrana', 'info_dict': { - 'id': '3792260579001', + 'id': '6280641530001', 'ext': 'mp4', - 'title': 'The Slum - Episode 1: Deliverance', - 'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.', - 'uploader_id': '665003303001', - 'timestamp': 1411116829, - 'upload_date': '20140919', + 'title': 'Pojedini domovi u Sarajevu još pod vodom, mještanima se dostavlja hrana', + 'timestamp': 1636219149, + 'description': 'U sarajevskim naseljima Rajlovac i Reljevo stambeni objekti, ali i industrijska postrojenja i dalje su pod vodom.', + 'upload_date': '20211106', + } + }, { + 'url': 'https://balkans.aljazeera.net/videos/2021/11/6/djokovic-usao-u-finale-mastersa-u-parizu', + 'info_dict': { + 'id': '6280654936001', + 'ext': 'mp4', + 'title': 'Đoković ušao u finale Mastersa u Parizu', + 'timestamp': 1636221686, + 'description': 'Novak Đoković je u polufinalu Mastersa u Parizu nakon preokreta pobijedio Poljaka Huberta Hurkacza.', + 'upload_date': '20211106', }, - 'add_ie': ['BrightcoveNew'], - 'skip': 'Not accessible from Travis CI server', - }, { - 'url': 'https://www.aljazeera.com/videos/2017/5/11/sierra-leone-709-carat-diamond-to-be-auctioned-off', - 'only_matching': True, - }, { - 'url': 'https://www.aljazeera.com/features/2017/8/21/transforming-pakistans-buses-into-art', - 'only_matching': True, }] - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' + BRIGHTCOVE_URL_RE = r'https?://players.brightcove.net/(?P\d+)/(?P[a-zA-Z0-9]+)_(?P[^/]+)/index.html\?videoId=(?P\d+)' def _real_extract(self, url): - post_type, name = self._match_valid_url(url).groups() + base, post_type, id = self._match_valid_url(url).groups() + wp = { + 'balkans.aljazeera.net': 'ajb', + 'chinese.aljazeera.net': 'chinese', + 'mubasher.aljazeera.net': 'ajm', + }.get(base) or 'aje' post_type = { 'features': 'post', 'program': 'episode', + 'programs': 'episode', 'videos': 'video', + 'news': 'news', }[post_type.split('/')[0]] video = self._download_json( - 'https://www.aljazeera.com/graphql', name, query={ + f'https://{base}/graphql', id, query={ + 'wp-site': wp, 'operationName': 'ArchipelagoSingleArticleQuery', 'variables': json.dumps({ - 'name': name, + 'name': id, 'postType': post_type, }), }, headers={ - 'wp-site': 'aje', - })['data']['article']['video'] - video_id = video['id'] - account_id = video.get('accountId') or '665003303001' - player_id = video.get('playerId') or 'BkeSH5BDb' - return self.url_result( - self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id), - 'BrightcoveNew', video_id) + 'wp-site': wp, + }) + video = try_get(video, lambda x: x['data']['article']['video']) or {} + video_id = video.get('id') + account = video.get('accountId') or '911432371001' + player_id = video.get('playerId') or 'csvTfAlKW' + embed = 'default' + + if video_id is None: + webpage = self._download_webpage(url, id) + + account, 
player_id, embed, video_id = self._search_regex(self.BRIGHTCOVE_URL_RE, webpage, 'video id', + group=(1, 2, 3, 4), default=(None, None, None, None)) + + if video_id is None: + return { + '_type': 'url_transparent', + 'url': url, + 'ie_key': 'Generic' + } + + return { + '_type': 'url_transparent', + 'url': f'https://players.brightcove.net/{account}/{player_id}_{embed}/index.html?videoId={video_id}', + 'ie_key': 'BrightcoveNew' + } From 909b0d66f47c4fb73ee320f512f0c12502f16294 Mon Sep 17 00:00:00 2001 From: Grabien <60237587+Grabien@users.noreply.github.com> Date: Sat, 27 Nov 2021 12:37:45 +0200 Subject: [PATCH 487/641] [Senate.gov] Add SenateGovIE and fix SenateISVPIE (#1435) Authored by: Grabien, pukkandan --- yt_dlp/extractor/cspan.py | 2 +- yt_dlp/extractor/extractors.py | 2 +- yt_dlp/extractor/generic.py | 2 +- yt_dlp/extractor/senategov.py | 213 +++++++++++++++++++++++++++++++++ yt_dlp/extractor/senateisvp.py | 153 ----------------------- 5 files changed, 216 insertions(+), 156 deletions(-) create mode 100644 yt_dlp/extractor/senategov.py delete mode 100644 yt_dlp/extractor/senateisvp.py diff --git a/yt_dlp/extractor/cspan.py b/yt_dlp/extractor/cspan.py index 2e01aff488..c717aec3ac 100644 --- a/yt_dlp/extractor/cspan.py +++ b/yt_dlp/extractor/cspan.py @@ -18,7 +18,7 @@ from ..utils import ( str_to_int, unescapeHTML, ) -from .senateisvp import SenateISVPIE +from .senategov import SenateISVPIE from .ustream import UstreamIE diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index dd9edff0e7..a4baad2dab 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1285,7 +1285,7 @@ from .scte import ( SCTECourseIE, ) from .seeker import SeekerIE -from .senateisvp import SenateISVPIE +from .senategov import SenateISVPIE, SenateGovIE from .sendtonews import SendtoNewsIE from .servus import ServusIE from .sevenplus import SevenPlusIE diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index ae0ebb14ad..51557f0f1c 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -56,7 +56,7 @@ from .sportbox import SportBoxIE from .myvi import MyviIE from .condenast import CondeNastIE from .udn import UDNEmbedIE -from .senateisvp import SenateISVPIE +from .senategov import SenateISVPIE from .svt import SVTIE from .pornhub import PornHubIE from .xhamster import XHamsterEmbedIE diff --git a/yt_dlp/extractor/senategov.py b/yt_dlp/extractor/senategov.py new file mode 100644 index 0000000000..6f4240422a --- /dev/null +++ b/yt_dlp/extractor/senategov.py @@ -0,0 +1,213 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urlparse, +) +from ..utils import ( + ExtractorError, + parse_qs, + unsmuggle_url, +) + +_COMMITTEES = { + 'ag': ('76440', 'http://ag-f.akamaihd.net'), + 'aging': ('76442', 'http://aging-f.akamaihd.net'), + 'approps': ('76441', 'http://approps-f.akamaihd.net'), + 'arch': ('', 'http://ussenate-f.akamaihd.net'), + 'armed': ('76445', 'http://armed-f.akamaihd.net'), + 'banking': ('76446', 'http://banking-f.akamaihd.net'), + 'budget': ('76447', 'http://budget-f.akamaihd.net'), + 'cecc': ('76486', 'http://srs-f.akamaihd.net'), + 'commerce': ('80177', 'http://commerce1-f.akamaihd.net'), + 'csce': ('75229', 'http://srs-f.akamaihd.net'), + 'dpc': ('76590', 'http://dpc-f.akamaihd.net'), + 'energy': ('76448', 'http://energy-f.akamaihd.net'), + 'epw': ('76478', 'http://epw-f.akamaihd.net'), + 'ethics': 
('76449', 'http://ethics-f.akamaihd.net'), + 'finance': ('76450', 'http://finance-f.akamaihd.net'), + 'foreign': ('76451', 'http://foreign-f.akamaihd.net'), + 'govtaff': ('76453', 'http://govtaff-f.akamaihd.net'), + 'help': ('76452', 'http://help-f.akamaihd.net'), + 'indian': ('76455', 'http://indian-f.akamaihd.net'), + 'intel': ('76456', 'http://intel-f.akamaihd.net'), + 'intlnarc': ('76457', 'http://intlnarc-f.akamaihd.net'), + 'jccic': ('85180', 'http://jccic-f.akamaihd.net'), + 'jec': ('76458', 'http://jec-f.akamaihd.net'), + 'judiciary': ('76459', 'http://judiciary-f.akamaihd.net'), + 'rpc': ('76591', 'http://rpc-f.akamaihd.net'), + 'rules': ('76460', 'http://rules-f.akamaihd.net'), + 'saa': ('76489', 'http://srs-f.akamaihd.net'), + 'smbiz': ('76461', 'http://smbiz-f.akamaihd.net'), + 'srs': ('75229', 'http://srs-f.akamaihd.net'), + 'uscc': ('76487', 'http://srs-f.akamaihd.net'), + 'vetaff': ('76462', 'http://vetaff-f.akamaihd.net'), +} + + +class SenateISVPIE(InfoExtractor): + _IE_NAME = 'senate.gov:isvp' + _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P.+)' + + _TESTS = [{ + 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', + 'info_dict': { + 'id': 'judiciary031715', + 'ext': 'mp4', + 'title': 'Integrated Senate Video Player', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false', + 'info_dict': { + 'id': 'commerce011514', + 'ext': 'mp4', + 'title': 'Integrated Senate Video Player' + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi', + # checksum differs each time + 'info_dict': { + 'id': 'intel090613', + 'ext': 'mp4', + 'title': 'Integrated Senate Video Player' + } + }, { + # From http://www.c-span.org/video/?96791-1 + 'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715', + 'only_matching': True, + }] + + @staticmethod + def _search_iframe_url(webpage): + mobj = re.search( + r"]+src=['\"](?Phttps?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]", + webpage) + if mobj: + return mobj.group('url') + + def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + + qs = compat_parse_qs(self._match_valid_url(url).group('qs')) + if not qs.get('filename') or not qs.get('type') or not qs.get('comm'): + raise ExtractorError('Invalid URL', expected=True) + + video_id = re.sub(r'.mp4$', '', qs['filename'][0]) + + webpage = self._download_webpage(url, video_id) + + if smuggled_data.get('force_title'): + title = smuggled_data['force_title'] + else: + title = self._html_search_regex(r'([^<]+)', webpage, video_id) + poster = qs.get('poster') + thumbnail = poster[0] if poster else None + + video_type = qs['type'][0] + committee = video_type if video_type == 'arch' else qs['comm'][0] + + stream_num, domain = _COMMITTEES[committee] + + formats = [] + if video_type == 'arch': + filename = video_id if '.' 
in video_id else video_id + '.mp4' + m3u8_url = compat_urlparse.urljoin(domain, 'i/' + filename + '/master.m3u8') + formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8') + else: + hdcore_sign = 'hdcore=3.1.0' + url_params = (domain, video_id, stream_num) + f4m_url = f'%s/z/%s_1@%s/manifest.f4m?{hdcore_sign}' % url_params + m3u8_url = '%s/i/%s_1@%s/master.m3u8' % url_params + for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'): + # URLs without the extra param induce an 404 error + entry.update({'extra_param_to_segment_url': hdcore_sign}) + formats.append(entry) + for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'): + mobj = re.search(r'(?P(?:-p|-b)).m3u8', entry['url']) + if mobj: + entry['format_id'] += mobj.group('tag') + formats.append(entry) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + } + + +class SenateGovIE(InfoExtractor): + _IE_NAME = 'senate.gov' + _VALID_URL = r'https?:\/\/(?:www\.)?(help|appropriations|judiciary|banking|armed-services|finance)\.senate\.gov' + _TESTS = [{ + 'url': 'https://www.help.senate.gov/hearings/vaccines-saving-lives-ensuring-confidence-and-protecting-public-health', + 'info_dict': { + 'id': 'help090920', + 'display_id': 'vaccines-saving-lives-ensuring-confidence-and-protecting-public-health', + 'title': 'Vaccines: Saving Lives, Ensuring Confidence, and Protecting Public Health', + 'description': 'The U.S. Senate Committee on Health, Education, Labor & Pensions', + 'ext': 'mp4', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.appropriations.senate.gov/hearings/watch?hearingid=B8A25434-5056-A066-6020-1F68CB75F0CD', + 'info_dict': { + 'id': 'appropsA051518', + 'display_id': 'watch?hearingid=B8A25434-5056-A066-6020-1F68CB75F0CD', + 'title': 'Review of the FY2019 Budget Request for the U.S. Army', + 'ext': 'mp4', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.banking.senate.gov/hearings/21st-century-communities-public-transportation-infrastructure-investment-and-fast-act-reauthorization', + 'info_dict': { + 'id': 'banking041521', + 'display_id': '21st-century-communities-public-transportation-infrastructure-investment-and-fast-act-reauthorization', + 'title': '21st Century Communities: Public Transportation Infrastructure Investment and FAST Act Reauthorization', + 'description': 'The Official website of The United States Committee on Banking, Housing, and Urban Affairs', + 'ext': 'mp4', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + def _real_extract(self, url): + display_id = self._generic_id(url) + webpage = self._download_webpage(url, display_id) + parse_info = parse_qs(self._search_regex( + r'
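
For reference, the archived-stream URL construction used by SenateISVPIE above reduces to a few lines. The sketch below is illustrative only: the `_COMMITTEES` table is trimmed to the single 'judiciary' entry from the patch, the committee name and filename are example inputs taken from the patch's test cases, and the stdlib `urllib.parse.urljoin` stands in for the `compat_urlparse.urljoin` wrapper used in the diff.

    # Sketch of SenateISVPIE's 'arch' (archived hearing) URL construction.
    # Illustrative only -- _COMMITTEES is trimmed to one entry here.
    from urllib.parse import urljoin

    _COMMITTEES = {'judiciary': ('76459', 'http://judiciary-f.akamaihd.net')}

    def archived_master_m3u8(committee, video_id):
        # stream_num is only needed for live streams; archived videos use
        # just the committee's Akamai domain.
        stream_num, domain = _COMMITTEES[committee]
        # Append .mp4 when the filename carries no extension, then request
        # the HLS master playlist under /i/<filename>/ on that host.
        filename = video_id if '.' in video_id else video_id + '.mp4'
        return urljoin(domain, 'i/' + filename + '/master.m3u8')

    # e.g. the judiciary test case from the patch:
    # archived_master_m3u8('judiciary', 'judiciary031715')
    # -> 'http://judiciary-f.akamaihd.net/i/judiciary031715.mp4/master.m3u8'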