From 58786a10f212bd63f9ad1d0b4d9e4d31c3b385e2 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 25 Jun 2023 20:10:00 +0530 Subject: [PATCH 01/70] [extractor/youtube] Add extractor-arg `formats` Closes #7417 --- README.md | 3 +-- yt_dlp/extractor/youtube.py | 22 ++++++++++++++++------ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 4de4ece96..d89bb204e 100644 --- a/README.md +++ b/README.md @@ -1805,8 +1805,7 @@ The following extractors use this feature: * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) * `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all` * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total -* `include_duplicate_formats`: Extract formats with identical content but different URLs or protocol. This is useful if some of the formats are unavailable or throttled. -* `include_incomplete_formats`: Extract formats that cannot be downloaded completely (live dash and post-live m3u8) +* `formats`: Change the types of formats to return. `dashy` (convert http to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8) * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others * `innertube_key`: Innertube API key to use for all API requests diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index a0d0a601a..bdc631ccb 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3752,7 +3752,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres' ]) streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...)) - all_formats = self._configuration_arg('include_duplicate_formats') + format_types = self._configuration_arg('formats') + all_formats = 'duplicate' in format_types + if self._configuration_arg('include_duplicate_formats'): + all_formats = True + self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. ' + 'Use formats=duplicate extractor argument instead') def build_fragments(f): return LazyList({ @@ -3892,18 +3897,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if single_stream and dct.get('ext'): dct['container'] = dct['ext'] + '_dash' - if all_formats and dct['filesize']: + if (all_formats or 'dashy' in format_types) and dct['filesize']: yield { **dct, 'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'], 'protocol': 'http_dash_segments', 'fragments': build_fragments(dct), } - dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE} - yield dct + if all_formats or 'dashy' not in format_types: + dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE} + yield dct needs_live_processing = self._needs_live_processing(live_status, duration) - skip_bad_formats = not self._configuration_arg('include_incomplete_formats') + skip_bad_formats = 'incomplete' not in format_types + if self._configuration_arg('include_incomplete_formats'): + skip_bad_formats = False + self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. ' + 'Use formats=incomplete extractor argument instead') skip_manifests = set(self._configuration_arg('skip')) if (not self.get_param('youtube_include_hls_manifest', True) @@ -3915,7 +3925,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): skip_manifests.add('dash') if self._configuration_arg('include_live_dash'): self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. ' - 'Use include_incomplete_formats extractor argument instead') + 'Use formats=incomplete extractor argument instead') elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live': skip_manifests.add('dash') From f0a1ff118145b6449982ba401f9a9f656ecd8062 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sun, 25 Jun 2023 13:13:28 -0500 Subject: [PATCH 02/70] [extractor/qdance] Add extractor (#7420) Closes #7385 Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 1 + yt_dlp/extractor/qdance.py | 150 ++++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+) create mode 100644 yt_dlp/extractor/qdance.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 49a3f39d3..06340fcd8 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1531,6 +1531,7 @@ from .prx import ( ) from .puls4 import Puls4IE from .pyvideo import PyvideoIE +from .qdance import QDanceIE from .qingting import QingTingIE from .qqmusic import ( QQMusicIE, diff --git a/yt_dlp/extractor/qdance.py b/yt_dlp/extractor/qdance.py new file mode 100644 index 000000000..d817677f0 --- /dev/null +++ b/yt_dlp/extractor/qdance.py @@ -0,0 +1,150 @@ +import json +import time + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + jwt_decode_hs256, + str_or_none, + traverse_obj, + try_call, + url_or_none, +) + + +class QDanceIE(InfoExtractor): + _NETRC_MACHINE = 'qdance' + _VALID_URL = r'https?://(?:www\.)?q-dance\.com/network/(?:library|live)/(?P\d+)' + _TESTS = [{ + 'note': 'vod', + 'url': 'https://www.q-dance.com/network/library/146542138', + 'info_dict': { + 'id': '146542138', + 'ext': 'mp4', + 'title': 'Sound Rush [LIVE] | Defqon.1 Weekend Festival 2022 | Friday | RED', + 'display_id': 'sound-rush-live-v3-defqon-1-weekend-festival-2022-friday-red', + 'description': 'Relive Defqon.1 - Primal Energy 2022 with the sounds of Sound Rush LIVE at the RED on Friday! 🔥', + 'season': 'Defqon.1 Weekend Festival 2022', + 'season_id': '31840632', + 'series': 'Defqon.1', + 'series_id': '31840378', + 'thumbnail': 'https://images.q-dance.network/1674829540-20220624171509-220624171509_delio_dn201093-2.jpg', + 'availability': 'premium_only', + 'duration': 1829, + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'note': 'livestream', + 'url': 'https://www.q-dance.com/network/live/149170353', + 'info_dict': { + 'id': '149170353', + 'ext': 'mp4', + 'title': r're:^Defqon\.1 2023 - Friday - RED', + 'display_id': 'defqon-1-2023-friday-red', + 'description': 'md5:3c73fbbd4044e578e696adfc64019163', + 'season': 'Defqon.1 Weekend Festival 2023', + 'season_id': '141735599', + 'series': 'Defqon.1', + 'series_id': '31840378', + 'thumbnail': 'https://images.q-dance.network/1686849069-area-thumbs_red.png', + 'availability': 'subscriber_only', + 'live_status': 'is_live', + 'channel_id': 'qdancenetwork.video_149170353', + }, + 'skip': 'Completed livestream', + }] + + _access_token = None + _refresh_token = None + + def _call_login_api(self, data, note='Logging in'): + login = self._download_json( + 'https://members.id-t.com/api/auth/login', None, note, headers={ + 'content-type': 'application/json', + 'brand': 'qdance', + 'origin': 'https://www.q-dance.com', + 'referer': 'https://www.q-dance.com/', + }, data=json.dumps(data, separators=(',', ':')).encode(), + expected_status=lambda x: True) + + tokens = traverse_obj(login, ('data', { + '_id-t-accounts-token': ('accessToken', {str}), + '_id-t-accounts-refresh': ('refreshToken', {str}), + '_id-t-accounts-id-token': ('idToken', {str}), + })) + + if not tokens.get('_id-t-accounts-token'): + error = ': '.join(traverse_obj(login, ('error', ('code', 'message'), {str}))) + if 'validation_error' not in error: + raise ExtractorError(f'Q-Dance API said "{error}"') + msg = 'Invalid username or password' if 'email' in data else 'Refresh token has expired' + raise ExtractorError(msg, expected=True) + + for name, value in tokens.items(): + self._set_cookie('.q-dance.com', name, value) + + def _perform_login(self, username, password): + self._call_login_api({'email': username, 'password': password}) + + def _real_initialize(self): + cookies = self._get_cookies('https://www.q-dance.com/') + self._refresh_token = try_call(lambda: cookies['_id-t-accounts-refresh'].value) + self._access_token = try_call(lambda: cookies['_id-t-accounts-token'].value) + if not self._access_token: + self.raise_login_required() + + def _get_auth(self): + if (try_call(lambda: jwt_decode_hs256(self._access_token)['exp']) or 0) <= int(time.time() - 120): + if not self._refresh_token: + raise ExtractorError( + 'Cannot refresh access token, login with yt-dlp or refresh cookies in browser') + self._call_login_api({'refreshToken': self._refresh_token}, note='Refreshing access token') + self._real_initialize() + + return {'Authorization': self._access_token} + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + data = self._search_nuxt_data(webpage, video_id, traverse=('data', 0, 'data')) + + def extract_availability(level): + level = int_or_none(level) or 0 + return self._availability( + needs_premium=(level >= 20), needs_subscription=(level >= 15), needs_auth=True) + + info = traverse_obj(data, { + 'title': ('title', {str.strip}), + 'description': ('description', {str.strip}), + 'display_id': ('slug', {str}), + 'thumbnail': ('thumbnail', {url_or_none}), + 'duration': ('durationInSeconds', {int_or_none}, {lambda x: x or None}), + 'availability': ('subscription', 'level', {extract_availability}), + 'is_live': ('type', {lambda x: x.lower() == 'live'}), + 'artist': ('acts', ..., {str}), + 'series': ('event', 'title', {str.strip}), + 'series_id': ('event', 'id', {str_or_none}), + 'season': ('eventEdition', 'title', {str.strip}), + 'season_id': ('eventEdition', 'id', {str_or_none}), + 'channel_id': ('pubnub', 'channelName', {str}), + }) + + stream = self._download_json( + f'https://dc9h6qmsoymbq.cloudfront.net/api/content/videos/{video_id}/url', + video_id, headers=self._get_auth(), expected_status=401) + + m3u8_url = traverse_obj(stream, ('data', 'url', {url_or_none})) + if not m3u8_url and traverse_obj(stream, ('error', 'code')) == 'unauthorized': + raise ExtractorError('Your account does not have access to this content', expected=True) + + formats = self._extract_m3u8_formats( + m3u8_url, video_id, fatal=False, live=True) if m3u8_url else [] + if not formats: + self.raise_no_formats('No active streams found', expected=bool(info.get('is_live'))) + + return { + **info, + 'id': video_id, + 'formats': formats, + } From 5e16cf92eb496b7c1541a6b1d727cb87542984db Mon Sep 17 00:00:00 2001 From: nnoboa <90611593+nnoboa@users.noreply.github.com> Date: Sun, 25 Jun 2023 16:22:38 -0400 Subject: [PATCH 03/70] [extractor/AdultSwim] Extract subtitles from m3u8 (#7421) Authored by: nnoboa Closes #6191 --- yt_dlp/extractor/adultswim.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/adultswim.py b/yt_dlp/extractor/adultswim.py index bd29eb43e..daaeddeb6 100644 --- a/yt_dlp/extractor/adultswim.py +++ b/yt_dlp/extractor/adultswim.py @@ -170,8 +170,10 @@ class AdultSwimIE(TurnerBaseIE): continue ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type'))) if ext == 'm3u8': - info['formats'].extend(self._extract_m3u8_formats( - asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + fmts, subs = self._extract_m3u8_formats_and_subtitles( + asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False) + info['formats'].extend(fmts) + self._merge_subtitles(subs, target=info['subtitles']) elif ext == 'f4m': continue # info['formats'].extend(self._extract_f4m_formats( From ef8509c300ea50da86aea447eb214d3d6f6db6bb Mon Sep 17 00:00:00 2001 From: bashonly Date: Sun, 25 Jun 2023 17:04:42 -0500 Subject: [PATCH 04/70] [extractor/kick] Fix `_VALID_URL` Closes #7384 Authored by: bashonly --- yt_dlp/extractor/kick.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/kick.py b/yt_dlp/extractor/kick.py index 765ffa0c8..be1dfd4b1 100644 --- a/yt_dlp/extractor/kick.py +++ b/yt_dlp/extractor/kick.py @@ -30,7 +30,7 @@ class KickBaseIE(InfoExtractor): class KickIE(KickBaseIE): - _VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P[\w_]+)' + _VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P[\w-]+)' _TESTS = [{ 'url': 'https://kick.com/yuppy', 'info_dict': { From d949c10c45bfc359bdacd52e6a180169b8128958 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 26 Jun 2023 07:25:47 +0530 Subject: [PATCH 05/70] [extractor/youtube] Process `post_live` over 2 hours --- yt_dlp/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index bdc631ccb..d5607975e 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3737,7 +3737,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _needs_live_processing(self, live_status, duration): if (live_status == 'is_live' and self.get_param('live_from_start') - or live_status == 'post_live' and (duration or 0) > 4 * 3600): + or live_status == 'post_live' and (duration or 0) > 2 * 3600): return live_status def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration): @@ -4238,7 +4238,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): for fmt in filter(is_bad_format, formats): fmt['preference'] = (fmt.get('preference') or -1) - 10 - fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ') + fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ') if needs_live_processing: self._prepare_live_from_start_formats( From 8a8af356e3bba98a7f7d333aff0777d5d92130c8 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 26 Jun 2023 16:13:31 +0530 Subject: [PATCH 06/70] [downloader/aria2c] Add `--no-conf` Closes #7404 --- yt_dlp/downloader/external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 007689a8c..f637a100b 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -271,7 +271,7 @@ class Aria2cFD(ExternalFD): return super()._call_downloader(tmpfilename, info_dict) def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '-c', + cmd = [self.exe, '-c', '--no-conf', '--console-log-level=warn', '--summary-interval=0', '--download-result=hide', '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16'] if 'fragments' in info_dict: From f393bbe724b1fc6c7f754a5da507e807b2b40ad2 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 26 Jun 2023 16:14:20 +0530 Subject: [PATCH 07/70] [extractor/sbs] Python 3.7 compat Closes #7410 --- yt_dlp/extractor/sbs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/sbs.py b/yt_dlp/extractor/sbs.py index ac0b6de20..119106e8e 100644 --- a/yt_dlp/extractor/sbs.py +++ b/yt_dlp/extractor/sbs.py @@ -139,8 +139,8 @@ class SBSIE(InfoExtractor): 'release_year': ('releaseYear', {int_or_none}), 'duration': ('duration', ({float_or_none}, {parse_duration})), 'is_live': ('liveStream', {bool}), - 'age_limit': ( - ('classificationID', 'contentRating'), {str.upper}, {self._AUS_TV_PARENTAL_GUIDELINES.get}), + 'age_limit': (('classificationID', 'contentRating'), {str.upper}, { + lambda x: self._AUS_TV_PARENTAL_GUIDELINES.get(x)}), # dict.get is unhashable in py3.7 }, get_all=False), **traverse_obj(media, { 'categories': (('genres', ...), ('taxonomy', ('genre', 'subgenre'), 'name'), {str}), From 91302ed349f34dc26cc1d661bb45a4b71f4417f7 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 26 Jun 2023 16:19:49 +0530 Subject: [PATCH 08/70] [utils] clean_podcast_url: Handle protocol in redirect URL Closes #7430 --- yt_dlp/utils/_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index de51f6208..f68cdb968 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -5113,7 +5113,7 @@ def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', def clean_podcast_url(url): - return re.sub(r'''(?x) + url = re.sub(r'''(?x) (?: (?: chtbl\.com/track| @@ -5127,6 +5127,7 @@ def clean_podcast_url(url): st\.fm # https://podsights.com/docs/ )/e )/''', '', url) + return re.sub(r'^\w+://(\w+://)', r'\1', url) _HEX_TABLE = '0123456789abcdef' From 5b4b92769afcc398475e481bfa839f1158902fe9 Mon Sep 17 00:00:00 2001 From: Aman Salwan <121633121+AmanSal1@users.noreply.github.com> Date: Wed, 28 Jun 2023 01:58:23 +0530 Subject: [PATCH 09/70] [extractor/crunchyroll:music] Fix `_VALID_URL` (#7439) Closes #7419 Authored by: AmanSal1, rdamas Co-authored-by: Robert Damas --- yt_dlp/extractor/crunchyroll.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index d4a21616b..910504ed2 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -490,8 +490,21 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE): _VALID_URL = r'''(?x) https?://(?:www\.)?crunchyroll\.com/ (?P(?:\w{2}(?:-\w{2})?/)?) - watch/(?Pconcert|musicvideo)/(?P\w{10})''' + watch/(?Pconcert|musicvideo)/(?P\w+)''' _TESTS = [{ + 'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79', + 'info_dict': { + 'ext': 'mp4', + 'id': 'MV5B02C79', + 'display_id': 'egaono-hana', + 'title': 'Egaono Hana', + 'track': 'Egaono Hana', + 'artist': 'Goose house', + 'thumbnail': r're:(?i)^https://www.crunchyroll.com/imgsrv/.*\.jpeg?$', + 'genre': ['J-Pop'], + }, + 'params': {'skip_download': 'm3u8'}, + }, { 'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C', 'info_dict': { 'ext': 'mp4', @@ -519,11 +532,14 @@ class CrunchyrollMusicIE(CrunchyrollBaseIE): }, 'params': {'skip_download': 'm3u8'}, }, { - 'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field', + 'url': 'https://www.crunchyroll.com/de/watch/musicvideo/MV5B02C79/egaono-hana', 'only_matching': True, }, { 'url': 'https://www.crunchyroll.com/watch/concert/MC2E2AC135/live-is-smile-always-364joker-at-yokohama-arena', 'only_matching': True, + }, { + 'url': 'https://www.crunchyroll.com/watch/musicvideo/MV88BB7F2C/crossing-field', + 'only_matching': True, }] _API_ENDPOINT = 'music' From 8f05fbae2a79ce0713077ccc68b354e63216bf20 Mon Sep 17 00:00:00 2001 From: Xiao Han <38774211+meliber@users.noreply.github.com> Date: Tue, 27 Jun 2023 16:16:57 -0500 Subject: [PATCH 10/70] [extractor/abc] Fix extraction (#7434) Closes #6433 Authored by: meliber --- yt_dlp/extractor/abc.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index 0ca76b85a..f56133eb3 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -12,6 +12,7 @@ from ..utils import ( int_or_none, parse_iso8601, str_or_none, + traverse_obj, try_get, unescapeHTML, update_url_query, @@ -85,6 +86,15 @@ class ABCIE(InfoExtractor): 'uploader': 'Behind the News', 'uploader_id': 'behindthenews', } + }, { + 'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540', + 'info_dict': { + 'id': '102520540', + 'title': 'Wagner Group retreating from Russia, leader Prigozhin to move to Belarus', + 'ext': 'mp4', + 'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.', + 'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485', + } }] def _real_extract(self, url): @@ -107,7 +117,7 @@ class ABCIE(InfoExtractor): video = True if mobj is None: - mobj = re.search(r'(?P)"sources": (?P\[[^\]]+\]),', webpage) + mobj = re.search(r'(?P)"(?:sources|files|renditions)":\s*(?P\[[^\]]+\])', webpage) if mobj is None: mobj = re.search( r'inline(?PVideo|Audio|YouTube)Data\.push\((?P[^)]+)\);', @@ -121,7 +131,8 @@ class ABCIE(InfoExtractor): urls_info = self._parse_json( mobj.group('json_data'), video_id, transform_source=js_to_json) youtube = mobj.group('type') == 'YouTube' - video = mobj.group('type') == 'Video' or urls_info[0]['contentType'] == 'video/mp4' + video = mobj.group('type') == 'Video' or traverse_obj( + urls_info, (0, ('contentType', 'MIMEType')), get_all=False) == 'video/mp4' if not isinstance(urls_info, list): urls_info = [urls_info] From a2be9781fbf4d7e4db245c277ca2ecc41cf3a7b2 Mon Sep 17 00:00:00 2001 From: bashonly Date: Tue, 27 Jun 2023 16:50:02 -0500 Subject: [PATCH 11/70] [extractor/Douyin] Fix extraction from webpage Closes #7431 Authored by: bashonly --- yt_dlp/extractor/tiktok.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 9c6d74007..2f491c317 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -1015,18 +1015,16 @@ class DouyinIE(TikTokBaseIE): self.to_screen(f'{e}; trying with webpage') webpage = self._download_webpage(url, video_id) - render_data_json = self._search_regex( - r'', - webpage, 'render data', default=None) - if not render_data_json: + render_data = self._search_json( + r'