diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 20111175b..31fa1d7b5 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -13,6 +13,7 @@ import optparse
 import os
 import re
 import traceback
+import json
 
 from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError
 from .downloader.external import get_external_downloader
@@ -119,6 +120,38 @@ def print_extractor_information(opts, urls):
         out = 'Supported TV Providers:\n{}\n'.format(render_table(
             ['mso', 'mso name'],
             [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]))
+    elif opts.list_extractors_json:
+        def describe(ie, index):
+            # Common JSON-serializable description of one extractor
+            return {
+                'index': index,
+                'name': ie.IE_NAME,
+                'desc': ie.IE_DESC or '',
+                'working': ie.working(),
+                'enabled': ie.is_enabled(),
+                'return_type': ie.return_type(),
+                'regex_url': ie.list_regex_url(),
+            }
+
+        dicts = []
+        if urls:
+            # Show only extractors matching at least one given URL; the generic
+            # extractor acts as a catch-all for URLs nothing else claimed
+            matched = dict.fromkeys(urls, False)
+            for ie in gen_extractors():
+                if ie.ie_key() == 'Generic':
+                    ie_urls = [url for url, done in matched.items() if not done]
+                else:
+                    ie_urls = [url for url in matched if ie.suitable(url)]
+                matched.update(dict.fromkeys(ie_urls, True))
+                if ie_urls:
+                    data = describe(ie, len(dicts))
+                    data['matched_urls'] = ie_urls
+                    dicts.append(data)
+        else:
+            # Without URLs, describe every available extractor
+            dicts = [describe(ie, index) for index, ie in enumerate(gen_extractors())]
+        out = json.dumps(dicts, indent=4)
     else:
         return False
     write_string(out, out=sys.stdout)
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 92ddad2b7..85597fe8c 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -14,6 +14,7 @@ import netrc
 import os
 import random
 import re
+import string
 import subprocess
 import sys
 import time
@@ -610,6 +611,30 @@ class InfoExtractor:
         # so that lazy_extractors works correctly
         return cls._match_valid_url(url) is not None
 
+    @classmethod
+    def list_regex_url(cls):
+        """Return the URL pattern(s) of this extractor as a tuple/list of strings.
+
+        Whitespace is stripped from a single-string pattern, since some
+        extractors assemble _VALID_URL from multi-line literals.
+        """
+        valid_url = cls._VALID_URL
+        if isinstance(valid_url, (list, tuple)):
+            return valid_url
+        if isinstance(valid_url, str):
+            return (valid_url.translate({ord(c): None for c in string.whitespace}),)
+        return []
+
+    @classmethod
+    def return_type(cls):
+        """Return the extractor's own _RETURN_TYPE, or '' if it declares none."""
+        return cls.__dict__.get('_RETURN_TYPE', '')
+
+    @classmethod
+    def is_enabled(cls):
+        """Whether this extractor is enabled by default."""
+        return cls._ENABLED
+
     @classmethod
     def _match_id(cls, url):
         return cls._match_valid_url(url).group('id')
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 930d9d4be..ba75beec4 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -362,6 +362,10 @@ def create_parser():
         '--list-extractors',
         action='store_true', dest='list_extractors', default=False,
         help='List all supported extractors and exit')
+    general.add_option(
+        '--list-extractors-json',
+        action='store_true', dest='list_extractors_json', default=False,
+        help='List all supported extractors and their parameters in JSON format and exit')
     general.add_option(
         '--extractor-descriptions',
         action='store_true', dest='list_extractor_descriptions', default=False,