Add option to list, in JSON format, the extractors matching given URLs, or all extractors

This commit is contained in:
wesson 2024-10-16 13:26:12 +02:00
parent 354cb4026c
commit f20148e1d7
3 changed files with 62 additions and 0 deletions

View file

@ -13,6 +13,7 @@ import optparse
import os
import re
import traceback
import json
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS, CookieLoadError
from .downloader.external import get_external_downloader
@@ -119,6 +120,45 @@ def print_extractor_information(opts, urls):
out = 'Supported TV Providers:\n{}\n'.format(render_table(
['mso', 'mso name'],
[[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]))
# Handle --list-extractors-json: build a JSON array describing extractors.
# When URLs were supplied, only extractors matching at least one URL are
# reported; otherwise every registered extractor is reported.
elif opts.list_extractors_json:
from .extractor.generic import GenericIE
dicts = []
e_index = 0
# Map each input URL to a "claimed by some extractor" flag (initially False).
urls = dict.fromkeys(urls, False)
if len(urls):
for ie in gen_extractors():
if ie == GenericIE:
# GenericIE is the catch-all: it reports every URL no specific
# extractor claimed. NOTE(review): this assumes GenericIE is
# yielded last by gen_extractors() — confirm, otherwise URLs
# matched by a later extractor would also appear here.
matched_urls = [url for url, matched in urls.items() if not matched]
else:
matched_urls = tuple(filter(ie.suitable, urls.keys()))
urls.update(dict.fromkeys(matched_urls, True))
# show only extractor with matched URL
if len(matched_urls):
data = {'index': e_index,
'name': ie.IE_NAME,
'desc': ie.IE_DESC if ie.IE_DESC else '',
'working': ie.working(),
'enabled': ie.is_enabled(),
'return_type': ie.return_type(),
'regex_url': ie.list_regex_url(),
# JSON serialization turns this tuple/list into a JSON array.
'matched_urls': matched_urls,
}
# 'index' counts only the extractors actually listed, so it
# stays contiguous in the output.
e_index += 1
dicts.append(data)
else:
# show all extractors
for ie in gen_extractors():
data = {'index': e_index,
'name': ie.IE_NAME,
'desc': ie.IE_DESC if ie.IE_DESC else '',
'working': ie.working(),
'enabled': ie.is_enabled(),
'return_type': ie.return_type(),
'regex_url': ie.list_regex_url(),
}
dicts.append(data)
e_index += 1
out = json.dumps(dicts, indent=4)
else:
return False
write_string(out, out=sys.stdout)

View file

@ -14,6 +14,7 @@ import netrc
import os
import random
import re
import string
import subprocess
import sys
import time
@@ -610,6 +611,23 @@ class InfoExtractor:
# so that lazy_extractors works correctly
return cls._match_valid_url(url) is not None
@classmethod
def list_regex_url(cls):
    """Return this extractor's URL pattern(s) as a sequence of strings.

    Returns:
        _VALID_URL unchanged when it is already a list/tuple; a 1-tuple
        containing the pattern with all whitespace removed when it is a
        string (compacts verbose/multi-line regexes for JSON output);
        an empty list otherwise (e.g. no usable _VALID_URL).
    """
    pattern = cls._VALID_URL
    # isinstance() is the idiomatic type check (instead of type(...) in [...])
    # and also accepts subclasses of list/tuple/str.
    if isinstance(pattern, (list, tuple)):
        return pattern
    if isinstance(pattern, str):
        # NOTE(review): stripping every whitespace character would corrupt a
        # pattern that relies on a literal space; _VALID_URL regexes are
        # assumed not to — confirm.
        return (pattern.translate({ord(c): None for c in string.whitespace}),)
    return []
@classmethod
def return_type(cls):
    """Return the _RETURN_TYPE declared directly on this class, or ''.

    Only the class's own namespace (cls.__dict__) is consulted, so a
    _RETURN_TYPE inherited from a base class is deliberately ignored.
    """
    return cls.__dict__.get('_RETURN_TYPE', '')
@classmethod
def is_enabled(cls):
    """Return the class-level _ENABLED flag for this extractor."""
    return cls._ENABLED
@classmethod
def _match_id(cls, url):
    """Return the 'id' capture group from matching url against _VALID_URL."""
    match = cls._match_valid_url(url)
    return match.group('id')

View file

@@ -362,6 +362,10 @@ def create_parser():
'--list-extractors',
action='store_true', dest='list_extractors', default=False,
help='List all supported extractors and exit')
# Machine-readable companion to --list-extractors: dumps extractor
# metadata as JSON (handled in print_extractor_information).
general.add_option(
'--list-extractors-json',
action='store_true', dest='list_extractors_json', default=False,
help='List all supported extractors parameters in JSON format and exit')
general.add_option(
'--extractor-descriptions',
action='store_true', dest='list_extractor_descriptions', default=False,