diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 68f08dfeaf..47c8298579 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -929,39 +929,37 @@ class InfoExtractor: return content - def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True): + def __print_error(self, errnote, fatal, video_id, err): + if fatal: + raise ExtractorError(f'{video_id}: {errnote} ', cause=err) + elif errnote: + self.report_warning(f'{video_id}: {errnote} {err}') + + def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True, errnote=None): if transform_source: xml_string = transform_source(xml_string) try: return compat_etree_fromstring(xml_string.encode('utf-8')) except xml.etree.ElementTree.ParseError as ve: - errmsg = '%s: Failed to parse XML ' % video_id - if fatal: - raise ExtractorError(errmsg, cause=ve) - else: - self.report_warning(errmsg + str(ve)) + self.__print_error('Failed to parse XML' if errnote is None else errnote, fatal, video_id, ve) - def _parse_json(self, json_string, video_id, transform_source=None, fatal=True, **parser_kwargs): + def _parse_json(self, json_string, video_id, transform_source=None, fatal=True, errnote=None, **parser_kwargs): try: return json.loads( json_string, cls=LenientJSONDecoder, strict=False, transform_source=transform_source, **parser_kwargs) except ValueError as ve: - errmsg = f'{video_id}: Failed to parse JSON' - if fatal: - raise ExtractorError(errmsg, cause=ve) - else: - self.report_warning(f'{errmsg}: {ve}') + self.__print_error('Failed to parse JSON' if errnote is None else errnote, fatal, video_id, ve) - def _parse_socket_response_as_json(self, data, video_id, transform_source=None, fatal=True): - return self._parse_json( - data[data.find('{'):data.rfind('}') + 1], - video_id, transform_source, fatal) + def _parse_socket_response_as_json(self, data, *args, **kwargs): + return self._parse_json(data[data.find('{'):data.rfind('}') + 1], *args, **kwargs) def __create_download_methods(name, parser, note, errnote, return_value): - def parse(ie, content, *args, **kwargs): + def parse(ie, content, *args, errnote=errnote, **kwargs): if parser is None: return content + if errnote is False: + kwargs['errnote'] = errnote # parser is fetched by name so subclasses can override it return getattr(ie, parser)(content, *args, **kwargs) @@ -973,7 +971,7 @@ class InfoExtractor: if res is False: return res content, urlh = res - return parse(self, content, video_id, transform_source=transform_source, fatal=fatal), urlh + return parse(self, content, video_id, transform_source=transform_source, fatal=fatal, errnote=errnote), urlh def download_content(self, url_or_request, video_id, note=note, errnote=errnote, transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): @@ -988,7 +986,7 @@ class InfoExtractor: self.report_warning(f'Unable to load request from disk: {e}') else: content = self.__decode_webpage(webpage_bytes, encoding, url_or_request.headers) - return parse(self, content, video_id, transform_source, fatal) + return parse(self, content, video_id, transform_source=transform_source, fatal=fatal, errnote=errnote) kwargs = { 'note': note, 'errnote': errnote,