diff --git a/test/test_utils.py b/test/test_utils.py index b3de14198e..2e26d224af 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1769,6 +1769,10 @@ Line 1 ''' + GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING_UPPERCASE = ''' + + ''' + def test_get_element_by_attribute(self): html = self.GET_ELEMENT_BY_CLASS_TEST_STRING @@ -1780,6 +1784,10 @@ Line 1 self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo') + html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING_UPPERCASE + + self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo') + def test_get_element_html_by_attribute(self): html = self.GET_ELEMENT_BY_CLASS_TEST_STRING @@ -1851,6 +1859,11 @@ Line 1 GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[78:119] GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT = GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML[6:-7] + + GET_ELEMENT_BY_TAG_TEST_STRING_UPPERCASE = ''' + nice + ''' + def test_get_element_text_and_html_by_tag(self): html = self.GET_ELEMENT_BY_TAG_TEST_STRING @@ -1860,8 +1873,15 @@ Line 1 self.assertEqual( get_element_text_and_html_by_tag('span', html), (self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT, self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML)) + self.assertRaises(compat_HTMLParseError, get_element_text_and_html_by_tag, 'article', html) + html = self.GET_ELEMENT_BY_TAG_TEST_STRING_UPPERCASE + + self.assertEqual( + get_element_text_and_html_by_tag('SPAN', html), + ('nice', html.strip()), html) + def test_iri_to_uri(self): self.assertEqual( iri_to_uri('https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b'), diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 699bf1e7f6..9aae60584d 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -442,7 +442,7 @@ def get_element_text_and_html_by_tag(tag, html): content_start += whole_start + 1 with HTMLBreakOnClosingTagParser() as parser: parser.feed(html[whole_start:content_start]) - if not parser.tagstack or parser.tagstack[0] != tag: + if not parser.tagstack or parser.tagstack[0] != tag.lower(): raise compat_HTMLParseError(f'parser did not match opening {tag} tag') offset = content_start while offset < len(html):