mirror of
https://github.com/yt-dlp/yt-dlp
synced 2024-11-16 07:48:01 +01:00
[InfoExtractor/common] Correct and test meta tag matching
This commit is contained in:
parent
211503c39f
commit
bec2248141
2 changed files with 19 additions and 1 deletions
|
@@ -40,5 +40,23 @@ class TestInfoExtractor(unittest.TestCase):
|
||||||
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
||||||
self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
|
self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
|
||||||
|
|
||||||
|
def test_html_search_meta(self):
|
||||||
|
ie = self.ie
|
||||||
|
html = '''
|
||||||
|
<meta name="a" content="1" />
|
||||||
|
<meta name='b' content='2'>
|
||||||
|
<meta name="c" content='3'>
|
||||||
|
<meta name=d content='4'>
|
||||||
|
<meta property="e" content='5' >
|
||||||
|
<meta content="6" name="f">
|
||||||
|
'''
|
||||||
|
|
||||||
|
self.assertEqual(ie._html_search_meta('a', html), '1')
|
||||||
|
self.assertEqual(ie._html_search_meta('b', html), '2')
|
||||||
|
self.assertEqual(ie._html_search_meta('c', html), '3')
|
||||||
|
self.assertEqual(ie._html_search_meta('d', html), '4')
|
||||||
|
self.assertEqual(ie._html_search_meta('e', html), '5')
|
||||||
|
self.assertEqual(ie._html_search_meta('f', html), '6')
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@@ -594,7 +594,7 @@ class InfoExtractor(object):
|
||||||
return self._html_search_regex(
|
return self._html_search_regex(
|
||||||
r'''(?isx)<meta
|
r'''(?isx)<meta
|
||||||
(?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
|
(?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1)
|
||||||
[^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name),
|
[^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(name),
|
||||||
html, display_name, fatal=fatal, group='content', **kwargs)
|
html, display_name, fatal=fatal, group='content', **kwargs)
|
||||||
|
|
||||||
def _dc_search_uploader(self, html):
|
def _dc_search_uploader(self, html):
|
||||||
|
|
Loading…
Reference in a new issue