mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-26 19:58:09 +01:00
[cleanup] Use _html_extract_title
This commit is contained in:
parent
85e801a9db
commit
04f3fd2c89
38 changed files with 51 additions and 80 deletions
|
@ -534,13 +534,13 @@ Extracting variables is acceptable for reducing code duplication and improving r
|
|||
Correct:
|
||||
|
||||
```python
|
||||
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
|
||||
title = self._html_search_regex(r'<h1>([^<]+)</h1>', webpage, 'title')
|
||||
```
|
||||
|
||||
Incorrect:
|
||||
|
||||
```python
|
||||
TITLE_RE = r'<title>([^<]+)</title>'
|
||||
TITLE_RE = r'<h1>([^<]+)</h1>'
|
||||
# ...some lines of code...
|
||||
title = self._html_search_regex(TITLE_RE, webpage, 'title')
|
||||
```
|
||||
|
|
|
@ -14,7 +14,7 @@ class AdobeConnectIE(InfoExtractor):
|
|||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
|
||||
title = self._html_extract_title(webpage)
|
||||
qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
|
||||
is_live = qs.get('isLive', ['false'])[0] == 'true'
|
||||
formats = []
|
||||
|
|
|
@ -7,6 +7,7 @@ from ..utils import (
|
|||
int_or_none,
|
||||
qualities,
|
||||
remove_end,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_basename,
|
||||
|
@ -102,10 +103,7 @@ class AllocineIE(InfoExtractor):
|
|||
video_id = display_id
|
||||
media_data = self._download_json(
|
||||
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
|
||||
title = remove_end(
|
||||
self._html_search_regex(
|
||||
r'(?s)<title>(.+?)</title>', webpage, 'title').strip(),
|
||||
' - AlloCiné')
|
||||
title = remove_end(strip_or_none(self._html_extract_title(webpage), ' - AlloCiné'))
|
||||
for key, value in media_data['video'].items():
|
||||
if not key.endswith('Path'):
|
||||
continue
|
||||
|
|
|
@ -483,8 +483,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
|
|||
regex), webpage, name, default='{}'), video_id, fatal=False)
|
||||
|
||||
def _extract_webpage_title(self, webpage):
|
||||
page_title = self._html_search_regex(
|
||||
r'<title>([^<]*)</title>', webpage, 'title', default='')
|
||||
page_title = self._html_extract_title(webpage, default='')
|
||||
# YouTube video pages appear to always have either 'YouTube -' as prefix or '- YouTube' as suffix.
|
||||
return self._html_search_regex(
|
||||
r'(?:YouTube\s*-\s*(.*)$)|(?:(.*)\s*-\s*YouTube$)',
|
||||
|
|
|
@ -181,8 +181,7 @@ class AsianCrushPlaylistIE(AsianCrushBaseIE):
|
|||
'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title',
|
||||
default=None) or self._search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
|
||||
default=None) or self._html_extract_title(webpage)
|
||||
if title:
|
||||
title = re.sub(r'\s*\|\s*.+?$', '', title)
|
||||
|
||||
|
|
|
@ -906,9 +906,8 @@ class BBCIE(BBCCoUkIE):
|
|||
|
||||
playlist_title = json_ld_info.get('title')
|
||||
if not playlist_title:
|
||||
playlist_title = self._og_search_title(
|
||||
webpage, default=None) or self._html_search_regex(
|
||||
r'<title>(.+?)</title>', webpage, 'playlist title', default=None)
|
||||
playlist_title = (self._og_search_title(webpage, default=None)
|
||||
or self._html_extract_title(webpage, 'playlist title', default=None))
|
||||
if playlist_title:
|
||||
playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()
|
||||
|
||||
|
|
|
@ -29,9 +29,8 @@ class BreitBartIE(InfoExtractor):
|
|||
self._sort_formats(formats)
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._og_search_title(
|
||||
webpage, default=None) or self._html_search_regex(
|
||||
r'(?s)<title>(.*?)</title>', webpage, 'video title'),
|
||||
'title': (self._og_search_title(webpage, default=None)
|
||||
or self._html_extract_title(webpage, 'video title')),
|
||||
'description': self._og_search_description(webpage),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
'age_limit': self._rta_search(webpage),
|
||||
|
|
|
@ -54,7 +54,7 @@ class CallinIE(InfoExtractor):
|
|||
id = episode['id']
|
||||
title = (episode.get('title')
|
||||
or self._og_search_title(webpage, fatal=False)
|
||||
or self._html_search_regex('<title>(.*?)</title>', webpage, 'title'))
|
||||
or self._html_extract_title(webpage))
|
||||
url = episode['m3u8']
|
||||
formats = self._extract_m3u8_formats(url, display_id, ext='ts')
|
||||
self._sort_formats(formats)
|
||||
|
|
|
@ -127,9 +127,9 @@ class CBCIE(InfoExtractor):
|
|||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
title = self._og_search_title(webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title', default=None) or self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
|
||||
title = (self._og_search_title(webpage, default=None)
|
||||
or self._html_search_meta('twitter:title', webpage, 'title', default=None)
|
||||
or self._html_extract_title(webpage))
|
||||
entries = [
|
||||
self._extract_player_init(player_init, display_id)
|
||||
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
|
||||
|
|
|
@ -54,8 +54,7 @@ class CloserToTruthIE(InfoExtractor):
|
|||
r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
|
||||
webpage, 'kaltura partner_id')
|
||||
|
||||
title = self._search_regex(
|
||||
r'<title>(.+?)\s*\|\s*.+?</title>', webpage, 'video title')
|
||||
title = self._html_extract_title(webpage, 'video title')
|
||||
|
||||
select = self._search_regex(
|
||||
r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
|
||||
|
|
|
@ -1329,9 +1329,8 @@ class InfoExtractor(object):
|
|||
def _og_search_description(self, html, **kargs):
|
||||
return self._og_search_property('description', html, fatal=False, **kargs)
|
||||
|
||||
def _og_search_title(self, html, **kargs):
|
||||
kargs.setdefault('fatal', False)
|
||||
return self._og_search_property('title', html, **kargs)
|
||||
def _og_search_title(self, html, *, fatal=False, **kargs):
|
||||
return self._og_search_property('title', html, fatal=fatal, **kargs)
|
||||
|
||||
def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
|
||||
regexes = self._og_regexes('video') + self._og_regexes('video:url')
|
||||
|
@ -1342,9 +1341,8 @@ class InfoExtractor(object):
|
|||
def _og_search_url(self, html, **kargs):
|
||||
return self._og_search_property('url', html, **kargs)
|
||||
|
||||
def _html_extract_title(self, html, name, **kwargs):
|
||||
return self._html_search_regex(
|
||||
r'(?s)<title>(.*?)</title>', html, name, **kwargs)
|
||||
def _html_extract_title(self, html, name='title', *, fatal=False, **kwargs):
|
||||
return self._html_search_regex(r'(?s)<title>([^<]+)</title>', html, name, fatal=fatal, **kwargs)
|
||||
|
||||
def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
|
||||
name = variadic(name)
|
||||
|
|
|
@ -278,7 +278,7 @@ class CSpanCongressIE(InfoExtractor):
|
|||
video_id, transform_source=js_to_json)
|
||||
|
||||
title = (self._og_search_title(webpage, default=None)
|
||||
or self._html_search_regex(r'(?s)<title>(.*?)</title>', webpage, 'video title'))
|
||||
or self._html_extract_title(webpage, 'video title'))
|
||||
description = (self._og_search_description(webpage, default=None)
|
||||
or self._html_search_meta('description', webpage, 'description', default=None))
|
||||
|
||||
|
|
|
@ -75,8 +75,7 @@ class FiveTVIE(InfoExtractor):
|
|||
r'<a[^>]+?href="([^"]+)"[^>]+?class="videoplayer"'],
|
||||
webpage, 'video url')
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title')
|
||||
title = self._og_search_title(webpage, default=None) or self._html_extract_title(webpage)
|
||||
duration = int_or_none(self._og_search_property(
|
||||
'video:duration', webpage, 'duration', default=None))
|
||||
|
||||
|
|
|
@ -29,8 +29,7 @@ class FoxgayIE(InfoExtractor):
|
|||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = remove_end(self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title'), ' - Foxgay.com')
|
||||
title = remove_end(self._html_extract_title(webpage), ' - Foxgay.com')
|
||||
description = get_element_by_id('inf_tit', webpage)
|
||||
|
||||
# The default user-agent with foxgay cookies leads to pages without videos
|
||||
|
|
|
@ -2873,10 +2873,8 @@ class GenericIE(InfoExtractor):
|
|||
# Site Name | Video Title
|
||||
# Video Title - Tagline | Site Name
|
||||
# and so on and so forth; it's just not practical
|
||||
video_title = self._og_search_title(
|
||||
webpage, default=None) or self._html_search_regex(
|
||||
r'(?s)<title>(.*?)</title>', webpage, 'video title',
|
||||
default='video')
|
||||
video_title = (self._og_search_title(webpage, default=None)
|
||||
or self._html_extract_title(webpage, 'video title', default='video'))
|
||||
|
||||
# Try to detect age limit automatically
|
||||
age_limit = self._rta_search(webpage)
|
||||
|
|
|
@ -23,9 +23,7 @@ class GlideIE(InfoExtractor):
|
|||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.+?)</title>', webpage,
|
||||
'title', default=None) or self._og_search_title(webpage)
|
||||
title = self._html_extract_title(webpage, default=None) or self._og_search_title(webpage)
|
||||
video_url = self._proto_relative_url(self._search_regex(
|
||||
r'<source[^>]+src=(["\'])(?P<url>.+?)\1',
|
||||
webpage, 'video URL', default=None,
|
||||
|
|
|
@ -38,8 +38,7 @@ class HellPornoIE(InfoExtractor):
|
|||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
title = remove_end(self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')
|
||||
title = remove_end(self._html_extract_title(webpage), ' - Hell Porno')
|
||||
|
||||
info = self._parse_html5_media_entries(url, webpage, display_id)[0]
|
||||
self._sort_formats(info['formats'])
|
||||
|
|
|
@ -66,8 +66,7 @@ class HuyaLiveIE(InfoExtractor):
|
|||
room_info = try_get(stream_data, lambda x: x['data'][0]['gameLiveInfo'])
|
||||
if not room_info:
|
||||
raise ExtractorError('Can not extract the room info', expected=True)
|
||||
title = room_info.get('roomName') or room_info.get('introduction') or self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title')
|
||||
title = room_info.get('roomName') or room_info.get('introduction') or self._html_extract_title(webpage)
|
||||
screen_type = room_info.get('screenType')
|
||||
live_source_type = room_info.get('liveSourceType')
|
||||
stream_info_list = stream_data['data'][0]['gameStreamInfoList']
|
||||
|
|
|
@ -68,7 +68,7 @@ class ImdbIE(InfoExtractor):
|
|||
video_info = traverse_obj(info, ('props', 'pageProps', 'videoPlaybackData', 'video'), default={})
|
||||
title = (traverse_obj(video_info, ('name', 'value'), ('primaryTitle', 'titleText', 'text'))
|
||||
or self._html_search_meta(('og:title', 'twitter:title'), webpage, default=None)
|
||||
or self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title'))
|
||||
or self._html_extract_title(webpage))
|
||||
data = video_info.get('playbackURLs') or try_get(self._download_json(
|
||||
'https://www.imdb.com/ve/data/VIDEO_PLAYBACK_DATA', video_id,
|
||||
query={
|
||||
|
|
|
@ -115,7 +115,7 @@ class InfoQIE(BokeCCBaseIE):
|
|||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
|
||||
video_title = self._html_extract_title(webpage)
|
||||
video_description = self._html_search_meta('description', webpage, 'description')
|
||||
|
||||
if '/cn/' in url:
|
||||
|
|
|
@ -76,8 +76,7 @@ class IwaraIE(InfoExtractor):
|
|||
'age_limit': age_limit,
|
||||
}
|
||||
|
||||
title = remove_end(self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title'), ' | Iwara')
|
||||
title = remove_end(self._html_extract_title(webpage), ' | Iwara')
|
||||
|
||||
thumbnail = self._html_search_regex(
|
||||
r'poster=[\'"]([^\'"]+)', webpage, 'thumbnail', default=None)
|
||||
|
|
|
@ -102,7 +102,7 @@ class LinkedInIE(LinkedInBaseIE):
|
|||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
|
||||
title = self._html_extract_title(webpage)
|
||||
description = clean_html(get_element_by_class('share-update-card__update-text', webpage))
|
||||
like_count = int_or_none(get_element_by_class('social-counts-reactions__social-counts-numRections', webpage))
|
||||
creator = strip_or_none(clean_html(get_element_by_class('comment__actor-name', webpage)))
|
||||
|
|
|
@ -24,8 +24,7 @@ class MiaoPaiIE(InfoExtractor):
|
|||
webpage = self._download_webpage(
|
||||
url, video_id, headers={'User-Agent': self._USER_AGENT_IPAD})
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title')
|
||||
title = self._html_extract_title(webpage)
|
||||
thumbnail = self._html_search_regex(
|
||||
r'<div[^>]+class=(?P<q1>[\'"]).*\bvideo_img\b.*(?P=q1)[^>]+data-url=(?P<q2>[\'"])(?P<url>[^\'"]+)(?P=q2)',
|
||||
webpage, 'thumbnail', fatal=False, group='url')
|
||||
|
|
|
@ -38,8 +38,7 @@ class MojvideoIE(InfoExtractor):
|
|||
r'<errordesc>([^<]*)</errordesc>', playerapi, 'error description', fatal=False)
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_desc), expected=True)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', playerapi, 'title')
|
||||
title = self._html_extract_title(playerapi)
|
||||
video_url = self._html_search_regex(
|
||||
r'<file>([^<]+)</file>', playerapi, 'video URL')
|
||||
thumbnail = self._html_search_regex(
|
||||
|
|
|
@ -106,8 +106,7 @@ class NewgroundsIE(InfoExtractor):
|
|||
uploader = None
|
||||
webpage = self._download_webpage(url, media_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.+?)</title>', webpage, 'title')
|
||||
title = self._html_extract_title(webpage)
|
||||
|
||||
media_url_string = self._search_regex(
|
||||
r'"url"\s*:\s*("[^"]+"),', webpage, 'media url', default=None)
|
||||
|
@ -219,8 +218,7 @@ class NewgroundsPlaylistIE(InfoExtractor):
|
|||
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
title = self._search_regex(
|
||||
r'<title>([^>]+)</title>', webpage, 'title', default=None)
|
||||
title = self._html_extract_title(webpage, default=None)
|
||||
|
||||
# cut left menu
|
||||
webpage = self._search_regex(
|
||||
|
|
|
@ -309,7 +309,9 @@ class NhkForSchoolProgramListIE(InfoExtractor):
|
|||
|
||||
webpage = self._download_webpage(f'https://www.nhk.or.jp/school/{program_id}/', program_id)
|
||||
|
||||
title = self._og_search_title(webpage, fatal=False) or self._html_extract_title(webpage, fatal=False) or self._html_search_regex(r'<h3>([^<]+?)とは?\s*</h3>', webpage, 'title', fatal=False)
|
||||
title = (self._og_search_title(webpage)
|
||||
or self._html_extract_title(webpage)
|
||||
or self._html_search_regex(r'<h3>([^<]+?)とは?\s*</h3>', webpage, 'title', fatal=False))
|
||||
title = re.sub(r'\s*\|\s*NHK\s+for\s+School\s*$', '', title) if title else None
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div\s+class="programDetail\s*">\s*<p>[^<]+</p>',
|
||||
|
|
|
@ -85,8 +85,7 @@ class PlayvidIE(InfoExtractor):
|
|||
|
||||
# Extract title - should be in the flashvars; if not, look elsewhere
|
||||
if video_title is None:
|
||||
video_title = self._html_search_regex(
|
||||
r'<title>(.*?)</title', webpage, 'title')
|
||||
video_title = self._html_extract_title(webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
|
|
|
@ -49,7 +49,7 @@ class Rule34VideoIE(InfoExtractor):
|
|||
'quality': quality,
|
||||
})
|
||||
|
||||
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
|
||||
title = self._html_extract_title(webpage)
|
||||
thumbnail = self._html_search_regex(r'preview_url:\s+\'([^\']+)\'', webpage, 'thumbnail', default=None)
|
||||
duration = self._html_search_regex(r'"icon-clock"></i>\s+<span>((?:\d+:?)+)', webpage, 'duration', default=None)
|
||||
|
||||
|
|
|
@ -112,7 +112,7 @@ class SenateISVPIE(InfoExtractor):
|
|||
if smuggled_data.get('force_title'):
|
||||
title = smuggled_data['force_title']
|
||||
else:
|
||||
title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, video_id)
|
||||
title = self._html_extract_title(webpage)
|
||||
poster = qs.get('poster')
|
||||
thumbnail = poster[0] if poster else None
|
||||
|
||||
|
|
|
@ -36,8 +36,7 @@ class SunPornoIE(InfoExtractor):
|
|||
webpage = self._download_webpage(
|
||||
'http://www.sunporno.com/videos/%s' % video_id, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title')
|
||||
title = self._html_extract_title(webpage)
|
||||
description = self._html_search_meta(
|
||||
'description', webpage, 'description')
|
||||
thumbnail = self._html_search_regex(
|
||||
|
|
|
@ -37,9 +37,7 @@ class ThisAVIE(InfoExtractor):
|
|||
|
||||
video_id = mobj.group('id')
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = remove_end(self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title'),
|
||||
' - 視頻 - ThisAV.com-世界第一中文成人娛樂網站')
|
||||
title = remove_end(self._html_extract_title(webpage), ' - 視頻 - ThisAV.com-世界第一中文成人娛樂網站')
|
||||
video_url = self._html_search_regex(
|
||||
r"addVariable\('file','([^']+)'\);", webpage, 'video url', default=None)
|
||||
if video_url:
|
||||
|
|
|
@ -24,8 +24,7 @@ class TrailerAddictIE(InfoExtractor):
|
|||
name = mobj.group('movie') + '/' + mobj.group('trailer_name')
|
||||
webpage = self._download_webpage(url, name)
|
||||
|
||||
title = self._search_regex(r'<title>(.+?)</title>',
|
||||
webpage, 'video title').replace(' - Trailer Addict', '')
|
||||
title = self._html_extract_title(webpage, 'video title').replace(' - Trailer Addict', '')
|
||||
view_count_str = self._search_regex(
|
||||
r'<span class="views_n">([0-9,.]+)</span>',
|
||||
webpage, 'view count', fatal=False)
|
||||
|
|
|
@ -42,8 +42,7 @@ class Varzesh3IE(InfoExtractor):
|
|||
video_url = self._search_regex(
|
||||
r'<source[^>]+src="([^"]+)"', webpage, 'video url')
|
||||
|
||||
title = remove_start(self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title'), 'ویدیو ورزش 3 | ')
|
||||
title = remove_start(self._html_extract_title(webpage), 'ویدیو ورزش 3 | ')
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'(?s)<div class="matn">(.+?)</div>',
|
||||
|
|
|
@ -50,8 +50,7 @@ class VShareIE(InfoExtractor):
|
|||
'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
|
||||
video_id, headers={'Referer': url})
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>([^<]+)</title>', webpage, 'title')
|
||||
title = self._html_extract_title(webpage)
|
||||
title = title.split(' - ')[0]
|
||||
|
||||
error = self._html_search_regex(
|
||||
|
|
|
@ -28,7 +28,7 @@ class VuploadIE(InfoExtractor):
|
|||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
|
||||
title = self._html_extract_title(webpage)
|
||||
video_json = self._parse_json(self._html_search_regex(r'sources:\s*(.+?]),', webpage, 'video'), video_id, transform_source=js_to_json)
|
||||
formats = []
|
||||
for source in video_json:
|
||||
|
|
|
@ -73,8 +73,7 @@ class WeiboIE(InfoExtractor):
|
|||
webpage = self._download_webpage(
|
||||
url, video_id, note='Revisiting webpage')
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.+?)</title>', webpage, 'title')
|
||||
title = self._html_extract_title(webpage)
|
||||
|
||||
video_formats = compat_parse_qs(self._search_regex(
|
||||
r'video-sources=\\\"(.+?)\"', webpage, 'video_sources'))
|
||||
|
|
|
@ -533,7 +533,7 @@ class YahooJapanNewsIE(InfoExtractor):
|
|||
|
||||
title = self._html_search_meta(
|
||||
['og:title', 'twitter:title'], webpage, 'title', default=None
|
||||
) or self._html_search_regex('<title>([^<]+)</title>', webpage, 'title')
|
||||
) or self._html_extract_title(webpage)
|
||||
|
||||
if display_id == host:
|
||||
# Headline page (w/ multiple BC playlists) ('news.yahoo.co.jp', 'headlines.yahoo.co.jp/videonews/', ...)
|
||||
|
|
|
@ -36,8 +36,7 @@ class YouJizzIE(InfoExtractor):
|
|||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<title>(.+?)</title>', webpage, 'title')
|
||||
title = self._html_extract_title(webpage)
|
||||
|
||||
formats = []
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue