diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 1897f73e02..ea1893d15a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -534,13 +534,13 @@ Extracting variables is acceptable for reducing code duplication and improving r
Correct:
```python
-title = self._html_search_regex(r'
([^<]+)', webpage, 'title')
+title = self._html_search_regex(r'([^<]+)
', webpage, 'title')
```
Incorrect:
```python
-TITLE_RE = r'([^<]+)'
+TITLE_RE = r'([^<]+)
'
# ...some lines of code...
title = self._html_search_regex(TITLE_RE, webpage, 'title')
```
diff --git a/yt_dlp/extractor/adobeconnect.py b/yt_dlp/extractor/adobeconnect.py
index e688dddcbb..e2e6f93f31 100644
--- a/yt_dlp/extractor/adobeconnect.py
+++ b/yt_dlp/extractor/adobeconnect.py
@@ -14,7 +14,7 @@ class AdobeConnectIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- title = self._html_search_regex(r'(.+?)', webpage, 'title')
+ title = self._html_extract_title(webpage)
qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1])
is_live = qs.get('isLive', ['false'])[0] == 'true'
formats = []
diff --git a/yt_dlp/extractor/allocine.py b/yt_dlp/extractor/allocine.py
index cd533acfc7..403a277e97 100644
--- a/yt_dlp/extractor/allocine.py
+++ b/yt_dlp/extractor/allocine.py
@@ -7,6 +7,7 @@ from ..utils import (
int_or_none,
qualities,
remove_end,
+ strip_or_none,
try_get,
unified_timestamp,
url_basename,
@@ -102,10 +103,7 @@ class AllocineIE(InfoExtractor):
video_id = display_id
media_data = self._download_json(
'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id)
- title = remove_end(
- self._html_search_regex(
- r'(?s)(.+?)', webpage, 'title').strip(),
- ' - AlloCiné')
+ title = remove_end(strip_or_none(self._html_extract_title(webpage), ' - AlloCiné'))
for key, value in media_data['video'].items():
if not key.endswith('Path'):
continue
diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py
index b06ac74aed..2ab3c1bebd 100644
--- a/yt_dlp/extractor/archiveorg.py
+++ b/yt_dlp/extractor/archiveorg.py
@@ -483,8 +483,7 @@ class YoutubeWebArchiveIE(InfoExtractor):
regex), webpage, name, default='{}'), video_id, fatal=False)
def _extract_webpage_title(self, webpage):
- page_title = self._html_search_regex(
- r'([^<]*)', webpage, 'title', default='')
+ page_title = self._html_extract_title(webpage, default='')
# YouTube video pages appear to always have either 'YouTube -' as prefix or '- YouTube' as suffix.
return self._html_search_regex(
r'(?:YouTube\s*-\s*(.*)$)|(?:(.*)\s*-\s*YouTube$)',
diff --git a/yt_dlp/extractor/asiancrush.py b/yt_dlp/extractor/asiancrush.py
index 75a6329589..7f1940fcab 100644
--- a/yt_dlp/extractor/asiancrush.py
+++ b/yt_dlp/extractor/asiancrush.py
@@ -181,8 +181,7 @@ class AsianCrushPlaylistIE(AsianCrushBaseIE):
'title', default=None) or self._og_search_title(
webpage, default=None) or self._html_search_meta(
'twitter:title', webpage, 'title',
- default=None) or self._search_regex(
- r'([^<]+)', webpage, 'title', fatal=False)
+ default=None) or self._html_extract_title(webpage)
if title:
title = re.sub(r'\s*\|\s*.+?$', '', title)
diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py
index 8231557300..29ad7ded77 100644
--- a/yt_dlp/extractor/bbc.py
+++ b/yt_dlp/extractor/bbc.py
@@ -906,9 +906,8 @@ class BBCIE(BBCCoUkIE):
playlist_title = json_ld_info.get('title')
if not playlist_title:
- playlist_title = self._og_search_title(
- webpage, default=None) or self._html_search_regex(
- r'(.+?)', webpage, 'playlist title', default=None)
+ playlist_title = (self._og_search_title(webpage, default=None)
+ or self._html_extract_title(webpage, 'playlist title', default=None))
if playlist_title:
playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()
diff --git a/yt_dlp/extractor/breitbart.py b/yt_dlp/extractor/breitbart.py
index f50f719dc2..e029aa627f 100644
--- a/yt_dlp/extractor/breitbart.py
+++ b/yt_dlp/extractor/breitbart.py
@@ -29,9 +29,8 @@ class BreitBartIE(InfoExtractor):
self._sort_formats(formats)
return {
'id': video_id,
- 'title': self._og_search_title(
- webpage, default=None) or self._html_search_regex(
- r'(?s)(.*?)', webpage, 'video title'),
+ 'title': (self._og_search_title(webpage, default=None)
+ or self._html_extract_title(webpage, 'video title')),
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
'age_limit': self._rta_search(webpage),
diff --git a/yt_dlp/extractor/callin.py b/yt_dlp/extractor/callin.py
index acf327ace6..1f3b7cfff9 100644
--- a/yt_dlp/extractor/callin.py
+++ b/yt_dlp/extractor/callin.py
@@ -54,7 +54,7 @@ class CallinIE(InfoExtractor):
id = episode['id']
title = (episode.get('title')
or self._og_search_title(webpage, fatal=False)
- or self._html_search_regex('(.*?)', webpage, 'title'))
+ or self._html_extract_title(webpage))
url = episode['m3u8']
formats = self._extract_m3u8_formats(url, display_id, ext='ts')
self._sort_formats(formats)
diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py
index ac1272f7b5..fba8bf965f 100644
--- a/yt_dlp/extractor/cbc.py
+++ b/yt_dlp/extractor/cbc.py
@@ -127,9 +127,9 @@ class CBCIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
- title = self._og_search_title(webpage, default=None) or self._html_search_meta(
- 'twitter:title', webpage, 'title', default=None) or self._html_search_regex(
- r'([^<]+)', webpage, 'title', fatal=False)
+ title = (self._og_search_title(webpage, default=None)
+ or self._html_search_meta('twitter:title', webpage, 'title', default=None)
+ or self._html_extract_title(webpage))
entries = [
self._extract_player_init(player_init, display_id)
for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
diff --git a/yt_dlp/extractor/closertotruth.py b/yt_dlp/extractor/closertotruth.py
index 26243d52d5..517e121e02 100644
--- a/yt_dlp/extractor/closertotruth.py
+++ b/yt_dlp/extractor/closertotruth.py
@@ -54,8 +54,7 @@ class CloserToTruthIE(InfoExtractor):
r'