mirror of
https://github.com/yt-dlp/yt-dlp
synced 2024-12-28 22:24:34 +01:00
[npo] Generalize playlist extractors
This commit is contained in:
parent
e590b7ff9e
commit
a3d8b38168
1 changed files with 26 additions and 37 deletions
|
@ -438,9 +438,29 @@ class SchoolTVIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class VPROIE(NPOIE):
|
class NPOPlaylistBaseIE(NPOIE):
|
||||||
|
def _real_extract(self, url):
|
||||||
|
playlist_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
|
entries = [
|
||||||
|
self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id)
|
||||||
|
for video_id in re.findall(self._PLAYLIST_ENTRY_RE, webpage)
|
||||||
|
]
|
||||||
|
|
||||||
|
playlist_title = self._html_search_regex(
|
||||||
|
self._PLAYLIST_TITLE_RE, webpage, 'playlist title',
|
||||||
|
default=None) or self._og_search_title(webpage)
|
||||||
|
|
||||||
|
return self.playlist_result(entries, playlist_id, playlist_title)
|
||||||
|
|
||||||
|
|
||||||
|
class VPROIE(NPOPlaylistBaseIE):
|
||||||
IE_NAME = 'vpro'
|
IE_NAME = 'vpro'
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
|
_VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html'
|
||||||
|
_PLAYLIST_TITLE_RE = r'<title>\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*</title>'
|
||||||
|
_PLAYLIST_ENTRY_RE = r'data-media-id="([^"]+)"'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
|
@ -473,48 +493,17 @@ class VPROIE(NPOIE):
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
playlist_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
class WNLIE(NPOPlaylistBaseIE):
|
||||||
|
|
||||||
entries = [
|
|
||||||
self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id)
|
|
||||||
for video_id in re.findall(r'data-media-id="([^"]+)"', webpage)
|
|
||||||
]
|
|
||||||
|
|
||||||
playlist_title = self._search_regex(
|
|
||||||
r'<title>\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*</title>',
|
|
||||||
webpage, 'playlist title', default=None) or self._og_search_title(webpage)
|
|
||||||
|
|
||||||
return self.playlist_result(entries, playlist_id, playlist_title)
|
|
||||||
|
|
||||||
|
|
||||||
class WNLIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P<id>[^/]+)__\d+'
|
_VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P<id>[^/]+)__\d+'
|
||||||
|
_PLAYLIST_TITLE_RE = r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>'
|
||||||
|
_PLAYLIST_ENTRY_RE = r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>Deel \d+'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515',
|
'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'vandaag-de-dag-6-mei',
|
'id': 'vandaag-de-dag-6-mei',
|
||||||
'title': 'Vandaag de Dag 6 mei',
|
'title': 'Vandaag de Dag 6 mei',
|
||||||
},
|
},
|
||||||
'playlist_count': 4,
|
'playlist_count': 4,
|
||||||
}
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
playlist_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
|
||||||
|
|
||||||
entries = [
|
|
||||||
self.url_result('npo:%s' % video_id, 'NPO')
|
|
||||||
for video_id, part in re.findall(
|
|
||||||
r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>(Deel \d+)', webpage)
|
|
||||||
]
|
|
||||||
|
|
||||||
playlist_title = self._html_search_regex(
|
|
||||||
r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>',
|
|
||||||
webpage, 'playlist title')
|
|
||||||
|
|
||||||
return self.playlist_result(entries, playlist_id, playlist_title)
|
|
||||||
|
|
Loading…
Reference in a new issue