mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-28 19:58:38 +01:00
stations in url valid check
This commit is contained in:
parent
53cebe3a41
commit
69f2dea115
1 changed files with 20 additions and 15 deletions
|
@ -1,4 +1,5 @@
|
||||||
import re
|
import re
|
||||||
|
import urllib.parse
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
|
@ -192,7 +193,7 @@ class PBSIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)https?://
|
_VALID_URL = r'''(?x)https?://
|
||||||
(?:
|
(?:
|
||||||
# Direct video URL
|
# Direct video URL
|
||||||
(?:%s)/(?:(?:vir|port)alplayer|video)/(?P<id>[0-9]+)(?:[?/]|$) |
|
(?:%s)/(?!show)(?:(?:vir|port)alplayer|video)/(?P<id>[0-9]+)(?:[?/]|$) |
|
||||||
# Article with embedded player (or direct video)
|
# Article with embedded player (or direct video)
|
||||||
(?:www\.)?pbs\.org/(?!show)(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
|
(?:www\.)?pbs\.org/(?!show)(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) |
|
||||||
# Player
|
# Player
|
||||||
|
@ -763,11 +764,14 @@ class PBSKidsIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class PBSShowIE(InfoExtractor):
|
class PBSShowIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:https://)?(?:www\.)?pbs\.org\/show\/(?P<presumptive_id>[^/]+?)(?:\.html)?\/?(?:$|[?#])'
|
_VALID_URL = r'''(?x)https?://
|
||||||
|
(?:www\.)?(?:%s)/show\/(?P<presumptive_id>[^/]+?)(?:\.html)?\/?(?:$|[?#])
|
||||||
|
''' % '|'.join(list(zip(*PBSIE._STATIONS))[0])
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
# Full Show
|
# Full Show
|
||||||
{
|
{
|
||||||
'url': 'https://www.pbs.org/show/oregon-experience',
|
'url': 'https://video.ksps.org/show/oregon-experience/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'oregon-experience',
|
'id': 'oregon-experience',
|
||||||
'title': 'Oregon Experience',
|
'title': 'Oregon Experience',
|
||||||
|
@ -780,7 +784,7 @@ class PBSShowIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
# Single Special
|
# Single Special
|
||||||
{
|
{
|
||||||
'url': 'https://www.pbs.org/show/betrayed-survivng-american-concentration-camp',
|
'url': 'https://video.ksps.org/show/betrayed-survivng-american-concentration-camp',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'betrayed-survivng-american-concentration-camp',
|
'id': 'betrayed-survivng-american-concentration-camp',
|
||||||
'title': 'Betrayed: Surviving an American Concentration Camp',
|
'title': 'Betrayed: Surviving an American Concentration Camp',
|
||||||
|
@ -793,7 +797,7 @@ class PBSShowIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
# Non-Season Episodes (uses season 1)
|
# Non-Season Episodes (uses season 1)
|
||||||
{
|
{
|
||||||
'url': 'https://www.pbs.org/show/a-brief-history-of-the-future/',
|
'url': 'https://video.ksps.org/show/a-brief-history-of-the-future/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'a-brief-history-of-the-future',
|
'id': 'a-brief-history-of-the-future',
|
||||||
'title': 'A Brief History of the Future',
|
'title': 'A Brief History of the Future',
|
||||||
|
@ -810,9 +814,8 @@ class PBSShowIE(InfoExtractor):
|
||||||
_SHOW_JSON_SEARCH = r'GTMDataLayer\.push\('
|
_SHOW_JSON_SEARCH = r'GTMDataLayer\.push\('
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _make_url(playlist_id):
|
def _make_url(url, playlist_id):
|
||||||
# pbs does not show metadata, use a different station that does
|
return f'https://{urllib.parse.urlparse(url).netloc}/show/{playlist_id}'
|
||||||
return f'https://video.ksps.org/show/{playlist_id}'
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_episode(popover_html):
|
def _extract_episode(popover_html):
|
||||||
|
@ -822,15 +825,15 @@ class PBSShowIE(InfoExtractor):
|
||||||
return maybe_ep[1]
|
return maybe_ep[1]
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _iterate_entries(self, playlist_id, season_indices):
|
def _iterate_entries(self, url, playlist_id, season_indices):
|
||||||
playlist_url = self._make_url(playlist_id)
|
base_url = urllib.parse.urlparse(url).netloc
|
||||||
|
|
||||||
for season_idx in season_indices:
|
for season_idx in season_indices:
|
||||||
season_id = f'{playlist_id}-season-{season_idx}'
|
season_id = f'{playlist_id}-season-{season_idx}'
|
||||||
|
|
||||||
season_page = self._download_webpage(
|
season_page = self._download_webpage(
|
||||||
f'{playlist_url}/episodes/season/{season_idx}'
|
f'{url}/episodes/season/{season_idx}'
|
||||||
if season_idx > 0 else f'{playlist_url}/specials',
|
if season_idx > 0 else f'{url}/specials',
|
||||||
video_id=season_id
|
video_id=season_id
|
||||||
)
|
)
|
||||||
episodes = [
|
episodes = [
|
||||||
|
@ -850,7 +853,7 @@ class PBSShowIE(InfoExtractor):
|
||||||
url_kwargs['episode'] = episode_indices[i]
|
url_kwargs['episode'] = episode_indices[i]
|
||||||
|
|
||||||
yield self.url_result(
|
yield self.url_result(
|
||||||
url=f'https://pbs.org/video/{ep["data-video-slug"]}',
|
url=f'https://{base_url}/video/{ep["data-video-slug"]}',
|
||||||
ie=PBSIE,
|
ie=PBSIE,
|
||||||
video_id=ep["data-cid"],
|
video_id=ep["data-cid"],
|
||||||
url_transparent=True,
|
url_transparent=True,
|
||||||
|
@ -861,7 +864,9 @@ class PBSShowIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_valid_url(url).group('presumptive_id')
|
playlist_id = self._match_valid_url(url).group('presumptive_id')
|
||||||
webpage = self._download_webpage(self._make_url(playlist_id), playlist_id)
|
url = self._make_url(url=url, playlist_id=playlist_id)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
show_data = self._search_json(self._JSON_SEARCH, webpage, 'seasons', playlist_id)
|
show_data = self._search_json(self._JSON_SEARCH, webpage, 'seasons', playlist_id)
|
||||||
|
|
||||||
playlist_description = clean_html(get_element_html_by_class(
|
playlist_description = clean_html(get_element_html_by_class(
|
||||||
|
@ -886,7 +891,7 @@ class PBSShowIE(InfoExtractor):
|
||||||
season_indices = [0] + season_indices
|
season_indices = [0] + season_indices
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
LazyList(self._iterate_entries(playlist_id, season_indices)),
|
LazyList(self._iterate_entries(url, playlist_id, season_indices)),
|
||||||
playlist_id=playlist_id,
|
playlist_id=playlist_id,
|
||||||
playlist_title=playlist_title,
|
playlist_title=playlist_title,
|
||||||
playlist_description=playlist_description,
|
playlist_description=playlist_description,
|
||||||
|
|
Loading…
Add table
Reference in a new issue