mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-29 20:35:06 +01:00
working 100%
This commit is contained in:
parent
bebcaf482e
commit
6f42a5e1f5
1 changed files with 68 additions and 20 deletions
|
@ -764,8 +764,9 @@ class PBSKidsIE(InfoExtractor):
|
||||||
|
|
||||||
class PBSShowIE(InfoExtractor):
|
class PBSShowIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:https://)?(?:www\.)?pbs\.org\/show\/(?P<presumptive_id>[^/]+?)(?:\.html)?\/?(?:$|[?#])'
|
_VALID_URL = r'(?:https://)?(?:www\.)?pbs\.org\/show\/(?P<presumptive_id>[^/]+?)(?:\.html)?\/?(?:$|[?#])'
|
||||||
|
_TESTS = [
|
||||||
_TESTS = [{
|
# Full Show
|
||||||
|
{
|
||||||
'url': 'https://www.pbs.org/show/oregon-experience',
|
'url': 'https://www.pbs.org/show/oregon-experience',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'oregon-experience',
|
'id': 'oregon-experience',
|
||||||
|
@ -776,7 +777,34 @@ class PBSShowIE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}]
|
},
|
||||||
|
# Single Special
|
||||||
|
{
|
||||||
|
'url': 'https://www.pbs.org/show/betrayed-survivng-american-concentration-camp',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'betrayed-survivng-american-concentration-camp',
|
||||||
|
'title': 'Betrayed: Surviving an American Concentration Camp',
|
||||||
|
'description': 'md5:7e78ee497f1359c030d54d68339f31e8',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 1,
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
# Non-Season Episodes (uses season 1)
|
||||||
|
{
|
||||||
|
'url': 'https://www.pbs.org/show/a-brief-history-of-the-future/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'a-brief-history-of-the-future',
|
||||||
|
'title': 'A Brief History of the Future',
|
||||||
|
'description': 'md5:08297c374c61361ac3f3d297b5157913',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 1,
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
_JSON_SEARCH = r'<script[^>]+id="content-strip-data" type="application/json">'
|
_JSON_SEARCH = r'<script[^>]+id="content-strip-data" type="application/json">'
|
||||||
_SHOW_JSON_SEARCH = r'GTMDataLayer\.push\('
|
_SHOW_JSON_SEARCH = r'GTMDataLayer\.push\('
|
||||||
|
@ -786,6 +814,14 @@ class PBSShowIE(InfoExtractor):
|
||||||
# pbs does not show metadata, use a different station that does
|
# pbs does not show metadata, use a different station that does
|
||||||
return f'https://video.ksps.org/show/{playlist_id}'
|
return f'https://video.ksps.org/show/{playlist_id}'
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_episode(popover_html):
|
||||||
|
clean = clean_html(popover_html)
|
||||||
|
maybe_ep = re.search(r"Ep(\d+) ", clean)
|
||||||
|
if maybe_ep is not None:
|
||||||
|
return maybe_ep[1]
|
||||||
|
return None
|
||||||
|
|
||||||
def _iterate_entries(self, playlist_id, season_indices):
|
def _iterate_entries(self, playlist_id, season_indices):
|
||||||
playlist_url = self._make_url(playlist_id)
|
playlist_url = self._make_url(playlist_id)
|
||||||
|
|
||||||
|
@ -793,24 +829,34 @@ class PBSShowIE(InfoExtractor):
|
||||||
season_id = f'{playlist_id}-season-{season_idx}'
|
season_id = f'{playlist_id}-season-{season_idx}'
|
||||||
|
|
||||||
season_page = self._download_webpage(
|
season_page = self._download_webpage(
|
||||||
f'{playlist_url}/episodes/season/{season_idx}',
|
f'{playlist_url}/episodes/season/{season_idx}'
|
||||||
|
if season_idx > 0 else f'{playlist_url}/specials',
|
||||||
video_id=season_id
|
video_id=season_id
|
||||||
)
|
)
|
||||||
episodes_metadata = [
|
episodes = [
|
||||||
extract_attributes(elem)
|
extract_attributes(elem)
|
||||||
for elem in get_elements_html_by_class("video-summary", season_page)
|
for elem in get_elements_html_by_class("video-summary", season_page)
|
||||||
]
|
]
|
||||||
num_eps = len(episodes_metadata)
|
if not episodes:
|
||||||
for i, episode_metadata in enumerate(episodes_metadata):
|
continue
|
||||||
print(f's{season_idx}e{num_eps - i} {episode_metadata["data-title"]}')
|
|
||||||
|
episode_indices = [
|
||||||
|
self._extract_episode(elem)
|
||||||
|
for elem in get_elements_html_by_class("popover__meta-data", season_page)
|
||||||
|
]
|
||||||
|
for i, ep in enumerate(episodes):
|
||||||
|
url_kwargs = {}
|
||||||
|
if len(episode_indices) == len(episodes) and episode_indices[i] is not None:
|
||||||
|
url_kwargs['episode'] = episode_indices[i]
|
||||||
|
|
||||||
yield self.url_result(
|
yield self.url_result(
|
||||||
url=f'https://pbs.org/video/{episode_metadata["data-video-slug"]}',
|
url=f'https://pbs.org/video/{ep["data-video-slug"]}',
|
||||||
ie=PBSIE,
|
ie=PBSIE,
|
||||||
video_id=episode_metadata["data-cid"],
|
video_id=ep["data-cid"],
|
||||||
url_transparent=True,
|
url_transparent=True,
|
||||||
title=episode_metadata["data-title"],
|
title=ep["data-title"],
|
||||||
season=season_idx,
|
season=season_idx,
|
||||||
episode_index=num_eps - i,
|
**url_kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -836,6 +882,8 @@ class PBSShowIE(InfoExtractor):
|
||||||
],
|
],
|
||||||
reverse=True
|
reverse=True
|
||||||
))
|
))
|
||||||
|
if not self._configuration_arg('exclude_specials', [None])[0]:
|
||||||
|
season_indices = [0] + season_indices
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
LazyList(self._iterate_entries(playlist_id, season_indices)),
|
LazyList(self._iterate_entries(playlist_id, season_indices)),
|
||||||
|
|
Loading…
Add table
Reference in a new issue