merge 'master'

This commit is contained in:
Mozi 2024-05-31 18:25:30 +00:00
commit 365e615d11
11 changed files with 216 additions and 92 deletions

View file

@ -237,7 +237,7 @@ jobs:
macos:
needs: process
if: inputs.macos
runs-on: macos-11
runs-on: macos-12
steps:
- uses: actions/checkout@v4
@ -260,11 +260,23 @@ jobs:
--pre -d curl_cffi_whls \
-r requirements.txt
done
( # Overwrite x86_64-only libs with fat/universal2 libs or else Pyinstaller will do the opposite
# See https://github.com/yt-dlp/yt-dlp/pull/10069
cd curl_cffi_whls
mkdir -p curl_cffi/.dylibs
python_libdir=$(python3 -c 'import sys; from pathlib import Path; print(Path(sys.path[1]).parent)')
for dylib in lib{ssl,crypto}.3.dylib; do
cp "${python_libdir}/${dylib}" "curl_cffi/.dylibs/${dylib}"
for wheel in curl_cffi*macos*x86_64.whl; do
zip "${wheel}" "curl_cffi/.dylibs/${dylib}"
done
done
)
python3 -m delocate.cmd.delocate_fuse curl_cffi_whls/curl_cffi*.whl -w curl_cffi_universal2
python3 -m delocate.cmd.delocate_fuse curl_cffi_whls/cffi*.whl -w curl_cffi_universal2
cd curl_cffi_universal2
for wheel in *cffi*.whl; do mv -n -- "${wheel}" "${wheel/x86_64/universal2}"; done
python3 -m pip install -U --user *cffi*.whl
for wheel in ./*cffi*.whl; do mv -n -- "${wheel}" "${wheel/x86_64/universal2}"; done
python3 -m pip install -U --user ./*cffi*.whl
- name: Prepare
run: |
@ -311,7 +323,7 @@ jobs:
# Hack to get the latest patch version. Uncomment if needed
#brew install python@3.10
#export PYTHON_VERSION=$( $(brew --prefix)/opt/python@3.10/bin/python3 --version | cut -d ' ' -f 2 )
curl https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-macos11.pkg -o "python.pkg"
curl "https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-macos11.pkg" -o "python.pkg"
sudo installer -pkg python.pkg -target /
python3 --version
- name: Install Requirements
@ -361,7 +373,7 @@ jobs:
run: | # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
python devscripts/install_deps.py -o --include build
python devscripts/install_deps.py --include curl-cffi
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-5.8.0-py3-none-any.whl"
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.7.0-py3-none-any.whl"
- name: Prepare
run: |
@ -421,7 +433,7 @@ jobs:
run: |
python devscripts/install_deps.py -o --include build
python devscripts/install_deps.py
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-5.8.0-py3-none-any.whl"
python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.7.0-py3-none-any.whl"
- name: Prepare
run: |
@ -475,8 +487,8 @@ jobs:
run: |
cd ./artifact/
# make sure SHA sums are also printed to stdout
sha256sum * | tee ../SHA2-256SUMS
sha512sum * | tee ../SHA2-512SUMS
sha256sum -- * | tee ../SHA2-256SUMS
sha512sum -- * | tee ../SHA2-512SUMS
- name: Make Update spec
run: |

View file

@ -630,3 +630,4 @@ TuxCoder
voidful
vtexier
WyohKnott
trueauracoral

View file

@ -4,6 +4,17 @@
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
-->
### 2024.05.27
#### Extractor changes
- [Fix parsing of base URL in SMIL manifest](https://github.com/yt-dlp/yt-dlp/commit/26603d0b34898818992bee4598e0607c07059511) ([#9225](https://github.com/yt-dlp/yt-dlp/issues/9225)) by [seproDev](https://github.com/seproDev)
- **peertube**: [Support livestreams](https://github.com/yt-dlp/yt-dlp/commit/12b248ce60be1aa1362edd839d915bba70dbee4b) ([#10044](https://github.com/yt-dlp/yt-dlp/issues/10044)) by [bashonly](https://github.com/bashonly), [trueauracoral](https://github.com/trueauracoral)
- **piksel**: [Update domain](https://github.com/yt-dlp/yt-dlp/commit/ae2194e1dd4a99d32eb3cab7c48a0ff03101ef3b) ([#9223](https://github.com/yt-dlp/yt-dlp/issues/9223)) by [seproDev](https://github.com/seproDev)
- **tiktok**: user: [Fix extraction loop](https://github.com/yt-dlp/yt-dlp/commit/c53c2e40fde8f2e15c7c62f8ca1a5d9e90ddc079) ([#10035](https://github.com/yt-dlp/yt-dlp/issues/10035)) by [bashonly](https://github.com/bashonly)
#### Misc. changes
- **cleanup**: Miscellaneous: [5e3e19c](https://github.com/yt-dlp/yt-dlp/commit/5e3e19c93c52830da98d9d1ed84ea7a559efefbd) by [bashonly](https://github.com/bashonly)
### 2024.05.26
#### Core changes

View file

@ -401,6 +401,9 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
--impersonate CLIENT[:OS] Client to impersonate for requests. E.g.
chrome, chrome-110, chrome:windows-10. Pass
--impersonate="" to impersonate any client.
Note that forcing impersonation for all
requests may have a detrimental impact on
download speed and stability
--list-impersonate-targets List available clients to impersonate.
-4, --force-ipv4 Make all connections via IPv4
-6, --force-ipv6 Make all connections via IPv6

View file

@ -62,7 +62,7 @@ build = [
"build",
"hatchling",
"pip",
"setuptools>=66.1.0,<70",
"setuptools",
"wheel",
]
dev = [
@ -78,8 +78,7 @@ test = [
"pytest~=8.1",
]
pyinstaller = [
"pyinstaller>=6.3; sys_platform!='darwin'",
"pyinstaller==5.13.2; sys_platform=='darwin'", # needed for curl_cffi
"pyinstaller>=6.7.0", # for compat with setuptools>=70
]
py2exe = [
"py2exe>=0.12",

View file

@ -12,7 +12,9 @@ from ..utils import (
mimetype2ext,
orderedSet,
parse_age_limit,
parse_iso8601,
remove_end,
str_or_none,
strip_jsonp,
try_call,
unified_strdate,
@ -390,7 +392,7 @@ class ORFFM4StoryIE(InfoExtractor):
class ORFONIE(InfoExtractor):
IE_NAME = 'orf:on'
_VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d+)'
_VALID_URL = r'https?://on\.orf\.at/video/(?P<id>\d+)(?:/(?P<segment>\d+))?'
_TESTS = [{
'url': 'https://on.orf.at/video/14210000/school-of-champions-48',
'info_dict': {
@ -401,10 +403,14 @@ class ORFONIE(InfoExtractor):
'title': 'School of Champions (4/8)',
'description': 'md5:d09ad279fc2e8502611e7648484b6afd',
'media_type': 'episode',
'timestamp': 1706472362,
'upload_date': '20240128',
'timestamp': 1706558922,
'upload_date': '20240129',
'release_timestamp': 1706472362,
'release_date': '20240128',
'modified_timestamp': 1712756663,
'modified_date': '20240410',
'_old_archive_ids': ['orftvthek 14210000'],
}
},
}, {
'url': 'https://on.orf.at/video/3220355',
'md5': 'f94d98e667cf9a3851317efb4e136662',
@ -418,18 +424,87 @@ class ORFONIE(InfoExtractor):
'media_type': 'episode',
'timestamp': 52916400,
'upload_date': '19710905',
'release_timestamp': 52916400,
'release_date': '19710905',
'modified_timestamp': 1498536049,
'modified_date': '20170627',
'_old_archive_ids': ['orftvthek 3220355'],
}
},
}, {
# Video with multiple segments selecting the second segment
'url': 'https://on.orf.at/video/14226549/15639808/jugendbande-einbrueche-aus-langeweile',
'md5': '90f4ebff86b4580837b8a361d0232a9e',
'info_dict': {
'id': '15639808',
'ext': 'mp4',
'duration': 97.707,
'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0175/43/thumb_17442704_segments_highlight_teaser.jpg',
'title': 'Jugendbande: Einbrüche aus Langeweile',
'description': 'md5:193df0bf0d91cf16830c211078097120',
'media_type': 'segment',
'timestamp': 1715792400,
'upload_date': '20240515',
'modified_timestamp': 1715794394,
'modified_date': '20240515',
'_old_archive_ids': ['orftvthek 15639808'],
},
'params': {'noplaylist': True},
}, {
# Video with multiple segments and no combined version
'url': 'https://on.orf.at/video/14227864/formel-1-grosser-preis-von-monaco-2024',
'info_dict': {
'_type': 'multi_video',
'id': '14227864',
'duration': 18410.52,
'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0176/04/thumb_17503881_segments_highlight_teaser.jpg',
'title': 'Formel 1: Großer Preis von Monaco 2024',
'description': 'md5:aeeb010710ccf70ce28ccb4482243d4f',
'media_type': 'episode',
'timestamp': 1716721200,
'upload_date': '20240526',
'release_timestamp': 1716721802,
'release_date': '20240526',
'modified_timestamp': 1716967501,
'modified_date': '20240529',
},
'playlist_count': 42,
}, {
# Video with multiple segments, but with combined version
'url': 'https://on.orf.at/video/14228172',
'info_dict': {
'id': '14228172',
'ext': 'mp4',
'duration': 3294.878,
'thumbnail': 'https://api-tvthek.orf.at/assets/segments/0176/17/thumb_17516455_segments_highlight_teaser.jpg',
'title': 'Willkommen Österreich mit Stermann & Grissemann',
'description': 'md5:5de034d033a9c27f989343be3bbd4839',
'media_type': 'episode',
'timestamp': 1716926584,
'upload_date': '20240528',
'release_timestamp': 1716919202,
'release_date': '20240528',
'modified_timestamp': 1716968045,
'modified_date': '20240529',
'_old_archive_ids': ['orftvthek 14228172'],
},
}]
def _extract_video(self, video_id):
encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()
api_json = self._download_json(
f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', video_id)
if traverse_obj(api_json, 'is_drm_protected'):
self.report_drm(video_id)
@staticmethod
def _parse_metadata(api_json):
    """Extract the shared metadata fields from an ORF ON API JSON object.

    The same mapping is applied to the episode-level API response and to
    the individual segment objects embedded in it, so every field here is
    optional: keys whose path yields nothing are simply absent from the
    returned dict.

    @param api_json  Parsed JSON object of an episode or of one segment
    @returns         Dict of info-dict fields (id, title, timestamps, ...)
    """
    field_paths = {
        # Only integer ids are accepted; they are emitted as strings
        'id': ('id', {int}, {str_or_none}),
        'age_limit': ('age_classification', {parse_age_limit}),
        # NOTE(review): 'exact_duration' is presumably in milliseconds,
        # hence scale=1000 - confirm against the API
        'duration': ('exact_duration', {functools.partial(float_or_none, scale=1000)}),
        # Nested tuples list alternative source keys for the same field
        'title': (('title', 'headline'), {str}),
        'description': (('description', 'teaser_text'), {str}),
        'media_type': ('video_type', {str}),
        'thumbnail': ('_embedded', 'image', 'public_urls', 'highlight_teaser', 'url', {url_or_none}),
        'timestamp': (('date', 'episode_date'), {parse_iso8601}),
        'release_timestamp': ('release_date', {parse_iso8601}),
        'modified_timestamp': ('updated_at', {parse_iso8601}),
    }
    # get_all=False: a single value per field rather than a list of all matches
    return traverse_obj(api_json, field_paths, get_all=False)
def _extract_video_info(self, video_id, api_json):
formats, subtitles = [], {}
for manifest_type in traverse_obj(api_json, ('sources', {dict.keys}, ...)):
for manifest_url in traverse_obj(api_json, ('sources', manifest_type, ..., 'src', {url_or_none})):
@ -454,24 +529,30 @@ class ORFONIE(InfoExtractor):
'formats': formats,
'subtitles': subtitles,
'_old_archive_ids': [make_archive_id('ORFTVthek', video_id)],
**traverse_obj(api_json, {
'age_limit': ('age_classification', {parse_age_limit}),
'duration': ('duration_second', {float_or_none}),
'title': (('title', 'headline'), {str}),
'description': (('description', 'teaser_text'), {str}),
'media_type': ('video_type', {str}),
}, get_all=False),
**self._parse_metadata(api_json),
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_id, segment_id = self._match_valid_url(url).group('id', 'segment')
return {
'id': video_id,
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None),
'description': self._html_search_meta(
['description', 'og:description', 'twitter:description'], webpage, default=None),
**self._search_json_ld(webpage, video_id, fatal=False),
**self._extract_video(video_id),
}
encrypted_id = base64.b64encode(f'3dSlfek03nsLKdj4Jsd{video_id}'.encode()).decode()
api_json = self._download_json(
f'https://api-tvthek.orf.at/api/v4.3/public/episode/encrypted/{encrypted_id}', video_id)
if traverse_obj(api_json, 'is_drm_protected'):
self.report_drm(video_id)
segments = traverse_obj(api_json, ('_embedded', 'segments', lambda _, v: v['id']))
selected_segment = traverse_obj(segments, (lambda _, v: str(v['id']) == segment_id, any))
# selected_segment will be falsy if input URL did not include a valid segment_id
if selected_segment and not self._yes_playlist(video_id, segment_id, playlist_label='episode', video_label='segment'):
return self._extract_video_info(segment_id, selected_segment)
# Even some segmented videos have an unsegmented version available in API response root
if not traverse_obj(api_json, ('sources', ..., ..., 'src', {url_or_none})):
return self.playlist_result(
(self._extract_video_info(str(segment['id']), segment) for segment in segments),
video_id, **self._parse_metadata(api_json), multi_video=True)
return self._extract_video_info(video_id, api_json)

View file

@ -486,7 +486,8 @@ class PatreonCampaignIE(PatreonBaseIE):
campaign_id, vanity = self._match_valid_url(url).group('campaign_id', 'vanity')
if campaign_id is None:
webpage = self._download_webpage(url, vanity, headers={'User-Agent': self.USER_AGENT})
campaign_id = self._search_regex(r'https://www.patreon.com/api/campaigns/(\d+)/?', webpage, 'Campaign ID')
campaign_id = self._search_nextjs_data(
webpage, vanity)['props']['pageProps']['bootstrapEnvelope']['pageBootstrap']['campaign']['data']['id']
params = {
'json-api-use-default-includes': 'false',

View file

@ -1470,11 +1470,15 @@ class PeerTubeIE(InfoExtractor):
title = video['name']
formats = []
formats, is_live = [], False
files = video.get('files') or []
for playlist in (video.get('streamingPlaylists') or []):
if not isinstance(playlist, dict):
continue
if playlist_url := url_or_none(playlist.get('playlistUrl')):
is_live = True
formats.extend(self._extract_m3u8_formats(
playlist_url, video_id, fatal=False, live=True))
playlist_files = playlist.get('files')
if not (playlist_files and isinstance(playlist_files, list)):
continue
@ -1498,6 +1502,7 @@ class PeerTubeIE(InfoExtractor):
f['vcodec'] = 'none'
else:
f['fps'] = int_or_none(file_.get('fps'))
is_live = False
formats.append(f)
description = video.get('description')
@ -1555,6 +1560,7 @@ class PeerTubeIE(InfoExtractor):
'categories': categories,
'formats': formats,
'subtitles': subtitles,
'is_live': is_live,
'webpage_url': webpage_url,
}

View file

@ -7,33 +7,45 @@ from ..utils import (
int_or_none,
js_to_json,
traverse_obj,
url_or_none,
urlencode_postdata,
)
class TubiTvIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:
tubitv:|
https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/
)
(?P<id>[0-9]+)'''
_VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?P<type>video|movies|tv-shows)/(?P<id>\d+)'
_LOGIN_URL = 'http://tubitv.com/login'
_NETRC_MACHINE = 'tubitv'
_GEO_COUNTRIES = ['US']
_TESTS = [{
'url': 'https://tubitv.com/movies/383676/tracker',
'md5': '566fa0f76870302d11af0de89511d3f0',
'url': 'https://tubitv.com/movies/100004539/the-39-steps',
'info_dict': {
'id': '383676',
'id': '100004539',
'ext': 'mp4',
'title': 'Tracker',
'description': 'md5:ff320baf43d0ad2655e538c1d5cd9706',
'uploader_id': 'f866e2677ea2f0dff719788e4f7f9195',
'release_year': 2010,
'title': 'The 39 Steps',
'description': 'md5:bb2f2dd337f0dc58c06cb509943f54c8',
'uploader_id': 'abc2558d54505d4f0f32be94f2e7108c',
'release_year': 1935,
'thumbnail': r're:^https?://.+\.(jpe?g|png)$',
'duration': 6122,
'duration': 5187,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://tubitv.com/tv-shows/554628/s01-e01-rise-of-the-snakes',
'info_dict': {
'id': '554628',
'ext': 'mp4',
'title': 'S01:E01 - Rise of the Snakes',
'description': 'md5:ba136f586de53af0372811e783a3f57d',
'episode': 'Rise of the Snakes',
'episode_number': 1,
'season': 'Season 1',
'season_number': 1,
'uploader_id': '2a9273e728c510d22aa5c57d0646810b',
'release_year': 2011,
'thumbnail': r're:^https?://.+\.(jpe?g|png)$',
'duration': 1376,
},
'params': {'skip_download': 'm3u8'},
}, {
'url': 'http://tubitv.com/video/283829/the_comedian_at_the_friday',
'md5': '43ac06be9326f41912dc64ccf7a80320',
@ -81,45 +93,39 @@ class TubiTvIE(InfoExtractor):
'Login failed (invalid username/password)', expected=True)
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(f'https://tubitv.com/oz/videos/{video_id}/content', video_id, query={
'video_resources': ['dash', 'hlsv3', 'hlsv6', *self._UNPLAYABLE_FORMATS],
})
title = video_data['title']
video_id, video_type = self._match_valid_url(url).group('id', 'type')
webpage = self._download_webpage(f'https://tubitv.com/{video_type}/{video_id}/', video_id)
video_data = self._search_json(
r'window\.__data\s*=', webpage, 'data', video_id,
transform_source=js_to_json)['video']['byId'][video_id]
formats = []
drm_formats = False
for resource in video_data['video_resources']:
if resource['type'] in ('dash', ):
formats += self._extract_mpd_formats(resource['manifest']['url'], video_id, mpd_id=resource['type'], fatal=False)
elif resource['type'] in ('hlsv3', 'hlsv6'):
formats += self._extract_m3u8_formats(resource['manifest']['url'], video_id, 'mp4', m3u8_id=resource['type'], fatal=False)
elif resource['type'] in self._UNPLAYABLE_FORMATS:
for resource in traverse_obj(video_data, ('video_resources', lambda _, v: url_or_none(v['manifest']['url']))):
resource_type = resource.get('type')
manifest_url = resource['manifest']['url']
if resource_type == 'dash':
formats.extend(self._extract_mpd_formats(manifest_url, video_id, mpd_id=resource_type, fatal=False))
elif resource_type in ('hlsv3', 'hlsv6'):
formats.extend(self._extract_m3u8_formats(manifest_url, video_id, 'mp4', m3u8_id=resource_type, fatal=False))
elif resource_type in self._UNPLAYABLE_FORMATS:
drm_formats = True
else:
self.report_warning(f'Skipping unknown resource type "{resource_type}"')
if not formats and drm_formats:
self.report_drm(video_id)
elif not formats and not video_data.get('policy_match'): # policy_match is False if content was removed
raise ExtractorError('This content is currently unavailable', expected=True)
thumbnails = []
for thumbnail_url in video_data.get('thumbnails', []):
if not thumbnail_url:
continue
thumbnails.append({
'url': self._proto_relative_url(thumbnail_url),
})
subtitles = {}
for sub in video_data.get('subtitles', []):
sub_url = sub.get('url')
if not sub_url:
continue
for sub in traverse_obj(video_data, ('subtitles', lambda _, v: url_or_none(v['url']))):
subtitles.setdefault(sub.get('lang', 'English'), []).append({
'url': self._proto_relative_url(sub_url),
'url': self._proto_relative_url(sub['url']),
})
title = traverse_obj(video_data, ('title', {str}))
season_number, episode_number, episode_title = self._search_regex(
r'^S(\d+):E(\d+) - (.+)', title, 'episode info', fatal=False, group=(1, 2, 3), default=(None, None, None))
@ -128,18 +134,21 @@ class TubiTvIE(InfoExtractor):
'title': title,
'formats': formats,
'subtitles': subtitles,
'thumbnails': thumbnails,
'description': video_data.get('description'),
'duration': int_or_none(video_data.get('duration')),
'uploader_id': video_data.get('publisher_id'),
'release_year': int_or_none(video_data.get('year')),
'season_number': int_or_none(season_number),
'episode_number': int_or_none(episode_number),
'episode_title': episode_title
'episode': episode_title,
**traverse_obj(video_data, {
'description': ('description', {str}),
'duration': ('duration', {int_or_none}),
'uploader_id': ('publisher_id', {str}),
'release_year': ('year', {int_or_none}),
'thumbnails': ('thumbnails', ..., {url_or_none}, {'url': {self._proto_relative_url}}),
}),
}
class TubiTvShowIE(InfoExtractor):
_WORKING = False
_VALID_URL = r'https?://(?:www\.)?tubitv\.com/series/[0-9]+/(?P<show_name>[^/?#]+)'
_TESTS = [{
'url': 'https://tubitv.com/series/3936/the-joy-of-painting-with-bob-ross?start=true',
@ -160,7 +169,7 @@ class TubiTvShowIE(InfoExtractor):
if traverse_obj(show_json, ('byId', episode_id, 'type')) == 's':
continue
yield self.url_result(
'tubitv:%s' % episode_id,
f'https://tubitv.com/tv-shows/{episode_id}/',
ie=TubiTvIE.ie_key(), video_id=episode_id)
def _real_extract(self, url):

View file

@ -520,7 +520,8 @@ def create_parser():
metavar='CLIENT[:OS]', dest='impersonate', default=None,
help=(
'Client to impersonate for requests. E.g. chrome, chrome-110, chrome:windows-10. '
'Pass --impersonate="" to impersonate any client.'),
'Pass --impersonate="" to impersonate any client. Note that forcing impersonation '
'for all requests may have a detrimental impact on download speed and stability'),
)
network.add_option(
'--list-impersonate-targets',

View file

@ -1,8 +1,8 @@
# Autogenerated by devscripts/update-version.py
__version__ = '2024.05.26'
__version__ = '2024.05.27'
RELEASE_GIT_HEAD = 'ae2af1104f80caf2f47544763a33db2c17a3e1de'
RELEASE_GIT_HEAD = '12b248ce60be1aa1362edd839d915bba70dbee4b'
VARIANT = None
@ -12,4 +12,4 @@ CHANNEL = 'stable'
ORIGIN = 'yt-dlp/yt-dlp'
_pkg_version = '2024.05.26'
_pkg_version = '2024.05.27'