mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-13 20:01:57 +01:00
[ie/mlbtv] Fix extractor (#10515)
Some checks are pending
CodeQL / Analyze (python) (push) Waiting to run
Download Tests / Quick Download Tests (push) Waiting to run
Download Tests / Full Download Tests (ubuntu-latest, 3.10) (push) Waiting to run
Download Tests / Full Download Tests (ubuntu-latest, 3.11) (push) Waiting to run
Download Tests / Full Download Tests (ubuntu-latest, 3.12) (push) Waiting to run
Download Tests / Full Download Tests (ubuntu-latest, pypy-3.10) (push) Waiting to run
Download Tests / Full Download Tests (ubuntu-latest, pypy-3.8) (push) Waiting to run
Download Tests / Full Download Tests (windows-latest, 3.8) (push) Waiting to run
Download Tests / Full Download Tests (windows-latest, pypy-3.9) (push) Waiting to run
Quick Test / Core Test (push) Waiting to run
Quick Test / Code check (push) Waiting to run
Release (master) / release (push) Waiting to run
Some checks are pending
CodeQL / Analyze (python) (push) Waiting to run
Download Tests / Quick Download Tests (push) Waiting to run
Download Tests / Full Download Tests (ubuntu-latest, 3.10) (push) Waiting to run
Download Tests / Full Download Tests (ubuntu-latest, 3.11) (push) Waiting to run
Download Tests / Full Download Tests (ubuntu-latest, 3.12) (push) Waiting to run
Download Tests / Full Download Tests (ubuntu-latest, pypy-3.10) (push) Waiting to run
Download Tests / Full Download Tests (ubuntu-latest, pypy-3.8) (push) Waiting to run
Download Tests / Full Download Tests (windows-latest, 3.8) (push) Waiting to run
Download Tests / Full Download Tests (windows-latest, pypy-3.9) (push) Waiting to run
Quick Test / Core Test (push) Waiting to run
Quick Test / Code check (push) Waiting to run
Release (master) / release (push) Waiting to run
Closes #10510 Authored by: bashonly
This commit is contained in:
parent
1a34a802f4
commit
f0993391e6
1 changed files with 180 additions and 43 deletions
|
@ -1,16 +1,21 @@
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
import urllib.parse
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..networking.exceptions import HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
join_nonempty,
|
join_nonempty,
|
||||||
|
jwt_decode_hs256,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
try_get,
|
try_get,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
from ..utils.traversal import traverse_obj
|
from ..utils.traversal import traverse_obj
|
||||||
|
|
||||||
|
@ -276,81 +281,213 @@ class MLBVideoIE(MLBBaseIE):
|
||||||
class MLBTVIE(InfoExtractor):
|
class MLBTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?mlb\.com/tv/g(?P<id>\d{6})'
|
_VALID_URL = r'https?://(?:www\.)?mlb\.com/tv/g(?P<id>\d{6})'
|
||||||
_NETRC_MACHINE = 'mlb'
|
_NETRC_MACHINE = 'mlb'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.mlb.com/tv/g661581/vee2eff5f-a7df-4c20-bdb4-7b926fa12638',
|
'url': 'https://www.mlb.com/tv/g661581/vee2eff5f-a7df-4c20-bdb4-7b926fa12638',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '661581',
|
'id': '661581',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '2022-07-02 - St. Louis Cardinals @ Philadelphia Phillies',
|
'title': '2022-07-02 - St. Louis Cardinals @ Philadelphia Phillies',
|
||||||
|
'release_date': '20220702',
|
||||||
|
'release_timestamp': 1656792300,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
_GRAPHQL_INIT_QUERY = '''\
|
||||||
|
mutation initSession($device: InitSessionInput!, $clientType: ClientType!, $experience: ExperienceTypeInput) {
|
||||||
|
initSession(device: $device, clientType: $clientType, experience: $experience) {
|
||||||
|
deviceId
|
||||||
|
sessionId
|
||||||
|
entitlements {
|
||||||
|
code
|
||||||
|
}
|
||||||
|
location {
|
||||||
|
countryCode
|
||||||
|
regionName
|
||||||
|
zipCode
|
||||||
|
latitude
|
||||||
|
longitude
|
||||||
|
}
|
||||||
|
clientExperience
|
||||||
|
features
|
||||||
|
}
|
||||||
|
}'''
|
||||||
|
_GRAPHQL_PLAYBACK_QUERY = '''\
|
||||||
|
mutation initPlaybackSession(
|
||||||
|
$adCapabilities: [AdExperienceType]
|
||||||
|
$mediaId: String!
|
||||||
|
$deviceId: String!
|
||||||
|
$sessionId: String!
|
||||||
|
$quality: PlaybackQuality
|
||||||
|
) {
|
||||||
|
initPlaybackSession(
|
||||||
|
adCapabilities: $adCapabilities
|
||||||
|
mediaId: $mediaId
|
||||||
|
deviceId: $deviceId
|
||||||
|
sessionId: $sessionId
|
||||||
|
quality: $quality
|
||||||
|
) {
|
||||||
|
playbackSessionId
|
||||||
|
playback {
|
||||||
|
url
|
||||||
|
token
|
||||||
|
expiration
|
||||||
|
cdn
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}'''
|
||||||
|
_APP_VERSION = '7.8.2'
|
||||||
|
_device_id = None
|
||||||
|
_session_id = None
|
||||||
_access_token = None
|
_access_token = None
|
||||||
|
_token_expiry = 0
|
||||||
|
|
||||||
|
@property
|
||||||
|
def _api_headers(self):
|
||||||
|
if (self._token_expiry - 120) <= time.time():
|
||||||
|
self.write_debug('Access token has expired; re-logging in')
|
||||||
|
self._perform_login(*self._get_login_info())
|
||||||
|
return {'Authorization': f'Bearer {self._access_token}'}
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
if not self._access_token:
|
if not self._access_token:
|
||||||
self.raise_login_required(
|
self.raise_login_required(
|
||||||
'All videos are only available to registered users', method='password')
|
'All videos are only available to registered users', method='password')
|
||||||
|
|
||||||
|
def _set_device_id(self, username):
|
||||||
|
if not self._device_id:
|
||||||
|
self._device_id = self.cache.load(
|
||||||
|
self._NETRC_MACHINE, 'device_ids', default={}).get(username)
|
||||||
|
if self._device_id:
|
||||||
|
return
|
||||||
|
self._device_id = str(uuid.uuid4())
|
||||||
|
self.cache.store(self._NETRC_MACHINE, 'device_ids', {username: self._device_id})
|
||||||
|
|
||||||
def _perform_login(self, username, password):
|
def _perform_login(self, username, password):
|
||||||
data = f'grant_type=password&username={urllib.parse.quote(username)}&password={urllib.parse.quote(password)}&scope=openid offline_access&client_id=0oa3e1nutA1HLzAKG356'
|
try:
|
||||||
access_token = self._download_json(
|
|
||||||
'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None,
|
|
||||||
headers={
|
|
||||||
'User-Agent': 'okhttp/3.12.1',
|
|
||||||
'Content-Type': 'application/x-www-form-urlencoded',
|
|
||||||
}, data=data.encode())['access_token']
|
|
||||||
|
|
||||||
entitlement = self._download_webpage(
|
|
||||||
f'https://media-entitlement.mlb.com/api/v3/jwt?os=Android&appname=AtBat&did={uuid.uuid4()}', None,
|
|
||||||
headers={
|
|
||||||
'User-Agent': 'okhttp/3.12.1',
|
|
||||||
'Authorization': f'Bearer {access_token}',
|
|
||||||
})
|
|
||||||
|
|
||||||
data = f'grant_type=urn:ietf:params:oauth:grant-type:token-exchange&subject_token={entitlement}&subject_token_type=urn:ietf:params:oauth:token-type:jwt&platform=android-tv'
|
|
||||||
self._access_token = self._download_json(
|
self._access_token = self._download_json(
|
||||||
'https://us.edge.bamgrid.com/token', None,
|
'https://ids.mlb.com/oauth2/aus1m088yK07noBfh356/v1/token', None,
|
||||||
headers={
|
'Logging in', 'Unable to log in', headers={
|
||||||
'Accept': 'application/json',
|
'User-Agent': 'okhttp/3.12.1',
|
||||||
'Authorization': 'Bearer bWxidHYmYW5kcm9pZCYxLjAuMA.6LZMbH2r--rbXcgEabaDdIslpo4RyZrlVfWZhsAgXIk',
|
|
||||||
'Content-Type': 'application/x-www-form-urlencoded',
|
'Content-Type': 'application/x-www-form-urlencoded',
|
||||||
}, data=data.encode())['access_token']
|
}, data=urlencode_postdata({
|
||||||
|
'grant_type': 'password',
|
||||||
|
'username': username,
|
||||||
|
'password': password,
|
||||||
|
'scope': 'openid offline_access',
|
||||||
|
'client_id': '0oa3e1nutA1HLzAKG356',
|
||||||
|
}))['access_token']
|
||||||
|
except ExtractorError as error:
|
||||||
|
if isinstance(error.cause, HTTPError) and error.cause.status == 400:
|
||||||
|
raise ExtractorError('Invalid username or password', expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
|
self._token_expiry = traverse_obj(self._access_token, ({jwt_decode_hs256}, 'exp', {int})) or 0
|
||||||
|
self._set_device_id(username)
|
||||||
|
|
||||||
|
self._session_id = self._call_api({
|
||||||
|
'operationName': 'initSession',
|
||||||
|
'query': self._GRAPHQL_INIT_QUERY,
|
||||||
|
'variables': {
|
||||||
|
'device': {
|
||||||
|
'appVersion': self._APP_VERSION,
|
||||||
|
'deviceFamily': 'desktop',
|
||||||
|
'knownDeviceId': self._device_id,
|
||||||
|
'languagePreference': 'ENGLISH',
|
||||||
|
'manufacturer': '',
|
||||||
|
'model': '',
|
||||||
|
'os': '',
|
||||||
|
'osVersion': '',
|
||||||
|
},
|
||||||
|
'clientType': 'WEB',
|
||||||
|
},
|
||||||
|
}, None, 'session ID')['data']['initSession']['sessionId']
|
||||||
|
|
||||||
|
def _call_api(self, data, video_id, description='GraphQL JSON', fatal=True):
|
||||||
|
return self._download_json(
|
||||||
|
'https://media-gateway.mlb.com/graphql', video_id,
|
||||||
|
f'Downloading {description}', f'Unable to download {description}', fatal=fatal,
|
||||||
|
headers={
|
||||||
|
**self._api_headers,
|
||||||
|
'Accept': 'application/json',
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'x-client-name': 'WEB',
|
||||||
|
'x-client-version': self._APP_VERSION,
|
||||||
|
}, data=json.dumps(data, separators=(',', ':')).encode())
|
||||||
|
|
||||||
|
def _extract_formats_and_subtitles(self, broadcast, video_id):
|
||||||
|
feed = traverse_obj(broadcast, ('homeAway', {str.title}))
|
||||||
|
medium = traverse_obj(broadcast, ('type', {str}))
|
||||||
|
language = traverse_obj(broadcast, ('language', {str.lower}))
|
||||||
|
format_id = join_nonempty(feed, medium, language)
|
||||||
|
|
||||||
|
response = self._call_api({
|
||||||
|
'operationName': 'initPlaybackSession',
|
||||||
|
'query': self._GRAPHQL_PLAYBACK_QUERY,
|
||||||
|
'variables': {
|
||||||
|
'adCapabilities': ['GOOGLE_STANDALONE_AD_PODS'],
|
||||||
|
'deviceId': self._device_id,
|
||||||
|
'mediaId': broadcast['mediaId'],
|
||||||
|
'quality': 'PLACEHOLDER',
|
||||||
|
'sessionId': self._session_id,
|
||||||
|
},
|
||||||
|
}, video_id, f'{format_id} broadcast JSON', fatal=False)
|
||||||
|
|
||||||
|
playback = traverse_obj(response, ('data', 'initPlaybackSession', 'playback', {dict}))
|
||||||
|
m3u8_url = traverse_obj(playback, ('url', {url_or_none}))
|
||||||
|
token = traverse_obj(playback, ('token', {str}))
|
||||||
|
|
||||||
|
if not (m3u8_url and token):
|
||||||
|
errors = '; '.join(traverse_obj(response, ('errors', ..., 'message', {str})))
|
||||||
|
if 'not entitled' in errors:
|
||||||
|
raise ExtractorError(errors, expected=True)
|
||||||
|
elif errors: # Only warn when 'blacked out' since radio formats are available
|
||||||
|
self.report_warning(f'API returned errors for {format_id}: {errors}')
|
||||||
|
else:
|
||||||
|
self.report_warning(f'No formats available for {format_id} broadcast; skipping')
|
||||||
|
return [], {}
|
||||||
|
|
||||||
|
cdn_headers = {'x-cdn-token': token}
|
||||||
|
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||||
|
m3u8_url.replace(f'/{token}/', '/'), video_id, 'mp4',
|
||||||
|
m3u8_id=format_id, fatal=False, headers=cdn_headers)
|
||||||
|
for fmt in fmts:
|
||||||
|
fmt['http_headers'] = cdn_headers
|
||||||
|
fmt.setdefault('format_note', join_nonempty(feed, medium, delim=' '))
|
||||||
|
fmt.setdefault('language', language)
|
||||||
|
if fmt.get('vcodec') == 'none' and fmt['language'] == 'en':
|
||||||
|
fmt['source_preference'] = 10
|
||||||
|
|
||||||
|
return fmts, subs
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
airings = self._download_json(
|
metadata = traverse_obj(self._download_json(
|
||||||
f'https://search-api-mlbtv.mlb.com/svc/search/v2/graphql/persisted/query/core/Airings?variables=%7B%22partnerProgramIds%22%3A%5B%22{video_id}%22%5D%2C%22applyEsniMediaRightsLabels%22%3Atrue%7D',
|
'https://statsapi.mlb.com/api/v1/schedule', video_id, query={
|
||||||
video_id)['data']['Airings']
|
'gamePk': video_id,
|
||||||
|
'hydrate': 'broadcasts(all),statusFlags',
|
||||||
|
}), ('dates', ..., 'games', lambda _, v: str(v['gamePk']) == video_id and v['broadcasts'], any))
|
||||||
|
|
||||||
|
broadcasts = traverse_obj(metadata, (
|
||||||
|
'broadcasts', lambda _, v: v['mediaId'] and v['mediaState']['mediaStateCode'] != 'MEDIA_OFF'))
|
||||||
|
|
||||||
formats, subtitles = [], {}
|
formats, subtitles = [], {}
|
||||||
for airing in traverse_obj(airings, lambda _, v: v['playbackUrls'][0]['href']):
|
for broadcast in broadcasts:
|
||||||
format_id = join_nonempty('feedType', 'feedLanguage', from_dict=airing)
|
fmts, subs = self._extract_formats_and_subtitles(broadcast, video_id)
|
||||||
m3u8_url = traverse_obj(self._download_json(
|
formats.extend(fmts)
|
||||||
airing['playbackUrls'][0]['href'].format(scenario='browser~csai'), video_id,
|
self._merge_subtitles(subs, target=subtitles)
|
||||||
note=f'Downloading {format_id} stream info JSON',
|
|
||||||
errnote=f'Failed to download {format_id} stream info, skipping',
|
|
||||||
fatal=False, headers={
|
|
||||||
'Authorization': self._access_token,
|
|
||||||
'Accept': 'application/vnd.media-service+json; version=2',
|
|
||||||
}), ('stream', 'complete', {url_or_none}))
|
|
||||||
if not m3u8_url:
|
|
||||||
continue
|
|
||||||
f, s = self._extract_m3u8_formats_and_subtitles(
|
|
||||||
m3u8_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)
|
|
||||||
formats.extend(f)
|
|
||||||
self._merge_subtitles(s, target=subtitles)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': traverse_obj(airings, (..., 'titles', 0, 'episodeName'), get_all=False),
|
'title': join_nonempty(
|
||||||
'is_live': traverse_obj(airings, (..., 'mediaConfig', 'productType'), get_all=False) == 'LIVE',
|
traverse_obj(metadata, ('officialDate', {str})),
|
||||||
|
traverse_obj(metadata, ('teams', ('away', 'home'), 'team', 'name', {str}, all, {' @ '.join})),
|
||||||
|
delim=' - '),
|
||||||
|
'is_live': traverse_obj(broadcasts, (..., 'mediaState', 'mediaStateCode', {str}, any)) == 'MEDIA_ON',
|
||||||
|
'release_timestamp': traverse_obj(metadata, ('gameDate', {parse_iso8601})),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'http_headers': {'Authorization': f'Bearer {self._access_token}'},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue