[ie/tiktok] Fix API extraction (#10216)

Closes #10213
Authored by: bashonly
This commit is contained in:
bashonly 2024-06-21 17:57:29 -05:00 committed by GitHub
parent 7aa322c02c
commit 96472d72f2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -30,6 +30,7 @@ from ..utils import (
try_call, try_call,
try_get, try_get,
url_or_none, url_or_none,
urlencode_postdata,
) )
@@ -43,8 +44,8 @@ class TikTokBaseIE(InfoExtractor):
'iid': None, 'iid': None,
# TikTok (KR/PH/TW/TH/VN) = trill, TikTok (rest of world) = musical_ly, Douyin = aweme # TikTok (KR/PH/TW/TH/VN) = trill, TikTok (rest of world) = musical_ly, Douyin = aweme
'app_name': 'musical_ly', 'app_name': 'musical_ly',
'app_version': '34.1.2', 'app_version': '35.1.3',
'manifest_app_version': '2023401020', 'manifest_app_version': '2023501030',
# "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0 # "app id": aweme = 1128, trill = 1180, musical_ly = 1233, universal = 0
'aid': '0', 'aid': '0',
} }
@@ -114,7 +115,7 @@ class TikTokBaseIE(InfoExtractor):
'universal data', display_id, end_pattern=r'</script>', default={}), 'universal data', display_id, end_pattern=r'</script>', default={}),
('__DEFAULT_SCOPE__', {dict})) or {} ('__DEFAULT_SCOPE__', {dict})) or {}
def _call_api_impl(self, ep, query, video_id, fatal=True, def _call_api_impl(self, ep, video_id, query=None, data=None, headers=None, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'): note='Downloading API JSON', errnote='Unable to download API page'):
self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', k=160))) self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choices('0123456789abcdef', k=160)))
webpage_cookies = self._get_cookies(self._WEBPAGE_HOST) webpage_cookies = self._get_cookies(self._WEBPAGE_HOST)
@@ -125,7 +126,8 @@ class TikTokBaseIE(InfoExtractor):
fatal=fatal, note=note, errnote=errnote, headers={ fatal=fatal, note=note, errnote=errnote, headers={
'User-Agent': self._APP_USER_AGENT, 'User-Agent': self._APP_USER_AGENT,
'Accept': 'application/json', 'Accept': 'application/json',
}, query=query) **(headers or {}),
}, query=query, data=data)
def _build_api_query(self, query): def _build_api_query(self, query):
return filter_dict({ return filter_dict({
@@ -174,7 +176,7 @@ class TikTokBaseIE(InfoExtractor):
'openudid': ''.join(random.choices('0123456789abcdef', k=16)), 'openudid': ''.join(random.choices('0123456789abcdef', k=16)),
}) })
def _call_api(self, ep, query, video_id, fatal=True, def _call_api(self, ep, video_id, query=None, data=None, headers=None, fatal=True,
note='Downloading API JSON', errnote='Unable to download API page'): note='Downloading API JSON', errnote='Unable to download API page'):
if not self._APP_INFO and not self._get_next_app_info(): if not self._APP_INFO and not self._get_next_app_info():
message = 'No working app info is available' message = 'No working app info is available'
@@ -187,9 +189,11 @@ class TikTokBaseIE(InfoExtractor):
max_tries = len(self._APP_INFO_POOL) + 1 # _APP_INFO_POOL + _APP_INFO max_tries = len(self._APP_INFO_POOL) + 1 # _APP_INFO_POOL + _APP_INFO
for count in itertools.count(1): for count in itertools.count(1):
self.write_debug(str(self._APP_INFO)) self.write_debug(str(self._APP_INFO))
real_query = self._build_api_query(query) real_query = self._build_api_query(query or {})
try: try:
return self._call_api_impl(ep, real_query, video_id, fatal, note, errnote) return self._call_api_impl(
ep, video_id, query=real_query, data=data, headers=headers,
fatal=fatal, note=note, errnote=errnote)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0: if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0:
message = str(e.cause or e.msg) message = str(e.cause or e.msg)
@@ -204,12 +208,13 @@ class TikTokBaseIE(InfoExtractor):
raise raise
def _extract_aweme_app(self, aweme_id): def _extract_aweme_app(self, aweme_id):
feed_list = self._call_api( aweme_detail = traverse_obj(
'feed', {'aweme_id': aweme_id}, aweme_id, note='Downloading video feed', self._call_api('multi/aweme/detail', aweme_id, data=urlencode_postdata({
errnote='Unable to download video feed').get('aweme_list') or [] 'aweme_ids': f'[{aweme_id}]',
aweme_detail = next((aweme for aweme in feed_list if str(aweme.get('aweme_id')) == aweme_id), None) 'request_source': '0',
}), headers={'X-Argus': ''}), ('aweme_details', 0, {dict}))
if not aweme_detail: if not aweme_detail:
raise ExtractorError('Unable to find video in feed', video_id=aweme_id) raise ExtractorError('Unable to extract aweme detail info', video_id=aweme_id)
return self._parse_aweme_video_app(aweme_detail) return self._parse_aweme_video_app(aweme_detail)
def _extract_web_data_and_status(self, url, video_id, fatal=True): def _extract_web_data_and_status(self, url, video_id, fatal=True):
@@ -1037,7 +1042,8 @@ class TikTokBaseListIE(TikTokBaseIE): # XXX: Conventionally, base classes shoul
for retry in self.RetryManager(): for retry in self.RetryManager():
try: try:
post_list = self._call_api( post_list = self._call_api(
self._API_ENDPOINT, query, display_id, note=f'Downloading video list page {page}', self._API_ENDPOINT, display_id, query=query,
note=f'Downloading video list page {page}',
errnote='Unable to download video list') errnote='Unable to download video list')
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0: if isinstance(e.cause, json.JSONDecodeError) and e.cause.pos == 0: