mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-15 03:41:33 +01:00
Closes #1605, Closes #5233, Closes #1249 Authored by: Grub4K, nixxo, bashonly, pukkandan Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> Co-authored-by: nixxo <nixxo@protonmail.com>
This commit is contained in:
parent
3639df54c3
commit
7a26ce2641
3 changed files with 426 additions and 52 deletions
|
@ -1765,6 +1765,8 @@ The following extractors use this feature:
|
||||||
#### rokfinchannel
|
#### rokfinchannel
|
||||||
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
|
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`
|
||||||
|
|
||||||
|
#### twitter
|
||||||
|
* `force_graphql`: Force usage of the GraphQL API. By default it will only be used if login cookies are provided
|
||||||
|
|
||||||
NOTE: These options may be changed/removed in the future without concern for backward compatibility
|
NOTE: These options may be changed/removed in the future without concern for backward compatibility
|
||||||
|
|
||||||
|
|
|
@ -1968,6 +1968,7 @@ from .twitter import (
|
||||||
TwitterIE,
|
TwitterIE,
|
||||||
TwitterAmplifyIE,
|
TwitterAmplifyIE,
|
||||||
TwitterBroadcastIE,
|
TwitterBroadcastIE,
|
||||||
|
TwitterSpacesIE,
|
||||||
TwitterShortenerIE,
|
TwitterShortenerIE,
|
||||||
)
|
)
|
||||||
from .udemy import (
|
from .udemy import (
|
||||||
|
|
|
@ -1,9 +1,11 @@
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
import urllib.error
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .periscope import PeriscopeBaseIE, PeriscopeIE
|
from .periscope import PeriscopeBaseIE, PeriscopeIE
|
||||||
|
from ..compat import functools # isort: split
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_HTTPError,
|
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
|
@ -18,6 +20,7 @@ from ..utils import (
|
||||||
str_or_none,
|
str_or_none,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
|
try_call,
|
||||||
try_get,
|
try_get,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
|
@ -28,8 +31,12 @@ from ..utils import (
|
||||||
|
|
||||||
class TwitterBaseIE(InfoExtractor):
|
class TwitterBaseIE(InfoExtractor):
|
||||||
_API_BASE = 'https://api.twitter.com/1.1/'
|
_API_BASE = 'https://api.twitter.com/1.1/'
|
||||||
|
_GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
|
||||||
|
_TOKENS = {
|
||||||
|
'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA': None,
|
||||||
|
'AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw': None,
|
||||||
|
}
|
||||||
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
|
_BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
|
||||||
_GUEST_TOKEN = None
|
|
||||||
|
|
||||||
def _extract_variant_formats(self, variant, video_id):
|
def _extract_variant_formats(self, variant, video_id):
|
||||||
variant_url = variant.get('url')
|
variant_url = variant.get('url')
|
||||||
|
@ -81,28 +88,73 @@ class TwitterBaseIE(InfoExtractor):
|
||||||
'height': int(m.group('height')),
|
'height': int(m.group('height')),
|
||||||
})
|
})
|
||||||
|
|
||||||
def _call_api(self, path, video_id, query={}):
|
@functools.cached_property
|
||||||
headers = {
|
def is_logged_in(self):
|
||||||
'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA',
|
return bool(self._get_cookies(self._API_BASE).get('auth_token'))
|
||||||
}
|
|
||||||
token = self._get_cookies(self._API_BASE).get('ct0')
|
def _call_api(self, path, video_id, query={}, graphql=False):
|
||||||
if token:
|
cookies = self._get_cookies(self._API_BASE)
|
||||||
headers['x-csrf-token'] = token.value
|
headers = {}
|
||||||
if not self._GUEST_TOKEN:
|
|
||||||
self._GUEST_TOKEN = self._download_json(
|
csrf_cookie = cookies.get('ct0')
|
||||||
|
if csrf_cookie:
|
||||||
|
headers['x-csrf-token'] = csrf_cookie.value
|
||||||
|
|
||||||
|
if self.is_logged_in:
|
||||||
|
headers.update({
|
||||||
|
'x-twitter-auth-type': 'OAuth2Session',
|
||||||
|
'x-twitter-client-language': 'en',
|
||||||
|
'x-twitter-active-user': 'yes',
|
||||||
|
})
|
||||||
|
|
||||||
|
result, last_error = None, None
|
||||||
|
for bearer_token in self._TOKENS:
|
||||||
|
headers['Authorization'] = f'Bearer {bearer_token}'
|
||||||
|
|
||||||
|
if not self.is_logged_in:
|
||||||
|
if not self._TOKENS[bearer_token]:
|
||||||
|
headers.pop('x-guest-token', None)
|
||||||
|
guest_token_response = self._download_json(
|
||||||
self._API_BASE + 'guest/activate.json', video_id,
|
self._API_BASE + 'guest/activate.json', video_id,
|
||||||
'Downloading guest token', data=b'',
|
'Downloading guest token', data=b'', headers=headers)
|
||||||
headers=headers)['guest_token']
|
|
||||||
headers['x-guest-token'] = self._GUEST_TOKEN
|
self._TOKENS[bearer_token] = guest_token_response.get('guest_token')
|
||||||
|
if not self._TOKENS[bearer_token]:
|
||||||
|
raise ExtractorError('Could not retrieve guest token')
|
||||||
|
headers['x-guest-token'] = self._TOKENS[bearer_token]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return self._download_json(
|
allowed_status = {400, 403, 404} if graphql else {403}
|
||||||
self._API_BASE + path, video_id, headers=headers, query=query)
|
result = self._download_json(
|
||||||
|
(self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
|
||||||
|
video_id, headers=headers, query=query, expected_status=allowed_status)
|
||||||
|
break
|
||||||
|
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
if last_error:
|
||||||
raise ExtractorError(self._parse_json(
|
raise last_error
|
||||||
e.cause.read().decode(),
|
elif not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404:
|
||||||
video_id)['errors'][0]['message'], expected=True)
|
|
||||||
raise
|
raise
|
||||||
|
last_error = e
|
||||||
|
self.report_warning(
|
||||||
|
'Twitter API gave 404 response, retrying with deprecated token. '
|
||||||
|
'Only one media item can be extracted')
|
||||||
|
|
||||||
|
if result.get('errors'):
|
||||||
|
error_message = ', '.join(set(traverse_obj(
|
||||||
|
result, ('errors', ..., 'message'), expected_type=str))) or 'Unknown error'
|
||||||
|
raise ExtractorError(f'Error(s) while querying api: {error_message}', expected=True)
|
||||||
|
|
||||||
|
assert result is not None
|
||||||
|
return result
|
||||||
|
|
||||||
|
def _build_graphql_query(self, media_id):
|
||||||
|
raise NotImplementedError('Method must be implemented to support GraphQL')
|
||||||
|
|
||||||
|
def _call_graphql_api(self, endpoint, media_id):
    """Query a GraphQL *endpoint* for *media_id* and return its `data` member.

    The payload comes from `_build_graphql_query`; each top-level section is
    serialized to compact JSON (no spaces after separators) and sent as a
    query-string parameter through `_call_api(..., graphql=True)`.

    Returns:
        Whatever `traverse_obj` finds under the `data` key of the response.
    """
    payload = self._build_graphql_query(media_id)

    # Each section becomes a single JSON-encoded query parameter
    encoded = {}
    for section_name, section in payload.items():
        encoded[section_name] = json.dumps(section, separators=(',', ':'))

    response = self._call_api(endpoint, media_id, query=encoded, graphql=True)
    return traverse_obj(response, 'data')
|
||||||
|
|
||||||
|
|
||||||
class TwitterCardIE(InfoExtractor):
|
class TwitterCardIE(InfoExtractor):
|
||||||
|
@ -113,7 +165,7 @@ class TwitterCardIE(InfoExtractor):
|
||||||
'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
|
'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
|
||||||
# MD5 checksums are different in different places
|
# MD5 checksums are different in different places
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '560070183650213889',
|
'id': '560070131976392705',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
|
'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
|
||||||
'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
|
'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
|
||||||
|
@ -123,6 +175,13 @@ class TwitterCardIE(InfoExtractor):
|
||||||
'duration': 30.033,
|
'duration': 30.033,
|
||||||
'timestamp': 1422366112,
|
'timestamp': 1422366112,
|
||||||
'upload_date': '20150127',
|
'upload_date': '20150127',
|
||||||
|
'age_limit': 0,
|
||||||
|
'comment_count': int,
|
||||||
|
'tags': [],
|
||||||
|
'repost_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'display_id': '560070183650213889',
|
||||||
|
'uploader_url': 'https://twitter.com/Twitter',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -137,7 +196,14 @@ class TwitterCardIE(InfoExtractor):
|
||||||
'uploader_id': 'NASA',
|
'uploader_id': 'NASA',
|
||||||
'timestamp': 1437408129,
|
'timestamp': 1437408129,
|
||||||
'upload_date': '20150720',
|
'upload_date': '20150720',
|
||||||
|
'uploader_url': 'https://twitter.com/NASA',
|
||||||
|
'age_limit': 0,
|
||||||
|
'comment_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'tags': ['PlutoFlyby'],
|
||||||
},
|
},
|
||||||
|
'params': {'format': '[protocol=https]'}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
|
'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
|
||||||
|
@ -150,12 +216,27 @@ class TwitterCardIE(InfoExtractor):
|
||||||
'upload_date': '20111013',
|
'upload_date': '20111013',
|
||||||
'uploader': 'OMG! UBUNTU!',
|
'uploader': 'OMG! UBUNTU!',
|
||||||
'uploader_id': 'omgubuntu',
|
'uploader_id': 'omgubuntu',
|
||||||
|
'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
|
||||||
|
'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
|
||||||
|
'channel_follower_count': int,
|
||||||
|
'chapters': 'count:8',
|
||||||
|
'uploader_url': 'http://www.youtube.com/user/omgubuntu',
|
||||||
|
'duration': 138,
|
||||||
|
'categories': ['Film & Animation'],
|
||||||
|
'age_limit': 0,
|
||||||
|
'comment_count': int,
|
||||||
|
'availability': 'public',
|
||||||
|
'like_count': int,
|
||||||
|
'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
|
||||||
|
'view_count': int,
|
||||||
|
'tags': 'count:12',
|
||||||
|
'channel': 'OMG! UBUNTU!',
|
||||||
|
'playable_in_embed': True,
|
||||||
},
|
},
|
||||||
'add_ie': ['Youtube'],
|
'add_ie': ['Youtube'],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
|
'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
|
||||||
'md5': '6dabeaca9e68cbb71c99c322a4b42a11',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'iBb2x00UVlv',
|
'id': 'iBb2x00UVlv',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -164,9 +245,17 @@ class TwitterCardIE(InfoExtractor):
|
||||||
'uploader': 'ArsenalTerje',
|
'uploader': 'ArsenalTerje',
|
||||||
'title': 'Vine by ArsenalTerje',
|
'title': 'Vine by ArsenalTerje',
|
||||||
'timestamp': 1447451307,
|
'timestamp': 1447451307,
|
||||||
|
'alt_title': 'Vine by ArsenalTerje',
|
||||||
|
'comment_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'thumbnail': r're:^https?://[^?#]+\.jpg',
|
||||||
|
'view_count': int,
|
||||||
|
'repost_count': int,
|
||||||
},
|
},
|
||||||
'add_ie': ['Vine'],
|
'add_ie': ['Vine'],
|
||||||
}, {
|
'params': {'skip_download': 'm3u8'},
|
||||||
|
},
|
||||||
|
{
|
||||||
'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
|
'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
|
||||||
'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
|
'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -180,7 +269,8 @@ class TwitterCardIE(InfoExtractor):
|
||||||
'upload_date': '20160303',
|
'upload_date': '20160303',
|
||||||
},
|
},
|
||||||
'skip': 'This content is no longer available.',
|
'skip': 'This content is no longer available.',
|
||||||
}, {
|
},
|
||||||
|
{
|
||||||
'url': 'https://twitter.com/i/videos/752274308186120192',
|
'url': 'https://twitter.com/i/videos/752274308186120192',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
|
@ -211,7 +301,6 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'duration': 12.922,
|
'duration': 12.922,
|
||||||
'timestamp': 1442188653,
|
'timestamp': 1442188653,
|
||||||
'upload_date': '20150913',
|
'upload_date': '20150913',
|
||||||
'age_limit': 18,
|
|
||||||
'uploader_url': 'https://twitter.com/freethenipple',
|
'uploader_url': 'https://twitter.com/freethenipple',
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
|
@ -239,10 +328,10 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'id': '665052190608723968',
|
'id': '665052190608723968',
|
||||||
'display_id': '665052190608723968',
|
'display_id': '665052190608723968',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.',
|
'title': 'md5:3f57ab5d35116537a2ae7345cd0060d8',
|
||||||
'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
|
'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
|
||||||
'uploader_id': 'starwars',
|
'uploader_id': 'starwars',
|
||||||
'uploader': 'Star Wars',
|
'uploader': r're:Star Wars.*',
|
||||||
'timestamp': 1447395772,
|
'timestamp': 1447395772,
|
||||||
'upload_date': '20151113',
|
'upload_date': '20151113',
|
||||||
'uploader_url': 'https://twitter.com/starwars',
|
'uploader_url': 'https://twitter.com/starwars',
|
||||||
|
@ -487,7 +576,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'uploader_url': 'https://twitter.com/oshtru',
|
'uploader_url': 'https://twitter.com/oshtru',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'duration': 30.03,
|
'duration': 30.03,
|
||||||
'timestamp': 1665025050.0,
|
'timestamp': 1665025050,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
@ -505,7 +594,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'uploader_id': 'UltimaShadowX',
|
'uploader_id': 'UltimaShadowX',
|
||||||
'uploader_url': 'https://twitter.com/UltimaShadowX',
|
'uploader_url': 'https://twitter.com/UltimaShadowX',
|
||||||
'upload_date': '20221005',
|
'upload_date': '20221005',
|
||||||
'timestamp': 1664992565.0,
|
'timestamp': 1664992565,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'repost_count': int,
|
'repost_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
|
@ -514,6 +603,121 @@ class TwitterIE(TwitterBaseIE):
|
||||||
},
|
},
|
||||||
'playlist_count': 4,
|
'playlist_count': 4,
|
||||||
'params': {'skip_download': True},
|
'params': {'skip_download': True},
|
||||||
|
}, {
|
||||||
|
'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1575559336759263233',
|
||||||
|
'display_id': '1575560063510810624',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'description': 'md5:95aea692fda36a12081b9629b02daa92',
|
||||||
|
'uploader': 'Max Olson',
|
||||||
|
'uploader_id': 'MesoMax919',
|
||||||
|
'uploader_url': 'https://twitter.com/MesoMax919',
|
||||||
|
'duration': 21.321,
|
||||||
|
'timestamp': 1664477766,
|
||||||
|
'upload_date': '20220929',
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'tags': ['HurricaneIan'],
|
||||||
|
'age_limit': 0,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# Adult content, uses old token
|
||||||
|
# Fails if not logged in (GraphQL)
|
||||||
|
'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1575199163847000068',
|
||||||
|
'display_id': '1575199173472927762',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': str,
|
||||||
|
'description': str,
|
||||||
|
'uploader': str,
|
||||||
|
'uploader_id': 'Rizdraws',
|
||||||
|
'uploader_url': 'https://twitter.com/Rizdraws',
|
||||||
|
'upload_date': '20220928',
|
||||||
|
'timestamp': 1664391723,
|
||||||
|
'thumbnail': 're:^https?://.*\\.jpg',
|
||||||
|
'like_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'age_limit': 18,
|
||||||
|
'tags': []
|
||||||
|
},
|
||||||
|
'expected_warnings': ['404'],
|
||||||
|
}, {
|
||||||
|
# Description is missing one https://t.co url (GraphQL)
|
||||||
|
'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
|
||||||
|
'playlist_mincount': 2,
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1395079556562706435',
|
||||||
|
'title': str,
|
||||||
|
'tags': [],
|
||||||
|
'uploader': str,
|
||||||
|
'like_count': int,
|
||||||
|
'upload_date': '20210519',
|
||||||
|
'age_limit': 0,
|
||||||
|
'repost_count': int,
|
||||||
|
'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw https://t.co/kbXZrozlY7',
|
||||||
|
'uploader_id': 'Srirachachau',
|
||||||
|
'comment_count': int,
|
||||||
|
'uploader_url': 'https://twitter.com/Srirachachau',
|
||||||
|
'timestamp': 1621447860,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# Description is missing one https://t.co url (GraphQL)
|
||||||
|
'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
|
||||||
|
'playlist_mincount': 2,
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1578353380363501568',
|
||||||
|
'title': str,
|
||||||
|
'uploader_id': 'DavidToons_',
|
||||||
|
'repost_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'uploader': str,
|
||||||
|
'timestamp': 1665143744,
|
||||||
|
'uploader_url': 'https://twitter.com/DavidToons_',
|
||||||
|
'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/glfQdgfFXH https://t.co/WgJauwIW1w',
|
||||||
|
'tags': [],
|
||||||
|
'comment_count': int,
|
||||||
|
'upload_date': '20221007',
|
||||||
|
'age_limit': 0,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
|
||||||
|
'playlist_count': 2,
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1578401165338976258',
|
||||||
|
'title': str,
|
||||||
|
'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
|
||||||
|
'uploader': str,
|
||||||
|
'uploader_id': 'primevideouk',
|
||||||
|
'timestamp': 1665155137,
|
||||||
|
'upload_date': '20221007',
|
||||||
|
'age_limit': 0,
|
||||||
|
'uploader_url': 'https://twitter.com/primevideouk',
|
||||||
|
'comment_count': int,
|
||||||
|
'repost_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'tags': ['TheRingsOfPower'],
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# Twitter Spaces
|
||||||
|
'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1lPJqmBeeNAJb',
|
||||||
|
'ext': 'm4a',
|
||||||
|
'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
|
||||||
|
'uploader': r're:Monique Camarra.+?',
|
||||||
|
'uploader_id': 'MoniqueCamarra',
|
||||||
|
'live_status': 'was_live',
|
||||||
|
'description': 'md5:acce559345fd49f129c20dbcda3f1201',
|
||||||
|
'timestamp': 1658407771464,
|
||||||
|
},
|
||||||
|
'add_ie': ['TwitterSpaces'],
|
||||||
|
'params': {'skip_download': 'm3u8'},
|
||||||
}, {
|
}, {
|
||||||
# onion route
|
# onion route
|
||||||
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
|
||||||
|
@ -552,10 +756,77 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _graphql_to_legacy(self, data, twid):
    """Reshape a GraphQL TweetDetail response into the legacy status layout.

    Looks up the timeline entry whose `entryId` is `tweet-<twid>`, takes its
    `legacy` dict and attaches the nested `user`, `card` and `quoted_status`
    dicts to it. Card binding values are converted from a list of
    key/value records into a plain mapping.

    Raises:
        ExtractorError: if the matched result carries a `tombstone`.
    """
    wanted_entry = f'tweet-{twid}'
    tweet_path = (
        'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
        lambda _, entry: entry['entryId'] == wanted_entry, 'content', 'itemContent',
        'tweet_results', 'result'
    )
    tweet = traverse_obj(data, tweet_path, expected_type=dict, default={}, get_all=False)

    if 'tombstone' in tweet:
        reason = traverse_obj(tweet, ('tombstone', 'text', 'text'), expected_type=str)
        raise ExtractorError(f'Twitter API says: {reason or "Unknown error"}', expected=True)

    legacy = tweet.get('legacy', {})
    nested_parts = traverse_obj(tweet, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    legacy.update(nested_parts)

    # extra transformation is needed since result does not match legacy format
    binding_records = traverse_obj(legacy, ('card', 'binding_values', ...), expected_type=dict)
    flattened = {record.get('key'): record.get('value') for record in binding_records}
    if flattened:
        legacy['card']['binding_values'] = flattened

    return legacy
|
||||||
|
|
||||||
|
def _build_graphql_query(self, media_id):
|
||||||
|
return {
|
||||||
|
'variables': {
|
||||||
|
'focalTweetId': media_id,
|
||||||
|
'includePromotedContent': True,
|
||||||
|
'with_rux_injections': False,
|
||||||
|
'withBirdwatchNotes': True,
|
||||||
|
'withCommunity': True,
|
||||||
|
'withDownvotePerspective': False,
|
||||||
|
'withQuickPromoteEligibilityTweetFields': True,
|
||||||
|
'withReactionsMetadata': False,
|
||||||
|
'withReactionsPerspective': False,
|
||||||
|
'withSuperFollowsTweetFields': True,
|
||||||
|
'withSuperFollowsUserFields': True,
|
||||||
|
'withV2Timeline': True,
|
||||||
|
'withVoice': True,
|
||||||
|
},
|
||||||
|
'features': {
|
||||||
|
'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
|
||||||
|
'interactive_text_enabled': True,
|
||||||
|
'responsive_web_edit_tweet_api_enabled': True,
|
||||||
|
'responsive_web_enhance_cards_enabled': True,
|
||||||
|
'responsive_web_graphql_timeline_navigation_enabled': False,
|
||||||
|
'responsive_web_text_conversations_enabled': False,
|
||||||
|
'responsive_web_uc_gql_enabled': True,
|
||||||
|
'standardized_nudges_misinfo': True,
|
||||||
|
'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
|
||||||
|
'tweetypie_unmention_optimization_enabled': True,
|
||||||
|
'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
|
||||||
|
'verified_phone_label_enabled': False,
|
||||||
|
'vibe_api_enabled': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
twid = self._match_id(url)
|
twid = self._match_id(url)
|
||||||
status = self._call_api(
|
if self.is_logged_in or self._configuration_arg('force_graphql'):
|
||||||
'statuses/show/%s.json' % twid, twid, {
|
self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})')
|
||||||
|
result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
|
||||||
|
status = self._graphql_to_legacy(result, twid)
|
||||||
|
|
||||||
|
else:
|
||||||
|
status = self._call_api(f'statuses/show/{twid}.json', twid, {
|
||||||
'cards_platform': 'Web-12',
|
'cards_platform': 'Web-12',
|
||||||
'include_cards': 1,
|
'include_cards': 1,
|
||||||
'include_reply_count': 1,
|
'include_reply_count': 1,
|
||||||
|
@ -569,7 +840,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
user = status.get('user') or {}
|
user = status.get('user') or {}
|
||||||
uploader = user.get('name')
|
uploader = user.get('name')
|
||||||
if uploader:
|
if uploader:
|
||||||
title = '%s - %s' % (uploader, title)
|
title = f'{uploader} - {title}'
|
||||||
uploader_id = user.get('screen_name')
|
uploader_id = user.get('screen_name')
|
||||||
|
|
||||||
tags = []
|
tags = []
|
||||||
|
@ -642,31 +913,37 @@ class TwitterIE(TwitterBaseIE):
|
||||||
|
|
||||||
card_name = card['name'].split(':')[-1]
|
card_name = card['name'].split(':')[-1]
|
||||||
if card_name == 'player':
|
if card_name == 'player':
|
||||||
return {
|
yield {
|
||||||
'_type': 'url',
|
'_type': 'url',
|
||||||
'url': get_binding_value('player_url'),
|
'url': get_binding_value('player_url'),
|
||||||
}
|
}
|
||||||
elif card_name == 'periscope_broadcast':
|
elif card_name == 'periscope_broadcast':
|
||||||
return {
|
yield {
|
||||||
'_type': 'url',
|
'_type': 'url',
|
||||||
'url': get_binding_value('url') or get_binding_value('player_url'),
|
'url': get_binding_value('url') or get_binding_value('player_url'),
|
||||||
'ie_key': PeriscopeIE.ie_key(),
|
'ie_key': PeriscopeIE.ie_key(),
|
||||||
}
|
}
|
||||||
elif card_name == 'broadcast':
|
elif card_name == 'broadcast':
|
||||||
return {
|
yield {
|
||||||
'_type': 'url',
|
'_type': 'url',
|
||||||
'url': get_binding_value('broadcast_url'),
|
'url': get_binding_value('broadcast_url'),
|
||||||
'ie_key': TwitterBroadcastIE.ie_key(),
|
'ie_key': TwitterBroadcastIE.ie_key(),
|
||||||
}
|
}
|
||||||
|
elif card_name == 'audiospace':
|
||||||
|
yield {
|
||||||
|
'_type': 'url',
|
||||||
|
'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
|
||||||
|
'ie_key': TwitterSpacesIE.ie_key(),
|
||||||
|
}
|
||||||
elif card_name == 'summary':
|
elif card_name == 'summary':
|
||||||
return {
|
yield {
|
||||||
'_type': 'url',
|
'_type': 'url',
|
||||||
'url': get_binding_value('card_url'),
|
'url': get_binding_value('card_url'),
|
||||||
}
|
}
|
||||||
elif card_name == 'unified_card':
|
elif card_name == 'unified_card':
|
||||||
media_entities = self._parse_json(get_binding_value('unified_card'), twid)['media_entities']
|
unified_card = self._parse_json(get_binding_value('unified_card'), twid)
|
||||||
media = traverse_obj(media_entities, ..., expected_type=dict, get_all=False)
|
yield from map(extract_from_video_info, traverse_obj(
|
||||||
return extract_from_video_info(media)
|
unified_card, ('media_entities', ...), expected_type=dict))
|
||||||
# amplify, promo_video_website, promo_video_convo, appplayer,
|
# amplify, promo_video_website, promo_video_convo, appplayer,
|
||||||
# video_direct_message, poll2choice_video, poll3choice_video,
|
# video_direct_message, poll2choice_video, poll3choice_video,
|
||||||
# poll4choice_video, ...
|
# poll4choice_video, ...
|
||||||
|
@ -690,7 +967,7 @@ class TwitterIE(TwitterBaseIE):
|
||||||
'height': int_or_none(image.get('height')),
|
'height': int_or_none(image.get('height')),
|
||||||
})
|
})
|
||||||
|
|
||||||
return {
|
yield {
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
|
@ -700,11 +977,8 @@ class TwitterIE(TwitterBaseIE):
|
||||||
|
|
||||||
media_path = ((None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo')
|
media_path = ((None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo')
|
||||||
videos = map(extract_from_video_info, traverse_obj(status, media_path, expected_type=dict))
|
videos = map(extract_from_video_info, traverse_obj(status, media_path, expected_type=dict))
|
||||||
entries = [{**info, **data, 'display_id': twid} for data in videos if data]
|
cards = extract_from_card_info(status.get('card'))
|
||||||
|
entries = [{**info, **data, 'display_id': twid} for data in (*videos, *cards)]
|
||||||
data = extract_from_card_info(status.get('card'))
|
|
||||||
if data:
|
|
||||||
entries.append({**info, **data, 'display_id': twid})
|
|
||||||
|
|
||||||
if not entries:
|
if not entries:
|
||||||
expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
|
expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
|
||||||
|
@ -730,13 +1004,14 @@ class TwitterAmplifyIE(TwitterBaseIE):
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
|
'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
|
||||||
'md5': '7df102d0b9fd7066b86f3159f8e81bf6',
|
'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
|
'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Twitter Video',
|
'title': 'Twitter Video',
|
||||||
'thumbnail': 're:^https?://.*',
|
'thumbnail': 're:^https?://.*',
|
||||||
},
|
},
|
||||||
|
'params': {'format': '[protocol=https]'},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -745,7 +1020,7 @@ class TwitterAmplifyIE(TwitterBaseIE):
|
||||||
|
|
||||||
vmap_url = self._html_search_meta(
|
vmap_url = self._html_search_meta(
|
||||||
'twitter:amplify:vmap', webpage, 'vmap url')
|
'twitter:amplify:vmap', webpage, 'vmap url')
|
||||||
formats = self._extract_formats_from_vmap_url(vmap_url, video_id)
|
formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
thumbnail = self._html_search_meta(
|
thumbnail = self._html_search_meta(
|
||||||
|
@ -793,6 +1068,8 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||||
'title': 'Andrea May Sahouri - Periscope Broadcast',
|
'title': 'Andrea May Sahouri - Periscope Broadcast',
|
||||||
'uploader': 'Andrea May Sahouri',
|
'uploader': 'Andrea May Sahouri',
|
||||||
'uploader_id': '1PXEdBZWpGwKe',
|
'uploader_id': '1PXEdBZWpGwKe',
|
||||||
|
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
|
||||||
|
'view_count': int,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -804,7 +1081,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||||
info = self._parse_broadcast_data(broadcast, broadcast_id)
|
info = self._parse_broadcast_data(broadcast, broadcast_id)
|
||||||
media_key = broadcast['media_key']
|
media_key = broadcast['media_key']
|
||||||
source = self._call_api(
|
source = self._call_api(
|
||||||
'live_video_stream/status/' + media_key, media_key)['source']
|
f'live_video_stream/status/{media_key}', media_key)['source']
|
||||||
m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
|
m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
|
||||||
if '/live_video_stream/geoblocked/' in m3u8_url:
|
if '/live_video_stream/geoblocked/' in m3u8_url:
|
||||||
self.raise_geo_restricted()
|
self.raise_geo_restricted()
|
||||||
|
@ -816,6 +1093,100 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
||||||
|
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for Twitter Spaces addressed as ``<twitter host>/i/spaces/<id>``.

    Space metadata is fetched through the `AudioSpaceById` GraphQL endpoint;
    the audio stream location comes from the `live_video_stream/status`
    API using the space's `media_key`.
    """
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'
    # NOTE(review): not referenced inside this class; the endpoint path is
    # passed to _call_graphql_api directly in _real_extract
    _TWITTER_GRAPHQL = 'https://twitter.com/i/api/graphql/HPEisOmj1epUNLCWTYhUWw/'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956397,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Lower-cased `state` from the space metadata -> `live_status` value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the AudioSpaceById GraphQL payload for *space_id*."""
        return {
            'variables': {
                'id': space_id,
                'isMetatagsQuery': True,
                'withDownvotePerspective': False,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withReplays': True,
                'withSuperFollowsUserFields': True,
                'withSuperFollowsTweetFields': True,
            },
            'features': {
                'dont_mention_me_view_api_enabled': True,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_uc_gql_enabled': True,
                'spaces_2022_h2_clipping': True,
                'spaces_2022_h2_spaces_communities': False,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'vibe_api_enabled': True,
            },
        }

    def _real_extract(self, url):
        """Extract the info dict for the Space at *url*.

        Raises:
            ExtractorError: if the GraphQL response contains no `audioSpace`.
        """
        space_id = self._match_id(url)
        # Look the key up with traverse_obj rather than subscripting: the
        # GraphQL helper may hand back nothing (or a dict without the key),
        # and a plain ['audioSpace'] would then fail with TypeError/KeyError
        # before the "not found" error below could be reported
        space_data = traverse_obj(
            self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id),
            'audioSpace')
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # presumably try_call yields None when `state` is missing or unknown — verify
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif live_status == 'post_live':
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)
        else:
            source = self._call_api(
                f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key'])['source']

            # XXX: Native downloader does not work
            formats = self._extract_m3u8_formats(
                traverse_obj(source, 'noRedirectPlaybackUrl', 'location'),
                metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live')
            # Spaces are audio-only, so mark every format accordingly
            for fmt in formats:
                fmt.update({'vcodec': 'none', 'acodec': 'aac'})

        # expected_type=str keeps a non-string display_name from breaking the join
        participants = ', '.join(traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'),
            expected_type=str)) or 'nobody yet'
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'name')),
            'uploader_id': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
            'live_status': live_status,
            # NOTE(review): the test value 1659877956397 looks like epoch
            # milliseconds — confirm whether this should be scaled to seconds
            'timestamp': metadata.get('created_at'),
            'formats': formats,
        }
|
||||||
|
|
||||||
|
|
||||||
class TwitterShortenerIE(TwitterBaseIE):
|
class TwitterShortenerIE(TwitterBaseIE):
|
||||||
IE_NAME = 'twitter:shortener'
|
IE_NAME = 'twitter:shortener'
|
||||||
_VALID_URL = r'https?://t.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
|
_VALID_URL = r'https?://t.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
|
||||||
|
|
Loading…
Reference in a new issue