diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index 5d2b5ec351..5cbb536f97 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -22,7 +22,7 @@ class TwitterBaseIE(InfoExtractor):
 
 class TwitterCardIE(TwitterBaseIE):
     IE_NAME = 'twitter:card'
-    _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/cards/tfw/v1/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos/tweet)/(?P<id>\d+)'
     _TESTS = [
         {
             'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
@@ -30,7 +30,7 @@ class TwitterCardIE(TwitterBaseIE):
             'info_dict': {
                 'id': '560070183650213889',
                 'ext': 'mp4',
-                'title': 'TwitterCard',
+                'title': 'Twitter Card',
                 'thumbnail': 're:^https?://.*\.jpg$',
                 'duration': 30.033,
             }
@@ -41,7 +41,7 @@ class TwitterCardIE(TwitterBaseIE):
             'info_dict': {
                 'id': '623160978427936768',
                 'ext': 'mp4',
-                'title': 'TwitterCard',
+                'title': 'Twitter Card',
                 'thumbnail': 're:^https?://.*\.jpg',
                 'duration': 80.155,
             },
@@ -72,7 +72,16 @@ class TwitterCardIE(TwitterBaseIE):
                 'title': 'Vine by ArsenalTerje',
             },
             'add_ie': ['Vine'],
-        }
+        }, {
+            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
+            'md5': '3846d0a07109b5ab622425449b59049d',
+            'info_dict': {
+                'id': '705235433198714880',
+                'ext': 'mp4',
+                'title': 'Twitter web player',
+                'thumbnail': 're:^https?://.*\.jpg',
+            },
+        },
     ]
 
     def _real_extract(self, url):
@@ -98,12 +107,13 @@ class TwitterCardIE(TwitterBaseIE):
                 return self.url_result(iframe_url)
 
             config = self._parse_json(self._html_search_regex(
-                r'data-player-config="([^"]+)"', webpage, 'data player config'),
+                r'data-(?:player-)?config="([^"]+)"', webpage, 'data player config'),
                 video_id)
             if 'playlist' not in config:
-                if 'vmapUrl' in config:
+                vmap_url = config.get('vmapUrl') or config.get('vmap_url')
+                if vmap_url:
                     formats.append({
-                        'url': self._get_vmap_video_url(config['vmapUrl'], video_id),
+                        'url': self._get_vmap_video_url(vmap_url, video_id),
                     })
                     break  # same video regardless of UA
                 continue
@@ -123,12 +133,13 @@ class TwitterCardIE(TwitterBaseIE):
             formats.append(f)
         self._sort_formats(formats)
 
-        thumbnail = config.get('posterImageUrl')
+        title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
+        thumbnail = config.get('posterImageUrl') or config.get('image_src')
         duration = float_or_none(config.get('duration'))
 
         return {
             'id': video_id,
-            'title': 'TwitterCard',
+            'title': title,
             'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
@@ -177,6 +188,21 @@ class TwitterIE(InfoExtractor):
             'uploader_id': 'starwars',
             'uploader': 'Star Wars',
         },
+    }, {
+        'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
+        'info_dict': {
+            'id': '705235433198714880',
+            'ext': 'mp4',
+            'title': 'Brent Yarina - Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight.',
+            'description': 'Brent Yarina on Twitter: "Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight."',
+            'uploader_id': 'BTNBrentYarina',
+            'uploader': 'Brent Yarina',
+        },
+        'params': {
+            # The same video as https://twitter.com/i/videos/tweet/705235433198714880
+            # Test case of TwitterCardIE
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):
@@ -234,6 +260,15 @@ class TwitterIE(InfoExtractor):
             })
             return info
 
+        if 'class="PlayableMedia' in webpage:
+            info.update({
+                '_type': 'url_transparent',
+                'ie_key': 'TwitterCard',
+                'url': '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid),
+            })
+
+            return info
+
         raise ExtractorError('There\'s no video in this tweet.')
 
 