[telegram] Add extractor (#2922)

Closes #2910

Authored by: hatienl0i261299
This commit is contained in:
Ha Tien Loi 2022-03-04 18:18:46 +07:00 committed by GitHub
parent ded9f32667
commit 5bcccbfec3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 38 additions and 0 deletions

View file

@@ -1593,6 +1593,7 @@ from .tele13 import Tele13IE
from .telebruxelles import TeleBruxellesIE
from .telecinco import TelecincoIE
from .telegraaf import TelegraafIE
from .telegram import TelegramEmbedIE
from .telemb import TeleMBIE
from .telemundo import TelemundoIE
from .telequebec import (

View file

@@ -0,0 +1,37 @@
from .common import InfoExtractor
class TelegramEmbedIE(InfoExtractor):
    """Extract the video attached to a single Telegram post.

    Handles URLs of the form ``https://t.me/<channel_name>/<id>``. The post
    page supplies the Open Graph / Twitter metadata, while the ``embed=1``
    variant of the same page supplies the ``<video>`` source and thumbnail.
    """

    IE_NAME = 'telegram:embed'
    _VALID_URL = r'https?://t\.me/(?P<channel_name>[^/]+)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://t.me/europa_press/613',
        'info_dict': {
            'id': '613',
            'ext': 'mp4',
            'title': 'Europa Press',
            'description': '6ce2d7e8d56eda16d80607b23db7b252',
            'thumbnail': r're:^https?:\/\/cdn.*?telesco\.pe\/file\/\w+',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the post at *url*.

        Downloads the post page and its embed variant. The video URL and the
        title are mandatory, so extraction fails if either is missing. The
        description and thumbnail are optional and come back as ``None``
        when they cannot be found.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # _VALID_URL is not anchored at the end, so ``url`` may already carry
        # a query string or fragment. Let the downloader merge ``embed=1``
        # into it instead of blindly appending ``?embed=1``.
        webpage_embed = self._download_webpage(
            url, video_id, note='Downloading embed page', query={'embed': '1'})

        formats = [{
            'url': self._proto_relative_url(self._search_regex(
                r'<video[^>]+src="([^"]+)"', webpage_embed, 'source')),
            'ext': 'mp4',
        }]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._html_search_meta(
                ['og:title', 'twitter:title'], webpage, fatal=True),
            # Optional metadata must not abort an otherwise successful
            # extraction, hence the non-fatal lookups below.
            'description': self._html_search_meta(
                ['og:description', 'twitter:description'], webpage),
            'thumbnail': self._search_regex(
                r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)',
                webpage_embed, 'thumbnail', fatal=False),
            'formats': formats,
        }