Fix extractor

This commit is contained in:
MrHulk 2024-09-10 08:54:16 +05:30 committed by GitHub
parent d1c4d88b2d
commit 308e713d9e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -2,78 +2,114 @@ import hashlib
import time import time
import urllib.parse import urllib.parse
from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, ExtractorError,
join_nonempty, int_or_none,
strip_or_none,
) )
from .common import InfoExtractor
class FptplayIE(InfoExtractor): class FptplayIE(InfoExtractor):
_VALID_URL = r'https?://fptplay\.vn/xem-video/[^/]+\-(?P<id>\w+)(?:/tap-(?P<episode>\d+)?/?(?:[?#]|$)|)' _VALID_URL = r'https?://fptplay\.vn/xem-video/[^/]+\-(?P<id>[a-f0-9]+)'
_GEO_COUNTRIES = ['VN'] _GEO_COUNTRIES = ['VN']
IE_NAME = 'fptplay' IE_NAME = 'fptplay'
IE_DESC = 'fptplay.vn' IE_DESC = 'fptplay.vn'
_TESTS = [{ _TESTS = [{
'url': 'https://fptplay.vn/xem-video/nhan-duyen-dai-nhan-xin-dung-buoc-621a123016f369ebbde55945', 'url': 'https://fptplay.vn/xem-video/jumanji-tro-choi-ky-ao-615c9b232089bd0509bfbf42',
'md5': 'ca0ee9bc63446c0c3e9a90186f7d6b33',
'info_dict': { 'info_dict': {
'id': '621a123016f369ebbde55945', 'id': '615c9b232089bd0509bfbf42',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Nhân Duyên Đại Nhân Xin Dừng Bước - Tập 1A', 'title': 'Jumanji: Welcome To The Jungle',
'description': 'md5:23cf7d1ce0ade8e21e76ae482e6a8c6c', 'description': 'Phim theo chân một nhóm bốn học sinh phổ thông bị phạt dọn dẹp tầng hầm trường học. Tại đó, họ phát hiện ra trò chơi cổ mang tên Jumanji.',
'thumbnail': 'https://images.fptplay.net/media/OTT/VOD/2023/03/13/jumanji-tro-choi-ky-ao-fpt-play-1678685776013_Background_1920x1080_over.jpg',
'release_year': '2017',
}, },
}, { }, {
'url': 'https://fptplay.vn/xem-video/ma-toi-la-dai-gia-61f3aa8a6b3b1d2e73c60eb5/tap-3', 'url': 'https://fptplay.vn/xem-video/sang-nhu-trang-trong-may-6156d8292089bd2184e26238',
'md5': 'b35be968c909b3e4e1e20ca45dd261b1',
'info_dict': { 'info_dict': {
'id': '61f3aa8a6b3b1d2e73c60eb5', 'id': '346034',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Má Tôi Là Đại Gia - Tập 3', 'title': 'Bright As The Moon',
'description': 'md5:ff8ba62fb6e98ef8875c42edff641d1c', 'description': '',
'release_year': '2021',
'season_number': '1',
'episode': 'Tập 1',
'episode_number': '1',
'duration': '2665'
}, },
}, { }, ]
'url': 'https://fptplay.vn/xem-video/lap-toi-do-giam-under-the-skin-6222d9684ec7230fa6e627a2/tap-4',
'md5': 'bcb06c55ec14786d7d4eda07fa1ccbb9',
'info_dict': {
'id': '6222d9684ec7230fa6e627a2',
'ext': 'mp4',
'title': 'Lạp Tội Đồ Giám - Tập 2B',
'description': 'md5:e5a47e9d35fbf7e9479ca8a77204908b',
},
}, {
'url': 'https://fptplay.vn/xem-video/nha-co-chuyen-hi-alls-well-ends-well-1997-6218995f6af792ee370459f0',
'only_matching': True,
}]
def _real_extract(self, url):
    """Extract a single video or a playlist of episodes from an fptplay.vn URL.

    The content id is taken from the URL, the detail API is queried for
    metadata, and the stream API is queried (with the Bearer token read from
    the "token" cookie) for the HLS manifest of the movie or of every
    non-trailer episode.

    Returns an info dict for a single video when ``episode_type`` is 0,
    otherwise a ``playlist`` dict with one entry per non-trailer episode.

    Raises ExtractorError when the "token" cookie is missing, when the detail
    response carries no ``result``, or when a stream request does not report
    ``msg == "success"`` or carries no stream URL.
    """
    content_id = self._match_id(url)

    # Need valid cookie with Bearer token, else it won't work
    token = self._get_cookies(url).get('token')
    if token is None:
        # Fail with a readable message instead of AttributeError on token.value
        raise ExtractorError(
            'No "token" cookie found; pass the cookies of a logged-in fptplay.vn session',
            expected=True)
    auth_headers = {'authorization': f'Bearer {token.value}'}

    def extract_stream(stream_index, display_id):
        # Shared by the movie and episode paths: fetch the stream manifest and
        # turn its HLS URL into (formats, subtitles).
        manifest = self._download_json(
            self.get_api_with_st_token(content_id, stream_index), display_id,
            headers=auth_headers, expected_status=406)
        if manifest.get('msg') != 'success':
            raise ExtractorError(f' - Got an error, response: {manifest.get("msg")}', expected=True)
        stream_url = (manifest.get('data') or {}).get('url')
        if not stream_url:
            raise ExtractorError('Stream response did not contain a manifest URL', expected=True)
        # Pass 'mp4' explicitly so the resulting formats get the mp4 extension
        return self._extract_m3u8_formats_and_subtitles(stream_url, display_id, 'mp4')

    res = self._download_json(
        self.get_api_with_st_token(content_id), content_id, expected_status=406)
    details = res.get('result')
    if not details:
        # NOTE(review): assumes error responses of the detail API also expose "msg" - confirm
        raise ExtractorError(
            f'Unable to get video details, response: {res.get("msg")}', expected=True)

    # Optional metadata is read with .get() so a missing field does not abort extraction
    title = details.get('title_origin') or details.get('title_vie')
    release_year = int_or_none(details.get('movie_release_date'))

    if details.get('episode_type') == 0:
        # movie or single video
        formats, subtitles = extract_stream(0, content_id)
        return {
            'id': content_id,
            'title': title,
            'description': details.get('description'),
            'thumbnail': details.get('thumb'),
            'release_year': release_year,
            'duration': int_or_none(details.get('duration')),
            'formats': formats,
            'subtitles': subtitles,
        }

    # playlist
    entries = []
    for episode in details.get('episodes') or []:
        if episode.get('is_trailer') == 1:
            continue
        episode_index = episode['_id']
        formats, subtitles = extract_stream(episode_index, episode_index)
        # "_id" is used as a 0-based index; coerce it so a string value cannot break "+ 1"
        index_number = int_or_none(episode_index)
        entries.append({
            'id': episode['ref_episode_id'],
            'title': title,
            'description': episode.get('description'),
            'thumbnail': episode.get('thumb'),
            'release_year': release_year,
            'season_number': 1,  # Assuming season 1 for simplicity
            'episode': episode.get('title'),
            'episode_number': None if index_number is None else index_number + 1,
            'duration': int_or_none(episode.get('duration')),
            'formats': formats,
            'subtitles': subtitles,
        })

    return {
        '_type': 'playlist',
        'id': content_id,
        'title': title,
        'entries': entries,
    }
def get_api_with_st_token(self, video_id, episode): def get_api_with_st_token(self, video_id, episode=None):
path = f'/api/v6.2_w/stream/vod/{video_id}/{episode}/auto_vip' if episode is not None:
path = f'/api/v7.1_w/stream/vod/{video_id}/{0 if episode is None else episode}/adaptive_bitrate'
else:
path = f'/api/v7.1_w/vod/detail/{video_id}'
timestamp = int(time.time()) + 10800 timestamp = int(time.time()) + 10800
t = hashlib.md5(f'6ea6d2a4e2d3a4bd5e275401aa086d{timestamp}{path}'.encode()).hexdigest().upper()
t = hashlib.md5(f'WEBv6Dkdsad90dasdjlALDDDS{timestamp}{path}'.encode()).hexdigest().upper()
r = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' r = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
n = [int(f'0x{t[2 * o: 2 * o + 2]}', 16) for o in range(len(t) // 2)] n = [int(f'0x{t[2 * o: 2 * o + 2]}', 16) for o in range(len(t) // 2)]
@ -89,7 +125,7 @@ class FptplayIE(InfoExtractor):
i[n] = e[c] i[n] = e[c]
n += 1 n += 1
c += 1 c += 1
if 3 == n: if n == 3:
a[0] = (252 & i[0]) >> 2 a[0] = (252 & i[0]) >> 2
a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4) a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4)
a[2] = ((15 & i[1]) << 2) + ((192 & i[2]) >> 6) a[2] = ((15 & i[1]) << 2) + ((192 & i[2]) >> 6)
@ -100,7 +136,6 @@ class FptplayIE(InfoExtractor):
if n: if n:
for o in range(n, 3): for o in range(n, 3):
i[o] = 0 i[o] = 0
for o in range(n + 1): for o in range(n + 1):
a[0] = (252 & i[0]) >> 2 a[0] = (252 & i[0]) >> 2
a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4) a[1] = ((3 & i[0]) << 4) + ((240 & i[1]) >> 4)