[extractor/JdItemVideo] Add an extractor that extracts the video links from a JD.com product page

This commit is contained in:
pikadoramon 2023-06-28 20:17:05 +08:00 committed by zhangzhanming
parent 314fce0c43
commit 0d917bba3f

View file

@ -50,36 +50,25 @@ class JdItemVideoIE(InfoExtractor):
] ]
def _real_extract(self, url): def _real_extract(self, url):
item_id = self._match_id(url=url) item_id = self._match_id(url=url)
resp = self._download_webpage(url_or_request=url, video_id=item_id) resp = self._download_webpage(url_or_request=url, video_id=item_id)
pattern_data = self._html_search_regex(pattern=r'"mainVideoId":"(\d+?)"', string=resp, name='videoId', pattern_data = self._html_search_regex(pattern=r'"mainVideoId":"(\d+?)"', string=resp, name='videoId',
default=None) default=None)
if pattern_data is None: if pattern_data is None:
raise ValueError( raise ValueError("There are no any video. %s" % url)
"There are no any video. %s" % url
)
description = self._html_extract_title(resp) description = self._html_extract_title(resp)
rand = random.randint(433333, 999999) rand = random.randint(433333, 999999)
timestamp = int(time.time() * 1000) timestamp = int(time.time() * 1000)
url = self._JD_API_VIDEO_CALLBACK_URL.format(rand=rand, timestamp=timestamp, video_id=pattern_data) url = self._JD_API_VIDEO_CALLBACK_URL.format(rand=rand, timestamp=timestamp, video_id=pattern_data)
mp4resp = self._download_webpage( mp4resp = self._download_webpage(url_or_request=url, video_id=item_id)
url_or_request=url,
video_id=item_id
)
detailResp = self._html_search_regex(pattern=r'jQuery\d+\((.+)\)', string=mp4resp, name='detail', default=None) detailResp = self._html_search_regex(pattern=r'jQuery\d+\((.+)\)', string=mp4resp, name='detail', default=None)
if detailResp is None: if detailResp is None:
raise ValueError( raise ValueError("Callback fail. return: %s" % detailResp)
"Callback fail. return: %s" % detailResp
)
detailRespJson = json.loads(detailResp) detailRespJson = json.loads(detailResp)
if detailRespJson.get("code", -1) != 0: if detailRespJson.get("code", -1) != 0:
raise ValueError( raise ValueError("Callback fail. return: %s" % detailResp)
"Callback fail. return: %s" % detailResp
)
ext = determine_ext(url=detailRespJson.get("playUrl", "")) ext = determine_ext(url=detailRespJson.get("playUrl", ""))