[globo] the long-awaited rewrite

This commit is contained in:
slipinthedove 2024-12-12 22:11:58 -03:00
parent f6c73aad5f
commit 8678ff44cc

View file

@ -1,13 +1,9 @@
import base64
import hashlib
import json import json
import random
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..networking import HEADRequest from ..networking import HEADRequest
from ..utils import ( from ..utils import (
ExtractorError,
float_or_none, float_or_none,
orderedSet, orderedSet,
str_or_none, str_or_none,
@ -19,14 +15,14 @@ class GloboIE(InfoExtractor):
_VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})' _VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
_NETRC_MACHINE = 'globo' _NETRC_MACHINE = 'globo'
_TESTS = [{ _TESTS = [{
'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/', 'url': 'https://globoplay.globo.com/v/3607726/',
'info_dict': { 'info_dict': {
'id': '3607726', 'id': '3607726',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa', 'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa',
'duration': 103.204, 'duration': 103.204,
'uploader': 'G1', 'uploader': 'G1 ao vivo',
'uploader_id': '2015', 'uploader_id': '4209',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -38,126 +34,61 @@ class GloboIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Acidentes de trânsito estão entre as maiores causas de queda de energia em SP', 'title': 'Acidentes de trânsito estão entre as maiores causas de queda de energia em SP',
'duration': 137.973, 'duration': 137.973,
'uploader': 'Rede Globo', 'uploader': 'Bom Dia Brasil',
'uploader_id': '196', 'uploader_id': '810',
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'http://canalbrasil.globo.com/programas/sangue-latino/videos/3928201.html',
'only_matching': True,
}, {
'url': 'http://globosatplay.globo.com/globonews/v/4472924/',
'only_matching': True,
}, {
'url': 'http://globotv.globo.com/t/programa/v/clipe-sexo-e-as-negas-adeus/3836166/',
'only_matching': True,
}, {
'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
'only_matching': True,
}, {
'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html',
'only_matching': True,
}, { }, {
'url': 'globo:3607726', 'url': 'globo:3607726',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://globoplay.globo.com/v/10248083/',
'info_dict': {
'id': '10248083',
'ext': 'mp4',
'title': 'Melhores momentos: Equador 1 x 1 Brasil pelas Eliminatórias da Copa do Mundo 2022',
'duration': 530.964,
'uploader': 'SporTV',
'uploader_id': '698',
},
'params': {
'skip_download': True,
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
self._request_webpage( self._request_webpage(
HEADRequest('https://globo-ab.globo.com/v2/selected-alternatives?experiments=player-isolated-experiment-02&skipImpressions=true'), HEADRequest('https://ab.g.globo/v2/selected-alternatives?experiments=player-isolated-experiment-02&skipImpressions=true'),
video_id, 'Getting cookies') video_id, 'Getting cookies')
video = self._download_json( video = self._download_json(
f'http://api.globovideos.com/videos/{video_id}/playlist', f'https://cloud-jarvis.globo.com/graphql?operationName=getVideoView&variables=%7B"videoId":"{video_id}"%7D&extensions=%7B"persistedQuery":%7B"version":1,"sha256Hash":"93309d471104ca6d6ed67f02ead5fe8db25b22bef7d6dedb3be98ea82e799d0a"%7D%7D', video_id,
video_id)['videos'][0] headers={'content-type': 'application/json', 'x-platform-id': 'web', 'x-device-id': 'desktop', 'x-client-version': '2024.12-5'})['data']['video']
if not self.get_param('allow_unplayable_formats') and video.get('encrypted') is True:
self.report_drm(video_id)
title = video['title'] title = video['headline']
uploader = video['title'].get('headline')
uploader_id = str_or_none(video['title'].get('originProgramId'))
formats = [] formats = []
security = self._download_json( security = self._download_json(
'https://playback.video.globo.com/v2/video-session', video_id, f'Downloading security hash for {video_id}', 'https://playback.video.globo.com/v4/video-session', video_id, f'Downloading resource info for {video_id}',
headers={'content-type': 'application/json'}, data=json.dumps({ headers={'Content-Type': 'application/json'}, data=json.dumps({
'player_type': 'desktop', 'player_type': 'desktop',
'video_id': video_id, 'video_id': video_id,
'quality': 'max', 'quality': 'max',
'content_protection': 'widevine', 'content_protection': 'widevine',
'vsid': '581b986b-4c40-71f0-5a58-803e579d5fa2', 'vsid': '2938bc7c-9376-d4b7-ee91-ce46dbbf9f4d',
'tz': '-3.0:00', 'tz': '-03:00',
'version': 1,
}).encode()) }).encode())
if not self.get_param('allow_unplayable_formats') and security['resource'].get('drm_protection_enabled') is True:
self.report_drm(video_id)
self._request_webpage(HEADRequest(security['sources'][0]['url_template']), video_id, 'Getting locksession cookie') main_resource = security['sources'][0]
resource_url = main_resource['url']
security_hash = security['sources'][0]['token']
if not security_hash:
message = security.get('message')
if message:
raise ExtractorError(
f'{self.IE_NAME} returned error: {message}', expected=True)
hash_code = security_hash[:2]
padding = '%010d' % random.randint(1, 10000000000)
if hash_code in ('04', '14'):
received_time = security_hash[3:13]
received_md5 = security_hash[24:]
hash_prefix = security_hash[:23]
elif hash_code in ('02', '12', '03', '13'):
received_time = security_hash[2:12]
received_md5 = security_hash[22:]
padding += '1'
hash_prefix = '05' + security_hash[:22]
padded_sign_time = str(int(received_time) + 86400) + padding
md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode()
signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
signed_hash = hash_prefix + padded_sign_time + signed_md5
source = security['sources'][0]['url_parts']
resource_url = source['scheme'] + '://' + source['domain'] + source['path']
signed_url = '{}?h={}&k=html5&a={}'.format(resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A')
fmts, subtitles = self._extract_m3u8_formats_and_subtitles( fmts, subtitles = self._extract_m3u8_formats_and_subtitles(
signed_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) resource_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
formats.extend(fmts) formats.extend(fmts)
for resource in video['resources']: subs = try_get(security, lambda x: x['sources'][0]['text']) or {}
if resource.get('type') == 'subtitle': for sub in subs.items():
subtitles.setdefault(resource.get('language') or 'por', []).append({ if sub['subtitle']:
'url': resource.get('url'), subtitles.setdefault(sub or 'por', []).append({
}) 'url': sub['subtitle']['srt'].get('url'),
subs = try_get(security, lambda x: x['source']['subtitles'], expected_type=dict) or {}
for sub_lang, sub_url in subs.items():
if sub_url:
subtitles.setdefault(sub_lang or 'por', []).append({
'url': sub_url,
})
subs = try_get(security, lambda x: x['source']['subtitles_webvtt'], expected_type=dict) or {}
for sub_lang, sub_url in subs.items():
if sub_url:
subtitles.setdefault(sub_lang or 'por', []).append({
'url': sub_url,
}) })
duration = float_or_none(video.get('duration'), 1000) duration = float_or_none(video.get('duration'), 1000)
uploader = video.get('channel')
uploader_id = str_or_none(video.get('channel_id'))
return { return {
'id': video_id, 'id': video_id,