mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-03 06:54:08 +01:00
[GoogleDriveFolderIE] improve code readability
- Combined `_extract_json_ds` and `_extract_json_hash` into one method (`_extract_json_meta`) - Improved `item_url_getter`'s traversal path of item info - Added comments to improve code readability
This commit is contained in:
parent
99d9105f33
commit
6e98d99dd5
1 changed files with 32 additions and 34 deletions
|
@ -303,7 +303,7 @@ class GoogleDriveFolderIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
'playlist_count': 3,
|
'playlist_count': 3,
|
||||||
}, {
|
}, {
|
||||||
# Contains various formats and a subfolder
|
'note': 'Contains various formats and a subfolder, folder name was formerly mismatched',
|
||||||
'url': 'https://drive.google.com/drive/folders/1CkqRsNlzZ0o3IL083j17s6sH5Q83DcGo',
|
'url': 'https://drive.google.com/drive/folders/1CkqRsNlzZ0o3IL083j17s6sH5Q83DcGo',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1CkqRsNlzZ0o3IL083j17s6sH5Q83DcGo',
|
'id': '1CkqRsNlzZ0o3IL083j17s6sH5Q83DcGo',
|
||||||
|
@ -311,45 +311,43 @@ class GoogleDriveFolderIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
'playlist_count': 6,
|
'playlist_count': 6,
|
||||||
}]
|
}]
|
||||||
_JSON_DS_RE = r'key\s*?:\s*?([\'"])ds:\s*?%d\1,[^}]*data:'
|
|
||||||
_JSON_HASH_RE = r'hash\s*?:\s*?([\'"])%d\1,[^}]*data:'
|
def _extract_json_meta(self, webpage, video_id, dsval=None, hashval=None, name=None, **kwargs):
|
||||||
|
"""
|
||||||
|
Uses regex to search for json metadata with 'ds' value(0-5) or 'hash' value(1-6)
|
||||||
|
from the webpage.
|
||||||
|
Folder info: ds=0, hash=1; Folder items: ds=4, hash=6.
|
||||||
|
For example, if the webpage contains the line below, the empty data array
|
||||||
|
can be got by passing dsval=3 or hashval=2 to this method.
|
||||||
|
AF_initDataCallback({key: 'ds:3', hash: '2', data:[], sideChannel: {}});
|
||||||
|
"""
|
||||||
_ARRAY_RE = r'\[(?s:.+)\]'
|
_ARRAY_RE = r'\[(?s:.+)\]'
|
||||||
|
_META_END_RE = r', sideChannel: \{\}\}\);' # greedy match to deal with the 2nd test case
|
||||||
def _extract_json_ds(self, dsval, webpage, video_id, **kwargs):
|
if dsval:
|
||||||
"""
|
if not name:
|
||||||
Searches for json with the 'ds' value(0~5) from the webpage with regex.
|
name = f'webpage JSON metadata ds:{dsval}'
|
||||||
Folder info: ds=0; Folder items: ds=4.
|
return self._search_json(
|
||||||
For example, if the webpage contains the line below, the empty data array
|
rf'''key\s*?:\s*?(['"])ds:\s*?{dsval}\1,[^\[]*?data:''', webpage, name, video_id,
|
||||||
can be got by passing dsval=3 to this function.
|
end_pattern=_META_END_RE, contains_pattern=_ARRAY_RE, **kwargs)
|
||||||
AF_initDataCallback({key: 'ds:3', hash: '2', data:[], sideChannel: {}});
|
elif hashval:
|
||||||
"""
|
if not name:
|
||||||
return self._search_json(self._JSON_DS_RE % dsval, webpage,
|
name = f'webpage JSON metadata hash:{hashval}'
|
||||||
f'webpage JSON ds:{dsval}', video_id,
|
return self._search_json(
|
||||||
contains_pattern=self._ARRAY_RE, **kwargs)
|
rf'''hash\s*?:\s*?(['"]){hashval}\1,[^\[]*?data:''', webpage, name, video_id,
|
||||||
|
end_pattern=_META_END_RE, contains_pattern=_ARRAY_RE, **kwargs)
|
||||||
def _extract_json_hash(self, hashval, webpage, video_id, **kwargs):
|
return None
|
||||||
"""
|
|
||||||
Searches for json with the 'hash' value(1~6) from the webpage with regex.
|
|
||||||
Folder info: hash=1; Folder items: hash=6.
|
|
||||||
For example, if the webpage contains the line below, the empty data array
|
|
||||||
can be got by passing hashval=2 to this function.
|
|
||||||
AF_initDataCallback({key: 'ds:3', hash: '2', data:[], sideChannel: {}});
|
|
||||||
"""
|
|
||||||
return self._search_json(self._JSON_HASH_RE % hashval, webpage,
|
|
||||||
f'webpage JSON hash:{hashval}', video_id,
|
|
||||||
contains_pattern=self._ARRAY_RE, **kwargs)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
def item_url_getter(item, video_id):
|
def item_url_getter(item, video_id):
|
||||||
available_IEs = [GoogleDriveFolderIE, GoogleDriveIE]
|
available_IEs = [GoogleDriveFolderIE, GoogleDriveIE] # subfolder or item
|
||||||
if 'application/vnd.google-apps.shortcut' in item:
|
if 'application/vnd.google-apps.shortcut' in item: # extract real link
|
||||||
entry_url = traverse_obj(
|
entry_url = traverse_obj(
|
||||||
item, (..., ..., lambda _, v: any(ie.suitable(v) for ie in available_IEs),
|
item,
|
||||||
{str}, any))
|
(..., ..., lambda _, v: any(ie.suitable(v) for ie in available_IEs), any))
|
||||||
else:
|
else:
|
||||||
entry_url = traverse_obj(
|
entry_url = traverse_obj(
|
||||||
item, (lambda _, v: any(ie.suitable(v) for ie in available_IEs),
|
item,
|
||||||
{str}, any))
|
(lambda _, v: any(ie.suitable(v) for ie in available_IEs), any))
|
||||||
if not entry_url:
|
if not entry_url:
|
||||||
return None
|
return None
|
||||||
return self.url_result(entry_url, video_id=video_id, video_title=item[2])
|
return self.url_result(entry_url, video_id=video_id, video_title=item[2])
|
||||||
|
@ -359,17 +357,17 @@ class GoogleDriveFolderIE(InfoExtractor):
|
||||||
|
|
||||||
webpage = self._download_webpage(url, folder_id, headers=headers)
|
webpage = self._download_webpage(url, folder_id, headers=headers)
|
||||||
json_folder_info = (
|
json_folder_info = (
|
||||||
self._extract_json_ds(0, webpage, folder_id, default=None)
|
self._extract_json_meta(webpage, folder_id, dsval=0, name='folder info', default=None)
|
||||||
or self._extract_json_hash(1, webpage, folder_id)
|
or self._extract_json_meta(webpage, folder_id, hashval=1)
|
||||||
)
|
)
|
||||||
json_items = (
|
json_items = (
|
||||||
self._extract_json_ds(4, webpage, folder_id, default=None)
|
self._extract_json_meta(webpage, folder_id, dsval=4, name='folder items', default=None)
|
||||||
or self._extract_json_hash(6, webpage, folder_id)
|
or self._extract_json_meta(webpage, folder_id, hashval=6)
|
||||||
)
|
)
|
||||||
|
|
||||||
title = json_folder_info[1][2]
|
title = json_folder_info[1][2]
|
||||||
items = json_items[-1]
|
items = json_items[-1]
|
||||||
if not isinstance(items, list):
|
if not isinstance(items, list): # empty folder
|
||||||
return self.playlist_result([], folder_id, title)
|
return self.playlist_result([], folder_id, title)
|
||||||
|
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
|
|
Loading…
Reference in a new issue