yt-dlp/yt_dlp/extractor/dropbox.py

import base64
import os.path
import re
import urllib.parse

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    update_url,
    update_url_query,
    url_basename,
    urlencode_postdata,
)


class DropboxIE(InfoExtractor):
    """Extractor for Dropbox shared-file links (``/s/``, ``/sh/``, ``/scl/fi/`` and ``/e/scl/fi/``)."""

    _VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/fi|sh?)/(?P<id>\w+)'
    _TESTS = [
        {
            'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
            'info_dict': {
                'id': 'nelirfsxnmcfbfh',
                'ext': 'mp4',
                'title': 'youtube-dl test video \'ä"BaW_jenozKc',
            },
        }, {
            'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh',
            'only_matching': True,
        }, {
            'url': 'https://www.dropbox.com/sh/2mgpiuq7kv8nqdf/AABy-fW4dkydT4GmWi2mdOUDa?dl=0&preview=Drone+Shot.mp4',
            'only_matching': True,
        }, {
            'url': 'https://www.dropbox.com/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',
            'only_matching': True,
        }, {
            'url': 'https://www.dropbox.com/e/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',
            'only_matching': True,
        },
    ]

    def _yield_decoded_parts(self, webpage):
        """Yield the decoded payloads embedded in ``registerStreamedPrefetch(...)`` calls.

        The second string argument of every ``registerStreamedPrefetch("...", "...")``
        call found in *webpage* is base64-decoded and then decoded as UTF-8, with
        undecodable bytes dropped. Payloads are yielded in reverse order of
        appearance, i.e. the last call in the page comes first.
        """
        for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
            yield base64.b64decode(encoded).decode('utf-8', 'ignore')

    def _real_extract(self, url):
        """Build the info dict for the shared file at *url*.

        Returns a dict with ``id``, ``title``, ``formats``, ``subtitles`` and
        ``thumbnail``. The title is the URL's last path component, unquoted and
        stripped of its file extension.

        Raises ExtractorError when the file is password protected and no video
        password was supplied, when the supplied password is rejected, or when
        the ``t`` cookie needed for the password request is missing.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        fn = urllib.parse.unquote(url_basename(url))
        title = os.path.splitext(fn)[0]
        content_id = None

        # A payload mentioning '/sm/password' marks a password-protected share;
        # the content ID found in that payload is needed for the auth request.
        for part in self._yield_decoded_parts(webpage):
            if '/sm/password' in part:
                content_id = self._search_regex(r'content_id=([\w.+=/-]+)', part, 'content ID')
                break

        if content_id:
            password = self.get_param('videopassword')
            if not password:
                raise ExtractorError('Password protected video, use --video-password <password>', expected=True)

            # The auth request echoes back the value of the 't' cookie. Look it up
            # defensively so that a missing cookie produces a descriptive error
            # instead of a bare KeyError.
            t_cookie = self._get_cookies('https://www.dropbox.com').get('t')
            if t_cookie is None:
                raise ExtractorError('Unable to find the "t" cookie required for password authentication')

            response = self._download_json(
                'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password',
                data=urlencode_postdata({
                    'is_xhr': 'true',
                    't': t_cookie.value,
                    'content_id': content_id,
                    'password': password,
                    # Same URL with scheme and host blanked out
                    'url': update_url(url, scheme='', netloc=''),
                }))
            if response.get('status') != 'authed':
                raise ExtractorError('Invalid password', expected=True)

            # Fetch the page again now that the password has been accepted
            webpage = self._download_webpage(url, video_id)

        formats, subtitles = [], {}
        has_anonymous_download = False
        thumbnail = None
        for part in self._yield_decoded_parts(webpage):
            # Only payloads visited up to (and including) the one that holds the
            # m3u8 URL are checked for the anonymous-download marker.
            if not has_anonymous_download:
                has_anonymous_download = self._search_regex(
                    r'(anonymous:\tanonymous)', part, 'anonymous', default=False)
            # NOTE(review): the payload looks like a serialized binary message in
            # which the excluded control bytes delimit fields - unverified.
            transcode_url = self._search_regex(
                r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', part, 'transcode url', default=None)
            if not transcode_url:
                continue
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
            # The thumbnail is looked up in the same payload as the m3u8 URL
            thumbnail = self._search_regex(
                r'(https://www\.dropbox\.com/temp_thumb_from_token/[\w/?&=]+)', part, 'thumbnail', default=None)
            break

        # When the anonymous-download marker was seen, also offer the original
        # file, requested through the same URL with dl=1.
        if has_anonymous_download:
            formats.append({
                'url': update_url_query(url, {'dl': '1'}),
                'format_id': 'original',
                'format_note': 'Original',
                'quality': 1,
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': thumbnail,
        }
[ie/Dropbox] Fix extractor (#7926) Closes #7005, Closes #7696 Authored by: nathantouze, bashonly, denhotte 2023-08-28 23:33:48 +02:00			`import base64`
[dropbox] Correct test case (#2171) 2014-01-19 06:16:40 +01:00			`import os.path`
Added dropbox support. issue #2055 2014-01-18 16:15:53 +01:00			`import re`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`import urllib.parse`
Added dropbox support. issue #2055 2014-01-18 16:15:53 +01:00
			`from .common import InfoExtractor`
[Dropbox] Support password protected files and more formats (#2201) Authored by: zenerdi0de 2022-01-02 11:44:10 +01:00			`from ..utils import (`
			`ExtractorError,`
[ie/dropbox] Fix password-protected video support (#10735) Also adds thumbnail extraction Closes #9864 Authored by: ndyanx 2024-09-28 00:05:22 +02:00			`update_url,`
[ie/Dropbox] Fix extractor (#7926) Closes #7005, Closes #7696 Authored by: nathantouze, bashonly, denhotte 2023-08-28 23:33:48 +02:00			`update_url_query,`
[Dropbox] Support password protected files and more formats (#2201) Authored by: zenerdi0de 2022-01-02 11:44:10 +01:00			`url_basename,`
[ie/dropbox] Fix password-protected video support (#10735) Also adds thumbnail extraction Closes #9864 Authored by: ndyanx 2024-09-28 00:05:22 +02:00			`urlencode_postdata,`
[Dropbox] Support password protected files and more formats (#2201) Authored by: zenerdi0de 2022-01-02 11:44:10 +01:00			`)`
Added dropbox support. issue #2055 2014-01-18 16:15:53 +01:00
[dropbox] PEP8 and simplify (#2171) 2014-01-19 06:14:24 +01:00
Added support for Dropbox 2014-01-19 05:50:26 +01:00			`class DropboxIE(InfoExtractor):`
[ie/Dropbox] Fix extractor (#7926) Closes #7005, Closes #7696 Authored by: nathantouze, bashonly, denhotte 2023-08-28 23:33:48 +02:00			`_VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/fi\|sh?)/(?P<id>\w+)'`
PEP8: applied even more rules 2014-11-23 21:39:15 +01:00			`_TESTS = [`
			`{`
Completely change project name to yt-dlp (#85) * All modules and binary names are changed * All documentation references changed * yt-dlp no longer loads youtube-dlc config files * All URLs changed to point to organization account Co-authored-by: Pccode66 Co-authored-by: pukkandan 2021-02-24 19:45:56 +01:00			`'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',`
PEP8: applied even more rules 2014-11-23 21:39:15 +01:00			`'info_dict': {`
			`'id': 'nelirfsxnmcfbfh',`
			`'ext': 'mp4',`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`'title': 'youtube-dl test video \'ä"BaW_jenozKc',`
			`},`
PEP8: applied even more rules 2014-11-23 21:39:15 +01:00			`}, {`
[ie/Dropbox] Fix extractor (#7926) Closes #7005, Closes #7696 Authored by: nathantouze, bashonly, denhotte 2023-08-28 23:33:48 +02:00			`'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh',`
			`'only_matching': True,`
			`}, {`
			`'url': 'https://www.dropbox.com/sh/2mgpiuq7kv8nqdf/AABy-fW4dkydT4GmWi2mdOUDa?dl=0&preview=Drone+Shot.mp4',`
			`'only_matching': True,`
			`}, {`
			`'url': 'https://www.dropbox.com/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',`
			`'only_matching': True,`
			`}, {`
			`'url': 'https://www.dropbox.com/e/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',`
PEP8: applied even more rules 2014-11-23 21:39:15 +01:00			`'only_matching': True,`
			`},`
[dropbox] Recognize 'https://www.dropbox.com/sh/*' urls (fixes #3795) And extract the title from the url last path component. 2014-09-21 13:40:22 +02:00			`]`
[dropbox] PEP8 and simplify (#2171) 2014-01-19 06:14:24 +01:00
[ie/dropbox] Fix password-protected video support (#10735) Also adds thumbnail extraction Closes #9864 Authored by: ndyanx 2024-09-28 00:05:22 +02:00			`def _yield_decoded_parts(self, webpage):`
			`for encoded in reversed(re.findall(r'registerStreamedPrefetch\s\(\s"[\w/+=]+"\s,\s"([\w/+=]+)"', webpage)):`
			`yield base64.b64decode(encoded).decode('utf-8', 'ignore')`

[dropbox] PEP8 and simplify (#2171) 2014-01-19 06:14:24 +01:00			`def _real_extract(self, url):`
[extractor] Common function `_match_valid_url` 2021-08-19 03:41:24 +02:00			`mobj = self._match_valid_url(url)`
[dropbox] PEP8 and simplify (#2171) 2014-01-19 06:14:24 +01:00			`video_id = mobj.group('id')`
[Dropbox] Support password protected files and more formats (#2201) Authored by: zenerdi0de 2022-01-02 11:44:10 +01:00			`webpage = self._download_webpage(url, video_id)`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`fn = urllib.parse.unquote(url_basename(url))`
[dropbox] Fix test and add support for spaces in filenames 2014-07-21 12:57:40 +02:00			`title = os.path.splitext(fn)[0]`
[ie/dropbox] Fix password-protected video extraction (#11636) Closes #11634 Authored by: bashonly 2024-11-27 02:47:28 +01:00			`content_id = None`
[Dropbox] Support password protected files and more formats (#2201) Authored by: zenerdi0de 2022-01-02 11:44:10 +01:00
[ie/dropbox] Fix password-protected video support (#10735) Also adds thumbnail extraction Closes #9864 Authored by: ndyanx 2024-09-28 00:05:22 +02:00			`for part in self._yield_decoded_parts(webpage):`
			`if '/sm/password' in part:`
[ie/dropbox] Fix password-protected video extraction (#11636) Closes #11634 Authored by: bashonly 2024-11-27 02:47:28 +01:00			`content_id = self._search_regex(r'content_id=([\w.+=/-]+)', part, 'content ID')`
[ie/dropbox] Fix password-protected video support (#10735) Also adds thumbnail extraction Closes #9864 Authored by: ndyanx 2024-09-28 00:05:22 +02:00			`break`

[ie/dropbox] Fix password-protected video extraction (#11636) Closes #11634 Authored by: bashonly 2024-11-27 02:47:28 +01:00			`if content_id:`
			`password = self.get_param('videopassword')`
			`if not password:`
[Dropbox] Support password protected files and more formats (#2201) Authored by: zenerdi0de 2022-01-02 11:44:10 +01:00			`raise ExtractorError('Password protected video, use --video-password <password>', expected=True)`
[ie/dropbox] Fix password-protected video extraction (#11636) Closes #11634 Authored by: bashonly 2024-11-27 02:47:28 +01:00
			`response = self._download_json(`
			`'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password',`
			`data=urlencode_postdata({`
			`'is_xhr': 'true',`
			`'t': self._get_cookies('https://www.dropbox.com')['t'].value,`
			`'content_id': content_id,`
			`'password': password,`
			`'url': update_url(url, scheme='', netloc=''),`
			`}))`
			`if response.get('status') != 'authed':`
			`raise ExtractorError('Invalid password', expected=True)`

[ie/dropbox] Fix password-protected video support (#10735) Also adds thumbnail extraction Closes #9864 Authored by: ndyanx 2024-09-28 00:05:22 +02:00			`webpage = self._download_webpage(url, video_id)`
[Dropbox] Support password protected files and more formats (#2201) Authored by: zenerdi0de 2022-01-02 11:44:10 +01:00
[ie/dropbox] Fix password-protected video support (#10735) Also adds thumbnail extraction Closes #9864 Authored by: ndyanx 2024-09-28 00:05:22 +02:00			`formats, subtitles = [], {}`
			`has_anonymous_download = False`
			`thumbnail = None`
			`for part in self._yield_decoded_parts(webpage):`
[ie/dropbox] Fix formats extraction (#9627) Closes #9533 Authored by: bashonly 2024-04-06 19:19:44 +02:00			`if not has_anonymous_download:`
			`has_anonymous_download = self._search_regex(`
[ie/dropbox] Fix password-protected video support (#10735) Also adds thumbnail extraction Closes #9864 Authored by: ndyanx 2024-09-28 00:05:22 +02:00			`r'(anonymous:\tanonymous)', part, 'anonymous', default=False)`
[ie/Dropbox] Fix extractor (#7926) Closes #7005, Closes #7696 Authored by: nathantouze, bashonly, denhotte 2023-08-28 23:33:48 +02:00			`transcode_url = self._search_regex(`
[ie/dropbox] Fix password-protected video support (#10735) Also adds thumbnail extraction Closes #9864 Authored by: ndyanx 2024-09-28 00:05:22 +02:00			`r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', part, 'transcode url', default=None)`
[ie/Dropbox] Fix extractor (#7926) Closes #7005, Closes #7696 Authored by: nathantouze, bashonly, denhotte 2023-08-28 23:33:48 +02:00			`if not transcode_url:`
			`continue`
Bugfix for b9f2bc2dbed2323734a0d18e65e1e2e23dc833d8 Authored by: bashonly 2023-08-29 15:06:02 +02:00			`formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')`
[ie/dropbox] Fix password-protected video support (#10735) Also adds thumbnail extraction Closes #9864 Authored by: ndyanx 2024-09-28 00:05:22 +02:00			`thumbnail = self._search_regex(`
			`r'(https://www\.dropbox\.com/temp_thumb_from_token/[\w/?&=]+)', part, 'thumbnail', default=None)`
[ie/Dropbox] Fix extractor (#7926) Closes #7005, Closes #7696 Authored by: nathantouze, bashonly, denhotte 2023-08-28 23:33:48 +02:00			`break`
[Dropbox] Support password protected files and more formats (#2201) Authored by: zenerdi0de 2022-01-02 11:44:10 +01:00
			`# downloads enabled we can get the original file`
[ie/Dropbox] Fix extractor (#7926) Closes #7005, Closes #7696 Authored by: nathantouze, bashonly, denhotte 2023-08-28 23:33:48 +02:00			`if has_anonymous_download:`
			`formats.append({`
			`'url': update_url_query(url, {'dl': '1'}),`
			`'format_id': 'original',`
			`'format_note': 'Original',`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`'quality': 1,`
[ie/Dropbox] Fix extractor (#7926) Closes #7005, Closes #7696 Authored by: nathantouze, bashonly, denhotte 2023-08-28 23:33:48 +02:00			`})`
[dropbox] PEP8 and simplify (#2171) 2014-01-19 06:14:24 +01:00
			`return {`
			`'id': video_id,`
			`'title': title,`
[Dropbox] Support password protected files and more formats (#2201) Authored by: zenerdi0de 2022-01-02 11:44:10 +01:00			`'formats': formats,`
[cleanup] Add more ruff rules (#10149) Authored by: seproDev Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com> Reviewed-by: Simon Sawicki <contact@grub4k.xyz> 2024-06-12 01:09:58 +02:00			`'subtitles': subtitles,`
[ie/dropbox] Fix password-protected video support (#10735) Also adds thumbnail extraction Closes #9864 Authored by: ndyanx 2024-09-28 00:05:22 +02:00			`'thumbnail': thumbnail,`
[dropbox] PEP8 and simplify (#2171) 2014-01-19 06:14:24 +01:00			`}`