[spotify] Detect iframe embeds (#3430)

Authored by: fstirlitz
This commit is contained in:
Felix S 2022-04-14 13:22:47 +00:00 committed by GitHub
parent cda1bc5197
commit a49e777d59
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 18 additions and 3 deletions

View file

@ -67,6 +67,7 @@ from .simplecast import SimplecastIE
from .soundcloud import SoundcloudEmbedIE from .soundcloud import SoundcloudEmbedIE
from .spankwire import SpankwireIE from .spankwire import SpankwireIE
from .sportbox import SportBoxIE from .sportbox import SportBoxIE
from .spotify import SpotifyBaseIE
from .springboardplatform import SpringboardPlatformIE from .springboardplatform import SpringboardPlatformIE
from .svt import SVTIE from .svt import SVTIE
from .teachable import TeachableIE from .teachable import TeachableIE
@ -3164,6 +3165,11 @@ class GenericIE(InfoExtractor):
if sportbox_urls: if sportbox_urls:
return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key()) return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key())
# Look for embedded Spotify player
spotify_urls = SpotifyBaseIE._extract_embed_urls(webpage)
if spotify_urls:
return self.playlist_from_matches(spotify_urls, video_id, video_title)
# Look for embedded XHamster player # Look for embedded XHamster player
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
if xhamster_urls: if xhamster_urls:

View file

@ -19,7 +19,7 @@ class SpotifyBaseIE(InfoExtractor):
'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0', 'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0',
'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d', 'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d',
} }
_VALID_URL_TEMPL = r'https?://open\.spotify\.com/%s/(?P<id>[^/?&#]+)' _VALID_URL_TEMPL = r'https?://open\.spotify\.com/(?:embed-podcast/|embed/|)%s/(?P<id>[^/?&#]+)'
def _real_initialize(self): def _real_initialize(self):
self._ACCESS_TOKEN = self._download_json( self._ACCESS_TOKEN = self._download_json(
@ -93,11 +93,17 @@ class SpotifyBaseIE(InfoExtractor):
'series': series, 'series': series,
} }
@classmethod
def _extract_embed_urls(cls, webpage):
    """Return the URLs of Spotify player iframes found in *webpage*.

    Scans the HTML text for ``<iframe>`` tags whose double-quoted ``src``
    attribute points at ``open.spotify.com`` under either the ``embed/``
    or the ``embed-podcast/`` prefix, i.e. the same embed prefixes that
    ``_VALID_URL_TEMPL`` accepts.

    Args:
        webpage: HTML source of the page to scan, as a string.

    Returns:
        A list of the matched ``src`` URLs in document order; an empty
        list when the page contains no such iframe.
    """
    # Every dot in the host name is escaped so that it only matches a
    # literal ".", keeping look-alike hosts (e.g. "open.spotifyXcom")
    # from being picked up as Spotify embeds.
    return re.findall(
        r'<iframe[^>]+src="(https?://open\.spotify\.com/embed(?:-podcast)?/[^"]+)"',
        webpage)
class SpotifyIE(SpotifyBaseIE): class SpotifyIE(SpotifyBaseIE):
IE_NAME = 'spotify' IE_NAME = 'spotify'
_VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode' _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode'
_TEST = { _TESTS = [{
'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo', 'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo',
'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b', 'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b',
'info_dict': { 'info_dict': {
@ -109,7 +115,10 @@ class SpotifyIE(SpotifyBaseIE):
'release_date': '20201217', 'release_date': '20201217',
'series': "The Guardian's Audio Long Reads", 'series': "The Guardian's Audio Long Reads",
} }
} }, {
'url': 'https://open.spotify.com/embed/episode/4TvCsKKs2thXmarHigWvXE?si=7eatS8AbQb6RxqO2raIuWA',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
episode_id = self._match_id(url) episode_id = self._match_id(url)