diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 967010826..3aba459fe 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -584,6 +584,10 @@ from .egghead import (
EggheadCourseIE,
EggheadLessonIE,
)
+from .eggs import (
+ EggsIE,
+ EggsArtistIE,
+)
from .eighttracks import EightTracksIE
from .eitb import EitbIE
from .elementorembed import ElementorEmbedIE
diff --git a/yt_dlp/extractor/eggs.py b/yt_dlp/extractor/eggs.py
new file mode 100644
index 000000000..368374bb7
--- /dev/null
+++ b/yt_dlp/extractor/eggs.py
@@ -0,0 +1,177 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ url_or_none,
+ unescapeHTML,
+)
+
+
+class EggsBaseIE(InfoExtractor):
+ def _parse_artist_name(self, webpage):
+ artist = self._search_regex(
+ r'
]+class=(["\'])artist_name\1[^>]*>([^<]+)
',
+ webpage, 'artist name', fatal=False, default=None, group=2
+ )
+ if artist:
+ return artist.strip()
+
+ og_title = self._html_search_meta(['og:title'], webpage, 'og:title', default=None)
+ if og_title:
+ artist_match = re.search(r'(?P[^()]+)(?:\([^)]*\))?のEggsページ', og_title)
+ if artist_match:
+ return artist_match.group('artist').strip()
+
+ return 'Unknown Artist'
+
+ def _parse_single_song(self, url, webpage, default_artist='Unknown Artist'):
+ song_id = self._search_regex(
+ r'/song/(?P[^/?#&]+)',
+ url, 'song id', fatal=False, default=None, group='id'
+ )
+
+ track_title = self._search_regex(
+ r']+class=(["\'])product_name\1[^>]*>\s*
([^<]+)
',
+ webpage, 'track title', fatal=False, default=None, group=2
+ )
+
+ if not track_title:
+ page_title = self._search_regex(
+ r'
(?P[^<]+)',
+ webpage, 'page title', fatal=False, default=None, group='title'
+ )
+ if page_title:
+ inner_match = re.search(r'「(?P[^」]+)」', page_title)
+ if inner_match:
+ track_title = inner_match.group('inner').strip()
+
+ if not track_title:
+ track_title = 'Unknown Title'
+
+ artist = default_artist
+ if not artist or artist == 'Unknown Artist':
+ artist_regex = r']+class=(["\'])artist_name\1[^>]*>\s*]*>([^<]+)'
+ fallback_artist = self._search_regex(
+ artist_regex, webpage, 'artist name',
+ fatal=False, default=None, group=2
+ )
+ if fallback_artist:
+ artist = fallback_artist.strip()
+
+ audio_url = self._search_regex(
+ r']+class=(["\'])[^"\']*player[^"\']*\1[^>]+data-src=(["\'])(?P
[^"\']+)\2',
+ webpage, 'audio url', fatal=True, group='audio_url'
+ )
+ audio_url = url_or_none(unescapeHTML(audio_url))
+ if not audio_url:
+ raise ExtractorError('Invalid audio URL.', expected=True)
+
+ thumbnail = (
+ self._html_search_meta(['og:image'], webpage, 'thumbnail', default=None)
+ or self._search_regex(
+ r']*>\s*]+src=(["\'])(?P[^"\']+)\1',
+ webpage, 'thumbnail', fatal=False, default=None, group='thumb'
+ )
+ )
+
+ return {
+ 'id': song_id,
+ 'url': audio_url,
+ 'title': track_title,
+ 'uploader': artist,
+ 'vcodec': 'none',
+ 'thumbnail': thumbnail,
+ }
+
+ def _parse_artist_page(self, webpage, artist_id, artist_name):
+ song_blocks = re.findall(r'(?s)]+id="songs\d+"[^>]*>.*?', webpage)
+ entries = []
+
+ for block in song_blocks:
+ audio_url = self._search_regex(
+ r'data-src=(["\'])(?Phttps?://.*?\.(?:mp3|m4a).*?)\1',
+ block, 'audio url', fatal=False, default=None, group='url'
+ )
+ audio_url = url_or_none(unescapeHTML(audio_url))
+ if not audio_url:
+ continue
+
+ track_id = self._search_regex(
+ r'data-srcid=(["\'])(?P[^"\'<>]+)\1',
+ block, 'track id', fatal=False, default=None, group='id'
+ )
+ if not track_id:
+ continue
+
+ title = self._search_regex(
+ r'data-srcname=(["\'])(?P[^"\']+)\1',
+ block, 'track title', fatal=False, default=None, group='title'
+ )
+ if not title:
+ title = 'Unknown Title'
+
+ thumbnail = self._search_regex(
+ r']+src=(["\'])(?P[^"\']+)\1',
+ block, 'thumbnail', fatal=False, default=None, group='th'
+ )
+
+ entries.append({
+ 'id': track_id,
+ 'url': audio_url,
+ 'title': title,
+ 'uploader': artist_name,
+ 'vcodec': 'none',
+ 'thumbnail': thumbnail,
+ })
+
+ return entries
+
+class EggsIE(EggsBaseIE):
+ IE_NAME = 'eggs:single'
+ _VALID_URL = (
+ r'https?://(?:www\.)?eggs\.mu/artist/(?P[^/]+)/song/(?P[^/?#&]+)'
+ )
+ _TESTS = [{
+ 'url': 'https://eggs.mu/artist/32_sunny_girl/song/0e95fd1d-4d61-4d5b-8b18-6092c551da90',
+ 'info_dict': {
+ 'id': '0e95fd1d-4d61-4d5b-8b18-6092c551da90',
+ 'ext': 'm4a',
+ 'title': 'シネマと信号',
+ 'uploader': 'Sunny Girl',
+ 'thumbnail': r're:^https?://.*\.jpg(?:\?.*)?$',
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = self._match_valid_url(url)
+ song_id = mobj.group('song_id')
+ webpage = self._download_webpage(url, song_id)
+ artist_name = self._parse_artist_name(webpage)
+ info = self._parse_single_song(url, webpage, artist_name)
+ return info
+
+class EggsArtistIE(EggsBaseIE):
+ IE_NAME = 'eggs:artist'
+ _VALID_URL = (
+ r'https?://(?:www\.)?eggs\.mu/artist/(?P[^/?#]+)(?:[/?#].*)?$'
+ )
+ _TESTS = [{
+ 'url': 'https://eggs.mu/artist/32_sunny_girl',
+ 'info_dict': {
+ 'id': '32_sunny_girl',
+ 'title': 'Sunny Girl',
+ },
+ 'playlist_count': 18,
+ }]
+
+ def _real_extract(self, url):
+ artist_id = self._match_valid_url(url).group('artist_id')
+ webpage = self._download_webpage(url, artist_id)
+ artist_name = self._parse_artist_name(webpage)
+ entries = self._parse_artist_page(webpage, artist_id, artist_name)
+ return self.playlist_result(
+ entries,
+ playlist_id=artist_id,
+ playlist_title=artist_name
+ )
\ No newline at end of file
|