[pornhub] Extract cast

Closes #406, https://github.com/ytdl-org/youtube-dl/pull/27384
This commit is contained in:
pukkandan 2021-06-13 21:36:47 +05:30
parent 3fd4c2a543
commit d0fb4bd16f
No known key found for this signature in database
GPG key ID: 0F00D95A001F4698
2 changed files with 5 additions and 1 deletions

View file

@ -290,6 +290,7 @@ class InfoExtractor(object):
categories: A list of categories that the video falls in, for example
["Sports", "Berlin"]
tags: A list of tags assigned to the video, e.g. ["sweden", "pop music"]
cast: A list of the video's cast members
is_live: True, False, or None (=unknown). Whether this video is a
live stream that goes on instead of a fixed-length video.
was_live: True, False, or None (=unknown). Whether this video was

View file

@ -14,6 +14,7 @@ from ..compat import (
)
from .openload import PhantomJSwrapper
from ..utils import (
clean_html,
determine_ext,
ExtractorError,
int_or_none,
@ -145,6 +146,7 @@ class PornHubIE(PornHubBaseIE):
'age_limit': 18,
'tags': list,
'categories': list,
'cast': list,
},
}, {
# non-ASCII title
@ -464,7 +466,7 @@ class PornHubIE(PornHubBaseIE):
r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
% meta_key, webpage, meta_key, default=None)
if div:
return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
return [clean_html(x).strip() for x in re.findall(r'(?s)<a[^>]+\bhref=[^>]+>.+?</a>', div)]
info = self._search_json_ld(webpage, video_id, default={})
# description provided in JSON-LD is irrelevant
@ -485,6 +487,7 @@ class PornHubIE(PornHubBaseIE):
'age_limit': 18,
'tags': extract_list('tags'),
'categories': extract_list('categories'),
'cast': extract_list('pornstars'),
'subtitles': subtitles,
}, info)