mirror of
https://github.com/yt-dlp/yt-dlp
synced 2025-01-13 20:01:57 +01:00
[ie/patreon] Fix comments extraction (#11530)
Closes #11483
Authored by: jshumphrey, bashonly
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
This commit is contained in:
parent
720b3dc453
commit
1d253b0a27
1 changed file with 35 additions and 16 deletions
|
@ -16,10 +16,10 @@ from ..utils import (
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
traverse_obj,
|
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
from ..utils.traversal import traverse_obj, value
|
||||||
|
|
||||||
|
|
||||||
class PatreonBaseIE(InfoExtractor):
|
class PatreonBaseIE(InfoExtractor):
|
||||||
|
@ -252,6 +252,27 @@ class PatreonIE(PatreonBaseIE):
|
||||||
'thumbnail': r're:^https?://.+',
|
'thumbnail': r're:^https?://.+',
|
||||||
},
|
},
|
||||||
'skip': 'Patron-only content',
|
'skip': 'Patron-only content',
|
||||||
|
}, {
|
||||||
|
# Contains a comment reply in the 'included' section
|
||||||
|
'url': 'https://www.patreon.com/posts/114721679',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '114721679',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'upload_date': '20241025',
|
||||||
|
'uploader': 'Japanalysis',
|
||||||
|
'like_count': int,
|
||||||
|
'thumbnail': r're:^https?://.+',
|
||||||
|
'comment_count': int,
|
||||||
|
'title': 'Karasawa Part 2',
|
||||||
|
'description': 'Part 2 of this video https://www.youtube.com/watch?v=Azms2-VTASk',
|
||||||
|
'uploader_url': 'https://www.patreon.com/japanalysis',
|
||||||
|
'uploader_id': '80504268',
|
||||||
|
'channel_url': 'https://www.patreon.com/japanalysis',
|
||||||
|
'channel_follower_count': int,
|
||||||
|
'timestamp': 1729897015,
|
||||||
|
'channel_id': '9346307',
|
||||||
|
},
|
||||||
|
'params': {'getcomments': True},
|
||||||
}]
|
}]
|
||||||
_RETURN_TYPE = 'video'
|
_RETURN_TYPE = 'video'
|
||||||
|
|
||||||
|
@ -404,26 +425,24 @@ class PatreonIE(PatreonBaseIE):
|
||||||
f'posts/{post_id}/comments', post_id, query=params, note=f'Downloading comments page {page}')
|
f'posts/{post_id}/comments', post_id, query=params, note=f'Downloading comments page {page}')
|
||||||
|
|
||||||
cursor = None
|
cursor = None
|
||||||
for comment in traverse_obj(response, (('data', ('included', lambda _, v: v['type'] == 'comment')), ...)):
|
for comment in traverse_obj(response, (('data', 'included'), lambda _, v: v['type'] == 'comment' and v['id'])):
|
||||||
count += 1
|
count += 1
|
||||||
comment_id = comment.get('id')
|
|
||||||
attributes = comment.get('attributes') or {}
|
|
||||||
if comment_id is None:
|
|
||||||
continue
|
|
||||||
author_id = traverse_obj(comment, ('relationships', 'commenter', 'data', 'id'))
|
author_id = traverse_obj(comment, ('relationships', 'commenter', 'data', 'id'))
|
||||||
author_info = traverse_obj(
|
|
||||||
response, ('included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes'),
|
|
||||||
get_all=False, expected_type=dict, default={})
|
|
||||||
|
|
||||||
yield {
|
yield {
|
||||||
'id': comment_id,
|
**traverse_obj(comment, {
|
||||||
'text': attributes.get('body'),
|
'id': ('id', {str_or_none}),
|
||||||
'timestamp': parse_iso8601(attributes.get('created')),
|
'text': ('attributes', 'body', {str}),
|
||||||
'parent': traverse_obj(comment, ('relationships', 'parent', 'data', 'id'), default='root'),
|
'timestamp': ('attributes', 'created', {parse_iso8601}),
|
||||||
'author_is_uploader': attributes.get('is_by_creator'),
|
'parent': ('relationships', 'parent', 'data', ('id', {value('root')}), {str}, any),
|
||||||
|
'author_is_uploader': ('attributes', 'is_by_creator', {bool}),
|
||||||
|
}),
|
||||||
|
**traverse_obj(response, (
|
||||||
|
'included', lambda _, v: v['id'] == author_id and v['type'] == 'user', 'attributes', {
|
||||||
|
'author': ('full_name', {str}),
|
||||||
|
'author_thumbnail': ('image_url', {url_or_none}),
|
||||||
|
}), get_all=False),
|
||||||
'author_id': author_id,
|
'author_id': author_id,
|
||||||
'author': author_info.get('full_name'),
|
|
||||||
'author_thumbnail': author_info.get('image_url'),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if count < traverse_obj(response, ('meta', 'count')):
|
if count < traverse_obj(response, ('meta', 'count')):
|
||||||
|
|
Loading…
Reference in a new issue