Merge pull request #2082 from thewheat/allow-python-empty-spans

Allow Python scraper to keep empty spans with ids
This commit is contained in:
Simon Legner 2024-01-05 17:58:34 +01:00 committed by GitHub
commit 783e5dc502
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 13 additions and 5 deletions

View file

@ -36,9 +36,11 @@ module Docs
node.replace(pre)
end
css('span[id]:empty').each do |node|
(node.next_element || node.previous_element)['id'] ||= node['id'] if node.next_element || node.previous_element
node.remove
unless context[:sphinx_keep_empty_ids]
css('span[id]:empty').each do |node|
(node.next_element || node.previous_element)['id'] ||= node['id'] if node.next_element || node.previous_element
node.remove
end
end
css('.section').each do |node|

View file

@ -7,6 +7,12 @@ module Docs
code: 'https://github.com/python/cpython'
}
# bypass the clean_text filter as it removes empty spans with ids
options[:clean_text] = false
# tell the sphinx filter to keep empty spans with ids instead of removing them
options[:sphinx_keep_empty_ids] = true
options[:skip_patterns] = [/whatsnew/]
options[:skip] = %w(
library/2to3.html
@ -22,14 +28,14 @@ module Docs
HTML
version '3.12' do
self.release = '3.12.0'
self.release = '3.12.1'
self.base_url = "https://docs.python.org/#{self.version}/"
html_filters.push 'python/entries_v3', 'sphinx/clean_html', 'python/clean_html'
end
version '3.11' do
self.release = '3.11.5'
self.release = '3.11.7'
self.base_url = "https://docs.python.org/#{self.version}/"
html_filters.push 'python/entries_v3', 'sphinx/clean_html', 'python/clean_html'