Improve MDN/JavaScript scraper

This commit is contained in:
Thibaut Courouble 2016-05-01 11:47:40 -04:00
parent e2b7004078
commit 9e1b9ca2a9
5 changed files with 28 additions and 7 deletions

View file

@ -54,7 +54,8 @@ module Docs
# Builds an Entry for the current page.
#
# name - the entry name.
# frag - optional fragment. When it already contains a '#', it is used as the
#        complete entry path; otherwise it is appended to the page path
#        after a '#'. When omitted, the page path is used as is.
# type - optional entry type; falls back to self.type when not given.
#
# Returns the new Entry.
def build_entry(name, frag = nil, type = nil)
  type ||= self.type

  # Use a distinct local name so the `path` method is not shadowed.
  entry_path =
    if !frag
      path
    elsif frag.include?('#')
      frag
    else
      "#{path}##{frag}"
    end

  Entry.new(name, entry_path, type)
end
end
end

View file

@ -12,12 +12,6 @@ module Docs
div = at_css '#Global_Objects + div'
div.css('h3').each { |node| node.name = 'h2' }
at_css('#Global_Objects').replace(div.children)
# Remove heading links
css('h2 > a').each do |node|
node.before(node.content)
node.remove
end
end
def other

View file

@ -71,6 +71,22 @@ module Docs
end
end
# Collects extra 'Operators' entries from the root page.
#
# For each operator group, every link whose href starts with
# "operators/<group>_operators#" yields one [name, href, 'Operators'] triple.
# The name is the link text with ASCII letters removed and surrounding
# whitespace stripped. Duplicate triples are dropped.
#
# Returns an Array of triples, or an empty Array on non-root pages.
def additional_entries
  return [] unless root_page?

  groups = %w(arithmetic assignment bitwise comparison logical)
  links = groups.flat_map do |group|
    css("a[href^='operators/#{group}_operators#']").to_a
  end

  triples = links.map do |link|
    label = link.content.strip.gsub(%r{[a-zA-Z]}, '').strip
    [label, link['href'], 'Operators']
  end

  triples.uniq
end
def include_default_entry?
node = doc.at_css '.overheadIndicator, .warning'

View file

@ -25,6 +25,11 @@ module Docs
css('h2[style]', 'pre[style]', 'th[style]', 'div[style*="line-height"]').remove_attr('style')
css('h2 > a[name]', 'h3 > a[name]').each do |node|
node.parent['id'] = node['name']
node.before(node.content).remove
end
doc
end
end

View file

@ -69,6 +69,11 @@ class EntriesFilterTest < MiniTest::Spec
assert_equal 'path#frag', entries.last.path
end
# An additional entry whose path already carries a fragment must keep that
# path unchanged on the resulting entry.
it "has a path with the given path" do
  custom_entry = ['test', 'custom_path#frag']
  stub(filter).additional_entries { [custom_entry] }

  assert_equal 'custom_path#frag', entries.last.path
end
it "has the given type" do
stub(filter).additional_entries { [['test', nil, 'test']] }
assert_equal 'test', entries.last.type