From 9e1b9ca2a952ac8870fb6a1fe4b831f670c9dbc0 Mon Sep 17 00:00:00 2001 From: Thibaut Courouble Date: Sun, 1 May 2016 11:47:40 -0400 Subject: [PATCH] Improve MDN/JavaScript scraper --- lib/docs/filters/core/entries.rb | 3 ++- lib/docs/filters/javascript/clean_html.rb | 6 ------ lib/docs/filters/javascript/entries.rb | 16 ++++++++++++++++ lib/docs/filters/mdn/clean_html.rb | 5 +++++ test/lib/docs/filters/core/entries_test.rb | 5 +++++ 5 files changed, 28 insertions(+), 7 deletions(-) diff --git a/lib/docs/filters/core/entries.rb b/lib/docs/filters/core/entries.rb index a86f1ac1..30f7c4e9 100644 --- a/lib/docs/filters/core/entries.rb +++ b/lib/docs/filters/core/entries.rb @@ -54,7 +54,8 @@ module Docs def build_entry(name, frag = nil, type = nil) type ||= self.type - Entry.new name, frag ? "#{path}##{frag}" : path, type + path = frag ? (frag.include?('#') ? frag : "#{self.path}##{frag}") : self.path + Entry.new(name, path, type) end end end diff --git a/lib/docs/filters/javascript/clean_html.rb b/lib/docs/filters/javascript/clean_html.rb index a4f07fc8..f4e97865 100644 --- a/lib/docs/filters/javascript/clean_html.rb +++ b/lib/docs/filters/javascript/clean_html.rb @@ -12,12 +12,6 @@ module Docs div = at_css '#Global_Objects + div' div.css('h3').each { |node| node.name = 'h2' } at_css('#Global_Objects').replace(div.children) - - # Remove heading links - css('h2 > a').each do |node| - node.before(node.content) - node.remove - end end def other diff --git a/lib/docs/filters/javascript/entries.rb b/lib/docs/filters/javascript/entries.rb index ad3197a3..872e33a0 100644 --- a/lib/docs/filters/javascript/entries.rb +++ b/lib/docs/filters/javascript/entries.rb @@ -71,6 +71,22 @@ module Docs end end + def additional_entries + return [] unless root_page? + entries = [] + + %w(arithmetic assignment bitwise comparison logical).each do |s| + css("a[href^='operators/#{s}_operators#']").each do |node| + name = node.content.strip + name.remove! %r{[a-zA-Z]} + name.strip! + entries << [name, node['href'], 'Operators'] + end + end + + entries.uniq + end + def include_default_entry? node = doc.at_css '.overheadIndicator, .warning' diff --git a/lib/docs/filters/mdn/clean_html.rb b/lib/docs/filters/mdn/clean_html.rb index 834955bf..5118ae24 100644 --- a/lib/docs/filters/mdn/clean_html.rb +++ b/lib/docs/filters/mdn/clean_html.rb @@ -25,6 +25,11 @@ module Docs css('h2[style]', 'pre[style]', 'th[style]', 'div[style*="line-height"]').remove_attr('style') + css('h2 > a[name]', 'h3 > a[name]').each do |node| + node.parent['id'] = node['name'] + node.before(node.content).remove + end + doc end end diff --git a/test/lib/docs/filters/core/entries_test.rb b/test/lib/docs/filters/core/entries_test.rb index c5298f4d..0fa843a0 100644 --- a/test/lib/docs/filters/core/entries_test.rb +++ b/test/lib/docs/filters/core/entries_test.rb @@ -69,6 +69,11 @@ class EntriesFilterTest < MiniTest::Spec assert_equal 'path#frag', entries.last.path end + it "has a path with the given path" do + stub(filter).additional_entries { [['test', 'custom_path#frag']] } + assert_equal 'custom_path#frag', entries.last.path + end + it "has the given type" do stub(filter).additional_entries { [['test', nil, 'test']] } assert_equal 'test', entries.last.type