diff --git a/lib/docs/core/scrapers/url_scraper.rb b/lib/docs/core/scrapers/url_scraper.rb index 97bcb6f4..46eba810 100644 --- a/lib/docs/core/scrapers/url_scraper.rb +++ b/lib/docs/core/scrapers/url_scraper.rb @@ -2,14 +2,17 @@ module Docs class UrlScraper < Scraper class << self attr_accessor :params + attr_accessor :headers def inherited(subclass) super subclass.params = params.deep_dup + subclass.headers = headers.deep_dup end end self.params = {} + self.headers = { 'User-Agent' => 'devdocs.io' } private @@ -22,7 +25,7 @@ module Docs end def request_options - { params: self.class.params } + { params: self.class.params, headers: self.class.headers } end def process_response?(response) diff --git a/lib/docs/filters/dojo/clean_html.rb b/lib/docs/filters/dojo/clean_html.rb index 2e6e0111..0cf2c443 100644 --- a/lib/docs/filters/dojo/clean_html.rb +++ b/lib/docs/filters/dojo/clean_html.rb @@ -2,8 +2,15 @@ module Docs class Dojo class CleanHtmlFilter < Filter def call - # TODO: Probably needs a little more cleanup but should do for the moment css('script').remove + + css('.version').remove + + #Remove links which are broken on the methods + doc.css(".functionIcon a").each do |a| + a.replace a.content + end + doc end end diff --git a/lib/docs/filters/dojo/entries.rb b/lib/docs/filters/dojo/entries.rb index 1c50799d..b02ea0ef 100644 --- a/lib/docs/filters/dojo/entries.rb +++ b/lib/docs/filters/dojo/entries.rb @@ -8,7 +8,6 @@ module Docs def get_type name end - # TODO:Figure out how to solve the internal links issue later end end end \ No newline at end of file diff --git a/lib/docs/scrapers/dojo.rb b/lib/docs/scrapers/dojo.rb index b2af9962..207a0ff7 100644 --- a/lib/docs/scrapers/dojo.rb +++ b/lib/docs/scrapers/dojo.rb @@ -4,7 +4,13 @@ module Docs self.slug = 'dojo' self.type = 'dojo' self.version = '1.10' - self.base_url = 'http://dojotoolkit.org/api/1.10/' #tree.json + self.base_url = 'http://dojotoolkit.org/api/1.10/' + + # This is a cut down list of the actual 
paths taken from the tree.json api on the dojo site + # Dojo used javascript and xhr requests to allow users to browse its documentation so it can't + # be scraped by just following links from the base page. This list was generated with a little + # bash and then cut down in order to remove a lot of the more unused documentation e.g. kernel, + # main, dnd and some others self.initial_paths = %w( dojo/AdapterRegistry dojo/aspect @@ -17,65 +23,18 @@ module Docs dojo/_base/config.modulePaths dojo/_base/connect dojo/_base/declare - dojo/_base/declare.__DeclareCreatedObject dojo/_base/Deferred dojo/_base/event dojo/_base/fx dojo/_base/html dojo/_base/json dojo/_base/kernel - dojo/_base/kernel.back - dojo/_base/kernel.cldr - dojo/_base/kernel.colors - dojo/_base/kernel.config - dojo/_base/kernel.contentHandlers - dojo/_base/kernel._contentHandlers - dojo/_base/kernel.currency - dojo/_base/kernel.data - dojo/_base/kernel.date - dojo/_base/kernel.dijit - dojo/_base/kernel.dnd - dojo/_base/kernel.doc - dojo/_base/kernel.dojox - dojo/_base/kernel.fx - dojo/_base/kernel.gears - dojo/_base/kernel.global - dojo/_base/kernel._hasResource - dojo/_base/kernel.html - dojo/_base/kernel.i18n - dojo/_base/kernel.io - dojo/_base/kernel.__IoArgs - dojo/_base/kernel.__IoCallbackArgs - dojo/_base/kernel.__IoPublish - dojo/_base/kernel.keys - dojo/_base/kernel.mouseButtons - dojo/_base/kernel._nodeDataCache - dojo/_base/kernel.number - dojo/_base/kernel.regexp - dojo/_base/kernel.rpc - dojo/_base/kernel.scopeMap - dojo/_base/kernel.Stateful - dojo/_base/kernel.store - dojo/_base/kernel.string - dojo/_base/kernel.tests - dojo/_base/kernel.touch - dojo/_base/kernel.version - dojo/_base/kernel.window - dojo/_base/kernel.__XhrArgs dojo/_base/lang dojo/_base/loader dojo/_base/NodeList dojo/_base/query dojo/_base/sniff dojo/_base/unload - dojo/_base/url - dojo/_base/url.authority - dojo/_base/url.fragment - dojo/_base/url.password - dojo/_base/url.port - dojo/_base/url.query - 
dojo/_base/url.scheme - dojo/_base/url.user dojo/_base/window dojo/_base/window.doc dojo/_base/window.global @@ -88,8 +47,6 @@ module Docs dojo/colors dojo/cookie dojo/currency - dojo/currency.__FormatOptions - dojo/currency.__ParseOptions dojo/data/api/Identity dojo/data/api/Item dojo/data/api/Notification @@ -104,33 +61,10 @@ module Docs dojo/data/util/sorter dojo/date dojo/date/locale - dojo/date/locale.__FormatOptions dojo/date/stamp dojo/debounce dojo/Deferred dojo/DeferredList - dojo/dnd/autoscroll - dojo/dnd/autoscroll._validNodes - dojo/dnd/autoscroll._validOverflow - dojo/dnd/AutoSource - dojo/dnd/Avatar - dojo/dnd/common - dojo/dnd/common._defaultCreatorNodes - dojo/dnd/common._empty - dojo/dnd/Container - dojo/dnd/Container.__ContainerArgs - dojo/dnd/Manager - dojo/dnd/move - dojo/dnd/Moveable - dojo/dnd/Moveable.__MoveableArgs - dojo/dnd/move.boxConstrainedMoveable - dojo/dnd/move.constrainedMoveable - dojo/dnd/move.parentConstrainedMoveable - dojo/dnd/Mover - dojo/dnd/Selector - dojo/dnd/Source - dojo/dnd/Target - dojo/dnd/TimedMoveable dojo/dom dojo/dom-attr dojo/dom-class @@ -146,7 +80,6 @@ module Docs dojo/errors/RequestError dojo/errors/RequestTimeoutError dojo/Evented - dojo/_firebug/firebug dojo/fx dojo/fx/easing dojo/fx.easing @@ -168,44 +101,6 @@ module Docs dojo/keys dojo/loadInit dojo/main - dojo/main.back - dojo/main.cldr - dojo/main.colors - dojo/main.config - dojo/main.contentHandlers - dojo/main._contentHandlers - dojo/main.currency - dojo/main.data - dojo/main.date - dojo/main.dijit - dojo/main.dnd - dojo/main.doc - dojo/main.dojox - dojo/main.fx - dojo/main.gears - dojo/main.global - dojo/main._hasResource - dojo/main.html - dojo/main.i18n - dojo/main.io - dojo/main.__IoArgs - dojo/main.__IoCallbackArgs - dojo/main.__IoPublish - dojo/main.keys - dojo/main.mouseButtons - dojo/main._nodeDataCache - dojo/main.number - dojo/main.regexp - dojo/main.rpc - dojo/main.scopeMap - dojo/main.Stateful - dojo/main.store - dojo/main.string - 
dojo/main.tests - dojo/main.touch - dojo/main.version - dojo/main.window - dojo/main.__XhrArgs dojo/mouse dojo/node dojo/NodeList @@ -217,12 +112,6 @@ module Docs dojo/NodeList._nodeDataCache dojo/NodeList-traverse dojo/number - dojo/number.__FormatAbsoluteOptions - dojo/number.__FormatOptions - dojo/number.__IntegerRegexpFlags - dojo/number.__ParseOptions - dojo/number.__RealNumberRegexpFlags - dojo/number.__RegexpOptions dojo/on dojo/on/asyncEventListener dojo/on/debounce @@ -237,32 +126,16 @@ module Docs dojo/ready dojo/regexp dojo/request - dojo/request.__BaseOptions dojo/request/default dojo/request/handlers dojo/request/iframe - dojo/request/iframe.__BaseOptions - dojo/request/iframe.__MethodOptions - dojo/request/iframe.__Options - dojo/request.__MethodOptions dojo/request/node - dojo/request/node.__BaseOptions - dojo/request/node.__MethodOptions - dojo/request/node.__Options dojo/request/notify - dojo/request.__Options - dojo/request.__Promise dojo/request/registry dojo/request/script - dojo/request/script.__BaseOptions - dojo/request/script.__MethodOptions - dojo/request/script.__Options dojo/request/util dojo/request/watch dojo/request/xhr - dojo/request/xhr.__BaseOptions - dojo/request/xhr.__MethodOptions - dojo/request/xhr.__Options dojo/require dojo/robot dojo/robot._runsemaphore @@ -299,6 +172,11 @@ module Docs dojo/uacss dojo/when dojo/window) + # Add the rest of the url to the path + self.initial_paths = self.initial_paths.map { |l| l + ".html?xhr=true" } + # Dojo expects all the requests to be xhrs or it redirects you back to the docs home page + # where it uses js to call the backend based on the URL so you get the appropriate documentation + self.headers = { 'User-Agent' => 'devdocs.io' , 'X-Requested-With' => 'XMLHttpRequest' } self.links = { home: 'http://dojotoolkit.org', code: 'https://github.com/dojo/dojo' @@ -306,7 +184,11 @@ module Docs html_filters.push 'dojo/clean_html', 'dojo/entries' + # Don't use default selector on xhrs as no body or 
html document exists + options[:container] = ->(filter) { filter.root_page? ? '#content' : false } + options[:follow_links] = false options[:skip_links] = true + options[:only] = self.initial_paths options[:attribution] = <<-HTML The Dojo Toolkit is Copyright © 2005–2013
diff --git a/public/icons/docs/dojo/16.png b/public/icons/docs/dojo/16.png new file mode 100644 index 00000000..34695164 Binary files /dev/null and b/public/icons/docs/dojo/16.png differ diff --git a/public/icons/docs/dojo/16@2x.png b/public/icons/docs/dojo/16@2x.png new file mode 100644 index 00000000..5d5ad491 Binary files /dev/null and b/public/icons/docs/dojo/16@2x.png differ diff --git a/test/lib/docs/core/scrapers/url_scraper_test.rb b/test/lib/docs/core/scrapers/url_scraper_test.rb index 35867880..da59c22c 100644 --- a/test/lib/docs/core/scrapers/url_scraper_test.rb +++ b/test/lib/docs/core/scrapers/url_scraper_test.rb @@ -58,12 +58,24 @@ class DocsUrlScraperTest < MiniTest::Spec result end + it "runs a Requester with .headers as :request_options" do + stub(Scraper).headers { { testheader: true } } + mock(Docs::Requester).run anything, satisfy { |options| options[:request_options][:headers][:testheader] } + result + end + + it "runs a Requester with default .headers as :request_options" do + mock(Docs::Requester).run anything, satisfy { |options| options[:request_options][:headers]["User-Agent"] } + result + end + it "runs a Requester with .params as :request_options" do stub(Scraper).params { { test: true } } mock(Docs::Requester).run anything, satisfy { |options| options[:request_options][:params][:test] } result end + it "runs a Requester with the given block" do stub(Docs::Requester).run { |*args| @block = args.last } result