rxjs: finish scraper and filters

This commit is contained in:
Jasper van Merle 2019-08-16 16:49:12 +02:00
parent 20438856d1
commit 17528d9845
6 changed files with 126 additions and 82 deletions

View file

@ -211,8 +211,7 @@ credits = [
'2017 Cypress.io',
'MIT',
'https://raw.githubusercontent.com/cypress-io/cypress-documentation/develop/LICENSE.md'
],
[
], [
'D',
'1999-2018 The D Language Foundation',
'Boost',
@ -572,8 +571,7 @@ credits = [
'2016-2018, The Pony Developers & 2014-2015, Causality Ltd.',
'BSD',
'https://raw.githubusercontent.com/ponylang/ponyc/master/LICENSE'
],
[
], [
'PostgreSQL',
'1996-2018 The PostgreSQL Global Development Group<br>&copy; 1994 The Regents of the University of California',
'PostgreSQL',
@ -648,13 +646,17 @@ credits = [
'2010 The Rust Project Developers',
'MIT',
'https://raw.githubusercontent.com/rust-lang/rust/master/LICENSE-MIT'
], [
'RxJS',
'2015-2018 Google, Inc., Netflix, Inc., Microsoft Corp. and contributors',
'Apache',
'https://raw.githubusercontent.com/ReactiveX/rxjs/master/LICENSE.txt'
], [
'Salt Stack',
'2019 SaltStack',
'Apache',
'https://raw.githubusercontent.com/saltstack/salt/develop/LICENSE'
],
[
], [
'Sass',
'2006-2016 Hampton Catlin, Nathan Weizenbaum, and Chris Eppstein',
'MIT',
@ -664,8 +666,7 @@ credits = [
'2002-2019 EPFL, with contributions from Lightbend',
'Apache',
'https://raw.githubusercontent.com/scala/scala-lang/master/license.md'
],
[
], [
'scikit-image',
'2011 the scikit-image team',
'BSD',
@ -765,8 +766,7 @@ credits = [
'2003-2019 WordPress Foundation',
'GPLv2+',
'https://wordpress.org/about/license/'
],
[
], [
'Yarn',
'2016-present Yarn Contributors',
'BSD',

View file

@ -7,6 +7,11 @@ module Docs
at_css('h1').content = 'RxJS Documentation'
end
if at_css('h1').nil?
title = subpath.rpartition('/').last.titleize
doc.prepend_child("<h1>#{title}</h1>")
end
css('br', 'hr', '.material-icons', '.header-link', '.breadcrumb').remove
css('.content', 'article', '.api-header', 'section', '.instance-member').each do |node|
@ -65,6 +70,16 @@ module Docs
if node['class'] && node['class'].include?('api-heading')
node.name = 'h3'
unless node.ancestors('.instance-method').empty?
matches = node.inner_html.scan(/([^(& ]+)[(&]/)
unless matches.empty? || matches[0][0] == 'constructor'
node['name'] = matches[0][0]
node['id'] = node['name'].downcase + '-'
end
end
node.inner_html = "<code>#{node.inner_html}</code>"
end
@ -77,25 +92,48 @@ module Docs
node.remove_attribute('class')
end
css('td > .overloads').each do |node|
node.replace node.at_css('.detail-contents')
end
css('td.short-description p').each do |node|
signature = node.parent.parent.next_element.at_css('h3[id]')
signature.after(node) unless signature.nil?
end
css('.method-table').each do |node|
node.replace node.at_css('tbody')
end
css('.api-body > table > caption').each do |node|
node.name = 'center'
lift_out_of_table node
end
css('.api-body > table > tbody > tr:not([class]) > td > *').each do |node|
lift_out_of_table node
end
css('.api-body > table').each do |node|
node.remove if node.content.strip.blank?
end
css('h1[class]').remove_attr('class')
css('table[class]').remove_attr('class')
css('table[width]').remove_attr('width')
css('tr[style]').remove_attr('style')
if at_css('.api-type-label.module')
at_css('h1').content = subpath.remove('api/')
end
css('th h3').each do |node|
node.name = 'span'
end
css('code code').each do |node|
node.before(node.children).remove
end
doc
end
# Moves +node+ out of its nearest enclosing <table> so that it ends up
# in front of that table in the document.
#
# The node is inserted right after the element that precedes the table.
# When the table has no preceding element sibling (it is the first
# element in its parent), the node is inserted directly before the table
# instead of raising NoMethodError on nil.
#
# node - a node that has a <table> ancestor.
def lift_out_of_table(node)
  table = node.ancestors('table').first
  anchor = table.previous_element

  if anchor
    anchor.after(node)
  else
    table.before(node)
  end
end
end
end
end

View file

@ -2,22 +2,28 @@ module Docs
class Rxjs
class EntriesFilter < Docs::EntriesFilter
# Builds the entry name for the current page.
#
# The name is the text of the page's <h1>; when the page has no <h1>,
# it falls back to the titleized last segment of +subpath+. A subpath
# containing "-pt<N>" gets the name prefixed with "<N>. ", and pages
# carrying the `.api-type-label.function` marker get "()" appended.
#
# Returns the name as a String.
def get_name
  title = at_css('h1')
  # Look the heading up once and only read it when it exists; an
  # unconditional `at_css('h1').content` would raise on heading-less pages.
  name = title ? title.content : subpath.rpartition('/').last.titleize

  part = subpath[/\-pt(\d+)/, 1]
  name = "#{part}. #{name}" if part

  name += '()' if at_css('.api-type-label.function')
  name
end
# Picks the type (grouping) of the current page.
#
# Checked in order: slugs starting with "guide" are 'Guide'; pages with a
# `.api-type-label.module` marker use the first "/"-separated part of the
# entry name; slugs starting with "api/" use their second slug segment;
# everything else is 'Miscellaneous'.
def get_type
  return 'Guide' if slug.start_with?('guide')
  return name.split('/').first if at_css('.api-type-label.module')
  return slug.split('/')[1] if slug.start_with?('api/')

  'Miscellaneous'
end
# Produces one extra entry per <h3> that carries an id attribute.
#
# Each entry is a two-element array: "<page name>.<h3 name attribute>()"
# followed by the heading's id.
def additional_entries
  css('h3[id]').map do |heading|
    label = "#{name}.#{heading['name']}()"
    [label, heading['id']]
  end
end
end
end
end

View file

@ -4,11 +4,26 @@ module Docs
class Rxjs < UrlScraper
self.name = 'RxJS'
self.type = 'rxjs'
self.release = '6.5.2'
self.base_url = 'https://rxjs.dev/'
self.root_path = 'guide/overview'
self.links = {
home: 'https://rxjs.dev/',
code: 'https://github.com/ReactiveX/rxjs'
}
html_filters.push 'rxjs/clean_html', 'rxjs/entries'
options[:follow_links] = false
options[:only_patterns] = [/guide\//, /api\//]
options[:skip_patterns] = [/api\/([^\/]+)\.json/]
options[:fix_urls_before_parse] = ->(url) do
url.sub! %r{\Aguide/}, '/guide/'
url.sub! %r{\Aapi/}, '/api/'
url.sub! %r{\Agenerated/}, '/generated/'
url
end
options[:max_image_size] = 256_000
options[:attribution] = <<-HTML
@ -16,70 +31,55 @@ module Docs
Code licensed under an Apache-2.0 License. Documentation licensed under CC BY 4.0.
HTML
module Common
private
# Collects the URLs to scrape from the site's two JSON indexes.
#
# navigation.json: every entry under 'SideNav' (recursing into
# 'children') with a 'url' other than 'api' contributes
# generated/docs/<url>.json.
# api-list.json: every entry (recursing into 'items') contributes
# generated/docs/<path>.json when it has a 'path', otherwise
# generated/docs/api/<name>.json when it has a 'name'.
#
# Returns the collected URLs in discovery order.
def initial_urls
  base = self.class.base_url
  urls = []

  Request.run("#{base}generated/navigation.json") do |response|
    visit = lambda do |item|
      page = item['url']
      urls << url_for("generated/docs/#{page}.json") if page && page != 'api'

      children = item['children']
      children.each(&visit) if children
    end

    JSON.parse(response.body)['SideNav'].each(&visit)
  end

  Request.run("#{base}generated/docs/api/api-list.json") do |response|
    visit = lambda do |item|
      if item['path']
        urls << url_for("generated/docs/#{item['path']}.json")
      elsif item['name']
        urls << url_for("generated/docs/api/#{item['name']}.json")
      end

      nested = item['items']
      nested.each(&visit) if nested
    end

    JSON.parse(response.body).each(&visit)
  end

  urls
end
# Turns a JSON response into an HTML one before handing it to the parent
# implementation; other responses are passed to the parent unchanged.
#
# For JSON responses: the body is replaced by the 'contents' field of the
# parsed document (or an empty string when the body is not valid JSON),
# the Content-Type header is set to text/html, and both the url and the
# effective url have '/generated/docs/' collapsed to '/' and '.json'
# removed from their path.
def handle_response(response)
  return super unless response.mime_type.include?('json')

  html =
    begin
      JSON.parse(response.body)['contents']
    rescue JSON::ParserError
      ''
    end
  response.options[:response_body] = html
  response.headers['Content-Type'] = 'text/html'

  [response.url, response.effective_url].each do |location|
    location.path = location.path.sub('/generated/docs/', '/').remove('.json')
  end

  super
end
end
version do
self.release = '6.3.3'
self.base_url = 'https://rxjs.dev/'
self.root_path = 'guide/overview'
html_filters.push 'rxjs/clean_html', 'rxjs/entries'
options[:follow_links] = false
options[:only_patterns] = [/\Aguide/, /\Aapi/]
options[:fix_urls_before_parse] = ->(url) do
url.sub! %r{\Aguide/}, '/guide/'
url.sub! %r{\Aapi/}, '/api/'
url.sub! %r{\Agenerated/}, '/generated/'
url
end
include Docs::Rxjs::Common
# Reads the latest version string from the site's navigation index.
#
# opts - passed through unchanged to fetch_json.
#
# Returns the 'raw' value found under '__versionInfo' in the fetched JSON.
def get_latest_version(opts)
  navigation = fetch_json('https://rxjs.dev/generated/navigation.json', opts)
  version_info = navigation['__versionInfo']
  version_info['raw']
end
private
# Collects the URLs to scrape from the site's two JSON indexes, then keeps
# only those allowed by the scraper's URL patterns.
#
# navigation.json: every entry under 'SideNav' (recursing into
# 'children') with a 'url' other than 'api' contributes
# generated/docs/<url>.json.
# api-list.json: every entry (recursing into 'items') contributes
# generated/docs/<path>.json when it has a 'path', otherwise
# generated/docs/api/<name>.json when it has a 'name'.
#
# Returns the URLs, in discovery order, that match at least one of
# options[:only_patterns] and none of options[:skip_patterns].
def initial_urls
  base = self.class.base_url
  urls = []

  Request.run("#{base}generated/navigation.json") do |response|
    visit = lambda do |item|
      page = item['url']
      urls << url_for("generated/docs/#{page}.json") if page && page != 'api'

      children = item['children']
      children.each(&visit) if children
    end

    JSON.parse(response.body)['SideNav'].each(&visit)
  end

  Request.run("#{base}generated/docs/api/api-list.json") do |response|
    visit = lambda do |item|
      if item['path']
        urls << url_for("generated/docs/#{item['path']}.json")
      elsif item['name']
        urls << url_for("generated/docs/api/#{item['name']}.json")
      end

      nested = item['items']
      nested.each(&visit) if nested
    end

    JSON.parse(response.body).each(&visit)
  end

  urls.select do |candidate|
    wanted = options[:only_patterns].any? { |pattern| candidate =~ pattern }
    wanted && options[:skip_patterns].none? { |pattern| candidate =~ pattern }
  end
end
# Turns a JSON response into an HTML one before handing it to the parent
# implementation; other responses are passed to the parent unchanged.
#
# For JSON responses: the body is replaced by the 'contents' field of the
# parsed document (or an empty string when the body is not valid JSON),
# the Content-Type header is set to text/html, and both the url and the
# effective url have '/generated/docs/' collapsed to '/' and '.json'
# removed from their path.
def handle_response(response)
  return super unless response.mime_type.include?('json')

  html =
    begin
      JSON.parse(response.body)['contents']
    rescue JSON::ParserError
      ''
    end
  response.options[:response_body] = html
  response.headers['Content-Type'] = 'text/html'

  [response.url, response.effective_url].each do |location|
    location.path = location.path.sub('/generated/docs/', '/').remove('.json')
  end

  super
end
def parse(response)
response.body.gsub! '<code-example', '<pre'
response.body.gsub! '</code-example', '</pre'

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.2 KiB

After

Width:  |  Height:  |  Size: 1.5 KiB

View file

@ -1 +1 @@
http://reactivex.io/
https://github.com/ReactiveX/reactivex.github.io/blob/develop/favicon.ico