Add Django REST Framework scraper

This commit is contained in:
Emil Maruszczak 2019-04-29 01:02:25 +02:00
parent b45090f369
commit 14623be628
6 changed files with 134 additions and 0 deletions

View file

@ -0,0 +1,36 @@
module Docs
  class RestFramework
    # HTML clean-up filter for the Django REST Framework pages.
    #
    # Removes `hr` and `.badges` elements, marks every `pre` as Python code,
    # strips inline styles from `h1` headings, and moves the `a.github` links
    # into a single `_links` paragraph appended at the end of the document.
    class CleanHtmlFilter < Docs::Filter
      # @return the mutated document (`doc`)
      def call
        css('hr, .badges').remove

        css('pre').each do |node|
          node['data-language'] = 'python'
        end

        # Drop the inline `style` attribute from the headings.
        css('h1').each do |node|
          node.remove_attribute('style')
        end

        # Gather the GitHub source links into a `_links` paragraph.
        links = Nokogiri::XML::Node.new('p', doc)
        links['class'] = '_links'

        css('a.github').each do |node|
          # The link label is taken from the nested <span>; when there is no
          # span the link keeps whatever content it already has.
          label = node.at_css('span')
          node.content = label.content if label
          node['class'] = '_links-link'
          # add_child reparents the link, detaching it from its old position.
          links.add_child(node)
        end

        # Don't leave an empty paragraph behind on pages without such links.
        doc.add_child(links) unless links.children.empty?
        doc
      end
    end
  end
end

View file

@ -0,0 +1,60 @@
module Docs
  class RestFramework
    # Derives the entry name, entry type and additional per-heading entries
    # for a Django REST Framework page.
    class EntriesFilter < Docs::EntriesFilter
      # H1 titles that open a category: every H2 below such a heading (up to
      # the next H1) is recorded as an entry.
      ACCEPTED_HEADERS = ['API Reference', 'API Guide'].freeze

      # Outside of a category, an H1/H2 is recorded as an entry when its text
      # ends with one of these words.
      ENDINGS = %w[Validator Field View Mixin Default Serializer].freeze

      # The third party section contains headings that would match ENDINGS
      # (and shouldn't), so scanning stops there.
      THIRD_PARTY_HEADER = 'Third party packages'

      # @return [String] text of the first H1 with the first 'Tutorial '
      #   occurrence removed; names containing 'Quickstart' get a '0: ' prefix
      #   (presumably so the quickstart sorts before the numbered tutorial parts).
      def get_name
        name = css('h1').first.content
        name.slice!('Tutorial ')
        name.include?('Quickstart') ? "0: #{name}" : name
      end

      # @return [String, nil] 'Tutorial' or 'API Guide' depending on the start
      #   of `subpath`; nil for any other path.
      def get_type
        case subpath
        when /\Atutorial/ then 'Tutorial'
        when /\Aapi-guide/ then 'API Guide'
        end
      end

      # Collects entries from the H1/H2 headings of the page.
      #
      # Two ways of finding entries are combined, to avoid listing every
      # possible ending while still catching everything inside the API
      # categories: headings under an ACCEPTED_HEADERS category, and headings
      # whose text ends with one of ENDINGS.
      #
      # @return [Array<Array>] `[heading text, heading id, "Ref: <name>"]`
      #   triples; empty for pages without a type and for tutorial pages.
      def additional_entries
        return [] if type.nil? || type == 'Tutorial'

        local_type = "Ref: #{name}"
        in_category = false
        entries = []

        css('h1, h2').each do |node|
          title = node.content
          break if title == THIRD_PARTY_HEADER

          if in_category
            if node.name == 'h1'
              # An H1 closes the current category; the H1 itself is not
              # checked against the category titles or the endings.
              in_category = false
            else
              entries << [title, node['id'], local_type]
            end
          elsif ACCEPTED_HEADERS.include?(title)
            in_category = true
          elsif title.end_with?(*ENDINGS)
            entries << [title, node['id'], local_type]
          end
        end

        entries
      end
    end
  end
end

View file

@ -0,0 +1,37 @@
module Docs
  # Scraper settings for the Django REST Framework documentation
  # (https://www.django-rest-framework.org/).
  class RestFramework < UrlScraper
    self.name = 'Django REST Framework'
    self.release = '3.9.2'
    self.slug = 'rest_framework'
    self.type = 'mkdocs'
    self.base_url = 'https://www.django-rest-framework.org/'
    self.root_path = 'index.html'
    self.links = {
      home: 'https://www.django-rest-framework.org/',
      code: 'https://github.com/encode/django-rest-framework'
    }

    html_filters.push 'mkdocs/clean_html', 'rest_framework/clean_html', 'rest_framework/entries'

    # Paths starting with topics/ or community/ are skipped.
    options[:skip_patterns] = [
      /\Atopics\//,
      /\Acommunity\//,
    ]

    options[:attribution] = <<-HTML
Copyright 2011&ndash;present Encode OSS Ltd<br>
Licensed under the BSD License.
    HTML

    private

    # Some scraped URLs lack a trailing slash, which leads to page
    # duplication. A '/' is appended in place to the response URL's path
    # unless it already ends with '/' or 'index.html'; the response is then
    # passed on to the parent implementation.
    def handle_response(response)
      path = response.url.path
      path << '/' unless path.end_with?('/', 'index.html')
      super
    end
  end
end

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.2 KiB

View file

@ -0,0 +1 @@
https://github.com/encode/django-rest-framework/blob/master/docs_theme/img/favicon.ico