Finish Kotlin scraper

This commit is contained in:
Thibaut Courouble 2016-06-05 11:54:30 -04:00
parent 999a927bd0
commit 78cf26a6a7
17 changed files with 88 additions and 38 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 45 KiB

After

Width:  |  Height:  |  Size: 45 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 118 KiB

After

Width:  |  Height:  |  Size: 118 KiB

View file

@ -7,7 +7,7 @@ class app.collections.Types extends app.Collection
(result[@_groupFor(type)] ||= []).push(type)
result.filter (e) -> e.length > 0
GUIDES_RGX = /(^|[\s\(])(guide|guides|tutorial|reference|playbooks|getting\ started|manual)($|[\s\):])/i
GUIDES_RGX = /(^|[\s\(])(guides?|tutorials?|reference|playbooks|getting\ started|manual)($|[\s\):])/i
_groupFor: (type) ->
if GUIDES_RGX.test(type.name)

View file

@ -1,5 +1,8 @@
[
[
"2016-06-05",
"New documentation: <a href=\"/kotlin/\">Kotlin</a>"
], [
"2016-04-24",
"New documentations: <a href=\"/numpy/\">NumPy</a> and <a href=\"/apache_pig/\">Apache Pig</a>"
], [

View file

@ -250,6 +250,11 @@ credits = [
'Steven Sanderson, the Knockout.js team, and other contributors',
'MIT',
'https://raw.githubusercontent.com/knockout/knockout/master/LICENSE'
], [
'Kotlin',
'2010-2016 JetBrains s.r.o.',
'Apache',
'https://raw.githubusercontent.com/JetBrains/kotlin-web-site/master/LICENSE'
], [
'Laravel',
'Taylor Otwell',

View file

@ -11,6 +11,7 @@ app.views.CakephpPage =
app.views.EmberPage =
app.views.ExpressPage =
app.views.GoPage =
app.views.KotlinPage =
app.views.LaravelPage =
app.views.LodashPage =
app.views.MarionettePage =

View file

@ -50,6 +50,7 @@
'pages/haskell',
'pages/jquery',
'pages/knockout',
'pages/kotlin',
'pages/laravel',
'pages/lua',
'pages/mdn',

View file

@ -50,6 +50,7 @@
'pages/haskell',
'pages/jquery',
'pages/knockout',
'pages/kotlin',
'pages/laravel',
'pages/lua',
'pages/mdn',

View file

@ -155,6 +155,7 @@ td {
margin-top: 0;
margin-bottom: 0;
}
> pre:first-child, > p:first-child, > ul:first-child, > ol:first-child { margin-top: 0; }
> pre:last-child, > p:last-child, > ul:last-child, > ol:last-child { margin-bottom: 0; }
}

View file

@ -139,3 +139,4 @@
._icon-perl:before { background-position: -3rem -11rem; }
._icon-apache_pig:before { background-position: -4rem -11rem; }
._icon-numpy:before { background-position: -5rem -11rem; }
._icon-kotlin:before { background-position: -6rem -11rem; }

View file

@ -0,0 +1,9 @@
// Rules applied to elements nested under the ._kotlin class.
// The %placeholders extended below are defined outside this file.
._kotlin {
// Second- and third-level headings reuse the shared heading/label placeholders.
h2 { @extend %block-heading; }
h3 { @extend %block-label, %label-blue; }
// Every <code> element extends the shared %label placeholder.
code { @extend %label; }
// Code blocks that are direct children of a table cell get a small vertical margin.
td > pre { margin: .5em 0; }
// The breadcrumb container extends the shared %note placeholder.
.api-docs-breadcrumbs { @extend %note; }
}

View file

@ -2,8 +2,48 @@ module Docs
class Kotlin
class CleanHtmlFilter < Filter
# Filter entry point.
#
# Narrows the working document to the `.page-content` element, then runs the
# clean-up routine that matches the page: `api_page` when the subpath starts
# with "api", `doc_page` otherwise.
#
# Returns whatever `doc` returns (presumably the node just stored in @doc;
# the accessor is defined outside this view).
def call
  # The lookup runs before @doc is reassigned.
  @doc = at_css('.page-content')

  if subpath.start_with?('api')
    api_page
  else
    doc_page
  end

  doc
end
# Cleans up a non-API page (tutorials / reference).
#
# - removes the `.page-link-to-github` elements;
# - replaces every link that directly wraps an image with the link's content;
# - tags each <pre> with a `data-language` attribute taken from the `lang_*`
#   token of its inner <code> class, then flattens the <pre> to plain text.
def doc_page
  css('.page-link-to-github').remove

  css('a > img').each do |node|
    link = node.parent
    link.before(link.content).remove
  end

  css('pre').each do |node|
    if (code = node.at_css('code'))
      # A <code> element may have no class attribute at all; `to_s` keeps the
      # lookup from raising NoMethodError on nil in that case.
      language = code['class'].to_s[/lang_(\w+)/, 1]
      # Only tag the block when a language was actually found.
      node['data-language'] = language if language
    end

    # Re-assigning the text content drops any nested markup inside the <pre>.
    node.content = node.content
  end
end
# Cleans up an API reference page.
#
# - renames the first h1/h2/h3 found to <h1>;
# - moves the `.api-docs-breadcrumbs` element (when present) right after the <h1>;
# - when the page has no <h2>, renames every <h3> to <h2>;
# - replaces `href="#"` links with their content;
# - turns each `.signature` wrapper of a <code> child into a plain-text <pre>.
def api_page
  at_css('h1, h2, h3').name = 'h1'

  crumbs = at_css('.api-docs-breadcrumbs')
  at_css('h1').after(crumbs) if crumbs

  css('h3').each { |heading| heading.name = 'h2' } unless at_css('h2')

  css('a[href="#"]').each do |anchor|
    anchor.before(anchor.content).remove
  end

  css('.signature > code').each do |code|
    signature = code.parent
    signature.name = 'pre'
    # <br> tags become newlines before the markup is flattened to text below.
    markup = code.inner_html.gsub('<br>', "\n").strip
    signature.inner_html = markup
    signature.content = signature.content
  end
end
end
end
end

View file

@ -2,49 +2,27 @@ module Docs
class Kotlin
class EntriesFilter < Docs::EntriesFilter
def get_name
if at_css('h1')
name = at_css('h1').text
module_name = breadcrumbs[1]
"#{module_name}.#{name}"
elsif at_css('h2')
at_css('h2').text.gsub 'Package ', ''
elsif at_css('h3')
at_css('h3').text
if subpath.start_with?('api')
breadcrumbs[1..-1].join('.')
else
at_css('h1').content
end
end
def get_type
if package? || top_level? && !extensions?
if subpath.start_with?('api')
breadcrumbs[1]
else
"miscellaneous"
elsif subpath.start_with?('docs/tutorials')
'Tutorials'
elsif subpath.start_with?('docs/reference')
'Reference'
end
end
private
def breadcrumbs
container = at_css('.api-docs-breadcrumbs')
if container
links = container.children.select.with_index { |_, i| i.even? }
links.map &:text
else
[]
end
end
def top_level?
breadcrumbs.size == 3
end
def extensions?
get_name.start_with? 'Extensions'
end
def package?
breadcrumbs.size == 2
@breadcrumbs ||= css('.api-docs-breadcrumbs a').map(&:content).map(&:strip)
end
end
end

View file

@ -2,8 +2,8 @@ module Docs
class Kotlin < UrlScraper
self.type = 'kotlin'
self.release = '1.0.2'
self.base_url = 'https://kotlinlang.org/api/latest/jvm/stdlib/'
self.root_path = 'index.html'
self.base_url = 'https://kotlinlang.org/'
self.root_path = 'api/latest/jvm/stdlib/index.html'
self.links = {
home: 'https://kotlinlang.org/',
code: 'https://github.com/JetBrains/kotlin'
@ -11,10 +11,20 @@ module Docs
html_filters.push 'kotlin/entries', 'kotlin/clean_html'
options[:container] = '.page-content'
options[:container] = '.global-content'
options[:only_patterns] = [/\Adocs\/tutorials\//, /\Adocs\/reference\//, /\Aapi\/latest\/jvm\/stdlib\//]
options[:skip] = %w(
api/latest/jvm/stdlib/alltypes/index.html
docs/
docs/videos.html
docs/events.html
docs/resources.html
docs/reference/grammar.html)
options[:replace_paths] = { 'api/latest/jvm/stdlib/' => 'api/latest/jvm/stdlib/index.html' }
options[:attribution] = <<-HTML
&copy; 2016 JetBrains<br>
&copy; 2010&ndash;2016 JetBrains s.r.o.<br>
Licensed under the Apache License, Version 2.0.
HTML
end

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 KiB

After

Width:  |  Height:  |  Size: 389 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.5 KiB

After

Width:  |  Height:  |  Size: 680 B

View file

@ -1 +1 @@
https://upload.wikimedia.org/wikipedia/commons/b/b5/Kotlin-logo.png
https://github.com/JetBrains/kotlin-web-site/blob/master/assets/images/favicon.ico