Finish Apache Pig scraper

This commit is contained in:
Thibaut Courouble 2016-04-24 11:14:55 -04:00
parent 1927f4e196
commit 3b72e254c3
14 changed files with 78 additions and 78 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 44 KiB

After

Width:  |  Height:  |  Size: 55 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 113 KiB

After

Width:  |  Height:  |  Size: 149 KiB

View file

@ -1,5 +1,8 @@
[
[
"2016-04-24",
"New documentation: <a href=\"/apache_pig/\">Apache Pig</a>"
], [
"2016-04-17",
"New documentation: <a href=\"/perl/\">Perl</a>"
], [

View file

@ -96,8 +96,8 @@ credits = [
'GPLv3',
'https://raw.githubusercontent.com/ansible/ansible/devel/COPYING'
], [
'Apache HTTP Server',
'2016 The Apache Software Foundation',
'Apache HTTP Server<br>Apache Pig',
'2016 The Apache Software Foundation<br>Apache and the Apache feather logo are trademarks of The Apache Software Foundation.',
'Apache',
'https://www.apache.org/licenses/LICENSE-2.0'
], [

View file

@ -137,3 +137,4 @@
._icon-gnu_fortran:before { background-position: -1rem -11rem; }
._icon-gcc:before { background-position: -2rem -11rem; }
._icon-perl:before { background-position: -3rem -11rem; }
._icon-apache_pig:before { background-position: -4rem -11rem; }

View file

@ -16,6 +16,7 @@
blockquote { @extend %note; }
}
._apache_pig,
._cordova,
._gnu,
._grunt,
@ -27,7 +28,6 @@
._markdown,
._mocha,
._mongoose,
._pig,
._sinon,
._typescript,
._webpack {

View file

@ -1,9 +1,7 @@
module Docs
class Pig
class ApachePig
class CleanHtmlFilter < Filter
def call
@doc = at_css('#content')
css('.pdflink').remove
css('a[name]').each do |node|

View file

@ -0,0 +1,33 @@
module Docs
  class ApachePig
    # Produces the index entries for a single Apache Pig documentation page.
    class EntriesFilter < Docs::EntriesFilter
      # The page's own entry is named after the text of its h1 heading.
      def get_name
        at_css('h1').content
      end

      # Entries are filed under a type equal to the page's h1 text.
      def get_type
        at_css('h1').content
      end

      # Returns one [name, id] pair per heading selected for the current slug.
      # Headings whose stripped text is 'Introduction' are left out.
      def additional_entries
        # Which headings become entries depends on the page being processed.
        selector =
          case slug
          when 'basic', 'cmds', 'func' then 'h3'
          when 'cont' then 'h2, #macros + div > h3'
          when 'test' then 'h2, #diagnostic-ops + div > h3'
          when 'perf' then 'h2, #optimization-rules + div > h3, #specialized-joins + div > h3'
          else 'h2'
          end

        css(selector)
          .map { |heading| [heading.content.strip, heading['id']] }
          .reject { |title, _id| title == 'Introduction' }
      end
    end
  end
end

View file

@ -1,33 +0,0 @@
module Docs
  class Pig
    # Produces the index entries for a single Pig documentation page.
    class EntriesFilter < Docs::EntriesFilter
      # Pages contribute no default entry; only the headings below are indexed.
      def include_default_entry?
        false
      end

      # Entries are filed under a type equal to the page's h1 text.
      def get_type
        at_css('h1').content
      end

      # Returns one [name, id, type] triple per heading selected for the
      # current slug.
      def additional_entries
        # Which headings become entries depends on the page being processed.
        selector =
          case slug
          when 'basic', 'cmds', 'func' then 'h3'
          when 'cont' then 'h2, #macros + div > h3'
          when 'test' then 'h2, #diagnostic-ops + div > h3'
          when 'perf' then 'h2, #optimization-rules + div > h3, #specialized-joins + div > h3'
          else 'h2'
          end

        css(selector).map do |heading|
          [heading.content, heading['id'], get_type]
        end
      end
    end
  end
end

View file

@ -0,0 +1,37 @@
module Docs
# Scraper definition for the Apache Pig documentation, registered under the
# 'apache_pig' slug and type.
class ApachePig < UrlScraper
self.name = 'Apache Pig'
self.slug = 'apache_pig'
self.type = 'apache_pig'
self.links = {
home: 'https://pig.apache.org/'
}
# Append this doc's own filters (clean_html first, then entries) to the HTML
# filter list.
html_filters.push 'apache_pig/clean_html', 'apache_pig/entries'
# NOTE(review): presumably limits processing to the '#content' element; the
# option is consumed by framework code not shown here -- confirm.
options[:container] = '#content'
# Exclude the index page, plus any path that starts with "api" or "jdiff"
# (\A anchors each pattern at the start of the string).
options[:skip] = %w(pig-index.html)
options[:skip_patterns] = [/\Aapi/, /\Ajdiff/]
# Attribution markup stored for this documentation set.
options[:attribution] = <<-HTML
&copy; 2007&ndash;2016 Apache Software Foundation<br>
Licensed under the Apache Software License version 2.0.
HTML
# Each version is labelled major.minor; its block sets the full release number
# and builds the base URL from it.
version '0.15' do
self.release = '0.15.0'
self.base_url = "https://pig.apache.org/docs/r#{release}/"
end
version '0.14' do
self.release = '0.14.0'
self.base_url = "https://pig.apache.org/docs/r#{release}/"
end
version '0.13' do
self.release = '0.13.0'
self.base_url = "https://pig.apache.org/docs/r#{release}/"
end
end
end

View file

@ -1,39 +0,0 @@
module Docs
# Scraper definition for the Pig documentation, registered under the 'pig'
# type.
class Pig < UrlScraper
self.name = 'Pig'
self.type = 'pig'
self.links = {
home: 'https://pig.apache.org/'
}
# Append this doc's own filters (clean_html first, then entries) to the HTML
# filter list.
html_filters.push 'pig/clean_html', 'pig/entries'
# Exclude the index page, plus any path that starts with "api" or "jdiff"
# (\A anchors each pattern at the start of the string).
options[:skip] = %w( pig-index.html )
options[:skip_patterns] = [
/\Aapi/,
/\Ajdiff/
]
# Attribution markup stored for this documentation set.
options[:attribution] = <<-HTML
&copy; 2007&ndash;2016 Apache Software Foundation<br>
Licensed under the Apache Software License version 2.0.
HTML
# Each version is labelled with its full release number; its block sets the
# release and builds the (plain http) base URL from it.
version '0.15.0' do
self.release = '0.15.0'
self.base_url = "http://pig.apache.org/docs/r#{release}/"
end
version '0.14.0' do
self.release = '0.14.0'
self.base_url = "http://pig.apache.org/docs/r#{release}/"
end
version '0.13.0' do
self.release = '0.13.0'
self.base_url = "http://pig.apache.org/docs/r#{release}/"
end
end
end

Binary file not shown.

After

Width:  |  Height:  |  Size: 759 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB