Finish D scraper

This commit is contained in:
Thibaut Courouble 2017-09-04 09:24:35 -04:00
parent bc8d9432c3
commit 8902d5331d
13 changed files with 196 additions and 61 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 32 KiB

After

Width:  |  Height:  |  Size: 35 KiB

View file

@ -1,7 +1,7 @@
[
[
"2017-09-03",
"New documentations: <a href=\"/nim/\">Nim</a> and <a href=\"/vulkan/\">Vulkan</a>"
"New documentations: <a href=\"/d/\">D</a>, <a href=\"/nim/\">Nim</a> and <a href=\"/vulkan/\">Vulkan</a>"
], [
"2017-07-23",
"New documentation: <a href=\"/godot/\">Godot</a>"

View file

@ -193,6 +193,11 @@ credits = [
'2012-2017 Manas Technology Solutions',
'Apache',
'https://raw.githubusercontent.com/crystal-lang/crystal/master/LICENSE'
], [
'D',
'1999-2017 The D Language Foundation',
'Boost',
'https://raw.githubusercontent.com/dlang/phobos/master/LICENSE_1_0.txt'
], [
'D3.js',
'2010-2017 Michael Bostock',

View file

@ -1,4 +1,4 @@
/* http://prismjs.com/download.html?themes=prism&languages=markup+css+clike+javascript+c+cpp+coffeescript+ruby+elixir+erlang+go+java+json+kotlin+lua+nginx+nim+perl+php+python+crystal+rust+scss+sql+typescript */
/* http://prismjs.com/download.html?themes=prism&languages=markup+css+clike+javascript+c+cpp+coffeescript+ruby+d+elixir+erlang+go+java+json+kotlin+lua+nginx+nim+perl+php+python+crystal+rust+scss+sql+typescript */
var _self = (typeof window !== 'undefined')
? window // if in browser
: (
@ -976,6 +976,70 @@ delete Prism.languages.coffeescript['template-string'];
}
];
}(Prism));
// Grammar for the D language: starts from the 'clike' grammar and overrides
// its 'string', 'number', 'keyword' and 'operator' token definitions.
Prism.languages.d = Prism.languages.extend('clike', {
// String patterns are tried in the order listed below.
'string': [
// r"", x""
/\b[rx]"(\\.|[^\\"])*"[cwd]?/,
// q"[]", q"()", q"<>", q"{}"
/\bq"(?:\[[\s\S]*?\]|\([\s\S]*?\)|<[\s\S]*?>|\{[\s\S]*?\})"/,
// q"IDENT
// ...
// IDENT"
/\bq"([_a-zA-Z][_a-zA-Z\d]*)(?:\r?\n|\r)[\s\S]*?(?:\r?\n|\r)\1"/,
// q"//", q"||", etc.
/\bq"(.)[\s\S]*?\1"/,
// Characters
/'(?:\\'|\\?[^']+)'/,
// Strings delimited by " or `, with backslash escapes and an optional c/w/d suffix
/(["`])(\\.|(?!\1)[^\\])*\1[cwd]?/
],
'number': [
// The lookbehind and the negative look-ahead try to prevent bad highlighting of the .. operator
// Hexadecimal numbers must be handled separately to avoid problems with exponent "e"
/\b0x\.?[a-f\d_]+(?:(?!\.\.)\.[a-f\d_]*)?(?:p[+-]?[a-f\d_]+)?[ulfi]*/i,
{
// Remaining numbers: optional 0b prefix or leading dot, digits with
// underscores, optional fraction, optional exponent, optional u/l/f/i suffix letters.
// The first capture group (an optional "..") is dropped from the match via `lookbehind`.
pattern: /((?:\.\.)?)(?:\b0b\.?|\b|\.)\d[\d_]*(?:(?!\.\.)\.[\d_]*)?(?:e[+-]?\d[\d_]*)?[ulfi]*/i,
lookbehind: true
}
],
// In order: $, keywords and special tokens, globally defined symbols
'keyword': /\$|\b(?:abstract|alias|align|asm|assert|auto|body|bool|break|byte|case|cast|catch|cdouble|cent|cfloat|char|class|const|continue|creal|dchar|debug|default|delegate|delete|deprecated|do|double|else|enum|export|extern|false|final|finally|float|for|foreach|foreach_reverse|function|goto|idouble|if|ifloat|immutable|import|inout|int|interface|invariant|ireal|lazy|long|macro|mixin|module|new|nothrow|null|out|override|package|pragma|private|protected|public|pure|real|ref|return|scope|shared|short|static|struct|super|switch|synchronized|template|this|throw|true|try|typedef|typeid|typeof|ubyte|ucent|uint|ulong|union|unittest|ushort|version|void|volatile|wchar|while|with|__(?:(?:FILE|MODULE|LINE|FUNCTION|PRETTY_FUNCTION|DATE|EOF|TIME|TIMESTAMP|VENDOR|VERSION)__|gshared|traits|vector|parameters)|string|wstring|dstring|size_t|ptrdiff_t)\b/,
// Symbolic operators plus the word operators `in` / `is` and their negated forms `!in` / `!is`
'operator': /\|[|=]?|&[&=]?|\+[+=]?|-[-=]?|\.?\.\.|=[>=]?|!(?:i[ns]\b|<>?=?|>=?|=)?|\bi[ns]\b|(?:<[<>]?|>>?>?|\^\^|[*\/%^~])=?/
});
// Put the D-specific comment patterns in front of whatever `comment` already
// holds on the D grammar (presumably the patterns inherited from 'clike').
Prism.languages.d.comment = [
// Shebang
// (no `m` flag, so `^` only matches at the very start of the input)
/^\s*#!.+/,
// /+ +/
{
// Allow one level of nesting
// The first capture group (start of input or a non-backslash character)
// is excluded from the token via `lookbehind`.
pattern: /(^|[^\\])\/\+(?:\/\+[\s\S]*?\+\/|[\s\S])*?\+\//,
lookbehind: true
}
].concat(Prism.languages.d.comment);
// Insert a 'token-string' token (q{ ... }) ahead of 'comment' in the D grammar.
// NOTE(review): presumably placed first so comment-like text inside q{...}
// stays part of the string token; confirm against Prism's matching order.
Prism.languages.insertBefore('d', 'comment', {
'token-string': {
// Allow one level of nesting
pattern: /\bq\{(?:|\{[^}]*\}|[^}])*\}/,
// Styled the same as ordinary strings
alias: 'string'
}
});
// Insert a 'property' token ahead of 'keyword': an `@` that does not follow a
// word character, plus any word characters after it (e.g. @safe, @property).
Prism.languages.insertBefore('d', 'keyword', {
'property': /\B@\w*/
});
// Insert a 'register' token ahead of 'function' in the D grammar.
Prism.languages.insertBefore('d', 'function', {
'register': {
// Iasm registers
// The pattern has no `i` flag, so only upper-case register names match;
// ST is matched either bare or with an index, ST(0)..ST(7).
pattern: /\b(?:[ABCD][LHX]|E[ABCD]X|E?(?:BP|SP|DI|SI)|[ECSDGF]S|CR[0234]|DR[012367]|TR[3-7]|X?MM[0-7]|R[ABCD]X|[BS]PL|R[BS]P|[DS]IL|R[DS]I|R(?:[89]|1[0-5])[BWD]?|XMM(?:[89]|1[0-5])|YMM(?:1[0-5]|\d))\b|\bST(?:\([0-7]\)|\b)/,
// Styled the same as variables
alias: 'variable'
}
});
Prism.languages.elixir = {
// Negative look-ahead is needed for string interpolation
// Negative look-behind is needed to avoid highlighting markdown headers in

View file

@ -175,3 +175,4 @@
._icon-godot:before { background-position: -4rem -2rem; @extend %doc-icon-2; }
._icon-nim:before { background-position: -5rem -2rem; @extend %doc-icon-2; @extend %darkIconFix !optional; }
._icon-vulkan:before { background-position: -6rem -2rem; @extend %doc-icon-2; @extend %darkIconFix !optional; }
._icon-d:before { background-position: -7rem -2rem; @extend %doc-icon-2; }

View file

@ -1,44 +1,9 @@
._d {
> .description, > .documentation-section { padding-left: 1rem; }
> .description > h2, header > h3, > h2 { @extend %block-heading; }
.description > h1 { font-size: 1rem; }
.method-description > h2, h3, h4, h5, h6 { font-size: 1em; }
h2 { @extend %block-heading; }
h3, .d_decl { @extend %block-label, %label-blue; }
.d_decl { @extend %code; }
.d_decl {
font-weight: $boldFontWeight;
@extend %block-label, %label-blue;
p > code, li > code, td > code, dd > code { @extend %label; }
+ .d_decl { margin-top: -.5em; }
}
> .meta {
@extend %note, %note-blue;
> dd { margin: 0; }
> dd + dt { margin-top: .5em; }
}
a.method-click-advice {
float: right;
font-size: .75rem;
color: $linkColor;
cursor: pointer;
@extend %user-select-none;
&:hover { text-decoration: underline; }
}
.method-description { position: relative; }
.method-source-code {
display: none;
position: absolute;
z-index: 1;
top: 0;
right: 0;
background: rgba($contentBackground, .95);
box-shadow: 0 1em 1em 1em $contentBackground;
> pre { margin: 0; }
}
span.red { color: $textColorRed; }
}

View file

@ -2,9 +2,75 @@ module Docs
class D
class CleanHtmlFilter < Filter
def call
css('.d_decl > div > span.def-anchor').each do |node|
node.parent.parent['id'] = node['id']
@doc = at_css("#content")
css('#tools', '#copyright').remove
css('td > b', 'h1 > span').each do |node|
node.before(node.children).remove
end
css('span.d_inlinecode').each do |node|
node.name = 'code'
node.remove_attribute('class')
end
css('.keyval').each do |node|
key = node.at_css('.key')
dt = key.inner_html
dd = if val = node.at_css('.val')
val.inner_html
else
siblings = []
siblings << key while key = key.next
siblings.map(&:to_html).join
end
node.replace("<dl><dt>#{dt}</dt><dd>#{dd}</dd></dl>")
end
css('div.summary', 'div.description').each do |node|
node.name = 'p' unless node.at_css('p')
node.css('.blankline').each { |n| n.replace('<br><br>') }
end
css('.d_decl').each do |node|
node['id'] = node.at_css('.def-anchor')['id'].remove(/\A\./)
constraints = node.css('.constraint').remove
node.content = node.content.strip
node.inner_html = node.inner_html.gsub(/;\s*/, '<br>').remove(/<br>\z/)
node << "<br><br> Constraints:<br> #{constraints.map(&:content).join('<br> ')}" unless constraints.empty?
end
css('pre').each do |node|
node.content = node.content
node['data-language'] = 'd' if node['class'] && node['class'].include?('d_code')
end
css('div', 'code > a > code', 'code > code').each do |node|
node.before(node.children).remove
end
css('a[href*="#."]').each do |node|
node['href'] = node['href'].sub('#.', '#')
end
css('tr', 'td', 'code', 'pre', 'p', 'table').remove_attr('class')
css('table').remove_attr('border').remove_attr('cellpadding').remove_attr('cellspacing')
if base_url.path == '/spec/'
css('a.anchor').each do |node|
node.parent['id'] ||= node['id']
node.before(node.children).remove
end
css('center').each do |node|
node.before(node.children).remove
end
css('.fa-angle-left + a').remove
css('a + .fa-angle-right').each { |node| node.previous_element.remove }
end
doc
end
end

View file

@ -2,25 +2,46 @@ module Docs
class D
class EntriesFilter < Docs::EntriesFilter
def get_name
slug.to_s.gsub('_', '.').gsub('/', '.').squish!
name = at_css('h1').content
if base_url.path == '/spec/'
index = css('.subnav li a').to_a.index(at_css(".subnav li a[href='#{result[:path]}']")) + 1
name.prepend "#{index}. "
end
name
end
def get_type
slug.to_s.sub(/_(.*)/, '')
return 'Reference' if base_url.path == '/spec/'
if name.start_with?('etc') || name.start_with?('core.stdc.')
name.split('.')[0..2].join('.')
elsif name.start_with?('ddmd')
'ddmd'
else
name.split('.')[0..1].join('.')
end
end
def additional_entries
names = []
css('.book > tr > td > a').each do |x|
span_block = x.at_css('span')
if span_block != nil
elem_name = span_block.text
name = "#{get_name}.#{elem_name}"
type = name.sub(/\..*/,'')
names << [name, "#{slug}#{x['href']}", type]
return [] if root_page? || base_url.path == '/spec/'
entries = []
css('.book > tr > td > a').each do |node|
entries << ["#{self.name}.#{node.content}", node['href'].remove(/\A#/).remove(/\A\./)]
end
if entries.empty?
css('.quickindex[id]').each do |node|
name = node['id'].remove(/quickindex\.?/)
next if name.empty? || name =~ /\.\d+\z/
entries << ["#{self.name}.#{name}", name]
end
end
names
entries
end
end
end

View file

@ -1,18 +1,30 @@
module Docs
class D < UrlScraper
self.release = '2.075.1'
include MultipleBaseUrls
self.release = '2.076.0'
self.type = 'd'
self.base_url = 'http://dlang.org/phobos/'
self.base_urls = ['https://dlang.org/phobos/', 'https://dlang.org/spec/']
self.root_path = 'index.html'
self.links = {
home: 'https://dlang.org/',
code: 'https://github.com/dlang/phobos'
}
html_filters.push 'd/entries', 'd/clean_html'
options[:container] = '#content'
options[:skip] = %w(spec.html)
options[:container] = '.container'
options[:root_title] = 'D Programming Language'
options[:title] = false
options[:root_title] = 'D Language'
options[:skip_patterns] = [/#.*/]
options[:attribution] = <<-HTML
Copyright &copy; 1999-2017 by the D Language Foundation
&copy; 1999&ndash;2017 The D Language Foundation<br>
Licensed under the Boost License 1.0.
HTML
def initial_urls
%w(https://dlang.org/phobos/index.html https://dlang.org/spec/intro.html)
end
end
end

Binary file not shown.

Before

Width:  |  Height:  |  Size: 784 B

After

Width:  |  Height:  |  Size: 661 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.9 KiB

After

Width:  |  Height:  |  Size: 1.7 KiB

View file

@ -0,0 +1 @@
https://github.com/dlang/dlang.org/tree/master/images