Finish Jekyll scraper

This commit is contained in:
Thibaut Courouble 2018-02-04 11:20:23 -05:00
parent 2bd96af5c0
commit c349caf0f7
13 changed files with 37 additions and 73 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 38 KiB

After

Width:  |  Height:  |  Size: 39 KiB

View file

@ -1,7 +1,7 @@
[ [
[ [
"2018-2-4", "2018-2-4",
"New documentation: <a href=\"/jsdoc/\">JSDoc</a>" "New documentations: <a href=\"/jekyll/\">Jekyll</a> and <a href=\"/jsdoc/\">JSDoc</a>"
], [ ], [
"2017-11-26", "2017-11-26",
"New documentations: <a href=\"/bluebird/\">Bluebird</a>, <a href=\"/eslint/\">ESLint</a> and <a href=\"/homebrew/\">Homebrew</a>" "New documentations: <a href=\"/bluebird/\">Bluebird</a>, <a href=\"/eslint/\">ESLint</a> and <a href=\"/homebrew/\">Homebrew</a>"

View file

@ -320,6 +320,11 @@ credits = [
'2008-2017 Pivotal Labs', '2008-2017 Pivotal Labs',
'MIT', 'MIT',
'https://raw.githubusercontent.com/jasmine/jasmine/master/MIT.LICENSE' 'https://raw.githubusercontent.com/jasmine/jasmine/master/MIT.LICENSE'
], [
'Jekyll',
'2008-2018 Tom Preston-Werner and Jekyll contributors',
'MIT',
'https://raw.githubusercontent.com/jekyll/jekyll/master/LICENSE'
], [ ], [
'Jest', 'Jest',
'2014-present Facebook Inc.', '2014-present Facebook Inc.',

View file

@ -55,6 +55,7 @@
'pages/github', 'pages/github',
'pages/go', 'pages/go',
'pages/haskell', 'pages/haskell',
'pages/jekyll',
'pages/jquery', 'pages/jquery',
'pages/julia', 'pages/julia',
'pages/knockout', 'pages/knockout',

View file

@ -168,3 +168,4 @@
._icon-bluebird:before { background-position: -8rem -2rem; @extend %doc-icon-2; } ._icon-bluebird:before { background-position: -8rem -2rem; @extend %doc-icon-2; }
._icon-eslint:before { background-position: -9rem -2rem; @extend %doc-icon-2; } ._icon-eslint:before { background-position: -9rem -2rem; @extend %doc-icon-2; }
._icon-homebrew:before { background-position: 0 -3rem; @extend %doc-icon-2; } ._icon-homebrew:before { background-position: 0 -3rem; @extend %doc-icon-2; }
._icon-jekyll:before { background-position: -1rem -3rem; @extend %doc-icon-2; }

View file

@ -1,35 +1,7 @@
._jekyll { ._jekyll {
h2, h3 { @extend %block-heading; } @extend %simple;
.note { .note.info { @extend %note-blue; }
@extend %note; .note.warning { @extend %note-red; }
.note.unreleased { @extend %note-orange; }
h5 {
margin-top: 0.25em;
}
position: relative;
&::after {
content: attr(data-type);
opacity: 0.5;
text-transform: uppercase;
position: absolute;
top: 0.25em;
right: 0.5em;
font-size: 0.8em;
pointer-events: none;
}
// Other note types currently unstyled:
// plain
// tip
// feature
&.note-info { @extend %note-blue; }
&.note-warning { @extend %note-red; }
&.note-unreleased { @extend %note-orange; }
}
pre {
font-size: inherit;
}
} }

View file

@ -2,50 +2,38 @@ module Docs
class Jekyll class Jekyll
class CleanHtmlFilter < Filter class CleanHtmlFilter < Filter
def call def call
css('.improve, .section-nav').each(&:remove) @doc = at_css('article')
at_css('h1').content = 'Jekyll' if root_page?
css('.improve, .section-nav').remove
css('div.highlighter-rouge').each do |node| css('div.highlighter-rouge').each do |node|
pre = node.at_css('pre') pre = node.at_css('pre')
# copy over the highlighting metadata lang = node['class'][/language-(\w+)/, 1]
match = /language-(\w+)/.match(node['class'])
# HACK: Prism shell highlighting highlights `|`, # HACK: Prism shell highlighting highlights `|`,
# which makes the tree on this page look terrible # which makes the tree on this page look terrible
if match && !(slug == /structure/ && match[1] == 'sh') unless slug.include?('structure') && lang == 'sh'
lang = match[1] lang = 'bash' if lang == 'sh'
if lang == 'sh'
lang = 'bash'
elsif lang == 'liquid'
lang = 'django' # Close enough.
end
pre['class'] = nil
pre['data-language'] = lang pre['data-language'] = lang
end end
# Remove the server-rendered syntax highlighting pre.remove_attribute('class')
code = pre.at_css('code') pre.content = pre.content
code.content = code.text node.replace(pre)
# Remove the div.highlighter-rouge and div.highlight wrapping the <pre>
node.add_next_sibling pre
node.remove
end end
css('code').each do |node| css('code').remove_attr('class')
node['class'] = ''
end
css('.note').each do |node| css('.note').each do |node|
node_type = /note ?(\w+)?/.match(node['class'])[1] || 'tip' node.name = 'blockquote'
# <div class="note">...<br>...</div> -> <div class="note">...</div> # <div class="note">...<br>...</div> -> <div class="note">...</div>
(node > 'br').each(&:remove) (node > 'br').each(&:remove)
# <div class="note">...<p>...<br><br>...</p>...</div> -> # <div class="note">...<p>...<br><br>...</p>...</div> ->
# <div class="note">...<p>...<br>...</p>...</div> # <div class="note">...<p>...<br>...</p>...</div>
node.css('br + br').each(&:remove) node.css('br + br').each(&:remove)
node['class'] = "note note-#{node_type}"
node['data-type'] = node_type
end end
doc doc

View file

@ -1,13 +1,12 @@
module Docs module Docs
class Jekyll class Jekyll
class EntriesFilter < Docs::EntriesFilter class EntriesFilter < Docs::EntriesFilter
def get_name def get_name
at_css('h1').content at_css('h1').content
end end
def get_type def get_type
if /continuous-integration/.match(slug) if slug.include?('continuous-integration')
'Deployment' 'Deployment'
else else
nav_link = doc.document # document nav_link = doc.document # document

View file

@ -1,7 +1,7 @@
module Docs module Docs
class Jekyll < UrlScraper class Jekyll < UrlScraper
self.type = 'jekyll' self.type = 'jekyll'
self.release = '3.6.2' self.release = '3.7.2'
self.base_url = 'https://jekyllrb.com/docs/' self.base_url = 'https://jekyllrb.com/docs/'
self.root_path = 'home/' self.root_path = 'home/'
self.links = { self.links = {
@ -12,23 +12,21 @@ module Docs
html_filters.push 'jekyll/clean_html', 'jekyll/entries' html_filters.push 'jekyll/clean_html', 'jekyll/entries'
options[:trailing_slash] = true options[:trailing_slash] = true
options[:container] = 'article' options[:skip] = %w(sites/ upgrading/)
options[:skip] = [
'',
'/'
]
options[:skip_patterns] = [ options[:skip_patterns] = [
/conduct/, /conduct/,
/history/, /history/,
/maintaining/, /maintaining/,
/contributing/ /contributing/,
] ]
options[:replace_paths] = {
'' => 'home/',
'/' => 'home/'
}
options[:attribution] = <<-HTML options[:attribution] = <<-HTML
&copy; 2008&ndash;2017 Tom Preston-Werner and Jekyll contributors<br /> &copy; 2008&ndash;2018 Tom Preston-Werner and Jekyll contributors<br>
Licensed under Licensed under the MIT license.
<a href="https://github.com/jekyll/jekyll/blob/master/LICENSE">
the MIT license
</a>
HTML HTML
end end
end end

Binary file not shown.

Before

Width:  |  Height:  |  Size: 357 B

After

Width:  |  Height:  |  Size: 403 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 809 B

After

Width:  |  Height:  |  Size: 958 B

View file

@ -1 +1 @@
https://avatars3.githubusercontent.com/u/3083652 https://raw.githubusercontent.com/jekyll/jekyll/master/docs/favicon.ico