Add <base> support

This commit is contained in:
Thibaut Courouble 2017-01-22 10:26:14 -05:00
parent 6f0214eaf3
commit 4e41ed9f25
6 changed files with 99 additions and 3 deletions

View file

@ -1,3 +1,5 @@
# frozen_string_literal: true
module Docs
class Filter < ::HTML::Pipeline::Filter
def css(*args)
@ -73,7 +75,7 @@ module Docs
end
# Tells whether +str+ should be treated as a relative URL.
#
# Returns true only when +str+ does not start with a URL scheme (SCHEME_RGX)
# and is rejected by both fragment_url_string? and data_url_string?.
def relative_url_string?(str)
  str !~ SCHEME_RGX && !fragment_url_string?(str) && !data_url_string?(str)
end
def absolute_url_string?(str)

View file

@ -41,7 +41,7 @@ module Docs
# Each stack is a fresh FilterStack that the push calls below populate by filter name.
self.html_filters = FilterStack.new
self.text_filters = FilterStack.new
# NOTE(review): this hunk is a rendered diff. The first html_filters.push line is the
# pre-change version; the second is its replacement and lists 'apply_base_url' first.
html_filters.push 'container', 'clean_html', 'normalize_urls', 'internal_urls', 'normalize_paths'
html_filters.push 'apply_base_url', 'container', 'clean_html', 'normalize_urls', 'internal_urls', 'normalize_paths'
text_filters.push 'inner_html', 'clean_text', 'attribution'
def initialize

View file

@ -129,7 +129,7 @@ module Docs
def fetch_redirections
result = {}
with_filters 'container', 'normalize_urls', 'internal_urls' do
with_filters 'apply_base_url', 'container', 'normalize_urls', 'internal_urls' do
build_pages do |page|
next if page[:response_effective_path] == page[:response_path]
result[page[:response_path].downcase] = page[:response_effective_path]

View file

@ -0,0 +1,21 @@
module Docs
  # Prefixes relative URLs found on links, images and iframes with the href
  # of the document's <base> element. Documents without a <base> href are
  # returned untouched.
  class ApplyBaseUrlFilter < Filter
    # Tag name => attribute on that tag which carries the URL to rewrite.
    URL_ATTRIBUTES = { a: 'href', img: 'src', iframe: 'src' }
    SCHEME_RGX = /\A[^:\/?#]+:/

    # Rewrites the matching attributes in place and returns the document.
    def call
      base_node = at_css('base')
      prefix = base_node.try(:[], 'href')
      return doc unless prefix

      URL_ATTRIBUTES.each_pair do |tag_name, attr_name|
        css(tag_name).each do |element|
          url = element[attr_name]
          next unless url
          # Only URLs accepted by relative_url_string? that are not
          # root-relative (leading '/') get the base prepended.
          next unless relative_url_string?(url) && url[0] != '/'

          # The base is prepended verbatim; no path normalization happens here.
          element[attr_name] = "#{prefix}#{url}"
        end
      end

      doc
    end
  end
end

View file

@ -149,6 +149,10 @@ class DocsFilterTest < MiniTest::Spec
# Strings that carry a scheme (here mailto: and data:) must not be reported as relative URLs.
it "returns false with 'mailto:test@example.com'" do
refute filter.relative_url_string?('mailto:test@example.com')
end
it "returns false with 'data:image/gif;base64,foo'" do
refute filter.relative_url_string?('data:image/gif;base64,foo')
end
end
describe "#absolute_url_string?" do

View file

@ -0,0 +1,69 @@
require 'test_helper'
require 'docs'
# Specs for Docs::ApplyBaseUrlFilter: each example builds a small HTML document,
# runs it through the filter and compares the resulting <body> markup.
# NOTE(review): link_to, filter_output and the @body convention are presumably
# supplied by FilterTestHelper — confirm against that helper.
class ApplyBaseUrlFilterTest < MiniTest::Spec
include FilterTestHelper
self.filter_class = Docs::ApplyBaseUrlFilter
self.filter_type = 'html'
# Without a <base> element the markup is expected to come through unchanged.
context "when there is no <base>" do
it "does nothing" do
@body = make_body nil, link_to('test')
assert_equal link_to('test'), filter_output.at_css('body').inner_html
end
end
context "when <base> is '/base/'" do
it "rewrites relative urls" do
@body = make_body '/base/', link_to('path#frag')
assert_equal link_to('/base/path#frag'), filter_output.at_css('body').inner_html
end
# The expected values below show the base being prepended verbatim:
# '..' and '.' path segments are kept, not resolved.
it "rewrites relative image urls" do
@body = make_body '/base/', '<img src="../img.png">'
assert_equal '<img src="/base/../img.png">', filter_output.at_css('body').inner_html
end
it "rewrites relative iframe urls" do
@body = make_body '/base/', '<iframe src="./test"></iframe>'
assert_equal '<iframe src="/base/./test"></iframe>', filter_output.at_css('body').inner_html
end
# The remaining examples cover URL forms that must be left exactly as written.
it "doesn't rewrite absolute urls" do
@body = make_body '/base/', link_to('http://example.com')
assert_equal link_to('http://example.com'), filter_output.at_css('body').inner_html
end
it "doesn't rewrite protocol-less urls" do
@body = make_body '/base/', link_to('//example.com')
assert_equal link_to('//example.com'), filter_output.at_css('body').inner_html
end
it "doesn't rewrite root-relative urls" do
@body = make_body '/base/', link_to('/path')
assert_equal link_to('/path'), filter_output.at_css('body').inner_html
end
it "doesn't rewrite fragment-only urls" do
@body = make_body '/base/', link_to('#test')
assert_equal link_to('#test'), filter_output.at_css('body').inner_html
end
it "doesn't rewrite email urls" do
@body = make_body '/base/', link_to('mailto:test@example.com')
assert_equal link_to('mailto:test@example.com'), filter_output.at_css('body').inner_html
end
it "doesn't rewrite data urls" do
@body = make_body '/base/', '<img src="data:image/gif;base64,aaaa">'
assert_equal '<img src="data:image/gif;base64,aaaa">', filter_output.at_css('body').inner_html
end
end
private
# Builds the HTML document string used as filter input. When +base+ is non-nil
# a <base href="..."> tag is emitted ahead of +body+; when nil, no <base> tag
# is included at all.
def make_body(base, body)
base = %(<base href="#{base}">) if base
"<html><meta charset=utf-8><title></title>#{base}#{body}</html>"
end
end