diff --git a/lib/docs/core/filter.rb b/lib/docs/core/filter.rb
index 82788ba0..8b8278fa 100644
--- a/lib/docs/core/filter.rb
+++ b/lib/docs/core/filter.rb
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
module Docs
class Filter < ::HTML::Pipeline::Filter
def css(*args)
@@ -73,7 +75,7 @@ module Docs
end
def relative_url_string?(str)
- !fragment_url_string?(str) && str !~ SCHEME_RGX
+ str !~ SCHEME_RGX && !fragment_url_string?(str) && !data_url_string?(str)
end
def absolute_url_string?(str)
diff --git a/lib/docs/core/scraper.rb b/lib/docs/core/scraper.rb
index 63b33e29..bf2af017 100644
--- a/lib/docs/core/scraper.rb
+++ b/lib/docs/core/scraper.rb
@@ -41,7 +41,7 @@ module Docs
self.html_filters = FilterStack.new
self.text_filters = FilterStack.new
- html_filters.push 'container', 'clean_html', 'normalize_urls', 'internal_urls', 'normalize_paths'
+ html_filters.push 'apply_base_url', 'container', 'clean_html', 'normalize_urls', 'internal_urls', 'normalize_paths'
text_filters.push 'inner_html', 'clean_text', 'attribution'
def initialize
diff --git a/lib/docs/core/scrapers/url_scraper.rb b/lib/docs/core/scrapers/url_scraper.rb
index 12b4a68c..e033c6fa 100644
--- a/lib/docs/core/scrapers/url_scraper.rb
+++ b/lib/docs/core/scrapers/url_scraper.rb
@@ -129,7 +129,7 @@ module Docs
def fetch_redirections
result = {}
- with_filters 'container', 'normalize_urls', 'internal_urls' do
+ with_filters 'apply_base_url', 'container', 'normalize_urls', 'internal_urls' do
build_pages do |page|
next if page[:response_effective_path] == page[:response_path]
result[page[:response_path].downcase] = page[:response_effective_path]
diff --git a/lib/docs/filters/core/apply_base_url.rb b/lib/docs/filters/core/apply_base_url.rb
new file mode 100644
index 00000000..0a20a95a
--- /dev/null
+++ b/lib/docs/filters/core/apply_base_url.rb
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+module Docs
+  # HTML filter that applies the document's <base href="..."> to
+  # document-relative URLs found in links, images and iframes.
+  #
+  # Only values that relative_url_string? accepts and that do not start with
+  # "/" are rewritten; every other value is left untouched.
+  class ApplyBaseUrlFilter < Filter
+    # Tag name => attribute holding that tag's URL.
+    URL_ATTRIBUTES = { a: 'href', img: 'src', iframe: 'src' }.freeze
+    # NOTE(review): not referenced inside this class; kept because it is a
+    # public constant -- confirm whether anything else reads it.
+    SCHEME_RGX = /\A[^:\/?#]+:/
+
+    # Prefixes each eligible URL attribute with the <base> href.
+    #
+    # The base href is prepended verbatim (plain string concatenation), so a
+    # base that does not end with "/" is joined without any separator.
+    #
+    # @return the document (`doc`), modified in place when a <base> href exists
+    def call
+      base_url = at_css('base').try(:[], 'href')
+      return doc unless base_url
+
+      URL_ATTRIBUTES.each_pair do |tag, attribute|
+        css(tag).each do |node|
+          value = node[attribute]
+          # Skip missing attributes, non-relative URLs and root-relative paths.
+          next if value.nil? || !relative_url_string?(value) || value.start_with?('/')
+
+          node[attribute] = "#{base_url}#{value}"
+        end
+      end
+
+      doc
+    end
+  end
+end
diff --git a/test/lib/docs/core/filter_test.rb b/test/lib/docs/core/filter_test.rb
index d98ecdad..77265c22 100644
--- a/test/lib/docs/core/filter_test.rb
+++ b/test/lib/docs/core/filter_test.rb
@@ -149,6 +149,10 @@ class DocsFilterTest < MiniTest::Spec
it "returns false with 'mailto:test@example.com'" do
refute filter.relative_url_string?('mailto:test@example.com')
end
+
+ it "returns false with 'data:image/gif;base64,foo'" do
+ refute filter.relative_url_string?('data:image/gif;base64,foo')
+ end
end
describe "#absolute_url_string?" do
diff --git a/test/lib/docs/filters/core/apply_base_url_test.rb b/test/lib/docs/filters/core/apply_base_url_test.rb
new file mode 100644
index 00000000..e627d959
--- /dev/null
+++ b/test/lib/docs/filters/core/apply_base_url_test.rb
@@ -0,0 +1,69 @@
+require 'test_helper'
+require 'docs'
+
+class ApplyBaseUrlFilterTest < MiniTest::Spec
+ include FilterTestHelper
+ self.filter_class = Docs::ApplyBaseUrlFilter
+ self.filter_type = 'html'
+
+ context "when there is no