diff --git a/lib/docs/filters/npm/clean_html.rb b/lib/docs/filters/npm/clean_html.rb index ad07c476..b9519768 100644 --- a/lib/docs/filters/npm/clean_html.rb +++ b/lib/docs/filters/npm/clean_html.rb @@ -2,18 +2,13 @@ module Docs class Npm class CleanHtmlFilter < Filter def call - - at_css('#___gatsby').before(at_css('h1')) + @doc = at_css('main') css('details').remove - - css('.dZYhXG', '.fONtKn').remove - - css('.kSYjyK').remove - - css('.cDvIaH').remove - - css('.jRndWL').remove_attribute('style') + css('nav[aria-label="Breadcrumbs"]').remove + css('.gtWOdv').remove # Select CLI Version + css('.ezMiXD').remove # Navbox + css('.gOhcvK').remove # Edit this page on GitHub css('pre').each do |node| node.content = node.css('.token-line').map(&:content).join("\n") @@ -21,7 +16,6 @@ module Docs end doc - end end end diff --git a/lib/docs/filters/npm/entries.rb b/lib/docs/filters/npm/entries.rb index 8291d7a7..d754b1d4 100644 --- a/lib/docs/filters/npm/entries.rb +++ b/lib/docs/filters/npm/entries.rb @@ -7,7 +7,7 @@ module Docs end def get_type - at_css('.active').content + at_css('nav[aria-label="Breadcrumbs"] li').content end def additional_entries @@ -21,6 +21,7 @@ module Docs if name == 'package.json' css('h3').each do |node| + next unless node['id'] entries << [node['id'], slug + '#' + node['id'], 'Package.json Settings'] end end diff --git a/lib/docs/scrapers/npm.rb b/lib/docs/scrapers/npm.rb index ce2be86b..1cb2c6e2 100644 --- a/lib/docs/scrapers/npm.rb +++ b/lib/docs/scrapers/npm.rb @@ -2,7 +2,7 @@ module Docs class Npm < UrlScraper self.name = 'npm' self.type = 'npm' - self.release = '8.3.0' + self.release = '10.2.5' self.base_url = 'https://docs.npmjs.com/' self.force_gzip = true self.links = { @@ -12,7 +12,8 @@ module Docs html_filters.push 'npm/entries', 'npm/clean_html' - options[:max_image_size] = 130_000 + options[:download_images] = false + # options[:max_image_size] = 130_000 options[:skip] = [ 'all', @@ -31,6 +32,8 @@ module Docs /\Apolicies/, /cli\/v6/, /cli\/v7/, + /cli\/v8/, + /cli\/v9/, /\/\Z/ # avoid pages with a trailing slash, those pages mess up the entries ]