From 9c2ca1f27ca67128b54a8f6187053bc79f795b92 Mon Sep 17 00:00:00 2001 From: MasterEnoc Date: Sun, 22 Nov 2020 12:44:23 -0600 Subject: [PATCH 1/2] Update Pandas to 1.1.4 - Add version 1 of pandas, add new filters for this version - Pandas' web page changed but the pages for older versions are the same; for that reason new filters are added and the old ones are renamed --- .../templates/pages/about_tmpl.coffee | 2 +- lib/docs/filters/pandas/clean_html.rb | 22 +++--- lib/docs/filters/pandas/clean_html_old.rb | 20 ++++++ lib/docs/filters/pandas/entries.rb | 36 +++++----- lib/docs/filters/pandas/entries_old.rb | 29 ++++++++ lib/docs/scrapers/pandas.rb | 68 ++++++++++++++++-- public/icons/docs/pandas/16.png | Bin 324 -> 794 bytes public/icons/docs/pandas/16@2x.png | Bin 441 -> 1304 bytes public/icons/docs/pandas/SOURCE | 2 +- 9 files changed, 145 insertions(+), 34 deletions(-) create mode 100644 lib/docs/filters/pandas/clean_html_old.rb create mode 100644 lib/docs/filters/pandas/entries_old.rb diff --git a/assets/javascripts/templates/pages/about_tmpl.coffee b/assets/javascripts/templates/pages/about_tmpl.coffee index 712ca07b..bdd0c84d 100644 --- a/assets/javascripts/templates/pages/about_tmpl.coffee +++ b/assets/javascripts/templates/pages/about_tmpl.coffee @@ -573,7 +573,7 @@ credits = [ 'https://raw.githubusercontent.com/padrino/padrino-framework/master/padrino/LICENSE.txt' ], [ 'pandas', - '2008-2012, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team', + '2008-2020, AQR Capital Management, LLC, Lambda Foundry, Inc. 
and PyData Development Team', 'BSD', 'https://raw.githubusercontent.com/pydata/pandas/master/LICENSE' ], [ diff --git a/lib/docs/filters/pandas/clean_html.rb b/lib/docs/filters/pandas/clean_html.rb index 3bba131c..2234945d 100644 --- a/lib/docs/filters/pandas/clean_html.rb +++ b/lib/docs/filters/pandas/clean_html.rb @@ -2,16 +2,22 @@ module Docs class Pandas class CleanHtmlFilter < Filter def call - @doc = at_css('.body') - if root_page? - css('a[href$=".zip"]', 'a[href$=".pdf"]', '.toctree-wrapper').remove - at_css('h1').content = 'pandas' - end + css('#navbar-main').remove - css('h2 > a.reference', 'h3 > a.reference').each do |node| - node.before(node.children).remove - end + css('form').remove + + # sidebar + css('ul.nav.bd-sidenav').remove + + # title side symbol + css('.headerlink').remove + + # next and previous section buttons + css('next-link').remove + css('prev-link').remove + + css('footer').remove doc end diff --git a/lib/docs/filters/pandas/clean_html_old.rb b/lib/docs/filters/pandas/clean_html_old.rb new file mode 100644 index 00000000..140ae682 --- /dev/null +++ b/lib/docs/filters/pandas/clean_html_old.rb @@ -0,0 +1,20 @@ +module Docs + class Pandas + class CleanHtmlOldFilter < Filter + def call + @doc = at_css('.body') + + if root_page? 
+ css('a[href$=".zip"]', 'a[href$=".pdf"]', '.toctree-wrapper').remove + at_css('h1').content = 'pandas' + end + + css('h2 > a.reference', 'h3 > a.reference').each do |node| + node.before(node.children).remove + end + + doc + end + end + end +end diff --git a/lib/docs/filters/pandas/entries.rb b/lib/docs/filters/pandas/entries.rb index badf5e22..79c5f3a6 100644 --- a/lib/docs/filters/pandas/entries.rb +++ b/lib/docs/filters/pandas/entries.rb @@ -1,29 +1,29 @@ module Docs class Pandas class EntriesFilter < Docs::EntriesFilter + def get_name - if subpath.start_with?('generated') || (subpath.include?('reference') && !subpath.include?('reference/index')) - name_node = at_css('dt') - name_node = at_css('h1') if name_node.nil? - name = name_node.content.strip - name.sub! %r{\(.*}, '()' - name.remove! %r{\s=.*} - name.remove! %r{\A(class(method)?) (pandas\.)?} - else - name = at_css('h1').content.strip - name.prepend "#{css('.toctree-l1 > a:not([href^="http"])').to_a.index(at_css('.toctree-l1.current > a')) + 1}. " - end - name.remove! 
"\u{00B6}" - name + at_css('h1').content end def get_type - if subpath.start_with?('generated') || (subpath.include?('reference') && !subpath.include?('reference/index')) - css('.toctree-l2.current > a').last.content.remove(/\s\(.+?\)/) - else - 'Manual' - end + return 'Manual' if slug.include?('user_guide') + return 'General utility functions' if slug.match?('option|assert|errors|types|show_versions') + return 'Extensions' if slug.match?(/extensions|check_array/) + return 'Style' if slug.match?(/style/) + return 'Input/output' if slug.match?(/read|io|HDFStore/) + return 'Series' if slug.match?(/Series/) + return 'GroupBy' if slug.match?(/groupby|Grouper/) + return 'DataFrame' if slug.match?(/DataFrame|frame/) + return 'Window' if slug.match?(/window|indexers/) + return 'Index Objects' if slug.match?(/Index|indexing/) + return 'Data offsets' if slug.match?(/offsets?/) + return 'Resampling' if slug.match?(/resample/) + return 'Plotting' if slug.match?(/plotting/) + return 'Pandas arrays' if slug.match?(/arrays?|Timestamp|Datetime|Timedelta|Period|Interval|Categorical|Dtype/) + 'General functions' end + end end end diff --git a/lib/docs/filters/pandas/entries_old.rb b/lib/docs/filters/pandas/entries_old.rb new file mode 100644 index 00000000..139d7b7e --- /dev/null +++ b/lib/docs/filters/pandas/entries_old.rb @@ -0,0 +1,29 @@ +module Docs + class Pandas + class EntriesOldFilter < Docs::EntriesFilter + def get_name + if subpath.start_with?('generated') || (subpath.include?('reference') && !subpath.include?('reference/index')) + name_node = at_css('dt') + name_node = at_css('h1') if name_node.nil? + name = name_node.content.strip + name.sub! %r{\(.*}, '()' + name.remove! %r{\s=.*} + name.remove! %r{\A(class(method)?) (pandas\.)?} + else + name = at_css('h1').content.strip + name.prepend "#{css('.toctree-l1 > a:not([href^="http"])').to_a.index(at_css('.toctree-l1.current > a')) + 1}. " + end + name.remove! 
"\u{00B6}" + name + end + + def get_type + if subpath.start_with?('generated') || (subpath.include?('reference') && !subpath.include?('reference/index')) + css('.toctree-l2.current > a').last.content.remove(/\s\(.+?\)/) + else + 'Manual' + end + end + end + end +end diff --git a/lib/docs/scrapers/pandas.rb b/lib/docs/scrapers/pandas.rb index fedba47b..99f31e25 100644 --- a/lib/docs/scrapers/pandas.rb +++ b/lib/docs/scrapers/pandas.rb @@ -4,61 +4,117 @@ module Docs self.type = 'sphinx' self.root_path = 'index.html' self.links = { - home: 'http://pandas.pydata.org/', + home: 'https://pandas.pydata.org/', code: 'https://github.com/pydata/pandas' } - html_filters.push 'pandas/entries', 'pandas/clean_html', 'sphinx/clean_html' - - # Cannot take only the body, as the sidebar gives info about the type. - options[:container] = '.document' + html_filters.push 'pandas/clean_html', 'pandas/entries' options[:skip] = %w(internals.html release.html contributing.html whatsnew.html) options[:skip_patterns] = [/whatsnew\//] options[:attribution] = <<-HTML - © 2008–2012, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
+ © 2008–2020, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
Licensed under the 3-clause BSD License. HTML + version '1' do + self.release = '1.1.4' + self.base_url = "https://pandas.pydata.org/pandas-docs/version/#{self.release}/" + + html_filters.push 'pandas/clean_html', 'pandas/entries' + + options[:skip_patterns] = [ + /development/, + /getting_started/, + /whatsnew/ + ] + + options[:skip] = [ + 'panel.html', + 'pandas.pdf', + 'pandas.zip', + 'ecosystem.html' + ] + + end + version '0.25' do self.release = '0.25.0' self.base_url = "https://pandas.pydata.org/pandas-docs/version/#{self.release}/" + + html_filters.push 'pandas/entries_old', 'pandas/clean_html_old', 'sphinx/clean_html' + + options[:container] = '.document' + end version '0.24' do self.release = '0.24.2' self.base_url = "https://pandas.pydata.org/pandas-docs/version/#{self.release}/" + + html_filters.push 'pandas/entries_old', 'pandas/clean_html_old', 'sphinx/clean_html' + options[:container] = '.document' + end version '0.23' do self.release = '0.23.4' self.base_url = "https://pandas.pydata.org/pandas-docs/version/#{self.release}/" + + html_filters.push 'pandas/entries_old', 'pandas/clean_html_old', 'sphinx/clean_html' + options[:container] = '.document' + end version '0.22' do self.release = '0.22.0' self.base_url = "https://pandas.pydata.org/pandas-docs/version/#{self.release}/" + + html_filters.push 'pandas/entries_old', 'pandas/clean_html_old', 'sphinx/clean_html' + + options[:container] = '.document' + end version '0.21' do self.release = '0.21.1' self.base_url = "https://pandas.pydata.org/pandas-docs/version/#{self.release}/" + + html_filters.push 'pandas/entries_old', 'pandas/clean_html_old', 'sphinx/clean_html' + + options[:container] = '.document' + end version '0.20' do self.release = '0.20.3' self.base_url = "https://pandas.pydata.org/pandas-docs/version/#{self.release}/" + + html_filters.push 'pandas/entries_old', 'pandas/clean_html_old', 'sphinx/clean_html' + + options[:container] = '.document' + end version '0.19' do self.release = '0.19.2' 
self.base_url = "https://pandas.pydata.org/pandas-docs/version/#{self.release}/" + + html_filters.push 'pandas/entries_old', 'pandas/clean_html_old', 'sphinx/clean_html' + + options[:container] = '.document' + end version '0.18' do self.release = '0.18.1' self.base_url = "https://pandas.pydata.org/pandas-docs/version/#{self.release}/" + + html_filters.push 'pandas/entries_old', 'pandas/clean_html_old', 'sphinx/clean_html' + + options[:container] = '.document' + end def get_latest_version(opts) diff --git a/public/icons/docs/pandas/16.png b/public/icons/docs/pandas/16.png index cdc075efcc92aa363e6866678a0e700c59f63330..24a421fbb9cca5089855930da4ecdf352ece82d5 100644 GIT binary patch literal 794 zcmeAS@N?(olHy`uVBq!ia0vp^JU}eK!3-ot_ve`cDVB6cUq=Rpjs4tz5?O(Kg=CK) zUj~LMH3o);76yi2K%s^g3=E|P3=FRl7#OT(FffQ0%-I!a!@$5O7~m7)3RED>9xTie zEX)=x%pM}l79s>h>_7w)0SW@yV6G5E4OkgWF-R9sJx~EiBSJfn9Skyu4J2iv>|vtp zjxZUl1;~}=3;2JT;r|5&h(-zSpx>t%f1hOJWeetG4+6RYVgfHyC^u8+GnV#O%pKBf zp&++`Z4_h=e#6}Pfw@bbEez~{U|qH-AOiUUqyXd#Bm@$HAPiZk`v3p`i^keL0|rV| zNswPKkcI#iHxB01@cf&SEPIy}-eEUa+CE=ZzgXf*zPtUz?UlDB@09k}&OB5c8yHt! zcBiB?-9M%DPT9`IrBf=_&b09pc6W(l z-o11Yki%Z$>Fdh=gj-CQMd{7?;PpVE1)eUBAsp9D&m9$Nb`W5>(8#cxvryU3=E&W< ztvjCouUB~!bSCvz=+}je3=-2=4GLu@JZ5D)kRzzM#>~3m@XJd|Nq6M*o$TxzML60H zP0Z+V>QkSzvP5mRa=@q7q?Iv-0c)>HpA}oH`POgyb$K3lt7DU2=Ik@r5VO|&_FSKh z_A8E0u9~KyClQ~ocBKf%Y)_b5^RZCnW zN>UO_QmvAUQh^kMk%5tcuA!l>kx_`DrImq+m4Ugofq|8QfpxH~5{ic0{FKbJN)!zi cR;DIaCZ-S#U!~4>0W~mqy85}Sb4q9e0D+X_Qvd(} literal 324 zcmV-K0lWT*P)Lzr*P2K62_5OS>n04$Q+Fu!(F$Ucxq^7004JKBnnXhQ52be84QI4k@@! 
zY2fhmg7eF(&%n{?CEJH*a)JBj8O^hwfw+cA>=Ig32mDKik%DR!2kwWc7I1lW!^!y- zF*ReD`c>Tz=KlI+GxjPcuYC?<&vLR_W-;(6r>uYZ;o0TYHG-!is>d+#s^nujZe`@O z%{^?REBg!lvVtU&J%W50 z7^>757#dm_7=8hT8eT9klo~KFyh>nTu$sZZAYL$MSD+081Cv;QPlzi}fiPQ$2z!Vy zdx#KQun>p@vV(;|>|me>kO7o}As`!!fU;mjKpbck$TTp?76LRJsvTqqNE~DxDuO5g zx&e-WA}}=|21plM$p8Ng|Nk)j|H}YmfE^sd&Xn>0G{gT>V04**jVXeGp$sH)8R*3* zpe`oHd=U2}7+q#yW{8Bi0Z0n6h5i4)@c$jd|Mws>Iaoq?*@GYmXgeQsCqJAeo% z!p80g48Wbt^~+f*ma&wtWT^mo6CB!*fCZ5tj|0OW6ll=60DB*3I9LiAbRah%A&4#* z36+9zA!dR!BD6rw4*><;Nd};U|NsA=@Z&)lFxXa=1o;I6X(-?*npD9prmMrtclw%0 z$(!eHyeBT#|Msc-<;@YE8N|{cmYiWNa^)nq&#dX%7GEauTt30e#LUcS^_e?t{Tf4a zt_Cl&FI>6zZrYiB`Ztl6M?#!kI55nRM?WD>hD}CRke930!{El_MxI&^^Pf|BJeB7! zkLU4bWM*cn{pG1NdvWYZ9=&KO%O6u&gEGT-ttNk+Vshts>$TH-oT}<#JQal;Z08Lc zfKFvh@^*LGXp%e+lw;USJbhi+pKyx_vvM6sJS75@_Vjdd4AD5BoS-0PU}R)yoIE)p zF)=mSSu7zv{W-(Mm^(ImYEB6KVQO+xh>A#=xMA|2DFMXGiyDQ@?QV_eDtBJIt!QYKX%~4i5o|*oT*vcs=BG= zs#dmiv~@MR`|?nm7eeRs*5$pEi%_xIdu{FaZy%3N`S6*agNKWclh=7VM}J38S6^pu zx46?}4-X$NKTlup^Cw&;pEwn_cFN@0)8hj=^a8hdt=zGs#5vMZth4LXs#mk3bc9O3 z^7i^(dLeW+J1Xk-Eo&>?-Pc~dR`e|SUh?hRSN{mp!b?vygF`2%EHgAVHu5WvGHq^c zZk{L@$#jIlQ^8%%De2j&$OzT6^E0jaU6cN;V&!)1aqXV2<2o&3n@!G9FM(!ZjtQy_ zo{ZH+si{X58$}ry+6*iD#Wc>z0mE6f#5JNMC9x#cD!C{XNHG{07#Zjq8tNJug&0~| z8JJiZm}?stSQ!{t2g@pU;&5Yo*i%mk3Ew;n;{M7C72c9!Fk~^IA-pL*5lIeQo{ShPMkK3*o7I4}# z(CF-EpwUHMf;G7g4HH5F`;M1t4ET`LN%)r4&-Us3Bu|OXSqbeAF3S*HF;3Ud2vqo( z)=m2stBtlpE5~6lTO4~Qh3KA_!eFw1!S=9L)bdVn9l6Lsd6UpV*Y_<+am~Mi_RSXS zwr5Lcv|wjEN`UMYsq_#!;9P(lx+TXDI7QAvz$f4n@Cop6ibU1V;B!VlL7_9^nBh77 j=9J!^b9|FZZ_n2OwIpj5EB6In00000NkvXXu0mjfZDr78 diff --git a/public/icons/docs/pandas/SOURCE b/public/icons/docs/pandas/SOURCE index b19b45d3..40788c5f 100644 --- a/public/icons/docs/pandas/SOURCE +++ b/public/icons/docs/pandas/SOURCE @@ -1 +1 @@ -http://pandas.pydata.org/_static/pandas_logo.png \ No newline at end of file +https://commons.wikimedia.org/wiki/File:Pandas_mark.svg \ No newline at end of file From 34aeb5f9a2c628846fa6132455a64eb532ca6bd4 Mon Sep 17 00:00:00 
2001 From: MasterEnoc Date: Mon, 23 Nov 2020 11:22:15 -0600 Subject: [PATCH 2/2] Remove images in the root page and improve style. - Fix pages with tables where information lacked spaces. --- lib/docs/filters/pandas/clean_html.rb | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lib/docs/filters/pandas/clean_html.rb b/lib/docs/filters/pandas/clean_html.rb index 2234945d..5b0186e7 100644 --- a/lib/docs/filters/pandas/clean_html.rb +++ b/lib/docs/filters/pandas/clean_html.rb @@ -3,10 +3,21 @@ module Docs class CleanHtmlFilter < Filter def call + if root_page? + css('img').remove + end + css('#navbar-main').remove css('form').remove + # add ':' to '.classifier' classes + css('.classifier').each do |node| + text = node.content + node.content = ':' + text + node.content = node.content.gsub(/::/, ' : ') + end + # sidebar css('ul.nav.bd-sidenav').remove