class SitemapGen::XMLCrawler

Public Class Methods

execute(xml_path, save_path)
# File lib/sitemap_gen/xml_crawler.rb, line 23
# Reads a sitemap XML file, fetches every URL found in its <loc> elements,
# and writes the page titles to "sitemap_only_link_title.csv".
#
# The CSV has the header row ID, Page title, URL. The ID column is always
# written as an empty string. Row order follows fetch completion and is not
# guaranteed to match the sitemap order when links are processed concurrently.
#
# @param xml_path [String] path to the sitemap XML file to read
# @param save_path [String, nil] directory to write the CSV into; falls back
#   to the current working directory when nil
# @return [Object] whatever the ::CSV.open block evaluates to
def self.execute(xml_path, save_path)
  save_path ||= Dir.pwd
  xml = File.open(xml_path) { |f| Nokogiri::XML(f) }
  links = xml.css('loc').map(&:content)

  # One lock for the shared CSV writer. The writer is not documented as
  # thread-safe, so appends from concurrent workers must be serialised to
  # keep rows from interleaving.
  csv_lock = Mutex.new

  ::CSV.open("#{save_path}/sitemap_only_link_title.csv", 'wb') do |csv|
    csv << ['ID', 'Page title', 'URL']
    # NOTE(review): with_multithread is defined outside this file; presumably
    # it runs the block across 8 worker threads — confirm against its source.
    links.with_multithread(8) do |link|
      p link
      res = Net::HTTP.get_response(URI(link))
      html = Nokogiri::HTML(res.body)
      # at_css returns the first matching node or nil, so a page without a
      # <title> yields a nil title instead of raising.
      title = html.at_css('head title')&.content
      # Only the write is locked; the HTTP fetch and parse stay concurrent.
      csv_lock.synchronize { csv << ['', title, link] }
    end
  end
end