class InvaderZim::Scraper

Public Class Methods

scrape_index_page(index_url) click to toggle source
# File lib/invader_zim/scraper.rb, line 7
def self.scrape_index_page(index_url)
  index_url = "https://zim.fandom.com/wiki/Characters"

  doc = Nokogiri::HTML(open(index_url))
  character_info = doc.css(".wikia-gallery-item")
  characters = []

  character_info.each do |character|
  characters << {
    :name => character.css(".lightbox-caption center b a[href]").text,
    :debut => character.css("[href]")[2].text,
    :profile_url => "https://zim.fandom.com" + character.css("b a").first["href"]
  }
    end
  characters
end
scrape_profile_page(character) click to toggle source
# File lib/invader_zim/scraper.rb, line 24
def self.scrape_profile_page(character)
  html = open(character.profile_url)
  doc = Nokogiri::HTML(html)

  character_table = doc.css(".WikiaArticle")
  character_page_traits = {} 

  character_table.each do |table|
      
      case character.name
      when "GIR", "Professor Membrane", "Ms. Bitters", "Recap Kid", "Minimoose", "Roboparents"
        character_page_traits[:homeworld] ||= table.css("td")[1].text.strip.gsub(/[\n]/, '')
      else
        character_page_traits[:homeworld] ||= table.css("td")[3].text.strip.gsub(/[\n]/, '')
      end
      
      case character.name
      when "Gaz Membrane", "Keef", "Roboparents"
        character_page_traits[:gender] ||= table.css("td")[5].text.strip.gsub(/[\n]/, '')
      when "Zim", "Invader Skoodge"
        character_page_traits[:gender] ||= table.css("td")[9].text.strip.gsub(/[\n]/, '')
      else
        character_page_traits[:gender] ||= table.css("td")[7].text.strip.gsub(/[\n]/, '')
      end
      
      case character.name 
      when "Roboparents"
        character_page_traits[:affiliation] ||= table.css("td")[9].text.strip.gsub(/[\n]/, '')
      when "GIR", "Minimoose"
        character_page_traits[:affiliation] ||= table.css("td")[9].text[13..15].strip.gsub(/[\n]/, '')
      when "Gaz Membrane", "Keef"
        character_page_traits[:affiliation] ||= table.css("td")[11].text.strip.gsub(/[\n]/, '')
      when "Dib Membrane", "Almighty Tallest Red", "Almighty Tallest Purple", "Tak"
        character_page_traits[:affiliation] ||= table.css("td")[13].text.strip.gsub(/[\n]/, '')
      when "Zim", "Invader Skoodge"
        character_page_traits[:affiliation] ||= table.css("td")[17].text.strip.gsub(/[\n]/, '')
      else
        character_page_traits[:affiliation] ||= table.css("td")[1].text.strip.gsub(/[\n]/, '')
      end
      
      case character.name
      when "Roboparents", "Recap Kid", "Ms. Bitters"
        character_page_traits[:introduction] ||= table.css("p")[0].text.strip.gsub(/[\n]/, ' ')
      when "Invader Skoodge"
        character_page_traits[:introduction] ||= table.css("p")[1].text.strip.gsub(/[\n]/, ' ').delete("\"")
      when "Keef"
        character_page_traits[:introduction] ||= table.css("p")[7].text.strip.gsub(/[\n]/, ' ').delete("\"")
      when "Gaz Membrane"
        character_page_traits[:introduction] ||= table.css("p")[7..9].text.strip.gsub(/[\n]/, ' ').delete("\"").delete("\u2019")
      else
        character_page_traits[:introduction] ||= table.css("p")[2].text.strip.gsub(/[\n]/, ' ').gsub(/[\u00A0\u00E1\u2019]/, ' ').delete("\"")
      end
      
      case character.name
      when "Keef"
        character_page_traits[:appearance] ||= table.css("p")[2].text.gsub(/[\"\n]/, '')
      when "Dib Membrane", "Invader Skoodge", "Minimoose"
        character_page_traits[:appearance] ||= table.css("p")[3].text.gsub(/[\"\n]/, '')
      when "Gaz Membrane", "Tak"
        character_page_traits[:appearance] ||= table.css("p")[5].text.gsub(/[\"\n]/, '')
      when "Recap Kid"
        character_page_traits[:appearance] ||= table.css("p")[3].text.gsub(/[\"\n]/, '').delete("[1]")
      else
        character_page_traits[:appearance] ||= table.css("p")[4].text.gsub(/[\"\n]/, '').delete("\u00A0").delete("\u00E4").delete("\u00ED")
      end
      
      case character.name
      when "Minimoose"
        character_page_traits[:facts_of_doom] ||= table.css(".mw-content-text ul li")[0..5].text.strip.gsub(/[\"\n\t]/, '')
      when "Zim"
        character_page_traits[:facts_of_doom] ||= table.css(".mw-content-text ul li")[5..10].text.strip.gsub(/[\"\n\t]/, '')
      when "Dib Membrane"
        character_page_traits[:facts_of_doom] ||= table.css(".mw-content-text ul li")[26..30].text.strip.gsub(/[\"\n\t]/, '')
      when "Gaz Membrane"
        character_page_traits[:facts_of_doom] ||= table.css(".mw-content-text ul li")[4..8].text.strip.gsub(/[\"\n]/, '').delete("\t")
      when "Ms. Bitters", "Almighty Tallest Purple", "Keef"
        character_page_traits[:facts_of_doom] ||= table.css(".mw-content-text ul li")[0..6].text.strip.gsub(/[\"\n]/, '').delete("[1]").delete("[2]").delete("[3]")
      when "Recap Kid"
        character_page_traits[:facts_of_doom] ||= table.css(".mw-content-text ul li")[42..45].text.strip.gsub(/[\"\n]/, '').delete("[2]")
      when "Tak"
        character_page_traits[:facts_of_doom] ||= table.css(".mw-content-text ul li")[17..19].text.strip.gsub(/[\"\n]/, '').delete("\"")
      else
        character_page_traits[:facts_of_doom] ||= table.css(".mw-content-text ul li")[0..8].text.strip.gsub(/[\"\n\t]/, '')
      end
    end
  character_page_traits
end