class AnimalInfo::Scraper
Public Class Methods
get_html(name)
click to toggle source
# File lib/scraper.rb, line 30 def self.get_html(name) url = "https://en.wikipedia.org/wiki/#{name}" Nokogiri::HTML(open(url)) end
scrape_from_wikipedia(name)
click to toggle source
# File lib/scraper.rb, line 2 def self.scrape_from_wikipedia(name) html = get_html(name) animal_name = html.search("h1#firstHeading").text properties = { name: animal_name } categories = ["Kingdom", "Phylum", "Class", "Order"] html.search("table.infobox.biota tr").each do |table_row| table_data = table_row.search("td") if table_data.size == 2 category = table_data.first.text.strip.gsub(":", "") if categories.include?(category) if !table_data.last.search("b").empty? category_info = table_data.last.search("b").text.strip else category_info = table_data.last.text.strip end category = "Klass" if category == "Class" properties[category.downcase.to_sym] = category_info end end end properties[:url] = "https://en.wikipedia.org/wiki/#{name}" properties end