class RainJackets::Scraper
Public Class Methods
get_page()
click to toggle source
Take the HTML response returned by open-uri's open method and parse it into a Nokogiri HTML document (Nokogiri::HTML::Document)
# File lib/rain_jackets/scraper.rb, line 5
# Download the OutdoorGearLab women's rain jacket review page and parse it.
#
# Uses URI.open instead of a bare open(url): open-uri no longer patches
# Kernel#open as of Ruby 3.0, so open(url) would be treated as a local file
# path there. The block form closes the response stream once parsing is done.
#
# @return [Nokogiri::HTML::Document] the parsed page
# @raise [OpenURI::HTTPError] when the server answers with an error status
def self.get_page
  URI.open("https://www.outdoorgearlab.com/topics/clothing-womens/best-rain-jacket-womens") do |response|
    Nokogiri::HTML(response)
  end
end
initialize_jacket_objects()
click to toggle source
Instantiate jacket objects and assign attributes to each instance
# File lib/rain_jackets/scraper.rb, line 17
# Build one RainJackets::Jacket per product column of the scraped comparison
# table and store the resulting list on the Jacket class.
#
# The table is transposed relative to the objects we want: each table row
# holds one attribute for every jacket, so the row index decides which
# attribute is being filled and the cell index decides which jacket gets it.
# Row 0 creates the jackets (name + URL); rows listed in +row_fields+ populate
# them; every other row is ignored.
#
# All values are stored exactly as scraped text (e.g. "$149.93"); no numeric
# conversion is performed, including for ratings.
#
# @return [Array<RainJackets::Jacket>] the jackets, also assigned to
#   RainJackets::Jacket.all
def self.initialize_jacket_objects
  # Row index => [CSS selector for that row's per-jacket cells, Jacket writer].
  row_fields = {
    2  => ["td.compare_items span", :price=],
    3  => ["div.rating_score",      :overall_rating=],
    5  => ["td.compare_items",      :pros=],
    6  => ["td.compare_items",      :cons=],
    7  => ["td.compare_items",      :description=],
    9  => ["div.rating_score",      :water_resistance_rating=],
    10 => ["div.rating_score",      :breathability_rating=],
    11 => ["div.rating_score",      :comfort_rating=],
    12 => ["div.rating_score",      :weight_rating=],
    13 => ["div.rating_score",      :durability_rating=],
    14 => ["div.rating_score",      :packed_size_rating=]
  }.freeze

  all_jackets = []

  scrape_jackets_table.each_with_index do |tr_element, tr_index|
    if tr_index.zero?
      # Header row: one product-name cell per jacket, each wrapping a link
      # whose href is relative to the site root.
      tr_element.css("div.compare_product_name").each do |td_element|
        new_jacket = RainJackets::Jacket.new
        new_jacket.name = td_element.text
        new_jacket.url = "https://www.outdoorgearlab.com" + td_element.css("a").first.attributes["href"].value
        all_jackets << new_jacket
      end
      next
    end

    selector, writer = row_fields[tr_index]
    next unless selector

    tr_element.css(selector).each_with_index do |cell, column_index|
      jacket = all_jackets[column_index]
      # A row may expose more matching cells than there are jackets; skip the
      # extras instead of failing on nil.
      next unless jacket

      jacket.public_send(writer, cell.text)
    end
  end

  RainJackets::Jacket.all = all_jackets
end
scrape_jackets_table()
click to toggle source
# File lib/rain_jackets/scraper.rb, line 9
# Select every table row inside the wide comparison-table container of the
# fetched page.
#
# @return the collection of matching +tr+ nodes, as returned by Nokogiri's
#   +css+ on the parsed page
def self.scrape_jackets_table
  page = get_page
  page.css("div.content_table_xwide tr")
end