class WhatToWatch::Scraper
Public Class Methods
get_item_page(object)
click to toggle source
Scrape Search Page on imdb.com to determine the Item Page URL
# File lib/what_to_watch/scraper.rb, line 21 def self.get_item_page(object) search_results_page = Nokogiri::HTML(open("https://www.imdb.com/find?s=tt&q=" + CGI::escape(object.title.gsub("Season ", "")))) url = "https://www.imdb.com" + "#{search_results_page.css("td a").attribute("href").value}" end
scrape_imdb(object)
click to toggle source
# File lib/what_to_watch/scraper.rb, line 30 def self.scrape_imdb(object) object.url = self.get_item_page(object) item_page = Nokogiri::HTML(open(object.url)) object.description = item_page.css("div.summary_text").text.strip object.genre_year = item_page.css("div.subtext a").collect{|tag| tag.text.strip.gsub("\u2013","-")}.join(" | ") object.cast = {} item_page.css("div.credit_summary_item").each do |category| object.cast[category.css("h4").text.strip] = category.css("a").collect{|tag|tag.text.strip}. join(", ").gsub(", See full cast & crew", "") end end
scrape_vulture()
click to toggle source
# File lib/what_to_watch/scraper.rb, line 4 def self.scrape_vulture doc = Nokogiri::HTML(open("http://vulture.com/streaming")) doc.css("div[data-editable='main']").each do |section| section.css("div.column-item").each do |row| row.css("section").drop(1).each do |item| WhatToWatch::Show.new( item.css("div[itemprop='caption']").text.split("\n")[1].strip.gsub(" ", " "), row.css("section")[0].css("h3").text.strip.capitalize, section.css("h2.clay-subheader").text.strip.downcase ) end end end end