class Marvel101::Scraper
Attributes
doc[RW]
topic[RW]
Public Class Methods
new(topic)
click to toggle source
# File lib/marvel_101/scraper.rb, line 5
# Builds a scraper for the given topic.
#
# Stores the topic itself and the page address found under the :url key of
# the topic's urls collection.
def initialize(topic)
  @url = topic.urls[:url]
  @topic = topic
end
Public Instance Methods
get_101()
click to toggle source
# File lib/marvel_101/scraper.rb, line 77
# Reads the text of the script element(s) inside div#MarvelVideo101, pulls
# the value that follows "videoId: " out of it, and stores a YouTube watch
# URL for that id under topic.urls[:url_101].
#
# Leaves topic.urls untouched when the script text is empty, when it contains
# no "videoId: " entry, or when the captured id is empty.
#
# Returns the stored URL string, or nil when nothing was stored.
def get_101
  script_text = doc.css("div#MarvelVideo101 script").text

  # String#[] with a capture index yields nil when the pattern does not match,
  # so a script block without a videoId no longer raises NoMethodError on nil.
  video_id = script_text[/videoId: .([-\w]*)./, 1]
  return if video_id.nil? || video_id.empty?

  topic.urls[:url_101] = "https://www.youtube.com/watch?v=#{video_id}"
end
get_description()
click to toggle source
# File lib/marvel_101/scraper.rb, line 47
# Copies the text of the second-child paragraph under div.featured-item-desc
# into topic.description, after flattening it onto one line.
#
# Does nothing (and returns nil) when that text is blank.
def get_description
  raw_text = doc.css("div.featured-item-desc p:nth-child(2)").text
  return if raw_text.strip.empty?

  # Each line break, the whitespace after it, and an optional four-letter word
  # matching the character classes below (e.g. "more" / "less") collapses
  # into a single space.
  flattened = raw_text.gsub(/\r?\n\s*([ml][oe][rs][es])?/, " ").strip

  # NOTE(review): the two replacements below look like a clean-up of
  # mis-decoded characters in the fetched page -- confirm against live data.
  topic.description = flattened.gsub("â", "\'").gsub("Â", "")
end
get_details()
click to toggle source
# File lib/marvel_101/scraper.rb, line 66
# Resets topic.details to an empty hash and fills it with one entry per
# "div div" node found under div.featured-item-meta.
#
# The key is the node's <strong> text, lower-cased, with whitespace runs
# replaced by underscores and converted to a symbol. The value is the text of
# the span inside the last-child paragraph, or the whole paragraph text when
# that span text is empty.
def get_details
  topic.details = {}

  doc.css("div.featured-item-meta").css("div div").each do |entry|
    key = entry.css("strong").text.downcase.strip.split(" ").join("_")

    value = entry.css("p:last-child span").text.strip
    value = entry.css("p:last-child").text if value.empty?

    # NOTE(review): same character clean-up as used for the description;
    # presumably targets mis-decoded characters -- confirm.
    topic.details[key.to_sym] = value.gsub("â", "\'").gsub("Â", "")
  end
end
get_doc()
click to toggle source
# File lib/marvel_101/scraper.rb, line 23
# Fetches the page at @url, parses it with Nokogiri::HTML, and keeps the
# parsed document in @doc.
#
# URI.open is used instead of a bare open: since Ruby 3.0 Kernel#open no
# longer opens URLs, and Kernel#open would also spawn a subprocess for a
# string beginning with "|".
#
# Returns the parsed document.
def get_doc
  @doc = Nokogiri::HTML(URI.open(@url))
end
get_item_cards()
click to toggle source
# File lib/marvel_101/scraper.rb, line 27
# Returns the div.row-item nodes under div#comicsListing; when there are
# none, returns the div.row-item nodes under #featured-chars instead.
def get_item_cards
  comic_cards = doc.css("div#comicsListing div.row-item")
  return comic_cards unless comic_cards.empty?

  doc.css("#featured-chars div.row-item")
end
get_items(item_cards)
click to toggle source
# File lib/marvel_101/scraper.rb, line 32
# Turns every "div.row-item-text > h5 > a" link inside item_cards into a
# Team or Character and assigns the resulting collection to topic.items.
#
# When @url (case-insensitively) contains "team", each link becomes a
# Marvel101::Team named "The <link text>"; otherwise it becomes a
# Marvel101::Character named after the link text. Either way the item's list
# is set to the current topic and its URL is the link's href prefixed with
# "http:".
def get_items(item_cards)
  links = item_cards.css("div.row-item-text > h5 > a")

  topic.items = links.map do |link|
    name = link.text.strip
    url = "http:#{link.attr("href")}"

    item =
      if @url.downcase.include?("team")
        Marvel101::Team.find_or_create_by_name("The #{name}", url)
      else
        Marvel101::Character.find_or_create_by_name(name, url)
      end

    item.list = topic
    item
  end
end
get_members()
click to toggle source
# File lib/marvel_101/scraper.rb, line 55
# Builds topic.members from the div.row-item cards inside the first
# div.grid-container of the document.
#
# Each card's a.meta-title link supplies the member name (link text,
# stripped) and URL (href prefixed with "http:"). Every member is looked up
# or created through Marvel101::Character.find_or_create_by_name, then linked
# to the topic's list and to the topic itself as its team.
#
# When the document has no div.grid-container at all, topic.members is set
# to an empty array instead of raising NoMethodError on nil.
def get_members
  members_grid = doc.css("div.grid-container").first
  cards = members_grid ? members_grid.css("div.row-item") : []

  topic.members = cards.map do |card|
    title_link = card.css("a.meta-title")
    name = title_link.text.strip
    url = "http:#{title_link.attr("href").value}"

    Marvel101::Character.find_or_create_by_name(name, url).tap do |member|
      member.list = topic.list
      member.team = topic
    end
  end
end
get_wiki()
click to toggle source
# File lib/marvel_101/scraper.rb, line 85
# Stores the href of the "a.featured-item-notice.primary" link found under
# div.title-section as topic.urls[:url_wiki].
#
# Does nothing (and returns nil) when no such link exists.
def get_wiki
  wiki_link = doc.css("div.title-section a.featured-item-notice.primary")
  return if wiki_link.empty?

  topic.urls[:url_wiki] = wiki_link.attr("href").value
end
scrape_list()
click to toggle source
# File lib/marvel_101/scraper.rb, line 10
# Loads the document, collects its item cards, and hands them to get_items.
#
# Returns whatever get_items returns.
def scrape_list
  get_doc
  cards = get_item_cards
  get_items(cards)
end
scrape_topic()
click to toggle source
# File lib/marvel_101/scraper.rb, line 15
# Loads the document and runs each extraction step in order: description,
# then members (when topic.team? is truthy) or details (otherwise), then the
# 101 video link, then the wiki link.
#
# Returns whatever get_wiki returns.
def scrape_topic
  get_doc
  get_description

  if topic.team?
    get_members
  else
    get_details
  end

  get_101
  get_wiki
end