class SummaryScraper
Attributes
pagination_addon[R]
pagination_pages[R]
site[R]
Public Class Methods
new(site, pagination_addon, pagination_pages)
click to toggle source
# File lib/rubyscraper/summary_scraper.rb, line 8 def initialize(site, pagination_addon, pagination_pages) @site = site @pagination_addon = pagination_addon @pagination_pages = pagination_pages Capybara.register_driver :poltergeist do |app| Capybara::Poltergeist::Driver.new(app, js_errors: false) end Capybara.default_driver = :poltergeist end
Public Instance Methods
call()
click to toggle source
# File lib/rubyscraper/summary_scraper.rb, line 19 def call pagination_pages.inject [] do |results, page| results += get_summaries(page) end end
Private Instance Methods
get_summaries(page_no)
click to toggle source
# File lib/rubyscraper/summary_scraper.rb, line 27 def get_summaries(page_no) visit page_url(page_no) all(site["summary"]["loop"]).inject [] do |results, listing| record = pull_summary_data(site, listing) record = listing_cleanup(site, record) results << record end end
listing_cleanup(site, listing)
click to toggle source
# File lib/rubyscraper/summary_scraper.rb, line 58 def listing_cleanup(site, listing) # Add base url if not present unless listing["url"].match(/^http/) listing["url"] = "#{site["base_url"]}#{listing["url"]}" end listing end
page_url(page)
click to toggle source
# File lib/rubyscraper/summary_scraper.rb, line 37 def page_url(page) site["base_url"] + site["summary"]["url"] + pagination_addon + page.to_s end
pull_summary_data(site, record)
click to toggle source
# File lib/rubyscraper/summary_scraper.rb, line 41 def pull_summary_data(site, record) output = Hash.new site["summary"]["fields"].each do |field| if field["attr"] if record.has_css?(field["path"]) output[field["field"]] = record.send(field["method"].to_sym, field["path"])[field["attr"]] end else if record.has_css?(field["path"]) output[field["field"]] = record.send(field["method"].to_sym, field["path"]).text end end end; output end