class Processor

Attributes

record_limit[R]
scrape_delay[R]
single_site[R]
sites[R]

Public Class Methods

new(config_file, single_site, record_limit, scrape_delay)
# File lib/rubyscraper/processor.rb, line 9
def initialize(config_file, single_site, record_limit, scrape_delay)
  # Remember where the scrape definitions live, then parse them once up front.
  @scrape_file = config_file
  raw_config = File.read(@scrape_file)
  @scrape_config = JSON.parse(raw_config)
  # The parsed config doubles as the list of site definitions to scrape.
  @sites = @scrape_config
  # Run options: a single site name (may be empty), a cap on records,
  # and a politeness delay between sub-page requests.
  @single_site = single_site
  @record_limit = record_limit
  @scrape_delay = scrape_delay
end

Public Instance Methods

call()
# File lib/rubyscraper/processor.rb, line 18
def call
  # Scrape just the requested site when the user named one,
  # otherwise walk every site in the config.
  if single_site.empty?
    scrape_all_sites
  else
    scrape_single_site
  end
end

Private Instance Methods

has_sub_pages?(site)
# File lib/rubyscraper/processor.rb, line 44
def has_sub_pages?(site)
  # True when the site's summary config flags that each listing has a
  # detail page worth scraping. Config values come from JSON, so the
  # flag is the *string* "true", not a boolean — hence the comparison.
  # Hash#dig tolerates a site definition with no "summary" section
  # (returns false instead of raising NoMethodError on nil).
  site.dig("summary", "has_sub_pages") == "true"
end
scrape_all_sites()
# File lib/rubyscraper/processor.rb, line 29
def scrape_all_sites
  # Scrape every configured site and merge the results into one flat list.
  sites.each_with_object([]) do |site, all_results|
    all_results.concat(scrape_site(site))
  end
end
scrape_single_site()
# File lib/rubyscraper/processor.rb, line 24
def scrape_single_site
  # Look up the one site definition whose name matches the requested
  # site, then scrape only that site.
  target = sites.find { |s| s["name"] == single_site }
  scrape_site(target)
end
scrape_site(site)
# File lib/rubyscraper/processor.rb, line 35
def scrape_site(site)
  # Work out how many listing pages to walk for this site, capped by
  # the record limit. define_pagination_params must run before add_on
  # and steps are read.
  paginator = Paginator.new(site, record_limit)
  paginator.define_pagination_params

  # Pull the summary listings first; follow into each record's detail
  # page only when the site's config says sub-pages exist.
  summaries = SummaryScraper.new(site, paginator.add_on, paginator.steps).call
  if has_sub_pages?(site)
    SubPageScraper.new(site, summaries, scrape_delay).call
  else
    summaries
  end
end