class Processor
Attributes ([R] = read-only reader)
record_limit [R]
scrape_delay [R]
single_site [R]
sites [R]
Public Class Methods
new(config_file, single_site, record_limit, scrape_delay)
click to toggle source
# File lib/rubyscraper/processor.rb, line 9
# Builds a processor from a JSON scrape-configuration file.
#
# config_file  - path to a JSON file describing the sites to scrape
# single_site  - name of one configured site to scrape, or "" to scrape all
#                (see #call, which branches on single_site.empty?)
# record_limit - cap on records per site, handed to the Paginator
# scrape_delay - delay between sub-page requests, handed to the
#                SubPageScraper (presumably seconds — confirm there)
def initialize(config_file, single_site, record_limit, scrape_delay)
  @scrape_file = config_file
  @scrape_config = JSON.parse(File.read(@scrape_file))
  # The parsed config doubles as the list of site definitions.
  @sites = @scrape_config
  @single_site = single_site
  @record_limit = record_limit
  @scrape_delay = scrape_delay
end
Public Instance Methods
call()
click to toggle source
# File lib/rubyscraper/processor.rb, line 18
# Entry point: scrapes the one named site when a name was given,
# otherwise scrapes every configured site.
def call
  if single_site.empty?
    scrape_all_sites
  else
    scrape_single_site
  end
end
Private Instance Methods
has_sub_pages?(site)
click to toggle source
# File lib/rubyscraper/processor.rb, line 44
# True when the site's summary config flags sub-pages to scrape.
# The config stores the flag as the string "true", not a boolean.
def has_sub_pages?(site)
  summary = site["summary"]
  summary["has_sub_pages"] == "true"
end
scrape_all_sites()
click to toggle source
# File lib/rubyscraper/processor.rb, line 29
# Scrapes every configured site and returns the concatenated results.
#
# Uses flat_map instead of inject([]) { |acc, s| acc += scrape_site(s) }:
# the inject form allocates and copies a fresh accumulator array on every
# iteration (accidentally quadratic), while flat_map concatenates each
# site's result array in a single pass with identical output.
def scrape_all_sites
  sites.flat_map { |site| scrape_site(site) }
end
scrape_single_site()
click to toggle source
# File lib/rubyscraper/processor.rb, line 24
# Scrapes only the site whose config "name" matches single_site.
# NOTE(review): when no site matches, find returns nil and scrape_site
# receives nil — same behavior as the original select { }.first.
def scrape_single_site
  target = sites.find { |entry| entry["name"] == single_site }
  scrape_site(target)
end
scrape_site(site)
click to toggle source
# File lib/rubyscraper/processor.rb, line 35
# Scrapes one site: sets up pagination, collects the summary listings,
# and — only when the config flags sub-pages — enriches them by
# scraping each record's sub-page.
def scrape_site(site)
  paginator = Paginator.new(site, record_limit)
  paginator.define_pagination_params
  summaries = SummaryScraper.new(site, paginator.add_on, paginator.steps).call
  return summaries unless has_sub_pages?(site)
  SubPageScraper.new(site, summaries, scrape_delay).call
end