class SubPageScraper
Attributes
delay[R]
listings[R]
site[R]
Public Class Methods
new(site, listings, delay)
click to toggle source
# File lib/rubyscraper/sub_page_scraper.rb, line 8
# Stores the scraping inputs and makes Poltergeist the default Capybara driver.
#
# @param site     the site configuration (read later via string keys such as "name" and "sub_page")
# @param listings the collection of listings whose sub pages will be visited
# @param delay    the value handed to `sleep` before each listing is processed
def initialize(site, listings, delay)
  @site, @listings, @delay = site, listings, delay

  # JavaScript errors raised by the visited pages are not turned into Ruby exceptions.
  Capybara.register_driver(:poltergeist) { |app| Capybara::Poltergeist::Driver.new(app, js_errors: false) }
  Capybara.default_driver = :poltergeist
end
Public Instance Methods
call()
click to toggle source
# File lib/rubyscraper/sub_page_scraper.rb, line 19
# Visits the sub page of every listing and returns the enriched, cleaned listings.
#
# Prints a header line first, pauses for `delay` before each listing, and
# finishes the progress output with a newline.
#
# @return [Array] one cleaned listing per entry in @listings, in the same order
def call
  puts "Pulling #{@listings.count} listings from #{@site["name"]}:"

  scraped = @listings.map do |entry|
    sleep delay
    listing_cleanup(pull_sub_page_data(site, entry))
  end

  puts "\n"
  scraped
end
listing_cleanup(listing)
click to toggle source
# File lib/rubyscraper/sub_page_scraper.rb, line 48
# Normalizes a scraped listing in place.
#
# Removes the 'Headquarters: ' label that weworkremotely jobs put in front of
# the location. The singular spelling 'Headquarter: ' is stripped as well, so
# input that the previous implementation cleaned is still cleaned.
#
# @param listing [Hash] listing with an optional "location" entry; the
#   location string is modified in place
# @return [Hash] the same listing object that was passed in
def listing_cleanup(listing)
  location = listing["location"]
  # Only the first occurrence is removed; a missing or empty location is left untouched.
  location.slice!(/Headquarters?: /) unless location.to_s.empty?
  listing
end
pull_sub_page_data(site, listing)
click to toggle source
# File lib/rubyscraper/sub_page_scraper.rb, line 29
# Opens the listing's URL and copies the configured sub-page fields into the listing.
#
# For every entry in site["sub_page"]["fields"] whose "path" matches on the page:
# * when "method" is "all", each matched element is sent the "loop_collect"
#   message and the results are joined with the "join" separator;
# * otherwise the "method" message is sent with the path and the result's
#   text is stored.
# Fields whose path does not match leave the listing untouched. A "." is
# printed once per listing as a progress marker.
#
# NOTE(review): visit / has_css? / all are not defined in this view — presumably
# they come from the Capybara DSL; confirm against the full class.
#
# @param site [Hash] site configuration providing site["sub_page"]["fields"]
# @param listing [Hash] listing with a "url" entry; modified in place
# @return [Hash] the same listing, with the extracted fields added
def pull_sub_page_data(site, listing)
  visit listing["url"]

  site["sub_page"]["fields"].each do |field|
    selector = field["path"]
    next unless has_css?(selector)

    listing[field["field"]] =
      if field["method"] == "all"
        all(selector).map { |node| node.send(field["loop_collect"]) }.join(field["join"])
      else
        send(field["method"].to_sym, selector).text
      end
  end

  print "."
  listing
end