class RailsSpider::Resource
Constants
- DEFAULT_EXP
- SYMBOL_EXP
Attributes
fetcher[R]
host[R]
item_path[R]
list_path[R]
page[RW]
page_params[R]
work[R]
Public Class Methods
new(work, **options)
click to toggle source
# File lib/rails_spider/resource.rb, line 10
# Sets up a resource for +work+, copying its crawl settings onto the instance
# and starting at page 1.
#
# @param work [Object] must respond to +host+, +list_path+, +item_path+ and
#   +page_params+
# @param options [Hash] accepted for call compatibility; not read here
def initialize(work, **options)
  @work = work
  @host, @list_path, @item_path, @page_params =
    work.host, work.list_path, work.item_path, work.page_params
  @page = 1
  # Only assigns a fetcher when none has been set on the instance yet.
  @fetcher ||= RailsSpider::Mechanize.new
end
Public Instance Methods
get_items()
click to toggle source
# File lib/rails_spider/resource.rb, line 32 def get_items fetcher.links(list_url).select { |link| item_exp.match? link } end
item_exp()
click to toggle source
# File lib/rails_spider/resource.rb, line 53
# Builds the regular expression used to recognise item links.
#
# Every SYMBOL_EXP match in +item_path+ is replaced with DEFAULT_EXP, and the
# resulting string is compiled as the pattern source.
#
# @return [Regexp]
def item_exp
  pattern_source = item_path.gsub(SYMBOL_EXP, DEFAULT_EXP)
  Regexp.new(pattern_source)
end
list_url()
click to toggle source
# File lib/rails_spider/resource.rb, line 43
# Builds the URL of the current list page.
#
# +list_path+ is resolved against +host+. When +page+ converts to a positive
# integer, a +page_params=page+ pair is added to the query string. Any query
# already present in +list_path+ is kept and the page pair is appended to it,
# rather than replacing it.
#
# @return [URI::Generic] the list page URL
def list_url
  url = URI.join(host, list_path)
  return url unless page.to_i > 0

  page_query = URI.encode_www_form(page_params => page)
  existing_query = url.query.to_s
  url.query = existing_query.empty? ? page_query : "#{existing_query}&#{page_query}"
  url
end
run()
click to toggle source
# File lib/rails_spider/resource.rb, line 20
# Walks the list pages in order, saving every item found on each page.
#
# After a page's items are saved, +page+ is incremented and the next page is
# fetched. The walk ends at the first page that yields no items.
def run
  until (batch = get_items).empty?
    batch.each { |item_url| save(item_url) }
    self.page += 1
  end
end
save(url)
click to toggle source
# File lib/rails_spider/resource.rb, line 36
# Fetches +url+ and stores its body in the Local record for this work.
#
# The record is looked up by +url+ and +work.id+, or initialised with those
# values when no match exists, then given the fetched body and saved.
#
# @param url the address to fetch and store
# @return the result of calling +save+ on the record
def save(url)
  # Fetch first, so a failed fetch never reaches the record lookup.
  fetched_body = fetcher.body(url)
  record = Local.find_or_initialize_by(url: url, work_id: work.id)
  record.body = fetched_body
  record.save
end