class RailsSpider::Resource

Constants

DEFAULT_EXP
SYMBOL_EXP

Attributes

fetcher[R]
host[R]
item_path[R]
list_path[R]
page[RW]
page_params[R]
work[R]

Public Class Methods

new(work, **options)
# File lib/rails_spider/resource.rb, line 10
# Sets up a resource for +work+, starting at page 1.
#
# host, list_path, item_path and page_params are read from +work+ and
# cached in instance variables of the same name.
#
# @param work [Object] must respond to host, list_path, item_path and page_params
# @param options [Hash] accepted for call compatibility; not read by this method
def initialize(work, **options)
  @work = work

  # Snapshot the crawl settings from the work, in this fixed order.
  %i[host list_path item_path page_params].each do |setting|
    instance_variable_set(:"@#{setting}", work.public_send(setting))
  end

  @page = 1
  # ||= keeps a fetcher that was assigned before this method ran
  # (e.g. by a subclass calling super after setting @fetcher).
  @fetcher ||= RailsSpider::Mechanize.new
end

Public Instance Methods

get_items()
# File lib/rails_spider/resource.rb, line 32
# Returns the links on the current list page that match the item pattern.
#
# The links come from fetcher.links(list_url). The pattern is obtained from
# item_exp once per call instead of once per link, because item_exp builds
# a new Regexp each time it is invoked.
#
# @return [Array] the subset of fetched links for which the pattern matches
def get_items
  links = fetcher.links(list_url)
  pattern = item_exp
  links.select { |link| pattern.match?(link) }
end
item_exp()
# File lib/rails_spider/resource.rb, line 53
# Builds the Regexp used to recognise item links.
#
# Every SYMBOL_EXP match in item_path is replaced with DEFAULT_EXP and the
# resulting string is compiled as a pattern. item_path is not escaped, so any
# regexp metacharacters it contains keep their special meaning.
# NOTE(review): the values of SYMBOL_EXP and DEFAULT_EXP are not visible
# here; presumably they turn ":name" placeholders into a wildcard — confirm.
#
# @return [Regexp] a freshly compiled pattern on every call
def item_exp
  pattern_source = item_path.gsub(SYMBOL_EXP, DEFAULT_EXP)
  Regexp.new(pattern_source)
end
list_url()
# File lib/rails_spider/resource.rb, line 43
# Builds the URL of the current list page.
#
# host and list_path are combined with URI.join. When page.to_i is greater
# than zero, a "<page_params>=<page>" pair is added to the query string.
# A query string already present in list_path is kept and the page pair is
# appended to it, rather than replacing it.
#
# @return [URI] the list URL; carries the page parameter only when page.to_i > 0
def list_url
  url = URI.join(host, list_path)
  return url unless page.to_i > 0

  page_query = URI.encode_www_form(page_params => page)
  # Preserve filters such as "?cat=ruby" that list_path may already carry.
  existing_query = url.query.to_s
  url.query = existing_query.empty? ? page_query : "#{existing_query}&#{page_query}"
  url
end
run()
# File lib/rails_spider/resource.rb, line 20
# Crawls the list pages one after another, saving every item found.
#
# Each pass fetches the items of the current page via get_items, calls save
# for each of them, then advances page by one. The loop ends at the first
# page for which get_items returns nothing.
#
# @return [nil]
def run
  until (batch = get_items).empty?
    batch.each { |item_url| save(item_url) }
    self.page += 1
  end
end
save(url)
# File lib/rails_spider/resource.rb, line 36
# Downloads +url+ and stores its body on the matching Local record.
#
# The body is fetched first; the record is then looked up (or newly
# initialised) by url and work.id, given the body, and saved.
#
# @param url [Object] the item address, passed to fetcher.body and used as lookup key
# @return [Object] whatever the record's save call returns
def save(url)
  content = fetcher.body(url)
  record = Local.find_or_initialize_by(url: url, work_id: work.id)
  record.body = content
  record.save
end