module Crawler

Constants

VERSION

Attributes

base_uri[RW]
base_url[RW]
index[RW]

Public Class Methods

crawl(url = base_url) click to toggle source

Performs a crawl of the domain, starting at the given URL, and indexes the results.

# File lib/crawler.rb, line 31
# Crawls pages starting at +url+, feeding each fetched document to the index
# until the index reports no more paths to visit.
#
# Runs as a loop rather than calling itself once per page, so the call stack
# stays flat no matter how many pages the index queues up.
#
# @param url [String] absolute URL of the first page to fetch; defaults to base_url
# @return [nil]
def crawl(url = base_url)
  loop do
    document = Crawler::Document.new(url)
    # The index is keyed by the path relative to base_url.
    index.consume_document url.sub(base_url, ''), document

    paths_queue = index.get_paths_to_visit
    next_path = paths_queue[0]

    # Trailing "\r" rewinds the cursor so the progress line is overwritten in place.
    print " Pages remaining - #{paths_queue.count}    \r"
    break unless next_path

    url = "#{base_url}#{next_path}"
  end
end
new(domain) click to toggle source

Initialize a new Crawler

domain => domain to crawl. The parsed URI is stored in base_uri, and the URL constructed from it is stored in base_url.

# File lib/crawler.rb, line 21
# Configures the crawler for +domain+ and returns the receiver.
#
# The domain is stripped of surrounding whitespace and parsed into
# @base_uri, validate_protocol is run, and then @base_url is built from
# base_uri via construct_url.
#
# @param domain [String] domain to crawl
# @return [self]
def new(domain)
  trimmed_domain = domain.strip
  @base_uri = Addressable::URI.parse(trimmed_domain)

  # Protocol validation runs before the URL is constructed.
  validate_protocol
  @base_url = construct_url(base_uri)

  self
end
results() click to toggle source

Returns the indexed results of a crawl

# File lib/crawler.rb, line 44
# Returns whatever the index reports as its results.
#
# @return [Object] the value of index.results
def results
  crawl_index = index
  crawl_index.results
end