class Crawler::Document

Attributes

content[RW]
domain_specific_paths[RW]
static_assets[RW]
uri[RW]
url[RW]

Public Class Methods

new(url) click to toggle source

Initialize a new Document

uri => URI of the document; url => URL of the document; links => links found in the document; domain_specific_paths => paths in the document related to the crawler's base domain; static_assets => static assets found in the document

# File lib/crawler/document.rb, line 19
# Builds a Document from the given URL string and populates its instance
# state (@uri, @url, @links, @domain_specific_paths, @static_assets).
#
# url - the address of the document. It must respond to #strip, which is
#       called to drop surrounding whitespace before parsing.
#
# NOTE(review): construct_url and the extract_* helpers are defined outside
# this view. They presumably read instance state assigned by the preceding
# lines (e.g. @uri, @links), so the assignment order below should be kept
# as-is unless that is confirmed otherwise.
def initialize(url)
  # Parse the whitespace-trimmed input into an Addressable::URI.
  @uri = Addressable::URI.parse(url.strip)
  # `uri` is not a local variable here; it resolves to a method call on self
  # (presumably the attribute reader for @uri assigned above — confirm).
  @url = construct_url uri
  @links = extract_links
  @domain_specific_paths = extract_domain_specific_paths
  @static_assets = extract_assets
end