class Snapcrawl::Page

Constants

EXTENSION_BLACKLIST
PROTOCOL_BLACKLIST

Attributes

depth[R]
url[R]

Public Class Methods

new(url, depth: 0) click to toggle source
# File lib/snapcrawl/page.rb, line 16
# Builds a page for the given URL at the given crawl depth.
#
# url   - object responding to +protocolize+ (defined outside this view;
#         presumably normalizes the URL's protocol prefix — confirm).
# depth - value stored as-is and exposed through the +depth+ reader;
#         defaults to 0.
def initialize(url, depth: 0)
  @url = url.protocolize
  @depth = depth
end

Public Instance Methods

pages() click to toggle source
# File lib/snapcrawl/page.rb, line 38
# Returns one Page per entry of +links+, each created one level deeper
# than this page. Returns nil when the page is not valid.
# (+links+ is defined outside this view.)
def pages
  if valid?
    links.map do |href|
      Page.new(href, depth: depth + 1)
    end
  end
end
path() click to toggle source
# File lib/snapcrawl/page.rb, line 28
# Returns the +request_uri+ of this page's URL as parsed by
# Addressable::URI. A truthy result is cached in @path and reused.
def path
  return @path if @path

  @path = Addressable::URI.parse(url).request_uri
end
save_screenshot(outfile) click to toggle source
# File lib/snapcrawl/page.rb, line 43
# Saves a screenshot of this page's URL to +outfile+.
#
# outfile - target passed to Screenshot#save after string interpolation.
#
# Returns false when the page is not valid; otherwise returns whatever
# Screenshot#save returns.
def save_screenshot(outfile)
  if valid?
    Screenshot.new(url).save("#{outfile}")
  else
    false
  end
end
site() click to toggle source
# File lib/snapcrawl/page.rb, line 24
# Returns the +site+ component of this page's URL as parsed by
# Addressable::URI. A truthy result is cached in @site and reused.
def site
  return @site if @site

  @site = Addressable::URI.parse(url).site
end
valid?() click to toggle source
# File lib/snapcrawl/page.rb, line 20
# Reports whether the page was fetched successfully.
#
# Returns nil when there is no HTTP response (http_response is nil),
# otherwise the result of calling +success?+ on the response.
def valid?
  response = http_response
  return if response.nil?

  response.success?
end

Private Instance Methods

cache() click to toggle source
# File lib/snapcrawl/page.rb, line 111
# Returns a new Lightly instance on every call, configured with the
# +life+ option taken from Config.cache_life.
def cache
  lifetime = Config.cache_life
  Lightly.new(life: lifetime)
end
http_response() click to toggle source
# File lib/snapcrawl/page.rb, line 50
# Returns the HTTP response for this page, fetching it at most once.
#
# The result of http_response! is memoized even when it is nil (which is
# what http_response! returns after a rescued error). A plain `||=` would
# treat nil as "not yet fetched" and re-issue the failing request, and log
# the same error again, on every subsequent call.
def http_response
  return @http_response if defined?(@http_response)

  @http_response = http_response!
end
http_response!() click to toggle source
# File lib/snapcrawl/page.rb, line 54
# Fetches this page's URL through the cache, performing an HTTParty GET
# (with httparty_options) inside the block handed to cache.get.
#
# A response whose +success?+ is falsy is still returned, after a warning
# with its code and message is logged. Any StandardError raised along the
# way is logged as an error and nil is returned instead.
def http_response!
  result = cache.get(url) do
    HTTParty.get(url, httparty_options)
  end

  unless result.success?
    $logger.warn "http error on !undpur!#{url}!txtrst!, code: !txtylw!#{result.code}!txtrst!, message: #{result.message.strip}"
  end

  result
rescue StandardError => error
  $logger.error "http error on !undpur!#{url}!txtrst! - !txtred!#{error.class}!txtrst!: #{error.message}"
  nil
end
httparty_options() click to toggle source
# File lib/snapcrawl/page.rb, line 69
# Options hash passed to HTTParty.get: { verify: false } when
# Config.skip_ssl_verification is truthy, otherwise an empty hash.
def httparty_options
  return {} unless Config.skip_ssl_verification

  { verify: false }
end