class Spidercrawl::Request
Makes the request to the targeted website
Attributes
uri[RW]
Public Class Methods
new(url, options = {})
click to toggle source
# File lib/spidercrawl/request.rb, line 14
#
# Prepares a request for +url+. Two clients are built up front: a Net::HTTP
# object stored in @http and a Curl::Easy handle stored in @c.
#
# url     - String URL of the page to request; parsed with URI.parse.
# options - Hash of settings (default: {}):
#           :threads - stored in @threads; not used by this method.
#           :timeout - open/read timeout in seconds applied to the Net::HTTP
#                      client. When absent, Net::HTTP's defaults are kept.
def initialize(url, options = {})
  @uri = URI.parse(url)
  @threads = options[:threads]
  @timeout = options[:timeout]

  # Net::HTTP.new never yields to a block, so configuration has to happen on
  # the returned object; a block passed here would be silently ignored.
  @http = Net::HTTP.new(@uri.host, @uri.port)
  # Without this, requests to https URLs speak plain HTTP to the TLS port.
  @http.use_ssl = @uri.scheme == 'https'
  if @timeout
    @http.open_timeout = @timeout # in seconds
    @http.read_timeout = @timeout # in seconds
  end

  @c = Curl::Easy.new(@uri.to_s) do |curl|
    curl.headers['User-Agent'] = UserAgents.random
  end
end
Public Instance Methods
curl()
click to toggle source
Fetches a page from the given URL using libcurl.
# File lib/spidercrawl/request.rb, line 32
#
# Fetches the page at @uri through the Curl::Easy handle held in @c.
#
# Returns a Page for 2xx, 3xx and 404 responses:
#   2xx - response code, headers, body, response time and crawled time
#   3xx - response code, headers, body, response time and redirect URL
#   404 - response code and response time only
# Returns nil for any other status code, and when the request raises a
# StandardError (the error and its backtrace are printed to stdout).
# response_time is in whole milliseconds; crawled_time is epoch milliseconds.
def curl
  puts "fetching #{@uri.to_s}".green.on_black
  # Monotonic clock: the duration is unaffected by wall-clock adjustments.
  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  begin
    c = @c
    c.url = @uri.to_s
    c.perform
    response_time = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started) * 1000).round

    # Status classes mirror the ones #fetch matches on
    # (Net::HTTPSuccess / Net::HTTPRedirection / Net::HTTPNotFound).
    case c.response_code
    when 200..299
      Page.new(@uri, response_code: c.response_code,
                     response_head: c.header_str,
                     response_body: c.body_str,
                     response_time: response_time,
                     crawled_time: (Time.now.to_f * 1000).to_i)
    when 300..399
      Page.new(@uri, response_code: c.response_code,
                     response_head: c.header_str,
                     response_body: c.body_str,
                     response_time: response_time,
                     redirect_url: c.redirect_url)
    when 404
      Page.new(@uri, response_code: c.response_code,
                     response_time: response_time)
    end
  rescue StandardError => e
    # StandardError rather than Exception, so Interrupt and SystemExit still
    # stop the crawler instead of being logged and swallowed.
    puts e.inspect
    puts e.backtrace
    nil
  end
end
fetch()
click to toggle source
Fetches a page from the given URL using Net::HTTP.
# File lib/spidercrawl/request.rb, line 66
#
# Fetches the page at @uri with a GET request through the Net::HTTP client
# held in @http, sending a User-Agent taken from UserAgents.random.
#
# Returns a Page for success (2xx), redirection (3xx) and 404 responses:
#   2xx - response code, headers, body, response time and crawled time
#   3xx - response code, headers, body, response time and the Location header
#   404 - response code and response time only
# Returns nil for any other response, and when the request raises a
# StandardError (the error and its backtrace are printed to stdout).
# response_time is in whole milliseconds, the same unit #curl reports;
# crawled_time is epoch milliseconds.
def fetch
  puts "fetching #{@uri.to_s}".green.on_black
  # Monotonic clock: the duration is unaffected by wall-clock adjustments.
  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  begin
    request = Net::HTTP::Get.new(@uri.request_uri)
    request['User-Agent'] = UserAgents.random
    response = @http.request(request)
    response_time = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - started) * 1000).round

    case response
    when Net::HTTPSuccess
      # to_hash is the public accessor for the header table
      # (downcased name => array of values).
      Page.new(@uri, response_code: response.code.to_i,
                     response_head: response.to_hash,
                     response_body: response.body,
                     response_time: response_time,
                     crawled_time: (Time.now.to_f * 1000).to_i)
    when Net::HTTPRedirection
      Page.new(@uri, response_code: response.code.to_i,
                     response_head: response.to_hash,
                     response_body: response.body,
                     response_time: response_time,
                     redirect_url: response['location'])
    when Net::HTTPNotFound
      Page.new(@uri, response_code: response.code.to_i,
                     response_time: response_time)
    end
  rescue StandardError => e
    # StandardError rather than Exception, so Interrupt and SystemExit still
    # stop the crawler instead of being logged and swallowed.
    puts e.inspect
    puts e.backtrace
    nil
  end
end