module Http

The Http module defines a `Http.get(url)` method.

Public Instance Methods

get(url, max_age = nil) click to toggle source
# File lib/extensions/http.rb, line 31
# Fetches +url+ in text mode.
#
# Thin wrapper around get_body: forwards +url+ and +max_age+ unchanged and
# selects the :text mode. Returns whatever get_body returns.
def get(url, max_age = nil)
  mode = :text
  get_body(url, max_age, mode)
end
get_binary(url, max_age = nil) click to toggle source
# File lib/extensions/http.rb, line 35
# Fetches +url+ in binary mode.
#
# Thin wrapper around get_body: forwards +url+ and +max_age+ unchanged and
# selects the :binary mode. Returns whatever get_body returns.
def get_binary(url, max_age = nil)
  mode = :binary
  get_body(url, max_age, mode)
end
get_body(url, max_age, mode) click to toggle source
# File lib/extensions/http.rb, line 39
# Fetches +url+ through SimpleCache and returns the response body.
#
# url     - the URL to fetch.
# max_age - passed to SimpleCache.cached; when nil (or false) the value of
#           MaxAge.for(url) is used instead.
# mode    - :text runs the body through reencode, using the response's
#           content-type header values joined with ";". Any other value
#           (e.g. :binary) returns the body exactly as fetched.
#
# The fetch is wrapped in App.logger.benchmark and logged at debug level
# when the cache block actually runs.
def get_body(url, max_age, mode)
  max_age ||= MaxAge.for(url)

  # The mode is part of the cache key: text and binary requests for the same
  # URL store different values (re-encoded vs. raw), so they must not share
  # a cache entry.
  cache_key = "g-#{mode}-#{url}"

  App.logger.benchmark("[GET] #{url}", :minimum => 20) do
    SimpleCache.cached(cache_key, max_age) do
      App.logger.debug "[GET] #{url}"
      body, headers = get_body_and_headers_(url)
      next body unless mode == :text

      # Array() tolerates a missing header hash or a missing content-type.
      content_types = Array(headers && headers["content-type"]).join(";")
      reencode body, content_types
    end
  end
end

Private Instance Methods

force_valid_encoding(string, *encodings) click to toggle source
# File lib/extensions/http.rb, line 94
# Returns +string+ converted to UTF-8, interpreting its bytes using the
# first of +encodings+ for which they form a valid sequence.
#
# string    - the String to interpret. It is never modified: each candidate
#             encoding is applied to a copy, so frozen strings work too.
# encodings - candidate encoding names, tried in order. nil entries are
#             skipped.
#
# A candidate is skipped (rather than aborting the whole search) when its
# name is unknown to Ruby (ArgumentError from force_encoding) or when
# conversion to UTF-8 fails (any EncodingError).
#
# Returns the UTF-8 String, or nil when no candidate fits.
def force_valid_encoding(string, *encodings)
  encodings.each do |enc|
    next unless enc

    begin
      candidate = string.dup.force_encoding(enc)
      return candidate.encode("UTF-8") if candidate.valid_encoding?
    rescue ArgumentError, EncodingError
      # Unknown encoding name or unconvertible content: try the next one.
      next
    end
  end

  nil
end
get_body_and_headers_(uri_str, limit = 10) click to toggle source
# File lib/extensions/http.rb, line 59
# Performs an HTTP GET on +uri_str+, following redirections.
#
# uri_str - the URL to fetch, as a String.
# limit   - how many more requests may be made while following
#           redirections (default 10).
#
# Returns a two-element Array: [ response body, response headers as
# returned by Net::HTTPResponse#to_hash ].
#
# Raises RuntimeError when +limit+ reaches 0 or when a redirection response
# carries no Location header. For any response that is neither a success
# nor a redirection, raises the HTTP error produced by
# Net::HTTPResponse#error!.
def get_body_and_headers_(uri_str, limit = 10)
  raise 'too many redirections' if limit == 0

  uri = Addressable::URI.parse(uri_str)

  # Addressable's #port is nil unless the URL spells the port out;
  # #inferred_port falls back to the scheme's default, so https URLs are not
  # sent to Net::HTTP's default port 80.
  http = Net::HTTP.new(uri.host, uri.inferred_port)
  if uri.scheme == "https"
    http.use_ssl = true
    # NOTE(review): certificate verification is disabled here, so the peer's
    # identity is not checked. Left unchanged because callers may rely on it;
    # consider OpenSSL::SSL::VERIFY_PEER.
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE
  end

  response = http.request(Net::HTTP::Get.new(uri.request_uri))

  case response
  when Net::HTTPSuccess
    [ response.body, response.to_hash ]
  when Net::HTTPRedirection
    location = response['location']
    if location.nil? || location.empty?
      raise "redirection (#{response.code}) without a location header"
    end

    App.logger.debug "redirected to #{location}"
    # Location may be relative (e.g. "/login"); resolve it against the URL
    # that was just requested before following it.
    get_body_and_headers_(uri.join(location).to_s, limit - 1)
  else
    # Not a success and not a redirection: raise the matching HTTP error.
    response.error!
  end
end
html_encoding(html) click to toggle source
# File lib/extensions/http.rb, line 108
# Extracts the character encoding an HTML document declares about itself.
#
# html - the HTML source, parsed with Nokogiri.HTML.
#
# Looks first at the first <meta http-equiv='Content-Type'> element and
# reads the charset parameter from its content attribute. The parameter is
# matched case-insensitively, with or without whitespace after the ";", and
# optional quotes around the value are dropped. If that yields nothing, the
# charset attribute of the first <meta charset=...> element is used.
#
# Returns the declared encoding name as a String, or nil when none is found.
def html_encoding(html)
  doc = Nokogiri.HTML(html)

  http_equiv = doc.css("meta[http-equiv='Content-Type']").first
  declared = http_equiv && http_equiv["content"].to_s[/;\s*charset=["']?([^\s;"']+)/i, 1]
  return declared if declared

  html5_meta = doc.css("meta[charset]").first
  charset = html5_meta && html5_meta["charset"].to_s.strip
  charset unless charset.nil? || charset.empty?
end
reencode(body, content_type) click to toggle source
# File lib/extensions/http.rb, line 84
# Converts +body+ to UTF-8, guessing its source encoding.
#
# body         - the response body String.
# content_type - the Content-Type header value(s) as one String.
#
# Builds a list of candidate encodings and hands it to force_valid_encoding.
# Candidates, highest priority first:
#   1. xml_encoding(body),  when content_type mentions "xml"
#   2. html_encoding(body), when content_type mentions "html"
#   3. the charset parameter of content_type, when present
#   4. UTF-8
#   5. ISO-8859-1
#
# UTF-8 is tried before ISO-8859-1 because every byte sequence is valid
# ISO-8859-1: with the opposite order the UTF-8 fallback could never be
# reached.
#
# Returns whatever force_valid_encoding returns.
def reencode(body, content_type)
  content_type = content_type.to_s
  encodings = [ "UTF-8", "ISO-8859-1" ]

  # Case-insensitive; drops optional quotes and stops at the next parameter.
  charset = content_type[/;\s*charset=["']?([^\s;"']+)/i, 1]

  encodings.unshift(charset)              if charset
  encodings.unshift(html_encoding(body))  if content_type =~ /html/
  encodings.unshift(xml_encoding(body))   if content_type =~ /xml/

  force_valid_encoding body, *encodings
end
xml_encoding(xml) click to toggle source
# File lib/extensions/http.rb, line 116
# Parses +xml+ with Nokogiri.XML and returns the value of the resulting
# document's +encoding+ reader.
def xml_encoding(xml)
  document = Nokogiri.XML(xml)
  document.encoding
end