class MetaInspector::URL

Constants

WELL_KNOWN_TRACKING_PARAMS

Attributes

url[R]

Public Class Methods

absolutify(url, base_url, options = {}) click to toggle source

Converts a relative URL to an absolute URL, like:

"/faq" => "http://example.com/faq"

Respecting already absolute URLs like the ones starting with

http:, ftp:, telnet:, mailto:, javascript: ...

Protocol-relative URLs are also resolved, using the same scheme as the base_url.

# File lib/meta_inspector/url.rb, line 59
def self.absolutify(url, base_url, options = {})
  options = defaults.merge(options)
  if url =~ /^\w*\:/i
    MetaInspector::URL.new(url, options).url
  else
    uri = Addressable::URI.join(base_url, url)
    options[:normalize] ? uri.normalize.to_s : uri.to_s
  end
rescue MetaInspector::ParserError, Addressable::URI::InvalidURIError, ArgumentError
  nil
end
new(initial_url, options = {}) click to toggle source
# File lib/meta_inspector/url.rb, line 7
def initialize(initial_url, options = {})
  options        = self.class.defaults.merge(options)

  @normalize     = options[:normalize]

  self.url = initial_url
end

Private Class Methods

defaults() click to toggle source
# File lib/meta_inspector/url.rb, line 73
def self.defaults
  { :normalize => true }
end

Public Instance Methods

host() click to toggle source
# File lib/meta_inspector/url.rb, line 19
def host
  parsed(url) ? parsed(url).host : nil
end
root_url() click to toggle source
# File lib/meta_inspector/url.rb, line 23
def root_url
  "#{scheme}://#{host}/"
end
scheme() click to toggle source
# File lib/meta_inspector/url.rb, line 15
def scheme
  parsed(url) ? parsed(url).scheme : nil
end
tracked?() click to toggle source
# File lib/meta_inspector/url.rb, line 29
def tracked?
  u = parsed(url)
  return false unless u.query_values
  found_tracking_params = WELL_KNOWN_TRACKING_PARAMS & u.query_values.keys
  return found_tracking_params.any?
end
untrack!() click to toggle source
# File lib/meta_inspector/url.rb, line 44
def untrack!
  self.url = untracked_url if tracked?
end
untracked_url() click to toggle source
# File lib/meta_inspector/url.rb, line 36
def untracked_url
  u = parsed(url)
  return url unless u.query_values
  query_values = u.query_values.delete_if { |key, _| WELL_KNOWN_TRACKING_PARAMS.include? key }
  u.query_values = query_values.length > 0 ? query_values : nil
  u.to_s
end
url=(new_url) click to toggle source
# File lib/meta_inspector/url.rb, line 48
def url=(new_url)
  url  = with_default_scheme(new_url)
  @url = @normalize ? normalized(url) : url
end

Private Instance Methods

normalized(url) click to toggle source

Normalizes the URL: encodes characters that should be encoded, adds a trailing slash, converts to lowercase…

# File lib/meta_inspector/url.rb, line 84
def normalized(url)
  Addressable::URI.parse(url).normalize.to_s
rescue Addressable::URI::InvalidURIError => e
  raise MetaInspector::ParserError.new(e)
end
parsed(url) click to toggle source
# File lib/meta_inspector/url.rb, line 90
def parsed(url)
  Addressable::URI.parse(url)
rescue Addressable::URI::InvalidURIError => e
  raise MetaInspector::ParserError.new(e)
end
with_default_scheme(url) click to toggle source

Adds ‘http’ as the default scheme if there is none.

# File lib/meta_inspector/url.rb, line 78
def with_default_scheme(url)
  parsed(url) && parsed(url).scheme.nil? ? 'http://' + url : url
end