class MetaInspector::Parsers::LinksParser

Public Instance Methods

all() click to toggle source

Returns all links found, unrelativized and absolutified

# File lib/meta_inspector/parsers/links.rb, line 16
# Returns every raw link run through URL.absolutify against base_url,
# with nil results and duplicates removed. The result is memoized in @all.
def all
  @all ||= begin
    absolutified = raw.map { |href| URL.absolutify(href, base_url) }
    absolutified.compact.uniq
  end
end
base_href() click to toggle source

Returns the value of the href attribute on the <base /> tag, if it exists

# File lib/meta_inspector/parsers/links.rb, line 55
# Looks up the first <base> element in the parsed document and returns the
# value of its href attribute.
#
# Any StandardError raised along the way (for example when there is no
# <base> element or it has no href attribute) is swallowed and nil is
# returned instead.
def base_href
  base_node = parsed.search('base').first
  base_node.attributes['href'].value
rescue StandardError
  nil
end
base_url() click to toggle source

Returns the base url to absolutify relative links. This can be the one set on a <base> tag, or the url of the document if no <base> tag was found.

# File lib/meta_inspector/parsers/links.rb, line 49
# Returns the URL against which relative links are absolutified.
#
# When base_href is blank (nil, empty, or whitespace only) this is the
# document url. Otherwise base_href is absolutified against the root url of
# the document; should that yield nil, the document url is used as fallback.
def base_url
  return url if base_href.to_s.strip.empty?

  URL.absolutify(base_href, URL.new(url).root_url) || url
end
external() click to toggle source

Returns all external HTTP links found

# File lib/meta_inspector/parsers/links.rb, line 36
# Returns the HTTP links whose host differs from this document's host.
# The result is memoized in @external.
def external
  @external ||= http.reject do |candidate|
    URL.new(candidate).host == host
  end
end
http() click to toggle source

Returns all HTTP links found

# File lib/meta_inspector/parsers/links.rb, line 21
# Returns the subset of +all+ whose scheme is http or https (matched
# case-insensitively). The result is memoized in @http.
#
# The pattern is anchored with \A rather than ^: in Ruby, ^ matches at the
# start of every line, so a non-HTTP link containing an embedded newline
# followed by "http://" would otherwise be misclassified as an HTTP link.
def http
  @http ||= all.select { |link| link =~ %r{\Ahttps?://}i }
end
internal() click to toggle source

Returns all internal HTTP links found

# File lib/meta_inspector/parsers/links.rb, line 31
# Returns the HTTP links whose host equals this document's host.
# The result is memoized in @internal.
def internal
  return @internal if @internal

  @internal = http.select do |candidate|
    link_host = URL.new(candidate).host
    link_host == host
  end
end
non_http() click to toggle source

Returns all non-HTTP links found

# File lib/meta_inspector/parsers/links.rb, line 26
# Returns the subset of +all+ whose scheme is neither http nor https
# (matched case-insensitively). The result is memoized in @non_http.
#
# The pattern is anchored with \A rather than ^: in Ruby, ^ matches at the
# start of every line, so a non-HTTP link containing an embedded newline
# followed by "http://" would otherwise be wrongly excluded from this list.
def non_http
  @non_http ||= all.select { |link| link !~ %r{\Ahttps?://}i }
end
raw() click to toggle source

Returns all links found, unprocessed

# File lib/meta_inspector/parsers/links.rb, line 11
# Returns the href attributes of all <a> elements in the parsed document,
# passed through cleanup, with nil entries and duplicates removed.
# The result is memoized in @raw.
def raw
  @raw ||= begin
    href_nodes = parsed.search('//a/@href')
    cleaned = cleanup(href_nodes)
    cleaned.compact.uniq
  end
end
to_hash() click to toggle source
# File lib/meta_inspector/parsers/links.rb, line 40
# Returns a hash grouping the links under the string keys 'internal',
# 'external' and 'non_http', in that order.
def to_hash
  grouped = {}
  grouped['internal'] = internal
  grouped['external'] = external
  grouped['non_http'] = non_http
  grouped
end