class MetaInspector::Parsers::LinksParser
Public Instance Methods
all()
click to toggle source
Returns all links found, unrelativized and absolutified
# File lib/meta_inspector/parsers/links.rb, line 16
# Every raw link passed through URL.absolutify against base_url, with nil
# results and duplicates removed. The list is memoized in @all.
def all
  @all ||= begin
    absolutified = raw.map { |href| URL.absolutify(href, base_url) }
    absolutified.compact.uniq
  end
end
base_href()
click to toggle source
Returns the value of the href attribute on the <base /> tag, if it exists
# File lib/meta_inspector/parsers/links.rb, line 55
# Value of the href attribute on the first <base> element of the parsed
# document. Any StandardError raised along the way (for example when there
# is no <base> element or it has no href) results in nil.
def base_href
  base_tag = parsed.search('base').first
  base_tag.attributes['href'].value
rescue StandardError
  nil
end
base_url()
click to toggle source
Returns the base url to absolutify relative links. This can be the one set on a <base> tag, or the url of the document if no <base> tag was found.
# File lib/meta_inspector/parsers/links.rb, line 49
# URL against which relative links are absolutified. When base_href is
# non-blank it is absolutified against the root URL of the document;
# otherwise (or if that yields nil) the document url itself is returned.
def base_url
  resolved_base =
    unless base_href.to_s.strip.empty?
      URL.absolutify(base_href, URL.new(url).root_url)
    end

  resolved_base || url
end
external()
click to toggle source
Returns all external HTTP links found
# File lib/meta_inspector/parsers/links.rb, line 36
# HTTP links whose host differs from this document's host.
# The result is memoized in @external.
def external
  @external ||= http.reject do |href|
    URL.new(href).host == host
  end
end
http()
click to toggle source
Returns all HTTP links found
# File lib/meta_inspector/parsers/links.rb, line 21
# Subset of `all` whose entries match the http:// or https:// scheme
# pattern (case-insensitive). The result is memoized in @http.
def http
  @http ||= all.reject do |href|
    href !~ /^http(s)?:\/\//i
  end
end
internal()
click to toggle source
Returns all internal HTTP links found
# File lib/meta_inspector/parsers/links.rb, line 31
# HTTP links whose host equals this document's host.
# The result is memoized in @internal.
def internal
  @internal ||= http.reject do |href|
    URL.new(href).host != host
  end
end
links()
click to toggle source
# File lib/meta_inspector/parsers/links.rb, line 6
# Returns the receiver itself, so the link accessors can be reached
# through a `links` call on this object.
def links
  self
end
non_http()
click to toggle source
Returns all non-HTTP links found
# File lib/meta_inspector/parsers/links.rb, line 26
# Subset of `all` whose entries do not match the http:// or https://
# scheme pattern (case-insensitive). The result is memoized in @non_http.
def non_http
  @non_http ||= all.reject do |href|
    href =~ /^http(s)?:\/\//i
  end
end
raw()
click to toggle source
Returns all links found, unprocessed
# File lib/meta_inspector/parsers/links.rb, line 11
# The href attributes of all <a> elements in the parsed document, passed
# through `cleanup`, with nils and duplicates removed. Memoized in @raw.
def raw
  @raw ||= begin
    href_nodes = parsed.search('//a/@href')
    cleanup(href_nodes).compact.uniq
  end
end
to_hash()
click to toggle source
# File lib/meta_inspector/parsers/links.rb, line 40
# Hash view of the classified links, keyed by the strings 'internal',
# 'external' and 'non_http' (in that order).
def to_hash
  grouped = {}
  grouped['internal'] = internal
  grouped['external'] = external
  grouped['non_http'] = non_http
  grouped
end