class MetaInspector::Parsers::TextsParser

Public Instance Methods

author() click to toggle source

Returns the meta author, if present

# File lib/meta_inspector/parsers/texts.rb, line 41
# Returns the value stored under meta['author'].
# A truthy value is memoized in @author; a nil/false value is looked up
# again on the next call.
def author
  return @author if @author

  @author = meta['author']
end
best_author() click to toggle source

An author getter that returns the first non-nil author from the following candidates:

  • the standard meta author

  • a link with the relational attribute “author”

  • address tag which may contain the author

  • the twitter:creator meta tag for the username

# File lib/meta_inspector/parsers/texts.rb, line 51
# Returns the best available author string, as computed by find_best_author.
#
# The result is cached after the first call, including a nil result:
# `@best_author ||= ...` would re-run find_best_author (and its document
# searches) on every call whenever no author was found.
def best_author
  return @best_author if defined?(@best_author)

  @best_author = find_best_author
end
best_description() click to toggle source

A description getter that returns the first non-empty description from the following candidates:

  • the standard meta description

  • the og:description meta tag

  • the twitter:description meta tag

  • the first paragraph with 120 characters or more

# File lib/meta_inspector/parsers/texts.rb, line 66
# Returns the best available description, as computed by
# find_best_description.
#
# The result is cached after the first call, including a nil result:
# `@best_description ||= ...` would re-run find_best_description on every
# call whenever no description was found.
def best_description
  return @best_description if defined?(@best_description)

  @best_description = find_best_description
end
best_title() click to toggle source
# File lib/meta_inspector/parsers/texts.rb, line 12
# Returns the best available title, as computed by find_best_title.
#
# The result is cached after the first call, including a nil result:
# `@best_title ||= ...` would re-run find_best_title (and its document
# searches) on every call whenever no title was found.
def best_title
  return @best_title if defined?(@best_title)

  @best_title = find_best_title
end
description() click to toggle source

Returns the meta description, if present

# File lib/meta_inspector/parsers/texts.rb, line 56
# Returns the value stored under meta['description'].
# A truthy value is memoized in @description; a nil/false value is looked
# up again on the next call.
def description
  return @description if @description

  @description = meta['description']
end
h1() click to toggle source
# File lib/meta_inspector/parsers/texts.rb, line 16
# Returns the result of find_heading('h1'), memoized in @h1.
def h1
  return @h1 if @h1

  @h1 = find_heading('h1')
end
h2() click to toggle source
# File lib/meta_inspector/parsers/texts.rb, line 20
# Returns the result of find_heading('h2'), memoized in @h2.
def h2
  return @h2 if @h2

  @h2 = find_heading('h2')
end
h3() click to toggle source
# File lib/meta_inspector/parsers/texts.rb, line 24
# Returns the result of find_heading('h3'), memoized in @h3.
def h3
  return @h3 if @h3

  @h3 = find_heading('h3')
end
h4() click to toggle source
# File lib/meta_inspector/parsers/texts.rb, line 28
# Returns the result of find_heading('h4'), memoized in @h4.
def h4
  return @h4 if @h4

  @h4 = find_heading('h4')
end
h5() click to toggle source
# File lib/meta_inspector/parsers/texts.rb, line 32
# Returns the result of find_heading('h5'), memoized in @h5.
def h5
  return @h5 if @h5

  @h5 = find_heading('h5')
end
h6() click to toggle source
# File lib/meta_inspector/parsers/texts.rb, line 36
# Returns the result of find_heading('h6'), memoized in @h6.
def h6
  return @h6 if @h6

  @h6 = find_heading('h6')
end
title() click to toggle source

Returns the parsed document title, from the content of the <title> tag within the <head> section.

# File lib/meta_inspector/parsers/texts.rb, line 8
# Returns the inner text of the 'head title' selection of the parsed
# document, memoized in @title once truthy.
#
# Any StandardError raised while reading the title is swallowed and nil is
# stored/returned instead (deliberate best-effort behaviour).
def title
  return @title if @title

  @title = begin
    parsed.css('head title').inner_text
  rescue StandardError
    nil
  end
end

Private Instance Methods

find_best_author() click to toggle source
# File lib/meta_inspector/parsers/texts.rb, line 92
# Picks the best author string from these candidates, in priority order:
#   1. meta['author']
#   2. the first a[rel="author"] element
#   3. the first address element
#   4. meta['twitter:creator']
#
# Element candidates are reduced to their inner text. Each candidate is
# stripped and has runs of whitespace collapsed to a single space.
#
# Candidates that are blank after normalization are skipped, so an empty
# meta author no longer hides a usable lower-priority candidate.
#
# Returns the first non-blank normalized candidate, or nil if there is none.
def find_best_author
  candidates = [
    meta['author'],
    parsed.css('a[rel="author"]').first,
    parsed.css('address').first,
    meta['twitter:creator']
  ]
  candidates.flatten!
  candidates.compact!

  candidates.each do |candidate|
    text = candidate.respond_to?(:inner_text) ? candidate.inner_text : candidate
    text = text.strip.gsub(/\s+/, ' ')
    return text unless text.empty?
  end

  nil
end
find_best_description() click to toggle source
# File lib/meta_inspector/parsers/texts.rb, line 106
# Picks the best description from these sources, in priority order:
#   1. meta['description']
#   2. meta['og:description']
#   3. meta['twitter:description']
#   4. secondary_description
#
# Sources are evaluated lazily, so secondary_description is only computed
# when none of the meta values is usable.
#
# Returns the first value whose to_s is non-empty, or nil if there is none.
def find_best_description
  sources = [
    -> { meta['description'] },
    -> { meta['og:description'] },
    -> { meta['twitter:description'] },
    -> { secondary_description }
  ]

  sources.each do |source|
    text = source.call
    return text unless text.to_s.empty?
  end

  nil
end
find_best_title() click to toggle source

Look for candidates per list of priority

# File lib/meta_inspector/parsers/texts.rb, line 77
# Picks the best title from these candidates, in priority order:
#   1. meta['title']
#   2. meta['og:title']
#   3. every 'head title' element
#   4. every 'body title' element
#   5. the first h1 element
#
# Element candidates are reduced to their inner text. Each candidate is
# stripped and has runs of whitespace collapsed to a single space.
#
# Candidates that are blank after normalization are skipped, so an empty
# <title> or meta value no longer hides a usable lower-priority candidate.
#
# Returns the first non-blank normalized candidate, or nil if there is none.
def find_best_title
  candidates = [
    meta['title'],
    meta['og:title'],
    parsed.css('head title'),
    parsed.css('body title'),
    parsed.css('h1').first
  ]
  # The css results are collections; flatten them into the candidate list.
  candidates.flatten!
  candidates.compact!

  candidates.each do |candidate|
    text = candidate.respond_to?(:inner_text) ? candidate.inner_text : candidate
    text = text.strip.gsub(/\s+/, ' ')
    return text unless text.empty?
  end

  nil
end
find_heading(heading) click to toggle source
# File lib/meta_inspector/parsers/texts.rb, line 72
# Collects the text of every element matching the given selector.
#
# heading - selector passed to parsed.css (callers in this file pass
#           'h1'..'h6').
#
# Each element's inner text is stripped and has runs of whitespace collapsed
# to a single space; entries that end up empty are dropped.
def find_heading(heading)
  texts = parsed.css(heading).map do |node|
    node.inner_text.strip.gsub(/\s+/, ' ')
  end

  texts.reject { |text| text.empty? }
end
secondary_description() click to toggle source

Look for the first <p> block with 120 characters or more

# File lib/meta_inspector/parsers/texts.rb, line 117
# Returns the text of the first <p> whose string length is at least 120,
# found with an XPath search on the parsed document, or an empty string
# when there is no such paragraph.
def secondary_description
  paragraph = parsed.search('//p[string-length() >= 120]').first
  return '' unless paragraph

  paragraph.text
end