class Microformats::Parser

Attributes

http_body[R]
http_headers[R]

Public Class Methods

new() click to toggle source
Calls superclass method
# File lib/microformats/parser.rb, line 5
# Creates a parser whose recorded HTTP response headers start out empty.
def initialize
  # Give the http_headers reader a value before any document has been read.
  @http_headers = {}
  # Bare `super` forwards this method's (empty) argument list to the
  # superclass initializer.
  super
end

Public Instance Methods

parse(html, base: nil, headers: {}) click to toggle source
# File lib/microformats/parser.rb, line 10
# Parses a document and returns the items and rel data found in it.
#
# html    - String handed to read_html to obtain the markup to parse.
# base    - Optional base URL; replaced by whatever parse_base finds in the
#           document when that is not nil.
# headers - Hash forwarded to read_html.
#
# Returns a Collection built from the 'items', 'rels' and 'rel-urls' results.
def parse(html, base: nil, headers: {})
  # Reset all per-document state so the parser instance can be reused.
  @http_headers = {}
  @items = []
  @rels = {}
  @rel_urls = {}
  @alternates = []
  @base = base

  document = Nokogiri::HTML(read_html(html, headers: headers))

  declared_base = parse_base(document)
  @base = declared_base unless declared_base.nil?

  # Rewrite URL-bearing attributes to absolute form. Only one attribute is
  # rewritten per node: `src` when present, otherwise `href`.
  document.traverse do |node|
    url_attribute = node.attribute('src') || node.attribute('href')
    next if url_attribute.nil?

    absolute_url = Microformats::AbsoluteUri.new(url_attribute.value.to_s, base: @base).absolutize
    url_attribute.value = absolute_url.to_s
  end

  parse_node(document)
  parse_rels(document)

  Collection.new('items' => @items, 'rels' => @rels, 'rel-urls' => @rel_urls)
end
read_html(html, headers: {}) click to toggle source
# File lib/microformats/parser.rb, line 43
# Resolves +html+ to the markup that should be parsed.
#
# The stripped input is first handed to `open` (URI.open when available,
# otherwise Kernel.open). When that succeeds, the body that was read is
# stored in @http_body, response metadata (if the opened object exposes
# `meta`) is stored in @http_headers, and @base defaults to the stripped
# input when no base was set. When nothing can be opened, the input itself
# is treated as literal markup.
#
# html    - String: something `open` understands, or literal markup.
# headers - Hash splatted into the `open` call as keyword arguments.
#
# Returns the String body (also stored in @http_body).
def read_html(html, headers: {})
  stripped_html = html.strip

  # Never hand these to `open`:
  # * a leading "|" makes Kernel#open spawn a subprocess on Rubies that still
  #   support pipe-open, which is command execution from document content;
  # * a NUL byte makes `open` raise ArgumentError instead of Errno::ENOENT.
  # Neither can be a fetchable location, so treat them as literal markup.
  if stripped_html.start_with?('|') || stripped_html.include?("\0")
    @http_body = html
    return @http_body
  end

  (URI.respond_to?(:open) ? URI : Kernel).open(stripped_html, **headers) do |response|
    @http_headers = response.meta if response.respond_to?(:meta)
    @http_body = response.read
  end

  @base = stripped_html if @base.nil?

  @http_body
rescue Errno::ENOENT, Errno::ENAMETOOLONG
  # Nothing to open under that name: the input was the markup itself.
  @http_body = html
end

Private Instance Methods

parse_base(document) click to toggle source
# File lib/microformats/parser.rb, line 78
# Finds the base URL declared by the document.
#
# Returns the `href` value of the first <base> element that has an `href`
# attribute, or nil when there is none. Looking the attribute up by name
# (rather than taking the element's first attribute value) keeps markup such
# as <base target="_blank" href="..."> from yielding "_blank" as the base.
def parse_base(document)
  base = document.search('base').find { |candidate| candidate['href'] }

  base['href'] unless base.nil?
end
parse_element(element) click to toggle source
# File lib/microformats/parser.rb, line 60
# Handles one element: when it carries format classes it is parsed as an item
# and appended to @items; otherwise its children are searched instead.
#
# Backcompat classes that are also present as regular format classes are
# ignored. If any backcompat class remains, the element is parsed in
# backcompat mode with the regular and backcompat classes combined.
def parse_element(element)
  modern_classes = format_classes(element)
  legacy_classes = backcompat_format_classes(element).reject do |candidate|
    modern_classes.include?(candidate)
  end

  # Not a format root itself: descend into the children.
  return parse_nodeset(element.children) if legacy_classes.empty? && modern_classes.empty?

  parsed =
    if legacy_classes.empty?
      FormatParser.new.parse(element, base: @base, format_class_array: modern_classes)
    else
      FormatParser.new.parse(
        element,
        base: @base,
        format_class_array: modern_classes + legacy_classes,
        backcompat: true
      )
    end

  @items << parsed
end
parse_rels(element) click to toggle source
# File lib/microformats/parser.rb, line 84
# Collects rel data from every element under +element+ that has both a `rel`
# and an `href` attribute.
#
# @rels maps each rel value to the unique absolute URLs it was seen with.
# @rel_urls maps each absolute URL to a hash holding:
#   * 'hreflang', 'media', 'title', 'type' - taken from the first element
#     for that URL that supplies the attribute;
#   * 'text' - the stripped text of the first element with non-empty text;
#   * 'rels' - the unique rel values seen for that URL across ALL elements
#     (values accumulate instead of being replaced by the last element).
def parse_rels(element)
  element.search('*[@rel]').each do |rel|
    href = rel.attribute('href')
    next if href.nil?

    # Repeated tokens in one rel attribute (rel="me me") count once.
    rel_values = rel.attribute('rel').text.split(' ').uniq
    next if rel_values.empty?

    # The absolute URL is the same for every rel value of this element.
    url = Microformats::AbsoluteUri.new(href.text, base: @base).absolutize

    rel_values.each do |rel_value|
      urls = (@rels[rel_value] ||= [])
      urls << url unless urls.include?(url)
    end

    details = (@rel_urls[url] ||= {})

    %w[hreflang media title type].each do |name|
      attribute = rel.attribute(name)
      details[name] = attribute.value if details[name].nil? && !attribute.nil?
    end

    link_text = rel.text
    details['text'] = link_text.strip if details['text'].nil? && !link_text.empty?

    # Merge with rel values recorded by earlier elements for the same URL.
    details['rels'] = (details['rels'] || []) | rel_values
  end
end