class Microformats::FormatParser

Public Instance Methods

check_for_h_properties() click to toggle source
# File lib/microformats/format_parser.rb, line 118
# Scans every parsed property value for nested microformat objects and
# records the prefix of each of their types in @found_prefixes.
#
# Only entries that respond to :key and hold a non-nil 'type' entry are
# inspected; every other value is skipped.
def check_for_h_properties
  @properties.each_value do |entries|
    entries.each do |entry|
      next unless entry.respond_to?(:key) && entry.key?('type')

      types = entry['type']
      next if types.nil?

      types.each { |type| @found_prefixes.add(prefix_from_class(type).to_sym) }
    end
  end
end
imply_dates() click to toggle source

Imply the date for dt-end when dt-start is defined with a date.

# File lib/microformats/format_parser.rb, line 323
# Implies a date for 'end' values that carry no date of their own, using the
# first date found among the 'start' values.
#
# A value counts as dated when it begins with either a calendar date
# (YYYY-MM-DD) or an ordinal date (YYYY-DDD). Undated 'end' strings are
# rewritten in place as "<start date> <end value>".
#
# Property values are not always strings: a nested microformat parsed as a
# property is stored as a Hash. Such values cannot supply or receive a date,
# so they are skipped when looking for the start date and passed through
# untouched when rewriting the end values.
def imply_dates
  starts = @properties['start']
  ends = @properties['end']
  return if starts.nil? || ends.nil?

  calendar_date = /^(\d{4}-[01]\d-[0-3]\d)/
  ordinal_date = /^(\d{4}-[0-3]\d\d)/

  # The first start value that carries a date wins.
  start_date = nil
  starts.each do |start_val|
    next unless start_val.is_a?(String)

    found = calendar_date.match(start_val) || ordinal_date.match(start_val)
    next if found.nil?

    start_date = found[1]
    break
  end

  return if start_date.nil?

  ends.map! do |end_val|
    next end_val unless end_val.is_a?(String)
    next end_val if end_val.match?(calendar_date) || end_val.match?(ordinal_date)

    "#{start_date} #{end_val}"
  end
end
imply_name(element) click to toggle source
# File lib/microformats/format_parser.rb, line 131
# Implies the 'name' property when none was parsed explicitly.
#
# Nothing is implied when a name already exists, when an e-, p- or h- prefix
# was recorded in @found_prefixes, or when @children is non-empty.
# Otherwise the name is taken from, in order:
#   1. the element's own alt (img/area) or title (abbr) attribute,
#   2. that attribute, when non-empty, on an only child without format classes,
#   3. that attribute, when non-empty, on an only grandchild reached through
#      an only child, neither of them carrying format classes,
#   4. render_text(element).
def imply_name(element)
  return unless @properties['name'].nil?
  return if %i[e p h].any? { |prefix| @found_prefixes.include?(prefix) }
  return unless @children.empty?

  naming_attribute = { 'img' => 'alt', 'area' => 'alt', 'abbr' => 'title' }

  # Attribute node able to name +node+, or nil when the tag or attribute does not apply.
  attribute_for = lambda do |node|
    attr_name = naming_attribute[node.name]
    attr_name.nil? ? nil : node.attribute(attr_name)
  end

  # Stripped naming attribute of a descendant; empty attribute values are ignored.
  descendant_name = lambda do |node|
    attr = attribute_for.call(node)
    attr.nil? || attr.value.empty? ? nil : attr.value.strip
  end

  # The single non-text child of +parent+, provided it is an element without format classes.
  sole_plain_child = lambda do |parent|
    non_text = parent.children.reject { |n| n.is_a?(Nokogiri::XML::Text) }
    candidate = non_text.first
    qualifies = non_text.count == 1 && candidate.is_a?(Nokogiri::XML::Element) && format_classes(candidate).empty?
    qualifies ? candidate : nil
  end

  own_attribute = attribute_for.call(element)

  implied =
    if own_attribute.nil?
      child = sole_plain_child.call(element)
      found = child.nil? ? nil : descendant_name.call(child)

      if found.nil? && !child.nil?
        grandchild = sole_plain_child.call(child)
        found = descendant_name.call(grandchild) unless grandchild.nil?
      end

      # Fall back to the rendered text of the root element.
      found.nil? ? render_text(element) : found
    else
      # On the root element itself even an empty attribute value is used.
      own_attribute.value.strip
    end

  @properties['name'] = [implied]
end
imply_photo(element) click to toggle source
# File lib/microformats/format_parser.rb, line 183
# Implies the 'photo' property when none was parsed explicitly.
#
# Lookup order:
#   1. src of the element itself when it is an img, or data when it is an
#      object (value used as-is),
#   2. the only img[src] child, else the only object[data] child, provided
#      that child has no format classes (value stripped),
#   3. the same search inside an only non-text child without format classes.
# A value that is found is made absolute against @base via
# Microformats::AbsoluteUri; when nothing is found the property stays unset.
#
# NOTE(review): unlike imply_name and imply_url, the guard here does not
# consult @found_prefixes or @children - confirm this is intentional.
def imply_photo(element)
  return unless @properties['photo'].nil?

  # Tag name => attribute holding the media reference, in lookup order.
  sources = { 'img' => 'src', 'object' => 'data' }

  # Stripped reference from the only matching tag among +parent+'s children.
  # img is tried before object; a match carrying format classes is rejected.
  media_in = lambda do |parent|
    found = nil
    sources.each do |tag, attr_name|
      matches = parent.children.select do |child|
        child.is_a?(Nokogiri::XML::Element) && child.name == tag && !child.attribute(attr_name).nil?
      end
      next unless matches.count == 1 && format_classes(matches.first).empty?

      found = matches.first.attribute(attr_name).value.strip
      break
    end
    found
  end

  own_attr_name = sources[element.name]
  own_attr = own_attr_name.nil? ? nil : element.attribute(own_attr_name)

  photo =
    if own_attr.nil?
      nested = media_in.call(element)

      if nested.nil?
        non_text = element.children.reject { |child| child.is_a?(Nokogiri::XML::Text) }
        wrapper = non_text.first
        nested = media_in.call(wrapper) if non_text.count == 1 && format_classes(wrapper).empty?
      end

      nested
    else
      own_attr.value
    end

  return if photo.nil?

  @properties['photo'] = [Microformats::AbsoluteUri.new(photo, base: @base).absolutize]
end
imply_properties(element) click to toggle source
# File lib/microformats/format_parser.rb, line 349
# Runs the implied-property rules for +element+.
#
# Name, photo and url are implied (in that order) only when the parser is
# not in backcompat mode; dates are implied in every mode.
def imply_properties(element)
  # NOTE: much of the implied-property code may be simplified by using
  # element.css; it was written to get passing tests first.
  %i[imply_name imply_photo imply_url].each { |rule| send(rule, element) } unless @mode_backcompat

  imply_dates
end
imply_url(element) click to toggle source
# File lib/microformats/format_parser.rb, line 253
# Implies the 'url' property when none was parsed explicitly.
#
# Nothing is implied when a url already exists, when a u- or h- prefix was
# recorded in @found_prefixes, or when @children is non-empty.
# Lookup order:
#   1. href of the element itself when it is an a or area (value used as-is),
#   2. the only a[href] child, else the only area[href] child, provided that
#      child has no format classes (value stripped),
#   3. the same search inside an only non-text child without format classes.
# A value that is found is made absolute against @base via
# Microformats::AbsoluteUri; when nothing is found the property stays unset.
def imply_url(element)
  suppressed = !@properties['url'].nil? || @found_prefixes.include?(:u) ||
               @found_prefixes.include?(:h) || !@children.empty?
  return if suppressed

  # Stripped href of the only <tag href> child of +parent+; nil when there is
  # not exactly one, or when that one carries format classes.
  sole_href = lambda do |parent, tag|
    linked = parent.children.select do |child|
      child.is_a?(Nokogiri::XML::Element) && child.name == tag && !child.attribute('href').nil?
    end
    node = linked.first
    linked.count == 1 && format_classes(node).empty? ? node.attribute('href').value.strip : nil
  end

  # a is tried before area.
  child_href = ->(parent) { sole_href.call(parent, 'a') || sole_href.call(parent, 'area') }

  own_href = element.attribute('href') if %w[a area].include?(element.name)

  href =
    if own_href.nil?
      nested = child_href.call(element)

      if nested.nil?
        non_text = element.children.reject { |child| child.is_a?(Nokogiri::XML::Text) }
        wrapper = non_text.first
        nested = child_href.call(wrapper) if non_text.count == 1 && format_classes(wrapper).empty?
      end

      nested
    else
      own_href.value
    end

  return if href.nil?

  @properties['url'] = [Microformats::AbsoluteUri.new(href, base: @base).absolutize]
end
parse(element, base: nil, element_type: nil, format_class_array: [], backcompat: false) click to toggle source
# File lib/microformats/format_parser.rb, line 3
# Parses +element+ as a microformat object and returns it as a Hash.
#
# element             - node whose children are walked with parse_node
# base:               - stored in @base and handed on to PropertyParser
# element_type:       - property prefix when this object is itself a property value
# format_class_array: - emitted unchanged as the object's 'type'
# backcompat:         - stored in @mode_backcompat
#
# Returns a Hash holding 'type' and 'properties', plus 'value' when one was
# determined, 'children' when any were collected, and 'html' (with 'value'
# replaced by the rendered text) when element_type is 'e'.
def parse(element, base: nil, element_type: nil, format_class_array: [], backcompat: false)
  # Inputs.
  @base = base
  @mode_backcompat = backcompat
  @format_property_type = element_type
  @fmt_classes = format_class_array

  # Fresh parsing state.
  @properties = {}
  @children = []
  @found_prefixes = Set.new
  @value = nil

  parse_node(element.children)

  # check properties for any missing h-* so we know not to imply anything
  check_for_h_properties

  imply_properties(element)

  if @value.nil? || @value.empty?
    names = @properties['name']
    urls = @properties['url']

    if element_type == 'p' && names && !names.empty?
      @value = names.first
    elsif element_type == 'u' && urls && !urls.empty?
      @value = urls.first
    elsif !element_type.nil?
      @value = PropertyParser.new.parse(element, base: @base, element_type: element_type, backcompat: @mode_backcompat)
    end
  end

  # Keys are added in a fixed order: value, type, properties, children, html.
  h_object = {}
  h_object['value'] = @value unless @value.nil?
  h_object.merge!('type' => format_class_array, 'properties' => @properties)
  h_object['children'] = @children unless @children.empty?

  if @format_property_type == 'e'
    h_object.merge!('value' => render_text(element), 'html' => element.inner_html.strip)
  end

  # TODO: fall back to p- dt- u- parsing if value still not set?
  # not sure that is correct by the spec actually
  h_object
end
parse_element(element) click to toggle source
# File lib/microformats/format_parser.rb, line 54
# Parses a single element according to the property and format classes on it.
#
# - property classes plus format classes: the element is parsed as a nested
#   format once per property class and stored under each property name; the
#   nested 'value' becomes @value when this parser is a p- property and the
#   property is 'name', or a u- property and the property is 'url'.
# - property classes only: each property is parsed with PropertyParser,
#   non-nil results are stored, and the element's children are walked.
# - format classes only: the parsed format is appended to @children.
# - neither: the element's children are walked.
#
# Prefixes of the property classes and of the non-backcompat format classes
# are recorded in @found_prefixes. Backcompat format classes are consulted
# only when no format classes were found.
def parse_element(element)
  prop_classes = @mode_backcompat ? backcompat_property_classes(element) : property_classes(element)
  fmt_classes = format_classes(element)

  [prop_classes, fmt_classes].each do |class_list|
    class_list.each { |class_name| @found_prefixes.add(prefix_from_class(class_name).to_sym) }
  end

  bc_classes_found = false
  if fmt_classes.empty?
    fmt_classes = backcompat_format_classes(element)
    bc_classes_found = !fmt_classes.empty?
  end

  has_properties = !prop_classes.empty?
  has_formats = !fmt_classes.empty?

  if has_properties && has_formats
    prop_classes.each do |element_class|
      element_type = prefix_from_class(element_class)
      property_name = property_from_class(element_class)

      parsed_format = FormatParser.new.parse(element, base: @base, element_type: element_type, format_class_array: fmt_classes, backcompat: bc_classes_found)

      supplies_value = (@format_property_type == 'p' && property_name == 'name') ||
                       (@format_property_type == 'u' && property_name == 'url')
      @value = parsed_format['value'] if @value.nil? && supplies_value

      (@properties[property_name] ||= []) << parsed_format
    end
  elsif has_properties
    prop_classes.each do |element_class|
      element_type = prefix_from_class(element_class)
      property_name = property_from_class(element_class)

      parsed_property = PropertyParser.new.parse(element, base: @base, element_type: element_type, backcompat: @mode_backcompat)
      next if parsed_property.nil?

      (@properties[property_name] ||= []) << parsed_property
    end

    parse_nodeset(element.children)
  elsif has_formats
    @children << FormatParser.new.parse(element, base: @base, format_class_array: fmt_classes, backcompat: bc_classes_found)
  else
    parse_nodeset(element.children)
  end
end
prefix_from_class(class_name) click to toggle source
# File lib/microformats/format_parser.rb, line 314
# Returns the lowercased text before the first '-' of +class_name+
# (for example 'p' for 'p-name'), or nil when splitting yields no segments.
def prefix_from_class(class_name)
  segments = class_name.downcase.split('-')
  segments.first
end
property_from_class(class_name) click to toggle source
# File lib/microformats/format_parser.rb, line 318
# Returns the lowercased part of +class_name+ after its prefix, i.e. every
# '-'-separated segment except the first, re-joined with '-'
# (for example 'given-name' for 'p-given-name').
#
# Returns '' when there is nothing after the prefix. drop(1) is used rather
# than [1..-1] because slicing an empty array from index 1 yields nil, which
# made an empty class name raise NoMethodError on join.
def property_from_class(class_name)
  class_name.downcase.split('-').drop(1).join('-')
end