class RefParsers::LineParser

Public Class Methods

new() click to toggle source
# File lib/ref_parsers/line_parser.rb, line 43
# Checks that every mandatory parser setting has been assigned and installs
# the default policy for entries whose first line lacks the type key.
#
# The settings are read from instance variables that must already be set
# when this runs (presumably by a subclass before calling super — confirm).
#
# Raises RuntimeError naming each required setting that is still nil.
def initialize
  required = {
    "@type_key" => @type_key,
    "@key_regex_order" => @key_regex_order,
    "@line_regex" => @line_regex,
    "@value_regex_order" => @value_regex_order,
    "@regex_match_length" => @regex_match_length
  }

  unset = required.select { |_name, setting| setting.nil? }
  # The default is applied before the check, so it is set even when we raise.
  @missing_type_key_action ||= :raise_exception
  raise "#{unset.keys.join(", ")} are missing" unless unset.empty?
end

Public Instance Methods

friendly_name() click to toggle source
# File lib/ref_parsers/line_parser.rb, line 99
# Human-readable name for this parser: the name of the receiver's class.
def friendly_name
  parser_class = self.class
  parser_class.name
end
open(filename, &summary_handler) click to toggle source
# File lib/ref_parsers/line_parser.rb, line 52
# Reads +filename+ and parses its contents.
#
# The file is read with mode 'r:bom|UTF-8', i.e. as UTF-8 with a leading
# byte-order mark handled by Ruby. The optional block is forwarded to #parse
# unchanged.
#
# Returns whatever #parse returns for the file's contents.
def open(filename, &summary_handler)
  contents = File.read(filename, mode: 'r:bom|UTF-8')
  parse(contents, &summary_handler)
end
parse(body, &summary_handler) click to toggle source
# File lib/ref_parsers/line_parser.rb, line 56
# Parses a whole document into an array of entries.
#
# body            - String with the full document text. It is split into
#                   lines on "\n\r", "\r\n", "\n" or "\r".
# summary_handler - optional block. When given, a ParsingLogSummary is
#                   created, fed every detail object returned by
#                   parse_entry plus the final entries, and then passed to
#                   the block once parsing has finished.
#
# Returns the Array of non-nil entries yielded by parse_entry, in order.
def parse(body, &summary_handler)
  lines = body.split(/\n\r|\r\n|\n|\r/)
  entries = []
  first_tag_override = nil
  summary = ParsingLogSummary.new(friendly_name) if summary_handler
  next_line = skip_header(lines)

  loop do
    entry_found = false
    detail = parse_entry(lines, next_line, first_tag_override) do |entry|
      # A nil entry still counts as "found"; it is just not collected.
      entries << entry if entry
      entry_found = true
    end
    summary.detail_found(detail) if summary_handler

    # The override is only ever valid for the entry immediately following.
    first_tag_override = nil
    upcoming = detail.next_line
    next_line = upcoming if upcoming

    break if detail.is_empty

    # A terminator can share its line with the first tag of the next entry.
    # In that case the terminator's value is re-parsed as a first line and
    # handed to the next parse_entry call. (Fixing the text up front would
    # be simpler, but would not survive a move to streaming the input.)
    if detail.is_terminator_found && detail.parsed && detail.parsed[:value]
      has_first, first = try_get_first_line(detail.parsed[:value].strip)
      first_tag_override = first if has_first && first
    end

    # Stop once a pass over the input produced no entry at all.
    break unless entry_found
  end

  if summary_handler
    summary.report_entries(entries)
    summary_handler.call(summary)
  end
  entries
end

Protected Instance Methods

hash_entry(fields) click to toggle source
# File lib/ref_parsers/line_parser.rb, line 288
# Collapses a list of parsed fields into a single entry hash.
#
# fields - Array of hashes with :key and :value. The first element supplies
#          the entry's 'type'; its own :key is not used.
#
# A key seen once maps to its value; a key seen repeatedly maps to an Array
# of its values in the order encountered.
#
# Returns the entry Hash.
def hash_entry(fields)
  record = { 'type' => fields.first[:value] }

  fields.drop(1).each do |field| # the type field is already consumed
    name = field[:key]
    incoming = field[:value]
    existing = record[name]

    record[name] =
      if existing.nil?
        incoming                   # first occurrence
      elsif existing.instance_of?(Array)
        existing << incoming       # third or later occurrence
      else
        [existing, incoming]       # second occurrence: promote to a list
      end
  end

  record
end
is_valid_line(line) click to toggle source
# File lib/ref_parsers/line_parser.rb, line 265
# Tells whether +line+ is a candidate field line.
#
# A line is rejected when it is nil, blank, or starts with a digit;
# otherwise it is accepted only if it matches @line_regex.
#
# Returns true or false.
def is_valid_line(line)
  return false if line.nil?
  return false if [/^\s*$/, /^\d+/].any? { |pattern| line.match(pattern) }

  !line.match(@line_regex).nil?
end
parse_entry(lines, next_line, first_line_override = nil) { |hash_entry(fields)| ... } click to toggle source
# File lib/ref_parsers/line_parser.rb, line 144
# Reads a single entry from +lines+, starting at index +next_line+.
#
# lines               - Array of line strings.
# next_line           - Integer index at which to start.
# first_line_override - optional, already-parsed first field (a Hash). When
#                       given, no first line is searched for and field
#                       parsing begins directly at lines[next_line].
#
# Yields hash_entry(fields) when the entry is closed (by a footer, by the
# terminator key, or by the "no further field" branch), or nil instead when
# the first line's action is :ignore_entry. Nothing is yielded when the input
# runs out before a first line is found, when the first line is itself a
# footer, or when a footer directly follows a first line with no other field.
#
# Returns the ParsingInfoDetail created for this call.
# Raises RefParsers::LineParsingException wrapping any StandardError raised
# while validating or parsing a candidate first line.
def parse_entry(lines, next_line, first_line_override = nil)
  begin
    detail = ParsingInfoDetail.new()
    detail.current_line = next_line
    action = nil
    if !first_line_override
      # Scan forward until a line yields a first field, or the input ends.
      begin
        if detail.current_line >= lines.length
          detail.is_eof_found = true
          return detail 
        end
        line_text = lines[detail.current_line]
        begin
          if !is_valid_line(line_text)
            first = nil
          else
            # parse_first_line may return a third value; it is discarded here.
            first, action = parse_first_line(line_text)
          end
        rescue => ex
          raise RefParsers::LineParsingException.new("Error parsing first line", detail.current_line, line_text, ex)
        end
        detail.current_line = detail.current_line + 1
      end while first.nil?
    else
      first = first_line_override
    end

    detail.action = action

    # A nil action means no missing-type-key policy was applied to the first line.
    if action.nil?
      detail.is_type_found = true
      # current_line has already been advanced past the first line when it
      # was found by the scan above.
      detail.first_line = detail.current_line
    end

    if first[:footer]
      detail.is_empty = true 
      return detail
    end

    fields = [first]

    last_parsed = {}
    # Consume field lines. `stop` is only set to true right before a return,
    # so this loop always exits through one of the returns below.
    begin
      parsed = parse_line(lines[detail.current_line])
      if parsed
        detail.parsed = parsed
        if parsed[:footer]
          # Footer closes the entry; an entry holding only its first field is
          # reported as empty instead of being yielded.
          if fields and fields.length > 1
            if action != :ignore_entry
              yield hash_entry(fields)
            else
              yield nil
            end
          else
            detail.is_empty = true
          end
          return detail
        end
        stop = false
        if parsed[:key] == "-1"
          # Key "-1" marks a line parse_line could not match as a field: fold
          # it into the previous field and replace that field in the list.
          # NOTE(review): if this is the first line after the first field,
          # last_parsed is still {} — the merged key is nil and the element
          # removed is the first (type) field. Confirm this is intended.
          parsed[:key] = last_parsed[:key]
          parsed[:value] = "#{last_parsed[:value]}#{NEWLINE_MERGER}#{parsed[:value]}"
          fields.delete_at fields.length - 1
        elsif @terminator_key && parsed[:key] == @terminator_key
          # Terminator closes the entry; the terminator field itself is not
          # added to fields but stays available as detail.parsed.
          detail.is_terminator_found = true
          if action != :ignore_entry
            yield hash_entry(fields)
          else
            yield nil
          end
          return detail
        end
        last_parsed = parsed
        fields << parsed
      # parse_line returned nil (nil or ignorable line). Without a terminator
      # key this ends the entry; with one, only running out of lines does.
      # NOTE(review): next_line is defined on ParsingInfoDetail (not shown) —
      # presumably derived from current_line; confirm.
      elsif @terminator_key.nil? || detail.next_line >= lines.length
        stop = true
        detail.is_eof_found = true
        if action != :ignore_entry
          yield hash_entry(fields)
        else
          yield nil
        end
        return detail
      else
        # Ignorable line inside a terminator-delimited entry: skip it.
        stop = false
      end
      detail.current_line += 1
    end until stop
  end
end
parse_first_line(line) click to toggle source
# File lib/ref_parsers/line_parser.rb, line 240
# Parses the line that is expected to open an entry.
#
# Returns one of:
#   [first, nil]               - when nothing was parsed (first is nil) or
#                                the line is a footer;
#   [first, action, has_first] - otherwise, where has_first says whether the
#                                line carries @type_key and action is nil
#                                when it does, or @missing_type_key_action
#                                (:ignore_entry / :import_entry) when not.
#
# Raises RuntimeError when the type key is missing and either no
# @terminator_key is configured or @missing_type_key_action is neither
# :ignore_entry nor :import_entry.
def parse_first_line(line)
  has_first, first = try_get_first_line(line)

  return first, nil if first.nil? || first[:footer]
  return first, nil, has_first if has_first

  tolerated_action =
    case @missing_type_key_action
    when :ignore_entry, :import_entry then @missing_type_key_action
    end

  if @terminator_key.nil? || tolerated_action.nil?
    raise "First line should start with #{@type_key}"
  end

  # Semantic validation of the type value is left to the library client.
  return first, tolerated_action, has_first
end
parse_line(line, *ignores) click to toggle source
# File lib/ref_parsers/line_parser.rb, line 274
# Parses one raw line into a field hash.
#
# line    - the line text, or nil.
# ignores - extra patterns; a line matching any of them (or a blank line)
#           is skipped.
#
# Returns nil for a nil or skipped line; whatever detect_footer returns when
# that is truthy; {key:, value:} when the line matches @line_regex with
# exactly @regex_match_length match elements; and otherwise
# {key: "-1", value: line} to mark a line that is not a field of its own.
def parse_line(line, *ignores)
  skip_patterns = ignores + [/^\s*$/]
  return nil if line.nil?
  return nil if skip_patterns.any? { |pattern| line.match(pattern) }

  footer = detect_footer(line)
  return footer if footer

  match = line.match(@line_regex)
  return { key: "-1", value: line } unless match && match.length == @regex_match_length

  # Any StandardError while extracting the value (e.g. the group did not
  # participate in the match) results in a nil value.
  text =
    begin
      match[@value_regex_order].strip
    rescue StandardError
      nil
    end
  { key: match[@key_regex_order], value: text }
end
skip_header(lines) click to toggle source
# File lib/ref_parsers/line_parser.rb, line 128
# Verifies the header lines at the top of the input and returns the index of
# the first line after them.
#
# lines - Array of line strings.
#
# Each pattern in @header_regexes must match the line at the same position.
# Returns 0 when @header_regexes is not set, otherwise the number of header
# lines consumed.
# Raises RuntimeError ("Header line N missing") when a header line does not
# match or when the input is too short to contain it.
def skip_header(lines)
  return 0 unless @header_regexes

  consumed = 0
  @header_regexes.each do |regex|
    line = lines[consumed]
    # `line` is nil when the input has fewer lines than expected headers;
    # treat that as a missing header rather than failing on nil.match.
    raise "Header line #{consumed} missing" unless line && line.match(regex)
    consumed += 1
  end
  consumed
end
try_get_first_line(line) click to toggle source
# File lib/ref_parsers/line_parser.rb, line 235
# Parses +line+ as a potential first line of an entry.
#
# Lines starting with digits are passed to parse_line as ignorable, so they
# produce no field.
#
# Returns a two-element Array: whether the parsed field's key equals
# @type_key (nil when nothing was parsed), and the parsed field itself.
def try_get_first_line(line)
  candidate = parse_line(line, /^\d+/)
  starts_entry = candidate && candidate[:key] == @type_key
  [starts_entry, candidate]
end