class RefParsers::LineParser
Public Class Methods
new()
click to toggle source
# File lib/ref_parsers/line_parser.rb, line 43
# Checks that the parser configuration is complete.
#
# The five configuration instance variables listed below must already hold
# non-nil values when this runs (nothing in this method assigns them).
# @missing_type_key_action falls back to :raise_exception when it is unset.
#
# Raises RuntimeError naming every configuration variable that is still nil.
def initialize
  required = %w[@type_key @key_regex_order @line_regex @value_regex_order @regex_match_length]
  absent = required.select { |ivar| instance_variable_get(ivar).nil? }

  # The default is applied before the completeness check, so it is set even
  # when the check below raises.
  @missing_type_key_action ||= :raise_exception

  raise "#{absent.join(", ")} are missing" unless absent.empty?
end
Public Instance Methods
friendly_name()
click to toggle source
# File lib/ref_parsers/line_parser.rb, line 99
# Returns the name of the receiver's class; #parse passes it to
# ParsingLogSummary.new when a summary handler is given.
def friendly_name
  klass = self.class
  klass.name
end
open(filename, &summary_handler)
click to toggle source
# File lib/ref_parsers/line_parser.rb, line 52
# Reads +filename+ as UTF-8 text (the 'bom|' prefix in the mode makes Ruby
# check for a leading byte-order mark) and parses its contents.
#
# filename        - path of the file to read.
# summary_handler - optional block, forwarded unchanged to #parse.
#
# Returns whatever #parse returns.
def open(filename, &summary_handler)
  contents = File.read(filename, mode: 'r:bom|UTF-8')
  parse(contents, &summary_handler)
end
parse(body, &summary_handler)
click to toggle source
# File lib/ref_parsers/line_parser.rb, line 56
# Parses +body+ into a list of entries.
#
# body            - String holding the whole document; it is split on
#                   "\n\r", "\r\n", "\n" or "\r".
# summary_handler - optional block. When given, a ParsingLogSummary is built,
#                   fed every detail object returned by #parse_entry plus the
#                   final entry list, and then passed to the block.
#
# Returns the Array of entries yielded by #parse_entry (nil entries, which
# #parse_entry yields for ignored entries, are left out).
def parse(body, &summary_handler)
  lines = body.split(/\n\r|\r\n|\n|\r/)
  entries = []
  first_tag_override = nil
  summary = ParsingLogSummary.new(friendly_name) if summary_handler
  next_line = skip_header(lines)

  loop do
    entry_found = false
    detail = parse_entry(lines, next_line, first_tag_override) do |entry|
      entries << entry if entry
      entry_found = true
    end
    summary.detail_found(detail) if summary_handler

    first_tag_override = nil
    next_line = detail.next_line if detail.next_line
    break if detail.is_empty

    # The terminator can sit on the same line as the beginning of the next
    # segment. A separate text-fixing pass before walking the lines would be
    # preferable, but the current version loads the entire file into memory,
    # which is a potential performance problem; once that is replaced by
    # streaming the file, such a pass would not work.
    if detail.is_terminator_found && detail.parsed && detail.parsed[:value]
      has_first, first = try_get_first_line(detail.parsed[:value].strip)
      first_tag_override = first if has_first && first
    end

    # Keep going only while the last call actually yielded an entry.
    break unless entry_found
  end

  if summary_handler
    summary.report_entries(entries)
    summary_handler.call(summary)
  end
  entries
end
Protected Instance Methods
hash_entry(fields)
click to toggle source
# File lib/ref_parsers/line_parser.rb, line 288
# Builds the Hash for one entry from its parsed fields.
#
# fields - Array of Hashes with :key and :value. The first element supplies
#          the 'type' value; the remaining elements are stored under their
#          own :key.
#
# A key seen once maps to its value. A key seen again maps to an Array that
# collects the values in order of appearance.
#
# Returns the entry Hash.
def hash_entry(fields)
  fields.drop(1).each_with_object({ 'type' => fields.first[:value] }) do |field, entry|
    key = field[:key]
    value = field[:value]
    current = entry[key]

    entry[key] =
      if current.nil?
        value                   # first (or still empty) value for this key
      elsif current.instance_of?(Array)
        current << value        # already collecting several values
      else
        [current, value]        # second value: switch to an Array
      end
  end
end
is_valid_line(line)
click to toggle source
# File lib/ref_parsers/line_parser.rb, line 265
# Tells whether +line+ can be examined as the first line of an entry.
#
# A line is rejected when it is nil, contains only whitespace, starts with
# a digit, or does not match @line_regex.
#
# Returns true or false.
def is_valid_line(line)
  return false if line.nil?

  skip_patterns = [/^\s*$/, /^\d+/]
  return false if skip_patterns.any? { |pattern| line.match(pattern) }

  line.match(@line_regex) ? true : false
end
parse_entry(lines, next_line, first_line_override = nil) { |hash_entry(fields)| ... }
click to toggle source
# File lib/ref_parsers/line_parser.rb, line 144
# Parses a single entry out of +lines+.
#
# lines               - Array of line Strings for the whole document.
# next_line           - index in +lines+ at which to start.
# first_line_override - optional already-parsed first field ({key:, value:}).
#                       When given, no first line is searched for and
#                       parsing continues directly at +next_line+.
#
# Yields at most once: the Hash built by #hash_entry, or nil when the entry
# is to be ignored (action :ignore_entry).
#
# Returns a ParsingInfoDetail describing what happened (current_line,
# is_eof_found, is_empty, is_terminator_found, parsed, ...).
# Raises RefParsers::LineParsingException when the first line cannot be parsed.
def parse_entry(lines, next_line, first_line_override = nil)
  detail = ParsingInfoDetail.new
  detail.current_line = next_line
  action = nil

  if first_line_override
    first = first_line_override
  else
    # Skip forward until a line yields a first field, or the input ends.
    first = nil
    while first.nil?
      if detail.current_line >= lines.length
        detail.is_eof_found = true
        return detail
      end

      line_text = lines[detail.current_line]
      begin
        if is_valid_line(line_text)
          first, action = parse_first_line(line_text)
        end
      rescue => ex
        raise RefParsers::LineParsingException.new("Error parsing first line", detail.current_line, line_text, ex)
      end
      detail.current_line += 1
    end
  end

  detail.action = action
  if action.nil?
    detail.is_type_found = true
    detail.first_line = detail.current_line
  end

  if first[:footer]
    detail.is_empty = true
    return detail
  end

  fields = [first]
  # Seed with the first field so that a continuation line directly after the
  # first line is appended to it instead of replacing it with a nil-keyed field.
  last_parsed = first

  loop do
    parsed = parse_line(lines[detail.current_line])
    if parsed
      detail.parsed = parsed

      if parsed[:footer]
        # A footer ends the entry; only report it when it has real fields.
        if fields.length > 1
          yield(action == :ignore_entry ? nil : hash_entry(fields))
        else
          detail.is_empty = true
        end
        return detail
      end

      if parsed[:key] == "-1"
        # Continuation line: merge it into the previous field and replace
        # that field with the merged one.
        parsed[:key] = last_parsed[:key]
        parsed[:value] = "#{last_parsed[:value]}#{NEWLINE_MERGER}#{parsed[:value]}"
        fields.pop
      elsif @terminator_key && parsed[:key] == @terminator_key
        detail.is_terminator_found = true
        yield(action == :ignore_entry ? nil : hash_entry(fields))
        return detail
      end

      last_parsed = parsed
      fields << parsed
    elsif @terminator_key.nil? || detail.next_line >= lines.length
      # Without a terminator key an ignorable line (nil or blank) ends the
      # entry; with one, only reaching the end of the input does.
      # NOTE(review): detail.next_line comes from ParsingInfoDetail, which is
      # not visible here - confirm how it relates to current_line.
      detail.is_eof_found = true
      yield(action == :ignore_entry ? nil : hash_entry(fields))
      return detail
    end

    detail.current_line += 1
  end
end
parse_first_line(line)
click to toggle source
# File lib/ref_parsers/line_parser.rb, line 240
# Parses the first line of an entry and decides what to do when it does not
# carry the type key.
#
# Returns [first, nil] when the line yields nothing or is a footer;
# otherwise [first, action, has_first], where action is nil for a proper
# type line, or :ignore_entry / :import_entry as configured in
# @missing_type_key_action.
#
# Raises RuntimeError when the type key is missing and either no terminator
# key is configured or @missing_type_key_action is neither :ignore_entry nor
# :import_entry.
def parse_first_line(line)
  has_first, first = try_get_first_line(line)
  return first, nil if first.nil? || first[:footer]

  action = nil
  unless has_first
    # Without a terminator key the configured policy is not consulted at all.
    policy = @terminator_key.nil? ? nil : @missing_type_key_action
    case policy
    when :ignore_entry, :import_entry
      action = policy
    else
      raise "First line should start with #{@type_key}"
    end
  end

  # lets not check for semantics here, leave it for the library client
  # raise "#{line}: Reference type should be one of #{@types.inspect}" unless @types.include? first[:value]
  return first, action, has_first
end
parse_line(line, *ignores)
click to toggle source
# File lib/ref_parsers/line_parser.rb, line 274
# Parses one line into a field Hash.
#
# line    - the line String (may be nil).
# ignores - extra Regexps; a line matching any of them is skipped.
#           Whitespace-only lines are always skipped.
#
# Returns
#   nil                       for a nil or skipped line,
#   whatever #detect_footer returns when that is truthy,
#   {key:, value:}            when the line matches @line_regex with exactly
#                             @regex_match_length groups (value is stripped,
#                             or nil when the value group did not take part
#                             in the match),
#   {key: "-1", value: line}  otherwise, marking a continuation line.
def parse_line(line, *ignores)
  return nil if line.nil?

  ignores << /^\s*$/
  return nil if ignores.any? { |pattern| line.match(pattern) }

  footer = detect_footer(line)
  return footer if footer

  m = line.match(@line_regex)
  return { key: "-1", value: line } unless m && m.length == @regex_match_length

  # An optional value group yields nil; handle that explicitly instead of
  # rescuing every error the lookup or strip could raise.
  raw_value = m[@value_regex_order]
  { key: m[@key_regex_order], value: raw_value && raw_value.strip }
end
skip_header(lines)
click to toggle source
# File lib/ref_parsers/line_parser.rb, line 128
# Verifies the header lines and returns the index of the first line after
# them.
#
# lines - Array of line Strings.
#
# When @header_regexes is unset there is no header and 0 is returned.
# Otherwise the n-th line must match the n-th regex.
#
# Raises RuntimeError ("Header line N missing") when a header line does not
# match or the input ends before all header lines were seen.
def skip_header(lines)
  return 0 unless @header_regexes

  next_line = 0
  @header_regexes.each do |regex|
    line = lines[next_line]
    # line is nil when the input is shorter than the expected header.
    raise "Header line #{next_line} missing" unless line && line.match(regex)
    next_line += 1
  end
  next_line
end
try_get_first_line(line)
click to toggle source
# File lib/ref_parsers/line_parser.rb, line 235
# Parses +line+ and reports whether it carries the type key.
#
# Returns [has_first, first]: +first+ is the result of #parse_line (nil for
# skipped lines) and +has_first+ is truthy only when first[:key] equals
# @type_key.
def try_get_first_line(line)
  # The extra pattern makes #parse_line skip lines with leading entry numbers.
  candidate = parse_line(line, /^\d+/)
  is_type_line = candidate && candidate[:key] == @type_key
  [is_type_line, candidate]
end