class CsvReader::ParserTable
Attributes
config[R]
Public Class Methods
build_logger()
click to toggle source
Adds a simple logger with a debug flag/switch.
Use Parser.debug = true to turn debug logging on.
TODO/FIX: use logutils instead of the standard logger - why? why not?
# File lib/csvreader/parser_table.rb, line 14
#
# Builds a fresh Logger writing to STDOUT with its level set to :info.
#
# Returns the new Logger.
def self.build_logger
  ## set to :info on start; note: is 0 (debug) by default
  Logger.new(STDOUT).tap { |log| log.level = :info }
end
logger()
click to toggle source
# File lib/csvreader/parser_table.rb, line 19
#
# Returns the logger held in the @@logger class variable, building it
# with build_logger on first use.
def self.logger
  @@logger ||= build_logger
end
new( space: nil )
click to toggle source
TODO/check:
null values - include NA? Why? Why not? Make null values case-sensitive, add an option for case sensitivity, or (better) allow a proc as an option for checking too.
# File lib/csvreader/parser_table.rb, line 32
#
# Sets up the parser configuration.
#
# space - stored as-is under config[:space] (default: nil)
def initialize(space: nil)
  ## todo/fix: change config to proper dialect class/struct - why? why not?
  ##
  ## :space
  ##   e.g. treat/convert char to space e.g. _-+• etc
  ##     Man_Utd => Man Utd
  ##   or use it for leading and trailing spaces without quotes
  ##   todo/check: only use for unquoted values? why? why not?
  @config = { space: space }
end
Public Instance Methods
logger()
click to toggle source
# File lib/csvreader/parser_table.rb, line 20
#
# Instance-level shortcut: returns the logger of the receiver's class.
def logger
  self.class.logger
end
parse( str_or_readable, **kwargs, &block )
click to toggle source
# File lib/csvreader/parser_table.rb, line 51
#
# Parses the input into records.
#
# str_or_readable - input; must respond to each_line
#                   (string or io/file for example)
# kwargs          - NOT used for now (but required for
#                   "protocol/interface" by other parsers)
# block           - optional; when given, receives every record
#
# With a block: hands each record to the block and returns the result
# of parse_lines. Without a block: returns an array of all records.
def parse(str_or_readable, **kwargs, &block)
  ## streaming mode - pass the caller's block straight through
  return parse_lines(str_or_readable, &block) if block_given?

  ## no block - collect all records and hand them back
  rows = []
  parse_lines(str_or_readable) { |row| rows << row }
  rows
end
space=( value )
click to toggle source
config convenience helpers
# File lib/csvreader/parser_table.rb, line 45
#
# Config convenience helper: stores value under @config[:space].
def space=(value)
  @config.store(:space, value)
end
Private Instance Methods
parse_lines( input, &block )
click to toggle source
# File lib/csvreader/parser_table.rb, line 75
#
# Reads the input line by line, skips blank lines and comment lines
# (starting with #), splits every other line on runs of spaces/tabs and
# passes the resulting array of values to the block.
#
# input - must respond to each_line (string or io/file for example)
# block - called once per record with an array of string values
#         note: requires block - block.call fails on the first record
#               without one; enforce? how? why? why not?
#
# Returns whatever input.each_line returns.
def parse_lines(input, &block)
  space = config[:space]

  ## match every configured char literally
  ##   note: String#tr reads "-", "^" and "\" as range / negation / escape
  ##         syntax, so a char list such as "_-+" raised
  ##         ArgumentError (invalid range) and "^_" replaced everything BUT "_"
  ##   built once here (and not once per value)
  space_chars = space ? Regexp.union(space.chars) : nil

  ## note: each_line only works with \n (unix) or \r\n (windows)
  ##   will NOT work with \r (old mac, any others?) only!!!!
  input.each_line do |line|
    if logger.debug?
      logger.debug "line:"
      logger.debug line.pretty_inspect
    end

    ## strip leading and trailing whitespaces (space/tab);
    ##  this removes the trailing newline(s) (\n or \r\n) too,
    ##  so no extra chomp is needed
    line = line.strip
    logger.debug line.pretty_inspect   if logger.debug?

    if line.empty?             ## skip blank lines
      logger.debug "skip blank line"    if logger.debug?
      next
    end

    if line.start_with?("#")   ## skip comment lines
      logger.debug "skip comment line"   if logger.debug?
      next
    end

    # note: string.split defaults to split by space (e.g. /\s+/) :-)
    #   for now make it "explicit" with /[ \t]+/
    values = line.split(/[ \t]+/)
    logger.debug values.pretty_inspect   if logger.debug?

    if space_chars
      ## e.g. translate _-+ etc. if configured to space
      ##   Man_Utd => Man Utd etc.
      values = values.map { |value| value.gsub(space_chars, ' ') }
    end

    block.call(values)
  end
end