class CsvReader::ParserStrict
Constants
- BACKSLASH
char constants
- CR
- LF
Attributes
config[R]
Public Class Methods
build_logger()
click to toggle source
Adds a simple logger with a debug flag/switch.
Use Parser.debug = true # to turn on debug output.
todo/fix: use logutils instead of the standard library logger - why? why not?
# File lib/csvreader/parser_strict.rb, line 22
## Builds the logger used by this parser class.
##   - writes to STDOUT
##   - starts at level :info (note: a new Logger is 0 (debug) by default)
## Returns the configured Logger instance.
def self.build_logger()
  Logger.new( STDOUT ).tap do |log|
    log.level = :info
  end
end
logger()
click to toggle source
# File lib/csvreader/parser_strict.rb, line 27
## Returns the logger kept in the @@logger class variable;
##   on first use it gets created via build_logger.
def self.logger()
  @@logger ||= build_logger
end
new( sep: ',', quote: '"', doublequote: true, escape: false, null: nil, comment: false )
click to toggle source
# File lib/csvreader/parser_strict.rb, line 34
## Stores all parser settings in the @config hash.
##
##   sep:          field separator
##   quote:        quote char; note: set to false/nil for no quote
##   doublequote:  true/false
##   escape:       true/false
##   null:         note: set to nil for no null values / not available (na)
##   comment:      comment char e.g. # or false/nil
def initialize( sep:         ',',
                quote:       '"',
                doublequote: true,
                escape:      false,
                null:        nil,
                comment:     false
              )
  ## todo/fix: change config to proper dialect class/struct - why? why not?
  @config = {
    sep:         sep,
    quote:       quote,
    doublequote: doublequote,
    escape:      escape,
    null:        null,
    comment:     comment,
  }
end
Public Instance Methods
comment=( value )
click to toggle source
# File lib/csvreader/parser_strict.rb, line 60
## Convenience setter - same as config[:comment] = value.
def comment=( value )
  @config[:comment] = value
end
doublequote=( value )
click to toggle source
# File lib/csvreader/parser_strict.rb, line 57
## Convenience setter - same as config[:doublequote] = value.
def doublequote=( value )
  @config[:doublequote] = value
end
escape=( value )
click to toggle source
# File lib/csvreader/parser_strict.rb, line 58
## Convenience setter - same as config[:escape] = value.
def escape=( value )
  @config[:escape] = value
end
logger()
click to toggle source
# File lib/csvreader/parser_strict.rb, line 28
## Instance-level shortcut - hands back the logger of the class.
def logger()
  self.class.logger
end
null=( value )
click to toggle source
# File lib/csvreader/parser_strict.rb, line 59
## Convenience setter - same as config[:null] = value.
def null=( value )
  @config[:null] = value
end
parse( data, sep: config[:sep], &block )
click to toggle source
# File lib/csvreader/parser_strict.rb, line 64
## Parses data record by record.
##
##   data - a Buffer, or anything Buffer.new accepts
##            (the original note says: a String or IO object)
##   sep  - field separator; defaults to config[:sep]
##
## With a block every record gets passed to the block;
##   without a block all records get collected and returned as an array.
def parse( data, sep: config[:sep], &block )
  ## make sure data is wrapped into a Buffer;
  ##   allow (re)use of a Buffer if managed from "outside"
  input = data.is_a?( Buffer ) ? data : Buffer.new( data )

  return parse_lines( input, sep: sep, &block )   if block_given?

  records = []
  parse_lines( input, sep: sep ) { |record| records << record }
  records
end
quote=( value )
click to toggle source
# File lib/csvreader/parser_strict.rb, line 56
## Convenience setter - same as config[:quote] = value.
def quote=( value )
  @config[:quote] = value
end
sep=( value )
click to toggle source
Config convenience helpers,
e.g. use Csv.mysql.sep = ',' instead of Csv.mysql.config[:sep] = ','.
# File lib/csvreader/parser_strict.rb, line 55
## Convenience setter - same as config[:sep] = value.
def sep=( value )
  @config[:sep] = value
end
Private Instance Methods
is_null?( value )
click to toggle source
# File lib/csvreader/parser_strict.rb, line 273
## Checks value against the null setting in @config[:null].
##
##   nil    - nothing set; always false (not null)
##   Proc   - result of calling the proc with value
##   Array  - true if the array includes value
##   String - true if value equals the string
##   other  - unknown config style / setting; always false
def is_null?( value )
  case (null = @config[:null])
  when nil    then false
  when Proc   then null.call( value )
  when Array  then null.include?( value )
  when String then value == null
  else
    ## todo: issue a warning or error - why? why not?
    false
  end
end
parse_escape( input, sep: )
click to toggle source
# File lib/csvreader/parser_strict.rb, line 93
## Reads one backslash escape sequence from input and returns it as a string.
##
##   - backslash followed by backslash, LF, CR, sep or the quote char (if set):
##       both chars get consumed; the escaped char is returned
##   - backslash followed by anything else (unknown escape sequence):
##       only the backslash gets consumed and returned; no special handling
##
## Raises ParseError if input is not positioned on a backslash.
def parse_escape( input, sep: )
  quote = config[:quote]

  unless input.peek == BACKSLASH
    raise ParseError.new( "found >#{input.peek} (#{input.peek.ord})< - BACKSLASH (\\) expected in parse_escape!!!!" )
  end

  value = ""
  input.getc      ## eat-up backslash

  nxt = input.peek
  if nxt==BACKSLASH || nxt==LF || nxt==CR || nxt==sep || (quote && nxt==quote)
    value << input.getc     ## add escaped char (e.g. lf, cr, etc.)
  else
    value << BACKSLASH      ## keep backslash as is
  end
  value
end
parse_field( input, sep: )
click to toggle source
# File lib/csvreader/parser_strict.rb, line 150
## Reads a single field from input and returns its value.
##
##   - empty unquoted field (input on sep, LF, CR or at eof):
##       returns "" - or nil if is_null?( "" )
##   - quoted field (quote char configured and next up):
##       returns whatever parse_quote reads; note: NO null check
##   - regular field: reads until sep, LF, CR, eof or a stray quote char;
##       backslash escapes get handed to parse_escape if config[:escape] is on;
##       returns the chars read - or nil if is_null? says so
def parse_field( input, sep: )
  quote  = config[:quote]
  escape = config[:escape]
  value  = ""

  logger.debug "parse field - sep: >#{sep}< (#{sep.ord})"  if logger.debug?

  ## empty unquoted field
  ##   note: allows null = '' that is turn unquoted empty strings into null/nil
  ##         or if using numeric into NotANumber (NaN)
  if (ch=input.peek; ch==sep || ch==LF || ch==CR || input.eof?)
    return is_null?( value ) ? nil : value
  end

  if quote && input.peek == quote
    logger.debug "start quote field - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
    value << parse_quote( input, sep: sep )
    logger.debug "end double_quote field - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
    return value
  end

  logger.debug "start reg field - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
  ## consume simple value
  ##   until we hit "," or "\n" or "\r" or stray (double) quote e.g (")
  until (ch=input.peek; ch==sep || ch==LF || ch==CR || input.eof? || (quote && ch==quote))
    if escape && input.peek == BACKSLASH
      value << parse_escape( input, sep: sep )
    else
      logger.debug " add char >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
      value << input.getc
    end
  end

  ## note: null check only for UNQUOTED (not quoted/escaped) values
  value = nil   if is_null?( value )
  logger.debug "end reg field - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
  value
end
parse_lines( input, sep:, &block )
click to toggle source
# File lib/csvreader/parser_strict.rb, line 243
## Reads input record by record until eof and
##   passes every record on to block.
##
##  strict rules:
##    - no leading and trailing whitespaces trimmed/stripped
##    - no blanks skipped
##    - lines starting with the config[:comment] char get skipped
##        (only if a comment char is set)
##
##  note: this csv format is NOT recommended;
##    please, use a format with comments, leading and trailing whitespaces, etc.
##    only added for checking compatibility
def parse_lines( input, sep:, &block )
  comment = config[:comment]

  loop do
    break  if input.eof?

    logger.debug "start record - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?

    unless comment && input.peek == comment
      ## regular record
      ##   note: requires block - enforce? how? why? why not?
      block.call( parse_record( input, sep: sep ) )
      next
    end

    ## comment line - drop everything up to and including the newline
    logger.debug "skipping comment - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
    skip_until_eol( input )
    skip_newline( input )
  end  # loop
end
parse_quote( input, sep: )
click to toggle source
# File lib/csvreader/parser_strict.rb, line 114
## Reads a quoted value from input and returns it without the enclosing quotes.
##
##   - with config[:doublequote] on, a doubled up quote adds a single quote char
##   - with config[:escape] on, backslash sequences get handed to parse_escape
##   - hitting eof before the closing quote ends the value (no error raised)
##
## Raises ParseError if input is not positioned on the quote char.
def parse_quote( input, sep: )
  quote       = config[:quote]         # char (e.g.",') | nil
  doublequote = config[:doublequote]   # true|false
  escape      = config[:escape]        # true|false

  unless input.peek == quote
    raise ParseError.new( "found >#{input.peek} (#{input.peek.ord})< - DOUBLE_QUOTE (\") expected in parse_double_quote!!!!" )
  end

  value = ""
  input.getc     ## eat-up opening quote

  loop do
    ## eat-up everything until hitting quote, eof or backslash (if escape is on)
    until (ch=input.peek; ch==quote || input.eof? || (escape && ch==BACKSLASH))
      value << input.getc
    end

    break  if input.eof?

    if input.peek == BACKSLASH
      value << parse_escape( input, sep: sep )
      next
    end

    ## input.peek is the quote char (assumed)
    input.getc     ## eat-up quote
    break  unless doublequote && input.peek == quote

    value << input.getc     ## doubled up quote - add quote and continue!!!!
  end

  value
end
parse_record( input, sep: )
click to toggle source
# File lib/csvreader/parser_strict.rb, line 192
## Reads one record, that is, all fields up to the end of the line (or eof)
##   and returns the field values as an array.
##
## Raises ParseError if a field is followed by
##   anything but sep, LF, CR or eof.
def parse_record( input, sep: )
  values = []

  loop do
    field = parse_field( input, sep: sep )
    logger.debug "value: »#{field}«"  if logger.debug?
    values << field

    break  if input.eof?

    if (ch=input.peek; ch==LF || ch==CR)
      skip_newline( input )   ## note: singular / single newline only (NOT plural)
      break
    end

    unless input.peek == sep
      raise ParseError.new( "found >#{input.peek} (#{input.peek.ord})< - FS (,) or RS (\\n) expected!!!!" )
    end

    input.getc     ## eat-up FS (,)
  end

  values
end
skip_newline( input )
click to toggle source
# File lib/csvreader/parser_strict.rb, line 217
## Skips a single newline, that is, CR LF or LF or CR.
##   note: singular (strict) version - one newline only;
##         does nothing at eof or if input is not positioned on a newline
def skip_newline( input )
  unless input.eof?
    if input.peek == CR
      input.getc                           ## eat-up CR
      input.getc  if input.peek == LF      ##  and LF too if following
    elsif input.peek == LF
      input.getc                           ## eat-up LF
    end
  end
end
skip_until_eol( input )
click to toggle source
# File lib/csvreader/parser_strict.rb, line 233
## Eats-up all chars up to (but NOT including) the next LF or CR;
##   stops at eof too.
def skip_until_eol( input )
  return  if input.eof?

  until (ch=input.peek; ch==LF || ch==CR || input.eof?)
    input.getc
  end
end