class CsvMadness::Sheet

Constants

COLUMN_TYPES
FORBIDDEN_COLUMN_NAMES

Attributes

columns[RW]
index_columns[RW]
opts[RW]
record_class[RW]
records[RW]
spreadsheet_file[RW]

Public Class Methods

add_search_path( path ) click to toggle source

Paths to be searched when CsvMadness.load( “filename.csv” ) is called.

# File lib/csv_madness/sheet.rb, line 74
def self.add_search_path( path )
  @search_paths ||= []
  path = Pathname.new( path ).expand_path
  unless path.directory?
    raise "The given path does not exist"
  end
  
  @search_paths << path unless @search_paths.include?( path )
end
find_spreadsheet_in_filesystem( name ) click to toggle source

Search absolute/relative-to-current-dir before checking search paths.

# File lib/csv_madness/sheet.rb, line 98
def self.find_spreadsheet_in_filesystem( name )
  @search_paths ||= []

  expanded_path = Pathname.new( name ).expand_path
  if expanded_path.exist?
    return expanded_path
  else   # look for it in the search paths
    @search_paths.each do |p|
      file = p.join( name )
      if file.exist? && file.file?
        return p.join( name )
      end
    end
  end
  
  nil
end
from( csv_file, opts = {} ) click to toggle source
# File lib/csv_madness/sheet.rb, line 88
def self.from( csv_file, opts = {} )
  if f = find_spreadsheet_in_filesystem( csv_file )
    Sheet.new( f, opts )
  else
    raise "File not found."
  end
end
getter_name( name ) click to toggle source

Used to make getter/setter names out of the original header strings. “ hello;: world! ” => :hello_world

# File lib/csv_madness/sheet.rb, line 62
def self.getter_name( name )
  name = name.strip.gsub( /\s+/, "_" ).gsub( /(\W|_)+/, "_" ).downcase
  name = name.gsub( /_+$/, "" )
  if name.match( /^\d/ )
    name = "_#{name}"
  end
  
  name.to_sym
end
new( *args ) click to toggle source

opts:

index: ( [:id, :id2 ] )
    columns you want mapped for quick 
    lookup of individual records

columns: ( [:fname, :lname, :age] )  
    an array of symbols, corresponding
    to the csv rows they represent (first, second, third)
    and designating the method for calling the cell in 
    a given record.  If not provided, it will guess based
    on the header row.

header:   false       
    anything else, we assume the csv file has a header row
# File lib/csv_madness/sheet.rb, line 156
def initialize( *args )
  if args.last.is_a?(Hash)
    @opts = args.pop
  else
    @opts = {}
  end
  
  firstarg = args.shift
  
  case firstarg
  when NilClass
    @spreadsheet_file = nil
    @opts[:columns] ||= []
  when String, FunWith::Files::FilePath, Pathname
    @spreadsheet_file = self.class.find_spreadsheet_in_filesystem( firstarg )
  when Array
    @spreadsheet_file = nil
    @opts[:columns] ||= firstarg
  end
  
  @opts[:header] = (@opts[:header] == false ? false : true)  # true unless already explicitly set to false
  
  reload_spreadsheet
end
search_paths() click to toggle source
# File lib/csv_madness/sheet.rb, line 84
def self.search_paths
  @search_paths
end
to_csv( spreadsheet, opts = {} ) click to toggle source

opts are passed to underlying CSV (:row_sep, :encoding, :force_quotes)

# File lib/csv_madness/sheet.rb, line 117
def self.to_csv( spreadsheet, opts = {} )
  out = spreadsheet.columns.to_csv( opts )
  spreadsheet.records.inject( out ) do |output, record|
    output << record.to_csv( opts )
  end
end
write_to_file( spreadsheet, file, opts = {} ) click to toggle source
# File lib/csv_madness/sheet.rb, line 124
def self.write_to_file( spreadsheet, file, opts = {} )
  file = file.fwf_filepath.expand_path
  File.open( file, "w" ) do |f|
    f << spreadsheet.to_csv( opts )
  end
end

Public Instance Methods

<<( record ) click to toggle source
# File lib/csv_madness/sheet.rb, line 181
def <<( record )
  self.add_record( record )
end
[](offset) click to toggle source
# File lib/csv_madness/sheet.rb, line 257
def [] offset
  @records[offset]
end
add_column( column ) { |val, record| ... } click to toggle source

If no block given, adds an empty column

# File lib/csv_madness/sheet.rb, line 369
def add_column( column, &block )
  raise "Column already exists: #{column}" if @columns.include?( column )
  raise "#{column} is in the list FORBIDDEN_COLUMN_NAMES" if FORBIDDEN_COLUMN_NAMES.include?(column)
  @columns << column
  
  # add empty column to each row
  @records.map{ |r|
    r.csv_data << {column => ""} 
  }
  
  update_data_accessor_module
  
  if block_given?
    alter_column( column ) do |val, record|
      yield val, record
    end
  end
end
add_record( record ) click to toggle source
# File lib/csv_madness/sheet.rb, line 185
def add_record( record )
  case record
  when Array
    # CSV::Row.new( column names, column_entries ) (in same order as columns, natch)
    record = CSV::Row.new( self.columns, record )
  when Hash
    header = []
    fields = []
    
    for col in self.columns
      header << col
      fields << record[col]
    end
    
    record = CSV::Row.new( header, fields )
  when CSV::Row
    # do nothing
  else
    raise "sheet.add_record() doesn't take objects of type #{record.inspect}" unless record.respond_to?(:csv_data)
    record = record.csv_data
  end
  
  record = @record_class.new( record )
  @records << record
  add_to_indexes( record )
end
add_record_methods( mod = nil, &block ) click to toggle source

Note: If a block is given, the mod arg will be ignored.

# File lib/csv_madness/sheet.rb, line 442
def add_record_methods( mod = nil, &block )
  if block_given?
    mod = Module.new( &block )
  end
  @record_class.send( :include, mod )
  self
end
alter_cells( blank = :undefined, &block ) click to toggle source

if blank is defined, only the records which are non-blank in that column will actually be yielded. The rest will be set to the provided default

# File lib/csv_madness/sheet.rb, line 348
def alter_cells( blank = :undefined, &block )
  @columns.each_with_index do |column, cindex|
    alter_column( column, blank, &block )
  end
end
alter_column( column, blank = :undefined ) { |record, record| ... } click to toggle source
# File lib/csv_madness/sheet.rb, line 354
def alter_column( column, blank = :undefined, &block )
  raise "Column does not exist: #{column}" unless @columns.include?( column )
  
  if cindex = @columns.index( column )
    for record in @records
      if record.blank?(column) && blank != :undefined
        record[cindex] = blank
      else
        record[cindex] = yield( record[cindex], record )
      end
    end
  end
end
blanked() click to toggle source

give a copy of the current spreadsheet, but with no records

# File lib/csv_madness/sheet.rb, line 294
def blanked()
  sheet = self.class.new
  sheet.columns = @columns.clone
  sheet.index_columns = @index_columns.clone
  sheet.records = []
  sheet.spreadsheet_file = nil
  sheet.create_data_accessor_module
  sheet.create_record_class
  sheet.opts = @opts.clone
  sheet.reindex
  
  sheet
end
column(col) click to toggle source
# File lib/csv_madness/sheet.rb, line 332
def column col
  @records.map(&col)
end
concat_columns( col1, col2, opts = {} ) click to toggle source

By default, the

# File lib/csv_madness/sheet.rb, line 428
def concat_columns( col1, col2, opts = {} )
  opts =  {:separator => '', :out => col1}.merge( opts )
  
  column_must_exist( col1, col2 )
  self.add_column( opts[:out] ) unless self.columns.include?( opts[:out] )
  
  for record in self.records
    record[ opts[:out] ] = "#{record[col1]}#{opts[:separator]}#{record[col2]}"
  end
end
Also aliased as: concatenate
concatenate( col1, col2, opts = {} )
Alias for: concat_columns
drop_column( column ) click to toggle source
# File lib/csv_madness/sheet.rb, line 388
def drop_column( column )
  raise "Column does not exist" unless @columns.include?( column )
  
  @columns.delete( column )
  
  key = column.to_s
  
  @records.map{ |r|
    r.csv_data.delete( key )
  }
  
  update_data_accessor_module
end
fetch( index_col, key ) click to toggle source

Fetches an indexed record based on the column indexed and the keying object. If key is an array of keying objects, returns an array of records in the same order that the keying objects appear. Index column should yield a different, unique value for each record.

# File lib/csv_madness/sheet.rb, line 265
def fetch( index_col, key )
  if key.is_a?(Array)
    key.map{ |key| @indexes[index_col][key] }
  else
    @indexes[index_col][key]
  end
end
filter( ) { |record )| ... } click to toggle source

function should take an object, and return either true or false returns an array of objects that respond true when put through the meat grinder

# File lib/csv_madness/sheet.rb, line 276
def filter( &block )
  rval = []
  @records.each do |record|
    rval << record if ( yield record )
  end

  rval
end
filter!( &block ) click to toggle source

removes rows which fail the given test from the spreadsheet.

# File lib/csv_madness/sheet.rb, line 286
def filter!( &block )
  @records = self.filter( &block )
  reindex
  @records
end
length() click to toggle source
# File lib/csv_madness/sheet.rb, line 457
def length
  self.records.length
end
merge_columns( source, dest, opts = {} ) click to toggle source

If :reverse_merge is true, then the dest column is only overwritten for records where :dest is blank

# File lib/csv_madness/sheet.rb, line 414
def merge_columns( source, dest, opts = {} )
  opts = { :drop_source => true, :reverse_merge => false, :default => "" }.merge( opts )
  column_must_exist( source, dest )
  
  self.records.each do |record|
    if opts[:reverse_merge] == false || record.blank?( dest )
      record[dest] = record.blank?(source) ? opts[:default] : record[source]
    end
  end
  
  self.drop_column( source ) if opts[:drop_source]
end
multiple_columns(*args) click to toggle source

retrieve multiple columns. Returns an array of the form

[record1:col1, record1:col2…], [record2:col1, record2:col2…

]

# File lib/csv_madness/sheet.rb, line 338
def multiple_columns(*args)
  @records.inject([]){ |memo, record|
    memo << args.map{ |arg| record.send(arg) }
    memo
  }
end
nils_are_blank_strings() click to toggle source

Note: If implementation of Record[] changes, so must this.

# File lib/csv_madness/sheet.rb, line 451
def nils_are_blank_strings
  alter_cells do |value, record|
    value.nil? ? "" : value
  end
end
reload_spreadsheet( opts = @opts ) click to toggle source
# File lib/csv_madness/sheet.rb, line 236
def reload_spreadsheet( opts = @opts )
  load_csv if @spreadsheet_file
  set_initial_columns( opts[:columns] )
  create_record_class
  package
  
  set_index_columns( opts[:index] )
  reindex
end
remove_record( record ) click to toggle source

record can be the row number (integer from 0…@records.length) record can be the record itself (anonymous class)

# File lib/csv_madness/sheet.rb, line 214
def remove_record( record )
  record = @records[record] if record.is_a?(Integer)
  return if record.nil?
  
  self.remove_from_index( record )
  @records.delete( record )
end
remove_records( records = nil ) { |record| ... } click to toggle source

Here’s the deal: you hand us a block, and we’ll remove the records for which it yields true.

# File lib/csv_madness/sheet.rb, line 224
def remove_records( records = nil, &block )
  if block_given?
    for record in @records
      remove_record( record ) if yield( record ) == true
    end
  else # records should be an array
    for record in records
      self.remove_record( record )
    end
  end
end
rename_column( column, new_name ) click to toggle source
# File lib/csv_madness/sheet.rb, line 402
def rename_column( column, new_name )
  @columns[@columns.index(column)] = new_name
  rename_index_column( column, new_name ) if @index_columns.include?( column )
  update_data_accessor_module
end
set_column_type( column, type, blank = :undefined ) click to toggle source
# File lib/csv_madness/sheet.rb, line 408
def set_column_type( column, type, blank = :undefined )
  alter_column( column, blank, &COLUMN_TYPES[type] )
end
set_index_columns( index_columns ) click to toggle source
# File lib/csv_madness/sheet.rb, line 246
def set_index_columns( index_columns )
  @index_columns = case index_columns
  when NilClass
    []
  when Symbol
    [ index_columns ]
  when Array
    index_columns
  end
end
split( ) { |record| ... } click to toggle source

give a block, and get back a hash.

The hash keys are the results of the block. The hash values are copies of the spreadsheets, with only the records which caused the block to return the key.

# File lib/csv_madness/sheet.rb, line 312
def split( &block )
  sheets = Hash.new
  
  for record in @records
    result_key = yield record
    ( sheets[result_key] ||= self.blanked() ) << record
  end

  sheets
  # sheet_args = self.blanked
  # for key, record_set in records
  #   sheet = self.clone
  #   sheet.records =
  #
  #   records[key] = sheet
  # end
  #
  # records
end
to_csv( opts = {} ) click to toggle source
# File lib/csv_madness/sheet.rb, line 135
def to_csv( opts = {} )
  self.records.inject( self.columns.to_csv( opts ) ) do |output, record|
    output << record.to_csv( opts )
  end
end
write_to_file( file, opts = {} ) click to toggle source
# File lib/csv_madness/sheet.rb, line 131
def write_to_file( file, opts = {} )
  self.class.write_to_file( self, file, opts )
end

Protected Instance Methods

add_to_index( col, key, record ) click to toggle source
# File lib/csv_madness/sheet.rb, line 472
def add_to_index( col, key, record )
  (@indexes[col] ||= {})[key] = record
end
add_to_indexes( records ) click to toggle source
# File lib/csv_madness/sheet.rb, line 476
def add_to_indexes( records )
  if records.is_a?( Array )
    for record in records
      add_to_indexes( record )
    end
  else
    record = records
    for col in @index_columns
      add_to_index( col, record.send(col), record )
    end
  end
end
column_must_exist( *cols ) click to toggle source
# File lib/csv_madness/sheet.rb, line 603
def column_must_exist( *cols )
  for col in cols
    raise ArgumentError.new( "#{caller[0]}: column :#{col} does not exist.") unless self.columns.include?(col)
  end
end
columns_to_mapping() click to toggle source
# File lib/csv_madness/sheet.rb, line 586
def columns_to_mapping
  @columns.each_with_index.inject({}){ |memo, item| 
    memo[item.first] = item.last
    memo 
  }
end
compare_columns_to_headers() click to toggle source

Printout so the user can see which CSV columns are being matched to which getter/setters. Helpful for debugging dropped or duplicate entries in the column list.

# File lib/csv_madness/sheet.rb, line 564
def compare_columns_to_headers
  headers = fetch_csv_headers

  for i in 0...([@columns, headers].map(&:length).max)
    $stdout.puts "\t#{i}:  #{@columns[i]} ==> #{headers[i]}"
  end
end
create_data_accessor_module() click to toggle source
# File lib/csv_madness/sheet.rb, line 593
def create_data_accessor_module
  # columns = @columns                 # yes, this line is necessary. Module.new has its own @vars.
  
  @module = DataAccessorModule.new( columns_to_mapping )
end
create_record_class() click to toggle source

Each spreadsheet has its own anonymous record class, and each CSV row instantiates a record of this class. This is where the getters and setters come from.

# File lib/csv_madness/sheet.rb, line 510
def create_record_class
  create_data_accessor_module
  @record_class = Class.new( CsvMadness::Record )
  @record_class.spreadsheet = self
  @record_class.send( :include, @module )
end
csv_column_count() click to toggle source

How many columns? Based off the length of the headers.

# File lib/csv_madness/sheet.rb, line 582
def csv_column_count
  fetch_csv_headers.length
end
fetch_csv_headers() click to toggle source

fetch the original headers from the CSV file. If opts is false, or the CSV file isn’t loaded yet, returns an empty array.

# File lib/csv_madness/sheet.rb, line 519
def fetch_csv_headers
  if @csv && @opts[:header]
    if @csv.headers == true
      @csv.shift
      headers = @csv.headers
      @csv.rewind   # shift/rewind, else @csv.headers only returns 'true'
    else
      headers = @csv.headers
    end
    headers
  else
    []
  end
end
load_csv() click to toggle source
# File lib/csv_madness/sheet.rb, line 464
def load_csv
  
  # encoding seems to solve a specific problem with a specific spreadsheet, at an unknown cost.
  @csv = CSV.new( File.read(@spreadsheet_file).force_encoding("ISO-8859-1").encode("UTF-8"), 
                    { write_headers: true, 
                      headers: ( @opts[:header] ? :first_row : false ) } )
end
package() click to toggle source

Create objects that respond to the recipe-named methods

# File lib/csv_madness/sheet.rb, line 574
def package
  @records = []
  (@csv || []).each do |row|
    @records << @record_class.new( row )
  end
end
reindex() click to toggle source

Reindexes the record lookup tables.

# File lib/csv_madness/sheet.rb, line 496
def reindex
  @indexes = {}
  add_to_indexes( @records )
end
remove_from_index( record ) click to toggle source
# File lib/csv_madness/sheet.rb, line 489
def remove_from_index( record )
  for col in @index_columns
    @indexes[col].delete( record.send(col) )
  end
end
rename_index_column( column, new_name ) click to toggle source

shouldn’t require reindex

# File lib/csv_madness/sheet.rb, line 502
def rename_index_column( column, new_name )
  @index_columns[ @index_columns.index( column ) ] = new_name
  @indexes[new_name] = @indexes[column]
  @indexes.delete(column)
end
set_initial_columns( columns = nil ) click to toggle source

sets the list of columns. If passed nil:

if the CSV file has headers, will try to create column names based on headers.
otherwise, you end up with accessors like record.col1, record.col2, record.col3...

If the columns given doesn’t match the number of columns in the spreadsheet prints a warning and a comparison of the columns to the headers.

# File lib/csv_madness/sheet.rb, line 540
def set_initial_columns( columns = nil )
  if columns.nil?
    if @opts[:header] == false
      columns = (0...csv_column_count).map{ |i| :"col#{i}" }
    else
      columns = fetch_csv_headers.map{ |name| self.class.getter_name( name ) }
    end
  else
    unless !@csv || columns.length == csv_column_count
      $stderr.puts "Warning <#{@spreadsheet_file}>: columns array does not match the number of columns in the spreadsheet." 
      compare_columns_to_headers
    end
  end
  
  for column in columns
    raise "#{column} is in the list FORBIDDEN_COLUMN_NAMES" if FORBIDDEN_COLUMN_NAMES.include?(column)
  end
  
  @columns = columns
end
update_data_accessor_module() click to toggle source
# File lib/csv_madness/sheet.rb, line 599
def update_data_accessor_module
  @module.remap_accessors( columns_to_mapping )
end