class Roo::Base

Base class for all other types of spreadsheets

Constants

MAX_ROW_COL
MIN_ROW_COL
TEMP_PREFIX

Attributes

header_line[RW]

sets the line with attribute names (default: 1)

headers[R]

Public Class Methods

new(filename, options = {}, _file_warning = :error, _tmpdir = nil) click to toggle source
# File lib/roo/base.rb, line 21
def initialize(filename, options = {}, _file_warning = :error, _tmpdir = nil)
  @filename = filename
  @options = options

  @cell = {}
  @cell_type = {}
  @cells_read = {}

  @first_row = {}
  @last_row = {}
  @first_column = {}
  @last_column = {}

  @header_line = 1
end

Public Instance Methods

cell_type_by_value(value) click to toggle source
# File lib/roo/base.rb, line 215
def cell_type_by_value(value)
  case value
  when Fixnum then :float
  when String, Float then :string
  else
    raise ArgumentError, "Type for #{value} not set"
  end
end
clean_sheet_if_need(options) click to toggle source
# File lib/roo/base.rb, line 325
def clean_sheet_if_need(options)
  return unless options[:clean]
  options.delete(:clean)
  @cleaned ||= {}
  clean_sheet(default_sheet) unless @cleaned[default_sheet]
end
collect_last_row_col_for_sheet(sheet) click to toggle source

Collect first/last row/column from sheet

# File lib/roo/base.rb, line 75
def collect_last_row_col_for_sheet(sheet)
  first_row = first_column = MAX_ROW_COL
  last_row = last_column = MIN_ROW_COL
  @cell[sheet].each_pair do|key, value|
    next unless value
    first_row = [first_row, key.first.to_i].min
    last_row = [last_row, key.first.to_i].max
    first_column = [first_column, key.last.to_i].min
    last_column = [last_column, key.last.to_i].max
  end if @cell[sheet]
  {first_row: first_row, first_column: first_column, last_row: last_row, last_column: last_column}
end
column(column_number, sheet = default_sheet) click to toggle source

returns all values in this column as an array column numbers are 1,2,3,… like in the spreadsheet

# File lib/roo/base.rb, line 195
def column(column_number, sheet = default_sheet)
  if column_number.is_a?(::String)
    column_number = ::Roo::Utils.letter_to_number(column_number)
  end
  read_cells(sheet)
  first_row(sheet).upto(last_row(sheet)).map do |row|
    cell(row, column_number, sheet)
  end
end
default_sheet() click to toggle source
# File lib/roo/base.rb, line 37
def default_sheet
  @default_sheet ||= sheets.first
end
default_sheet=(sheet) click to toggle source

sets the working sheet in the document ‘sheet’ can be a number (1 = first sheet) or the name of a sheet.

# File lib/roo/base.rb, line 43
def default_sheet=(sheet)
  validate_sheet!(sheet)
  @default_sheet = sheet
  @first_row[sheet] = @last_row[sheet] = @first_column[sheet] = @last_column[sheet] = nil
  @cells_read[sheet] = false
end
each(options = {}) { |row(line)| ... } click to toggle source

you can also pass in a :clean => true option to strip the sheet of odd unicode characters and white spaces around columns

# File lib/roo/base.rb, line 366
def each(options = {})
  if options.empty?
    1.upto(last_row) do |line|
      yield row(line)
    end
  else
    clean_sheet_if_need(options)
    search_or_set_header(options)
    headers = @headers ||
              Hash[(first_column..last_column).map do |col|
                [cell(@header_line, col), col]
              end]

    @header_line.upto(last_row) do |line|
      yield(Hash[headers.map { |k, v| [k, cell(line, v)] }])
    end
  end
end
each_with_pagename() { |sheet(s, true)| ... } click to toggle source

iterate through all worksheets of a document

# File lib/roo/base.rb, line 318
def each_with_pagename
  sheets.each do |s|
    yield sheet(s, true)
  end
end
empty?(row, col, sheet = default_sheet) click to toggle source

true if cell is empty

# File lib/roo/base.rb, line 232
def empty?(row, col, sheet = default_sheet)
  read_cells(sheet)
  row, col = normalize(row, col)
  contents = cell(row, col, sheet)
  !contents || (celltype(row, col, sheet) == :string && contents.empty?) \
    || (row < first_row(sheet) || row > last_row(sheet) || col < first_column(sheet) || col > last_column(sheet))
end
find(*args) click to toggle source

find a row either by row number or a condition Caution: this works only within the default sheet -> set default_sheet before you call this method (experimental. see examples in the test_roo.rb file)

# File lib/roo/base.rb, line 171
def find(*args) # :nodoc
  options = (args.last.is_a?(Hash) ? args.pop : {})

  case args[0]
  when Fixnum
    find_by_row(args[0])
  when :all
    find_by_conditions(options)
  else
    fail ArgumentError, "unexpected arg #{args[0].inspect}, pass a row index or :all"
  end
end
first_column_as_letter(sheet = default_sheet) click to toggle source

first non-empty column as a letter

# File lib/roo/base.rb, line 51
def first_column_as_letter(sheet = default_sheet)
  ::Roo::Utils.number_to_letter(first_column(sheet))
end
first_last_row_col_for_sheet(sheet) click to toggle source

Set first/last row/column for sheet

# File lib/roo/base.rb, line 61
def first_last_row_col_for_sheet(sheet)
  @first_last_row_cols ||= {}
  @first_last_row_cols[sheet] ||= begin
    result = collect_last_row_col_for_sheet(sheet)
    {
      first_row: result[:first_row] == MAX_ROW_COL ? nil : result[:first_row],
      first_column: result[:first_column] == MAX_ROW_COL ? nil : result[:first_column],
      last_row: result[:last_row] == MIN_ROW_COL ? nil : result[:last_row],
      last_column: result[:last_column] == MIN_ROW_COL ? nil : result[:last_column]
    }
  end
end
info() click to toggle source

returns information of the spreadsheet document and all sheets within this document.

# File lib/roo/base.rb, line 242
def info
  without_changing_default_sheet do
    result = "File: #{File.basename(@filename)}\n"\
      "Number of sheets: #{sheets.size}\n"\
      "Sheets: #{sheets.join(', ')}\n"
    n = 1
    sheets.each do|sheet|
      self.default_sheet = sheet
      result << 'Sheet ' + n.to_s + ":\n"
      unless first_row
        result << '  - empty -'
      else
        result << "  First row: #{first_row}\n"
        result << "  Last row: #{last_row}\n"
        result << "  First column: #{::Roo::Utils.number_to_letter(first_column)}\n"
        result << "  Last column: #{::Roo::Utils.number_to_letter(last_column)}"
      end
      result << "\n" if sheet != sheets.last
      n += 1
    end
    result
  end
end
inspect() click to toggle source

call to_s method defined on subclasses

# File lib/roo/base.rb, line 164
def inspect
  to_s
end
last_column_as_letter(sheet = default_sheet) click to toggle source

last non-empty column as a letter

# File lib/roo/base.rb, line 56
def last_column_as_letter(sheet = default_sheet)
  ::Roo::Utils.number_to_letter(last_column(sheet))
end
method_missing(m, *args) click to toggle source

when a method like spreadsheet.a42 is called convert it to a call of spreadsheet.cell(‘a’,42)

Calls superclass method
# File lib/roo/base.rb, line 294
def method_missing(m, *args)
  # #aa42 => #cell('aa',42)
  # #aa42('Sheet1')  => #cell('aa',42,'Sheet1')
  if m =~ /^([a-z]+)(\d)$/
    col = ::Roo::Utils.letter_to_number(Regexp.last_match[1])
    row = Regexp.last_match[2].to_i
    if args.empty?
      cell(row, col)
    else
      cell(row, col, args.first)
    end
  else
    super
  end
end
parse(options = {}) { |row| ... } click to toggle source
# File lib/roo/base.rb, line 385
def parse(options = {})
  ary = []
  each(options) do |row|
    yield(row) if block_given?
    ary << row
  end
  ary
end
reload() click to toggle source

reopens and read a spreadsheet document

# File lib/roo/base.rb, line 225
def reload
  ds = default_sheet
  reinitialize
  self.default_sheet = ds
end
row(row_number, sheet = default_sheet) click to toggle source

returns all values in this row as an array row numbers are 1,2,3,… like in the spreadsheet

# File lib/roo/base.rb, line 186
def row(row_number, sheet = default_sheet)
  read_cells(sheet)
  first_column(sheet).upto(last_column(sheet)).map do |col|
    cell(row_number, col, sheet)
  end
end
row_with(query, return_headers = false) click to toggle source
# File lib/roo/base.rb, line 394
def row_with(query, return_headers = false)
  line_no = 0
  each do |row|
    line_no += 1
    headers = query.map { |q| row.grep(q)[0] }.compact

    if headers.length == query.length
      @header_line = line_no
      return return_headers ? headers : line_no
    elsif line_no > 100
      fail "Couldn't find header row."
    end
  end
  fail "Couldn't find header row."
end
search_or_set_header(options) click to toggle source
# File lib/roo/base.rb, line 332
def search_or_set_header(options)
  if options[:header_search]
    @headers = nil
    @header_line = row_with(options[:header_search])
  elsif [:first_row, true].include?(options[:headers])
    @headers = []
    row(first_row).each_with_index { |x, i| @headers << [x, i + 1] }
  else
    set_headers(options)
  end
end
sheet(index, name = false) click to toggle source

access different worksheets by calling spreadsheet.sheet(1) or spreadsheet.sheet(‘SHEETNAME’)

# File lib/roo/base.rb, line 312
def sheet(index, name = false)
  self.default_sheet = String === index ? index : sheets[index]
  name ? [default_sheet, self] : self
end
to_csv(filename = nil, separator = ',', sheet = default_sheet) click to toggle source

write the current spreadsheet to stdout or into a file

# File lib/roo/base.rb, line 131
def to_csv(filename = nil, separator = ',', sheet = default_sheet)
  if filename
    File.open(filename, 'w') do |file|
      write_csv_content(file, sheet, separator)
    end
    true
  else
    sio = ::StringIO.new
    write_csv_content(sio, sheet, separator)
    sio.rewind
    sio.read
  end
end
to_matrix(from_row = nil, from_column = nil, to_row = nil, to_column = nil, sheet = default_sheet) click to toggle source

returns a matrix object from the whole sheet or a rectangular area of a sheet

# File lib/roo/base.rb, line 146
def to_matrix(from_row = nil, from_column = nil, to_row = nil, to_column = nil, sheet = default_sheet)
  require 'matrix'

  return Matrix.empty unless first_row

  from_row ||= first_row(sheet)
  to_row ||= last_row(sheet)
  from_column ||= first_column(sheet)
  to_column ||= last_column(sheet)

  Matrix.rows(from_row.upto(to_row).map do |row|
    from_column.upto(to_column).map do |col|
      cell(row, col, sheet)
    end
  end)
end
to_xml() click to toggle source

returns an XML representation of all sheets of a spreadsheet file

# File lib/roo/base.rb, line 267
def to_xml
  Nokogiri::XML::Builder.new do |xml|
    xml.spreadsheet do
      sheets.each do |sheet|
        self.default_sheet = sheet
        xml.sheet(name: sheet) do |x|
          if first_row && last_row && first_column && last_column
            # sonst gibt es Fehler bei leeren Blaettern
            first_row.upto(last_row) do |row|
              first_column.upto(last_column) do |col|
                unless empty?(row, col)
                  x.cell(cell(row, col),
                         row: row,
                         column: col,
                         type: celltype(row, col))
                end
              end
            end
          end
        end
      end
    end
  end.to_xml
end
to_yaml(prefix = {}, from_row = nil, from_column = nil, to_row = nil, to_column = nil, sheet = default_sheet) click to toggle source

returns a rectangular area (default: all cells) as yaml-output you can add additional attributes with the prefix parameter like: oo.to_yaml({“file”=>“flightdata_2007-06-26”, “sheet” => “1”})

# File lib/roo/base.rb, line 100
def to_yaml(prefix = {}, from_row = nil, from_column = nil, to_row = nil, to_column = nil, sheet = default_sheet)
  return '' unless first_row # empty result if there is no first_row in a sheet

  from_row ||= first_row(sheet)
  to_row ||= last_row(sheet)
  from_column ||= first_column(sheet)
  to_column ||= last_column(sheet)

  result = "--- \n"
  from_row.upto(to_row) do |row|
    from_column.upto(to_column) do |col|
      unless empty?(row, col, sheet)
        result << "cell_#{row}_#{col}: \n"
        prefix.each do|k, v|
          result << "  #{k}: #{v} \n"
        end
        result << "  row: #{row} \n"
        result << "  col: #{col} \n"
        result << "  celltype: #{celltype(row, col, sheet)} \n"
        value = cell(row, col, sheet)
        if celltype(row, col, sheet) == :time
          value = integer_to_timestring(value)
        end
        result << "  value: #{value} \n"
      end
    end
  end
  result
end

Protected Instance Methods

file_type_check(filename, ext, name, warning_level, packed = nil) click to toggle source
# File lib/roo/base.rb, line 412
def file_type_check(filename, ext, name, warning_level, packed = nil)
  if packed == :zip
    # lalala.ods.zip => lalala.ods
    # hier wird KEIN unzip gemacht, sondern nur der Name der Datei
    # getestet, falls es eine gepackte Datei ist.
    filename = File.basename(filename, File.extname(filename))
  end

  if uri?(filename) && qs_begin = filename.rindex('?')
    filename = filename[0..qs_begin - 1]
  end
  if File.extname(filename).downcase != ext
    case warning_level
    when :error
      warn file_type_warning_message(filename, ext)
      fail TypeError, "#{filename} is not #{name} file"
    when :warning
      warn "are you sure, this is #{name} spreadsheet file?"
      warn file_type_warning_message(filename, ext)
    when :ignore
      # ignore
    else
      fail "#{warning_level} illegal state of file_warning"
    end
  end
end
key_to_num(str) click to toggle source

konvertiert einen Key in der Form “12,45” (=row,column) in ein Array mit numerischen Werten ([12,45]) Diese Methode ist eine temp. Loesung, um zu erforschen, ob der Zugriff mit numerischen Keys schneller ist.

# File lib/roo/base.rb, line 443
def key_to_num(str)
  r, c = str.split(',')
  [r.to_i, c.to_i]
end
key_to_string(arr) click to toggle source

see: key_to_num

# File lib/roo/base.rb, line 449
def key_to_string(arr)
  "#{arr[0]},#{arr[1]}"
end

Private Instance Methods

cell_to_csv(row, col, sheet) click to toggle source

The content of a cell in the csv output

# File lib/roo/base.rb, line 662
def cell_to_csv(row, col, sheet)
  if empty?(row, col, sheet)
    ''
  else
    onecell = cell(row, col, sheet)

    case celltype(row, col, sheet)
    when :string
      %("#{onecell.tr('"', '""')}") unless onecell.empty?
    when :boolean
      %("#{onecell.tr('"', '""').downcase}")
    when :float, :percentage
      if onecell == onecell.to_i
        onecell.to_i.to_s
      else
        onecell.to_s
      end
    when :formula
      case onecell
      when String
        %("#{onecell.tr('"', '""')}") unless onecell.empty?
      when Float
        if onecell == onecell.to_i
          onecell.to_i.to_s
        else
          onecell.to_s
        end
      when DateTime
        onecell.to_s
      else
        fail "unhandled onecell-class #{onecell.class}"
      end
    when :date, :datetime
      onecell.to_s
    when :time
      integer_to_timestring(onecell)
    when :link
      %("#{onecell.url.tr('"', '""')}")
    else
      fail "unhandled celltype #{celltype(row, col, sheet)}"
    end || ''
  end
end
clean_sheet(sheet) click to toggle source
# File lib/roo/base.rb, line 524
def clean_sheet(sheet)
  read_cells(sheet)
  @cell[sheet].each_pair do |coord, value|
    if value.is_a?(::String)
      @cell[sheet][coord] = sanitize_value(value)
    end
  end
  @cleaned[sheet] = true
end
download_uri(uri, tmpdir) click to toggle source
# File lib/roo/base.rb, line 580
def download_uri(uri, tmpdir)
  require 'open-uri'
  tempfilename = File.join(tmpdir, File.basename(uri))
  begin
    File.open(tempfilename, 'wb') do |file|
      open(uri, 'User-Agent' => "Ruby/#{RUBY_VERSION}") do |net|
        file.write(net.read)
      end
    end
  rescue OpenURI::HTTPError
    raise "could not open #{uri}"
  end
  tempfilename
end
file_type_warning_message(filename, ext) click to toggle source
# File lib/roo/base.rb, line 464
def file_type_warning_message(filename, ext)
  "use #{Roo::CLASS_FOR_EXTENSION.fetch(ext.sub('.', '').to_sym)}.new to handle #{ext} spreadsheet files. This has #{File.extname(filename).downcase}"
rescue KeyError
  raise "unknown file type: #{ext}"
end
find_by_conditions(options) click to toggle source
# File lib/roo/base.rb, line 478
def find_by_conditions(options)
  rows = first_row.upto(last_row)
  header_for = Hash[1.upto(last_column).map do |col|
    [col, cell(@header_line, col)]
  end]

  # are all conditions met?
  conditions = options[:conditions]
  if conditions && !conditions.empty?
    column_with = header_for.invert
    rows = rows.select do |i|
      conditions.all? { |key, val| cell(i, column_with[key]) == val }
    end
  end

  if options[:array]
    rows.map { |i| row(i) }
  else
    rows.map do |i|
      Hash[1.upto(row(i).size).map do |j|
        [header_for.fetch(j), cell(i, j)]
      end]
    end
  end
end
find_by_row(row_index) click to toggle source
# File lib/roo/base.rb, line 470
def find_by_row(row_index)
  row_index += (header_line - 1) if @header_line

  row(row_index).size.times.map do |cell_index|
    cell(row_index, cell_index + 1)
  end
end
header_index(query) click to toggle source
# File lib/roo/base.rb, line 545
def header_index(query)
  row(@header_line).index(query) + first_column
end
integer_to_timestring(content) click to toggle source

converts an integer value to a time string like ‘02:05:06’

# File lib/roo/base.rb, line 709
def integer_to_timestring(content)
  h = (content / 3600.0).floor
  content = content - h * 3600
  m = (content / 60.0).floor
  content = content - m * 60
  s = content
  sprintf('%02d:%02d:%02d', h, m, s)
end
local_filename(filename, tmpdir, packed) click to toggle source
# File lib/roo/base.rb, line 455
def local_filename(filename, tmpdir, packed)
  filename = download_uri(filename, tmpdir) if uri?(filename)
  filename = unzip(filename, tmpdir) if packed == :zip
  unless File.file?(filename)
    fail IOError, "file #{filename} does not exist"
  end
  filename
end
make_tmpdir(prefix = nil, root = nil, &block) click to toggle source
# File lib/roo/base.rb, line 515
def make_tmpdir(prefix = nil, root = nil, &block)
  prefix = if prefix
    TEMP_PREFIX + prefix
  else
    TEMP_PREFIX
  end
  Dir.mktmpdir(prefix, root || ENV['ROO_TMP'], &block)
end
normalize(row, col) click to toggle source

converts cell coordinate to numeric values of row,col

# File lib/roo/base.rb, line 558
def normalize(row, col)
  if row.is_a?(::String)
    if col.is_a?(::Fixnum)
      # ('A',1):
      # ('B', 5) -> (5, 2)
      row, col = col, row
    else
      fail ArgumentError
    end
  end
  if col.is_a?(::String)
    col = ::Roo::Utils.letter_to_number(col)
  end
  [row, col]
end
open_from_stream(stream, tmpdir) click to toggle source
# File lib/roo/base.rb, line 595
def open_from_stream(stream, tmpdir)
  tempfilename = File.join(tmpdir, 'spreadsheet')
  File.open(tempfilename, 'wb') do |file|
    file.write(stream[7..-1])
  end
  File.join(tmpdir, 'spreadsheet')
end
process_zipfile_packed(zip, tmpdir, path = '') click to toggle source
# File lib/roo/base.rb, line 629
def process_zipfile_packed(zip, tmpdir, path = '')
  if zip.file.file? path
    # extract and return filename
    File.open(File.join(tmpdir, path), 'wb') do |file|
      file.write(zip.read(path))
    end
    File.join(tmpdir, path)
  else
    ret = nil
    path += '/' unless path.empty?
    zip.dir.foreach(path) do |filename|
      ret = process_zipfile_packed(zip, tmpdir, path + filename)
    end
    ret
  end
end
reinitialize() click to toggle source
# File lib/roo/base.rb, line 511
def reinitialize
  initialize(@filename)
end
sanitize_value(v) click to toggle source
# File lib/roo/base.rb, line 534
def sanitize_value(v)
  v.unpack('U*').select { |b| b < 127 }.pack('U*').strip
end
set_headers(hash = {}) click to toggle source
# File lib/roo/base.rb, line 538
def set_headers(hash = {})
  # try to find header row with all values or give an error
  # then create new hash by indexing strings and keeping integers for header array
  @headers = row_with(hash.values, true)
  @headers = Hash[hash.keys.zip(@headers.map { |x| header_index(x) })]
end
set_type(row, col, type, sheet = default_sheet) click to toggle source
# File lib/roo/base.rb, line 553
def set_type(row, col, type, sheet = default_sheet)
  @cell_type[sheet][[row, col]] = type
end
set_value(row, col, value, sheet = default_sheet) click to toggle source
# File lib/roo/base.rb, line 549
def set_value(row, col, value, sheet = default_sheet)
  @cell[sheet][[row, col]] = value
end
unzip(filename, tmpdir) click to toggle source
# File lib/roo/base.rb, line 603
def unzip(filename, tmpdir)
  require 'zip/filesystem'

  Zip::File.open(filename) do |zip|
    process_zipfile_packed(zip, tmpdir)
  end
end
uri?(filename) click to toggle source
# File lib/roo/base.rb, line 574
def uri?(filename)
  filename.start_with?('http://', 'https://')
rescue
  false
end
validate_sheet!(sheet) click to toggle source

check if default_sheet was set and exists in sheets-array

# File lib/roo/base.rb, line 612
def validate_sheet!(sheet)
  case sheet
  when nil
    fail ArgumentError, "Error: sheet 'nil' not valid"
  when Fixnum
    sheets.fetch(sheet - 1) do
      fail RangeError, "sheet index #{sheet} not found"
    end
  when String
    unless sheets.include? sheet
      fail RangeError, "sheet '#{sheet}' not found"
    end
  else
    fail TypeError, "not a valid sheet type: #{sheet.inspect}"
  end
end
without_changing_default_sheet() { || ... } click to toggle source
# File lib/roo/base.rb, line 504
def without_changing_default_sheet
  original_default_sheet = default_sheet
  yield
ensure
  self.default_sheet = original_default_sheet
end
write_csv_content(file = nil, sheet = nil, separator = ',') click to toggle source

Write all cells to the csv file. File can be a filename or nil. If the this parameter is nil the output goes to STDOUT

# File lib/roo/base.rb, line 648
def write_csv_content(file = nil, sheet = nil, separator = ',')
  file ||= STDOUT
  if first_row(sheet) # sheet is not empty
    1.upto(last_row(sheet)) do |row|
      1.upto(last_column(sheet)) do |col|
        file.print(separator) if col > 1
        file.print cell_to_csv(row, col, sheet)
      end
      file.print("\n")
    end # sheet not empty
  end
end