class ExtractDataFromWorksheet

Attributes

formulae_array[RW]
formulae_shared[RW]
formulae_shared_targets[RW]
formulae_simple[RW]
only_extract_values[RW]
persevere[RW]
table_rids[RW]
values[RW]
worksheets_dimensions[RW]

Public Class Methods

new() click to toggle source
# File src/extract/extract_data_from_worksheet.rb, line 16
def initialize
  # State
  @current_element = []

  # For parsing formulae
  @fp = CachingFormulaParser.instance

  # Outputs
  @table_rids ||= {}
  @worksheets_dimensions ||= {}
  @values ||= {}
  @formulae_simple ||= {}
  @formulae_array ||= {}
  @formulae_shared ||= {}
  @formulae_shared_targets ||= {}
end

Public Instance Methods

attr(name, value) click to toggle source
# File src/extract/extract_data_from_worksheet.rb, line 119
def attr(name, value)
  case @current_element.last
  when :dimension
    return unless name == :ref
    @worksheets_dimensions[@sheet_name] = value
  when :tablePart
    return unless name == :'r:id'
    @table_rids[@sheet_name] ||= []
    @table_rids[@sheet_name] << value
  when :c
    case name
    when :r
      @ref = value.to_sym
    when :t
      @value_type = value
    end
  when :f
    case name
    when :t
      @formula_type = value
    when :si
      @si = value
    when :ref
      @formula_ref = value
    end
  end
end
convert_excels_unicode_escaping(excel_string) click to toggle source

Excel encodes unicode as x000D where the 000D is the unicode codepoint

# File src/extract/extract_data_from_worksheet.rb, line 158
def convert_excels_unicode_escaping(excel_string)
  excel_string.gsub(/_x([0-9A-F]{4})_/) do 
    unicode_codepoint = $1
    [unicode_codepoint.hex].pack("U")
  end
end
end_element(name) click to toggle source
# File src/extract/extract_data_from_worksheet.rb, line 63
def end_element(name)
  case name
  when :v
    @current_element.pop
    value = @value.join
    ast = case @value_type
    when 'b'; value == "1" ? [:boolean_true] : [:boolean_false]
    when 's'; [:shared_string, value.to_i]
    when 'n'; [:number, value.to_f]
    when 'e'; [:error, value.to_sym]
    when 'str'; [:string, convert_excels_unicode_escaping(value).freeze]
    else
      $stderr.puts "Value of type #{@value_type} not known #{@sheet_name} #{@ref}"
      exit
    end
    @values[key] = @fp.map(ast)
  when :f
    @current_element.pop
    return if only_extract_values

    unless @formula.empty?
      begin
        formula_text = @formula.join
        ast = @fp.parse(formula_text)
      rescue ExcelToCodeException => e
        e.ref = key if e.respond_to?(:ref) # Attach the sheet and reference to the exception
        if persevere
          $stderr.puts e.message
          $stderr.puts "--persevere true, so setting #{key} to '#NAME?'"
          ast = [:error, '#NAME?']
        else
          raise
        end
      end
      unless ast
        $stderr.puts "Could not parse #{@sheet_name} #{@ref} #{formula_text}"
        exit
      end
    end
    case @formula_type
    when 'simple'
      return if @formula.empty?
      @formulae_simple[key] = ast
    when 'shared'
      @formulae_shared_targets[key] = @si
      if ast
        @formulae_shared[key] = [@formula_ref, @si, ast]
      end
    when 'array'
      @formulae_array[key] = [@formula_ref, ast]
    end
  when :dimension, :tablePart, :c
    @current_element.pop
  end
end
extract(sheet_name, input_xml) click to toggle source
# File src/extract/extract_data_from_worksheet.rb, line 33
def extract(sheet_name, input_xml)
  @sheet_name = sheet_name.to_sym
  Ox.sax_parse(self, input_xml, :convert_special => true)
  self
end
key() click to toggle source
# File src/extract/extract_data_from_worksheet.rb, line 59
def key
  [@sheet_name, @ref]
end
start_element(name) click to toggle source
# File src/extract/extract_data_from_worksheet.rb, line 39
def start_element(name)
  case name
  when :v # :v is value
    @current_element.push(name)
    @value = []
  when :f # :f is formula
    @current_element.push(name)
    @formula = []
    @formula_type = 'simple' # Default is a simple formula, alternatives are shared or array
    @formula_ref = nil # Used to specify range for shared or array formulae
    @si = nil # Used to specify the index for shared formulae
  when :c # :c is cell, wraps values and formulas
    @current_element.push(name)
    @ref = nil
    @value_type = 'n' #Default type is number
  when :dimension, :tablePart
    @current_element.push(name)
  end
end
text(text) click to toggle source
# File src/extract/extract_data_from_worksheet.rb, line 147
def text(text)
  case @current_element.last
  when :v
    @value << text
  when :f
    @formula << text
  end
end