class ExtractDataFromWorksheet
Attributes
formulae_array[RW]
formulae_simple[RW]
only_extract_values[RW]
persevere[RW]
table_rids[RW]
values[RW]
worksheets_dimensions[RW]
Public Class Methods
new()
click to toggle source
# File src/extract/extract_data_from_worksheet.rb, line 16 def initialize # State @current_element = [] # For parsing formulae @fp = CachingFormulaParser.instance # Outputs @table_rids ||= {} @worksheets_dimensions ||= {} @values ||= {} @formulae_simple ||= {} @formulae_array ||= {} @formulae_shared ||= {} @formulae_shared_targets ||= {} end
Public Instance Methods
attr(name, value)
click to toggle source
# File src/extract/extract_data_from_worksheet.rb, line 119 def attr(name, value) case @current_element.last when :dimension return unless name == :ref @worksheets_dimensions[@sheet_name] = value when :tablePart return unless name == :'r:id' @table_rids[@sheet_name] ||= [] @table_rids[@sheet_name] << value when :c case name when :r @ref = value.to_sym when :t @value_type = value end when :f case name when :t @formula_type = value when :si @si = value when :ref @formula_ref = value end end end
convert_excels_unicode_escaping(excel_string)
click to toggle source
Excel encodes unicode as x000D where the 000D is the unicode codepoint
# File src/extract/extract_data_from_worksheet.rb, line 158 def convert_excels_unicode_escaping(excel_string) excel_string.gsub(/_x([0-9A-F]{4})_/) do unicode_codepoint = $1 [unicode_codepoint.hex].pack("U") end end
end_element(name)
click to toggle source
# File src/extract/extract_data_from_worksheet.rb, line 63 def end_element(name) case name when :v @current_element.pop value = @value.join ast = case @value_type when 'b'; value == "1" ? [:boolean_true] : [:boolean_false] when 's'; [:shared_string, value.to_i] when 'n'; [:number, value.to_f] when 'e'; [:error, value.to_sym] when 'str'; [:string, convert_excels_unicode_escaping(value).freeze] else $stderr.puts "Value of type #{@value_type} not known #{@sheet_name} #{@ref}" exit end @values[key] = @fp.map(ast) when :f @current_element.pop return if only_extract_values unless @formula.empty? begin formula_text = @formula.join ast = @fp.parse(formula_text) rescue ExcelToCodeException => e e.ref = key if e.respond_to?(:ref) # Attach the sheet and reference to the exception if persevere $stderr.puts e.message $stderr.puts "--persevere true, so setting #{key} to '#NAME?'" ast = [:error, '#NAME?'] else raise end end unless ast $stderr.puts "Could not parse #{@sheet_name} #{@ref} #{formula_text}" exit end end case @formula_type when 'simple' return if @formula.empty? @formulae_simple[key] = ast when 'shared' @formulae_shared_targets[key] = @si if ast @formulae_shared[key] = [@formula_ref, @si, ast] end when 'array' @formulae_array[key] = [@formula_ref, ast] end when :dimension, :tablePart, :c @current_element.pop end end
extract(sheet_name, input_xml)
click to toggle source
# File src/extract/extract_data_from_worksheet.rb, line 33 def extract(sheet_name, input_xml) @sheet_name = sheet_name.to_sym Ox.sax_parse(self, input_xml, :convert_special => true) self end
key()
click to toggle source
# File src/extract/extract_data_from_worksheet.rb, line 59 def key [@sheet_name, @ref] end
start_element(name)
click to toggle source
# File src/extract/extract_data_from_worksheet.rb, line 39 def start_element(name) case name when :v # :v is value @current_element.push(name) @value = [] when :f # :f is formula @current_element.push(name) @formula = [] @formula_type = 'simple' # Default is a simple formula, alternatives are shared or array @formula_ref = nil # Used to specify range for shared or array formulae @si = nil # Used to specify the index for shared formulae when :c # :c is cell, wraps values and formulas @current_element.push(name) @ref = nil @value_type = 'n' #Default type is number when :dimension, :tablePart @current_element.push(name) end end
text(text)
click to toggle source
# File src/extract/extract_data_from_worksheet.rb, line 147 def text(text) case @current_element.last when :v @value << text when :f @formula << text end end