module RemoteTable::Delimited
Parses plaintext comma-separated (CSV), tab-separated (TSV), or really anything-delimited files using Ruby’s CSV parser.
Constants
- Engine
Public Class Methods
Public Instance Methods
_each() { |array| ... }
click to toggle source
Yield each row using Ruby’s CSV parser (FasterCSV on Ruby 1.8).
# File lib/remote_table/delimited.rb, line 25
# Yields every parsed row of the local copy to the given block.
#
# A row is yielded as an Array when +headers+ is falsy, and as an
# ActiveSupport::OrderedHash keyed by column header otherwise. Every value is
# passed through RemoteTable.normalize_whitespace first. Rows in which no
# value is +present?+ are skipped unless +keep_blank_rows+ is set.
#
# The local copy is cleaned up when iteration finishes, even on error.
def _each
  parser = Engine.new(local_copy.encoded_io, csv_options.merge(headers: headers))
  parser.each do |row|
    found_value = false
    if headers
      # Header mode: build an ordered hash keyed by column header.
      record = ::ActiveSupport::OrderedHash.new
      row.each do |key, value|
        # Entries with a nil header are dropped.
        next if key.nil?
        value = RemoteTable.normalize_whitespace(value)
        # Only probe for content until the first present value is seen, and
        # never when blank rows are kept anyway.
        found_value = true if !found_value && !keep_blank_rows && value.present?
        record[key] = value
      end
    else
      # Headerless mode: the row is represented as a plain array.
      record = row.map do |value|
        value = RemoteTable.normalize_whitespace(value)
        found_value = true if !found_value && !keep_blank_rows && value.present?
        value
      end
    end
    yield record if found_value || keep_blank_rows
  end
ensure
  local_copy.cleanup
end
adaptive_quote_char()
click to toggle source
# File lib/remote_table/delimited.rb, line 79
# Chooses the quote character handed to the CSV parser.
#
# Returns the configured +quote_char+ when one is set. Otherwise returns the
# NUL character for tab- and pipe-delimited input, and nil for any other
# delimiter.
# NOTE(review): the NUL character presumably serves to effectively disable
# quote handling for those formats -- confirm.
def adaptive_quote_char
  explicit = quote_char
  return explicit if explicit

  separator = delimiter
  "\0" if separator == "\t" || separator == '|'
end
csv_options()
click to toggle source
# File lib/remote_table/delimited.rb, line 66
# Builds the options hash passed to the CSV engine.
#
# Always contains +:skip_blanks+ (the inverse of +keep_blank_rows+). Adds
# +:col_sep+ when a delimiter is configured and +:quote_char+ when
# +adaptive_quote_char+ yields one.
def csv_options
  options = { skip_blanks: !keep_blank_rows }

  separator = delimiter
  options[:col_sep] = separator if separator

  quote = adaptive_quote_char
  options[:quote_char] = quote if quote

  options
end
headers()
click to toggle source
# File lib/remote_table/delimited.rb, line 87
# Resolves the column headers from the +@headers+ setting and memoizes the
# result in +@_headers+.
#
# * +false+ / +nil+       -- returns +false+ (no headers).
# * +:first_row+ / +true+ -- reads lines from the local copy until a non-blank
#   one is found, parses it with the CSV engine and returns the
#   whitespace-normalized titles. Blank titles become "untitled_N". Parsing
#   stops early once the untitled count exceeds +stop_after_untitled_headers+
#   (when that is set).
# * Array                 -- returned as given.
#
# Raises RuntimeError when the setting is of any other kind, when the input
# ends before a non-blank line is found, or when the first line cannot be
# parsed into headers.
def headers
  return @_headers if defined?(@_headers)
  @_headers = case @headers
  when FalseClass, NilClass
    false
  when :first_row, TrueClass
    untitled_count = 0
    line = nil
    loop do
      raw = local_copy.encoded_io.gets
      # gets returns nil at end of input; fail with a clear message instead of
      # a NoMethodError from calling strip on nil.
      raise 'No headers found: reached end of input before a non-blank line' if raw.nil?
      line = raw.strip
      break unless line.empty?
    end
    proto_headers = Engine.parse_line(line, csv_options)
    raise "No headers found in first line: #{line.inspect}" unless proto_headers
    proto_headers.inject([]) do |memo, v|
      break memo if stop_after_untitled_headers && untitled_count > stop_after_untitled_headers
      header = RemoteTable.normalize_whitespace v
      memo << (header.present? ? header : "untitled_#{untitled_count += 1}")
      memo
    end
  when Array
    @headers
  else
    # Inspect the raw setting: calling the +headers+ method here would re-enter
    # this method before @_headers is assigned and recurse without end.
    raise "Invalid headers: #{@headers.inspect}"
  end
end
preprocess!()
click to toggle source
# File lib/remote_table/delimited.rb, line 17
# Runs the preparation steps on the file, in this fixed order, and returns
# the result of the last one.
def preprocess!
  delete_harmful!()
  convert_eol_to_unix!()
  transliterate_whole_file_to_utf8!()
  skip_rows!()
end