module HippieCSV::Support

Public Class Methods

dump_quotes(string, quote_character) click to toggle source
# File lib/hippie_csv/support.rb, line 44
# Removes every occurrence of +quote_character+ from +string+.
#
# @param string [String] text to clean; it is not modified in place
# @param quote_character [String] pattern passed to String#gsub for removal
# @return [String] a new string with all matches removed
def dump_quotes(string, quote_character)
  without_quotes = string.gsub(quote_character) { "" }
  without_quotes
end
encode(string) click to toggle source
# File lib/hippie_csv/support.rb, line 7
# Prepares +string+ for CSV parsing.
#
# Steps, in order:
# 1. pass the string through ensure_valid_encoding;
# 2. for each entry of DELIMETERS, remove the text matched by
#    blank_line_regex(delimiter);
# 3. re-encode to the string's own encoding with +universal_newline+ so
#    CRLF / CR line endings become LF.
#
# The input is never modified: ensure_valid_encoding may hand back the very
# same object it was given, so only non-destructive calls are used here
# (an in-place gsub! would alter the caller's string and raise on frozen
# input).
#
# @param string [String] raw CSV text
# @return [String] a new, normalized string
def encode(string)
  cleaned = DELIMETERS.reduce(ensure_valid_encoding(string)) do |text, delimiter|
    text.gsub(blank_line_regex(delimiter), "")
  end

  cleaned.encode(cleaned.encoding, universal_newline: true)
end
guess_delimeter(string, quote_character) click to toggle source
# File lib/hippie_csv/support.rb, line 56
# Picks the entry of DELIMETERS that yields the highest field_count for
# +string+.
#
# When several delimiters tie, the one listed first in DELIMETERS wins
# (Enumerable#max_by keeps the first maximum it sees).
#
# @param string [String] CSV text to inspect
# @param quote_character [String] quote character forwarded to field_count
# @return [String, nil] the best-scoring delimiter; nil only if DELIMETERS is empty
def guess_delimeter(string, quote_character)
  DELIMETERS.max_by do |candidate|
    # to_i turns a nil count into 0 so counts always compare as integers.
    field_count(string, candidate, quote_character).to_i
  end
end
maybe_parse(string) click to toggle source
# File lib/hippie_csv/support.rb, line 17
# Tries progressively more forgiving strategies to parse +string+ as CSV.
#
# The string is first run through encode. Then, for each entry of
# QUOTE_CHARACTERS, three candidates are attempted in order:
# the encoded text as-is, the text after tolerate_escaping, and the text
# after dump_quotes. Each candidate has runs of "\n" squeezed and
# surrounding whitespace stripped before being given to parse_csv.
# The first attempt that gets through rescuing_malformed without being
# swallowed is returned immediately.
#
# @param string [String] raw CSV text
# @return [Object, nil] whatever parse_csv returned for the first
#   successful attempt, or nil when every attempt failed
def maybe_parse(string)
  encoded = encode(string)

  QUOTE_CHARACTERS.each do |quote_character|
    candidates = [
      encoded,
      tolerate_escaping(encoded, quote_character),
      dump_quotes(encoded, quote_character)
    ]

    candidates.each do |candidate|
      rescuing_malformed do
        # Non-local return: leaves maybe_parse as soon as one parse succeeds.
        return parse_csv(candidate.squeeze("\n").strip, quote_character)
      end
    end
  end

  nil
end
maybe_stream(path, &block) click to toggle source
# File lib/hippie_csv/support.rb, line 37
# Reads the file at +path+ one line at a time and yields the first parsed
# row of each line to +block+.
#
# Every line is parsed on its own with maybe_parse. Lines that produce no
# row are skipped: this covers both an empty parse result and the case
# where maybe_parse gives up and returns nil (previously that nil caused a
# NoMethodError on +first+).
#
# @param path [String] file to read; opened with ENCODING_WITH_BOM
# @yieldparam row [Object] first row parsed from the current line
# @return [nil] the result of File.foreach when given a block
def maybe_stream(path, &block)
  File.foreach(path, encoding: ENCODING_WITH_BOM) do |line|
    rows = maybe_parse(line)
    first_row = rows && rows.first
    block.call(first_row) if first_row
  end
end
parse_csv(string, quote_character) click to toggle source
# File lib/hippie_csv/support.rb, line 29
# Parses +string+ with CSV.parse, using +quote_character+ as the quote
# character and the column separator chosen by guess_delimeter.
#
# @param string [String] CSV text
# @param quote_character [String] value passed as +quote_char+
# @return [Array<Array>] rows as returned by CSV.parse
def parse_csv(string, quote_character)
  separator = guess_delimeter(string, quote_character)

  CSV.parse(string, quote_char: quote_character, col_sep: separator)
end
rescuing_malformed() { || ... } click to toggle source
# File lib/hippie_csv/support.rb, line 48
# Runs the given block and swallows CSV::MalformedCSVError.
#
# @yield the work to attempt
# @return [Object, nil] the block's value, or nil when the block raised
#   CSV::MalformedCSVError; any other exception propagates
def rescuing_malformed
  yield
rescue CSV::MalformedCSVError
  nil
end
tolerate_escaping(string, quote_character) click to toggle source
# File lib/hippie_csv/support.rb, line 52
# Rewrites backslash-escaped quote characters as doubled quote characters,
# i.e. each backslash followed by +quote_character+ becomes two
# +quote_character+s.
#
# @param string [String] text to rewrite; it is not modified in place
# @param quote_character [String] the quote character in use
# @return [String] a new string with the substitution applied
def tolerate_escaping(string, quote_character)
  backslash_escaped = "\\#{quote_character}"
  doubled = "#{quote_character}#{quote_character}"

  string.gsub(backslash_escaped, doubled)
end

Private Class Methods

blank_line_regex(delimiter) click to toggle source
# File lib/hippie_csv/support.rb, line 82
# Builds a regex matching a line that consists only of one or more
# repetitions of +delimiter+ followed by a CRLF or CR terminator.
#
# The delimiter is escaped before interpolation so it is always matched
# literally; unescaped, "." would match any line and "|" would not even
# compile. It is wrapped in a non-capturing group so the repetition
# applies to the whole delimiter.
#
# @param delimiter [String] the column separator
# @return [Regexp]
def blank_line_regex(delimiter)
  /^(?:#{Regexp.escape(delimiter)})+(\r\n|\r)$/
end
detect_encoding(string) click to toggle source
# File lib/hippie_csv/support.rb, line 86
# Asks CharDet for the encoding of a leading sample of +string+.
#
# The sample is the first ENCODING_SAMPLE_CHARACTER_COUNT characters.
# A start/length slice is used because the inclusive range 0..COUNT
# takes one character more than the constant's name says.
#
# @param string [String] text whose encoding is unknown
# @return [Object] the value stored under "encoding" in CharDet's result
#   (the caller checks it for nil)
def detect_encoding(string)
  sample = string[0, ENCODING_SAMPLE_CHARACTER_COUNT]
  CharDet.detect(sample)["encoding"]
end
ensure_valid_encoding(string) click to toggle source
# File lib/hippie_csv/support.rb, line 68
# Returns a version of +string+ whose encoding is valid.
#
# * A string that already has a valid encoding is returned as-is (same
#   object).
# * Otherwise the encoding is guessed with detect_encoding. If a guess is
#   available and differs from ENCODING, the string is transcoded from the
#   guessed encoding to ENCODING.
# * If there is no guess, or the guess equals ENCODING, magical_encode is
#   used instead.
#
# The guess can be wrong or name an encoding Ruby cannot convert from, so
# every transcoding failure (invalid bytes, a character with no mapping, an
# unknown converter) falls back to magical_encode rather than propagating.
#
# @param string [String] text of uncertain encoding
# @return [String] the original string, or a re-encoded copy
def ensure_valid_encoding(string)
  return string if string.valid_encoding?

  current_encoding = detect_encoding(string)

  if !current_encoding.nil? && current_encoding != ENCODING
    string.encode(ENCODING, current_encoding)
  else
    magical_encode(string)
  end
rescue Encoding::InvalidByteSequenceError,
       Encoding::UndefinedConversionError,
       Encoding::ConverterNotFoundError
  magical_encode(string)
end
field_count(file, delimeter, quote_character) click to toggle source
# File lib/hippie_csv/support.rb, line 95
# Counts the fields found when +file+ is read as CSV with the given
# separator and quote character.
#
# Only the first FIELD_SAMPLE_COUNT rows are read (lazily), and their
# sizes are summed. The sum starts at 0 so input with no rows yields 0
# instead of nil, matching the value returned for malformed input.
#
# @param file [String, IO] CSV data handed to CSV.new
# @param delimeter [String] candidate column separator
# @param quote_character [String] quote character
# @return [Integer] total number of fields in the sampled rows; 0 when
#   there are no rows or the data is malformed for these settings
def field_count(file, delimeter, quote_character)
  csv = CSV.new(file, col_sep: delimeter, quote_char: quote_character)
  csv.lazy.take(FIELD_SAMPLE_COUNT).map(&:size).inject(0, :+)
rescue CSV::MalformedCSVError
  0
end
magical_encode(string) click to toggle source
# File lib/hippie_csv/support.rb, line 90
# Forces +string+ into ENCODING by round-tripping it through
# ALTERNATE_ENCODING.
#
# The first conversion treats the bytes as ENCODING and replaces invalid
# byte sequences with the empty string; the second converts the result
# back to ENCODING.
#
# @param string [String] text containing invalid byte sequences
# @return [String] a new string in ENCODING
def magical_encode(string)
  scrubbed = string.encode(ALTERNATE_ENCODING, ENCODING, invalid: :replace, replace: "")

  scrubbed.encode(ENCODING, ALTERNATE_ENCODING)
end