module NewRelic::Agent::EncodingNormalizer::EncodingNormalizer

Public Class Methods

normalize(raw_string)
# File lib/new_relic/agent/encoding_normalizer.rb, line 42
# Produce a string tagged as either UTF-8 or ISO-8859-1.
#
# @param raw_string [String] the string to normalize; never mutated.
# @return [String] +raw_string+ itself when it is already valid UTF-8 or
#   valid ISO-8859-1; otherwise a copy that has been either transcoded to
#   UTF-8 or retagged (bytes untouched) as ISO-8859-1.
def self.normalize(raw_string)
  source_encoding = raw_string.encoding
  well_formed = raw_string.valid_encoding?

  # Fast path: already in one of the two target encodings and well-formed,
  # so hand back the very same object.
  already_target = source_encoding == Encoding::UTF_8 ||
                   source_encoding == Encoding::ISO_8859_1
  return raw_string if already_target && well_formed

  # All remaining paths work on a copy so the caller's string is untouched.
  copy = raw_string.dup

  # Binary (ASCII-8BIT) or malformed data is unlikely to transcode to UTF-8,
  # so just relabel the bytes as ISO-8859-1, which preserves every byte.
  unless well_formed && source_encoding != Encoding::ASCII_8BIT
    return copy.force_encoding(Encoding::ISO_8859_1)
  end

  # Well-formed text in some other encoding: attempt a real transcode to
  # UTF-8, and relabel as ISO-8859-1 if the conversion raises.
  begin
    copy.encode!(Encoding::UTF_8)
  rescue
    copy.force_encoding(Encoding::ISO_8859_1)
  end
  copy
end