module NdrSupport::YAML::SerializationMigration

Lightweight wrapper around YAML serialization, to provide any necessary support for YAML engines and string encodings.

Constants

YAML_SAFE_CLASSES

Classes we routinely allow to be included in our YAML serialisations, automatically accepted by load_yaml

Public Instance Methods

dump_yaml(object) click to toggle source

Wrapper around: YAML.dump(object)

# File lib/ndr_support/yaml/serialization_migration.rb, line 62
# Serialises `object` to a YAML string using Psych.
#
# When `utf8_storage` is truthy the object is dumped unchanged. Otherwise
# it is first passed through `binary_encode_any_high_ascii`: Psych emits
# UTF-8 output, but historically YAML was preferred that could be safely
# kept in stores with other encodings. When the result is read back with
# #load_yaml, that binary encoding is reversed on load.
#
# Returns the String produced by Psych.dump.
def dump_yaml(object)
  payload = utf8_storage ? object : binary_encode_any_high_ascii(object)
  Psych.dump(payload)
end
load_yaml(string, coerce_invalid_chars = false) click to toggle source

Wrapper around: YAML.load(string)

# File lib/ndr_support/yaml/serialization_migration.rb, line 37
# Parses the YAML held in `string` and returns the loaded object.
#
# `string` is modified in place: its encoding is fixed, escaped special
# characters are handled, and the encoding is fixed once more before
# parsing. `coerce_invalid_chars` is forwarded to those helpers.
#
# If `yaml_safe_classes` is :unsafe, an unrestricted Psych.load is used,
# but only on Psych 3.x; any other Psych version raises SecurityError.
# Otherwise Psych.safe_load is used with `yaml_safe_classes` as the
# permitted classes, and with aliases enabled.
#
# Returns the result of `escape_control_chars_in_object!` on the loaded
# object.
def load_yaml(string, coerce_invalid_chars = false) # rubocop:disable Style/OptionalBooleanParameter
  fix_encoding!(string, coerce_invalid_chars)

  # Achieve same behaviour using `syck` and `psych`; unescaping may
  # introduce new bytes, so the encoding is fixed again afterwards.
  handle_special_characters!(string, coerce_invalid_chars)
  fix_encoding!(string, coerce_invalid_chars)

  # TODO: Bump NdrSupport major version, and switch to safe_load by default
  permitted = yaml_safe_classes
  loaded =
    if permitted == :unsafe
      unless Psych::VERSION.start_with?('3.')
        raise(SecurityError, 'Unsafe YAML no longer supported')
      end

      Psych.load(string)
    else
      Psych.safe_load(string, permitted_classes: permitted, aliases: true)
    end

  # Any string reachable from the loaded object must also be valid UTF-8.
  ensure_utf8_object!(loaded)

  # Finally, escape all non-printing control chars:
  escape_control_chars_in_object!(loaded)
end
utf8_storage() click to toggle source
# File lib/ndr_support/yaml/serialization_migration.rb, line 30
# Reports whether emitted YAML may contain UTF-8 characters.
#
# Returns the stored value only when it is equal to `false`; in every
# other case (including when nothing has been set) returns `true`.
def utf8_storage
  # `true` is the ndr_support default for versions >= 6 (previously false)
  @utf8_storage == false ? @utf8_storage : true
end
utf8_storage=(utf8_storage) click to toggle source

Allow emitted YAML to contain UTF-8 characters. Defaults to true. (Defaulted to false in ndr_support versions < 6.)

# File lib/ndr_support/yaml/serialization_migration.rb, line 26
# Stores the given value in @utf8_storage, with no validation.
# The `utf8_storage` reader returns the stored value only when it is
# equal to `false`, and returns `true` for anything else.
def utf8_storage=(utf8_storage)
  @utf8_storage = utf8_storage
end
yaml_safe_classes() click to toggle source
# File lib/ndr_support/yaml/serialization_migration.rb, line 20
# Returns the configured set of YAML safe classes.
#
# If a truthy value has been stored in @yaml_safe_classes it is returned
# as-is; otherwise the YAML_SAFE_CLASSES constant is used.
def yaml_safe_classes
  return @yaml_safe_classes if @yaml_safe_classes

  YAML_SAFE_CLASSES
end
yaml_safe_classes=(yaml_safe_classes) click to toggle source

Set list of YAML safe classes, or :unsafe to use unsafe load

# File lib/ndr_support/yaml/serialization_migration.rb, line 16
# Stores the given value in @yaml_safe_classes, with no validation.
# The value is either a list of classes to permit when loading YAML, or
# :unsafe, which #load_yaml compares against to select an unrestricted
# load. A nil or false value makes the `yaml_safe_classes` reader fall
# back to YAML_SAFE_CLASSES.
def yaml_safe_classes=(yaml_safe_classes)
  @yaml_safe_classes = yaml_safe_classes
end

Private Instance Methods

fix_encoding!(string, coerce) click to toggle source

Makes `string` valid UTF-8. If `coerce` is true, any invalid characters will be escaped; if false, they will trigger a UTF8Encoding::UTF8CoercionError.

# File lib/ndr_support/yaml/serialization_migration.rb, line 77
# Dispatches `string` to one of two UTF-8 helpers, chosen by `coerce`:
# `coerce_utf8!` when `coerce` is truthy, `ensure_utf8!` otherwise.
# Returns whatever the selected helper returns.
def fix_encoding!(string, coerce)
  if coerce
    coerce_utf8!(string)
  else
    ensure_utf8!(string)
  end
end
handle_special_characters!(string, coerce_invalid_chars) click to toggle source

Within double quotes, YAML allows special characters. While `psych` emits UTF-8 YAML, `syck` double escapes higher characters, so we need to unescape any we find. Both `psych` and `syck` escape lower control characters.

# File lib/ndr_support/yaml/serialization_migration.rb, line 85
# Unescapes `\xNN` hex sequences in a YAML document, in place.
#
# Does nothing (and returns nil) unless `string` starts with '---', so
# that only YAML that is not JSON is handled. Otherwise each run of
# `\xNN` escapes (upper-case hex digits only) that is not itself
# backslash-escaped is replaced by the bytes it encodes, with the
# replacement passed through `fix_encoding!` using `coerce_invalid_chars`.
#
# Returns the result of `escape_control_chars_in_object!(string)`.
def handle_special_characters!(string, coerce_invalid_chars)
  return unless string.start_with?('---') # Only handle YAML that is not JSON

  # Replace any encoded hex chars with their actual value. The negative
  # lookbehind and the first capturing group skip over properly escaped
  # backslashes.
  string.gsub!(/(?<!\\)((?:\\\\)*)((?:\\x[0-9A-F]{2})+)/) do
    match = ::Regexp.last_match
    leading_backslashes = match[1] # always an even number of backslashes
    hex_pairs = match[2].scan(/[0-9A-F]{2}/)

    decoded = hex_pairs.pack('H2' * hex_pairs.length)
    fix_encoding!(decoded, coerce_invalid_chars)
    leading_backslashes + decoded
  end

  # Re-escape any non-printing control characters,
  # as they can break the YAML parser:
  escape_control_chars_in_object!(string)
end