module Ronin::Support::Encoding::XML

Contains methods for encoding/decoding and escaping/unescaping XML data.

## Features

## Core-Ext Methods

@api public

Constants

ESCAPED_CHARS

XML escaped characters and their unescaped forms.

ESCAPE_BYTES

Special bytes and their escaped XML characters.

ESCAPE_BYTES_UPPERCASE

Special bytes and their escaped XML characters, but in uppercase.

Public Class Methods

decode(data) click to toggle source

Alias for {unescape}.

@param [String] data

The data to XML unescape.

@return [String]

The unescaped String.

@see unescape

# File lib/ronin/support/encoding/xml.rb, line 243
# Decoding XML data is the same operation as unescaping it, so this simply
# delegates to unescape and hands back its result.
def self.decode(data)
  return unescape(data)
end
encode(data,**kwargs) click to toggle source

Encodes each character in the given data as an XML character.

@param [String] data

The data to XML encode.

@param [Hash{Symbol => Object}] kwargs

Additional keyword arguments.

@option kwargs [:decimal, :hex] :format (:decimal)

The numeric format for the escaped characters.

@option kwargs [Boolean] :zero_pad (false)

Controls whether the numeric value of each escaped character is
zero-padded to a minimum of seven digits.

@option kwargs [:lower, :upper, nil] :case

Controls whether to output lowercase or uppercase XML special
characters. Defaults to lowercase hexadecimal.

@return [String]

The XML encoded String.

@raise [ArgumentError]

The `format:` or `case:` keyword argument is invalid.

@example

Encoding::XML.encode("abc")
# => "&#97;&#98;&#99;"

@example Zero-padding:

Encoding::XML.encode("abc", zero_pad: true)
# => "&#0000097;&#0000098;&#0000099;"

@example Hexadecimal encoded characters:

Encoding::XML.encode("abc", format: :hex)
# => "&#x61;&#x62;&#x63;"

@example Uppercase hexadecimal encoded characters:

Encoding::XML.encode("abc\xff", format: :hex, case: :upper)
# => "&#X61;&#X62;&#X63;&#XFF;"
# File lib/ronin/support/encoding/xml.rb, line 216
# Builds the XML encoded form of `data` by running every codepoint through
# encode_byte and concatenating the results. All keyword arguments are
# forwarded to encode_byte untouched.
def self.encode(data,**kwargs)
  # A String that is not validly encoded has no well-formed codepoints to
  # walk, so fall back to encoding its raw bytes instead.
  units = if data.valid_encoding? then data.each_codepoint
          else                         data.each_byte
          end

  units.each_with_object(String.new) do |unit,buffer|
    buffer << encode_byte(unit,**kwargs)
  end
end
encode_byte(byte, format: :decimal, zero_pad: false, **kwargs) click to toggle source

Encodes the byte as an XML numeric character reference (decimal or hexadecimal).

@param [Integer] byte

The byte to XML encode.

@param [:decimal, :hex] format

The numeric format for the escaped characters.

@param [Boolean] zero_pad

Controls whether the numeric value of the escaped character is
zero-padded to a minimum of seven digits.

@param [Hash{Symbol => Object}] kwargs

Additional keyword arguments.

@option kwargs [:lower, :upper, nil] :case

Controls whether to output lowercase or uppercase XML special
characters. Defaults to lowercase hexadecimal.

@return [String]

The XML numeric character reference for the byte.

@raise [ArgumentError]

The `format:` or `case:` keyword argument is invalid.

@example

Encoding::XML.encode_byte(0x41)
# => "&#65;"

@example Zero-padding:

Encoding::XML.encode_byte(0x41, zero_pad: true)
# => "&#0000065;"

@example Hexadecimal escaped characters:

Encoding::XML.encode_byte(0x41, format: :hex)
# => "&#x41;"

@example Uppercase hexadecimal escaped characters:

Encoding::XML.encode_byte(0xff, format: :hex, case: :upper)
# => "&#XFF;"
# File lib/ronin/support/encoding/xml.rb, line 150
# Formats a single byte (or codepoint) as an XML numeric character
# reference.
#
# The work is split in two steps: first pick the sprintf-style template that
# matches the requested `format:`, `zero_pad:` and `case:` options, then
# apply it to the value. The `case:` option is only consulted (and only
# validated) for the `:hex` format.
#
# Raises ArgumentError when `format:` is neither `:decimal` nor `:hex`, or
# when `format: :hex` is combined with a `case:` other than `:lower`,
# `:upper` or `nil`.
def self.encode_byte(byte, format: :decimal, zero_pad: false, **kwargs)
  template = case format
             when :decimal
               zero_pad ? "&#%.7d;" : "&#%d;"
             when :hex
               letter_case = kwargs[:case]

               if :upper == letter_case
                 zero_pad ? "&#X%.7X;" : "&#X%.2X;"
               elsif :lower == letter_case || letter_case.nil?
                 zero_pad ? "&#x%.7x;" : "&#x%.2x;"
               else
                 raise(ArgumentError,"case (#{kwargs[:case].inspect}) keyword argument must be either :lower, :upper, or nil")
               end
             else
               raise(ArgumentError,"format (#{format.inspect}) must be :decimal or :hex")
             end

  template % byte
end
escape(data,**kwargs) click to toggle source

XML escapes the data.

@param [String] data

The data to XML escape.

@param [Hash{Symbol => Object}] kwargs

Additional keyword arguments.

@option kwargs [:lower, :upper, nil] :case

Controls whether to output lowercase or uppercase XML special
characters. Defaults to lowercase hexadecimal.

@return [String]

The XML escaped String.

@raise [ArgumentError]

The `case:` keyword argument is invalid.

@example

Encoding::XML.escape("one & two")
# => "one &amp; two"

@example Uppercase escaped characters:

Encoding::XML.escape("one & two", case: :upper)
# => "one &AMP; two"
# File lib/ronin/support/encoding/xml.rb, line 274
# XML escapes the data, replacing each XML special character with its
# escaped form and leaving every other character untouched.
#
# Validly encoded Strings are processed character by character; Strings
# containing invalid byte sequences are processed byte by byte. All keyword
# arguments (currently `case:`) are forwarded to escape_byte, which raises
# ArgumentError when `case:` is invalid.
def self.escape(data,**kwargs)
  escaped = String.new

  if data.valid_encoding?
    data.each_char do |char|
      codepoint    = char.ord
      escaped_char = escape_byte(codepoint,**kwargs)

      # escape_byte is byte-oriented: an unmapped value in 0x80..0xff comes
      # back as one raw binary byte. For a character of a validly encoded
      # String that byte is not the character's real encoding (e.g. "é"
      # would become "\xE9"), and mixing it with other non-ASCII characters
      # raises Encoding::CompatibilityError. When escape_byte merely passed
      # a non-ASCII codepoint through, keep the original character instead.
      if !char.ascii_only? && escaped_char.bytes == [codepoint]
        escaped_char = char
      end

      escaped << escaped_char
    end
  else
    data.each_byte do |byte|
      escaped << escape_byte(byte,**kwargs)
    end
  end

  return escaped
end
escape_byte(byte,**kwargs) click to toggle source

Escapes the byte as an XML escaped character, if it is an XML special character.

@param [Integer] byte

The byte to XML escape.

@param [Hash{Symbol => Object}] kwargs

Additional keyword arguments.

@option kwargs [:lower, :upper, nil] :case

Controls whether to output lowercase or uppercase XML special
characters. Defaults to lowercase hexadecimal.

@return [String]

The XML escaped character, or the unchanged character if the byte is not special.

@raise [ArgumentError]

The `case:` keyword argument is invalid.

@example

Encoding::XML.escape_byte(0x41)
# => "A"
Encoding::XML.escape_byte(0x26)
# => "&amp;"

@example Uppercase encoding:

Encoding::XML.escape_byte(0x26, case: :upper)
# => "&AMP;"
# File lib/ronin/support/encoding/xml.rb, line 91
# Looks the byte up in the escape table selected by the `case:` keyword
# (ESCAPE_BYTES_UPPERCASE for `:upper`, ESCAPE_BYTES for `:lower` or when
# omitted) and returns the mapped replacement.
#
# A value with no table entry is returned as the character itself: a
# binary single-byte String for 0..0xff, or a UTF-8 character for anything
# larger.
#
# Raises ArgumentError when `case:` is not `:lower`, `:upper` or `nil`.
def self.escape_byte(byte,**kwargs)
  letter_case = kwargs[:case]

  replacements = if :upper == letter_case
                   ESCAPE_BYTES_UPPERCASE
                 elsif :lower == letter_case || letter_case.nil?
                   ESCAPE_BYTES
                 else
                   raise(ArgumentError,"case (#{kwargs[:case].inspect}) keyword argument must be either :lower, :upper, or nil")
                 end

  return replacements[byte] if replacements.key?(byte)

  # Values that fit in a single byte stay raw; larger ones are codepoints.
  passthrough_encoding = byte.between?(0,0xff) ? Encoding::ASCII_8BIT : Encoding::UTF_8

  byte.chr(passthrough_encoding)
end
unescape(data) click to toggle source

Unescapes the XML encoded data.

@param [String] data

The data to XML unescape.

@return [String]

The unescaped String.

@example

Encoding::XML.unescape("&lt;p&gt;one &lt;span&gt;two&lt;/span&gt;&lt;/p&gt;")
# => "<p>one <span>two</span></p>"

@see https://rubydoc.info/stdlib/cgi/CGI.unescapeHTML

# File lib/ronin/support/encoding/xml.rb, line 314
# Unescapes XML encoded data.
#
# Scans the data left to right and replaces:
#
# * the named characters `&apos;`, `&amp;`, `&quot;`, `&lt;` and `&gt;`
#   (matched case-insensitively) with their ESCAPED_CHARS value,
# * decimal references such as `&#65;`,
# * hexadecimal references such as `&#x41;` or `&#X41;`,
#
# with the character they stand for. Everything else is copied through
# unchanged. The result is built as a UTF-8 String.
#
# A numeric reference that does not name a valid Unicode codepoint (too
# large, or a surrogate such as `&#xD800;`) is left in the output as literal
# text instead of raising RangeError, so arbitrary input can always be
# unescaped.
def self.unescape(data)
  unescaped = String.new(encoding: Encoding::UTF_8)
  scanner   = StringScanner.new(data)

  until scanner.eos?
    unescaped << if (named_char = scanner.scan(/&(?:apos|amp|quot|lt|gt);/i))
                   ESCAPED_CHARS.fetch(named_char.downcase)
                 elsif (decimal_char = scanner.scan(/&#\d+;/))
                   begin
                     # strip the leading "&#" and trailing ";"
                     decimal_char[2..-2].to_i.chr(Encoding::UTF_8)
                   rescue RangeError
                     # not a valid codepoint; keep the reference verbatim
                     decimal_char
                   end
                 elsif (hex_char     = scanner.scan(/&#x[a-f0-9]+;/i))
                   begin
                     # strip the leading "&#x" and trailing ";"
                     hex_char[3..-2].to_i(16).chr(Encoding::UTF_8)
                   rescue RangeError
                     # not a valid codepoint; keep the reference verbatim
                     hex_char
                   end
                 else
                   # not an escape sequence: copy one character through
                   scanner.getch
                 end
  end

  return unescaped
end