module Ronin::Support::Encoding::XML
Contains methods for encoding/decoding and escaping/unescaping XML
data.
## Features
-
Supports lowercase (ex: `&amp;`) and uppercase (ex: `&AMP;`) encoding.
-
Supports decimal (ex: `&#65;`) and hexadecimal (ex: `&#x41;`) character encoding.
-
Supports zero-padding (ex: `&#0000065;`).
## Core-Ext Methods
-
{Integer#xml_escape}
-
{Integer#xml_encode}
-
{String#xml_escape}
-
{String#xml_unescape}
-
{String#xml_encode}
-
{String#xml_decode}
@api public
Constants
Public Class Methods
Alias for {unescape}.
@param [String] data
The data to XML unescape.
@return [String]
The unescaped String.
@see unescape
# File lib/ronin/support/encoding/xml.rb, line 243
#
# Decodes XML data. This is a thin alias: the data is handed straight to
# {unescape} and its result is returned unchanged.
#
# @param [String] data
#   The XML encoded data.
#
# @return [String]
#   Whatever {unescape} returns for the data.
#
# @see unescape
#
def self.decode(data)
  return unescape(data)
end
Encodes each character in the given data as an XML
character.
@param [String] data
The data to XML encode.
@param [Hash{Symbol => Object}] kwargs
Additional keyword arguments.
@option kwargs [:decimal, :hex] :format (:decimal)
The numeric format for the escaped characters.
@option kwargs [Boolean] :zero_pad (false)
Controls whether the escaped characters will be left-padded with up to seven `0` characters.
@option kwargs [:lower, :upper, nil] :case
Controls whether to output lowercase or uppercase XML special characters. Defaults to lowercase hexadecimal.
@return [String]
The XML encoded String.
@raise [ArgumentError]
The `format:` or `case:` keyword argument is invalid.
@example
Encoding::XML.encode("abc") # => "&#97;&#98;&#99;"
@example Zero-padding:
Encoding::XML.encode("abc", zero_pad: true) # => "&#0000097;&#0000098;&#0000099;"
@example Hexadecimal encoded characters:
Encoding::XML.encode("abc", format: :hex) # => "&#x61;&#x62;&#x63;"
@example Uppercase hexadecimal encoded characters:
Encoding::XML.encode("abc\xff", format: :hex, case: :upper) # => "&#X61;&#X62;&#X63;&#XFF;"
# File lib/ronin/support/encoding/xml.rb, line 216
#
# Encodes every unit of the data with {encode_byte} and concatenates the
# results.
#
# When the data has a valid encoding it is walked code point by code point;
# otherwise it is walked byte by byte.
#
# @param [String] data
#   The data to XML encode.
#
# @param [Hash{Symbol => Object}] kwargs
#   Keyword arguments forwarded untouched to {encode_byte}.
#
# @return [String]
#   The XML encoded String.
#
# @raise [ArgumentError]
#   Propagated from {encode_byte} when a keyword argument is invalid.
#
def self.encode(data,**kwargs)
  units = if data.valid_encoding? then data.each_codepoint
          else                         data.each_byte
          end

  units.each_with_object(String.new) do |unit,encoded|
    encoded << encode_byte(unit,**kwargs)
  end
end
Encodes the byte as an XML
numeric (decimal or hexadecimal) character.
@param [Integer] byte
The byte to XML encode.
@param [:decimal, :hex] format
The numeric format for the escaped characters.
@param [Boolean] zero_pad
Controls whether the escaped characters will be left-padded with up to seven `0` characters.
@param [Hash{Symbol => Object}] kwargs
Additional keyword arguments.
@option kwargs [:lower, :upper, nil] :case
Controls whether to output lowercase or uppercase XML special characters. Defaults to lowercase hexadecimal.
@return [String]
The XML decimal character.
@raise [ArgumentError]
The `format:` or `case:` keyword argument is invalid.
@example
Encoding::XML.encode_byte(0x41) # => "&#65;"
@example Zero-padding:
Encoding::XML.encode_byte(0x41, zero_pad: true) # => "&#0000065;"
@example Hexadecimal escaped characters:
Encoding::XML.encode_byte(0x41, format: :hex) # => "&#x41;"
@example Uppercase hexadecimal escaped characters:
Encoding::XML.encode_byte(0xff, format: :hex, case: :upper) # => "&#XFF;"
# File lib/ronin/support/encoding/xml.rb, line 150
#
# Encodes the byte as an XML numeric character reference, in either decimal
# (`&#65;`) or hexadecimal (`&#x41;` / `&#X41;`) form.
#
# @param [Integer] byte
#   The byte (or code point) to XML encode.
#
# @param [:decimal, :hex] format
#   The numeric format for the encoded character.
#
# @param [Boolean] zero_pad
#   When true the number is zero-padded to seven digits; otherwise decimal
#   values are unpadded and hexadecimal values are padded to two digits.
#
# @param [Hash{Symbol => Object}] kwargs
#   Additional keyword arguments.
#
# @option kwargs [:lower, :upper, nil] :case
#   Selects lowercase (`&#x..;`) or uppercase (`&#X..;`) hexadecimal output.
#   Has no effect on decimal output. Defaults to lowercase.
#
# @return [String]
#   The XML numeric character reference.
#
# @raise [ArgumentError]
#   The `format:` or `case:` keyword argument is invalid.
#
def self.encode_byte(byte, format: :decimal, zero_pad: false, **kwargs)
  # Validate `case:` before looking at the format, so that an invalid value
  # is rejected for decimal output too instead of being silently ignored.
  uppercase = case kwargs[:case]
              when :upper      then true
              when :lower, nil then false
              else
                raise(ArgumentError,"case (#{kwargs[:case].inspect}) keyword argument must be either :lower, :upper, or nil")
              end

  case format
  when :decimal
    template = zero_pad ? "&#%.7d;" : "&#%d;"
  when :hex
    template = if uppercase
                 zero_pad ? "&#X%.7X;" : "&#X%.2X;"
               else
                 zero_pad ? "&#x%.7x;" : "&#x%.2x;"
               end
  else
    raise(ArgumentError,"format (#{format.inspect}) must be :decimal or :hex")
  end

  template % byte
end
XML
escapes the data.
@param [String] data
The data to XML escape.
@param [Hash{Symbol => Object}] kwargs
Additional keyword arguments.
@option kwargs [:lower, :upper, nil] :case
Controls whether to output lowercase or uppercase XML special characters. Defaults to lowercase hexadecimal.
@return [String]
The XML escaped String.
@raise [ArgumentError]
The `case:` keyword argument is invalid.
@example
Encoding::XML.escape("one & two") # => "one &amp; two"
@example Uppercase escaped characters:
Encoding::XML.escape("one & two", case: :upper) # => "one &AMP; two"
# File lib/ronin/support/encoding/xml.rb, line 274
#
# XML escapes the data by passing each unit through {escape_byte} and
# concatenating the results.
#
# Data with a valid encoding is processed character by character; data with
# an invalid encoding is processed byte by byte.
#
# @param [String] data
#   The data to XML escape.
#
# @param [Hash{Symbol => Object}] kwargs
#   Keyword arguments forwarded untouched to {escape_byte}.
#
# @option kwargs [:lower, :upper, nil] :case
#   Controls whether to output lowercase or uppercase XML special
#   characters.
#
# @return [String]
#   The XML escaped String.
#
# @raise [ArgumentError]
#   Propagated from {escape_byte} when the `case:` keyword argument is
#   invalid.
#
def self.escape(data,**kwargs)
  escaped = String.new

  if data.valid_encoding?
    utf8 = (data.encoding == Encoding::UTF_8)

    data.each_char do |char|
      replacement = escape_byte(char.ord,**kwargs)

      # For unmapped values in 0x80..0xff escape_byte returns one raw binary
      # byte. That is right for a byte, but for a UTF-8 code point it would
      # corrupt the character (U+00E9 would become "\xE9") and make the next
      # non-ASCII UTF-8 append raise Encoding::CompatibilityError. Keep the
      # original character in that case.
      escaped << if utf8 &&
                    replacement.encoding == Encoding::ASCII_8BIT &&
                    !replacement.ascii_only?
                   char
                 else
                   replacement
                 end
    end
  else
    data.each_byte do |byte|
      escaped << escape_byte(byte,**kwargs)
    end
  end

  return escaped
end
Escapes the byte as an XML
character.
@param [Integer] byte
The byte to XML escape.
@param [Hash{Symbol => Object}] kwargs
Additional keyword arguments.
@option kwargs [:lower, :upper, nil] :case
Controls whether to output lowercase or uppercase XML special characters. Defaults to lowercase hexadecimal.
@return [String]
The XML decimal character.
@raise [ArgumentError]
The `case:` keyword argument is invalid.
@example
Encoding::XML.escape_byte(0x41) # => "A"
Encoding::XML.escape_byte(0x26) # => "&amp;"
@example Uppercase encoding:
Encoding::XML.escape_byte(0x26, case: :upper) # => "&AMP;"
# File lib/ronin/support/encoding/xml.rb, line 91
#
# Escapes a single byte. The byte is looked up in the lowercase or uppercase
# escape table; when the table has no entry the byte is returned as a
# one-character String instead.
#
# @param [Integer] byte
#   The byte to XML escape.
#
# @param [Hash{Symbol => Object}] kwargs
#   Additional keyword arguments.
#
# @option kwargs [:lower, :upper, nil] :case
#   Selects `ESCAPE_BYTES_UPPERCASE` (`:upper`) or `ESCAPE_BYTES`
#   (`:lower` or `nil`).
#
# @return [String]
#   The table entry for the byte, or the byte itself as a String
#   (ASCII-8BIT for 0..0xff, UTF-8 otherwise).
#
# @raise [ArgumentError]
#   The `case:` keyword argument is invalid.
#
def self.escape_byte(byte,**kwargs)
  letter_case = kwargs[:case]

  table = if letter_case == :upper
            ESCAPE_BYTES_UPPERCASE
          elsif letter_case.nil? || letter_case == :lower
            ESCAPE_BYTES
          else
            raise(ArgumentError,"case (#{letter_case.inspect}) keyword argument must be either :lower, :upper, or nil")
          end

  return table[byte] if table.key?(byte)

  # No escape defined: emit the value itself, as a raw byte when it fits in
  # one, otherwise as a UTF-8 character.
  fallback_encoding = if byte.between?(0,0xff) then Encoding::ASCII_8BIT
                      else                          Encoding::UTF_8
                      end

  byte.chr(fallback_encoding)
end
Unescapes the XML
encoded data.
@param [String] data
The data to XML unescape.
@return [String]
The unescaped String.
@example
Encoding::XML.unescape("&lt;p&gt;one &lt;span&gt;two&lt;/span&gt;&lt;/p&gt;") # => "<p>one <span>two</span></p>"
@see https://rubydoc.info/stdlib/cgi/CGI.unescapeHTML
# File lib/ronin/support/encoding/xml.rb, line 314
#
# Unescapes the XML encoded data.
#
# Recognizes the named entities `&apos;`, `&amp;`, `&quot;`, `&lt;` and
# `&gt;` (case-insensitively), decimal references (`&#65;`) and hexadecimal
# references (`&#x41;` / `&#X41;`). All other text is copied through
# unchanged.
#
# A numeric reference that does not denote a valid UTF-8 code point (a
# surrogate, a value above U+10FFFF, or an absurdly large number) is left
# in the output verbatim rather than raising `RangeError`.
#
# @param [String] data
#   The data to XML unescape.
#
# @return [String]
#   The unescaped String.
#
def self.unescape(data)
  unescaped = String.new(encoding: Encoding::UTF_8)
  scanner   = StringScanner.new(data)

  until scanner.eos?
    unescaped << if (named_char = scanner.scan(/&(?:apos|amp|quot|lt|gt);/i))
                   ESCAPED_CHARS.fetch(named_char.downcase)
                 elsif (numeric_char = scanner.scan(/&#(?:\d+|x[a-f0-9]+);/i))
                   # strip the leading "&#" and the trailing ";"
                   digits    = numeric_char[2..-2]
                   codepoint = if digits.start_with?('x','X') then digits[1..-1].to_i(16)
                               else                                digits.to_i
                               end

                   begin
                     codepoint.chr(Encoding::UTF_8)
                   rescue RangeError
                     # not a representable code point: keep the original text
                     numeric_char
                   end
                 else
                   scanner.getch
                 end
  end

  return unescaped
end