Source code for mrcrowbar.encoding

import codecs
import re
import struct


REGEX_CHARS = """()[]{}?*+-|^$\\.&~#="""
byte_escape = lambda char: '\\x{:02x}'.format( char ).encode( 'utf8' )

[docs] def regex_pattern_to_bytes( pattern, encoding='utf8', fixed_string=False, hex_format=False ): result = bytearray() # for hex format mode, strip out all whitespace characters first if hex_format: pattern = pattern.replace( ' ', '' ).replace( '\t', '' ).replace( '\n', '' ).replace( '\r', '' ) pointer = 0 repeat_block = False while pointer < len( pattern ): if pattern[pointer] == '\\' and not hex_format and not fixed_string: # an escaped character! if re.match( r'\\x[0-9A-Fa-f]{2}', pattern[pointer:pointer+4] ): # escaped hex byte result.extend( byte_escape( bytes.fromhex( pattern[pointer+2:pointer+4] )[0] ) ) pointer += 4 elif re.match( r'\\[\\\'"abfnrtv]', pattern[pointer:pointer+2] ): # escaped single character char_id, char_raw = '\\\'"abfnrtv', '\\\'"\a\b\f\n\r\t\v' char_map = {char_id[i]: ord( char_raw[i] ) for i in range( len( char_id ) )} result.extend( byte_escape( char_map[pattern[pointer+1]] ) ) pointer += 2 elif pattern[pointer+1] in REGEX_CHARS: # escaped character that's also a regex char result.extend( byte_escape( ord( pattern[pointer+1] ) ) ) pointer += 2 else: raise ValueError( 'Unknown escape sequence \\{}'.format( pattern[pointer+1] ) ) elif pattern[pointer] in REGEX_CHARS and not fixed_string: # a regex special character! inject it into the output unchanged if pattern[pointer] == '{': repeat_block = True elif pattern[pointer] == '}': repeat_block = False result.extend( pattern[pointer].encode( 'utf8' ) ) pointer += 1 elif repeat_block: # inside a repeat block, don't encode anything result.extend( pattern[pointer].encode( 'utf8' ) ) pointer += 1 elif hex_format: # we're in hex string mode; treat as raw hexadecimal if not re.match( r'[0-9A-Fa-f]{2}', pattern[pointer:pointer+2] ): raise ValueError( 'Sequence {} is not valid hexadecimal'.format( pattern[pointer:pointer+2] ) ) result.extend( byte_escape( int( pattern[pointer:pointer+2], 16 ) ) ) pointer += 2 else: # a normal character! encode as bytes, and inject escaped digits into the output for char in pattern[pointer].encode( encoding ): result.extend( byte_escape( char ) ) pointer += 1 return bytes( result )
RAW_TYPE_NAME = { (int, 1, 'signed', 'little'): 'int8', (int, 1, 'unsigned', 'little'): 'uint8', (int, 1, 'signed', 'big'): 'int8', (int, 1, 'unsigned', 'big'): 'uint8', (int, 1, 'signed', None): 'int8', (int, 1, 'unsigned', None): 'uint8', (int, 2, 'signed', 'little'): 'int16_le', (int, 3, 'signed', 'little'): 'int24_le', (int, 4, 'signed', 'little'): 'int32_le', (int, 8, 'signed', 'little'): 'int64_le', (int, 2, 'unsigned', 'little'): 'uint16_le', (int, 3, 'unsigned', 'little'): 'uint24_le', (int, 4, 'unsigned', 'little'): 'uint32_le', (int, 8, 'unsigned', 'little'): 'uint64_le', (float, 4, 'signed', 'little'): 'float32_le', (float, 8, 'signed', 'little'): 'float64_le', (int, 2, 'signed', 'big'): 'int16_be', (int, 3, 'signed', 'big'): 'int24_be', (int, 4, 'signed', 'big'): 'int32_be', (int, 8, 'signed', 'big'): 'int64_be', (int, 2, 'unsigned', 'big'): 'uint16_be', (int, 3, 'unsigned', 'big'): 'uint24_be', (int, 4, 'unsigned', 'big'): 'uint32_be', (int, 8, 'unsigned', 'big'): 'uint64_be', (float, 4, 'signed', 'big'): 'float32_be', (float, 8, 'signed', 'big'): 'float64_be', } RAW_TYPE_NAME_REVERSE = {v: k for k, v in RAW_TYPE_NAME.items()} RAW_TYPE_STRUCT = { (int, 1, 'unsigned'): 'B', (int, 1, 'signed'): 'b', (int, 2, 'unsigned'): 'H', (int, 2, 'signed'): 'h', (int, 4, 'unsigned'): 'I', (int, 4, 'signed'): 'i', (int, 8, 'unsigned'): 'Q', (int, 8, 'signed'): 'q', (float, 4, 'signed'): 'f', (float, 8, 'signed'): 'd', } FROM_RAW_TYPE = {} TO_RAW_TYPE = {} FROM_RAW_TYPE_ARRAY = {} TO_RAW_TYPE_ARRAY = {}
[docs] def get_raw_type_struct( format_type, field_size, signedness, endian, count=None ): return '{}{}{}'.format( '>' if endian == 'big' else '<', count if count is not None else '', RAW_TYPE_STRUCT[(format_type, field_size, signedness)] )
[docs] def get_raw_type_description( format_type, field_size, signedness, endian ): TYPE_NAMES = { int: 'integer', float: 'floating-point number', } type_name = TYPE_NAMES[format_type] return ('{}{}-bit {}{}'.format( ('signed ' if signedness == 'signed' else 'unsigned ') if format_type == int else '', field_size*8, type_name, ' ({}-endian)'.format(endian) if field_size>1 else '' ), type_name)
def _from_raw_type( type_id ): result = lambda buffer: struct.unpack( get_raw_type_struct( *type_id ), buffer )[0] result.__doc__ = 'Convert a {0} byte string to a Python {1}.'.format( *get_raw_type_description( *type_id ) ) return result def _to_raw_type( type_id ): result = lambda value: struct.pack( get_raw_type_struct( *type_id ), value ) result.__doc__ = 'Convert a Python {1} to a {0} byte string.'.format( *get_raw_type_description( *type_id ) ) return result def _from_raw_type_array( type_id ): result = lambda buffer: list( struct.unpack( get_raw_type_struct( *type_id, count=len( buffer )//type_id[1] ), buffer ) ) result.__doc__ = 'Convert a {0} byte string to a Python list of {1}s.'.format( *get_raw_type_description( *type_id ) ) return result def _to_raw_type_array( type_id ): result = lambda value_list: struct.pack( get_raw_type_struct( *type_id, count=len( value_list ) ), *value_list ) result.__doc__ = 'Convert a Python list of {1}s to a {0} byte string.'.format( *get_raw_type_description( *type_id ) ) return result def _from_generic_array( type_id, from_raw ): result = lambda buffer: [from_raw( buffer[i:i+type_id[1]] ) for i in range( 0, len( buffer ), type_id[1] )] result.__doc__ = 'Convert a {0} byte string to a Python list of {1}s.'.format( *get_raw_type_description( *type_id ) ) return result def _to_generic_array( type_id, to_raw ): result = lambda value_list: b''.join( [to_raw( value ) for value in value_list] ) result.__doc__ = 'Convert a Python list of {1}s to a {0} byte string.'.format( *get_raw_type_description( *type_id ) ) return result # autogenerate conversion methods based on struct for format_type, field_size, signedness in RAW_TYPE_STRUCT: endian_choices = [None, 'little', 'big'] if field_size == 1 else ['little', 'big'] for endian in endian_choices: type_id = (format_type, field_size, signedness, endian) FROM_RAW_TYPE[type_id] = _from_raw_type( type_id ) TO_RAW_TYPE[type_id] = _to_raw_type( type_id ) FROM_RAW_TYPE_ARRAY[type_id] = _from_raw_type_array( type_id ) TO_RAW_TYPE_ARRAY[type_id] = _to_raw_type_array( type_id ) # 24-bit types RAW_24 = ['int24_le', 'uint24_le', 'int24_be', 'uint24_be'] def _from_raw_24( type_id ): format_type, field_size, signedness, endian = type_id assert format_type == int assert field_size == 3 assert endian in ('little', 'big') assert signedness in ('signed', 'unsigned') def result( buffer ): if endian == 'little': buffer = buffer + (b'\xff' if (signedness == 'signed' and buffer[2] >= 0x80) else b'\x00') elif endian == 'big': buffer = (b'\xff' if (signedness == 'signed' and buffer[0] >= 0x80) else b'\x00') + buffer return FROM_RAW_TYPE[(format_type, 4, signedness, endian)]( buffer ) result.__doc__ = 'Convert a {0} byte string to a Python {1}.'.format( *get_raw_type_description( *type_id ) ) return result def _to_raw_24( type_id ): format_type, field_size, signedness, endian = type_id assert format_type == int assert field_size == 3 assert endian in ('little', 'big') assert signedness in ('signed', 'unsigned') def result( value ): if signedness == 'signed': assert value in range( -1<<23, 1<<23 ) else: assert value in range( 0, 1<<24 ) output = TO_RAW_TYPE[(format_type, 4, signedness, endian)]( value ) if endian == 'little': output = output[:3] elif endian == 'big': output = output[1:] return output result.__doc__ = 'Convert a Python {1} to a {0} byte string.'.format( *get_raw_type_description( *type_id ) ) return result for code in RAW_24: type_id = RAW_TYPE_NAME_REVERSE[code] FROM_RAW_TYPE[type_id] = _from_raw_24( type_id ) TO_RAW_TYPE[type_id] = _to_raw_24( type_id ) FROM_RAW_TYPE_ARRAY[type_id] = _from_generic_array( type_id, FROM_RAW_TYPE[type_id] ) TO_RAW_TYPE_ARRAY[type_id] = _to_generic_array( type_id, TO_RAW_TYPE[type_id] ) def _load_raw_types(): result = {} for type_id, from_func in FROM_RAW_TYPE.items(): result['from_{}'.format( RAW_TYPE_NAME[type_id] )] = from_func for type_id, to_func in TO_RAW_TYPE.items(): result['to_{}'.format( RAW_TYPE_NAME[type_id] )] = to_func for type_id, from_func in FROM_RAW_TYPE_ARRAY.items(): result['from_{}_array'.format( RAW_TYPE_NAME[type_id] )] = from_func for type_id, to_func in TO_RAW_TYPE_ARRAY.items(): result['to_{}_array'.format( RAW_TYPE_NAME[type_id] )] = to_func return result
[docs] def unpack( type_id, value ): if isinstance( type_id, str ): type_id = RAW_TYPE_NAME_REVERSE[type_id] return FROM_RAW_TYPE[type_id]( value )
[docs] def pack( type_id, value ): if isinstance( type_id, str ): type_id = RAW_TYPE_NAME_REVERSE[type_id] return TO_RAW_TYPE[type_id]( value )
[docs] def unpack_array( type_id, values ): if isinstance( type_id, str ): type_id = RAW_TYPE_NAME_REVERSE[type_id] return FROM_RAW_TYPE_ARRAY[type_id]( values )
[docs] def pack_array( type_id, values ): if isinstance( type_id, str ): type_id = RAW_TYPE_NAME_REVERSE[type_id] return TO_RAW_TYPE_ARRAY[type_id]( values )