Source code for mrcrowbar.statistics
import array
import math
from collections import Counter
[docs]
class Stats( object ):
    """Helper class for performing some basic statistical analysis on binary data."""

    def __init__( self, buffer ):
        """Generate a Stats instance for a byte string and analyse the data.

        buffer
            Source data to analyse. Iterating it is expected to yield integers
            in the range 0-255 (e.g. bytes or bytearray); values outside that
            range are ignored by the histogram.
        """
        #: Total number of items in the source data.
        self.samples = len( buffer )

        # Python's Counter object uses a fast path
        cc = Counter( buffer )

        #: Byte histogram for the source data.
        self.histo = array.array( 'L', (cc.get( i, 0 ) for i in range( 256 )) )

        #: Shannon entropy calculated for the source data.
        self.entropy = 0.0
        for count in self.histo:
            # Skip empty slots: log2( 0 ) is undefined. This also means an
            # empty buffer never divides by a zero sample count.
            if count != 0:
                cover = count/self.samples
                self.entropy += -cover * math.log2( cover )

    @staticmethod
    def _check_width( width ):
        """Raise ValueError unless width is a positive divisor of 256."""
        # The sign check must come first; evaluating 256 % 0 would raise
        # ZeroDivisionError instead of the intended ValueError.
        if width <= 0:
            raise ValueError( 'Width of the histogram must be greater than zero' )
        if width > 256:
            raise ValueError( 'Width of the histogram must be less than or equal to 256' )
        if (256 % width) != 0:
            raise ValueError( 'Width of the histogram must be a divisor of 256' )

    def histogram( self, width ):
        """Return the byte histogram regrouped into a smaller number of buckets.

        width
            Number of buckets to return. Must be a positive divisor of 256.

        Returns a list of ``width`` integers, each the total count over
        ``256//width`` consecutive byte values.

        Raises ValueError if width is not positive, is greater than 256, or
        does not divide 256 evenly.
        """
        self._check_width( width )
        bucket = 256//width
        return [sum( self.histo[i:i+bucket] ) for i in range( 0, 256, bucket )]

    def ansi_format( self, width=64, height=12 ):
        """Return a human readable ANSI-terminal printout of the stats.

        width
            Custom width for the graph (in characters).

        height
            Custom height for the graph (in characters).

        Raises ValueError if width is not a positive divisor of 256.
        """
        # Reject bad input before doing the lazy import below.
        self._check_width( width )

        from mrcrowbar.ansi import format_bar_graph_iter

        buckets = self.histogram( width )
        result = [
            ' {}\n'.format( line )
            for line in format_bar_graph_iter( buckets, width=width, height=height )
        ]
        result.append( '╘'+('═'*width)+'╛\n' )
        result.append( 'entropy: {:.10f}\n'.format( self.entropy ) )
        result.append( 'samples: {}'.format( self.samples ) )
        return ''.join( result )

    def print( self, *args, **kwargs ):
        """Print the graphical version of the results produced by ansi_format()."""
        # Inside a method body the bare name `print` still resolves to the
        # builtin; the method of the same name is only reachable via self.
        print( self.ansi_format( *args, **kwargs ) )

    def __str__( self ):
        """Return the default-sized ansi_format() rendering."""
        return self.ansi_format()