module Buckets

Buckets provides the core bucketing functionality of the TLSH algorithm.

Constants

CODE_SIZE
NUM_BUCKETS
SALT
WINDOW_LENGTH

Public Class Methods

buckets_binary(buckets, q1, q2, q3) click to toggle source
# File lib/tlsh/buckets.rb, line 9
# Packs the bucket counters into CODE_SIZE integers, four buckets per integer.
#
# For group +i+ the four values buckets[4 * i] .. buckets[4 * i + 3] are each
# turned into a contribution by +addition+ (using their position +j+ within
# the group) and the four contributions are summed. Groups are emitted in
# reverse order: group 0 ends up in the last slot of the result.
#
# @param buckets [Array] bucket values, indexed up to 4 * CODE_SIZE - 1
# @param q1 [Object] first threshold forwarded to +addition+
# @param q2 [Object] second threshold forwarded to +addition+
# @param q3 [Object] third threshold forwarded to +addition+
# @return [Array<Integer>] CODE_SIZE packed values, last group first
def buckets_binary(buckets, q1, q2, q3)
  packed = (0...CODE_SIZE).map do |group|
    (0..3).sum { |slot| addition(q1, q2, q3, slot, buckets[4 * group + slot]) }
  end

  # Group 0 belongs at index CODE_SIZE - 1, so flip the order.
  packed.reverse
end
fill_buckets(input) click to toggle source
# File lib/tlsh/buckets.rb, line 24
# Seeds the sliding window from the start of +input+ and hands over to
# +fill_buckets_looping+ to process the remaining elements.
#
# The first WINDOW_LENGTH elements (with nil entries dropped) form the
# initial window; it is passed on in reverse order, and its size is used as
# the starting value of the running size counter.
#
# @param input [Array, Enumerable] elements to bucket; enumerables are
#   converted with +to_a+ first
# @return [Object] whatever +fill_buckets_looping+ returns
def fill_buckets(input)
  # Materialise enumerables so the input can be sliced and indexed.
  input = input.to_a if input.is_a?(Enumerable)

  head = input[0..WINDOW_LENGTH - 1].compact
  window = head[0..5].reverse

  fill_buckets_looping(input, head.size, window)
end

Private Class Methods

addition(q1, q2, q3, j, k) click to toggle source
# File lib/tlsh/buckets.rb, line 37
# Computes the contribution of one value to a packed group.
#
# +k+ is compared against the thresholds from highest to lowest (q3, q2, q1;
# presumably quartile boundaries - confirm against the caller). The first
# threshold that is strictly less than +k+ selects a code of 3, 2 or 1
# respectively, which is shifted left by 2 * j bits. If no threshold is
# below +k+ the contribution is 0.
#
# @param q1 [Comparable] lowest threshold
# @param q2 [Comparable] middle threshold
# @param q3 [Comparable] highest threshold
# @param j [Integer] position within the group; selects the 2-bit slot
# @param k [Comparable] value being classified
# @return [Integer] the code shifted into slot +j+, or 0
def addition(q1, q2, q3, j, k)
  # Thresholds are listed highest first so the first hit yields the largest code.
  rank = [q3, q2, q1].index { |threshold| threshold < k }
  return 0 unless rank

  (3 - rank) << j * 2
end
fill_buckets_looping(input, file_size, chunk) click to toggle source
# File lib/tlsh/buckets.rb, line 49
# Slides the window across +input+, updating the bucket counters and the
# running checksum once per window position.
#
# Each pass hashes [chunk[0], chunk[1], previous checksum] with salt 0 to get
# the new checksum, then lets +update_buckets_and_chunk+ bump the counters and
# shift the window. The body always runs at least once, even when +input+ has
# no element at index WINDOW_LENGTH or beyond. After each pass the next input
# element (starting at index WINDOW_LENGTH) is written to chunk[0] and
# +file_size+ is incremented.
#
# Note: +chunk+ is modified in place (chunk[0] is overwritten here).
#
# @param input [Array] full input; read via +size+ and integer indexing
# @param file_size [Integer] number of elements already counted by the caller
# @param chunk [Array] current window, newest element first
# @return [Array] [bucket counters, final checksum, final file_size]
def fill_buckets_looping(input, file_size, chunk)
  counts = Array.new(NUM_BUCKETS, 0)
  triplet = []
  checksum = 0
  # Index of the next input element to feed into the window.
  cursor = WINDOW_LENGTH
  total = input.size

  loop do
    triplet[0], triplet[1], triplet[2] = chunk[0], chunk[1], checksum
    checksum = DigestHash.pearson_hash(0, triplet)
    counts, triplet, chunk = update_buckets_and_chunk(counts, triplet, chunk)

    break if cursor >= total

    chunk[0] = input[cursor]
    cursor += 1
    file_size += 1
  end

  [counts, checksum, file_size]
end
update_buckets_and_chunk(buckets, chunk3, chunk) click to toggle source
# File lib/tlsh/buckets.rb, line 73
# Increments six bucket counters for the current window and then shifts the
# window by one position.
#
# +chunk3+ is reused as a scratch triplet: before each hash one of its slots
# (index 1 or 2) is overwritten with an element of +chunk+, the triplet is
# hashed with the next entry of SALT, and the bucket at the resulting index
# is incremented. chunk3[0] is never written here. Finally chunk[1..-1] is
# replaced by the old chunk[0..3], leaving chunk[0] to be overwritten by the
# caller.
#
# All three arguments are modified in place and returned.
#
# @param buckets [Array<Integer>] counters indexed by hash value
# @param chunk3 [Array] scratch triplet; slot 0 must already be set
# @param chunk [Array] current window; indices 1..4 are read
# @return [Array] [buckets, chunk3, chunk]
def update_buckets_and_chunk(buckets, chunk3, chunk)
  # Each step is [chunk3 slot to overwrite, chunk index to copy from];
  # the n-th step hashes with SALT[n]. Order matters because slots persist
  # from one step to the next.
  steps = [[2, 2], [2, 3], [1, 2], [2, 4], [1, 1], [1, 3]]

  steps.each_with_index do |(slot, source), salt_index|
    chunk3[slot] = chunk[source]
    buckets[DigestHash.pearson_hash(SALT[salt_index], chunk3)] += 1
  end

  # Shift the window: drop the oldest element, duplicate the newest at the front.
  chunk[1..-1] = chunk[0..3]

  [buckets, chunk3, chunk]
end