class CBOR::Packer
Public Class Methods
from_item(item)
click to toggle source
# File lib/cbor-packed.rb, line 12
#
# Builds a Packer for +item+ by analysing the data items it contains.
#
# Two kinds of redundancy are looked for:
# * data items that occur more than once (these end up in the match array), and
# * strings that share a common prefix with their neighbour in sorted order
#   (these end up in the prefix array and the translate table).
#
# item:: the root data item; it must respond to +cbor_visit+, and every
#        repeated item must respond to +to_cbor+.
#
# Reads the global <tt>$compression_hack</tt> as an extra minimum on the
# length of a shared prefix; an unset (nil) global is treated as 0.
#
# Returns the result of <tt>new(match_array, prefixes, [], translate)</tt>.
def self.from_item(item)
  count = Hash.new(0)
  item.cbor_visit do |o|
    # if the count gets > 1, we can stop visiting, so we return false in the block
    (count[o] += 1) == 1
  end
  # count is now a Hash with all data items as keys and the number of times
  # they occur as values

  # choose those matches that are occurring > 1, make first rough estimate of saving
  good_count = count.select { |_k, v| v > 1 }.map do |k, v|
    l = k.to_cbor.length
    [k, v, l, (v - 1) * (l - 1)]
  end
  # good_count is now an array of [k, v, length, savings] tuples

  # select those that potentially have savings (> 0) and sort by best saving first
  better_count = good_count.select { |a| a[3] > 0 }.sort_by { |a| -a[3] }
  # now: take the best out???; re-visit that reducing by n; re-sort and filter???

  # sort by descending number of references we'll get -- the higher reference
  # counts go first
  match_array = better_count.sort_by { |a| -a[1] }.map { |a| a[0] }

  # XXX the below needs to be done with arrays and (hard!) maps as well
  # do this on the reverse to find common suffixes
  # select all strings (ignoring reference counts) and sort them
  strings = count.keys.grep(String).sort

  # string_common: list of [string, n] pairs, where n is the number of leading
  # characters the string shares with the next string in sorted order
  # (0 for the last string)
  string_common = strings.each_cons(2).map do |x, y|
    l = x.chars.zip(y.chars).take_while { |a, b| a == b }.length # should be bytes
    [x, l]
  end
  string_common << [strings.last, 0] unless strings.empty?

  translate = {}
  prefixes = []
  # An unset global would otherwise make the Integer addition below raise TypeError.
  hack = $compression_hack || 0
  prefix_stack = [[0, false]] # sentinel: prefix length 0, no tag
  pos = 0 # mirror prefix_stack[-1][0]
  tag_no = REF_TAG
  string_common.each do |s, l|
    if l > pos + 2 + hack
      # The shared prefix is long enough to be worth its own entry.
      t = prefix_stack[-1][1]
      prefixes << if t # if we still have a prefix left, build on top of it
                    CBOR::Tagged.new(t, s[pos...l])
                  else
                    s[0...l]
                  end
      prefix_stack << [l, tag_no]
      pos = l
      # Tag numbers are handed out as REF_TAG, then 225..255, then 28704 upwards.
      tag_no += 1
      tag_no = 225 if tag_no == REF_TAG + 1
      tag_no = 28704 if tag_no == 256
    end
    t = prefix_stack[-1][1]
    # if we still have a viable prefix left, express s relative to it
    translate[s] = CBOR::Tagged.new(t, s[pos..-1]) if t
    # pop the prefix stack down to what the next string still shares
    while l < pos
      prefix_stack.pop
      pos = prefix_stack[-1][0]
    end
  end

  # XXX test replacing match_array here
  match_array = match_array.map { |v| translate[v] || v }
  new(match_array, prefixes, [], translate)
end
new(match_array, prefix_array, suffix_array, translate)
click to toggle source
# File lib/cbor-packed.rb, line 93
#
# Sets up the lookup table that maps a data item to its packed replacement.
#
# match_array::  shared items; the entry at index i becomes the target of
#                reference i.
# prefix_array:: stored as given.
# suffix_array:: stored as given.
# translate::    Hash of item => replacement. It is adopted as the lookup
#                table itself (not copied), so the caller's Hash is modified.
def initialize(match_array, prefix_array, suffix_array, translate)
  @hit = translate
  # XXX: make sure we don't overwrite the existing prefix compression values!
  # (this should really be done downwards, ...)  16 x 1, 160 x 2, (512-48) x 3

  # The first 16 entries are referenced through CBOR::Simple values 0..15.
  match_array.first(16).each_with_index do |o, i|
    @hit[o] = CBOR::Simple.new(i)
  end
  # (A disabled variant mapped entries 16...128 to CBOR::Simple.new(i + 128).)

  # Every further entry is referenced through a REF_TAG-tagged integer; the
  # expression maps i = 0, 1, 2, 3, ... to 0, -1, 1, -2, ...
  (match_array[16..-1] || []).each_with_index do |o, i|
    @hit[o] = CBOR::Tagged.new(REF_TAG, (i >> 1) ^ -(i & 1))
  end

  # add one round of transitive matching: if a replacement is itself a key,
  # point straight at its replacement. Only existing keys are reassigned, so
  # the Hash is not resized while it is being iterated.
  @hit.each do |k, v|
    r = @hit[v]
    @hit[k] = r if r
  end

  @match_array = match_array
  # @prefix = {} -- do that later
  @prefix_array = prefix_array
  @suffix_array = suffix_array
end
Public Instance Methods
has(o)
click to toggle source
# File lib/cbor-packed.rb, line 122
#
# Looks up +o+ in the replacement table.
#
# Returns the value stored for +o+, or nil when the table has no entry for it.
def has(o)
  @hit[o]
end
pack(pa)
click to toggle source
# File lib/cbor-packed.rb, line 125
#
# Wraps +pa+ together with this packer's tables in a PACKED_TAG-tagged array.
#
# pa:: the data item to wrap; it is placed last in the array, unchanged.
#
# Returns a CBOR::Tagged whose value is
# <tt>[match_array, prefix_array, suffix_array, pa]</tt>.
def pack(pa)
  # Don't forget to pack the match_array!
  CBOR::Tagged.new(PACKED_TAG, [@match_array, @prefix_array, @suffix_array, pa])
end