CodeKata – Kata Five Solution

Kata five requires the implementation of a Bloom filter. Here is my solution in Ruby using a non-optimized bitmap implementation and a reduced MD5 digest for the hashing functions.

class BloomFilter
  
  def initialize(dictionary, num_bits, num_hashes)
    @bitmap = Bitmap.new(num_bits)
    @hashes = Array.new(num_hashes) { |i| BloomHash.new(i, num_bits-1) }
    
    # compute the hash values for each word in the dictionary
    # and set the appropriate bits in the bitmap
    File.new(dictionary, 'r').each_line do |word|
      @hashes.each { |h| @bitmap[h.compute(word.chomp)] = 1 }
    end    
  end
  
  def search(word)
    @hashes.each { |h| return false if @bitmap[h.compute(word)] == 0 }
    return true
  end

end


class BloomHash

  def initialize(seed, max_value)
    @offset = seed
    @num_digits = 0
    @max_value = max_value
    while(max_value > 0) do
      @num_digits += 1
      max_value >>= 4
    end
  end
  
  def compute(word)
    digest = Digest::MD5.hexdigest(word)
    return digest[@offset, @num_digits].to_i(16) % @max_value
  end
  
end


class Bitmap < Array
  
  def initialize(size)
    super(size)
    self.fill(0)
  end
  
end

Tags: ,

Comments are closed.