June Meeting 2007
From DojoWiki
The June meeting is scheduled for Thursday, June 28th.
The presentation will be on Dave Thomas' BloomFilter Kata
A key part of the Bloom Filter is a Hash Function. You can use a built in function, or write your own.. I'll post some samples in preparation as we won't really spend time covering this in the Dojo.
I'm going to present in Java... But I just hacked up a quick C# version (A Java version is below)
Here is a homework assignment I found that describes the BloomFilter concepts in much more detail http://www.arl.wustl.edu/projects/fpx/cse535/mp/mp2/mp2.html
using System;
using System.Collections.Generic;
using System.Text;
using System.Collections;
namespace BloomFilters
{
class Program
{
static void Main(string[] args)
{
BitArray bitArray = new BitArray(26);
int j = 0;
foreach (bool i in bitArray)
{
Console.WriteLine("BitArray[{0}] == [{1}]", j++, i);
}
List<string> dict = new List<string>();
#region add words
dict.Add("There");
dict.Add("are");
dict.Add("many");
dict.Add("circumstances");
dict.Add("where");
dict.Add("we");
dict.Add("need");
dict.Add("to");
dict.Add("find");
dict.Add("out");
dict.Add("if");
dict.Add("something");
dict.Add("is");
dict.Add("a");
dict.Add("member");
dict.Add("of");
dict.Add("set");
dict.Add("and");
dict.Add("algorithms");
dict.Add("for");
#endregion
foreach (string word in dict)
{
bitArray.Set(new Hasher().GetHash(word), true) ;
}
j = 0;
foreach (bool i in bitArray)
{
Console.WriteLine("BitArray[{0}] == [{1}]", j++, i);
}
Console.ReadLine();
}
}
class Hasher
{
public int GetHash(string value)
{
int hashValue = 0;
foreach (char ch in value.ToCharArray())
{
hashValue += ch;
}
return hashValue % 26;
}
}
}
And here are the Java files (with JUnit tests) BloomFilter.java
package org.pghcodingdojo.bloomfilter;
public class BloomFilter {
private boolean[] boolArray = new boolean[26] ;
public void Add(int position) {
getBoolArray()[position] = true ;
}
public boolean[] getBoolArray() {
return boolArray;
}
}
Hash.java
package org.pghcodingdojo.bloomfilter;
public class Hash {
public int fromString(String value) {
long sum = 0 ;
for (char ch : value.toCharArray()) {
sum += ch ;
}
return (int)sum % 26 ;
}
}
TestBloomFilter.java
package org.pghcodingdojo.bloomfilter.tests;
import junit.framework.TestCase;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.pghcodingdojo.bloomfilter.BloomFilter;
import org.pghcodingdojo.bloomfilter.Hash;
public class TestBloomFilter extends TestCase {
@Before
public void setUp() throws Exception {
}
@After
public void tearDown() throws Exception {
}
@Test
public void testCreateBloomFilter() {
BloomFilter filter = new BloomFilter() ;
assertNotNull(filter) ;
}
@Test
public void testPopulateBloomFilter() {
BloomFilter filter = new BloomFilter() ;
Hash hash = new Hash() ;
filter.Add(hash.fromString("A")) ;
assertTrue(filter.getBoolArray()[13] );
}
@Test
public void testAAAInFilter() {
BloomFilter filter = new BloomFilter() ;
Hash hash = new Hash() ;
filter.Add(hash.fromString("AAA")) ;
assertTrue(filter.getBoolArray()[13] );
}
public void testTwoInOne() {
BloomFilter filter = new BloomFilter() ;
Hash hash = new Hash() ;
filter.Add(hash.fromString("A")) ;
filter.Add(hash.fromString("AAA")) ;
assertTrue(filter.getBoolArray()[13] );
filter.Add(hash.fromString("AAB")) ;
assertTrue(filter.getBoolArray()[14] );
}
public void testNotFound() {
BloomFilter filter = new BloomFilter() ;
Hash hash = new Hash() ;
filter.Add(hash.fromString("AAB")) ;
assertTrue(filter.getBoolArray()[14] );
}
public void testFoundButNotAdded() {
BloomFilter filter = new BloomFilter() ;
Hash hash = new Hash() ;
filter.Add(hash.fromString("A")) ;
assertTrue(filter.getBoolArray()[hash.fromString("AAA")] );
}
}
