Implementing dictionaries with Hash Tables and separate chaining
Collision resolution can handled either with open addressing or chaining. For the tradeoffs, see the wikipedia entry.
TL;DR: open addressing is faster as long as the load factor is < 80%, for obvious reasons.
Also from wikipedia: chained hash tables remain effective even when the number of table entries n is much higher than the number of slots. For example, a chained hash table with 1000 slots and 10,000 stored keys (load factor 10) is five to ten times slower than a 10,000-slot table (load factor 1); but still 1000 times faster than a plain sequential list.
Here’s a quick’n’dirty implementation of chaining; I used a list of tuples (key, val) instead of writing a linked list from scratch cause I’m lazy.
class HT(object):
def __init__(self):
self.size = 5
self.bucket = [ None for i in range(1, self.size+1) ]
self.data = [ [] for i in range(1, self.size+1) ]
def comphash(self, key):
return key % self.size
def insert(self, key, value):
hash_value = self.comphash(key)
if self.bucket[hash_value] is None:
self.bucket[hash_value] = key
self.data[hash_value].append((key, value))
def get(self, key):
hash_value = self.comphash(key)
if self.bucket[hash_value] is None:
return None
else:
val = [ v for (k, v) in self.data[hash_value] if k == key ]
if len(val) == 0:
return None
return val[0]
def delete(self, key):
hash_value = self.comphash(key)
if self.bucket[hash_value] is None:
return None
else:
for (k, v) in self.data[hash_value]:
if k == key:
self.data[hash_value].remove((k, v))
break
if len(self.data[hash_value]) == 0:
self.bucket[hash_value] = None
def __getitem__(self, key):
return self.get(key)
def __setitem__(self, key, value):
return self.insert(key, value)
def __delitem__(self, key):
return self.delete(key)
d = HT()
d[1] = 'one'
d[11] = 'eleven'
d[3] = 'three'
print d.bucket
print d.data
del d[11]
print d.bucket
print d.data
print d[11]