Implementing dictionaries with Hash Tables and separate chaining
Collision resolution can handled either with open addressing or chaining. For the tradeoffs, see the wikipedia entry.
TL;DR: open addressing is faster as long as the load factor is < 80%, for obvious reasons.
Also from wikipedia: chained hash tables remain effective even when the number of table entries n is much higher than the number of slots. For example, a chained hash table with 1000 slots and 10,000 stored keys (load factor 10) is five to ten times slower than a 10,000-slot table (load factor 1); but still 1000 times faster than a plain sequential list.
Here’s a quick’n’dirty implementation of chaining; I used a list of tuples (key, val)
instead of writing a linked list from scratch cause I’m lazy.
class HT(object): def __init__(self): self.size = 5 self.bucket = [ None for i in range(1, self.size+1) ] self.data = [ [] for i in range(1, self.size+1) ] def comphash(self, key): return key % self.size def insert(self, key, value): hash_value = self.comphash(key) if self.bucket[hash_value] is None: self.bucket[hash_value] = key self.data[hash_value].append((key, value)) def get(self, key): hash_value = self.comphash(key) if self.bucket[hash_value] is None: return None else: val = [ v for (k, v) in self.data[hash_value] if k == key ] if len(val) == 0: return None return val[0] def delete(self, key): hash_value = self.comphash(key) if self.bucket[hash_value] is None: return None else: for (k, v) in self.data[hash_value]: if k == key: self.data[hash_value].remove((k, v)) break if len(self.data[hash_value]) == 0: self.bucket[hash_value] = None def __getitem__(self, key): return self.get(key) def __setitem__(self, key, value): return self.insert(key, value) def __delitem__(self, key): return self.delete(key) d = HT() d[1] = 'one' d[11] = 'eleven' d[3] = 'three' print d.bucket print d.data del d[11] print d.bucket print d.data print d[11]