Implementing dictionaries with Hash Tables and separate chaining

27.04.2015

Collision resolution can handled either with open addressing or chaining. For the tradeoffs, see the wikipedia entry.
TL;DR: open addressing is faster as long as the load factor is < 80%, for obvious reasons. Also from wikipedia: chained hash tables remain effective even when the number of table entries n is much higher than the number of slots. For example, a chained hash table with 1000 slots and 10,000 stored keys (load factor 10) is five to ten times slower than a 10,000-slot table (load factor 1); but still 1000 times faster than a plain sequential list.

Here’s a quick’n’dirty implementation of chaining; I used a list of tuples (key, val) instead of writing a linked list from scratch cause I’m lazy.

class HT(object):
    def __init__(self):
        self.size = 5
        self.bucket = [ None for i in range(1, self.size+1) ]
        self.data = [ [] for i in range(1, self.size+1) ]

    def comphash(self, key):
        return key % self.size

    def insert(self, key, value):
        hash_value = self.comphash(key)
        if self.bucket[hash_value] is None:
            self.bucket[hash_value] = key
        self.data[hash_value].append((key, value))

    def get(self, key):
        hash_value = self.comphash(key)
        if self.bucket[hash_value] is None:
            return None
        else:
            val = [ v for (k, v) in self.data[hash_value] if k == key ]
            if len(val) == 0:
                return None
            return val[0]

    def delete(self, key):
        hash_value = self.comphash(key)
        if self.bucket[hash_value] is None:
            return None
        else:
            for (k, v) in self.data[hash_value]:
                if k == key:
                    self.data[hash_value].remove((k, v))
                    break
            if len(self.data[hash_value]) == 0:
                self.bucket[hash_value] = None

    def __getitem__(self, key):
        return self.get(key)
    
    def __setitem__(self, key, value):
        return self.insert(key, value)

    def __delitem__(self, key):
        return self.delete(key)


d = HT()
d[1] = 'one'
d[11] = 'eleven'
d[3] = 'three'

print d.bucket
print d.data

del d[11]
print d.bucket
print d.data
print d[11]