Implementing Python dictionaries with Hash Tables
This implementation uses open addressing to handle collision resolution, and two lists for pairing up keys
and values
, so that self.slots[x] -> self.data[x]
.
class HashTable: def __init__(self): self.size = 11 self.slots = [None] * self.size self.data = [None] * self.size self.length = 0 def hashString(self, string, tsize): sum = 0 for pos, val in enumerate(string): sum += (pos+1)*ord(val) return sum % tsize def reHash(self, old_hash, size): return (old_hash+1) % size def put(self, key, data): hash_value = self.hashString(key, len(self.slots)) # insert new key if self.slots[hash_value] == None: self.slots[hash_value] = key self.data[hash_value] = data self.length += 1 else: # replace data if self.slots[hash_value] == key: self.data[hash_value] = data else: # rehash until it finds an empty slot or the same key next_slot = self.reHash(hash_value, len(self.slots)) while self.slots[next_slot] != None and \ self.slots[next_slot] != key: next_slot = self.reHash(next_slot, len(self.slots)) # got an empty slot, use it if self.slots[next_slot] == None: self.slots[next_slot] = key self.data[next_slot] = data self.length += 1 # got the same key, replace data else: self.data[next_slot] = data def get(self, key): starts_slot = self.hashString(key, len(self.slots)) data = None position = starts_slot # loop through all the hashes and rehashes while self.slots[position] != None: # got it if self.slots[position] == key: data = self.data[position] break # rehash; if you complete a cycle, return (key not found) else: position = self.reHash(position, len(self.slots)) if position == starts_slot: break return data def delItem(self, key): # basically the same as get() starts_slot = self.hashString(key, len(self.slots)) position = starts_slot while self.slots[position] != None: if self.slots[position] == key: self.slots[position] = None self.data[position] = None self.length -= 1 break else: position = self.reHash(position, len(self.slots)) if position == starts_slot: break def __contains__(self, key): if self.get(key) is not None: return True else: return False def __len__(self): return self.length def __repr__(self): v = [(self.slots[pos], self.data[pos]) for pos, val in enumerate(self.slots) if val is not None] return str(v) def __getitem__(self, key): return self.get(key) def __setitem__(self, key, data): self.put(key, data) def __delitem__(self, key): self.delItem(key) h = HashTable() print len(h) h['milan'] = 'italy' h['nyc'] = 'usa' h['omg'] = '!!!' print h print len(h) del h['omg'] print h print len(h) print 'nyc' in h