Implementing Python dictionaries with Hash Tables
This implementation uses open addressing to handle collision resolution, and two lists for pairing up keys and values, so that self.slots[x] -> self.data[x].
class HashTable:
def __init__(self):
self.size = 11
self.slots = [None] * self.size
self.data = [None] * self.size
self.length = 0
def hashString(self, string, tsize):
sum = 0
for pos, val in enumerate(string):
sum += (pos+1)*ord(val)
return sum % tsize
def reHash(self, old_hash, size):
return (old_hash+1) % size
def put(self, key, data):
hash_value = self.hashString(key, len(self.slots))
# insert new key
if self.slots[hash_value] == None:
self.slots[hash_value] = key
self.data[hash_value] = data
self.length += 1
else:
# replace data
if self.slots[hash_value] == key:
self.data[hash_value] = data
else:
# rehash until it finds an empty slot or the same key
next_slot = self.reHash(hash_value, len(self.slots))
while self.slots[next_slot] != None and \
self.slots[next_slot] != key:
next_slot = self.reHash(next_slot, len(self.slots))
# got an empty slot, use it
if self.slots[next_slot] == None:
self.slots[next_slot] = key
self.data[next_slot] = data
self.length += 1
# got the same key, replace data
else:
self.data[next_slot] = data
def get(self, key):
starts_slot = self.hashString(key, len(self.slots))
data = None
position = starts_slot
# loop through all the hashes and rehashes
while self.slots[position] != None:
# got it
if self.slots[position] == key:
data = self.data[position]
break
# rehash; if you complete a cycle, return (key not found)
else:
position = self.reHash(position, len(self.slots))
if position == starts_slot:
break
return data
def delItem(self, key):
# basically the same as get()
starts_slot = self.hashString(key, len(self.slots))
position = starts_slot
while self.slots[position] != None:
if self.slots[position] == key:
self.slots[position] = None
self.data[position] = None
self.length -= 1
break
else:
position = self.reHash(position, len(self.slots))
if position == starts_slot:
break
def __contains__(self, key):
if self.get(key) is not None:
return True
else:
return False
def __len__(self):
return self.length
def __repr__(self):
v = [(self.slots[pos], self.data[pos]) for pos, val in enumerate(self.slots) if val is not None]
return str(v)
def __getitem__(self, key):
return self.get(key)
def __setitem__(self, key, data):
self.put(key, data)
def __delitem__(self, key):
self.delItem(key)
h = HashTable()
print len(h)
h['milan'] = 'italy'
h['nyc'] = 'usa'
h['omg'] = '!!!'
print h
print len(h)
del h['omg']
print h
print len(h)
print 'nyc' in h