Longest unique substring algorithm
The tricky part is adjusting where the new substrings should start once I found a repeated character. There are 2 possible scenarios:
a b c _a_ b c d
the new begin is string[1], nothing happens to the chars
a b c _b_ a d e f g
the new begin is string[3], and I also have to delete from chars everything up to ‘b’.
def longest_unique_substring(string):
if len(string) == 0:
return ''
chars = []
chars.append(string[0])
beg = end = 0 # current min/max of string we are processing
beg_top = end_top = 0 # longest min/max we have found so far
for x in range(1, len(string)):
cur_char = string[x]
if cur_char not in chars:
# grow the substring
chars.append(cur_char)
end = x + 1
if end - beg > end_top - beg_top:
# we have a new longest subchar, save it to beg_top/end_top
end_top = end
beg_top = beg
else:
# found a dupe char - move up until we have no dupes again
while string[beg] != cur_char:
chars.remove(string[beg])
beg += 1
beg += 1 # don't forget to move this up!
# beg and end now contain a unique string, continue for loop
return string[beg_top : end_top]
def test_function(mstr, expected):
print "Testing %s, expected %s" % (mstr, expected)
m_res = longest_unique_substring(mstr)
print " -- got %s" % m_res
assert(m_res == expected)
test_function('au', 'au')
test_function('abcaccd', 'abc')
test_function('aab', 'ab')
test_function('abcabcd', 'abcd')
test_function('abcaabcd', 'abcd')
test_function('abcabc', 'abc')
test_function('abcbadefgha', 'cbadefgh')
test_function('caabcABca', 'abcAB')