Longest unique substring algorithm
The tricky part is adjusting where the new substrings should start once I found a repeated character. There are 2 possible scenarios:
a b c _a_ b c d
the new begin is string[1]
, nothing happens to the chars
a b c _b_ a d e f g
the new begin is string[3]
, and I also have to delete from chars
everything up to ‘b’.
def longest_unique_substring(string): if len(string) == 0: return '' chars = [] chars.append(string[0]) beg = end = 0 # current min/max of string we are processing beg_top = end_top = 0 # longest min/max we have found so far for x in range(1, len(string)): cur_char = string[x] if cur_char not in chars: # grow the substring chars.append(cur_char) end = x + 1 if end - beg > end_top - beg_top: # we have a new longest subchar, save it to beg_top/end_top end_top = end beg_top = beg else: # found a dupe char - move up until we have no dupes again while string[beg] != cur_char: chars.remove(string[beg]) beg += 1 beg += 1 # don't forget to move this up! # beg and end now contain a unique string, continue for loop return string[beg_top : end_top] def test_function(mstr, expected): print "Testing %s, expected %s" % (mstr, expected) m_res = longest_unique_substring(mstr) print " -- got %s" % m_res assert(m_res == expected) test_function('au', 'au') test_function('abcaccd', 'abc') test_function('aab', 'ab') test_function('abcabcd', 'abcd') test_function('abcaabcd', 'abcd') test_function('abcabc', 'abc') test_function('abcbadefgha', 'cbadefgh') test_function('caabcABca', 'abcAB')