#
# (C) 2005, Rob W. W. Hooft (rob@hooft.net)
#
# Comparison algorithm as implemented by Reinhard Schneider and Chris Sander
# for comparison of protein sequences, but implemented to compare two
# ASCII strings. This can be very useful for command interpreters to account
# for mistyped commands (use the routine "compare(s1, s2)" in here to get
# a score for each possible command, and see if one stands out). The comparison
# makes use of a similarity matrix for letters: in the protein case this is
# normally a chemical functionality similarity, for our case this is a matrix
# based on keys next to each other on a US Qwerty keyboard and on "capital
# letters are similar to their lowercase equivalent"  
#
# The algorithm does not cut corners: it is sure to find the absolute best
# match between the two strings.
#
# No attempt has been made to make this efficient in time or memory. Time taken
# and memory used is proportional to the product of the length of the input
# strings. Use for strings longer than 25 characters is entirely for your own
# risk.
#
# Use freely, but please attribute when using.


# How much does it cost to make a hole in one of the strings?
GAPOPENPENALTY = -0.3
# How much does it cost to elongate a hole in one of the strings?
GAPELONGATIONPENALTY = -0.2
# How much alike (0.0-1.0) are small and capital letters?
CAPITALIZESCORE = 0.8
# How much alike (0.0-1.0) are characters next to each other on a (US) keyboard?
NEXTKEYSCORE = 0.4

comparematrix = {}

def _makekeyboardmap():
    # different characters score 0.0, equal characters score 1.0
    for i in range(33, 126+1):
        for j in range(33, 126+1):
            comparematrix[i,j] = 0.0
        comparematrix[i,i] = 1.0
    # Capital and small letters are CAPITALIZESCORE alike
    capdist = ord('A') - ord('a')
    for i in range(ord('a'), ord('z')+1):
        comparematrix[i, i + capdist] = CAPITALIZESCORE
        comparematrix[i + capdist, i] = CAPITALIZESCORE

    # Keyboard layout, add some score for letters that are close together
    line1='`1234567890-= '
    line2=' qwertyuiop[] '
    line3=' asdfghjkl;   '
    line4=' zxcvbnm,./   '
    for i in range(len(line1)-1):
        _keyboardneighbour(line1[i], line1[i+1])
        _keyboardneighbour(line2[i], line2[i+1])
        _keyboardneighbour(line3[i], line3[i+1])
        _keyboardneighbour(line4[i], line4[i+1])
        _keyboardneighbour(line1[i], line2[i])
        _keyboardneighbour(line2[i], line3[i])
        _keyboardneighbour(line3[i], line4[i])
        _keyboardneighbour(line1[i], line2[i+1])
        _keyboardneighbour(line2[i], line3[i+1])
        _keyboardneighbour(line3[i], line4[i+1])

def _keyboardneighbour(c1, c2):
    i1 = ord(c1)
    i2 = ord(c2)
    if 33 <= i1 <= 126 and 33 <= i2 <= 126:
        comparematrix[i1,i2] = 0.4
        comparematrix[i2,i1] = 0.4

_makekeyboardmap()

def compare(s1, s2):
    lh = {}
    gapped = {}

    l1 = len(s1)
    l2 = len(s2)

    if s1 == s2:
        return l1+1

    # Top left of the matrix is "before the first character" in both directions
    lh[1, 1] = 0.0
    gapped[1, 1] = False

    # Start with a gap in s1
    lh[2, 1] = GAPOPENPENALTY
    gapped[2, 1] = True

    for ii in range(3, l1+2):
        lh[ii, 1] = lh[ii-1, 1] + GAPELONGATIONPENALTY
        gapped[ii, 1] = True

    # Start with a gap in s2
    lh[1, 2] = GAPOPENPENALTY
    gapped[1, 2] = True

    for jj in range(3, l2+2):
        lh[1, jj] = lh[1, jj-1] + GAPELONGATIONPENALTY
        gapped[1, jj] = True

    # The main algorithm: for each point in the matrix decide what the best
    # route so far is, by comparing the diagonal route forward with the
    # possibility to open or elongate a gap either way.
    for jj in range(1, l2+1):
        for ii in range(1, l1+1):
            oc1 = ord(s1[ii-1])
            oc2 = ord(s2[jj-1])
            if 33 <= oc1 <= 126 and 33 <= oc2 <= 126:
                ld = comparematrix[oc1, oc2]
            elif oc1 == oc2:
                ld = 1.0
            else:
                ld = 0.0

            if gapped[ii+1, jj]:
                gph = GAPELONGATIONPENALTY
            else:
                gph = GAPOPENPENALTY

            if gapped[ii, jj+1]:
                gpv = GAPELONGATIONPENALTY
            else:
                gpv = GAPOPENPENALTY

            s = lh[ii, jj] + ld
            sh = lh[ii+1, jj] + gph
            sv = lh[ii, jj+1] + gpv
            sd = max(sh, sv)
            if s >= sd:
                lh[ii+1, jj+1] = s
                gapped[ii+1, jj+1] = False
            else:
                lh[ii+1, jj+1] = sd
                gapped[ii+1, jj+1] = True
    # The highest alignment score is in the bottom right corner of the matrix,
    # behind the last character in both strings
    return lh[l1+1, l2+1]

def test():
    assert compare("test", "test") == len("test") + 1.0
    assert compare("Test", "test") == len("test") - 1.0 + CAPITALIZESCORE
    assert compare("rest", "test") == len("test") - 1.0 + NEXTKEYSCORE
    assert compare("rest", "crest") == len("rest") + GAPOPENPENALTY
    
if __name__ == "__main__":
    test()
    try:
        while True:
            s1 = raw_input("s1: ")
            if not s1:
                break
            s2 = raw_input("s2: ")
            print "Score:", compare(s1, s2)
    except (EOFError, KeyboardInterrupt):
        pass
    
