Edit distance: Difference between revisions
No edit summary |
No edit summary |
||
Line 32: | Line 32: | ||
else if lab > 0 then lowers !! (lab - 1) | else if lab > 0 then lowers !! (lab - 1) | ||
else{- < 0 -} uppers !! (-1 - lab)) | else{- < 0 -} uppers !! (-1 - lab)) | ||
where mainDiag = oneDiag | where mainDiag = oneDiag a b (head uppers) (-1 : head lowers) | ||
uppers = eachDiag a b (mainDiag : uppers) -- upper diagonals | uppers = eachDiag a b (mainDiag : uppers) -- upper diagonals | ||
lowers = eachDiag b a (mainDiag : lowers) -- lower diagonals | lowers = eachDiag b a (mainDiag : lowers) -- lower diagonals | ||
Line 46: | Line 46: | ||
thisdiag = firstelt : doDiag a b firstelt diagAbove (tail diagBelow) | thisdiag = firstelt : doDiag a b firstelt diagAbove (tail diagBelow) | ||
lab = length a - length b | lab = length a - length b | ||
min3 x y z = if x < y then x else min y z | min3 x y z = if x < y then x else min y z | ||
</haskell> | </haskell> |
Revision as of 10:42, 24 April 2007
The edit-distance is the minimum number of (delete/insert/modify) operations, required to edit one given string a into another given string b.
A simple "dynamic programming" algorithm to compute the distance is as follow:
editDistance :: Eq a => [a] -> [a] -> Int
editDistance xs ys = table ! (m,n)
where
(m,n) = (length xs, length ys)
x = array (1,m) (zip [1..] xs)
y = array (1,n) (zip [1..] ys)
table :: Array (Int,Int) Int
table = array bnds [(ij, dist ij) | ij <- range bnds]
bnds = ((0,0),(m,n))
dist (0,j) = j
dist (i,0) = i
dist (i,j) = minimum [table ! (i-1,j) + 1, table ! (i,j-1) + 1,
if x ! i == y ! j then table ! (i-1,j-1) else table ! (i-1,j-1)]
Here is an haskell implementation of http://www.csse.monash.edu.au/~lloyd/tildeStrings/Alignment/92.IPL.html
It computes the edit distance between two lists in O(length a + dist a b)
dist :: Eq a => [a] -> [a] -> Int
dist a b
= last (if lab == 0 then mainDiag
else if lab > 0 then lowers !! (lab - 1)
else{- < 0 -} uppers !! (-1 - lab))
where mainDiag = oneDiag a b (head uppers) (-1 : head lowers)
uppers = eachDiag a b (mainDiag : uppers) -- upper diagonals
lowers = eachDiag b a (mainDiag : lowers) -- lower diagonals
eachDiag a [] diags = []
eachDiag a (bch:bs) (lastDiag:diags) = oneDiag a bs nextDiag lastDiag : eachDiag a bs diags
where nextDiag = head (tail diags)
oneDiag a b diagAbove diagBelow = thisdiag
where doDiag [] b nw n w = []
doDiag a [] nw n w = []
doDiag (ach:as) (bch:bs) nw n w = me : (doDiag as bs me (tail n) (tail w))
where me = if ach == bch then nw else 1 + min3 (head w) nw (head n)
firstelt = 1 + head diagBelow
thisdiag = firstelt : doDiag a b firstelt diagAbove (tail diagBelow)
lab = length a - length b
min3 x y z = if x < y then x else min y z