Edit distance: Difference between revisions
No edit summary |
No edit summary |
||
Line 1: | Line 1: | ||
The edit-distance is the minimum number of (delete/insert/modify) operations, required to edit one given string a into another given string b. | |||
A simple "dynamic programming" algorithm to compute the distance is as follow: | |||
<haskell> | <haskell> | ||
editDistance :: Eq a => [a] -> [a] -> Int | |||
editDistance xs ys = table ! (m,n) | |||
where | |||
(m,n) = (length xs, length ys) | |||
x = array (1,m) (zip [1..] xs) | |||
y = array (1,n) (zip [1..] ys) | |||
table :: Array (Int,Int) Int | |||
table = array bnds [(ij, dist ij) | ij <- range bnds] | |||
bnds = ((0,0),(m,n)) | |||
dist (0,j) = j | |||
dist (i,0) = i | |||
dist (i,j) = minimum [table ! (i-1,j) + 1, table ! (i,j-1) + 1, | |||
if x ! i == y ! j then table ! (i-1,j-1) else table ! (i-1,j-1)] | |||
</haskell> | |||
Here is an haskell implementation of http://www.csse.monash.edu.au/~lloyd/tildeStrings/Alignment/92.IPL.html | |||
It computes the edit distance between two lists in O(length a + dist a b) | |||
<haskell> | |||
dist :: Eq a => [a] -> [a] -> Int | |||
dist a b | dist a b | ||
= last (if lab == 0 then mainDiag | = last (if lab == 0 then mainDiag |
Revision as of 10:37, 24 April 2007
The edit-distance is the minimum number of (delete/insert/modify) operations, required to edit one given string a into another given string b.
A simple "dynamic programming" algorithm to compute the distance is as follow:
editDistance :: Eq a => [a] -> [a] -> Int
editDistance xs ys = table ! (m,n)
where
(m,n) = (length xs, length ys)
x = array (1,m) (zip [1..] xs)
y = array (1,n) (zip [1..] ys)
table :: Array (Int,Int) Int
table = array bnds [(ij, dist ij) | ij <- range bnds]
bnds = ((0,0),(m,n))
dist (0,j) = j
dist (i,0) = i
dist (i,j) = minimum [table ! (i-1,j) + 1, table ! (i,j-1) + 1,
if x ! i == y ! j then table ! (i-1,j-1) else table ! (i-1,j-1)]
Here is an haskell implementation of http://www.csse.monash.edu.au/~lloyd/tildeStrings/Alignment/92.IPL.html
It computes the edit distance between two lists in O(length a + dist a b)
dist :: Eq a => [a] -> [a] -> Int
dist a b
= last (if lab == 0 then mainDiag
else if lab > 0 then lowers !! (lab - 1)
else{- < 0 -} uppers !! (-1 - lab))
where mainDiag = oneDiag anyInt a b (head uppers) (anyInt : head lowers)
uppers = eachDiag a b (mainDiag : uppers) -- upper diagonals
lowers = eachDiag b a (mainDiag : lowers) -- lower diagonals
eachDiag a [] diags = []
eachDiag a (bch:bs) (lastDiag:diags) = oneDiag a bs nextDiag lastDiag : eachDiag a bs diags
where nextDiag = head (tail diags)
oneDiag a b diagAbove diagBelow = thisdiag
where doDiag [] b nw n w = []
doDiag a [] nw n w = []
doDiag (ach:as) (bch:bs) nw n w = me : (doDiag as bs me (tail n) (tail w))
where me = if ach == bch then nw else 1 + min3 (head w) nw (head n)
firstelt = 1 + head diagBelow
thisdiag = firstelt : doDiag a b firstelt diagAbove (tail diagBelow)
lab = length a - length b
anyInt = 0
min3 x y z = if x < y then x else min y z