Difference between revisions of "Phone number"
PaulJohnson (talk | contribs) |
JohnHamilton (talk | contribs) |
||
Line 175: | Line 175: | ||
rnd <- getStdGen |
rnd <- getStdGen |
||
writeFile "testData" $ unlines $ generate 1 rnd numbers |
writeFile "testData" $ unlines $ generate 1 rnd numbers |
||
+ | |||
+ | |||
+ | == Shorter solution == |
||
+ | |||
+ | By John Hamilton. |
||
+ | |||
+ | I heard about this problem from Peter Norvig's page [http://www.norvig.com/java-lisp.html Lisp as an Alternative to Java]. (You should also check out http://www.flownet.com/ron/papers/lisp-java/ for more info.) I've been learning Haskell, and to see how it would compare with Lisp, I recently wrote the following program. |
||
+ | |||
+ | <haskell> |
||
+ | import Data.Char |
||
+ | import Data.List |
||
+ | import Data.Map (fromListWith, findWithDefault) |
||
+ | import System.Environment |
||
+ | |||
+ | encodeWord = map f . filter (/= '"') where |
||
+ | f x = head $ [d | (s, d) <- ps, (toLower x) `elem` s] |
||
+ | ps = zip ["e", "jnq", "rwx", "dsy", "ft", "am", "civ", "bku", "lop", "ghz"] |
||
+ | ['0'..'9'] |
||
+ | |||
+ | translate wordMap _ "" = [""] |
||
+ | translate wordMap digit xs@(x:xs') = |
||
+ | if all null ys && digit |
||
+ | then combine [[x]] (translate wordMap False xs') |
||
+ | else concat $ zipWith combine ys zs |
||
+ | where |
||
+ | ys = [findWithDefault [] s wordMap | s <- (tail . inits) xs] |
||
+ | zs = [translate wordMap True s | s <- tails xs'] |
||
+ | combine [] _ = [] |
||
+ | combine ys [""] = ys |
||
+ | combine ys zs = [y ++ " " ++ z | y <- ys, z <- zs] |
||
+ | |||
+ | process wordMap n = [n ++ ": " ++ x | x <- xs] where |
||
+ | xs = translate wordMap True $ filter (`notElem` "-/") n |
||
+ | |||
+ | main = do |
||
+ | [dictionary, input] <- getArgs |
||
+ | words <- readFile dictionary |
||
+ | let wordMap = fromListWith (++) [(encodeWord w, [w]) | w <- lines words] |
||
+ | numbers <- readFile input |
||
+ | mapM_ putStrLn $ lines numbers >>= process wordMap |
||
+ | </haskell> |
Revision as of 01:59, 24 April 2006
This program was written after I read "An Empirical Comparison of Seven Programming Languages" at http://www.cis.udel.edu/~silber/470STUFF/article.pdf.
To run this program, copy and paste each code block into a file with the appropriate name (so module Main goes into Main.hs) and compile. Depending on your OS you may need to modify the dictionary file name. You may also have to run it with "+RTS -k16000000" on the command line in order to increase the stack space. It depends on your dictionary.
First, the PhoneWord module. This contains the meat of the program.
module PhoneWord where import Data.Array import Data.Char import Data.List -- The number encoding, as specified in the problem statement. numberPairs :: [(Int, String)] numberPairs = [(0, "e"), (1, "jnq"), (2, "rwx"), (3, "dsy"), (4, "ft"), (5, "am"), (6, "civ"), (7, "bku"), (8, "lop"), (9, "ghz")] -- Encode a character by looking it up in the inverse of the numberPairs -- table. Creating the inverse takes three lines of code (including the -- 'where'), but we can live with that. The alternative would be to invert -- the list manually and then have a longer list of pairs above. encode :: Char -> Int encode c = codeTable ! (toLower c) where codeTable = array ('a', 'z') $ concatMap pairList numberPairs pairList (n, cs) = map (\c -> (c, n)) cs -- The dictionary is stored in a denary tree. A node has sub-trees, a leaf -- does not. At each node or leaf is the list of strings that encode to the -- number of that location. data DenaryTree = Node {strings :: [String], subTrees :: Array Int DenaryTree} | Leaf {strings :: [String]} deriving (Eq, Show) -- Empty and unit trees. emptyTree :: DenaryTree emptyTree = Leaf [] unitTree :: String -> DenaryTree unitTree str = Leaf [str] -- Given an existing tree, create a new tree to be its parent. Put the -- existing tree at position 'n' in the new parent and put 'strs' in it. newNode :: [String] -> Int -> DenaryTree -> DenaryTree newNode strs n subtree = Node strs $ array (0,9) $ (n, subtree) : [(i, emptyTree) | i <- [0..9], i /= n] -- Add a new word to a tree, returning a new tree. addWord :: String -> DenaryTree -> DenaryTree -- The worker function is addWord1. addWord just wraps it up -- in a more convenient form. addWord newString tree = addWord1 numbers tree where numbers = map encode newString addWord1 [] (Leaf oldStrs) = Leaf (newString : oldStrs) addWord1 [] (Node oldStrs arr) = Node (newString : oldStrs) arr addWord1 (n:ns) (Leaf oldStrs) = newNode oldStrs n $ newBranch ns addWord1 (n:ns) (Node oldStrs arr) = Node oldStrs $ arr // [(n, addWord1 ns $ arr ! n)] newBranch [] = unitTree newString newBranch (n:ns) = newNode [] n $ newBranch ns -- Convert a list of words into a NumberTree wordTree :: [String] -> DenaryTree wordTree = foldr addWord emptyTree -- Find the list of words which match a prefix of the number by descending -- the tree until you either run out of tree or digits, and accumulating -- the words as you go. findNumber :: DenaryTree -> String -> [String] findNumber tree [] = strings tree findNumber (Node strs arr) (c:cs) = strs ++ findNumber (arr ! (digitToInt c)) cs findNumber (Leaf strs) _ = strs -- Find the list of solutions for a number. This is rendered slightly -- messy by the fact that one digit can be inserted if no other progress -- is possible. So if "Foo Bar" is a legal solution then "Foo 7 Ar" is not. mnemonics :: DenaryTree -> String -> [String] mnemonics tree numbers = map tail $ mnemonics1 True numbers where mnemonics1 _ "" = return " " mnemonics1 digitOK numbers = case findNumber tree numbers of [] -> if digitOK then do nextBit <- mnemonics1 False (tail numbers) return $ ' ' : head numbers : nextBit else [] ls -> do item <- ls nextBit <- mnemonics1 True $ drop (length item) numbers return $ ' ' : item ++ nextBit
Now the Main module with the IO in it:
module Main where import Data.Char import System.Environment import System.IO import PhoneWord -- File containing the words. This is the standard Unix dictionary. dictFile = "/usr/share/dict/words" -- Read the words file. Return only those "words" that consist entirely -- of letters and are at least three letters long. dictWords :: IO [String] dictWords = do text <- readFile dictFile return $ filter allowed $ lines text where allowed wrd = (and . (map isAlpha)) wrd && length wrd >= 4 -- Read the numbers file. numbers :: String -> IO [String] numbers fileName = do text <- readFile fileName return $ map (filter isDigit) $ lines text -- The Main Function executed when the program runs. main :: IO () main = do args <- getArgs nums <- numbers $ head args dict <- dictWords let tree = wordTree dict results = map (\n -> (n, mnemonics tree n)) nums sequence_ $ concatMap printResult results where printResult (num, strings) = map (\str -> putStrLn $ num ++ ": " ++ str) strings
Finally, a little module to generate a random test file:
module MakeTest where import System.Random import Test.QuickCheck alphabet = ['0'..'9'] number :: Gen String number = do len <- elements [0..10] sequence $ replicate len $ elements alphabet numbers :: Gen [String] numbers = sequence $ replicate 1000 number makeTest :: IO () makeTest = do rnd <- getStdGen writeFile "testData" $ unlines $ generate 1 rnd numbers
Shorter solution
By John Hamilton.
I heard about this problem from Peter Norvig's page Lisp as an Alternative to Java. (You should also check out http://www.flownet.com/ron/papers/lisp-java/ for more info.) I've been learning Haskell, and to see how it would compare with Lisp, I recently wrote the following program.
import Data.Char
import Data.List
import Data.Map (fromListWith, findWithDefault)
import System.Environment
encodeWord = map f . filter (/= '"') where
f x = head $ [d | (s, d) <- ps, (toLower x) `elem` s]
ps = zip ["e", "jnq", "rwx", "dsy", "ft", "am", "civ", "bku", "lop", "ghz"]
['0'..'9']
translate wordMap _ "" = [""]
translate wordMap digit xs@(x:xs') =
if all null ys && digit
then combine [[x]] (translate wordMap False xs')
else concat $ zipWith combine ys zs
where
ys = [findWithDefault [] s wordMap | s <- (tail . inits) xs]
zs = [translate wordMap True s | s <- tails xs']
combine [] _ = []
combine ys [""] = ys
combine ys zs = [y ++ " " ++ z | y <- ys, z <- zs]
process wordMap n = [n ++ ": " ++ x | x <- xs] where
xs = translate wordMap True $ filter (`notElem` "-/") n
main = do
[dictionary, input] <- getArgs
words <- readFile dictionary
let wordMap = fromListWith (++) [(encodeWord w, [w]) | w <- lines words]
numbers <- readFile input
mapM_ putStrLn $ lines numbers >>= process wordMap