Difference between revisions of "Phone number"
JohnHamilton (talk | contribs) |
m |
||
(8 intermediate revisions by 6 users not shown) | |||
Line 1: | Line 1: | ||
+ | == Original Program == |
||
+ | |||
+ | By Paul Johnson |
||
+ | |||
This program was written after I read "An Empirical Comparison of Seven Programming Languages" at http://www.cis.udel.edu/~silber/470STUFF/article.pdf. |
This program was written after I read "An Empirical Comparison of Seven Programming Languages" at http://www.cis.udel.edu/~silber/470STUFF/article.pdf. |
||
Line 4: | Line 8: | ||
First, the PhoneWord module. This contains the meat of the program. |
First, the PhoneWord module. This contains the meat of the program. |
||
+ | |||
+ | <haskell> |
||
+ | module PhoneWord where |
||
+ | import Data.Array |
||
− | module PhoneWord where |
||
+ | import Data.Char |
||
+ | import Data.List |
||
− | import Data.Array |
||
− | import Data.Char |
||
− | import Data.List |
||
+ | -- The number encoding, as specified in the problem statement. |
||
+ | numberPairs :: [(Int, String)] |
||
+ | numberPairs = [(0, "e"), (1, "jnq"), (2, "rwx"), (3, "dsy"), (4, "ft"), |
||
+ | (5, "am"), (6, "civ"), (7, "bku"), (8, "lop"), (9, "ghz")] |
||
− | -- The number encoding, as specified in the problem statement. |
||
− | numberPairs :: [(Int, String)] |
||
− | numberPairs = [(0, "e"), (1, "jnq"), (2, "rwx"), (3, "dsy"), (4, "ft"), |
||
− | (5, "am"), (6, "civ"), (7, "bku"), (8, "lop"), (9, "ghz")] |
||
+ | -- Encode a character by looking it up in the inverse of the numberPairs |
||
+ | -- table. Creating the inverse takes three lines of code (including the |
||
+ | -- 'where'), but we can live with that. The alternative would be to invert |
||
+ | -- the list manually and then have a longer list of pairs above. |
||
+ | encode :: Char -> Int |
||
+ | encode c = codeTable ! (toLower c) |
||
+ | where |
||
+ | codeTable = array ('a', 'z') $ concatMap pairList numberPairs |
||
+ | pairList (n, cs) = map (\c -> (c, n)) cs |
||
+ | -- The dictionary is stored in a denary tree. A node has sub-trees, a leaf |
||
− | -- Encode a character by looking it up in the inverse of the numberPairs |
||
− | + | -- does not. At each node or leaf is the list of strings that encode to the |
|
+ | -- number of that location. |
||
− | -- 'where'), but we can live with that. The alternative would be to invert |
||
+ | data DenaryTree = |
||
− | -- the list manually and then have a longer list of pairs above. |
||
+ | Node {strings :: [String], subTrees :: Array Int DenaryTree} | |
||
− | encode :: Char -> Int |
||
+ | Leaf {strings :: [String]} |
||
− | encode c = codeTable ! (toLower c) |
||
+ | deriving (Eq, Show) |
||
− | where |
||
− | codeTable = array ('a', 'z') $ concatMap pairList numberPairs |
||
− | pairList (n, cs) = map (\c -> (c, n)) cs |
||
+ | -- Empty and unit trees. |
||
− | -- The dictionary is stored in a denary tree. A node has sub-trees, a leaf |
||
+ | emptyTree :: DenaryTree |
||
− | -- does not. At each node or leaf is the list of strings that encode to the |
||
+ | emptyTree = Leaf [] |
||
− | -- number of that location. |
||
− | data DenaryTree = |
||
− | Node {strings :: [String], subTrees :: Array Int DenaryTree} | |
||
− | Leaf {strings :: [String]} |
||
− | deriving (Eq, Show) |
||
+ | unitTree :: String -> DenaryTree |
||
− | -- Empty and unit trees. |
||
+ | unitTree str = Leaf [str] |
||
− | emptyTree :: DenaryTree |
||
− | emptyTree = Leaf [] |
||
− | unitTree :: String -> DenaryTree |
||
− | unitTree str = Leaf [str] |
||
+ | -- Given an existing tree, create a new tree to be its parent. Put the |
||
+ | -- existing tree at position 'n' in the new parent and put 'strs' in it. |
||
+ | newNode :: [String] -> Int -> DenaryTree -> DenaryTree |
||
+ | newNode strs n subtree = Node strs $ array (0,9) $ |
||
+ | (n, subtree) : [(i, emptyTree) | i <- [0..9], i /= n] |
||
+ | |||
− | + | -- Add a new word to a tree, returning a new tree. |
|
+ | addWord :: String -> DenaryTree -> DenaryTree |
||
− | -- existing tree at position 'n' in the new parent and put 'strs' in it. |
||
− | newNode :: [String] -> Int -> DenaryTree -> DenaryTree |
||
− | newNode strs n subtree = Node strs $ array (0,9) $ |
||
− | (n, subtree) : [(i, emptyTree) | i <- [0..9], i /= n] |
||
+ | -- The worker function is addWord1. addWord just wraps it up |
||
+ | -- in a more convenient form. |
||
+ | addWord newString tree = addWord1 numbers tree |
||
+ | where |
||
+ | numbers = map encode newString |
||
+ | addWord1 [] (Leaf oldStrs) = Leaf (newString : oldStrs) |
||
+ | addWord1 [] (Node oldStrs arr) = Node (newString : oldStrs) arr |
||
+ | addWord1 (n:ns) (Leaf oldStrs) = newNode oldStrs n $ newBranch ns |
||
+ | addWord1 (n:ns) (Node oldStrs arr) = |
||
+ | Node oldStrs $ arr // [(n, addWord1 ns $ arr ! n)] |
||
+ | newBranch [] = unitTree newString |
||
+ | newBranch (n:ns) = newNode [] n $ newBranch ns |
||
− | -- Add a new word to a tree, returning a new tree. |
||
− | addWord :: String -> DenaryTree -> DenaryTree |
||
+ | -- Convert a list of words into a NumberTree |
||
− | -- The worker function is addWord1. addWord just wraps it up |
||
+ | wordTree :: [String] -> DenaryTree |
||
− | -- in a more convenient form. |
||
+ | wordTree = foldr addWord emptyTree |
||
− | addWord newString tree = addWord1 numbers tree |
||
− | where |
||
− | numbers = map encode newString |
||
− | addWord1 [] (Leaf oldStrs) = Leaf (newString : oldStrs) |
||
− | addWord1 [] (Node oldStrs arr) = Node (newString : oldStrs) arr |
||
− | addWord1 (n:ns) (Leaf oldStrs) = newNode oldStrs n $ newBranch ns |
||
− | addWord1 (n:ns) (Node oldStrs arr) = |
||
− | Node oldStrs $ arr // [(n, addWord1 ns $ arr ! n)] |
||
− | newBranch [] = unitTree newString |
||
− | newBranch (n:ns) = newNode [] n $ newBranch ns |
||
− | + | -- Find the list of words which match a prefix of the number by descending |
|
+ | -- the tree until you either run out of tree or digits, and accumulating |
||
− | wordTree :: [String] -> DenaryTree |
||
+ | -- the words as you go. |
||
− | wordTree = foldr addWord emptyTree |
||
+ | findNumber :: DenaryTree -> String -> [String] |
||
+ | findNumber tree [] = strings tree |
||
+ | findNumber (Node strs arr) (c:cs) = |
||
− | -- Find the list of words which match a prefix of the number by descending |
||
+ | strs ++ findNumber (arr ! (digitToInt c)) cs |
||
− | -- the tree until you either run out of tree or digits, and accumulating |
||
− | -- the words as you go. |
||
− | findNumber :: DenaryTree -> String -> [String] |
||
− | + | findNumber (Leaf strs) _ = strs |
|
− | findNumber (Node strs arr) (c:cs) = |
||
− | strs ++ findNumber (arr ! (digitToInt c)) cs |
||
− | |||
− | findNumber (Leaf strs) _ = strs |
||
− | |||
− | |||
− | -- Find the list of solutions for a number. This is rendered slightly |
||
− | -- messy by the fact that one digit can be inserted if no other progress |
||
− | -- is possible. So if "Foo Bar" is a legal solution then "Foo 7 Ar" is not. |
||
− | mnemonics :: DenaryTree -> String -> [String] |
||
− | mnemonics tree numbers = map tail $ mnemonics1 True numbers |
||
− | where |
||
− | mnemonics1 _ "" = return " " |
||
− | mnemonics1 digitOK numbers = |
||
− | case findNumber tree numbers of |
||
− | [] -> |
||
− | if digitOK then do |
||
− | nextBit <- mnemonics1 False (tail numbers) |
||
− | return $ ' ' : head numbers : nextBit |
||
− | else |
||
− | [] |
||
− | ls -> do |
||
− | item <- ls |
||
− | nextBit <- mnemonics1 True $ drop (length item) numbers |
||
− | return $ ' ' : item ++ nextBit |
||
+ | -- Find the list of solutions for a number. This is rendered slightly |
||
+ | -- messy by the fact that one digit can be inserted if no other progress |
||
+ | -- is possible. So if "Foo Bar" is a legal solution then "Foo 7 Ar" is not. |
||
+ | mnemonics :: DenaryTree -> String -> [String] |
||
+ | mnemonics tree numbers = map tail $ mnemonics1 True numbers |
||
+ | where |
||
+ | mnemonics1 _ "" = return " " |
||
+ | mnemonics1 digitOK numbers = |
||
+ | case findNumber tree numbers of |
||
+ | [] -> |
||
+ | if digitOK then do |
||
+ | nextBit <- mnemonics1 False (tail numbers) |
||
+ | return $ ' ' : head numbers : nextBit |
||
+ | else |
||
+ | [] |
||
+ | ls -> do |
||
+ | item <- ls |
||
+ | nextBit <- mnemonics1 True $ drop (length item) numbers |
||
+ | return $ ' ' : item ++ nextBit |
||
+ | </haskell> |
||
+ | |||
Now the Main module with the IO in it: |
Now the Main module with the IO in it: |
||
+ | <haskell> |
||
− | module Main where |
||
+ | module Main where |
||
− | + | import Data.Char |
|
− | + | import System.Environment |
|
− | + | import System.IO |
|
− | + | import PhoneWord |
|
− | + | -- File containing the words. This is the standard Unix dictionary. |
|
− | + | dictFile = "/usr/share/dict/words" |
|
− | + | -- Read the words file. Return only those "words" that consist entirely |
|
− | + | -- of letters and are at least three letters long. |
|
− | + | dictWords :: IO [String] |
|
− | + | dictWords = |
|
− | + | do |
|
− | + | text <- readFile dictFile |
|
− | + | return $ filter allowed $ lines text |
|
− | + | where |
|
− | + | allowed wrd = |
|
− | + | all isAlpha wrd && |
|
− | + | length wrd >= 4 |
|
− | + | -- Read the numbers file. |
|
− | + | numbers :: String -> IO [String] |
|
− | + | numbers fileName = do |
|
− | + | text <- readFile fileName |
|
− | + | return $ map (filter isDigit) $ lines text |
|
− | + | -- The Main Function executed when the program runs. |
|
− | + | main :: IO () |
|
− | + | main = |
|
− | + | do |
|
− | + | args <- getArgs |
|
− | + | nums <- numbers $ head args |
|
− | + | dict <- dictWords |
|
− | + | let |
|
− | + | tree = wordTree dict |
|
− | + | results = map (\n -> (n, mnemonics tree n)) nums |
|
− | + | sequence_ $ concatMap printResult results |
|
− | + | where |
|
− | + | printResult (num, strings) = |
|
− | + | map (\str -> putStrLn $ num ++ ": " ++ str) strings |
|
+ | </haskell> |
||
Finally, a little module to generate a random test file: |
Finally, a little module to generate a random test file: |
||
+ | <haskell> |
||
− | module MakeTest where |
||
+ | module MakeTest where |
||
− | + | import System.Random |
|
− | + | import Control.Monad |
|
+ | import Test.QuickCheck |
||
− | + | alphabet = ['0'..'9'] |
|
− | + | number :: Gen String |
|
− | + | number = do |
|
− | + | len <- elements [0..10] |
|
− | + | replicateM len $ elements alphabet |
|
− | + | numbers :: Gen [String] |
|
− | + | numbers = replicateM 1000 number |
|
− | + | makeTest :: IO () |
|
− | + | makeTest = do |
|
− | + | rnd <- getStdGen |
|
− | + | writeFile "testData" $ unlines $ generate 1 rnd numbers |
|
+ | </haskell> |
||
+ | [[Category:Code]] |
||
== Shorter solution == |
== Shorter solution == |
||
Line 193: | Line 205: | ||
ps = zip ["e", "jnq", "rwx", "dsy", "ft", "am", "civ", "bku", "lop", "ghz"] |
ps = zip ["e", "jnq", "rwx", "dsy", "ft", "am", "civ", "bku", "lop", "ghz"] |
||
['0'..'9'] |
['0'..'9'] |
||
+ | </haskell> |
||
+ | <haskell> |
||
translate wordMap _ "" = [""] |
translate wordMap _ "" = [""] |
||
translate wordMap digit xs@(x:xs') = |
translate wordMap digit xs@(x:xs') = |
||
Line 215: | Line 229: | ||
numbers <- readFile input |
numbers <- readFile input |
||
mapM_ putStrLn $ lines numbers >>= process wordMap |
mapM_ putStrLn $ lines numbers >>= process wordMap |
||
+ | </haskell> |
||
+ | |||
+ | ---- |
||
+ | |||
+ | 27 lines of code. Nice work. You have handily beaten both Common Lisp and Scheme. |
||
+ | |||
+ | I took 88 lines, of which 22 implemented a denary tree rather than using Data.Map. It's some time since I wrote the program, and I can't remember now why I did this. Some of the other 66 were module declarations and repeated imports (for two modules), and type declarations for all top-level functions. Finally there was the fact that I was using /usr/dict/words because I didn't have the original data set, and this required a certain amount of extra filtering. But even allowing for all that I think your solution is indeed shorter. [[User:PaulJohnson|PaulJohnson]] 20:05, 24 April 2006 (UTC) |
||
+ | |||
+ | == Another solution == |
||
+ | |||
+ | By Colin Horne |
||
+ | |||
+ | I wrote my solution without first seeing Paul's; however they turn out to be very similar. It took about 2.75 hours to get a (almost!) working solution, and another 15 minutes, or so, to tidy it up (I have most probably not added sufficient comments, however). |
||
+ | |||
+ | Just before posting this, I realised that I had a subtle bug, in that I ignored the following part of the specification: "If and only if at a particular point no word at all from |
||
+ | the dictionary can be inserted, a single digit from the phone number can |
||
+ | be copied to the encoding instead." |
||
+ | |||
+ | It took me about an extra hour to find why my program was not producing the correct output, and how to fix it. The resulting total development time is about 4 hours, which is slightly less than the average Lisp/Scheme time. |
||
+ | |||
+ | <haskell> |
||
+ | module Main where |
||
+ | |||
+ | import Data.Array |
||
+ | import System (getArgs) |
||
+ | import Data.Char (digitToInt, intToDigit, toLower, isLetter) |
||
+ | import Data.Foldable (foldl') |
||
+ | import Control.Monad (forM_) |
||
+ | |||
+ | translationsList = [ |
||
+ | (0, "e"), |
||
+ | (1, "jnq"), |
||
+ | (2, "rwx"), |
||
+ | (3, "dsy"), |
||
+ | (4, "ft"), |
||
+ | (5, "am"), |
||
+ | (6, "civ"), |
||
+ | (7, "bku"), |
||
+ | (8, "lop"), |
||
+ | (9, "ghz")] |
||
+ | |||
+ | |||
+ | {--- Lookup functions for converting from/to digits ---} |
||
+ | |||
+ | numToChars :: Int -> String |
||
+ | numToChars = (!) arr |
||
+ | where arr = array (0,9) translationsList |
||
+ | |||
+ | charToNum :: Char -> Int |
||
+ | charToNum = (!) arr |
||
+ | where |
||
+ | arr = array ('a','z') convertAll |
||
+ | convert :: (Int, String) -> [(Char, Int)] |
||
+ | convert (i,s) = map (\c -> (c, i)) s |
||
+ | convertAll = concatMap convert translationsList |
||
+ | |||
+ | |||
+ | |||
+ | {--- The Trie data structure ---} |
||
+ | |||
+ | data Trie = Node [String] (Array Int Trie) | Empty |
||
+ | |||
+ | -- Add a node to the trie |
||
+ | insert node str = insert' str node $ map toLower $ filter isLetter str |
||
+ | |||
+ | insert' :: String -> Trie -> String -> Trie |
||
+ | insert' str (Node strs ts) [] = Node (str:strs) ts |
||
+ | insert' str (Node strs ts) (c:cs) = |
||
+ | Node strs $! (ts // [(num, updatedNode)]) |
||
+ | where |
||
+ | updatedNode = insert' str (ts ! num) cs |
||
+ | num = charToNum c |
||
+ | insert' str Empty cs = insert' str (Node [] $ array (0,9) $ zip [0..9] $ repeat Empty) cs |
||
+ | |||
+ | readDict :: FilePath -> IO Trie |
||
+ | readDict file = fmap (trie . lines) $ readFile file |
||
+ | |||
+ | trie :: [String] -> Trie |
||
+ | trie strs = foldl' insert Empty strs |
||
+ | |||
+ | |||
+ | {--- Other functions ---} |
||
+ | |||
+ | |||
+ | -- Given a trie and a number, find the representations for each of |
||
+ | -- the number's prefixes, returning each result as a tuple containing |
||
+ | -- the prefix's representation, and its postfix |
||
+ | getAllStrings :: Trie -> [Int] -> [([String], [Int])] |
||
+ | getAllStrings Empty _ = [] |
||
+ | getAllStrings (Node strs _) [] = [(strs, [])] |
||
+ | getAllStrings (Node strs ts) xxs@(x:xs) = (getAllStrings (ts ! x) xs) ++ [(strs, xxs)] |
||
+ | |||
+ | |||
+ | -- Lists all string-representations of the given number |
||
+ | -- No further processing of this function's output is required |
||
+ | numToStrings :: Trie -> [Int] -> [String] |
||
+ | numToStrings trie num = map (dropWhile (== ' ')) $ loop [] True num |
||
+ | where |
||
+ | loop :: String -> Bool -> [Int] -> [String] |
||
+ | loop prefix _ [] = [prefix] |
||
+ | loop prefix allowDigit xxs@(x:xs) = case result of |
||
+ | [] -> if allowDigit && noPartialMatch |
||
+ | then loop (prefix++" "++[intToDigit x]) False xs |
||
+ | else [] |
||
+ | xs -> xs |
||
+ | where |
||
+ | noPartialMatch = flip all allStrings $ (== []) . fst |
||
+ | allStrings = getAllStrings trie xxs |
||
+ | result = flip concatMap allStrings $ |
||
+ | \(strs,rest) -> |
||
+ | flip concatMap strs $ \str -> |
||
+ | loop (prefix++" "++str) True rest |
||
+ | |||
+ | |||
+ | |||
+ | -- Reads a number from the given string, |
||
+ | -- returning a list of its digits |
||
+ | readNumber :: String -> [Int] |
||
+ | readNumber [] = [] |
||
+ | readNumber (c:cs) |
||
+ | | c >= '0' && c <= '9' = digitToInt c : rest |
||
+ | | otherwise = rest |
||
+ | where rest = readNumber cs |
||
+ | |||
+ | main = do |
||
+ | (dictFile:numbersFile:[]) <- getArgs |
||
+ | dict <- readDict dictFile |
||
+ | numbers <- fmap lines $ readFile numbersFile |
||
+ | forM_ numbers $ \num' -> do |
||
+ | let num = readNumber num' |
||
+ | let strs = numToStrings dict num |
||
+ | forM_ strs $ \str -> |
||
+ | putStrLn $ num' ++ ": " ++ str |
||
</haskell> |
</haskell> |
Latest revision as of 05:55, 21 February 2010
Original Program
By Paul Johnson
This program was written after I read "An Empirical Comparison of Seven Programming Languages" at http://www.cis.udel.edu/~silber/470STUFF/article.pdf.
To run this program, copy and paste each code block into a file with the appropriate name (so module Main goes into Main.hs) and compile. Depending on your OS you may need to modify the dictionary file name. You may also have to run it with "+RTS -k16000000" on the command line in order to increase the stack space. It depends on your dictionary.
First, the PhoneWord module. This contains the meat of the program.
module PhoneWord where
import Data.Array
import Data.Char
import Data.List
-- The number encoding, as specified in the problem statement.
numberPairs :: [(Int, String)]
numberPairs = [(0, "e"), (1, "jnq"), (2, "rwx"), (3, "dsy"), (4, "ft"),
(5, "am"), (6, "civ"), (7, "bku"), (8, "lop"), (9, "ghz")]
-- Encode a character by looking it up in the inverse of the numberPairs
-- table. Creating the inverse takes three lines of code (including the
-- 'where'), but we can live with that. The alternative would be to invert
-- the list manually and then have a longer list of pairs above.
encode :: Char -> Int
encode c = codeTable ! (toLower c)
where
codeTable = array ('a', 'z') $ concatMap pairList numberPairs
pairList (n, cs) = map (\c -> (c, n)) cs
-- The dictionary is stored in a denary tree. A node has sub-trees, a leaf
-- does not. At each node or leaf is the list of strings that encode to the
-- number of that location.
data DenaryTree =
Node {strings :: [String], subTrees :: Array Int DenaryTree} |
Leaf {strings :: [String]}
deriving (Eq, Show)
-- Empty and unit trees.
emptyTree :: DenaryTree
emptyTree = Leaf []
unitTree :: String -> DenaryTree
unitTree str = Leaf [str]
-- Given an existing tree, create a new tree to be its parent. Put the
-- existing tree at position 'n' in the new parent and put 'strs' in it.
newNode :: [String] -> Int -> DenaryTree -> DenaryTree
newNode strs n subtree = Node strs $ array (0,9) $
(n, subtree) : [(i, emptyTree) | i <- [0..9], i /= n]
-- Add a new word to a tree, returning a new tree.
addWord :: String -> DenaryTree -> DenaryTree
-- The worker function is addWord1. addWord just wraps it up
-- in a more convenient form.
addWord newString tree = addWord1 numbers tree
where
numbers = map encode newString
addWord1 [] (Leaf oldStrs) = Leaf (newString : oldStrs)
addWord1 [] (Node oldStrs arr) = Node (newString : oldStrs) arr
addWord1 (n:ns) (Leaf oldStrs) = newNode oldStrs n $ newBranch ns
addWord1 (n:ns) (Node oldStrs arr) =
Node oldStrs $ arr // [(n, addWord1 ns $ arr ! n)]
newBranch [] = unitTree newString
newBranch (n:ns) = newNode [] n $ newBranch ns
-- Convert a list of words into a NumberTree
wordTree :: [String] -> DenaryTree
wordTree = foldr addWord emptyTree
-- Find the list of words which match a prefix of the number by descending
-- the tree until you either run out of tree or digits, and accumulating
-- the words as you go.
findNumber :: DenaryTree -> String -> [String]
findNumber tree [] = strings tree
findNumber (Node strs arr) (c:cs) =
strs ++ findNumber (arr ! (digitToInt c)) cs
findNumber (Leaf strs) _ = strs
-- Find the list of solutions for a number. This is rendered slightly
-- messy by the fact that one digit can be inserted if no other progress
-- is possible. So if "Foo Bar" is a legal solution then "Foo 7 Ar" is not.
mnemonics :: DenaryTree -> String -> [String]
mnemonics tree numbers = map tail $ mnemonics1 True numbers
where
mnemonics1 _ "" = return " "
mnemonics1 digitOK numbers =
case findNumber tree numbers of
[] ->
if digitOK then do
nextBit <- mnemonics1 False (tail numbers)
return $ ' ' : head numbers : nextBit
else
[]
ls -> do
item <- ls
nextBit <- mnemonics1 True $ drop (length item) numbers
return $ ' ' : item ++ nextBit
Now the Main module with the IO in it:
module Main where
import Data.Char
import System.Environment
import System.IO
import PhoneWord
-- File containing the words. This is the standard Unix dictionary.
dictFile = "/usr/share/dict/words"
-- Read the words file. Return only those "words" that consist entirely
-- of letters and are at least three letters long.
dictWords :: IO [String]
dictWords =
do
text <- readFile dictFile
return $ filter allowed $ lines text
where
allowed wrd =
all isAlpha wrd &&
length wrd >= 4
-- Read the numbers file.
numbers :: String -> IO [String]
numbers fileName = do
text <- readFile fileName
return $ map (filter isDigit) $ lines text
-- The Main Function executed when the program runs.
main :: IO ()
main =
do
args <- getArgs
nums <- numbers $ head args
dict <- dictWords
let
tree = wordTree dict
results = map (\n -> (n, mnemonics tree n)) nums
sequence_ $ concatMap printResult results
where
printResult (num, strings) =
map (\str -> putStrLn $ num ++ ": " ++ str) strings
Finally, a little module to generate a random test file:
module MakeTest where
import System.Random
import Control.Monad
import Test.QuickCheck
alphabet = ['0'..'9']
number :: Gen String
number = do
len <- elements [0..10]
replicateM len $ elements alphabet
numbers :: Gen [String]
numbers = replicateM 1000 number
makeTest :: IO ()
makeTest = do
rnd <- getStdGen
writeFile "testData" $ unlines $ generate 1 rnd numbers
Shorter solution
By John Hamilton.
I heard about this problem from Peter Norvig's page Lisp as an Alternative to Java. (You should also check out http://www.flownet.com/ron/papers/lisp-java/ for more info.) I've been learning Haskell, and to see how it would compare with Lisp, I recently wrote the following program.
import Data.Char
import Data.List
import Data.Map (fromListWith, findWithDefault)
import System.Environment
encodeWord = map f . filter (/= '"') where
f x = head $ [d | (s, d) <- ps, (toLower x) `elem` s]
ps = zip ["e", "jnq", "rwx", "dsy", "ft", "am", "civ", "bku", "lop", "ghz"]
['0'..'9']
translate wordMap _ "" = [""]
translate wordMap digit xs@(x:xs') =
if all null ys && digit
then combine [[x]] (translate wordMap False xs')
else concat $ zipWith combine ys zs
where
ys = [findWithDefault [] s wordMap | s <- (tail . inits) xs]
zs = [translate wordMap True s | s <- tails xs']
combine [] _ = []
combine ys [""] = ys
combine ys zs = [y ++ " " ++ z | y <- ys, z <- zs]
process wordMap n = [n ++ ": " ++ x | x <- xs] where
xs = translate wordMap True $ filter (`notElem` "-/") n
main = do
[dictionary, input] <- getArgs
words <- readFile dictionary
let wordMap = fromListWith (++) [(encodeWord w, [w]) | w <- lines words]
numbers <- readFile input
mapM_ putStrLn $ lines numbers >>= process wordMap
27 lines of code. Nice work. You have handily beaten both Common Lisp and Scheme.
I took 88 lines, of which 22 implemented a denary tree rather than using Data.Map. It's some time since I wrote the program, and I can't remember now why I did this. Some of the other 66 were module declarations and repeated imports (for two modules), and type declarations for all top-level functions. Finally there was the fact that I was using /usr/dict/words because I didn't have the original data set, and this required a certain amount of extra filtering. But even allowing for all that I think your solution is indeed shorter. PaulJohnson 20:05, 24 April 2006 (UTC)
Another solution
By Colin Horne
I wrote my solution without first seeing Paul's; however they turn out to be very similar. It took about 2.75 hours to get a (almost!) working solution, and another 15 minutes, or so, to tidy it up (I have most probably not added sufficient comments, however).
Just before posting this, I realised that I had a subtle bug, in that I ignored the following part of the specification: "If and only if at a particular point no word at all from the dictionary can be inserted, a single digit from the phone number can be copied to the encoding instead."
It took me about an extra hour to find why my program was not producing the correct output, and how to fix it. The resulting total development time is about 4 hours, which is slightly less than the average Lisp/Scheme time.
module Main where
import Data.Array
import System (getArgs)
import Data.Char (digitToInt, intToDigit, toLower, isLetter)
import Data.Foldable (foldl')
import Control.Monad (forM_)
translationsList = [
(0, "e"),
(1, "jnq"),
(2, "rwx"),
(3, "dsy"),
(4, "ft"),
(5, "am"),
(6, "civ"),
(7, "bku"),
(8, "lop"),
(9, "ghz")]
{--- Lookup functions for converting from/to digits ---}
numToChars :: Int -> String
numToChars = (!) arr
where arr = array (0,9) translationsList
charToNum :: Char -> Int
charToNum = (!) arr
where
arr = array ('a','z') convertAll
convert :: (Int, String) -> [(Char, Int)]
convert (i,s) = map (\c -> (c, i)) s
convertAll = concatMap convert translationsList
{--- The Trie data structure ---}
data Trie = Node [String] (Array Int Trie) | Empty
-- Add a node to the trie
insert node str = insert' str node $ map toLower $ filter isLetter str
insert' :: String -> Trie -> String -> Trie
insert' str (Node strs ts) [] = Node (str:strs) ts
insert' str (Node strs ts) (c:cs) =
Node strs $! (ts // [(num, updatedNode)])
where
updatedNode = insert' str (ts ! num) cs
num = charToNum c
insert' str Empty cs = insert' str (Node [] $ array (0,9) $ zip [0..9] $ repeat Empty) cs
readDict :: FilePath -> IO Trie
readDict file = fmap (trie . lines) $ readFile file
trie :: [String] -> Trie
trie strs = foldl' insert Empty strs
{--- Other functions ---}
-- Given a trie and a number, find the representations for each of
-- the number's prefixes, returning each result as a tuple containing
-- the prefix's representation, and its postfix
getAllStrings :: Trie -> [Int] -> [([String], [Int])]
getAllStrings Empty _ = []
getAllStrings (Node strs _) [] = [(strs, [])]
getAllStrings (Node strs ts) xxs@(x:xs) = (getAllStrings (ts ! x) xs) ++ [(strs, xxs)]
-- Lists all string-representations of the given number
-- No further processing of this function's output is required
numToStrings :: Trie -> [Int] -> [String]
numToStrings trie num = map (dropWhile (== ' ')) $ loop [] True num
where
loop :: String -> Bool -> [Int] -> [String]
loop prefix _ [] = [prefix]
loop prefix allowDigit xxs@(x:xs) = case result of
[] -> if allowDigit && noPartialMatch
then loop (prefix++" "++[intToDigit x]) False xs
else []
xs -> xs
where
noPartialMatch = flip all allStrings $ (== []) . fst
allStrings = getAllStrings trie xxs
result = flip concatMap allStrings $
\(strs,rest) ->
flip concatMap strs $ \str ->
loop (prefix++" "++str) True rest
-- Reads a number from the given string,
-- returning a list of its digits
readNumber :: String -> [Int]
readNumber [] = []
readNumber (c:cs)
| c >= '0' && c <= '9' = digitToInt c : rest
| otherwise = rest
where rest = readNumber cs
main = do
(dictFile:numbersFile:[]) <- getArgs
dict <- readDict dictFile
numbers <- fmap lines $ readFile numbersFile
forM_ numbers $ \num' -> do
let num = readNumber num'
let strs = numToStrings dict num
forM_ strs $ \str ->
putStrLn $ num' ++ ": " ++ str