Difference between revisions of "Phone number"

From HaskellWiki
Jump to navigation Jump to search
(Added my name to my program, and bracketed code with haskell tags.)
Line 1: Line 1:
  +
== Original Program ==
  +
  +
By Paul Johnson
  +
 
This program was written after I read "An Empirical Comparison of Seven Programming Languages" at http://www.cis.udel.edu/~silber/470STUFF/article.pdf.
 
This program was written after I read "An Empirical Comparison of Seven Programming Languages" at http://www.cis.udel.edu/~silber/470STUFF/article.pdf.
   
Line 4: Line 8:
   
 
First, the PhoneWord module. This contains the meat of the program.
 
First, the PhoneWord module. This contains the meat of the program.
  +
  +
<haskell>
  +
module PhoneWord where
 
 
  +
import Data.Array
module PhoneWord where
 
  +
import Data.Char
  +
import Data.List
 
 
import Data.Array
 
import Data.Char
 
import Data.List
 
 
 
  +
-- The number encoding, as specified in the problem statement.
  +
numberPairs :: [(Int, String)]
  +
numberPairs = [(0, "e"), (1, "jnq"), (2, "rwx"), (3, "dsy"), (4, "ft"),
  +
(5, "am"), (6, "civ"), (7, "bku"), (8, "lop"), (9, "ghz")]
 
 
-- The number encoding, as specified in the problem statement.
 
numberPairs :: [(Int, String)]
 
numberPairs = [(0, "e"), (1, "jnq"), (2, "rwx"), (3, "dsy"), (4, "ft"),
 
(5, "am"), (6, "civ"), (7, "bku"), (8, "lop"), (9, "ghz")]
 
 
 
  +
-- Encode a character by looking it up in the inverse of the numberPairs
  +
-- table. Creating the inverse takes three lines of code (including the
  +
-- 'where'), but we can live with that. The alternative would be to invert
  +
-- the list manually and then have a longer list of pairs above.
  +
encode :: Char -> Int
  +
encode c = codeTable ! (toLower c)
  +
where
  +
codeTable = array ('a', 'z') $ concatMap pairList numberPairs
  +
pairList (n, cs) = map (\c -> (c, n)) cs
 
 
  +
-- The dictionary is stored in a denary tree. A node has sub-trees, a leaf
-- Encode a character by looking it up in the inverse of the numberPairs
 
-- table. Creating the inverse takes three lines of code (including the
+
-- does not. At each node or leaf is the list of strings that encode to the
  +
-- number of that location.
-- 'where'), but we can live with that. The alternative would be to invert
 
  +
data DenaryTree =
-- the list manually and then have a longer list of pairs above.
 
  +
Node {strings :: [String], subTrees :: Array Int DenaryTree} |
encode :: Char -> Int
 
  +
Leaf {strings :: [String]}
encode c = codeTable ! (toLower c)
 
  +
deriving (Eq, Show)
where
 
codeTable = array ('a', 'z') $ concatMap pairList numberPairs
 
pairList (n, cs) = map (\c -> (c, n)) cs
 
 
 
  +
-- Empty and unit trees.
-- The dictionary is stored in a denary tree. A node has sub-trees, a leaf
 
  +
emptyTree :: DenaryTree
-- does not. At each node or leaf is the list of strings that encode to the
 
  +
emptyTree = Leaf []
-- number of that location.
 
data DenaryTree =
 
Node {strings :: [String], subTrees :: Array Int DenaryTree} |
 
Leaf {strings :: [String]}
 
deriving (Eq, Show)
 
 
 
  +
unitTree :: String -> DenaryTree
-- Empty and unit trees.
 
  +
unitTree str = Leaf [str]
emptyTree :: DenaryTree
 
emptyTree = Leaf []
 
 
 
unitTree :: String -> DenaryTree
 
unitTree str = Leaf [str]
 
 
 
  +
-- Given an existing tree, create a new tree to be its parent. Put the
  +
-- existing tree at position 'n' in the new parent and put 'strs' in it.
  +
newNode :: [String] -> Int -> DenaryTree -> DenaryTree
  +
newNode strs n subtree = Node strs $ array (0,9) $
  +
(n, subtree) : [(i, emptyTree) | i <- [0..9], i /= n]
  +
 
 
-- Given an existing tree, create a new tree to be its parent. Put the
+
-- Add a new word to a tree, returning a new tree.
  +
addWord :: String -> DenaryTree -> DenaryTree
-- existing tree at position 'n' in the new parent and put 'strs' in it.
 
newNode :: [String] -> Int -> DenaryTree -> DenaryTree
 
newNode strs n subtree = Node strs $ array (0,9) $
 
(n, subtree) : [(i, emptyTree) | i <- [0..9], i /= n]
 
 
 
  +
-- The worker function is addWord1. addWord just wraps it up
  +
-- in a more convenient form.
  +
addWord newString tree = addWord1 numbers tree
  +
where
  +
numbers = map encode newString
  +
addWord1 [] (Leaf oldStrs) = Leaf (newString : oldStrs)
  +
addWord1 [] (Node oldStrs arr) = Node (newString : oldStrs) arr
  +
addWord1 (n:ns) (Leaf oldStrs) = newNode oldStrs n $ newBranch ns
  +
addWord1 (n:ns) (Node oldStrs arr) =
  +
Node oldStrs $ arr // [(n, addWord1 ns $ arr ! n)]
  +
newBranch [] = unitTree newString
  +
newBranch (n:ns) = newNode [] n $ newBranch ns
 
 
-- Add a new word to a tree, returning a new tree.
 
addWord :: String -> DenaryTree -> DenaryTree
 
 
 
  +
-- Convert a list of words into a NumberTree
-- The worker function is addWord1. addWord just wraps it up
 
  +
wordTree :: [String] -> DenaryTree
-- in a more convenient form.
 
  +
wordTree = foldr addWord emptyTree
addWord newString tree = addWord1 numbers tree
 
where
 
numbers = map encode newString
 
addWord1 [] (Leaf oldStrs) = Leaf (newString : oldStrs)
 
addWord1 [] (Node oldStrs arr) = Node (newString : oldStrs) arr
 
addWord1 (n:ns) (Leaf oldStrs) = newNode oldStrs n $ newBranch ns
 
addWord1 (n:ns) (Node oldStrs arr) =
 
Node oldStrs $ arr // [(n, addWord1 ns $ arr ! n)]
 
newBranch [] = unitTree newString
 
newBranch (n:ns) = newNode [] n $ newBranch ns
 
 
 
 
 
-- Convert a list of words into a NumberTree
+
-- Find the list of words which match a prefix of the number by descending
  +
-- the tree until you either run out of tree or digits, and accumulating
wordTree :: [String] -> DenaryTree
 
  +
-- the words as you go.
wordTree = foldr addWord emptyTree
 
  +
findNumber :: DenaryTree -> String -> [String]
 
 
  +
findNumber tree [] = strings tree
 
 
  +
findNumber (Node strs arr) (c:cs) =
-- Find the list of words which match a prefix of the number by descending
 
  +
strs ++ findNumber (arr ! (digitToInt c)) cs
-- the tree until you either run out of tree or digits, and accumulating
 
-- the words as you go.
 
findNumber :: DenaryTree -> String -> [String]
 
 
 
findNumber tree [] = strings tree
+
findNumber (Leaf strs) _ = strs
 
 
findNumber (Node strs arr) (c:cs) =
 
strs ++ findNumber (arr ! (digitToInt c)) cs
 
 
findNumber (Leaf strs) _ = strs
 
 
 
-- Find the list of solutions for a number. This is rendered slightly
 
-- messy by the fact that one digit can be inserted if no other progress
 
-- is possible. So if "Foo Bar" is a legal solution then "Foo 7 Ar" is not.
 
mnemonics :: DenaryTree -> String -> [String]
 
mnemonics tree numbers = map tail $ mnemonics1 True numbers
 
where
 
mnemonics1 _ "" = return " "
 
mnemonics1 digitOK numbers =
 
case findNumber tree numbers of
 
[] ->
 
if digitOK then do
 
nextBit <- mnemonics1 False (tail numbers)
 
return $ ' ' : head numbers : nextBit
 
else
 
[]
 
ls -> do
 
item <- ls
 
nextBit <- mnemonics1 True $ drop (length item) numbers
 
return $ ' ' : item ++ nextBit
 
 
 
  +
-- Find the list of solutions for a number. This is rendered slightly
  +
-- messy by the fact that one digit can be inserted if no other progress
  +
-- is possible. So if "Foo Bar" is a legal solution then "Foo 7 Ar" is not.
  +
mnemonics :: DenaryTree -> String -> [String]
  +
mnemonics tree numbers = map tail $ mnemonics1 True numbers
  +
where
  +
mnemonics1 _ "" = return " "
  +
mnemonics1 digitOK numbers =
  +
case findNumber tree numbers of
  +
[] ->
  +
if digitOK then do
  +
nextBit <- mnemonics1 False (tail numbers)
  +
return $ ' ' : head numbers : nextBit
  +
else
  +
[]
  +
ls -> do
  +
item <- ls
  +
nextBit <- mnemonics1 True $ drop (length item) numbers
  +
return $ ' ' : item ++ nextBit
  +
</haskell>
  +
 
Now the Main module with the IO in it:
 
Now the Main module with the IO in it:
   
  +
<haskell>
module Main where
 
  +
module Main where
 
 
import Data.Char
+
import Data.Char
import System.Environment
+
import System.Environment
import System.IO
+
import System.IO
 
 
import PhoneWord
+
import PhoneWord
 
 
 
 
-- File containing the words. This is the standard Unix dictionary.
+
-- File containing the words. This is the standard Unix dictionary.
dictFile = "/usr/share/dict/words"
+
dictFile = "/usr/share/dict/words"
 
 
-- Read the words file. Return only those "words" that consist entirely
+
-- Read the words file. Return only those "words" that consist entirely
-- of letters and are at least three letters long.
+
-- of letters and are at least three letters long.
dictWords :: IO [String]
+
dictWords :: IO [String]
dictWords =
+
dictWords =
do
+
do
text <- readFile dictFile
+
text <- readFile dictFile
return $ filter allowed $ lines text
+
return $ filter allowed $ lines text
where
+
where
allowed wrd =
+
allowed wrd =
(and . (map isAlpha)) wrd &&
+
(and . (map isAlpha)) wrd &&
length wrd >= 4
+
length wrd >= 4
 
 
-- Read the numbers file.
+
-- Read the numbers file.
numbers :: String -> IO [String]
+
numbers :: String -> IO [String]
numbers fileName = do
+
numbers fileName = do
text <- readFile fileName
+
text <- readFile fileName
return $ map (filter isDigit) $ lines text
+
return $ map (filter isDigit) $ lines text
 
 
 
 
-- The Main Function executed when the program runs.
+
-- The Main Function executed when the program runs.
main :: IO ()
+
main :: IO ()
main =
+
main =
do
+
do
args <- getArgs
+
args <- getArgs
nums <- numbers $ head args
+
nums <- numbers $ head args
dict <- dictWords
+
dict <- dictWords
let
+
let
tree = wordTree dict
+
tree = wordTree dict
results = map (\n -> (n, mnemonics tree n)) nums
+
results = map (\n -> (n, mnemonics tree n)) nums
sequence_ $ concatMap printResult results
+
sequence_ $ concatMap printResult results
where
+
where
printResult (num, strings) =
+
printResult (num, strings) =
map (\str -> putStrLn $ num ++ ": " ++ str) strings
+
map (\str -> putStrLn $ num ++ ": " ++ str) strings
  +
</haskell>
 
 
 
Finally, a little module to generate a random test file:
 
Finally, a little module to generate a random test file:
   
  +
<haskell>
module MakeTest where
 
  +
module MakeTest where
 
 
import System.Random
+
import System.Random
import Test.QuickCheck
+
import Test.QuickCheck
 
 
alphabet = ['0'..'9']
+
alphabet = ['0'..'9']
 
 
number :: Gen String
+
number :: Gen String
number = do
+
number = do
len <- elements [0..10]
+
len <- elements [0..10]
sequence $ replicate len $ elements alphabet
+
sequence $ replicate len $ elements alphabet
 
 
numbers :: Gen [String]
+
numbers :: Gen [String]
numbers = sequence $ replicate 1000 number
+
numbers = sequence $ replicate 1000 number
 
 
makeTest :: IO ()
+
makeTest :: IO ()
makeTest = do
+
makeTest = do
rnd <- getStdGen
+
rnd <- getStdGen
writeFile "testData" $ unlines $ generate 1 rnd numbers
+
writeFile "testData" $ unlines $ generate 1 rnd numbers
  +
</haskell>
 
   
 
== Shorter solution ==
 
== Shorter solution ==

Revision as of 18:49, 24 April 2006

Original Program

By Paul Johnson

This program was written after I read "An Empirical Comparison of Seven Programming Languages" at http://www.cis.udel.edu/~silber/470STUFF/article.pdf.

To run this program, copy and paste each code block into a file with the appropriate name (so module Main goes into Main.hs) and compile. Depending on your OS you may need to modify the dictionary file name. You may also have to run it with "+RTS -k16000000" on the command line in order to increase the stack space. It depends on your dictionary.

First, the PhoneWord module. This contains the meat of the program.

 
module PhoneWord where
 
import Data.Array
import Data.Char
import Data.List
 
 
-- The number encoding, as specified in the problem statement.
numberPairs :: [(Int, String)]
numberPairs = [(0, "e"), (1, "jnq"), (2, "rwx"), (3, "dsy"), (4, "ft"),
   (5, "am"), (6, "civ"), (7, "bku"), (8, "lop"), (9, "ghz")]
 
 
-- Encode a character by looking it up in the inverse of the numberPairs
-- table.  Creating the inverse takes three lines of code (including the
-- 'where'), but we can live with that.  The alternative would be to invert
-- the list manually and then have a longer list of pairs above.
encode :: Char -> Int
encode c = codeTable ! (toLower c)
   where
      codeTable = array ('a', 'z') $ concatMap pairList numberPairs
      pairList (n, cs) = map (\c -> (c, n)) cs
 
-- The dictionary is stored in a denary tree.  A node has sub-trees, a leaf
-- does not.  At each node or leaf is the list of strings that encode to the
-- number of that location.
data DenaryTree =
   Node {strings :: [String], subTrees :: Array Int DenaryTree} |
   Leaf {strings :: [String]}
      deriving (Eq, Show)
 
-- Empty and unit trees.
emptyTree :: DenaryTree
emptyTree = Leaf []
 
unitTree :: String -> DenaryTree
unitTree str = Leaf [str]
 
 
-- Given an existing tree, create a new tree to be its parent.  Put the
-- existing tree at position 'n' in the new parent and put 'strs' in it.
newNode :: [String] -> Int -> DenaryTree -> DenaryTree
newNode strs n subtree = Node strs $ array (0,9) $
   (n, subtree) : [(i, emptyTree) | i <- [0..9], i /= n]

 
-- Add a new word to a tree, returning a new tree.
addWord :: String -> DenaryTree -> DenaryTree
 
-- The worker function is addWord1.  addWord just wraps it up
-- in a more convenient form.
addWord newString tree = addWord1 numbers tree
   where
      numbers = map encode newString
      addWord1 [] (Leaf oldStrs) = Leaf (newString : oldStrs)
      addWord1 [] (Node oldStrs arr) = Node (newString : oldStrs) arr
      addWord1 (n:ns) (Leaf oldStrs) = newNode oldStrs n $ newBranch ns
      addWord1 (n:ns) (Node oldStrs arr) =
         Node oldStrs $ arr // [(n, addWord1 ns $ arr ! n)]
      newBranch [] = unitTree newString
      newBranch (n:ns) = newNode [] n $ newBranch ns
 
 
-- Convert a list of words into a NumberTree
wordTree :: [String] -> DenaryTree
wordTree = foldr addWord emptyTree
 
 
-- Find the list of words which match a prefix of the number by descending
-- the tree until you either run out of tree or digits, and accumulating
-- the words as you go.
findNumber :: DenaryTree -> String -> [String]
 
findNumber tree [] = strings tree
 
findNumber (Node strs arr) (c:cs) =
   strs ++ findNumber (arr ! (digitToInt c)) cs
 
findNumber (Leaf strs) _ = strs
 
 
-- Find the list of solutions for a number.  This is rendered slightly
-- messy by the fact that one digit can be inserted if no other progress
-- is possible.  So if "Foo Bar" is a legal solution then "Foo 7 Ar" is not.
mnemonics :: DenaryTree -> String -> [String]
mnemonics tree numbers = map tail $ mnemonics1 True numbers
   where
      mnemonics1 _ "" = return " "
      mnemonics1 digitOK numbers =
         case findNumber tree numbers of
            [] ->
               if digitOK then do
                  nextBit <- mnemonics1 False (tail numbers)
                  return $ ' ' : head numbers : nextBit
               else
                  []
            ls -> do
               item <- ls
               nextBit <- mnemonics1 True $ drop (length item) numbers
               return $ ' ' : item ++ nextBit

Now the Main module with the IO in it:

module Main where
 
import Data.Char
import System.Environment
import System.IO
 
import PhoneWord
 
 
-- File containing the words.  This is the standard Unix dictionary.
dictFile = "/usr/share/dict/words"
 
-- Read the words file.  Return only those "words" that consist entirely
-- of letters and are at least three letters long.
dictWords :: IO [String]
dictWords =
   do
      text <- readFile dictFile
      return $ filter allowed $ lines text
   where
      allowed wrd =
         (and . (map isAlpha)) wrd &&
         length wrd >= 4
 
-- Read the numbers file.
numbers :: String -> IO [String]
numbers fileName = do
   text <- readFile fileName
   return $ map (filter isDigit) $ lines text
 
 
-- The Main Function executed when the program runs.
main :: IO ()
main =
   do
      args <- getArgs
      nums <- numbers $ head args
      dict <- dictWords
      let
         tree = wordTree dict
         results = map (\n -> (n, mnemonics tree n)) nums
      sequence_ $ concatMap printResult results
   where
      printResult (num, strings) =
         map (\str -> putStrLn $ num ++ ": " ++ str) strings

Finally, a little module to generate a random test file:

module MakeTest where
 
import System.Random
import Test.QuickCheck
 
alphabet = ['0'..'9']
 
number :: Gen String
number = do
   len <- elements [0..10]
   sequence $ replicate len $ elements alphabet
 
numbers :: Gen [String]
numbers = sequence $ replicate 1000 number
 
makeTest :: IO ()
makeTest = do
   rnd <- getStdGen
   writeFile "testData" $ unlines $ generate 1 rnd numbers

Shorter solution

By John Hamilton.

I heard about this problem from Peter Norvig's page Lisp as an Alternative to Java. (You should also check out http://www.flownet.com/ron/papers/lisp-java/ for more info.) I've been learning Haskell, and to see how it would compare with Lisp, I recently wrote the following program.

import Data.Char
import Data.List
import Data.Map (fromListWith, findWithDefault)
import System.Environment

encodeWord = map f . filter (/= '"') where
  f x = head $ [d | (s, d) <- ps, (toLower x) `elem` s]
  ps = zip ["e", "jnq", "rwx", "dsy", "ft", "am", "civ", "bku", "lop", "ghz"]
           ['0'..'9']

translate wordMap _ "" = [""]
translate wordMap digit xs@(x:xs') =
  if all null ys && digit
    then combine [[x]] (translate wordMap False xs')
    else concat $ zipWith combine ys zs
  where
    ys = [findWithDefault [] s wordMap | s <- (tail . inits) xs]
    zs = [translate wordMap True s | s <- tails xs']
    combine [] _ = []
    combine ys [""] = ys
    combine ys zs = [y ++ " " ++ z | y <- ys, z <- zs]

process wordMap n = [n ++ ": " ++ x | x <- xs] where
  xs = translate wordMap True $ filter (`notElem` "-/") n

main = do
  [dictionary, input] <- getArgs
  words <- readFile dictionary
  let wordMap = fromListWith (++) [(encodeWord w, [w]) | w <- lines words]
  numbers <- readFile input
  mapM_ putStrLn $ lines numbers >>= process wordMap