Haskell Quiz/Bytecode Compiler/Solution Justin Bailey: Difference between revisions
No edit summary |
(Updated to corrrect code) |
||
Line 1: | Line 1: | ||
[[Category:Code]] | [[Category:Code]] | ||
This solution | This solution should work correctly. I was unable to test the byte codes generated, for obvious reasons. However, all test strings from the quiz do evaluate to the correct values. | ||
< | To see the (symbolic) byte codes generated, run <hask>generate_tests</hask>. To see the actual byte codes, run <hask>compile_tests</hask>. To see that the values produced by each expression match those expected, run <hask>eval_tests</hask>. The tests are contained in the variables <hask>test1,test2, ..., test6</hask>, which correspond to the six "test_n" methods fouind in the quiz's test program. | ||
</ | |||
< | |||
The byte codes aren't optimized. For example, SWAP is never used. However, they should produce correct results (even for negative and LCONST/CONST values). | |||
The code below is literate Haskell. | The code below is literate Haskell. | ||
Line 24: | Line 10: | ||
<haskell> | <haskell> | ||
\begin{code} | \begin{code} | ||
import | import Text.ParserCombinators.Parsec hiding (parse) | ||
import qualified Text.ParserCombinators.Parsec as P (parse) | |||
import Text.ParserCombinators.Parsec.Expr | |||
import Data.Bits | |||
data Op = Plus | Minus | Mult | Div | Pow | Mod | -- Represents various operations that can be applied | ||
-- to expressions. | |||
data Op = Plus | Minus | Mult | Div | Pow | Mod | Neg | |||
deriving (Show, Eq) | deriving (Show, Eq) | ||
-- Represents expression we can build - either numbers or expressions | |||
-- connected by operators. | |||
data Expression = Statement Op Expression Expression | data Expression = Statement Op Expression Expression | ||
| Val Integer | | Val Integer | ||
Line 40: | Line 27: | ||
deriving (Show) | deriving (Show) | ||
-- Define the byte codes that can be generated. | |||
data Bytecode = NOOP | CONST Integer | LCONST Integer | data Bytecode = NOOP | CONST Integer | LCONST Integer | ||
| ADD | | ADD | ||
Line 50: | Line 38: | ||
deriving (Show) | deriving (Show) | ||
-- Take | -- Using imported Parsec.Expr library, build a parser for expressions. | ||
expr :: Parser Expression | |||
expr = | |||
buildExpressionParser table factor | |||
<?> "expression" | |||
where | |||
-- Recognizes a factor in an expression | |||
factor = | |||
do{ char '(' | |||
; x <- expr | |||
; char ')' | |||
; return x | |||
} | |||
<|> number | |||
<?> "simple expression" | |||
-- Recognizes a number | |||
number :: Parser Expression | |||
number = do{ ds <- many1 digit | |||
; return (Val (read ds)) | |||
} | |||
<?> "number" | |||
-- Specifies operator, associativity, precendence, and constructor to execute | |||
-- and built AST with. | |||
table = | |||
[[prefix "-" (Statement Mult (Val (-1)))], | |||
[binary "^" (Statement Pow) AssocRight], | |||
[binary "*" (Statement Mult) AssocLeft, binary "/" (Statement Div) AssocLeft, binary "%" (Statement Mod) AssocLeft], | |||
[binary "+" (Statement Plus) AssocLeft, binary "-" (Statement Minus) AssocLeft] | |||
] | |||
where | |||
binary s f assoc | |||
= Infix (do{ string s; return f}) assoc | |||
prefix s f | |||
= Prefix (do{ string s; return f}) | |||
-- Parses a string into an AST, using the parser defined above | |||
parse s = case P.parse expr "" s of | |||
Right ast -> ast | |||
Left e -> error $ show e | |||
-- Take AST and evaluate (mostly for testing) | |||
eval (Val n) = n | eval (Val n) = n | ||
eval (Statement op left right) | eval (Statement op left right) | ||
Line 63: | Line 90: | ||
| op == Mod = eval left `mod` eval right | | op == Mod = eval left `mod` eval right | ||
-- | -- Takes an AST and turns it into a byte code list | ||
generate stmt = generate' stmt [] | |||
where | |||
generate' (Statement op left right) instr = | |||
let | |||
li = generate' left instr | |||
ri = generate' right instr | |||
lri = li ++ ri | |||
in case op of | |||
Plus -> lri ++ [ADD] | |||
Minus -> lri ++ [SUB] | |||
Mult -> lri ++ [MUL] | |||
Div -> lri ++ [DIV] | |||
Mod -> lri ++ [MOD] | |||
Pow -> lri ++ [POW] | |||
generate' (Val n) instr = | |||
if abs(n) > 32768 | |||
then instr ++ [LCONST n] | |||
else instr ++ [CONST n] | |||
-- Takes a statement and converts it into a list of actual bytes to | |||
-- be interpreted | |||
compile s = toBytes (generate $ parse s) | |||
-- Convert a list of byte codes to a list of integer codes. If LCONST or CONST | |||
-- instruction are seen, correct byte representantion is produced | |||
toBytes ((NOOP):xs) = 0 : toBytes xs | |||
toBytes ((CONST n):xs) = 1 : (toConstBytes (fromInteger n)) ++ toBytes xs | |||
toBytes ((LCONST n):xs) = 2 : (toLConstBytes (fromInteger n)) ++ toBytes xs | |||
toBytes ((ADD):xs) = 0x0a : toBytes xs | |||
toBytes ((SUB):xs) = 0x0b : toBytes xs | |||
toBytes ((MUL):xs) = 0x0c : toBytes xs | |||
toBytes ((POW):xs) = 0x0d : toBytes xs | |||
toBytes ((DIV):xs) = 0x0e : toBytes xs | |||
toBytes ((MOD):xs) = 0x0f : toBytes xs | |||
toBytes ((SWAP):xs) = 0x0a : toBytes xs | |||
toBytes [] = [] | |||
-- Convert number to CONST representation (2 element list) | |||
toConstBytes n = toByteList 2 n | |||
toLConstBytes n = toByteList 4 n | |||
-- Convert a number into a list of 8-bit bytes (big-endian/network byte order). | |||
-- Make sure final list is size elements long | |||
toByteList :: Bits Int => Int -> Int -> [Int] | |||
toByteList size n = | |||
if (length bytes) < size | |||
then (replicate (size - (length bytes)) 0) ++ bytes | |||
else bytes | |||
where | |||
bytes = reverse $ toByteList' n | |||
-- for negative, and with signed bit and remove negative. Then continue recursion. | |||
toByteList' 0 = [] | |||
toByteList' a | a < 0 = (a .&. 511) : toByteList' (abs(a) `shiftR` 8) | |||
| otherwise = (a .&. 255) : toByteList' (a `shiftR` 8) | |||
- | -- All tests defined by the quiz, with the associated values they should evaluate to. | ||
test1 = [(2+2, "2+2"), (2-2, "2-2"), (2*2, "2*2"), (2^2, "2^2"), (2 `div` 2, "2/2"), | |||
(2 `mod` 2, "2%2"), (3 `mod` 2, "3%2")] | |||
-- | test2 = [(2+2+2, "2+2+2"), (2-2-2, "2-2-2"), (2*2*2, "2*2*2"), (2^2^2, "2^2^2"), (4 `div` 2 `div` 2, "4/2/2"), | ||
(7`mod`2`mod`1, "7%2%1")] | |||
test3 = [(2+2-2, "2+2-2"), (2-2+2, "2-2+2"), (2*2+2, "2*2+2"), (2^2+2, "2^2+2"), | |||
(4 `div` 2+2, "4/2+2"), (7`mod`2+1, "7%2+1")] | |||
-- | test4 = [(2+(2-2), "2+(2-2)"), (2-(2+2), "2-(2+2)"), (2+(2*2), "2+(2*2)"), (2*(2+2), "2*(2+2)"), | ||
- | (2^(2+2), "2^(2+2)"), (4 `div` (2+2), "4/(2+2)"), (7`mod`(2+1), "7%(2+1)")] | ||
( | |||
-- | test5 = [(-2+(2-2), "-2+(2-2)"), (2-(-2+2), "2-(-2+2)"), (2+(2 * -2), "2+(2*-2)")] | ||
-- | |||
-- | |||
test6 = [((3 `div` 3)+(8-2), "(3/3)+(8-2)"), ((1+3) `div` (2 `div` 2)*(10-8), "(1+3)/(2/2)*(10-8)"), | |||
((1*3)*4*(5*6), "(1*3)*4*(5*6)"), ((10`mod`3)*(2+2), "(10%3)*(2+2)"), (2^(2+(3 `div` 2)^2), "2^(2+(3/2)^2)"), | |||
((10 `div` (2+3)*4), "(10/(2+3)*4)"), (5+((5*4)`mod`(2+1)), "5+((5*4)%(2+1))")] | |||
-- Evaluates the tests and makes sure the expressions match the expected values | |||
eval_tests = map eval_tests [test1, test2, test3, test4, test5, test6] | |||
where | |||
eval_tests ((val, stmt):ts) = | |||
let eval_val = eval $ parse stmt | |||
in | |||
if val == eval_val | |||
then "True" : eval_tests ts | |||
else (stmt ++ " evaluated incorrectly to " ++ show eval_val ++ " instead of " ++ show val) : eval_tests ts | |||
eval_tests [] = [] | |||
-- Takes all the tests and displays symbolic bytes codes for each | |||
generate_tests = map generate_all [test1,test2,test3,test4,test5,test6] | |||
where generate_all ((val, stmt):ts) = generate (parse stmt) : generate_all ts | |||
generate_all [] = [] | |||
-- Takes all tests and generates a list of bytes representing them | |||
compile_tests = map compile_all [test1,test2,test3,test4,test5,test6] | |||
where compile_all ((val, stmt):ts) = compile stmt : compile_all ts | |||
compile_all [] = [] | |||
\end{code} | \end{code} | ||
</haskell> | </haskell> |
Revision as of 23:43, 7 November 2006
This solution should work correctly. I was unable to test the byte codes generated, for obvious reasons. However, all test strings from the quiz do evaluate to the correct values.
To see the (symbolic) byte codes generated, run generate_tests
. To see the actual byte codes, run compile_tests
. To see that the values produced by each expression match those expected, run eval_tests
. The tests are contained in the variables test1,test2, ..., test6
, which correspond to the six "test_n" methods fouind in the quiz's test program.
The byte codes aren't optimized. For example, SWAP is never used. However, they should produce correct results (even for negative and LCONST/CONST values).
The code below is literate Haskell.
\begin{code}
import Text.ParserCombinators.Parsec hiding (parse)
import qualified Text.ParserCombinators.Parsec as P (parse)
import Text.ParserCombinators.Parsec.Expr
import Data.Bits
-- Represents various operations that can be applied
-- to expressions.
data Op = Plus | Minus | Mult | Div | Pow | Mod | Neg
deriving (Show, Eq)
-- Represents expression we can build - either numbers or expressions
-- connected by operators.
data Expression = Statement Op Expression Expression
| Val Integer
| Empty
deriving (Show)
-- Define the byte codes that can be generated.
data Bytecode = NOOP | CONST Integer | LCONST Integer
| ADD
| SUB
| MUL
| POW
| DIV
| MOD
| SWAP
deriving (Show)
-- Using imported Parsec.Expr library, build a parser for expressions.
expr :: Parser Expression
expr =
buildExpressionParser table factor
<?> "expression"
where
-- Recognizes a factor in an expression
factor =
do{ char '('
; x <- expr
; char ')'
; return x
}
<|> number
<?> "simple expression"
-- Recognizes a number
number :: Parser Expression
number = do{ ds <- many1 digit
; return (Val (read ds))
}
<?> "number"
-- Specifies operator, associativity, precendence, and constructor to execute
-- and built AST with.
table =
[[prefix "-" (Statement Mult (Val (-1)))],
[binary "^" (Statement Pow) AssocRight],
[binary "*" (Statement Mult) AssocLeft, binary "/" (Statement Div) AssocLeft, binary "%" (Statement Mod) AssocLeft],
[binary "+" (Statement Plus) AssocLeft, binary "-" (Statement Minus) AssocLeft]
]
where
binary s f assoc
= Infix (do{ string s; return f}) assoc
prefix s f
= Prefix (do{ string s; return f})
-- Parses a string into an AST, using the parser defined above
parse s = case P.parse expr "" s of
Right ast -> ast
Left e -> error $ show e
-- Take AST and evaluate (mostly for testing)
eval (Val n) = n
eval (Statement op left right)
| op == Mult = eval left * eval right
| op == Minus = eval left - eval right
| op == Plus = eval left + eval right
| op == Div = eval left `div` eval right
| op == Pow = eval left ^ eval right
| op == Mod = eval left `mod` eval right
-- Takes an AST and turns it into a byte code list
generate stmt = generate' stmt []
where
generate' (Statement op left right) instr =
let
li = generate' left instr
ri = generate' right instr
lri = li ++ ri
in case op of
Plus -> lri ++ [ADD]
Minus -> lri ++ [SUB]
Mult -> lri ++ [MUL]
Div -> lri ++ [DIV]
Mod -> lri ++ [MOD]
Pow -> lri ++ [POW]
generate' (Val n) instr =
if abs(n) > 32768
then instr ++ [LCONST n]
else instr ++ [CONST n]
-- Takes a statement and converts it into a list of actual bytes to
-- be interpreted
compile s = toBytes (generate $ parse s)
-- Convert a list of byte codes to a list of integer codes. If LCONST or CONST
-- instruction are seen, correct byte representantion is produced
toBytes ((NOOP):xs) = 0 : toBytes xs
toBytes ((CONST n):xs) = 1 : (toConstBytes (fromInteger n)) ++ toBytes xs
toBytes ((LCONST n):xs) = 2 : (toLConstBytes (fromInteger n)) ++ toBytes xs
toBytes ((ADD):xs) = 0x0a : toBytes xs
toBytes ((SUB):xs) = 0x0b : toBytes xs
toBytes ((MUL):xs) = 0x0c : toBytes xs
toBytes ((POW):xs) = 0x0d : toBytes xs
toBytes ((DIV):xs) = 0x0e : toBytes xs
toBytes ((MOD):xs) = 0x0f : toBytes xs
toBytes ((SWAP):xs) = 0x0a : toBytes xs
toBytes [] = []
-- Convert number to CONST representation (2 element list)
toConstBytes n = toByteList 2 n
toLConstBytes n = toByteList 4 n
-- Convert a number into a list of 8-bit bytes (big-endian/network byte order).
-- Make sure final list is size elements long
toByteList :: Bits Int => Int -> Int -> [Int]
toByteList size n =
if (length bytes) < size
then (replicate (size - (length bytes)) 0) ++ bytes
else bytes
where
bytes = reverse $ toByteList' n
-- for negative, and with signed bit and remove negative. Then continue recursion.
toByteList' 0 = []
toByteList' a | a < 0 = (a .&. 511) : toByteList' (abs(a) `shiftR` 8)
| otherwise = (a .&. 255) : toByteList' (a `shiftR` 8)
-- All tests defined by the quiz, with the associated values they should evaluate to.
test1 = [(2+2, "2+2"), (2-2, "2-2"), (2*2, "2*2"), (2^2, "2^2"), (2 `div` 2, "2/2"),
(2 `mod` 2, "2%2"), (3 `mod` 2, "3%2")]
test2 = [(2+2+2, "2+2+2"), (2-2-2, "2-2-2"), (2*2*2, "2*2*2"), (2^2^2, "2^2^2"), (4 `div` 2 `div` 2, "4/2/2"),
(7`mod`2`mod`1, "7%2%1")]
test3 = [(2+2-2, "2+2-2"), (2-2+2, "2-2+2"), (2*2+2, "2*2+2"), (2^2+2, "2^2+2"),
(4 `div` 2+2, "4/2+2"), (7`mod`2+1, "7%2+1")]
test4 = [(2+(2-2), "2+(2-2)"), (2-(2+2), "2-(2+2)"), (2+(2*2), "2+(2*2)"), (2*(2+2), "2*(2+2)"),
(2^(2+2), "2^(2+2)"), (4 `div` (2+2), "4/(2+2)"), (7`mod`(2+1), "7%(2+1)")]
test5 = [(-2+(2-2), "-2+(2-2)"), (2-(-2+2), "2-(-2+2)"), (2+(2 * -2), "2+(2*-2)")]
test6 = [((3 `div` 3)+(8-2), "(3/3)+(8-2)"), ((1+3) `div` (2 `div` 2)*(10-8), "(1+3)/(2/2)*(10-8)"),
((1*3)*4*(5*6), "(1*3)*4*(5*6)"), ((10`mod`3)*(2+2), "(10%3)*(2+2)"), (2^(2+(3 `div` 2)^2), "2^(2+(3/2)^2)"),
((10 `div` (2+3)*4), "(10/(2+3)*4)"), (5+((5*4)`mod`(2+1)), "5+((5*4)%(2+1))")]
-- Evaluates the tests and makes sure the expressions match the expected values
eval_tests = map eval_tests [test1, test2, test3, test4, test5, test6]
where
eval_tests ((val, stmt):ts) =
let eval_val = eval $ parse stmt
in
if val == eval_val
then "True" : eval_tests ts
else (stmt ++ " evaluated incorrectly to " ++ show eval_val ++ " instead of " ++ show val) : eval_tests ts
eval_tests [] = []
-- Takes all the tests and displays symbolic bytes codes for each
generate_tests = map generate_all [test1,test2,test3,test4,test5,test6]
where generate_all ((val, stmt):ts) = generate (parse stmt) : generate_all ts
generate_all [] = []
-- Takes all tests and generates a list of bytes representing them
compile_tests = map compile_all [test1,test2,test3,test4,test5,test6]
where compile_all ((val, stmt):ts) = compile stmt : compile_all ts
compile_all [] = []
\end{code}