Haskell Quiz/Bytecode Compiler/Solution Justin Bailey: Difference between revisions

From HaskellWiki
(Updated to corrrect code)
(Updated to working version (including negative numbers). Added interpreter tests)
Line 1: Line 1:
[[Category:Code]]
[[Category:Code]]
This solution should work correctly. I was unable to test the byte codes generated, for obvious reasons. However, all test strings from the quiz do evaluate to the correct values.  
This solution should work correctly. All test strings from the quiz evaluate to the correct values. To see it for yourself, execute the <hask>interpret_tests</hask> function. 


To see the (symbolic) byte codes generated, run <hask>generate_tests</hask>. To see the actual byte codes, run <hask>compile_tests</hask>. To see that the values produced by each expression match those expected, run <hask>eval_tests</hask>. The tests are contained in the variables <hask>test1,test2, ..., test6</hask>, which correspond to the six "test_n" methods fouind in the quiz's test program.
To see the (symbolic) byte codes generated, run <hask>generate_tests</hask>. To see the actual byte codes, run <hask>compile_tests</hask>. To see that the values produced by each expression match those expected, run <hask>eval_tests</hask>. The last actually evaluates the AST, without generating any bytescodes. The tests are contained in the variables <hask>test1,test2, ..., test6</hask>, which correspond to the six "test_n" methods found in the quiz's test program.


The byte codes aren't optimized. For example, SWAP is never used. However, they should produce correct results (even for negative and LCONST/CONST values).
The byte codes aren't optimized. For example, SWAP is never used. However, they should produce correct results (even for negative and LCONST/CONST values).
Line 14: Line 14:
import Text.ParserCombinators.Parsec.Expr
import Text.ParserCombinators.Parsec.Expr
import Data.Bits
import Data.Bits
import Data.Int


-- Represents various operations that can be applied
-- Represents various operations that can be applied
Line 21: Line 22:


-- Represents expression we can build - either numbers or expressions
-- Represents expression we can build - either numbers or expressions
-- connected by operators.
-- connected by operators. This structure is the basis of the AST built
-- when parsing
data Expression = Statement Op Expression Expression
data Expression = Statement Op Expression Expression
           | Val Integer
           | Val Integer
Line 73: Line 75:
       prefix s f  
       prefix s f  
         = Prefix (do{ string s; return f})
         = Prefix (do{ string s; return f})


-- Parses a string into an AST, using the parser defined above
-- Parses a string into an AST, using the parser defined above
Line 107: Line 108:
               generate' (Val n) instr =
               generate' (Val n) instr =
                 if abs(n) > 32768
                 if abs(n) > 32768
                 then instr ++ [LCONST n]
                 then LCONST n : instr 
                 else instr ++ [CONST n]
                 else CONST n : instr


-- Takes a statement and converts it into a list of actual bytes to
-- Takes a statement and converts it into a list of actual bytes to
Line 135: Line 136:
-- Make sure final list is size elements long
-- Make sure final list is size elements long
toByteList ::  Bits Int => Int -> Int -> [Int]
toByteList ::  Bits Int => Int -> Int -> [Int]
toByteList size n =
toByteList size n = reverse $ take size (toByteList' n)
    if (length bytes) < size
    then (replicate (size - (length bytes)) 0) ++ bytes
    else bytes
     where
     where
       bytes = reverse $ toByteList' n
       toByteList' a = (a .&. 255) : toByteList' (a `shiftR` 8)
      -- for negative, and with signed bit and remove negative. Then continue recursion.
      toByteList' 0 = []
      toByteList' a | a < 0 = (a .&. 511) : toByteList' (abs(a) `shiftR` 8)
                    | otherwise = (a .&. 255) : toByteList' (a `shiftR` 8)


-- All tests defined by the quiz, with the associated values they should evaluate to.
-- All tests defined by the quiz, with the associated values they should evaluate to.
Line 166: Line 160:


-- Evaluates the tests and makes sure the expressions match the expected values
-- Evaluates the tests and makes sure the expressions match the expected values
eval_tests = map eval_tests [test1, test2, test3, test4, test5, test6]
eval_tests = concat $ map eval_tests [test1, test2, test3, test4, test5, test6]
   where
   where
     eval_tests ((val, stmt):ts) =
     eval_tests ((val, stmt):ts) =
Line 172: Line 166:
       in
       in
         if val == eval_val  
         if val == eval_val  
         then "True" : eval_tests ts
         then ("Passed: " ++ stmt) : eval_tests ts
         else (stmt ++ " evaluated incorrectly to " ++ show eval_val ++ " instead of " ++ show val) : eval_tests ts
         else ("Failed: " ++ stmt ++ "(" ++ show eval_val ++ ")") : eval_tests ts
     eval_tests [] = []
     eval_tests [] = []


-- Takes all the tests and displays symbolic bytes codes for each
-- Takes all the tests and displays symbolic bytes codes for each
generate_tests = map generate_all [test1,test2,test3,test4,test5,test6]
generate_tests = concat $ map generate_all [test1,test2,test3,test4,test5,test6]
   where generate_all ((val, stmt):ts) = generate (parse stmt) : generate_all ts
   where generate_all ((val, stmt):ts) = (stmt, generate (parse stmt)) : generate_all ts
         generate_all [] = []
         generate_all [] = []
          
          
-- Takes all tests and generates a list of bytes representing them
-- Takes all tests and generates a list of bytes representing them
compile_tests = map compile_all [test1,test2,test3,test4,test5,test6]
compile_tests = concat $ map compile_all [test1,test2,test3,test4,test5,test6]
   where compile_all ((val, stmt):ts) = compile stmt : compile_all ts
   where compile_all ((val, stmt):ts) = (stmt, compile stmt) : compile_all ts
         compile_all [] = []
         compile_all [] = []


interpret_tests = concat $ map f' [test1, test2, test3, test4, test5, test6]
  where
    f' tests = map f'' tests
    f'' (expected, stmt) =
      let value = fromIntegral $ interpret [] $ compile stmt
      in
        if value == expected
        then "Passed: " ++ stmt
        else "Failed: " ++ stmt ++ "(" ++ (show value) ++ ")"
fromBytes n xs =
  let int16 = (fromIntegral ((fromIntegral int32) :: Int16)) :: Int
      int32 = byte xs
      byte xs = foldl (\accum byte -> (accum `shiftL` 8) .|. (byte)) (head xs) (take (n - 1) (tail xs))
  in
    if n == 2
    then int16
    else int32
 
interpret [] [] = error "no result produced"
interpret (s1:s) [] = s1
interpret s (o:xs) | o < 10 = interpret ((fromBytes (o*2) xs):s) (drop (o*2) xs)
interpret (s1:s2:s) (o:xs)
  | o == 16 = interpret (s2:s1:s) xs
  | otherwise = interpret (((case o of 10 -> (+); 11 -> (-); 12 -> (*); 13 -> (^); 14 -> div; 15 -> mod) s2 s1):s) xs
\end{code}           
\end{code}           
</haskell>
</haskell>

Revision as of 18:50, 9 November 2006

This solution should work correctly. All test strings from the quiz evaluate to the correct values. To see it for yourself, execute the interpret_tests function.

To see the (symbolic) byte codes generated, run generate_tests. To see the actual byte codes, run compile_tests. To see that the values produced by each expression match those expected, run eval_tests. The last actually evaluates the AST, without generating any bytescodes. The tests are contained in the variables test1,test2, ..., test6, which correspond to the six "test_n" methods found in the quiz's test program.

The byte codes aren't optimized. For example, SWAP is never used. However, they should produce correct results (even for negative and LCONST/CONST values).

The code below is literate Haskell.

\begin{code}
import Text.ParserCombinators.Parsec hiding (parse)
import qualified Text.ParserCombinators.Parsec as P (parse)
import Text.ParserCombinators.Parsec.Expr
import Data.Bits
import Data.Int

-- Represents various operations that can be applied
-- to expressions.
data Op = Plus | Minus | Mult | Div | Pow | Mod | Neg
  deriving (Show, Eq)

-- Represents expression we can build - either numbers or expressions
-- connected by operators. This structure is the basis of the AST built
-- when parsing
data Expression = Statement Op Expression Expression
           | Val Integer
           | Empty
  deriving (Show)

-- Define the byte codes that can be generated. 
data Bytecode = NOOP | CONST Integer | LCONST Integer
            | ADD
            | SUB
            | MUL
            | POW
            | DIV
            | MOD
            | SWAP
  deriving (Show)


-- Using imported Parsec.Expr library, build a parser for expressions.
expr :: Parser Expression
expr =
  buildExpressionParser table factor
  <?> "expression"
  where
  -- Recognizes a factor in an expression
  factor  =
    do{ char '('
          ; x <- expr
          ; char ')'
          ; return x 
          }
      <|> number
      <?> "simple expression"
  -- Recognizes a number
  number  :: Parser Expression 
  number  = do{ ds <- many1 digit
              ; return (Val (read ds))
              }
          <?> "number"
  -- Specifies operator, associativity, precendence, and constructor to execute
  -- and built AST with.
  table =
    [[prefix "-" (Statement Mult (Val (-1)))],
      [binary "^" (Statement Pow) AssocRight],
      [binary "*" (Statement Mult) AssocLeft, binary "/" (Statement Div) AssocLeft, binary "%" (Statement Mod) AssocLeft],
      [binary "+" (Statement Plus) AssocLeft, binary "-" (Statement Minus) AssocLeft]
       ]          
    where
      binary s f assoc
         = Infix (do{ string s; return f}) assoc
      prefix s f 
         = Prefix (do{ string s; return f})

-- Parses a string into an AST, using the parser defined above
parse s = case P.parse expr "" s of
  Right ast -> ast
  Left e -> error $ show e
  
-- Take AST and evaluate (mostly for testing)
eval (Val n) = n
eval (Statement op left right)
        | op == Mult = eval left * eval right
        | op == Minus = eval left - eval right
        | op == Plus = eval left + eval right
        | op == Div = eval left `div` eval right
        | op == Pow = eval left ^ eval right
        | op == Mod = eval left `mod` eval right

-- Takes an AST and turns it into a byte code list
generate stmt = generate' stmt []
       where
               generate' (Statement op left right) instr =
                       let
                               li = generate' left instr
                               ri = generate' right instr
                               lri = li ++ ri
                       in case op of
                               Plus -> lri ++ [ADD]
                               Minus -> lri ++ [SUB]
                               Mult -> lri ++ [MUL]
                               Div -> lri ++ [DIV]
                               Mod -> lri ++ [MOD]
                               Pow -> lri ++ [POW]
               generate' (Val n) instr =
                if abs(n) > 32768
                then LCONST n : instr  
                else CONST n : instr

-- Takes a statement and converts it into a list of actual bytes to
-- be interpreted
compile s = toBytes (generate $ parse s)

-- Convert a list of byte codes to a list of integer codes. If LCONST or CONST
-- instruction are seen, correct byte representantion is produced
toBytes ((NOOP):xs) = 0 : toBytes xs 
toBytes ((CONST n):xs) = 1 : (toConstBytes (fromInteger n)) ++ toBytes xs
toBytes ((LCONST n):xs) = 2 : (toLConstBytes (fromInteger n)) ++ toBytes xs
toBytes ((ADD):xs) = 0x0a : toBytes xs
toBytes ((SUB):xs) = 0x0b : toBytes xs
toBytes ((MUL):xs) = 0x0c : toBytes xs
toBytes ((POW):xs) = 0x0d : toBytes xs
toBytes ((DIV):xs) = 0x0e : toBytes xs
toBytes ((MOD):xs) = 0x0f : toBytes xs
toBytes ((SWAP):xs) = 0x0a : toBytes xs
toBytes [] = []

-- Convert number to CONST representation (2 element list)
toConstBytes n = toByteList 2 n 
toLConstBytes n = toByteList 4 n 

-- Convert a number into a list of 8-bit bytes (big-endian/network byte order).
-- Make sure final list is size elements long
toByteList ::  Bits Int => Int -> Int -> [Int]
toByteList size n = reverse $ take size (toByteList' n)
    where
      toByteList' a = (a .&. 255) : toByteList' (a `shiftR` 8)

-- All tests defined by the quiz, with the associated values they should evaluate to.
test1 = [(2+2, "2+2"), (2-2, "2-2"), (2*2, "2*2"), (2^2, "2^2"), (2 `div` 2, "2/2"),
  (2 `mod` 2, "2%2"), (3 `mod` 2, "3%2")]

test2 = [(2+2+2, "2+2+2"), (2-2-2, "2-2-2"), (2*2*2, "2*2*2"), (2^2^2, "2^2^2"), (4 `div` 2 `div` 2, "4/2/2"),
  (7`mod`2`mod`1, "7%2%1")]
  
test3 = [(2+2-2, "2+2-2"), (2-2+2, "2-2+2"), (2*2+2, "2*2+2"), (2^2+2, "2^2+2"),
  (4 `div` 2+2, "4/2+2"), (7`mod`2+1, "7%2+1")]

test4 = [(2+(2-2), "2+(2-2)"), (2-(2+2), "2-(2+2)"), (2+(2*2), "2+(2*2)"), (2*(2+2), "2*(2+2)"),
  (2^(2+2), "2^(2+2)"), (4 `div` (2+2), "4/(2+2)"), (7`mod`(2+1), "7%(2+1)")]

test5 = [(-2+(2-2), "-2+(2-2)"), (2-(-2+2), "2-(-2+2)"), (2+(2 * -2), "2+(2*-2)")]

test6 = [((3 `div` 3)+(8-2), "(3/3)+(8-2)"), ((1+3) `div` (2 `div` 2)*(10-8), "(1+3)/(2/2)*(10-8)"), 
    ((1*3)*4*(5*6), "(1*3)*4*(5*6)"), ((10`mod`3)*(2+2), "(10%3)*(2+2)"), (2^(2+(3 `div` 2)^2), "2^(2+(3/2)^2)"),
    ((10 `div` (2+3)*4), "(10/(2+3)*4)"), (5+((5*4)`mod`(2+1)), "5+((5*4)%(2+1))")]

-- Evaluates the tests and makes sure the expressions match the expected values
eval_tests = concat $ map eval_tests [test1, test2, test3, test4, test5, test6]
  where
    eval_tests ((val, stmt):ts) =
      let eval_val = eval $ parse stmt 
      in
        if val == eval_val 
        then ("Passed: " ++ stmt) : eval_tests ts
        else ("Failed: " ++ stmt ++ "(" ++ show eval_val ++ ")") : eval_tests ts
    eval_tests [] = []

-- Takes all the tests and displays symbolic bytes codes for each
generate_tests = concat $ map generate_all [test1,test2,test3,test4,test5,test6]
  where generate_all ((val, stmt):ts) = (stmt, generate (parse stmt)) : generate_all ts
        generate_all [] = []
        
-- Takes all tests and generates a list of bytes representing them
compile_tests = concat $ map compile_all [test1,test2,test3,test4,test5,test6]
  where compile_all ((val, stmt):ts) = (stmt, compile stmt) : compile_all ts
        compile_all [] = []

interpret_tests = concat $ map f' [test1, test2, test3, test4, test5, test6]
  where
    f' tests = map f'' tests
    f'' (expected, stmt) =
      let value = fromIntegral $ interpret [] $ compile stmt
      in
        if value == expected
        then "Passed: " ++ stmt
        else "Failed: " ++ stmt ++ "(" ++ (show value) ++ ")"

fromBytes n xs =
  let int16 = (fromIntegral ((fromIntegral int32) :: Int16)) :: Int
      int32 = byte xs
      byte xs = foldl (\accum byte -> (accum `shiftL` 8) .|. (byte)) (head xs) (take (n - 1) (tail xs))
  in
    if n == 2
    then int16 
    else int32 
   
interpret [] [] = error "no result produced"
interpret (s1:s) [] = s1
interpret s (o:xs) | o < 10 = interpret ((fromBytes (o*2) xs):s) (drop (o*2) xs)
interpret (s1:s2:s) (o:xs)
  | o == 16 = interpret (s2:s1:s) xs
  | otherwise = interpret (((case o of 10 -> (+); 11 -> (-); 12 -> (*); 13 -> (^); 14 -> div; 15 -> mod) s2 s1):s) xs
 
\end{code}