Haskell Quiz/Bytecode Compiler/Solution Lennart Kolmodin

From HaskellWiki

Complete solution with parser (cheating using Parsec), compiler, interpreter, evaluator and QuickCheck properties.

The Arbitrary monad doesn't work properly though, it generates too small or far too large trees.

module Main where

import Foreign
import Control.Monad
import Data.Bits
import System.Random
import Test.QuickCheck hiding (evaluate)
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Expr

-- Main -------------------------------------------------------------

main :: IO ()
main = do
    raw <- getContents
    case parse expr "stdin" raw of
        Left err -> print err
        Right e -> do
            let bytecode = compile e
                (interpretion:_) = interpret bytecode
            print e
            print bytecode
            print $ head (interpret bytecode)
            print (evaluate e)

-- Data Structs -----------------------------------------------------

data Expr = Op BinOp Expr Expr
          | Const Int
          deriving (Show,Eq)

data BinOp = Add
           | Sub
           | Mul
           | Div
           | Pow
           | Mod
           deriving (Show,Eq)

-- Parsing using Parsec ---------------------------------------------

expr    :: Parser Expr
expr    = buildExpressionParser table factor
          <?> "expression"

table   = [[op "**" (Op Pow) AssocRight, op "%" (Op Mod) AssocLeft]
          ,[op "*"  (Op Mul) AssocLeft, op "/" (Op Div) AssocLeft]
          ,[op "+"  (Op Add) AssocLeft, op "-" (Op Sub) AssocLeft]
          ] 
        where
          op s f assoc = Infix (do{ try (string s); return f}) assoc

factor  = between (char '(') (char ')') expr
        <|> number
        <?> "simple expression"

number  :: Parser Expr
number  = liftM (Const . read) (many1 digit)
        <?> "number"

-- Compiler ---------------------------------------------------------

compile :: Expr -> [Word8]
compile e = compile' e []

constInstr = 0x01
lconstInstr = 0x02
addInstr = 0x0a
subInstr = 0x0b
mulInstr = 0x0c
powInstr = 0x0d
divInstr = 0x0e
modInstr = 0x0f
swapInstr = 0xa0

w  n = \c -> n : c
w2 n = w (getByte n 1) . w (getByte n 0)
w4 n = w (getByte n 3) . w (getByte n 2) . w (getByte n 1) . w (getByte n 0)
compile' (Const c) | c <= 2^15 = w  constInstr . w2 c
                   | otherwise = w lconstInstr . w4 c
compile' (Op op e1 e2) = compile' e1 . compile' e2 . w opInstr
    where
        opInstr = case op of
                    Add -> addInstr
                    Sub -> subInstr
                    Mul -> mulInstr
                    Div -> divInstr
                    Pow -> powInstr
                    Mod -> modInstr

getByte v i = fromIntegral $ (v `shiftR` (i*8)) .&. 0xff 

-- Interpreter ------------------------------------------------------

interpret :: [Word8] -> [Integer]
interpret instrs = interpret' instrs []

interpret' [] st = reverse st 
interpret' (0x01:a:b:rest) st = interpret' rest $
    ((toInteger a `shift` 8) .|. 
     (toInteger b)):st
interpret' (0x02:a:b:c:d:rest) st = interpret' rest $
    ((toInteger a `shift` 24) .|.
     (toInteger b `shift` 16) .|.
     (toInteger c `shift` 8) .|.
     (toInteger d)) : st
interpret' (0xa0:rest) (b:a:st) = interpret' rest (a:b:st)
interpret' (op:rest) (b:a:st) = interpret' rest (f a b:st)
    where f = case () of
                _ | op == addInstr -> (+)
                  | op == subInstr -> (-)
                  | op == mulInstr -> (*)
                  | op == powInstr -> (^)
                  | op == divInstr -> div
                  | op == modInstr -> mod

-- Evaluator --------------------------------------------------------

evaluate :: Expr -> Integer
evaluate (Const e) = toInteger e
evaluate (Op op e1 e2) = evaluate e1 `f` evaluate e2
    where f = case op of
                Add -> (+)
                Sub -> (-)
                Mul -> (*)
                Div -> div
                Pow -> (^)
                Mod -> mod

-- QuickCheck -------------------------------------------------------

instance Arbitrary Expr where
    arbitrary = sized $ \n -> sizedExpr n
      where
      sizedExpr n = frequency
          [ (2, genConst)
          , (1, genOp n)
          ]
      genConst = do
          range <- elements [(0,2^15), (2^15,2^32)]
          liftM Const $ choose range
      genOp n | n <= 0 = genConst
              | otherwise = do
          op <- arbitrary
          let n' = (n-1) `div` 4
              subtree = sizedExpr n'
          liftM2 (Op op) subtree subtree
    coarbitrary (Const n) = variant 0 . coarbitrary n
    coarbitrary (Op op e1 e2) = variant 1 . coarbitrary op . coarbitrary e1 . coarbitrary e2

instance Arbitrary BinOp where
    arbitrary = elements [Add, Sub, Mul, Div, Pow, Mod]
    coarbitrary op = variant 0 . coarbitrary op

depth (Const _) = 1
depth (Op _ e1 e2) = 1 + max (depth e1) (depth e2)

prop_id e =
    collect (depth e) $
    trivial (depth e == 1) $ 
    head (interpret . compile $ e) == evaluate e