Modular Monadic Compilers for Programming Languages
Primele luni ale anului 2011 s-au dovedit prolifice pentru realizatorii de compilatoare:
. Din cercetarile comunitatii Haskell internationale
Prof. Graham Hutton lucreaza cu unul dintre doctoranzii sai - Laurence Day - la compilatoarele monadice modulare.
http://www.cs.nott.ac.uk/~gmh/bib.html#mod-comp
. Din cercetarile de la univ. "V.Alecsandri" din Bacau
13 iulie 2011 La Congresul ARA de la Timisoara (http://ara.mec.upt.ro/abslist.html) am reusit sa programam sustinerea unei lucrari despre limbaje modularizate orizontal, in care si typecheckerul si evaluatorul (care poate fi foarte bine si un interpretor si un compilator) sunt taiate "pe orizontala" in instructiuni separate care pot fi adaugate modular. Lucrarea, cu titlul "Adaptable Software – Modular extensible monadic evaluator and typechecker based on pseudoconstructors" are codul ARA35-119. Un draft al lucrarii, scris pe 16 mai 2011 va punem la dispozitie si prin site-ul Ro/Haskell. [Download a draft from http://www.haskell.org/wikiupload/7/78/Popa_Dan_fullpaper_template.pdf.zip]
Dan Popa isi continua cercetarile din teza de doctorat studiind aplicatiile acelor "Pseudoconstructors over monadic values" la realizarea compilatoarelor.
. The paper describing the modular monadic compiler
Aici este o lucrare despre compilatorul monadic modular care poate fi realizat din module distincte, compilate impreuna. [Download The "Technical Report"] from http://www.haskell.org/wikiupload/6/61/2011-MCOMP-paper.pdf.zip
Ref.: Dan Popa - How to build a modular monadic extensible compiler using The State Monad and pseudoconstructors over monadic values, Scientific Studies and Research. Series Mathematics and Informatics, vol. 21, no.2, 2011, pag. 97-116
Mai jos gasiti sursa Haskell a acestui compilator monadic modular realizat cu monada starilor ("the state monad").
-- A modular monadic compiler built using pseudoconstructors
-- over monadic values, by Dan Popa. "V.Alecsandri" Univ. of Bacau.
{-- Code from:
How to build a modular monadic extensible compiler
using The State Monad and pseudoconstructors over monadic values, Scientific
Studies and Research. Series Mathematics and Informatics, vol. 21, no.2, 2011,
pag. 97-116
--}
module MCOMP where
import Monad
import Data.Char
-- MCOMP, 20 iunie 2011, Dan Popa
-- v02. 21 iunie 2011 - corectat while-ul care genera cod fara salt inapoi
--- [... deleted]
-- v08 30 iunie 2011
-- Tipul starilor , aici adresele de inceput/sfarsit de cod
type S = Int
-- Monada starilor
data SM a = SM (S -> (a,S))
instance Monad SM where
SM c1 >>= fc2 = SM (\s0 -> let (r,s1) = c1 s0
SM c2 = fc2 r
in c2 s1 )
return k = SM (\s -> (k,s))
readSM :: SM S
readSM = SM (\s -> (s,s))
updateSM :: (S -> S) -> SM S
updateSM f = SM (\s -> (s, f s))
{--
fupdateSM :: (S -> S) -> SM S
fupdateSM f = SM (\s -> (s, f s))
--}
writeSM :: S -> SM S
writeSM a = SM (\s -> (s,a))
allocSM :: S -> SM S
allocSM l = SM (\s -> (s, s+l))
runSM :: S -> SM a -> (a,S)
runSM s0 (SM c) = c s0
compile :: (Show t, Show t1) => SM (t, [Instr t1]) -> IO ()
compile arb = putStr . prettyprint $ runSM 0000 arb
----- INSTRUCTIONS ------------------------
data Instr a = Instr String a
deriving (Show,Read,Eq)
---- PRETTY PRINTER-ul -------------------
prettyprint ((a,l),b)
= "\n" ++ "Length of the code:" ++
show a ++ myprintl 0 l
myprintl nr []
= "\n" ++ show nr
myprintl nr ((Instr a b) : l)
= "\n" ++ show nr ++ "\t" ++
a ++ show b ++ myprintl (nr+1) l
-- Fara putStr nu apar efectele de la CR si TAB-uri
mainA0 = compile (iif (gt (constant 10) (constant 20)) (attr 'x' (constant 45)) (attr 'x' (constant 50)) )
{-- *MCOMP> mainA0
Length of the code:9
0 LD_INT 10
1 LD_INT 20
2 GT 0
3 JZ 7
4 LD_INT 45
5 STORE 120
6 JP 9
7 LD_INT 50
8 STORE 120
9*MCOMP>
--}
--
---------------------COMPILER--------------------------
-- Compilarea constantei
-- return pruimeste o pereche cu doua argumente
-- lungimea = 1 a codului si codul masina generat
constant nr
= do { a0 <- readSM;
let a1 = a0 +1
in do {
writeSM a1;
return (1, [Instr "LD_INT " nr] )
}
}
-- Sub GHC:
mainA1 = compile (constant 10)
{--
*MCOMP> mainA1
Length of the code:1
0 LD_INT 10
1*MCOMP>
--}
-- Daca printam direct runSM (constant 10)
-- numarul dinaintea codului este lungimea acestuia !!
{-- 1*MCOMP> runSM 0 (constant 10)
((1,[Instr "LD_INT " 10]),1)
*MCOMP>
--}
-- Tabela de simboluri dummy
symtable x = 000 + ord x
-- Nu compilati (plus 10 20) in loc de (plus (constant 10) (constant 20))
-- altfel sistemul va pretinde sa existe o anume declaratie de instanta:
-- instance Num SM (Integer,[Instr Int])
-- La fel, compilati (attr 's' (constant 10))
-----------------COMPILER------------------------------------
-- Compilarea variabilelor
variable s
= do { a0 <- readSM;
let a1 = a0 +1
adr = symtable (s)
in do {
writeSM a1;
return (1, [Instr "LD_VAR " adr] )
}
}
-- Compilarea variabilelor
mainB1 = compile (variable 'a')
{--*MCOMP> mainB1
Length of the code:1
0 LD_VAR 97
--}
mainB2 = compile (variable 'A')
{-- MCOMP> mainB2
Length of the code:1
0 LD_VAR 65
--}
-- Compilarea declaratiilor
-- nr indica numarul de variabile dintr-un limbaj monotip
-- pt n variabile se aloca locatiile 0,1,...n-1
datas n
= do { a0 <- readSM;
let a1 = a0 + 1
arg = n -1
in do {
writeSM a1;
return (1, [Instr "DATA " arg] )
}
}
mainC1= compile (datas 10)
{--
Length of the code:1
0 DATA 9
1*MCOMP>
--}
--
{--
-- Compilarea instructiunii skip
skip :: SM (Int,[Instr Int])
skip
= do { a0 <- readSM;
let a1 = a0 + 0 -- pt a se conforma sablonului
in do {
writeSM a1;
return (0, [] )
}
}
--}
skip :: SM (Int,[Instr Int])
skip = return (0, [] )
mainD1 = compile skip
{--compile (iif (variable 'x') (skip) (skip))
--}
-- Compilarea operatiei de intrare read
readv s -- read este utilizat
= do { a0 <- readSM;
let a1 = a0 +1
adr = symtable (s)
in do {
writeSM a1;
return (1, [Instr "IN_INT " adr] )
}
}
mainD2 = compile (readv 'x')
mainD3 = compile (readv 'y')
{--
Length of the code:1
0 IN_INT 120
1
*MCOMP> mainD3
Length of the code:1
0 IN_INT 121
1
*MCOMP>
--}
-- Compilarea scrierilor seamana cu cea a atribuirilor
write exp
= do { a0 <-readSM;
(l1,cod1) <- exp;
let a1 = a0 + l1
a2 = a1 + 1
in do { writeSM a2;
return (l1 + 1, concat [cod1,
[Instr "OUT_INT " 0] ] )
}
}
mainE1 =compile (write (variable 'x'))
{--
Length of the code:2
0 LD_VAR 120
1 OUT_INT 0
2
*MCOMP> mainE2
--}
mainE2 = compile (write (plus (constant 10) (constant 20)) )
{--
*MCOMP> mainE2
Length of the code:4
0 LD_INT 10
1 LD_INT 20
2 ADD 0
3 OUT_INT 0
4
*MCOMP>
--}
mainE3 = compile (while (gt (constant 10) (constant 20)) (attr 'x' (constant 45)))
{--
Length of the code:7
0 LD_INT 10
1 LD_INT 20
2 GT 0
3 JZ 7
4 LD_INT 45
5 STORE 120
6 JP 0
7
*MCOMP>
--}
-- Compilarea atribuirilor
attr s exp
= do { a0 <-readSM;
(l1,cod1) <- exp;
let a1 = a0 + l1
a2 = a1 + 1
adr = symtable s
in do { writeSM a2;
return (l1 + 1, concat [cod1,
[Instr "STORE " adr] ] )
}
}
-- compilarea sumelor
plus exp1 exp2
= do { a0 <-readSM;
(l1,cod1) <- exp1;
writeSM (a0+l1);
(l2,cod2) <- exp2;
let a3 = a0 + l1 + l2 + 1
in do { writeSM a3;
return (l1+l2+1, concat [cod1,
cod2,
[Instr "ADD " 0 ] ] )
}
}
-- Si similar a celorlalte operatii
minus exp1 exp2
= do { a0 <-readSM;
(l1,cod1) <- exp1;
writeSM (a0+l1);
(l2,cod2) <- exp2;
let a3 = a0 + l1 + l2 + 1
in do { writeSM a3;
return (l1+l2+1, concat [cod1,
cod2,
[Instr "SUB " 0 ] ] )
}
}
mult exp1 exp2
= do { a0 <-readSM;
(l1,cod1) <- exp1;
writeSM (a0+l1);
(l2,cod2) <- exp2;
let a3 = a0 + l1 + l2 + 1
in do { writeSM a3;
return (l1+l2+1, concat [cod1,
cod2,
[Instr "MULT " 0 ] ] )
}
}
div exp1 exp2
= do { a0 <-readSM;
(l1,cod1) <- exp1;
writeSM (a0+l1);
(l2,cod2) <- exp2;
let a3 = a0 + l1 + l2 + 1
in do { writeSM a3;
return (l1+l2+1, concat [cod1,
cod2,
[Instr "DIV " 0 ] ] )
}
}
-- La fel si pentru comparatii
eq exp1 exp2
= do { a0 <-readSM;
(l1,cod1) <- exp1;
writeSM (a0+l1);
(l2,cod2) <- exp2;
let a3 = a0 + l1 + l2 + 1
in do { writeSM a3;
return (l1+l2+1, concat [cod1,
cod2,
[Instr "EQ " 0 ] ] )
}
}
-- Si similar, celelalte comparatii
lt exp1 exp2
= do { a0 <-readSM;
(l1,cod1) <- exp1;
writeSM (a0+l1);
(l2,cod2) <- exp2;
let a3 = a0 + l1 + l2 + 1
in do { writeSM a3;
return (l1+l2+1, concat [cod1,
cod2,
[Instr "LT " 0 ] ] )
}
}
gt exp1 exp2
= do { a0 <-readSM;
(l1,cod1) <- exp1;
writeSM (a0+l1);
(l2,cod2) <- exp2;
let a3 = a0 + l1 + l2 + 1
in do { writeSM a3;
return (l1+l2+1, concat [cod1,
cod2,
[Instr "GT " 0 ] ] )
}
}
-- nu este neaparat nevoie de un NEQ el este inlocuibil cu un not de EQ
-- adaugat
-- Poate si lungimea totala trebuia stocata in monada nu numai adresa ?
iif cond s1 s2
= do { a0 <-readSM;
(l1,cod1) <- cond;
writeSM (a0+l1+1);
(l2,cod2) <- s1;
writeSM (a0 + l1 + 1 + l2 + 1) ;
(l3,cod3) <- s2;
writeSM (a0 + l1 + 1 + l2 + 1 +l3);
return (l1+1+l2+1+l3 , concat [cod1,
[Instr "JZ " (a0 + l1 + 1 + l2 + 1) ],
cod2,
[Instr "JP " (a0 + l1 + 1 + l2 + 1 + l3) ],
cod3 ] )
}
-- Cod generat de instructiunea if,
mainI1 = compile (iif (gt (constant 10) (constant 20)) (attr 'x' (constant 45)) (attr 'x' (constant 50)) )
{--
Length of the code:9
0 LD_INT 10
1 LD_INT 20
2 GT 0
3 JZ 7
4 LD_INT 45
5 STORE 120
6 JP 9
7 LD_INT 50
8 STORE 120
9
*MCOMP>
--}
-- Alt cod generat de instructiunea iif, corect
mainI2 = compile (iif (variable 'x') (attr 'x' (constant 1)) (attr 'x' (constant 2 )) )
{--
*MCOMP> mainI2
Length of the code:7
0 LD_VAR 120
1 JZ 5
2 LD_INT 1
3 STORE 120
4 JP 7
5 LD_INT 2
6 STORE 120
7
*MCOMP>
--}
-- Alt cod generat de instructiunea iif, corect
mainI3 = compile (iif (gt (variable 'x')(constant 0)) (attr 'x' (constant 1)) (attr 'x' (constant 2 )) )
{--
*MCOMP> mainI2
Length of the code:9
0 LD_VAR 120
1 LD_INT 0
2 GT 0
3 JZ 7
4 LD_INT 1
5 STORE 120
6 JP 9
7 LD_INT 2
8 STORE 120
9
*MCOMP>
--}
-- Compilarea secventelor
-- Adresa intermediara de dupa cod 1 trebuie salvata in stare
sequ s1 s2 -- seq e rezervat
= do { a0 <-readSM;
(l1,cod1) <- s1;
let a2 = a0 +l1
in do
{ writeSM a2; -- the begining of the second code should be stored in state
a2 <-readSM; -- putin redundant aici
(l2,cod2) <- s2;
let a4 = a2 + l2
in do { writeSM a4;
return (l1+l2 , concat [cod1, cod2] )
}
}
}
mainW = compile (sequ (attr 'x' (constant 45)) (attr 'x' (constant 50)) )
{--*MCOMP> mainW
Length of the code:4
0 LD_INT 45
1 STORE 120
2 LD_INT 50
3 STORE 120
4*MCOMP>
--}
mainW2 = compile (sequ (attr 'x' (constant 45)) (attr 'y' (constant 50)) )
{--*MCOMP> mainW2
Length of the code:4
0 LD_INT 45
1 STORE 120
2 LD_INT 50
3 STORE 121
4*MCOMP>
--}
-- Bucla While
-- Instructiunile JP generate de while sunt corecte
while cond s1
= do { a0 <-readSM;
(l1,cod1) <- cond;
writeSM (a0+l1+1);
(l2,cod2) <- s1;
let a2 = a0 + l1 + 1 + l2+1
in do { writeSM a2;
return (l1+l2+2, concat [cod1,
[Instr "JZ " a2 ],
cod2,
[Instr "JP " a0]] )
}
}
-- Cod generat de o instr. while
mainW0 = compile (while (gt (constant 10) (constant 20)) (attr 'x' (constant 45)))
{--
Length of the code:7
0 LD_INT 10
1 LD_INT 20
2 GT 0
3 JZ 7
4 LD_INT 45
5 STORE 120
6 JP 0
7
*MCOMP>
--}
-- Alt cod generat de o instructiune while
mainW1 = compile (while (gt (variable 'x') (constant 0)) (attr 'x' (minus (variable 'x') (constant 1)) ))
{--
Length of the code:9
0 LD_VAR 120
1 LD_INT 0
2 GT 0
3 JZ 9
4 LD_VAR 120
5 LD_INT 1
6 SUB 0
7 STORE 120
8 JP 0
9
*MCOMP>
--}
-- Chiar daca instructiunea se scrie do,
-- nu scrieti do, confuzionati parserul care recunoaste do-notatia
-- am scris dowhile (do, instruction)
dowhile s1 cond
= do { a0 <-readSM;
(l1,cod1) <- s1;
writeSM (a0+l1); -- era fara +1,ok
(l2,cod2) <- cond;
let a2 = a0 + l1 + l2 + 1 -- erau inversate dar ok
in do { writeSM a2;
return (l1+l2+1, concat [cod1,
cod2,
[Instr "JNZ " a0 ] ] )
}
}
-- Cod generat de o instructiune do-while, OK
mainDW1 = compile (dowhile (attr 'x' (constant 45)) (gt (constant 10) (constant 20) ) )
{--
*MCOMP> mainDW1
Length of the code:6
0 LD_INT 45
1 STORE 120
2 LD_INT 10
3 LD_INT 20
4 GT 0
5 JNZ 0
6*MCOMP>
--}
-- Alt cod generat de dowhile, OK
mainDW2 = compile (dowhile (attr 'x' (minus (variable 'x') (constant 1)) ) (gt (variable 'x') (constant 0)) )
{--
Length of the code:8
0 LD_VAR 120
1 LD_INT 1
2 SUB 0
3 STORE 120
4 LD_VAR 120
5 LD_INT 0
6 GT 0
7 JNZ 0
8
*MCOMP>
--}
mainDW3
= compile
(dowhile (sequ (attr 'x' (minus (variable 'x') (constant 1)) )
(while (gt (variable 'y') (constant 0)) (attr 'y' (minus (variable 'y') (constant 1)) )))
(gt (variable 'x') (constant 0)) )
{--
*MCOMP> mainDW3
Length of the code:17
0 LD_VAR 120 -- 'x'
1 LD_INT 1 -- 1
2 SUB 0 -- x-1
3 STORE 120 -- x:=x-1
4 LD_VAR 121 -- y -- intrare in bucala interioara !! -- optimizare posibila ?
5 LD_INT 0 -- 0
6 GT 0 -- y > 0
7 JZ 13 -- fals -> iese din bucla la adr 13
8 LD_VAR 121
9 LD_INT 1
10 SUB 0
11 STORE 121
12 JP 4
13 LD_VAR 120
14 LD_INT 0
15 GT 0
16 JNZ 0
17*MCOMP>
--}
-- Nota: diferenta intre cele 2 variante de writeSm (a0+l1 ) si writeSm (a0+l1 )
-- s-ar vedea numai daca insesi conditia lui dop-while ar fi o alta bucla while !!
-- pentru ca aici, imediat dupa codul din do-while, ar trebui sa revina bucla interioara
-- a conditiei !!
mainDW4
= compile
(dowhile (attr 'x' (minus (variable 'x') (constant 1)) )
(while (gt (variable 'y') (constant 0)) (attr 'y' (minus (variable 'y') (constant 1)) )) )
{-- Era gresit cu: +1
*MCOMP> mainDW4
Length of the code:14
0 LD_VAR 120 -- x:=x-1
1 LD_INT 1
2 SUB 0
3 STORE 120 -- final atribuire
4 LD_VAR 121
5 LD_INT 0 -- punct de intrare gresit in mijlocul comparatiei
6 GT 0
7 JZ 14 -- ar fi fost 13 daca uitam pe +1
8 LD_VAR 121
9 LD_INT 1
10 SUB 0
11 STORE 121
12 JP 5 -- ar fi fost 4 daca uitam pe +1
13 JNZ 0
14*MCOMP>
--}
{-- fara +1 era bine !!
*MCOMP> mainDW4
Length of the code:14
0 LD_VAR 120
1 LD_INT 1
2 SUB 0
3 STORE 120
4 LD_VAR 121 -- punct de intrare, inceput al comparatiei
5 LD_INT 0
6 GT 0
7 JZ 13 -- era 14 cu +1
8 LD_VAR 121
9 LD_INT 1
10 SUB 0
11 STORE 121
12 JP 4 -- era 5 cu +1
13 JNZ 0
14*MCOMP>
--}
-- Compilarea programului e la fel ca a secventei
-- deci cel mai simplu e sa o copiem de la sequ
program s1 s2 = sequ s1 s2
-- OK v04 si v05
mainX = compile
(program (datas 2)
(sequ (attr 'x' (constant 45))
(attr 'x' (constant 50))
)
)
{--
*MCOMP> mainX
Length of the code:5
0 DATA 1
1 LD_INT 45
2 STORE 120
3 LD_INT 50
4 STORE 120
5*MCOMP>
--}
-- Compiling an I/O program: OK v04 si v05
main0 = compile
(program (datas 2)
(sequ (readv 'x')
(write (variable 'x') )
)
)
{--
*MCOMP> main0
Length of the code:4
0 DATA 1
1 IN_INT 120
2 LD_VAR 120
3 OUT_INT 0
4*MCOMP>
--}
-- De compilat
main = compile
(program (datas 2)
(sequ (readv 'n')
(sequ (iif (gt (variable 'n') (constant 10) )
(attr 'x' (constant 1))
(skip)
)
(skip)
))
)
{-- ----
Length of the code:9
0 DATA 1
1 IN_INT 110
2 LD_VAR 110
3 LD_INT 10
4 GT 0
5 JZ 9
6 LD_INT 1
7 STORE 120
8 JP 9
9*MCOMP>
--}
-- OK
main1 = compile
(iif (gt (variable 'n') (constant 10) )
(attr 'x' (constant 1))
(skip)
)
{--
7*MCOMP> main1
Length of the code:7
0 LD_VAR 110
1 LD_INT 10
2 GT 0
3 JZ 7
4 LD_INT 1
5 STORE 120
6 JP 7
7*MCOMP>
--}
-- Ok
main2 = compile
(sequ (iif (gt (variable 'n') (constant 10) )
(attr 'x' (constant 1))
(skip)
)
(skip)
)
{--
Length of the code:7
0 LD_VAR 110
1 LD_INT 10
2 GT 0
3 JZ 7
4 LD_INT 1
5 STORE 120
6 JP 7
7*MCOMP>
--}
-- ??
main3 = compile
(sequ (readv 'x')
(sequ (iif (gt (variable 'n') (constant 10) )
(attr 'x' (constant 1))
(skip)
)
(skip)
))
{----
Length of the code:8
0 IN_INT 120
1 LD_VAR 110
2 LD_INT 10
3 GT 0
4 JZ 8
5 LD_INT 1
6 STORE 120
7 JP 8
8*MCOMP>
--}
-- ?
main3b = compile
(sequ (constant 111)
(sequ (iif (gt (variable 'n') (constant 10) )
(attr 'x' (constant 1))
(skip)
)
(skip)
))
{--
8*MCOMP> main3b
Length of the code:8
0 LD_INT 111
1 LD_VAR 110
2 LD_INT 10
3 GT 0
4 JZ 8
5 LD_INT 1
6 STORE 120
7 JP 8
8*MCOMP>
--}
-- Programul din cartea Compiler Construction using Flex and Bison, fig 7.1
-- pp 75 la editia 2006 Edusoft
main4 = putStr . prettyprint $ runSM 0
(program (datas 2)
(sequ (readv 'n')
(sequ (iif (lt (variable 'n') (constant 10) )
(attr 'x' (constant 1))
(skip)
)
(while (lt (variable 'n') (constant 10))
(sequ (attr 'x' (mult (constant 5)(variable 'x'))
)
(attr 'n' (plus (variable 'n') (constant 1))
)
)
)
)
)
)
{--
*MCOMP> main4
Length of the code:22
0 DATA 1
1 IN_INT 110
2 LD_VAR 110
3 LD_INT 10
4 LT 0
5 JZ 9
6 LD_INT 1
7 STORE 120
8 JP 9
9 LD_VAR 110
10 LD_INT 10
11 LT 0
12 JZ 22
13 LD_INT 5
14 LD_VAR 120
15 MULT 0
16 STORE 120
17 LD_VAR 110
18 LD_INT 1
19 ADD 0
20 STORE 110
21 JP 9
22*MCOMP>
--}