Modular Monadic Compilers for Programming Languages

From HaskellWiki
Revision as of 13:02, 12 July 2011 by Ha$kell (talk | contribs)
Jump to navigation Jump to search

Primele luni ale anului 2011 s-au dovedit prolifice pentru realizatorii de compilatoare:

. Din cercetarile comunitatii Haskell internationaler


1. http://www.cs.nott.ac.uk/~gmh/bib.html#mod-comp

. Din cercetarile de la Bacau

-- A modular monadic compiler built using pseudoconstructors 
-- over monadic values, by Dan Popa. "V.Alecsandri" Univ. of Bacau.

module MCOMP where
import Monad
import Data.Char  
-- MCOMP, 20 iunie 2011, Dan Popa
-- v02. 21 iunie 2011 - corectat while-ul care genera cod fara salt inapoi
--- [... deleted]
-- v08 30 iunie 2011

-- Tipul starilor , aici adresele de inceput/sfarsit de cod
type S = Int

-- Monada starilor
data  SM a = SM (S -> (a,S))

instance Monad SM where
  SM c1 >>= fc2 = SM (\s0 -> let (r,s1) = c1  s0 
                                 SM c2  = fc2 r
                             in c2 s1 )
  return k 	= SM (\s -> (k,s))

readSM :: SM S
readSM = SM (\s -> (s,s))

updateSM :: (S -> S) -> SM S
updateSM f = SM (\s -> (s, f s)) 

{--
fupdateSM :: (S -> S) -> SM S
fupdateSM f = SM (\s -> (s, f s))
--}

writeSM :: S -> SM S 
writeSM a  = SM (\s -> (s,a))

allocSM :: S -> SM S
allocSM l = SM (\s -> (s, s+l))

runSM :: S -> SM a -> (a,S)
runSM s0 (SM c) = c s0

compile :: (Show t, Show t1) => SM (t, [Instr t1]) -> IO ()
compile arb = putStr . prettyprint $ runSM 0000 arb

----- INSTRUCTIONS ------------------------

data  Instr a = Instr String a
             deriving (Show,Read,Eq)



---- PRETTY PRINTER-ul -------------------

prettyprint ((a,l),b)
  = "\n" ++ "Length of the code:" ++ 
     show a ++ myprintl 0 l

myprintl nr [] 
  = "\n" ++ show nr 
myprintl nr ((Instr a b) : l) 
  =  "\n" ++ show nr ++ "\t" ++
     a ++ show b ++ myprintl (nr+1) l
                     
-- Fara putStr nu apar efectele de la CR si TAB-uri
mainA0 = compile (iif (gt (constant  10) (constant 20)) (attr 'x' (constant 45))  (attr 'x' (constant 50))  )

{-- *MCOMP> mainA0

Length of the code:9
0	LD_INT 10
1	LD_INT 20
2	GT 0
3	JZ 7
4	LD_INT 45
5	STORE 120
6	JP 9
7	LD_INT 50
8	STORE 120
9*MCOMP> 
--}

--

---------------------COMPILER--------------------------

-- Compilarea constantei
-- return pruimeste  o pereche cu doua argumente
-- lungimea = 1 a codului si codul masina generat

constant nr 
  = do { a0 <- readSM;
         let a1 = a0 +1 
         in do { 
                 writeSM a1;
	         return (1, [Instr "LD_INT " nr] )
                }
       }

-- Sub GHC:

mainA1 = compile  (constant 10)

{--
*MCOMP> mainA1

Length of the code:1
0	LD_INT 10
1*MCOMP> 
--}
-- Daca printam direct runSM (constant 10)
-- numarul dinaintea codului este lungimea acestuia !!
{-- 1*MCOMP> runSM 0  (constant 10)
((1,[Instr "LD_INT " 10]),1)
*MCOMP> 
--}

-- Tabela de simboluri dummy
symtable x  = 000 + ord x

-- Nu compilati (plus 10 20) in loc de (plus (constant 10) (constant 20)) 
-- altfel sistemul va pretinde sa existe o anume declaratie de instanta:
-- instance Num  SM (Integer,[Instr Int]) 
-- La fel, compilati (attr 's' (constant 10)) 

-----------------COMPILER------------------------------------
-- Compilarea variabilelor

variable s
  = do { a0 <- readSM;
         let a1 = a0 +1 
             adr = symtable (s)   
         in do { 
                 writeSM a1;
	         return (1, [Instr "LD_VAR " adr] )
                }
       }

-- Compilarea variabilelor

mainB1 = compile  (variable 'a')

{--*MCOMP> mainB1

Length of the code:1
0	LD_VAR 97
--}

mainB2 = compile  (variable 'A')

{-- MCOMP> mainB2

Length of the code:1
0	LD_VAR 65

--}


-- Compilarea declaratiilor
-- nr indica numarul de variabile dintr-un limbaj monotip
-- pt n variabile se aloca locatiile 0,1,...n-1
datas n 
  = do { a0 <- readSM;
         let a1 = a0 + 1 
             arg = n -1
         in do { 
                 writeSM a1;
	         return (1, [Instr "DATA " arg] )   
                } 
       }

mainC1= compile  (datas 10)

{--
Length of the code:1
0	DATA 9
1*MCOMP> 

--}



--
{--
-- Compilarea instructiunii skip
skip ::  SM (Int,[Instr Int])
skip 
  = do { a0 <- readSM;
         let a1 = a0 + 0  -- pt a se conforma sablonului
         in do { 
                 writeSM a1;
	         return (0, [] )
                } 
       }

--}
skip ::  SM (Int,[Instr Int])
skip  = return (0, [] )
                
mainD1 = compile skip


{--compile (iif (variable 'x') (skip) (skip))

--}

-- Compilarea operatiei de intrare read


readv s                        -- read este utilizat
  = do { a0 <- readSM;
         let a1 = a0 +1 
             adr = symtable (s)   
         in do { 
                 writeSM a1;
	         return (1, [Instr "IN_INT " adr] )
                }
       }


mainD2 = compile (readv 'x')

mainD3 = compile (readv 'y')

{--
Length of the code:1
0	IN_INT 120
1

*MCOMP> mainD3

Length of the code:1
0	IN_INT 121
1

*MCOMP> 
--}

-- Compilarea scrierilor seamana cu cea a atribuirilor
write exp 
  = do { a0 <-readSM;
         (l1,cod1) <- exp;
         let   a1 = a0 + l1
               a2 = a1 + 1
         in do {  writeSM a2;
                  return (l1 + 1, concat [cod1,
                                         [Instr "OUT_INT " 0] ] )
               }
        }


mainE1 =compile (write (variable 'x'))

{--
Length of the code:2
0	LD_VAR 120
1	OUT_INT 0
2
*MCOMP> mainE2
--}


mainE2 = compile (write (plus (constant  10) (constant 20)) )
{--
*MCOMP> mainE2

Length of the code:4
0	LD_INT 10
1	LD_INT 20
2	ADD 0
3	OUT_INT 0
4

*MCOMP> 
--} 
mainE3 = compile (while (gt (constant  10) (constant 20)) (attr 'x' (constant 45)))

{--
Length of the code:7
0	LD_INT 10
1	LD_INT 20
2	GT 0
3	JZ 7
4	LD_INT 45
5	STORE 120
6	JP 0
7
*MCOMP> 

--}


-- Compilarea atribuirilor
attr s exp 
  = do { a0 <-readSM;
         (l1,cod1) <- exp;
         let   a1 = a0 + l1
               a2 = a1 + 1
               adr = symtable s 
         in do {  writeSM a2;
                  return (l1 + 1, concat [cod1,
                                         [Instr "STORE " adr] ] )
               }
        }

-- compilarea sumelor
plus exp1 exp2 
  = do { a0 <-readSM;
         (l1,cod1) <- exp1;
         writeSM (a0+l1);
         (l2,cod2) <- exp2;
         let   a3 = a0 + l1 + l2 + 1
         in do {  writeSM a3;
                  return (l1+l2+1, concat [cod1,
                                           cod2, 
                                          [Instr "ADD " 0 ] ] )
               }
        }

-- Si similar a celorlalte operatii

minus exp1 exp2 
  = do { a0 <-readSM;
         (l1,cod1) <- exp1;
         writeSM (a0+l1);
         (l2,cod2) <- exp2;
         let   a3 = a0 + l1 + l2 + 1
         in do {  writeSM a3;
                  return (l1+l2+1, concat [cod1,
                                           cod2,
                                           [Instr "SUB " 0 ] ] )
               }
        }

mult exp1 exp2 
  = do { a0 <-readSM;
         (l1,cod1) <- exp1;
         writeSM (a0+l1);
         (l2,cod2) <- exp2;
         let   a3 = a0 + l1 + l2 + 1
         in do {  writeSM a3;
                  return (l1+l2+1, concat [cod1,
                                           cod2,
                                          [Instr "MULT " 0 ] ] )
               }
        }

div exp1 exp2 
  = do { a0 <-readSM;
         (l1,cod1) <- exp1;
         writeSM (a0+l1);
         (l2,cod2) <- exp2;
         let   a3 = a0 + l1 + l2 + 1
         in do {  writeSM a3;
                  return (l1+l2+1, concat [cod1,
                                           cod2,
                                          [Instr "DIV " 0 ] ] )
               }
        }

-- La fel si pentru comparatii 
eq exp1 exp2 
  = do { a0 <-readSM;
         (l1,cod1) <- exp1;
         writeSM (a0+l1);
         (l2,cod2) <- exp2;
         let   a3 = a0 + l1 + l2 + 1
         in do {  writeSM a3;
                  return (l1+l2+1, concat [cod1, 
                                           cod2,
                                          [Instr "EQ " 0 ] ] )
               }
        }

-- Si similar, celelalte comparatii

lt exp1 exp2 
  = do { a0 <-readSM;
         (l1,cod1) <- exp1;
         writeSM (a0+l1);
         (l2,cod2) <- exp2;
         let   a3 = a0 + l1 + l2 + 1
         in do {  writeSM a3;
                  return (l1+l2+1, concat [cod1,
                                           cod2,
                                          [Instr "LT " 0 ] ] )
               }
        }

gt exp1 exp2 
  = do { a0 <-readSM;
         (l1,cod1) <- exp1;
         writeSM (a0+l1);
         (l2,cod2) <- exp2;
         let   a3 = a0 + l1 + l2 + 1
         in do {  writeSM a3;
                  return (l1+l2+1, concat [cod1,
                                           cod2,
                                          [Instr "GT " 0 ] ] )
               }
        }

-- nu este neaparat nevoie de un NEQ el este inlocuibil cu un not de EQ
--  adaugat


-- Poate si lungimea totala trebuia stocata in monada nu numai adresa ?

iif  cond s1 s2  
  = do { a0 <-readSM;
         (l1,cod1) <- cond;
         writeSM (a0+l1+1);
         (l2,cod2) <- s1;
         writeSM  (a0 + l1 + 1 + l2 + 1) ;
         (l3,cod3) <- s2;
	 writeSM  (a0 + l1 + 1 + l2 + 1 +l3);
         return (l1+1+l2+1+l3 , concat [cod1, 
	                                [Instr "JZ " (a0 + l1 + 1 + l2 + 1)  ],
                                        cod2,
                                        [Instr "JP " (a0 + l1 + 1 + l2 + 1 + l3) ],
 					cod3 ] )
        
        }


-- Cod generat de instructiunea if, 
mainI1 = compile (iif (gt (constant  10) (constant 20)) (attr 'x' (constant 45))  (attr 'x' (constant 50))  )

{--
Length of the code:9
0	LD_INT 10
1	LD_INT 20
2	GT 0
3	JZ 7
4	LD_INT 45
5	STORE 120
6	JP 9
7	LD_INT 50
8	STORE 120
9
*MCOMP> 
--}

-- Alt cod generat de instructiunea iif, corect
mainI2 = compile (iif (variable 'x') (attr 'x' (constant 1)) (attr 'x' (constant 2 )) ) 

{--
*MCOMP> mainI2

Length of the code:7
0	LD_VAR 120
1	JZ 5
2	LD_INT 1
3	STORE 120
4	JP 7
5	LD_INT 2
6	STORE 120
7
*MCOMP> 
--}

-- Alt cod generat de instructiunea iif, corect
mainI3 = compile (iif (gt (variable 'x')(constant 0)) (attr 'x' (constant 1)) (attr 'x' (constant 2 )) ) 

{--
*MCOMP> mainI2
Length of the code:9
0	LD_VAR 120
1	LD_INT 0
2	GT 0
3	JZ 7
4	LD_INT 1
5	STORE 120
6	JP 9
7	LD_INT 2
8	STORE 120
9
*MCOMP> 
--}


-- Compilarea secventelor
-- Adresa intermediara de dupa cod 1 trebuie salvata in stare

sequ s1 s2                      -- seq e rezervat
  = do { a0 <-readSM;
         (l1,cod1) <- s1;
         let a2 = a0 +l1
         in do  
              { writeSM a2;    -- the begining of the second code should be stored in state
                a2 <-readSM;   -- putin redundant aici
		(l2,cod2) <- s2;
                let   a4 = a2 +  l2 
                in do {  writeSM a4;
                      return (l1+l2 , concat [cod1, cod2] )
                      }
              }
        }


mainW = compile (sequ (attr 'x' (constant 45))  (attr 'x' (constant 50))  )

{--*MCOMP> mainW

Length of the code:4
0	LD_INT 45
1	STORE 120
2	LD_INT 50
3	STORE 120
4*MCOMP> 
--}

mainW2 = compile (sequ (attr 'x' (constant 45))  (attr 'y' (constant 50))  )

{--*MCOMP> mainW2

Length of the code:4
0	LD_INT 45
1	STORE 120
2	LD_INT 50
3	STORE 121
4*MCOMP> 
--}

-- Bucla While
-- Instructiunile JP generate de while sunt corecte

while cond s1 
  = do { a0 <-readSM;
         (l1,cod1) <- cond;
         writeSM (a0+l1+1);
         (l2,cod2) <- s1;
         let   a2 = a0 + l1  + 1 + l2+1
         in do {  writeSM a2;
                  return (l1+l2+2, concat [cod1,
                                          [Instr "JZ " a2 ],
                                           cod2,
                                          [Instr "JP " a0]] )
               }
        }

-- Cod generat de o instr. while
mainW0 = compile (while (gt (constant  10) (constant 20)) (attr 'x' (constant 45)))
{--
Length of the code:7
0	LD_INT 10
1	LD_INT 20
2	GT 0
3	JZ 7
4	LD_INT 45
5	STORE 120
6	JP 0
7
*MCOMP> 
--}

-- Alt cod generat de o instructiune while
mainW1 = compile (while (gt (variable 'x') (constant 0)) (attr 'x' (minus (variable 'x') (constant 1)) ))
{--
Length of the code:9
0	LD_VAR 120
1	LD_INT 0
2	GT 0
3	JZ 9
4	LD_VAR 120
5	LD_INT 1
6	SUB 0
7	STORE 120
8	JP 0
9
*MCOMP> 
--}


-- Chiar daca instructiunea se scrie do,
-- nu scrieti do, confuzionati parserul care recunoaste do-notatia
-- am scris dowhile (do, instruction)
dowhile s1 cond 
  = do { a0 <-readSM;
         (l1,cod1) <- s1;
         writeSM (a0+l1);  -- era fara +1,ok
         (l2,cod2) <- cond;
         let   a2 = a0 + l1  + l2 + 1  -- erau inversate dar ok
         in do {  writeSM a2;
                  return (l1+l2+1, concat [cod1,
                                           cod2,
                                           [Instr "JNZ " a0 ] ] )
               }
        }

-- Cod generat de o instructiune do-while, OK
mainDW1 = compile (dowhile  (attr   'x' (constant 45))  (gt (constant  10) (constant 20) ) )
{--
*MCOMP> mainDW1 

Length of the code:6
0	LD_INT 45
1	STORE 120
2	LD_INT 10
3	LD_INT 20
4	GT 0
5	JNZ 0
6*MCOMP> 
--}

-- Alt cod generat de dowhile, OK
mainDW2 = compile (dowhile  (attr 'x' (minus (variable 'x') (constant 1)) ) (gt (variable 'x') (constant 0)) )
{--
Length of the code:8
0	LD_VAR 120
1	LD_INT 1
2	SUB 0
3	STORE 120
4	LD_VAR 120
5	LD_INT 0
6	GT 0
7	JNZ 0
8
*MCOMP> 
--}

mainDW3 
  = compile 
  (dowhile  (sequ (attr 'x' (minus (variable 'x') (constant 1)) ) 
                  (while (gt (variable 'y') (constant 0)) (attr 'y' (minus (variable 'y') (constant 1)) )))
            (gt (variable 'x') (constant 0)) )
{--
*MCOMP> mainDW3

Length of the code:17
0	LD_VAR 120     -- 'x'     
1	LD_INT 1       -- 1
2	SUB 0          -- x-1 
3	STORE 120      -- x:=x-1              
4	LD_VAR 121     -- y  -- intrare in bucala interioara !!    -- optimizare posibila ?
5	LD_INT 0       -- 0
6	GT 0           -- y > 0
7	JZ 13          -- fals -> iese din bucla la adr 13 
8	LD_VAR 121
9	LD_INT 1
10	SUB 0
11	STORE 121
12	JP 4
13	LD_VAR 120
14	LD_INT 0
15	GT 0
16	JNZ 0
17*MCOMP> 

--}

-- Nota: diferenta intre cele 2 variante de writeSm (a0+l1 ) si  writeSm (a0+l1 )
-- s-ar vedea numai daca insesi conditia lui dop-while ar fi o alta bucla while !!
-- pentru ca aici, imediat dupa codul din do-while, ar trebui sa revina bucla interioara
-- a conditiei !!

mainDW4 
  = compile 
  (dowhile  (attr 'x' (minus (variable 'x') (constant 1)) ) 
            (while (gt (variable 'y') (constant 0)) (attr 'y' (minus (variable 'y') (constant 1)) )) )

{-- Era gresit cu: +1
*MCOMP> mainDW4

Length of the code:14
0	LD_VAR 120   -- x:=x-1
1	LD_INT 1
2	SUB 0
3	STORE 120    -- final atribuire
4	LD_VAR 121   
5	LD_INT 0     -- punct de intrare gresit in mijlocul comparatiei
6	GT 0
7	JZ 14        -- ar fi fost 13 daca uitam pe +1
8	LD_VAR 121
9	LD_INT 1
10	SUB 0
11	STORE 121
12	JP 5         -- ar fi fost 4 daca uitam pe +1
13	JNZ 0
14*MCOMP> 
--}

{-- fara +1 era bine !!
*MCOMP> mainDW4

Length of the code:14
0	LD_VAR 120
1	LD_INT 1
2	SUB 0
3	STORE 120
4	LD_VAR 121         -- punct de intrare, inceput al comparatiei 
5	LD_INT 0
6	GT 0
7	JZ 13              -- era 14 cu +1
8	LD_VAR 121
9	LD_INT 1
10	SUB 0
11	STORE 121
12	JP 4               -- era 5 cu +1
13	JNZ 0
14*MCOMP> 


--}

-- Compilarea programului e la fel ca a secventei
-- deci cel mai simplu e sa o copiem de la sequ
program s1 s2 = sequ  s1 s2 


--  OK v04 si v05
mainX = compile 
   (program (datas 2) 
            (sequ (attr 'x' (constant 45))  
                  (attr 'x' (constant 50))  
            )
   )

{--
*MCOMP> mainX

Length of the code:5
0	DATA 1
1	LD_INT 45
2	STORE 120
3	LD_INT 50
4	STORE 120
5*MCOMP> 

--}

-- Compiling an I/O program: OK v04 si v05

main0 = compile 
   (program (datas 2) 
            (sequ (readv 'x')
                  (write (variable 'x')  )  
            )
   )

{-- 
*MCOMP> main0

Length of the code:4
0	DATA 1
1	IN_INT 120
2	LD_VAR 120
3	OUT_INT 0
4*MCOMP>
--}

--   De compilat 
main = compile 
   (program (datas 2) 
            (sequ (readv 'n')
             (sequ (iif (gt (variable 'n') (constant 10) )
                        (attr 'x' (constant 1))
                        (skip) 
                   )
                   (skip)     
            ))
   )


 
{-- ----
Length of the code:9
0	DATA 1
1	IN_INT 110
2	LD_VAR 110
3	LD_INT 10
4	GT 0
5	JZ 9
6	LD_INT 1
7	STORE 120
8	JP 9
9*MCOMP> 

--}

-- OK
main1 = compile 
                   (iif (gt (variable 'n') (constant 10) )
                        (attr 'x' (constant 1))
                        (skip) 
                   )
{--
7*MCOMP> main1

Length of the code:7
0	LD_VAR 110
1	LD_INT 10
2	GT 0
3	JZ 7
4	LD_INT 1
5	STORE 120
6	JP 7
7*MCOMP> 
--}


-- Ok 
main2 = compile 
             (sequ (iif (gt (variable 'n') (constant 10) )
                        (attr 'x' (constant 1))
                        (skip) 
                   )
                   (skip)     
             )

{--
Length of the code:7
0	LD_VAR 110
1	LD_INT 10
2	GT 0
3	JZ 7
4	LD_INT 1
5	STORE 120
6	JP 7
7*MCOMP> 

--}

-- ??
main3 = compile 
            (sequ (readv 'x')
             (sequ (iif (gt (variable 'n') (constant 10) )
                        (attr 'x' (constant 1))
                        (skip) 
                   )
                   (skip)     
            ))
{----
Length of the code:8
0	IN_INT 120
1	LD_VAR 110
2	LD_INT 10
3	GT 0
4	JZ 8
5	LD_INT 1
6	STORE 120
7	JP 8
8*MCOMP> 

--}


-- ?
main3b = compile 
            (sequ (constant 111)
             (sequ (iif (gt (variable 'n') (constant 10) )
                        (attr 'x' (constant 1))
                        (skip) 
                   )
                   (skip)     
            ))
{--
8*MCOMP> main3b

Length of the code:8
0	LD_INT 111
1	LD_VAR 110
2	LD_INT 10
3	GT 0
4	JZ 8
5	LD_INT 1
6	STORE 120
7	JP 8
8*MCOMP> 
--}


 -- Programul din cartea Compiler Construction using Flex and Bison, fig 7.1
 -- pp 75 la editia 2006 Edusoft
main4 = putStr . prettyprint $ runSM 0
   (program (datas 2) 
            (sequ (readv 'n')
             (sequ (iif (lt (variable 'n') (constant 10) )
                        (attr 'x' (constant 1))
                        (skip) 
                   )
                   (while (lt (variable 'n')  (constant 10))
                          (sequ (attr 'x' (mult (constant 5)(variable 'x')) 
                                )    
                                (attr 'n' (plus (variable 'n') (constant 1))
                                ) 
                          )  
                   )   
             )
            )


   )

{--
*MCOMP> main4

Length of the code:22
0	DATA 1
1	IN_INT 110
2	LD_VAR 110
3	LD_INT 10
4	LT 0
5	JZ 9
6	LD_INT 1
7	STORE 120
8	JP 9
9	LD_VAR 110
10	LD_INT 10
11	LT 0
12	JZ 22
13	LD_INT 5
14	LD_VAR 120
15	MULT 0
16	STORE 120
17	LD_VAR 110
18	LD_INT 1
19	ADD 0
20	STORE 110
21	JP 9
22*MCOMP> 


--}