Difference between revisions of "IO inside"

guarantees

putStrLn "Press any key to begin formatting"

class Monad m where
    unit :: a -> m a
    bind :: m a -> (a -> m b) -> m b

getchar :: Char

get2chars = [getchar, getchar]

get2chars = let x = getchar in [x, x]  -- this should be a legitimate optimisation!

getchar :: Int -> Char

get2chars = [getchar 1, getchar 2]

getchar :: Int -> (Char, Int)

get2chars _ = [a, b]  where (a, i) = getchar 1
                            (b, _) = getchar i

get2chars i0 = [a, b]  where (a, i1) = getchar i0
                             (b, i2) = getchar i1

get4chars = [get2chars 1, get2chars 2]  -- order of calls to 'get2chars' isn't defined

get2chars :: Int -> (String, Int)

get4chars i0 = (a++b)  where (a, i1) = get2chars i0
                             (b, i2) = get2chars i1

get2chars :: Int -> (String, Int)
get2chars i0 = ([a, b], i2)  where (a, i1) = getchar i0
                                   (b, i2) = getchar i1

get2chars :: Int -> (String, Int)
get2chars i0 = ([a, b], i2)  where (a, i1) = getchar i2  -- this might take a while...
                                   (b, i2) = getchar i1

getchar   :: IO' Char

get2chars :: IO' String
get2chars =  getchar `bind` \a ->
             getchar `bind` \b ->
             unit [a, b]

get4chars :: IO' String
get4chars =  get2chars `bind` \a ->
             get2chars `bind` \b ->
             unit (a++b)

type IO' a =  Int -> (a, Int)  -- IO' is kept private!

-- | Lift a plain value into an 'IO'' action: the value becomes the result
-- and the incoming state token is handed back unchanged.
--
-- 'IO'' is a type synonym for a function, so the action is written as a
-- plain lambda; there is no data constructor to wrap it in.
unit       :: a -> IO' a
unit x     =  \i0 -> (x, i0)

-- | Sequence two 'IO'' actions: run @m@ on the incoming token, feed its
-- result to the continuation @k@, and run the action @k@ returns on the
-- token that @m@ produced.  The explicit token threading (@i0@ -> @i1@ ->
-- @i2@) is what fixes the order of the two steps.
bind       :: IO' a -> (a -> IO' b) -> IO' b
bind m k   =  \i0 -> let (x, i1) = m i0
                         (y, i2) = k x i1
                     in  (y, i2)

getchar    :: IO' Char
getchar    =  \i0 -> case primGetChar i0 of (i1, c) -> (c, i1)

primitive primGetChar :: Int -> (Int, Char)  -- also kept private!

main :: RealWorld -> ((), RealWorld)

type IO a  =  RealWorld -> (a, RealWorld)

getChar :: RealWorld -> (Char, RealWorld)

main :: RealWorld -> ((), RealWorld)
main world0 = let (a, world1) = getChar world0
                  (b, world2) = getChar world1
              in ((), world2)

main = do a <- ask "What is your name?"
          b <- ask "How old are you?"
          return ()

ask s = do putStr s
           readLn

-- | Conditionally run an action (written against the article's model
-- @IO a = RealWorld -> (a, RealWorld)@).  When the condition holds the
-- action is applied to the incoming world; otherwise the world is passed
-- through untouched with a unit result.
when :: Bool -> IO () -> IO ()
when condition action world
  | condition = action world
  | otherwise = ((), world)

  main = do putStr "Hello!"

  main = putStr "Hello!"

main = do putStr "What is your name?"
          putStr "How old are you?"
          putStr "Nice day!"

main = (putStr "What is your name?")
       >> ( (putStr "How old are you?")
            >> (putStr "Nice day!")
          )

-- | Sequencing in the world-passing model: @action1@ runs on the incoming
-- world, @action2@ runs on the world @action1@ produced, and the result of
-- @action1@ is discarded.
(>>) :: IO a -> IO b -> IO b
(action1 >> action2) world0 = (b, world2)
  where
    (_, world1) = action1 world0   -- first action; its result is unused
    (b, world2) = action2 world1   -- second action sees the updated world

action1 >> action2 = action
  where
    action world0 = let (a, world1) = action1 world0
                        (b, world2) = action2 world1
                    in (b, world2)

main = do a <- readLn
          print a

main = readLn
       >>= (\a -> print a)

-- | Bind in the world-passing model: run @action@ on the incoming world,
-- pass its result to @reaction@, and run the action that yields on the
-- world left behind by @action@.
(>>=) :: IO a -> (a -> IO b) -> IO b
(action >>= reaction) world0 = (b, world2)
  where
    (a, world1) = action world0       -- produce a value and a new world
    (b, world2) = reaction a world1   -- the value selects the next action

type IO a  =  RealWorld -> (a, RealWorld)

main = readLn >>= print

 do x <- action1
    action2

 action1 >>= (\x -> action2)

main = do putStr "What is your name?"
          a <- readLn
          putStr "How old are you?"
          b <- readLn
          print (a,b)

main = putStr "What is your name?"
       >> readLn
       >>= \a -> putStr "How old are you?"
       >> readLn
       >>= \b -> print (a,b)

return :: a -> IO a
return a world0  =  (a, world0)

main = do a <- readLn
          return (a*2)

main = do a <- readLn
          when (a>=0) $ do
              return ()
          print "a is negative"

main = do a <- readLn
          if (a>=0)
            then return ()
            else print "a is negative"

main = do a <- readLn
          if (a>=0) then return ()
            else do
          print "a is negative"
          ...

-- | Lift a pure function so that it transforms the result of an I/O
-- action: the action is run first, then @f@ is applied to what it returned.
liftM :: (a -> b) -> (IO a -> IO b)
liftM f action = action >>= \x -> return (f x)

main = do let a0 = readVariable varA
              _  = writeVariable varA 1
              a1 = readVariable varA
          print (a0, a1)

import Data.IORef
main = do varA <- newIORef 0  -- Create and initialize a new variable
          a0 <- readIORef varA
          writeIORef varA 1
          a1 <- readIORef varA
          print (a0, a1)

import Data.Array.IO
main = do arr <- newArray (1,10) 37 :: IO (IOArray Int Int)
          a <- readArray arr 1
          writeArray arr 1 64
          b <- readArray arr 1
          print (a, b)

rand :: IO Int

foreign import ccall
   sin :: Double -> Double

foreign import ccall
   tell :: Int -> IO Int

main world0 = let get2chars = getChar >> getChar
                  ((), world1) = putStr "Press two keys" world0
                  (answer, world2) = get2chars world1
              in ((), world2)

main = do let get2chars = getChar >> getChar
          putStr "Press two keys"
          get2chars
          return ()

ioActions :: [IO ()]
ioActions = [(print "Hello!"),
             (putStr "just kidding"),
             (getChar >> return ())
            ]

ioActions :: [RealWorld -> ((), RealWorld)]

main = do head ioActions
          ioActions !! 1
          last ioActions

-- | Run every action in the list from left to right, discarding all of
-- their results.  An empty list yields an action that does nothing.
sequence_ :: [IO a] -> IO ()
sequence_ = foldr (>>) (return ())

main = sequence_ ioActions

while :: IO Bool -> IO ()
while action = ???

main = do let a = sequence ioActions
              b = when True getChar
              c = getChar >> getChar
          putStr "These let-bindings are not executed!"

readi h i = do hSeek h AbsoluteSeek i
               hGetChar h

readfilei :: String -> IO (Integer -> IO Char)
readfilei name = do h <- openFile name ReadMode
                    return (readi h)

readfilei name = do h <- openFile name ReadMode
                    let readi h i = do hSeek h AbsoluteSeek i
                                       hGetChar h
                    return (readi h)

readfilei name = do h <- openFile name ReadMode
                    let readi i = do hSeek h AbsoluteSeek i
                                     hGetChar h
                    return readi

main = do myfile <- readfilei "test"
          a <- myfile 0
          b <- myfile 1
          print (a,b)

memoryAllocator :: Ptr a -> Int -> IO (Int -> IO (Ptr b),
                                       Ptr c -> IO ())

memoryAllocator buf size = do ......
                              let alloc size = do ...
                                                  ...
                                  free ptr = do ...
                                                ...
                              return (alloc, free)

memoryAllocator buf size = do start <- newIORef buf
                              end <- newIORef (buf `plusPtr` size)
                              ...

      ...
      let alloc size = do addr <- readIORef start
                          writeIORef start (addr `plusPtr` size)
                          return addr

      let free ptr = do writeIORef start ptr

main = do buf1 <- mallocBytes (2^16)
          buf2 <- mallocBytes (2^20)
          (alloc1, free1) <- memoryAllocator buf1 (2^16)
          (alloc2, free2) <- memoryAllocator buf2 (2^20)
          ptr11 <- alloc1 100
          ptr21 <- alloc2 1000
          free1 ptr11
          free2 ptr21
          ptr12 <- alloc1 100
          ptr22 <- alloc2 1000

data Figure = Figure { draw :: IO (),
                       move :: Displacement -> IO ()
                     }

type Displacement = (Int, Int)  -- horizontal and vertical displacement in points

circle    :: Point -> Radius -> IO Figure
rectangle :: Point -> Point -> IO Figure

type Point = (Int, Int)  -- point coordinates
type Radius = Int        -- circle radius in points

circle center radius = do
    let description = "  Circle at "++show center++" with radius "++show radius
    return $ Figure { draw = putStrLn description }

-- | Simplified rectangle constructor: "drawing" just prints the two corner
-- points.  The @move@ field is deliberately left unset in this first
-- version, so only @draw@ may be used on the resulting record.
rectangle from to = do
    let description = "  Rectangle "++show from++"-"++show to
    return $ Figure { draw = putStrLn description }

drawAll :: [Figure] -> IO ()
drawAll figures = do putStrLn "Drawing figures:"
                     mapM_ draw figures

main = do figures <- sequence [circle (10,10) 5,
                               circle (20,20) 3,
                               rectangle (10,10) (20,20),
                               rectangle (15,15) (40,40)]
          drawAll figures

circle center radius = do
    centerVar <- newIORef center

    let drawF = do center <- readIORef centerVar
                   putStrLn ("  Circle at "++show center
                             ++" with radius "++show radius)

    let moveF (addX,addY) = do (x,y) <- readIORef centerVar
                               writeIORef centerVar (x+addX, y+addY)

    return $ Figure { draw=drawF, move=moveF }

rectangle from to = do
    fromVar <- newIORef from
    toVar   <- newIORef to

    let drawF = do from <- readIORef fromVar
                   to   <- readIORef toVar
                   putStrLn ("  Rectangle "++show from++"-"++show to)

    let moveF (addX,addY) = do (fromX,fromY) <- readIORef fromVar
                               (toX,toY)     <- readIORef toVar
                               writeIORef fromVar (fromX+addX, fromY+addY)
                               writeIORef toVar   (toX+addX, toY+addY)

    return $ Figure { draw=drawF, move=moveF }

main = do figures <- sequence [circle (10,10) 5,
                               rectangle (10,10) (20,20)]
          drawAll figures
          mapM_ (\fig -> move fig (10,10)) figures
          drawAll figures

data Figure = Figure { draw :: IO (),
                       move :: Displacement -> IO (),
                       area :: Double,
                       origin :: IORef Point
                     }

main = print (f 2)

f 0 = "zero"
f 1 = "one"

main = print (head [])

main = print (1 + (error "Value that wasn't initialized or cannot be computed"))

{-# LANGUAGE ForeignFunctionInterface #-}

main = do print "Hello from main"
          c_function

haskell_function = print "Hello from haskell_function"

foreign import ccall safe "prototypes.h"
    c_function :: IO ()

foreign export ccall
    haskell_function :: IO ()

#include <stdio.h>
#include "prototypes.h"

void c_function (void)
{
  printf("Hello from c_function\n");
  haskell_function();
}

extern void c_function (void);
extern void haskell_function (void);

 ghc --make main.hs vile.c

 ghc -c vile.c
 ghc --make main.hs vile.o

#ifdef __cplusplus
extern "C" {
#endif

extern void c_function (void);
extern void haskell_function (void);

#ifdef __cplusplus
}
#endif

 ghc --make main.hs vile.cpp

foreign import ccall safe "prototypes.h CFunction"
    c_function :: IO ()

foreign export ccall "HaskellFunction"
    haskell_function :: IO ()

foreign import stdcall unsafe "windows.h SetFileApisToOEM"
  setFileApisToOEM :: IO ()

import Foreign.C.Types (               -- equivalent to the following C type:
         CChar, CUChar,                --  char/unsigned char
         CShort, CUShort,              --  short/unsigned short
         CInt, CUInt, CLong, CULong,   --  int/unsigned/long/unsigned long
         CFloat, CDouble...)           --  float/double

-- | Raw binding to the C library's @sin@.  The entity string must name the
-- C function explicitly: with only a header given, the C identifier
-- defaults to the Haskell name (@c_sin@), which the C library does not
-- define.
foreign import ccall unsafe "math.h sin"
    c_sin :: CDouble -> CDouble

-- |Type-conversion wrapper around c_sin
--
-- 'realToFrac' converts between 'Double' and 'CDouble' in both directions;
-- 'toRational' / 'fromRational' would produce and expect a 'Rational',
-- which does not match the type of 'c_sin'.
sin :: Double -> Double
sin = realToFrac . c_sin . realToFrac

import Foreign.C.String (   -- representation of strings in C
         CString,           -- = Ptr CChar
         CStringLen)        -- = (Ptr CChar, Int)

-- | Raw binding to C's @strlen@.  The C function is named explicitly in
-- the entity string; otherwise the import would refer to a C symbol
-- called @c_strlen@, which does not exist.
foreign import ccall unsafe "string.h strlen"
    c_strlen :: CString -> IO CSize     -- CSize defined in Foreign.C.Types and is equal to size_t

-- |Type-conversion wrapper around c_strlen 
strlen :: String -> Int
strlen = ....

readContents :: Filename -> String

unsafePerformIO :: IO a -> a

unsafePerformIO :: (RealWorld -> (a, RealWorld)) -> a
unsafePerformIO action = let (a, world1) = action createNewWorld
                         in a

one :: Integer
one = unsafePerformIO $ do var <- newIORef 0
                           modifyIORef var (+1)
                           readIORef var

unsafePerformIO action = let (a,world1) = action createNewWorld
                         in (world1 `seq` a)

-- | Just like unsafePerformIO, but we inline it. Big performance gains as
-- it exposes lots of things to further inlining
{-# INLINE inlinePerformIO #-}
inlinePerformIO action = let (a, world1) = action createNewWorld
                         in (world1 `seq` a)
#endif

-- | Build a 'Put' step that hands @body@ a pointer into the current buffer
-- and then continues with a buffer advanced by @n@.
--
-- If the buffer's last field @l@ is at least @n@, the current buffer is
-- used as is; otherwise the continuation is wrapped with @flushOld@ and a
-- buffer from @newBuffer c n@ is used with the three counters reset to 0.
-- NOTE(review): @o@, @u@ and @l@ look like offset, used and left byte
-- counts - confirm against the definition of @Buffer@.
write :: Int -> (Ptr Word8 -> IO ()) -> Put ()
write !n body = Put $ \c buf@(Buffer fp o u l) ->
  if n <= l
    then write' c fp o u l
    else write' (flushOld c n fp o u) (newBuffer c n) 0 0 0

  where {-# NOINLINE write' #-}
        -- Runs @body@ on the pointer at position @o+u@ inside @fp@, then
        -- calls the continuation with @u@ grown and @l@ shrunk by @n@.
        -- The `seq` is what forces the inlined I/O to happen before the
        -- continuation is entered.
        write' c !fp !o !u !l =
          -- warning: this is a tad hardcore
          inlinePerformIO
            (withForeignPtr fp
              (\p -> body $! (p `plusPtr` (o+u))))
          `seq` c () (Buffer fp o (u+n) (l-n))

word8 w = write 1 (\p -> poke p w)

unsafeInterleaveIO :: IO a -> IO a

unsafeInterleaveIO   :: IO a -> IO a
unsafeInterleaveIO a =  return (unsafePerformIO a)

do let c = unsafePerformIO getChar
   do_proc c

do let s = [unsafePerformIO getChar, unsafePerformIO getChar, unsafePerformIO getChar]
   do_proc s

do str <- unsafeInterleaveIO myGetContents

myGetContents = do
   c <- getChar
   s <- unsafeInterleaveIO myGetContents
   return (c:s)

myGetContents = do
   c <- replicateM 512 getChar
   s <- unsafeInterleaveIO myGetContents
   return (c++s)

myGetContents = unsafeInterleaveIO $ do
   c <- replicateM 512 getChar
   s <- myGetContents
   return (c++s)

runST :: (forall s . ST s a) -> a

newSTRef :: a -> ST s (STRef s a)
newArray_ :: Ix i => (i, i) -> ST s (STArray s i e)

makeSTRef :: a -> STRef s a
makeSTRef a = runST (newSTRef a)

stToIO :: ST RealWorld a -> IO a

oneST :: ST s Int -- note that this works correctly for any s
oneST = do var <- newSTRef 0
           modifySTRef var (+1)
           readSTRef var

one :: Int
one = runST oneST

newtype IO a = IO (State# RealWorld -> (# State# RealWorld, a #))

data World = World
newtype IO a = IO (World -> Either IOError a)

Difference between revisions of "IO inside"

Revision as of 00:40, 12 December 2020

Contents

Haskell is a pure language

What is a monad?

Welcome to the `RealWorld`, baby

`(>>=)` and `do` notation

Mutable data (references, arrays, hash tables...)

I/O actions as values

Example: a list of I/O actions

Example: returning an I/O action as a result

Example: a memory allocator generator

Example: emulating OOP with record types

Exception handling (under development)

Interfacing with C/C++ and foreign libraries (under development)

Calling functions

All about the `foreign` declaration

Marshalling simple types

Memory management

Marshalling strings

Marshalling composite types

Dynamic calls

DLLs

The dark side of the I/O monad

unsafePerformIO

inlinePerformIO

unsafeInterleaveIO

A safer approach: the ST monad

Welcome to the machine: the actual GHC implementation

The Yhc/nhc98 implementation

Further reading

To-do list

Navigation menu

Search

@@ Line 254: / Line 254: @@
 This simplifies the gluing of several I/O actions together.
-You don't need to use <code>do</code> for just one statement; for example,
+You don't need to use <code>do</code> for just one action; for example,
 <haskell>
@@ Line 266: / Line 266: @@
 </haskell>
-Let's examine how to desugar a <code>do</code> with multiple statements in the
+Let's examine how to desugar a <code>do</code>-expression with multiple actions in the
 following example:
@@ Line 275: / Line 275: @@
 </haskell>
-The <code>do</code> statement here just joins several I/O actions that should be
+The <code>do</code>-expression here just joins several I/O actions that should be
 performed sequentially. It's translated to sequential applications
 of one of the so-called "binding operators", namely <code>(>>)</code>:
@@ Line 414: / Line 414: @@
 <code>return</code> is to "lift" some value (of type <code>a</code>) into the result of
 a whole action (of type <code>IO a</code>) and therefore it should generally
-be used only as the last executed statement of some I/O sequence. For example try to
+be used only as the last executed action of some I/O sequence. For example try to
 translate the following definition into the corresponding low-level code:
@@ Line 424: / Line 424: @@
 </haskell>
-and you will realize that the <code>print</code> statement is executed even for non-negative values of <code>a</code>. If you need to escape from the middle of an I/O definition, you can use the <code>if</code> statement:
+and you will realize that the <code>print</code> call is executed even for non-negative values of <code>a</code>. If you need to escape from the middle of an I/O definition, you can use an <code>if</code> expression:
 <haskell>
@@ Line 443: / Line 443: @@
 </haskell>
-that may be useful for escaping from the middle of a longish <code>do</code> statement.
+that may be useful for escaping from the middle of a longish <code>do</code>-expression.
 Last exercise: implement a function <code>liftM</code> that lifts operations on
@@ Line 584: / Line 584: @@
 involving <code>putStr</code>. But what's the execution order? It's not defined
 by the order of the <code>let</code> bindings, it's defined by the order of processing
-"world" values! You can arbitrarily reorder the binding statements - the execution order will be defined by the data dependency with respect to the
+"world" values! You can arbitrarily reorder those local bindings - the execution order will be defined by the data dependency with respect to the
 "world" values that get passed around. Let's see what this <code>main</code> looks like in the <code>do</code> notation:
@@ Line 594: / Line 594: @@
 </haskell>
-As you can see, we've eliminated two of the <code>let</code> bindings and left only the one defining <code>get2chars</code>. The non-<code>let</code> statements are executed in the exact order in which they're written, because they pass the "world" value from statement to statement as we described above.  Thus, this version of the function is much easier to understand because we don't have to mentally figure out the data dependency of the "world" value.
+As you can see, we've eliminated two of the <code>let</code> bindings and left only the one defining <code>get2chars</code>. The non-<code>let</code> actions are executed in the exact order in which they're written, because they pass the "world" value from action to action as we described above.  Thus, this version of the function is much easier to understand because we don't have to mentally figure out the data dependency of the "world" value.
 Moreover, I/O actions like <code>get2chars</code> can't be executed directly
@@ Line 634: / Line 634: @@
 </haskell>
-Looks strange, right? Really, any I/O action that you write in a <code>do</code>
+Looks strange, right? Really, any I/O action that you write in a <code>do</code>-expression (or use as a parameter for the <code>(>>)</code>/<code>(>>=)</code> operators) is an expression
-statement (or use as a parameter for the <code>(>>)</code>/<code>(>>=)</code> operators) is an expression
 returning a result of type <code>IO a</code> for some type <code>a</code>. Typically, you use some function that has the type <code>x -> y -> ... -> IO a</code> and provide all the x, y, etc. parameters. But you're not limited to this standard scenario -
 don't forget that Haskell is a functional language and you're free to
@@ Line 687: / Line 686: @@
               b = when True getChar
               c = getChar >> getChar
-          putStr "These let-statements are not executed!"
+          putStr "These let-bindings are not executed!"
 </haskell>
@@ Line 830: / Line 829: @@
 </haskell>
-The constructor of each figure's type should just return a Figure record:
+The constructor of each figure's type should just return a <code>Figure</code> record:
 <haskell>
@@ Line 841: / Line 840: @@
 We will "draw" figures by just printing their current parameters.
-Let's start with a simplified implementation of the <code>circle</code> and <code>rectangle'
+Let's start with a simplified implementation of the <code>circle</code> and <code>rectangle</code>
 constructors, without actual <code>move</code> support:
@@ Line 875: / Line 874: @@
 type of this variable will be <code>IORef Point</code>. This variable should
 be created in the figure constructor and manipulated in I/O operations (closures) enclosed in
-the Figure record:
+the <code>Figure</code> record:
 <haskell>
@@ Line 917: / Line 916: @@
 It's important to realize that we are not limited to including only I/O actions
-in a record that's intended to simulate a C++/Java-style interface. The record can also include values, <code>IORef</code>s, pure functions - in short, any type of data. For example, we can easily add to the Figure interface fields for area and origin:
+in a record that's intended to simulate a C++/Java-style interface. The record can also include values, <code>IORef</code>s, pure functions - in short, any type of data. For example, we can easily add to the <code>Figure</code> interface fields for area and origin:
 <haskell>
@@ Line 929: / Line 928: @@
 == Exception handling (under development) ==
-Although Haskell provides a set of exception raising/handling features comparable to those in popular OOP languages (C++, Java, C#), this part of the language receives much less attention. This is for two reasons. First, you just don't need to worry as much about them - most of the time it just works "behind the scenes". The second reason is that Haskell, lacking OOP inheritance, doesn't allow the programmer to easily subclass exception types, therefore limiting flexibility of exception handling.
+Although Haskell provides a set of exception raising/handling features comparable to those in popular OOP languages (C++, Java, C#), this part of the language receives much less attention. This is for two reasons:
+* you just don't need to worry as much about them - most of the time it just works "behind the scenes".
+* Haskell, lacking OOP-style inheritance, doesn't allow the programmer to easily subclass exception types, therefore limiting the flexibility of exception handling.
 The Haskell RTS raises more exceptions than traditional languages - pattern match failures, calls with invalid arguments (such as <code>head []</code>) and computations whose results depend on special values <code>undefined</code> and <code>error "...."</code> all raise their own exceptions:
@@ Line 951: / Line 954: @@
 </haskell>
-This allows to write programs in much more error-prone way.
+This allows the writing of programs in a much more error-prone way.
 == Interfacing with C/C++ and foreign libraries (under development) ==
-While Haskell is great at algorithm development, speed isn't its best side. We can combine the best of both worlds, though, by writing speed-critical parts of program in C and rest in Haskell. We just need a way to call C functions from Haskell and vice versa, and to marshal data between two worlds.
+While Haskell is great at algorithm development, speed isn't its best side. We can combine the best of both worlds, though, by writing speed-critical parts of program in C and the rest in Haskell. We just need a way to call C functions from Haskell and vice versa, and to marshal data between both worlds.
-We also need to interact with C world for using Windows/Linux APIs, linking to various libraries and DLLs. Even interfacing with other languages requires to go through C world as "common denominator". [https://www.haskell.org/onlinereport/haskell2010/haskellch8.html Chapter 8 of the Haskell 2010 report] provides a complete description of interfacing with C.
+We also need to interact with the C world for using Windows/Linux APIs, linking to various libraries and DLLs. Even interfacing with other languages often requires going through C world as a "common denominator". [https://www.haskell.org/onlinereport/haskell2010/haskellch8.html Chapter 8 of the Haskell 2010 report] provides a complete description of interfacing with C.
 We will learn FFI via a series of examples. These examples include C/C++ code, so they need C/C++ compilers to be installed, the same will be true if you need to include code written in C/C++ in your program (C/C++ compilers are not required when you just need to link with existing libraries providing APIs with C calling convention). On Unix (and Mac OS?) systems, the system-wide default C/C++ compiler is typically used by GHC installation. On Windows, no default compilers exist, so GHC is typically shipped with a C compiler, and you may find on the download page a GHC distribution bundled with C and C++ compilers. Alternatively, you may find and install a GCC/MinGW version compatible with your GHC installation.
@@ Line 969: / Line 972: @@
 ;[[HSFFIG]]
-:Haskell FFI Binding Modules Generator (HSFFIG) is a tool that takes a C library include file (.h) and generates Haskell Foreign Functions Interface import declarations for items (functions, structures, etc.) the header defines.
+:Haskell FFI Binding Modules Generator (HSFFIG) is a tool that takes a C library header (".h") and generates Haskell Foreign Functions Interface import declarations for items (functions, structures, etc.) the header defines.
 ;[http://quux.org/devel/missingpy MissingPy]
-:MissingPy is really two libraries in one. At its lowest level, MissingPy is a library designed to make it easy to call into Python from Haskell. It provides full support for interpreting arbitrary Python code, interfacing with a good part of the Python/C API, and handling Python objects. It also provides tools for converting between Python objects and their Haskell equivalents. Memory management is handled for you, and Python exceptions get mapped to Haskell Dynamic exceptions. At a higher level, MissingPy contains Haskell interfaces to some Python modules.
+:MissingPy is really two libraries in one. At its lowest level, MissingPy is a library designed to make it easy to call into Python from Haskell. It provides full support for interpreting arbitrary Python code, interfacing with a good part of the Python/C API, and handling Python objects. It also provides tools for converting between Python objects and their Haskell equivalents. Memory management is handled for you, and Python exceptions get mapped to Haskell <code>Dynamic</code> exceptions. At a higher level, MissingPy contains Haskell interfaces to some Python modules.
 ;[[HsLua]]
@@ Line 979: / Line 982: @@
 === Calling functions ===
-First, we will learn how to call C functions from Haskell and Haskell functions from C. The first example consists of three files:
+We begin by learning how to call C functions from Haskell and Haskell functions from C. The first example consists of three files:
-main.hs:
+''main.hs:''
 <haskell>
 {-# LANGUAGE ForeignFunctionInterface #-}
@@ Line 997: / Line 1,000: @@
 </haskell>
-evil.c:
+''vile.c:''
 <haskell>
 #include <stdio.h>
@@ Line 1,009: / Line 1,012: @@
 </haskell>
-prototypes.h:
+''prototypes.h:''
 <haskell>
 extern void c_function (void);
@@ Line 1,016: / Line 1,019: @@
 It may be compiled and linked in one step by ghc:
-  ghc --make main.hs evil.c
+  ghc --make main.hs vile.c
-Or, you may compile C module(s) separately and link in ".o" files (this may be preferable if you use <code>make</code> and don't want to recompile unchanged sources; ghc's --make option provides smart recompilation only for .hs files):
+Or, you may compile C module(s) separately and link in ".o" files (this may be preferable if you use <code>make</code> and don't want to recompile unchanged sources; ghc's <code>--make</code> option provides smart recompilation only for ".hs" files):
-  ghc -c evil.c
+  ghc -c vile.c
-  ghc --make main.hs evil.o
+  ghc --make main.hs vile.o
 You may use gcc/g++ directly to compile your C/C++ files but I recommend to do linking via ghc because it adds a lot of libraries required for execution of Haskell code. For the same reason, even if your <code>main</code> routine is written in C/C++, I recommend calling it from the Haskell function <code>main</code> - otherwise you'll have to explicitly init/shutdown the GHC RTS (run-time system).
-We use the <code>foreign import</code> specification to import foreign routines into our Haskell world, and <code>foreign export</code> to export Haskell routines into the external world. Note that the import statement creates a new Haskell symbol (from the external one), while the export statement uses a Haskell symbol previously defined. Technically speaking, both types of statements create a wrapper that converts the names and calling conventions from C to Haskell or vice versa.
+We use the <code>foreign import</code> declaration to import foreign routines into our Haskell world, and <code>foreign export</code> to export Haskell routines into the external world. Note that <code>import</code> creates a new Haskell symbol (from the external one), while <code>export</code> uses a Haskell symbol previously defined. Technically speaking, both types of declarations create a wrapper that converts the names and calling conventions from C to Haskell or vice versa.
-=== All about the <code>foreign</code> statement ===
+=== All about the <code>foreign</code> declaration ===
-The <code>ccall</code> specifier in foreign statements means the use of C (not C++ !) calling convention. This means that if you want to write the external function in C++ (instead of C) you should add <code>export "C"</code> specification to its declaration - otherwise you'll get linking errors. Let's rewrite our first example to use C++ instead of C:
+The <code>ccall</code> specifier in foreign declarations means the use of the C (not C++ !) calling convention. This means that if you want to write the external function in C++ (instead of C) you should add <code>extern "C"</code> specification to its declaration - otherwise you'll get linking errors. Let's rewrite our first example to use C++ instead of C:
-prototypes.h:
+''prototypes.h:''
 <haskell>
 #ifdef __cplusplus
@@ Line 1,046: / Line 1,049: @@
 Compile it via:
-  ghc --make main.hs evil.cpp
+  ghc --make main.hs vile.cpp
-where "evil.cpp" is just a renamed copy of "evil.c" from the first example. Note that the new "prototypes.h" is written to allow compiling it both as C and C++ code. When it's included from evil.cpp, it's compiled as C++ code. When GHC compiles "main.hs" via the C compiler (enabled by -fvia-C option), it also includes prototypes.h but compiles it in C mode. It's why you need to specify ".h" files in <code>foreign</code> declarations - depending on which Haskell compiler you use, these files may be included to check consistency of C and Haskell declarations.
+where "vile.cpp" is just a renamed copy of "vile.c" from the first example. Note that the new "prototypes.h" is written to allow compiling it both as C and C++ code. When it's included from "vile.cpp", it's compiled as C++ code. When GHC compiles "main.hs" via the C compiler (enabled by the <code>-fvia-C</code> option), it also includes "prototypes.h" but compiles it in C mode. It's why you need to specify ".h" files in <code>foreign</code> declarations - depending on which Haskell compiler you use, these files may be included to check consistency of C and Haskell declarations.
-The quoted part of the foreign statement may also be used to import or export a function under another name--for example,
+The quoted part of the foreign declaration may also be used to import or export a function under another name - for example,
 <haskell>
@@ Line 1,069: / Line 1,072: @@
 </haskell>
-And finally, about the <code>safe</code>/<code>unsafe</code> specifier: a C function imported with the <code>unsafe</code> keyword is called directly and the Haskell runtime is stopped while the C function is executed (when there are several OS threads executing the Haskell program, only the current OS thread is delayed). This call doesn't allow recursively entering into the Haskell world by calling any Haskell function - the Haskell RTS is just not prepared for such an event. However, <code>unsafe</code> calls are as quick as calls in C world. It's ideal for "momentary" calls that quickly return back to the caller.
+And finally, about the <code>safe</code>/<code>unsafe</code> specifier: a C function imported with the <code>unsafe</code> keyword is called directly and the Haskell runtime is stopped while the C function is executed (when there are several OS threads executing the Haskell program, only the current OS thread is delayed). This call doesn't allow recursively entering into the Haskell world by calling any Haskell function - the Haskell RTS is just not prepared for such an event. However, <code>unsafe</code> calls are as quick as calls in the C world. It's ideal for "momentary" calls that quickly return back to the caller.
-When <code>safe</code> is specified, the C function is called in safe environment - the Haskell execution context is saved, so it's possible to call back to Haskell and, if the C call takes a long time, another OS thread may be started to execute Haskell code (of course, in threads other than the one that called the C code). This has its own price, though - around 1000 CPU ticks per call.
+When <code>safe</code> is specified, the C function is called in a safe environment - the Haskell execution context is saved, so it's possible to call back to Haskell and, if the C call takes a long time, another OS thread may be started to execute Haskell code (of course, in threads other than the one that called the C code). This has its own price, though - around 1000 CPU ticks per call.
 You can read more about interaction between FFI calls and Haskell concurrency in [[#readmore|[7]]].
@@ Line 1,077: / Line 1,080: @@
 === Marshalling simple types ===
-Calling by itself is relatively easy; the real problem of interfacing languages with different data models is passing data between them. In this case, there is no guarantee that Haskell's Int is represented in memory the same way as C's int, nor Haskell's Double the same as C's double and so on. While on *some* platforms they are the same and you can write throw-away programs relying on these, the goal of portability requires you to declare imported and exported functions using special types described in the FFI standard, which are guaranteed to correspond to C types. These are:
+Calling by itself is relatively easy; the real problem of interfacing languages with different data models is passing data between them. In this case, there is no guarantee that Haskell's <code>Int</code> is represented in memory the same way as C's <code>int</code>, nor Haskell's <code>Double</code> the same as C's <code>double</code> and so on. While on ''some'' platforms they are the same and you can write throw-away programs relying on these, the goal of portability requires you to declare imported and exported functions using special types described in the FFI standard, which are guaranteed to correspond to C types. These are:
 <haskell>
@@ Line 1,128: / Line 1,131: @@
 A C array may be manipulated in Haskell as [http://haskell.org/haskellwiki/Arrays#StorableArray_.28module_Data.Array.Storable.29 StorableArray].
-There is no built-in support for marshalling C structures and using C constants in Haskell. These are implemented in c2hs preprocessor, though.
+There is no built-in support for marshalling C structures and using C constants in Haskell. These are implemented in the c2hs preprocessor, though.
-Binary marshalling (serializing) of data structures of any complexity is implemented in library Binary.
+Binary marshalling (serializing) of data structures of any complexity is implemented in the library module "Binary".
 === Dynamic calls ===
 === DLLs ===
-''because i don't have experience of using DLLs, can someone write into this section? ultimately, we need to consider the following tasks:''
+''Because I don't have experience with using DLLs, can someone contribute to this section? Ultimately, we need to consider the following tasks:''
-* using DLLs of 3rd-party libraries (such as ziplib)
+* using DLLs of 3rd-party libraries (such as ''ziplib'')
 * putting your own C code into a DLL to use in Haskell
 * putting Haskell code into a DLL which may be called from C code
-== Dark side of I/O monad ==
+== The dark side of the I/O monad ==
-=== <code>unsafePerformIO</code> ===
+=== '''unsafePerformIO''' ===
 Programmers coming from an imperative language background often look for a way to execute I/O actions inside a pure function. But what does this mean?
@@ Line 1,151: / Line 1,154: @@
 </haskell>
-Defining readContents as a pure function will certainly simplify the code that uses it. But it will also create problems for the compiler:
+Defining <code>readContents</code> as a pure function will certainly simplify the code that uses it. But it will also create problems for the compiler:
 * This call is not inserted in a sequence of "world transformations", so the compiler doesn't know at what exact moment you want to execute this action. For example, if the file has one kind of contents at the beginning of the program and another at the end - which contents do you want to see?  You have no idea when (or even if) this function is going to get invoked, because Haskell sees this function as pure and feels free to reorder the execution of any or all pure functions as needed.
-* Attempts to read the contents of files with the same name can be factored (''i.e.'' reduced to a single call) despite the fact that the file (or the current directory) can be changed between calls. Again, Haskell considers all non-<code>IO</code> functions to be pure and feels free to omit multiple calls with the same parameters.
+* Attempts to read the contents of files with the same name can be factored (''i.e.'' reduced to a single call) despite the fact that the file (or the current directory) can be changed between calls. Again, Haskell considers all non-<code>IO</code> functions to be pure and feels free to merge multiple calls with the same parameters.
 So, implementing supposedly-pure functions that interact with the '''Real World''' is
@@ Line 1,167: / Line 1,170: @@
 </haskell>
-Let's look at its (possible) definition:
+Let's look at how it ''could'' be defined:
 <haskell>
 unsafePerformIO :: (RealWorld -> (a, RealWorld)) -> a
-unsafePerformIO action = let (a, world1) = action (createNewWorld action)
+unsafePerformIO action = let (a, world1) = action createNewWorld
                          in a
 </haskell>
-where <code>createNewWorld</code> is an internal function producing a new value of
+where <code>createNewWorld</code> is a private definition producing a new value of
 the <code>RealWorld</code> type.
 Using <code>unsafePerformIO</code>, you could easily write pure functions that do
 I/O inside. But don't do this without a real need, and remember to
-follow this rule: the compiler doesn't know that you are cheating; it still
+follow this rule:
-considers each non-<code>IO</code> function to be a pure one. Therefore, all the usual
-optimization rules can (and will!) be applied to its execution. So
+* the compiler doesn't know that you are cheating; it still considers each non-<code>IO</code> function to be a pure one. Therefore, all the usual optimization rules can (and will!) be applied to its execution.
-you must ensure that:
+So you must ensure that:
 * The result of each call depends only on its arguments.
@@ Line 1,193: / Line 1,197: @@
 * An I/O action inside an I/O definition is guaranteed to execute as long as it is (directly or indirectly) inside the <code>main</code> chain - even when its result isn't used (because the implicit "world" value it returns ''will'' be used). You directly specify the order of the action's execution inside the I/O definition. Data dependencies are simulated via the implicit "world" values that are passed from each I/O action to the next.
-* An I/O action inside <code>unsafePerformIO</code> will be performed only if the result of this operation is really used. The evaluation order is not guaranteed and you should not rely on it (except when you're sure about
+* An I/O action inside <code>unsafePerformIO</code> will be performed only if the result of this operation is really used. The evaluation order is not guaranteed and you should not rely on it (except when you're sure about whatever data dependencies may exist).
-whatever data dependencies may exist).
-I should also say that inside <code>unsafePerformIO</code> call you can organize
+I should also say that inside the <code>unsafePerformIO</code> call you can organize
 a small internal chain of I/O actions with the help of the same binding
 operators and/or <code>do</code> syntactic sugar we've seen above.  For example, here's a particularly convoluted way to compute the integer that comes after zero:
@@ Line 1,207: / Line 1,210: @@
 </haskell>
-and in this case ALL the operations in this chain will be performed as
+and in this case ''all'' the operations in this chain will be performed as
 long as the result of the <code>unsafePerformIO</code> call is needed. To ensure this,
 the actual <code>unsafePerformIO</code> implementation evaluates the "world" returned
@@ Line 1,213: / Line 1,216: @@
 <haskell>
-unsafePerformIO action = let (a,world1) = action (createNewWorld action)
+unsafePerformIO action = let (a,world1) = action createNewWorld
                          in (world1 `seq` a)
 </haskell>
@@ Line 1,220: / Line 1,223: @@
 returning the value of the second one [[#readmore|[8]]]).
-=== <code>inlinePerformIO</code> ===
+=== '''inlinePerformIO''' ===
 <code>inlinePerformIO</code> has the same definition as <code>unsafePerformIO</code> but with the addition of an <code>INLINE</code> pragma:
@@ Line 1,277: / Line 1,280: @@
 Paterson who did it first in the <code>Builder</code> monoid)
-=== <code>unsafeInterleaveIO</code> ===
+=== '''unsafeInterleaveIO''' ===
 But there is an even stranger operation:
@@ Line 1,291: / Line 1,294: @@
 things will get ugly!
-So how does <code>unsafeInterleaveIO</code> get that bootleg baton? Typically by
+So how does <code>unsafeInterleaveIO</code> get that bootlegged baton? Typically by
 making a forgery of the official one to keep for itself - it can do
 this because the I/O action <code>unsafeInterleaveIO</code> returns will be
@@ Line 1,310: / Line 1,313: @@
 One can use <code>unsafePerformIO</code> (not <code>unsafeInterleaveIO</code>) to perform I/O
-operations not in predefined order but by demand. For example, the
+operations not in some predefined order but by demand. For example, the following code:
-following code:
 <haskell>
@@ Line 1,318: / Line 1,320: @@
 </haskell>
-will perform getChar I/O call only when value of c is really required
+will perform the <code>getChar</code> I/O call only when the value of <code>c</code> is really required
-by code, i.e. it this call will be performed lazily as any usual
+by the calling code, i.e. this call will be performed lazily like any regular Haskell computation.
-Haskell computation.
 Now imagine the following code:
@@ Line 1,329: / Line 1,330: @@
 </haskell>
-Three chars inside this list will be computed on demand too, and this
+The three characters inside this list will be computed on demand too, and this
 means that their values will depend on the order they are consumed. It
-is not that we usually need.
+is not what we usually want.
 <code>unsafeInterleaveIO</code> solves this problem - it performs I/O only on
-demand but allows to define exact <code>'internal'</code> execution order for parts
+demand but allows you to define the exact ''internal'' execution order for parts
-of your datastructure. It is why I wrote that <code>unsafeInterleaveIO</code> makes
+of your data structure. It is why I wrote that <code>unsafeInterleaveIO</code> makes
 an illegal copy of the baton:
@@ Line 1,345: / Line 1,346: @@
 </haskell>
-* <code>unsafeInterleaveIO</code> doesn't perform any action immediately, it only creates a box of type <code>a</code> which on requesting this value will perform action specified as a parameter.
+* <code>unsafeInterleaveIO</code> doesn't perform any action immediately, it only creates a closure of type <code>a</code> which upon being needed will perform the action specified as the parameter.
-* this action by itself may compute the whole value immediately or...use <code>unsafeInterleaveIO</code> again to defer calculation of some sub-components:
+* this action by itself may compute the whole value immediately...or use <code>unsafeInterleaveIO</code> again to defer calculation of some sub-components:
 <haskell>
@@ Line 1,356: / Line 1,357: @@
 </haskell>
-This code will be executed only at the moment when value of str is
+This code will be executed only at the moment when the value of <code>str</code> is
-really demanded. In this moment, <code>getChar</code> will be performed (with
+really demanded. At this moment, <code>getChar</code> will be performed (with its
-result assigned to <code>c</code>) and one more lazy I/O box will be created - for <code>s</code>.
+result assigned to <code>c</code>) and a new lazy-I/O closure will be created - for <code>s</code>.
-This box again contains link to the <code>myGetContents</code> call.
+This new closure also contains a link to a <code>myGetContents</code> call.
-Then, list cell returned that contains one char read and link to
+Then the list cell is returned. It contains the <code>Char</code> that was just read and a link to
-<code>myGetContents</code> call as a way to compute rest of the list. Only at the
+another <code>myGetContents</code> call as a way to compute the rest of the list. Only at the
-moment when next value in list required, this operation will be
+moment when the next value in the list is required will this operation be performed again.
-performed again
-As a final result, we get inability to read second char in list before
+As a final result, it is impossible to read the second <code>Char</code> in the list before
-first one, but lazy character of reading in whole. bingo!
+the first one, yet the reading as a whole remains lazy - bingo!
-PS: of course, actual code should include EOF checking. also note that
+PS: of course, actual code should include EOF checking; also note that
-you can read many chars/records at each call:
+you can read multiple characters/records at each call:
 <haskell>
@@ Line 1,442: / Line 1,442: @@
 implements the I/O monad via continuations [[#readmore|[9]]]. I also haven't said anything about
 exception handling, which is a natural part of the "monad" concept. You can
-read the "All About Monads" guide to learn more about these topics.
+read the [[All About Monads]] guide to learn more about these topics.
+But there is some good news:
-But there is some good news: first, the I/O monad understanding you've just acquired will work with any implementation and with many other monads. You just can't work with <code>RealWorld</code>
-values directly.
+* the I/O monad understanding you've just acquired will work with any implementation and with many other monads. You just can't work with <code>RealWorld</code> values directly.
-Second, the I/O monad implementation described here is really used in the GHC,
-yhc/nhc (jhc, too?) compilers. Here is the actual <code>IO</code> definition
-from the GHC sources:
+* the I/O monad implementation described here is similar to what GHC uses:
 <haskell>
 newtype IO a = IO (State# RealWorld -> (# State# RealWorld, a #))
@@ Line 1,456: / Line 1,454: @@
 It uses the <code>State# RealWorld</code> type instead of our <code>RealWorld</code>, it uses the <code>(# ... #)</code> unboxed tuple for optimization, and it adds an <code>IO</code> data constructor
-around the type. Nevertheless, there are no significant changes from the standpoint of our explanation. Knowing the principle of "chaining" I/O actions via fake "state of the world" values, you can now easily understand and write low-level implementations of GHC I/O operations.
+around the type. Nevertheless, there are no significant changes from the standpoint of our explanation. Knowing the principle of "chaining" I/O actions via fake "state of the world" values, you can now more easily understand and write low-level implementations of GHC I/O operations.
+Of course, other compilers e.g. yhc/nhc (jhc, too?) define <code>IO</code> in other ways.
 === The [[Yhc]]/nhc98 implementation ===
@@ Line 1,470: / Line 1,470: @@
 == <span id="readmore"></span>Further reading ==
-[1] This tutorial is largely based on Simon Peyton Jones's paper [http://research.microsoft.com/%7Esimonpj/Papers/marktoberdorf Tackling the awkward squad: monadic input/output, concurrency, exceptions, and foreign-language calls in Haskell]. I hope that my tutorial improves his original explanation of the Haskell I/O system and brings it closer to the point of view of beginning Haskell programmers. But if you need to learn about concurrency, exceptions and FFI in Haskell/GHC, the original paper is the best source of information.
+[1] This tutorial is largely based on Simon Peyton Jones's paper [https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.13.9123&rep=rep1&type=pdf Tackling the awkward squad: monadic input/output, concurrency, exceptions, and foreign-language calls in Haskell]. I hope that my tutorial improves his original explanation of the Haskell I/O system and brings it closer to the point of view of new Haskell programmers. But if you need to learn about concurrency, exceptions and FFI in Haskell/GHC, the original paper is the best source of information.
 [2] You can find more information about concurrency, FFI and STM at the [[GHC/Concurrency#Starting points]] page.
@@ Line 1,508: / Line 1,508: @@
 * how <code>unsafeInterleaveIO</code> can be seen as a kind of concurrency, and therefore isn't so unsafe (unlike <code>unsafeInterleaveST</code> which really is unsafe)
 * discussion about different senses of <code>safe</code>/<code>unsafe</code> (like breaking equational reasoning vs. invoking undefined behaviour (so can corrupt the run-time system))
-* actual GHC implementation - how to write low-level routines on example of <code>newIORef</code> implementation
+* actual GHC implementation - how to write low-level routines, based on the example of the <code>newIORef</code> implementation
 This manual is a collective work, so feel free to add more information to it yourself. The final goal is to collectively develop a comprehensive manual for using the I/O monad.

Difference between revisions of "IO inside"

Revision as of 00:40, 12 December 2020

Haskell is a pure language

What is a monad?

Welcome to the RealWorld, baby

(>>=) and do notation

Mutable data (references, arrays, hash tables...)

I/O actions as values

Example: a list of I/O actions

Example: returning an I/O action as a result

Example: a memory allocator generator

Example: emulating OOP with record types

Exception handling (under development)

Interfacing with C/C++ and foreign libraries (under development)

Calling functions

All about the foreign declaration

Marshalling simple types

Memory management

Marshalling strings

Marshalling composite types

Dynamic calls

DLLs

The dark side of the I/O monad

unsafePerformIO

inlinePerformIO

unsafeInterleaveIO

A safer approach: the ST monad

Welcome to the machine: the actual GHC implementation

The Yhc/nhc98 implementation

Further reading

To-do list

Navigation menu

Search

Welcome to the `RealWorld`, baby

`(>>=)` and `do` notation

All about the `foreign` declaration