Serialisation and compression with Data Binary
An example showing how to *efficiently* serialise data, compress it, and pass it to C, using Data.Binary and the zlib binding.
{-# OPTIONS -fglasgow-exts #-}
{-
An example showing how to:
* Use the FFI
* Compress streaming data
* Serialise to and from disk
* Stream lazy bytestrings efficiently
We will use
* Foreign.* to generate the data
* Wrap it as a lazy bytestring
* Data.Binary to serialise it
* Code.Compression.Gzip to compress/uncompress
* Pass it to C and make a simple FFI call on the result
* Display the result
Running:
$ ghc -O2 A.hs --make
$ time ./A
Built table
Compressed 25600000 bytes
Compressed size 2231545 bytes (91.28%)
Decompressed 25600000 bytes
Calling into C ...
-8.742278e-8
-0.6865875
-0.7207948
-0.1401903
0.63918984
0.7437966
0.27236375
-0.5763547
-0.75708854
-0.39026973
./A 2.98s user 0.11s system 94% cpu 3.275 total
-}
--
-- Some imports
--
import Foreign
import Foreign.C.Types
import Data.Int
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Internal as S
import qualified Data.ByteString as S
import Data.Binary
import Codec.Compression.GZip
import System.IO
import Text.Printf
import Control.Monad
------------------------------------------------------------------------
-- Foreign Ptrs
--
-- A simple wrapper type
--
data Table = Table { floats :: ForeignPtr CFloat
, ints :: ForeignPtr Int }
-- Statically fixed sizes
floatSize = 4800000
intSize = 1600000
totalBytes = sizeOf (undefined :: CFloat) * floatSize
+ sizeOf (undefined :: Int) * intSize
--
-- Build a table populated with some defaults
-- Float table filled with 'pi' , ints numbered consecutively
--
newTable :: IO Table
newTable = do
fp <- S.mallocByteString (floatSize * sizeOf (undefined :: CFloat))
ip <- S.mallocByteString (intSize * sizeOf (undefined :: Int ))
withForeignPtr fp $ \p ->
forM_ [0..floatSize-1] $ \n ->
pokeElemOff p n pi
withForeignPtr ip $ \p ->
forM_ [0..intSize-1] $ \n ->
pokeElemOff p n n
return (Table fp ip)
------------------------------------------------------------------------
-- Lazy ByteStrings
--
-- Convert ForeignPtr a to and from a lazy ByteString
--
toByteString :: Storable a => ForeignPtr a -> Int -> L.ByteString
toByteString (fp :: ForeignPtr a) n =
L.fromChunks . (:[]) $ S.fromForeignPtr (castForeignPtr fp) 0
(n * sizeOf (undefined :: a))
--
-- Flatten a lazy bytestring back to a ForeignPtr.
--
fromByteString :: Storable a => L.ByteString -> ForeignPtr a
fromByteString lbs = castForeignPtr fp
where (fp,_,n) = S.toForeignPtr . S.concat $ L.toChunks lbs
------------------------------------------------------------------------
-- GZip and Data.Binary
--
-- Serialise a Table, compressing with gzip it as we go:
--
instance Binary Table where
put (Table f i) = do
put . compress . toByteString f $ floatSize
put . compress . toByteString i $ intSize
get = do
fs <- liftM decompress get
is <- liftM decompress get
-- check we read the correct amount:
if L.length fs + L.length is == fromIntegral totalBytes
then return $ Table (fromByteString fs) (fromByteString is)
else error "Partial read"
------------------------------------------------------------------------
-- FFI
--
-- Example call to process the data using C functions.
--
rounded :: Int -> ForeignPtr CFloat -> IO [CFloat]
rounded l fp = withForeignPtr fp $ \p -> go p
where
go p = forM [0..l-1] $ \n -> do
v <- peekElemOff p n
return $ c_tanhf (c_sinf (v + fromIntegral n))
-- A random C function to use:
foreign import ccall unsafe "math.h sinf" c_sinf :: CFloat -> CFloat
foreign import ccall unsafe "math.h tanhf" c_tanhf :: CFloat -> CFloat
------------------------------------------------------------------------
--
-- Now glue it all together
--
main = do
table <- newTable
putStrLn "Built table"
-- write the data to disk, compressed with gzip as we go.
encodeFile "/tmp/table.gz" table
printf "Compressed %d bytes\n" totalBytes
-- how good was the compression?
h <- openFile "/tmp/table.gz" ReadMode
n <- hFileSize h
hClose h
printf "Compressed size %d bytes (%0.2f%%)\n" n
(100 - (fromIntegral n/fromIntegral totalBytes*100) :: Double)
-- load it back in, decompressing on the fly
table' <- decodeFile "/tmp/table.gz"
printf "Decompressed %d bytes\n" totalBytes
-- now process the floats with C
printf "Calling into C ...\n"
ps <- rounded 10 (floats table')
forM_ ps print