Serialisation and compression with Data Binary
Jump to navigation
Jump to search
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
An example showing how to *efficiently* serialise data, compress it, and pass it to C, using Data.Binary and the zlib binding.
{-# OPTIONS -fglasgow-exts #-}
{-
An example showing how to:
* Use the FFI
* Compress streaming data
* Serialise to and from disk
* Stream lazy bytestrings efficiently
We will use
* Foreign.* to generate the data
* Wrap it as a lazy bytestring
* Data.Binary to serialise it
* Code.Compression.Gzip to compress/uncompress
* Pass it to C and make a simple FFI call on the result
* Display the result
Running:
$ ghc -O2 A.hs --make
$ time ./A
Built table
Compressed 25600000 bytes
Compressed size 2231545 bytes (91.28%)
Decompressed 25600000 bytes
Calling into C ...
-8.742278e-8
-0.6865875
-0.7207948
-0.1401903
0.63918984
0.7437966
0.27236375
-0.5763547
-0.75708854
-0.39026973
./A 2.98s user 0.11s system 94% cpu 3.275 total
-}
--
-- Some imports
--
import Foreign
import Foreign.C.Types
import Data.Int
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Internal as S
import qualified Data.ByteString as S
import Data.Binary
import Codec.Compression.GZip
import System.IO
import Text.Printf
import Control.Monad
------------------------------------------------------------------------
-- Foreign Ptrs
--
-- A simple wrapper type
--
data Table = Table { floats :: ForeignPtr CFloat
, ints :: ForeignPtr Int }
-- Statically fixed sizes
floatSize = 4800000
intSize = 1600000
totalBytes = sizeOf (undefined :: CFloat) * floatSize
+ sizeOf (undefined :: Int) * intSize
--
-- Build a table populated with some defaults
-- Float table filled with 'pi' , ints numbered consecutively
--
newTable :: IO Table
newTable = do
fp <- S.mallocByteString (floatSize * sizeOf (undefined :: CFloat))
ip <- S.mallocByteString (intSize * sizeOf (undefined :: Int ))
withForeignPtr fp $ \p ->
forM_ [0..floatSize-1] $ \n ->
pokeElemOff p n pi
withForeignPtr ip $ \p ->
forM_ [0..intSize-1] $ \n ->
pokeElemOff p n n
return (Table fp ip)
------------------------------------------------------------------------
-- Lazy ByteStrings
--
-- Convert ForeignPtr a to and from a lazy ByteString
--
toByteString :: Storable a => ForeignPtr a -> Int -> L.ByteString
toByteString (fp :: ForeignPtr a) n =
L.fromChunks . (:[]) $ S.fromForeignPtr (castForeignPtr fp) 0
(n * sizeOf (undefined :: a))
--
-- Flatten a lazy bytestring back to a ForeignPtr.
--
fromByteString :: Storable a => L.ByteString -> ForeignPtr a
fromByteString lbs = castForeignPtr fp
where (fp,_,n) = S.toForeignPtr . S.concat $ L.toChunks lbs
------------------------------------------------------------------------
-- GZip and Data.Binary
--
-- Serialise a Table, compressing with gzip it as we go:
--
instance Binary Table where
put (Table f i) = do
put . compress . toByteString f $ floatSize
put . compress . toByteString i $ intSize
get = do
fs <- liftM decompress get
is <- liftM decompress get
-- check we read the correct amount:
if L.length fs + L.length is == fromIntegral totalBytes
then return $ Table (fromByteString fs) (fromByteString is)
else error "Partial read"
------------------------------------------------------------------------
-- FFI
--
-- Example call to process the data using C functions.
--
rounded :: Int -> ForeignPtr CFloat -> IO [CFloat]
rounded l fp = withForeignPtr fp $ \p -> go p
where
go p = forM [0..l-1] $ \n -> do
v <- peekElemOff p n
return $ c_tanhf (c_sinf (v + fromIntegral n))
-- A random C function to use:
foreign import ccall unsafe "math.h sinf" c_sinf :: CFloat -> CFloat
foreign import ccall unsafe "math.h tanhf" c_tanhf :: CFloat -> CFloat
------------------------------------------------------------------------
--
-- Now glue it all together
--
main = do
table <- newTable
putStrLn "Built table"
-- write the data to disk, compressed with gzip as we go.
encodeFile "/tmp/table.gz" table
printf "Compressed %d bytes\n" totalBytes
-- how good was the compression?
h <- openFile "/tmp/table.gz" ReadMode
n <- hFileSize h
hClose h
printf "Compressed size %d bytes (%0.2f%%)\n" n
(100 - (fromIntegral n/fromIntegral totalBytes*100) :: Double)
-- load it back in, decompressing on the fly
table' <- decodeFile "/tmp/table.gz"
printf "Decompressed %d bytes\n" totalBytes
-- now process the floats with C
printf "Calling into C ...\n"
ps <- rounded 10 (floats table')
forM_ ps print