Difference between revisions of "Benchmarks Game/Parallel/BinaryTrees"

From HaskellWiki
Jump to navigation Jump to search
Line 2: Line 2:
   
 
* http://shootout.alioth.debian.org/u64q/benchmark.php?test=binarytrees&lang=ghc&id=1
 
* http://shootout.alioth.debian.org/u64q/benchmark.php?test=binarytrees&lang=ghc&id=1
  +
  +
=== 2009-03-01: Current Entry ===
  +
  +
Submitted: http://alioth.debian.org/tracker/index.php?func=detail&aid=311523&group_id=30402&atid=411646
  +
  +
Also filed a bug ticket with GHC to find out if the GC growth strategy can be improved (so that -H240M isn't required): http://hackage.haskell.org/trac/ghc/ticket/3061
  +
  +
<haskell>
  +
{-# OPTIONS -funbox-strict-fields #-}
  +
{-# LANGUAGE BangPatterns #-}
  +
--
  +
-- The Computer Language Benchmarks Game
  +
-- http://shootout.alioth.debian.org/
  +
--
  +
-- Contributed by Don Stewart
  +
-- Modified by Stephen Blackheath to parallelize (a very tiny tweak)
  +
--
  +
-- Compile with:
  +
--
  +
-- > ghc -O2 -fasm -threaded --make
  +
--
  +
-- Run with:
  +
--
  +
-- > ./A +RTS -N4 -H300M -RTS 20
  +
--
  +
-- Where '4' is the number of cores. and "set your -H value high (3 or
  +
-- more times the maximum residency)", as per GHC User's Guide:
  +
--
  +
-- <http://haskell.org/ghc/docs/6.10.1/html/users_guide/runtime-control.html#rts-options-gc>
  +
--
  +
-- -H "provides a “suggested heap size” for the garbage collector. The
  +
-- garbage collector will use about this much memory until the program
  +
-- residency grows and the heap size needs to be expanded to retain
  +
-- reasonable performance."
  +
--
  +
  +
import System
  +
import Data.Bits
  +
import Text.Printf
  +
import Control.Parallel.Strategies
  +
  +
--
  +
-- an artificially strict tree.
  +
--
  +
-- normally you would ensure the branches are lazy, but this benchmark
  +
-- requires strict allocation.
  +
--
  +
data Tree = Nil | Node !Int !Tree !Tree
  +
  +
minN = 4
  +
  +
io s n t = printf "%s of depth %d\t check: %d\n" s n t
  +
  +
main = do
  +
n <- getArgs >>= readIO . head
  +
let maxN = max (minN + 2) n
  +
stretchN = maxN + 1
  +
  +
-- stretch memory tree
  +
let c = check (make 0 stretchN)
  +
io "stretch tree" stretchN c
  +
  +
-- allocate a long lived tree
  +
let !long = make 0 maxN
  +
  +
-- allocate, walk, and deallocate many bottom-up binary trees
  +
let vs = parMap rnf id $ depth minN maxN
  +
mapM_ (\((m,d,i)) -> io (show m ++ "\t trees") d i) vs
  +
  +
-- confirm the the long-lived binary tree still exists
  +
io "long lived tree" maxN (check long)
  +
  +
-- generate many trees
  +
depth :: Int -> Int -> [(Int,Int,Int)]
  +
depth d m
  +
| d <= m = (2*n,d,sumT d n 0) : depth (d+2) m
  +
| otherwise = []
  +
where n = 1 `shiftL` (m - d + minN)
  +
  +
-- allocate and check lots of trees
  +
sumT :: Int -> Int -> Int -> Int
  +
sumT d 0 t = t
  +
sumT d i t = sumT d (i-1) (t + a + b)
  +
where a = check (make i d)
  +
b = check (make (-i) d)
  +
  +
-- traverse the tree, counting up the nodes
  +
check :: Tree -> Int
  +
check Nil = 0
  +
check (Node i l r) = i + check l - check r
  +
  +
-- build a tree
  +
make :: Int -> Int -> Tree
  +
make i 0 = Node i Nil Nil
  +
make i d = Node i (make (i2-1) d2) (make i2 d2)
  +
where i2 = 2*i; d2 = d-1
  +
  +
</haskell>
   
 
=== Parallel Strategies: parMap ===
 
=== Parallel Strategies: parMap ===

Revision as of 01:59, 3 March 2009

Binary Trees

2009-03-01: Current Entry

Submitted: http://alioth.debian.org/tracker/index.php?func=detail&aid=311523&group_id=30402&atid=411646

Also filed a bug ticket with GHC to find out if the GC growth strategy can be improved (so that -H240M isn't required): http://hackage.haskell.org/trac/ghc/ticket/3061

{-# OPTIONS -funbox-strict-fields #-}
{-# LANGUAGE BangPatterns #-}
--
-- The Computer Language Benchmarks Game
-- http://shootout.alioth.debian.org/
--
-- Contributed by Don Stewart
-- Modified by Stephen Blackheath to parallelize (a very tiny tweak)
--
-- Compile with:
--
-- >    ghc -O2 -fasm -threaded --make 
--
-- Run with:
--
-- >    ./A +RTS -N4 -H300M -RTS 20
-- 
-- Where '4' is the number of cores. and "set your -H value high (3 or
-- more times the maximum residency)", as per GHC User's Guide:
--
--  <http://haskell.org/ghc/docs/6.10.1/html/users_guide/runtime-control.html#rts-options-gc>
--
-- -H "provides a “suggested heap size” for the garbage collector. The
-- garbage collector will use about this much memory until the program
-- residency grows and the heap size needs to be expanded to retain
-- reasonable performance."
--

import System
import Data.Bits
import Text.Printf
import Control.Parallel.Strategies

--
-- an artificially strict tree.
--
-- normally you would ensure the branches are lazy, but this benchmark
-- requires strict allocation.
--
data Tree = Nil | Node !Int !Tree !Tree

minN = 4

io s n t = printf "%s of depth %d\t check: %d\n" s n t

main = do
    n <- getArgs >>= readIO . head
    let maxN     = max (minN + 2) n
        stretchN = maxN + 1

    -- stretch memory tree
    let c = check (make 0 stretchN)
    io "stretch tree" stretchN c

    -- allocate a long lived tree
    let !long    = make 0 maxN

    -- allocate, walk, and deallocate many bottom-up binary trees
    let vs = parMap rnf id $ depth minN maxN
    mapM_ (\((m,d,i)) -> io (show m ++ "\t trees") d i) vs

    -- confirm the the long-lived binary tree still exists
    io "long lived tree" maxN (check long)

-- generate many trees
depth :: Int -> Int -> [(Int,Int,Int)]
depth d m
    | d <= m    = (2*n,d,sumT d n 0) : depth (d+2) m
    | otherwise = []
  where n = 1 `shiftL` (m - d + minN)

-- allocate and check lots of trees
sumT :: Int -> Int -> Int -> Int
sumT d 0 t = t
sumT  d i t = sumT d (i-1) (t + a + b)
  where a = check (make i    d)
        b = check (make (-i) d)

-- traverse the tree, counting up the nodes
check :: Tree -> Int
check Nil          = 0
check (Node i l r) = i + check l - check r

-- build a tree
make :: Int -> Int -> Tree
make i 0 = Node i Nil Nil
make i d = Node i (make (i2-1) d2) (make i2 d2)
  where i2 = 2*i; d2 = d-1

Parallel Strategies: parMap

  • Status: submitted.

Flags:

   $ ghc -O2 --make -fasm -threaded  Parallel.hs
   $ ./Parallel 20 +RTS -N5 -A350M

This is a version of the Haskell GHC binary-trees benchmark, annotated for parallelism, using parallel strategy combinators. When compiled with the -threaded flag, and run with +RTS -N5 -RTS, it will exploit all cores on the quad-core machine, dramatically reducing running times.

On my quad core, running time goes from,

* single core, 26.997s
* quad core, 5.692s

The following flags should be used:

Compile time:

  ghc -O2 -fasm --make Parallel2.hs -threaded

Runtime:

  ./Parallel2 20 +RTS -N5 -A350M -RTS

The -N5 flag asks the Haskell runtime to use 5 capabilites, which map onto the underlying cores.

Here is the result on my quad core,

   $ time ./Parallel2 20 +RTS -N5 -A350M -RTS
  stretch tree of depth 21	 check: -1
  2097152	 trees of depth 4	 check: -2097152
  524288	 trees of depth 6	 check: -524288
  131072	 trees of depth 8	 check: -131072
  32768	 trees of depth 10	 check: -32768
  8192	 trees of depth 12	 check: -8192
  2048	 trees of depth 14	 check: -2048
  512	 trees of depth 16	 check: -512
  128	 trees of depth 18	 check: -128
  32	 trees of depth 20	 check: -32
  long lived tree of depth 20	 check: -1
  ./Parallel2 20 +RTS -N5 -A350M -RTS  15.80s user 1.52s system 304% cpu 5.692 total

Which is a satisfying result, as the parallelisation strategy is super simple.


Code:

{-# OPTIONS -fbang-patterns -funbox-strict-fields #-}
--
-- The Computer Language Shootout
-- http://shootout.alioth.debian.org/
--
-- Contributed by Don Stewart and Thomas Davie
--
-- This implementation uses a parallel strategy to exploit the quad core machine.
-- For more information about Haskell parallel strategies, see,
--
--  http://www.macs.hw.ac.uk/~dsg/gph/papers/html/Strategies/strategies.html
--

import System
import Data.Bits
import Text.Printf
import Control.Parallel.Strategies
import Control.Parallel

--
-- an artificially strict tree.
--
-- normally you would ensure the branches are lazy, but this benchmark
-- requires strict allocation.
--
data Tree = Nil | Node !Int !Tree !Tree

minN = 4

io s n t = printf "%s of depth %d\t check: %d\n" s n t

main = do
    n <- getArgs >>= readIO . head
    let maxN     = max (minN + 2) n
        stretchN = maxN + 1

    -- stretch memory tree
    let c = check (make 0 stretchN)
    io "stretch tree" stretchN c

    -- allocate a long lived tree
    let !long    = make 0 maxN

    -- allocate, walk, and deallocate many bottom-up binary trees
    let vs = (parMap rnf) (depth' maxN) [minN,minN+2..maxN]
    mapM_ (\((m,d,i)) -> io (show m ++ "\t trees") d i) vs

    -- confirm the the long-lived binary tree still exists
    io "long lived tree" maxN (check long)

-- generate many trees
depth' :: Int -> Int -> (Int,Int,Int)
depth' m d =
  (2*n,d,sumT d n 0)
  where
    n = 1 `shiftL` (m - d + minN)

-- allocate and check lots of trees
sumT :: Int -> Int -> Int -> Int
sumT d 0 t = t
sumT  d i t = sumT d (i-1) (t + a + b)
  where a = check (make i    d)
        b = check (make (-i) d)

-- traverse the tree, counting up the nodes
check :: Tree -> Int
check Nil          = 0
check (Node i l r) = i + check l - check r

-- build a tree
make :: Int -> Int -> Tree
make i 0 = Node i Nil Nil
make i d = Node i (make (i2-1) d2) (make i2 d2)
  where i2 = 2*i; d2 = d-1