Toy compression implementations: Difference between revisions
(New source code! Now comes with examples...) |
(Added Huffman compression.) |
||
Line 1: | Line 1: | ||
[[Category:Code]] | [[Category:Code]] | ||
== About == | |||
This code is provided in the hope that someone might find it interesting/entertaining, and to demonstrate what an excellent programming language Haskell truly is. (A working polymorphic LZW implementation in 10 lines? Try ''that'' in Java!) | This code is provided in the hope that someone might find it interesting/entertaining, and to demonstrate what an excellent programming language Haskell truly is. (A working polymorphic LZW implementation in 10 lines? Try ''that'' in Java!) | ||
Line 6: | Line 8: | ||
[[User:MathematicalOrchid|MathematicalOrchid]] 16:46, 15 February 2007 (UTC) | [[User:MathematicalOrchid|MathematicalOrchid]] 16:46, 15 February 2007 (UTC) | ||
== Main module == | |||
<haskell> | <haskell> | ||
Line 74: | Line 78: | ||
"This is just a simple test." | "This is just a simple test." | ||
</haskell> | |||
== Huffman coding == | |||
<haskell> | |||
module Huffman | |||
(count, markov1, Tree, encode_huffman, decode_huffman) | |||
where | |||
import Data.List (nub) | |||
-- Marvok1 probability model... | |||
count :: (Eq t) => [t] -> [(t,Int)] | |||
count xs = map (\x -> (x, length $ filter (x ==) xs)) $ nub xs | |||
markov1 :: (Eq t) => [t] -> [(t,Double)] | |||
markov1 xs = | |||
let n = fromIntegral $ length xs | |||
in map (\(x,c) -> (x, fromIntegral c / n)) $ count xs | |||
-- Build a Huffman tree... | |||
data Tree t = Leaf Double t | Branch Double (Tree t) (Tree t) deriving Show | |||
prob :: Tree t -> Double | |||
prob (Leaf p _) = p | |||
prob (Branch p _ _) = p | |||
get_tree :: [Tree t] -> (Tree t, [Tree t]) | |||
get_tree (t:ts) = work t [] ts where | |||
work x xs [] = (x,xs) | |||
work x xs (y:ys) | |||
| prob y < prob x = work y (x:xs) ys | |||
| otherwise = work x (y:xs) ys | |||
huffman_build :: [(t,Double)] -> Tree t | |||
huffman_build = build . map (\(t,p) -> Leaf p t) where | |||
build [t] = t | |||
build ts = | |||
let (t0,ts0) = get_tree ts | |||
(t1,ts1) = get_tree ts0 | |||
in build $ Branch (prob t0 + prob t1) t0 t1 : ts1 | |||
-- Make codebook... | |||
data Bit = Zero | One deriving (Eq, Show) | |||
type Bits = [Bit] | |||
huffman_codebook :: Tree t -> [(t,Bits)] | |||
huffman_codebook = work [] where | |||
work bs (Leaf _ x) = [(x,bs)] | |||
work bs (Branch _ t0 t1) = work (bs ++ [Zero]) t0 ++ work (bs ++ [One]) t1 | |||
-- Do the coding! | |||
encode :: (Eq t) => [(t,Bits)] -> [t] -> Bits | |||
encode cb = concatMap (\x -> maybe undefined id $ lookup x cb) | |||
decode :: (Eq t) => Tree t -> Bits -> [t] | |||
decode t = work t t where | |||
work _ (Leaf _ x) [] = [x] | |||
work t (Leaf _ x) bs = x : work t t bs | |||
work t (Branch _ t0 t1) (b:bs) | |||
| b == Zero = work t t0 bs | |||
| otherwise = work t t1 bs | |||
encode_huffman :: (Eq t) => [t] -> (Tree t, Bits) | |||
encode_huffman xs = | |||
let t = huffman_build $ markov1 xs | |||
bs = encode (huffman_codebook t) xs | |||
in (t,bs) | |||
decode_huffman :: (Eq t) => Tree t -> Bits -> [t] | |||
decode_huffman = decode | |||
</haskell> | |||
If anybody can make this code shorter / more elegant, feel free! | |||
A short demo: | |||
<haskell> | |||
> encode_huffman "this is just a simple test" | |||
<loads of data> | |||
> decode_huffman (fst it) (snd it) | |||
"this is just a simple test" | |||
</haskell> | </haskell> |
Revision as of 15:04, 8 March 2007
About
This code is provided in the hope that someone might find it interesting/entertaining, and to demonstrate what an excellent programming language Haskell truly is. (A working polymorphic LZW implementation in 10 lines? Try that in Java!)
This is 'toy' code. Please don't try to use it to compress multi-GB of data. It has not been thoroughly checked for correctness, and I shudder to think what the time and space complexity would be like! However, it is enlightening and entertaining to see how many algorithms you can implement with a handful of lines...
MathematicalOrchid 16:46, 15 February 2007 (UTC)
Main module
module Compression where
import Data.List
import Data.Word -- In case you want it. (Not actually used anywhere!)
chars = [' '..'~'] -- Becuase ' ' = 0x20 and '~' = 0x7F.
-- Run-length encoding
encode_RLE :: (Eq t) => [t] -> [(Int,t)]
encode_RLE = map (\xs -> (length xs, head xs)) . groupBy (==)
decode_RLE :: [(Int,t)] -> [t]
decode_RLE = concatMap (uncurry replicate)
-- Limpel-Ziv-Welch encoding
encode_LZW :: (Eq t) => [t] -> [t] -> [Int]
encode_LZW _ [] = []
encode_LZW alphabet (x:xs) = work (make alphabet) [x] xs where
make = map (\x -> [x])
work table buffer [] = [maybe undefined id $ elemIndex buffer table]
work table buffer (x:xs) =
let new = buffer ++ [x]
in case elemIndex new table of
Nothing -> maybe undefined id (elemIndex buffer table) : work (table ++ [new]) [x] xs
Just _ -> work table new xs
decode_LZW :: [t] -> [Int] -> [t]
decode_LZW _ [] = []
decode_LZW alphabet xs = work (length alphabet) (make alphabet) [] xs where
make = map (\x -> [x])
work _ t _ [] = []
work n table prev (x:xs) = case x >= n of
True -> error "underflow" -- THIS NEEDS FIXING!
False -> let out = table !! x
in out ++
if null prev
then work n table out xs
else work (n+1) (table ++ [prev ++ [head out]]) out xs
Some examples are in order:
> encode_RLE "AAAABBBBDDCCCCEEEGGFFFF"
[(4,'A'),(4,'B'),(2,'D'),(4,'C'),(3,'E'),(2,'G'),(4,'F')]
> decode_RLE [(4,'A'),(4,'B'),(2,'D'),(4,'C'),(3,'E'),(2,'G'),(4,'F')]
"AAAABBBBDDCCCCEEEGGFFFF"
> encode_LZW chars "This is just a simple test."
[52,72,73,83,0,97,0,74,85,83,84,0,65,0,83,73,77,80,76,69,0,84,69,104,14]
> decode_LZW chars [52,72,73,83,0,97,0,74,85,83,84,0,65,0,83,73,77,80,76,69,0,84,69,104,14]
"This is just a simple test."
Huffman coding
module Huffman
(count, markov1, Tree, encode_huffman, decode_huffman)
where
import Data.List (nub)
-- Marvok1 probability model...
count :: (Eq t) => [t] -> [(t,Int)]
count xs = map (\x -> (x, length $ filter (x ==) xs)) $ nub xs
markov1 :: (Eq t) => [t] -> [(t,Double)]
markov1 xs =
let n = fromIntegral $ length xs
in map (\(x,c) -> (x, fromIntegral c / n)) $ count xs
-- Build a Huffman tree...
data Tree t = Leaf Double t | Branch Double (Tree t) (Tree t) deriving Show
prob :: Tree t -> Double
prob (Leaf p _) = p
prob (Branch p _ _) = p
get_tree :: [Tree t] -> (Tree t, [Tree t])
get_tree (t:ts) = work t [] ts where
work x xs [] = (x,xs)
work x xs (y:ys)
| prob y < prob x = work y (x:xs) ys
| otherwise = work x (y:xs) ys
huffman_build :: [(t,Double)] -> Tree t
huffman_build = build . map (\(t,p) -> Leaf p t) where
build [t] = t
build ts =
let (t0,ts0) = get_tree ts
(t1,ts1) = get_tree ts0
in build $ Branch (prob t0 + prob t1) t0 t1 : ts1
-- Make codebook...
data Bit = Zero | One deriving (Eq, Show)
type Bits = [Bit]
huffman_codebook :: Tree t -> [(t,Bits)]
huffman_codebook = work [] where
work bs (Leaf _ x) = [(x,bs)]
work bs (Branch _ t0 t1) = work (bs ++ [Zero]) t0 ++ work (bs ++ [One]) t1
-- Do the coding!
encode :: (Eq t) => [(t,Bits)] -> [t] -> Bits
encode cb = concatMap (\x -> maybe undefined id $ lookup x cb)
decode :: (Eq t) => Tree t -> Bits -> [t]
decode t = work t t where
work _ (Leaf _ x) [] = [x]
work t (Leaf _ x) bs = x : work t t bs
work t (Branch _ t0 t1) (b:bs)
| b == Zero = work t t0 bs
| otherwise = work t t1 bs
encode_huffman :: (Eq t) => [t] -> (Tree t, Bits)
encode_huffman xs =
let t = huffman_build $ markov1 xs
bs = encode (huffman_codebook t) xs
in (t,bs)
decode_huffman :: (Eq t) => Tree t -> Bits -> [t]
decode_huffman = decode
If anybody can make this code shorter / more elegant, feel free!
A short demo:
> encode_huffman "this is just a simple test"
<loads of data>
> decode_huffman (fst it) (snd it)
"this is just a simple test"