# Difference between revisions of "Data.List.Split"

(another parameter: what to do with multiple consecutive delimiters?) |
(Break on Nothing impl added) |
||

Line 128: | Line 128: | ||

Here splitEvery is equivalent to "chunks" above, but it is a much better producer, I think. (It is also intended to be mapped on, given that the (map (take i)) makes every element of the list into a producer. |
Here splitEvery is equivalent to "chunks" above, but it is a much better producer, I think. (It is also intended to be mapped on, given that the (map (take i)) makes every element of the list into a producer. |
||

+ | |||

+ | == Break on Nothing == |
||

+ | |||

+ | <haskell> |
||

+ | import Data.Maybe |
||

+ | import Data.Either |
||

+ | import Data.List (find, isPrefixOf) |
||

+ | |||

+ | breaks :: [Maybe a] -> [[a]] |
||

+ | breaks xs = (if null cur then id else ((map fromJust cur):)) |
||

+ | (if null rem then [] else breaks (tail rem)) |
||

+ | where (cur, rem) = break isNothing xs |
||

+ | |||

+ | replaces :: (Eq a) => [([a], [b])] -> (a -> b) -> [a] -> [b] |
||

+ | replaces reps f = process |
||

+ | where process [] = [] |
||

+ | process xs = case find ((`isPrefixOf` xs).fst) reps of |
||

+ | Nothing -> f (head xs) : process (tail xs) |
||

+ | Just (pat, rep) -> rep ++ (process $ drop (length pat) xs) |
||

+ | |||

+ | split :: (Eq a) => [([a], [Maybe a])] -> [a] -> [[a]] |
||

+ | split reps = breaks . replaces reps Just |
||

+ | |||

+ | onSeq, onSeqKeep :: (Eq a) => [a] -> ([a], [Maybe a]) |
||

+ | onSeq xs = (xs, [Nothing]) |
||

+ | onSeqKeep xs = (xs, Nothing : (map Just xs ++ [Nothing])) |
||

+ | |||

+ | onElt, onEltKeep :: (Eq a) => a -> ([a], [Maybe a]) |
||

+ | onElt x = onSeq [x] |
||

+ | onEltKeep x = onSeqKeep [x] |
||

+ | |||

+ | insertAfter :: [Int] -> a -> [a] -> [a] |
||

+ | insertAfter [] _ xs = xs |
||

+ | insertAfter (i:_) _ [] | i > 0 = [] |
||

+ | insertAfter (i:is) x xs = pre ++ [x] ++ insertAfter is x post |
||

+ | where (pre, post) = splitAt i xs |
||

+ | |||

+ | splitEvery :: Int -> [a] -> [[a]] |
||

+ | splitEvery i = splitPlaces (repeat i) |
||

+ | |||

+ | splitPlaces :: [Int] -> [a] -> [[a]] |
||

+ | splitPlaces is xs = breaks $ insertAfter is Nothing $ map Just xs |
||

+ | |||

+ | splitPowersOf2 = splitPlaces (iterate (*2) 1) |
||

+ | </haskell> |
||

+ | |||

+ | Implements all of the above ideas (except predicate matching). In order to split up an arithmetic expression, for example: |
||

+ | |||

+ | <haskell> |
||

+ | split (onElt ' ' : map onEltKeep "+-/*^()") |
||

+ | </haskell> |

## Revision as of 02:47, 15 December 2008

A theoretical module which contains implementations/combinators for implementing every possible method of list-splitting known to man. This way no one has to argue about what the correct interface for split is, we can just have them all.

Some possible ways to split a list, to get your creative juices flowing:

- what to split on?
- single-element separator
- sublist separator
- use a list of possible separators instead of just one
- use a predicate on elements or sublists instead of giving explicit separators
- use approximate matching?
- chunks of fixed length

- how to split?
- discard the separators
- keep the separators with the preceding or following splits
- keep the separators as their own separate pieces of the result list
- what to do with separators at the beginning/end? create a blank split before/after, or not?
- keep blank splits between consecutive delimiters, or merge multiple consecutive delimiters into one delimiter?

An important caveat: we should strive to keep things flexible yet SIMPLE. The more complicated things get, the closer this gets to just being a general parsing or regex library. So the right balance needs to be struck.

Add your implementations below! Once we converge on something good we can upload it to hackage.

```
{-# LANGUAGE ViewPatterns #-}
import Data.List (unfoldr)
-- intercalate :: [a] -> [[a]] -> [a]
-- intercalate x [a,b,c,x,y,z] = [a,x,b,x,c,x,x,y,x,z,x]
-- unintercalate :: [a] -> [a] -> [[a]]
-- unintercalate x [a,x,b,x,c,x,x,y,x,z,x] = [a,b,c,[],y,z]
-- unintercalate is the "inverse" of intercalate
match [] string = Just string
match (_:_) [] = Nothing
match (p:ps) (q:qs) | p == q = match ps qs
match (_:_) (_:_) | otherwise = Nothing
chopWith delimiter (match delimiter -> Just tail) = return ([], tail)
chopWith delimiter (c:cs) = chopWith delimiter cs >>= \(head, tail) ->
return (c:head, tail)
chopWith delimiter [] = Nothing
-- note: chopWith could be make 'more efficient' i.e. remove the >>=\-> bit
-- by adding an accumulator
unintercalate delimiter = unfoldr (chopWith delimiter)
-- > unintercalate "x" "axbxcxxyxzx"
-- ["a","b","c","","y","z"]
splitOn :: (a -> Bool) -> [a] -> [[a]]
splitOn _ [] = []
splitOn f l@(x:xs)
| f x = splitOn f xs
| otherwise = let (h,t) = break f l in h:(splitOn f t)
-- take the element who make predict true as delimiter
-- > splitOn even [1,3,5,6,7,3,3,2,1,1,1]
-- [[1,3,5],[7,3,3],[1,1,1]]
-- | like String split, except for any element that obeys Eq
splitEq :: Eq a => a -> [a] -> [[a]]
splitEq e = splitOn (==e)
-- | split at regular intervals
chunk :: Int -> [a] -> [[a]]
chunk _ [] = [[]]
chunk n xs = y1 : chunk n y2
where
(y1, y2) = splitAt n xs
```

## A combinator approach?

Here are some initial thoughts on a combinator approach. The trick is to find nice implementations of the declarations below. Please add your own thoughts, other combinators, etc.

```
data Splitter a
split :: Splitter a -> [a] -> [[a]]
onElts :: [a] -> Splitter a -- split on any of these elements
onSublist :: [a] -> Splitter a -- split on this exact subsequence
whenElt :: (a -> Bool) -> Splitter a
keepingDelims :: Splitter a -> Splitter a
collapsingNulls :: Splitter a -> Splitter a
-- other basic combinators?
-- now you can write things like
--
-- split (collapsingNulls $ onElts " ,") "abc,def , gh"
--
-- which should evaluate to ["abc", "def", "gh"].
-- some convenience functions can be provided, such as...
splitOn = split . onElts
splitWhen = split . whenElt
```

## Splits of known lengths

I frequently require two types of splits, splitting into blocks of fixed length and splitting into lists of sizes of increasing powers of 2. My implementation was designed to be fold/builded as much as possible, so here goes:

```
splitEvery :: Int -> [e] -> [[e]]
splitEvery i l = map (take i) (build (splitter l)) where
splitter [] _ n = n
splitter l c n = l `c` splitter (drop i l) c n
For more general splits with foreknown lengths,
splitPlaces :: [Int] -> [e] -> [[e]]
splitPlaces ls xs = build (splitPlacer ls xs) where
splitPlacer [] _ _ n = n
splitPlacer _ [] _ n = n
splitPlacer (l:ls) xs c n = let (x1, x2) = splitAt l xs in x1 `c` splitPlacer ls x2 c n
splitPowersOf2 :: [e] -> [[e]]
splitPowersOf2 = splitPlaces (iterate (*2) 1)
```

To be sure, neither is a good consumer, but I don't think that's avoidable, given that drop isn't a good consumer either.

Here splitEvery is equivalent to "chunks" above, but it is a much better producer, I think. (It is also intended to be mapped on, given that the (map (take i)) makes every element of the list into a producer.

## Break on Nothing

```
import Data.Maybe
import Data.Either
import Data.List (find, isPrefixOf)
breaks :: [Maybe a] -> [[a]]
breaks xs = (if null cur then id else ((map fromJust cur):))
(if null rem then [] else breaks (tail rem))
where (cur, rem) = break isNothing xs
replaces :: (Eq a) => [([a], [b])] -> (a -> b) -> [a] -> [b]
replaces reps f = process
where process [] = []
process xs = case find ((`isPrefixOf` xs).fst) reps of
Nothing -> f (head xs) : process (tail xs)
Just (pat, rep) -> rep ++ (process $ drop (length pat) xs)
split :: (Eq a) => [([a], [Maybe a])] -> [a] -> [[a]]
split reps = breaks . replaces reps Just
onSeq, onSeqKeep :: (Eq a) => [a] -> ([a], [Maybe a])
onSeq xs = (xs, [Nothing])
onSeqKeep xs = (xs, Nothing : (map Just xs ++ [Nothing]))
onElt, onEltKeep :: (Eq a) => a -> ([a], [Maybe a])
onElt x = onSeq [x]
onEltKeep x = onSeqKeep [x]
insertAfter :: [Int] -> a -> [a] -> [a]
insertAfter [] _ xs = xs
insertAfter (i:_) _ [] | i > 0 = []
insertAfter (i:is) x xs = pre ++ [x] ++ insertAfter is x post
where (pre, post) = splitAt i xs
splitEvery :: Int -> [a] -> [[a]]
splitEvery i = splitPlaces (repeat i)
splitPlaces :: [Int] -> [a] -> [[a]]
splitPlaces is xs = breaks $ insertAfter is Nothing $ map Just xs
splitPowersOf2 = splitPlaces (iterate (*2) 1)
```

Implements all of the above ideas (except predicate matching). In order to split up an arithmetic expression, for example:

```
split (onElt ' ' : map onEltKeep "+-/*^()")
```