Collaborative filtering

From HaskellWiki
Revision as of 19:21, 28 August 2007 by ChrisKuklewicz (talk | contribs) (Add update2)
Jump to navigation Jump to search
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.

This page was added to discuss different versions of the code for collaborative filtering at Bryan's blog.

Chris' version

I renamed the variables and then reorganized the code a bit.

The predict' function replaces predict. The update2

module WeightedSlopeOne (Rating, SlopeOne, empty, predict, update) where

import Data.List (foldl',foldl1')
import qualified Data.Map as M

-- The item type is a polymorphic parameter.  Since it goes into a Map
-- it must be able to be compared, so item must be an instance of Ord.
type Count = Int
type RatingValue = Double
-- The Rating is the known (item,Rating) information for a particular "user"
type Rating item = M.Map item RatingValue

-- The SlopeOne matrix is indexed by pairs of items and is implmeneted
-- as a sparse map of maps.  If the item type is an instance of Show
-- then so is the (SlopeOne item) type.
newtype SlopeOne item = SlopeOne (M.Map item (M.Map item (Count,RatingValue)))
  deriving (Show)

-- The SlopeOne' matrix is an unormalized version of SlopeOne
newtype SlopeOne' item = SlopeOne' (M.Map item (M.Map item (Count,RatingValue)))
  deriving (Show)

empty = SlopeOne M.empty
empty' = SlopeOne' M.empty

-- This performs a strict addition on pairs made of two nuumeric types
addT (a,b) (c,d) = let (l,r) = (a+c, b+d) in l `seq` r `seq` (l, r)


-- There is never an entry for the "diagonal" elements with equal
-- items in the pair: (foo,foo) is never in the SlopeOne.
update :: Ord item => SlopeOne item -> [Rating item] -> SlopeOne item
update (SlopeOne matrixInNormed) usersRatings =
    SlopeOne . M.map (M.map norm) . foldl' update' matrixIn $ usersRatings
  where update' oldMatrix userRatings =
          foldl' (\oldMatrix (itemPair, rating) -> insert oldMatrix itemPair rating)
                 oldMatrix itemCombos
          where itemCombos = [ ((item1, item2), (1, rating1 - rating2)) 
                             | (item1, rating1) <- ratings
                             , (item2, rating2) <- ratings
                             , item1 /= item2]
                ratings = M.toList userRatings
        insert outerMap (item1, item2) newRating = M.insertWith' outer item1 newOuterEntry outerMap
          where newOuterEntry = M.singleton item2 newRating
                outer _ innerMap = M.insertWith' addT item2 newRating innerMap
        norm (count,total_rating) = (count, total_rating / fromIntegral count)
        un_norm (count,rating) = (count, rating * fromIntegral count)
        matrixIn = M.map (M.map un_norm) matrixInNormed

-- This version of update2 makes an unnormalize slopeOne' from each
-- Rating and combines them using Map.union* operations and addT.
update2 :: Ord item => SlopeOne' item -> [Rating item] -> SlopeOne' item
update2 s [] = s
update2 (SlopeOne' matrixIn) usersRatings =
    SlopeOne' . M.unionsWith (M.unionWith addT) . (matrixIn:) . map fromRating $ usersRatings
  where fromRating userRating = M.mapWithKey expand1 userRating
          where expand1 item1 rating1 = M.mapMaybeWithKey expand2 userRating
                  where expand2 item2 rating2 | item1 == item2 = Nothing
                                              | otherwise = Just (1,rating1 - rating2)
       
predict :: Ord a => SlopeOne a -> Rating a -> Rating a
predict (SlopeOne matrixIn) userRatings =
  let freqM = foldl' insert M.empty
                     [ (item1,found_rating,user_rating)
                     | (item1,innerMap) <- M.assocs matrixIn
                     , M.notMember item1 userRatings
                     , (user_item, user_rating) <- M.toList userRatings
                     , item1 /= user_item
                     , found_rating <- M.lookup user_item innerMap
                     ]
      insert oldM (item1,found_rating,user_rating) =
        let (count,norm_rating) = found_rating
            total_rating = fromIntegral count * (norm_rating + user_rating)
        in M.insertWith' addT item1 (count,total_rating) oldM
      normM = M.map (\(count, total_rating) -> total_rating / fromIntegral count) freqM
  in M.filter (\norm_rating -> norm_rating > 0) normM

-- This is a modified version of predict.  It also expect the
-- unnormalized SlopeOne' but this is a small detail
predict' :: Ord a => SlopeOne' a -> Rating a -> Rating a
predict' (SlopeOne' matrixIn) userRatings = M.mapMaybeWithKey calcItem matrixIn
  where calcItem item1 innerMap | M.member item1 userRatings = Nothing
                                | M.null combined = Nothing
                                | norm_rating <= 0 = Nothing
                                | otherwise = Just norm_rating
          where combined = M.intersectionWith weight innerMap userRatings
                (total_count,total_rating) = foldl1' addT (M.elems combined)
                norm_rating = total_rating / fromIntegral total_count
        weight (count,rating) user_rating =
          (count,rating + fromIntegral count *  user_rating)

freqs (SlopeOne matrixIn) userRatings =
  let freqs = [ (item1,item,rating,find (item1,item))
              | item1 <- M.keys matrixIn
              , (item, rating) <- M.toList userRatings]
      find (item1,item) = M.findWithDefault (0,0) item (matrixIn M.! item1)
  in freqs

userData :: [Rating String]
userData = map M.fromList [
 [("squid", 1.0), ("cuttlefish", 0.5), ("octopus", 0.2)],
 [("squid", 1.0), ("octopus", 0.5), ("nautilus", 0.2)],
 [("squid", 0.2), ("octopus", 1.0), ("cuttlefish", 0.4), ("nautilus", 0.4)],
 [("cuttlefish", 0.9), ("octopus", 0.4), ("nautilus", 0.5)]
 ]

userInfo = M.fromList [("squid", 0.4)]

predictions = predict (update empty userData) userInfo

predictions' = predict' (update2 empty' userData) userInfo