Difference between revisions of "Collaborative filtering"
From HaskellWiki
m 
(Add update2) 

Line 6:  Line 6:  
The predict' function replaces predict. 
The predict' function replaces predict. 

+  The update2 

<haskell> 
<haskell> 

Line 24:  Line 25:  
 then so is the (SlopeOne item) type. 
 then so is the (SlopeOne item) type. 

newtype SlopeOne item = SlopeOne (M.Map item (M.Map item (Count,RatingValue))) 
newtype SlopeOne item = SlopeOne (M.Map item (M.Map item (Count,RatingValue))) 

+  deriving (Show) 

+  
+   The SlopeOne' matrix is an unormalized version of SlopeOne 

+  newtype SlopeOne' item = SlopeOne' (M.Map item (M.Map item (Count,RatingValue))) 

deriving (Show) 
deriving (Show) 

empty = SlopeOne M.empty 
empty = SlopeOne M.empty 

+  empty' = SlopeOne' M.empty 

 This performs a strict addition on pairs made of two nuumeric types 
 This performs a strict addition on pairs made of two nuumeric types 

addT (a,b) (c,d) = let (l,r) = (a+c, b+d) in l `seq` r `seq` (l, r) 
addT (a,b) (c,d) = let (l,r) = (a+c, b+d) in l `seq` r `seq` (l, r) 

+  
 There is never an entry for the "diagonal" elements with equal 
 There is never an entry for the "diagonal" elements with equal 

 items in the pair: (foo,foo) is never in the SlopeOne. 
 items in the pair: (foo,foo) is never in the SlopeOne. 

update :: Ord item => SlopeOne item > [Rating item] > SlopeOne item 
update :: Ord item => SlopeOne item > [Rating item] > SlopeOne item 

−  update (SlopeOne 
+  update (SlopeOne matrixInNormed) usersRatings = 
+  SlopeOne . M.map (M.map norm) . foldl' update' matrixIn $ usersRatings 

where update' oldMatrix userRatings = 
where update' oldMatrix userRatings = 

foldl' (\oldMatrix (itemPair, rating) > insert oldMatrix itemPair rating) 
foldl' (\oldMatrix (itemPair, rating) > insert oldMatrix itemPair rating) 

Line 47:  Line 54:  
outer _ innerMap = M.insertWith' addT item2 newRating innerMap 
outer _ innerMap = M.insertWith' addT item2 newRating innerMap 

norm (count,total_rating) = (count, total_rating / fromIntegral count) 
norm (count,total_rating) = (count, total_rating / fromIntegral count) 

+  un_norm (count,rating) = (count, rating * fromIntegral count) 

+  matrixIn = M.map (M.map un_norm) matrixInNormed 

+  
+   This version of update2 makes an unnormalize slopeOne' from each 

+   Rating and combines them using Map.union* operations and addT. 

+  update2 :: Ord item => SlopeOne' item > [Rating item] > SlopeOne' item 

+  update2 s [] = s 

+  update2 (SlopeOne' matrixIn) usersRatings = 

+  SlopeOne' . M.unionsWith (M.unionWith addT) . (matrixIn:) . map fromRating $ usersRatings 

+  where fromRating userRating = M.mapWithKey expand1 userRating 

+  where expand1 item1 rating1 = M.mapMaybeWithKey expand2 userRating 

+  where expand2 item2 rating2  item1 == item2 = Nothing 

+   otherwise = Just (1,rating1  rating2) 

predict :: Ord a => SlopeOne a > Rating a > Rating a 
predict :: Ord a => SlopeOne a > Rating a > Rating a 

Line 65:  Line 85:  
in M.filter (\norm_rating > norm_rating > 0) normM 
in M.filter (\norm_rating > norm_rating > 0) normM 

−  
+   This is a modified version of predict. It also expect the 

−  +   unnormalized SlopeOne' but this is a small detail 

−  predict' 
+  predict' :: Ord a => SlopeOne' a > Rating a > Rating a 
+  predict' (SlopeOne' matrixIn) userRatings = M.mapMaybeWithKey calcItem matrixIn 

where calcItem item1 innerMap  M.member item1 userRatings = Nothing 
where calcItem item1 innerMap  M.member item1 userRatings = Nothing 

 M.null combined = Nothing 
 M.null combined = Nothing 

Line 76:  Line 96:  
norm_rating = total_rating / fromIntegral total_count 
norm_rating = total_rating / fromIntegral total_count 

weight (count,rating) user_rating = 
weight (count,rating) user_rating = 

−  (count,fromIntegral count * 
+  (count,rating + fromIntegral count * user_rating) 
freqs (SlopeOne matrixIn) userRatings = 
freqs (SlopeOne matrixIn) userRatings = 

Line 93:  Line 113:  
] 
] 

−  matrix = update empty userData 

userInfo = M.fromList [("squid", 0.4)] 
userInfo = M.fromList [("squid", 0.4)] 

−  predictions = predict matrix userInfo 

+  
−  predictions 
+  predictions = predict (update empty userData) userInfo 
+  
+  predictions' = predict' (update2 empty' userData) userInfo 

</haskell> 
</haskell> 
Revision as of 19:21, 28 August 2007
This page was added to discuss different versions of the code for collaborative filtering at Bryan's blog.
Chris' version
I renamed the variables and then reorganized the code a bit.
The predict' function replaces predict. The update2
module WeightedSlopeOne (Rating, SlopeOne, empty, predict, update) where
import Data.List (foldl',foldl1')
import qualified Data.Map as M
 The item type is a polymorphic parameter. Since it goes into a Map
 it must be able to be compared, so item must be an instance of Ord.
type Count = Int
type RatingValue = Double
 The Rating is the known (item,Rating) information for a particular "user"
type Rating item = M.Map item RatingValue
 The SlopeOne matrix is indexed by pairs of items and is implmeneted
 as a sparse map of maps. If the item type is an instance of Show
 then so is the (SlopeOne item) type.
newtype SlopeOne item = SlopeOne (M.Map item (M.Map item (Count,RatingValue)))
deriving (Show)
 The SlopeOne' matrix is an unormalized version of SlopeOne
newtype SlopeOne' item = SlopeOne' (M.Map item (M.Map item (Count,RatingValue)))
deriving (Show)
empty = SlopeOne M.empty
empty' = SlopeOne' M.empty
 This performs a strict addition on pairs made of two nuumeric types
addT (a,b) (c,d) = let (l,r) = (a+c, b+d) in l `seq` r `seq` (l, r)
 There is never an entry for the "diagonal" elements with equal
 items in the pair: (foo,foo) is never in the SlopeOne.
update :: Ord item => SlopeOne item > [Rating item] > SlopeOne item
update (SlopeOne matrixInNormed) usersRatings =
SlopeOne . M.map (M.map norm) . foldl' update' matrixIn $ usersRatings
where update' oldMatrix userRatings =
foldl' (\oldMatrix (itemPair, rating) > insert oldMatrix itemPair rating)
oldMatrix itemCombos
where itemCombos = [ ((item1, item2), (1, rating1  rating2))
 (item1, rating1) < ratings
, (item2, rating2) < ratings
, item1 /= item2]
ratings = M.toList userRatings
insert outerMap (item1, item2) newRating = M.insertWith' outer item1 newOuterEntry outerMap
where newOuterEntry = M.singleton item2 newRating
outer _ innerMap = M.insertWith' addT item2 newRating innerMap
norm (count,total_rating) = (count, total_rating / fromIntegral count)
un_norm (count,rating) = (count, rating * fromIntegral count)
matrixIn = M.map (M.map un_norm) matrixInNormed
 This version of update2 makes an unnormalize slopeOne' from each
 Rating and combines them using Map.union* operations and addT.
update2 :: Ord item => SlopeOne' item > [Rating item] > SlopeOne' item
update2 s [] = s
update2 (SlopeOne' matrixIn) usersRatings =
SlopeOne' . M.unionsWith (M.unionWith addT) . (matrixIn:) . map fromRating $ usersRatings
where fromRating userRating = M.mapWithKey expand1 userRating
where expand1 item1 rating1 = M.mapMaybeWithKey expand2 userRating
where expand2 item2 rating2  item1 == item2 = Nothing
 otherwise = Just (1,rating1  rating2)
predict :: Ord a => SlopeOne a > Rating a > Rating a
predict (SlopeOne matrixIn) userRatings =
let freqM = foldl' insert M.empty
[ (item1,found_rating,user_rating)
 (item1,innerMap) < M.assocs matrixIn
, M.notMember item1 userRatings
, (user_item, user_rating) < M.toList userRatings
, item1 /= user_item
, found_rating < M.lookup user_item innerMap
]
insert oldM (item1,found_rating,user_rating) =
let (count,norm_rating) = found_rating
total_rating = fromIntegral count * (norm_rating + user_rating)
in M.insertWith' addT item1 (count,total_rating) oldM
normM = M.map (\(count, total_rating) > total_rating / fromIntegral count) freqM
in M.filter (\norm_rating > norm_rating > 0) normM
 This is a modified version of predict. It also expect the
 unnormalized SlopeOne' but this is a small detail
predict' :: Ord a => SlopeOne' a > Rating a > Rating a
predict' (SlopeOne' matrixIn) userRatings = M.mapMaybeWithKey calcItem matrixIn
where calcItem item1 innerMap  M.member item1 userRatings = Nothing
 M.null combined = Nothing
 norm_rating <= 0 = Nothing
 otherwise = Just norm_rating
where combined = M.intersectionWith weight innerMap userRatings
(total_count,total_rating) = foldl1' addT (M.elems combined)
norm_rating = total_rating / fromIntegral total_count
weight (count,rating) user_rating =
(count,rating + fromIntegral count * user_rating)
freqs (SlopeOne matrixIn) userRatings =
let freqs = [ (item1,item,rating,find (item1,item))
 item1 < M.keys matrixIn
, (item, rating) < M.toList userRatings]
find (item1,item) = M.findWithDefault (0,0) item (matrixIn M.! item1)
in freqs
userData :: [Rating String]
userData = map M.fromList [
[("squid", 1.0), ("cuttlefish", 0.5), ("octopus", 0.2)],
[("squid", 1.0), ("octopus", 0.5), ("nautilus", 0.2)],
[("squid", 0.2), ("octopus", 1.0), ("cuttlefish", 0.4), ("nautilus", 0.4)],
[("cuttlefish", 0.9), ("octopus", 0.4), ("nautilus", 0.5)]
]
userInfo = M.fromList [("squid", 0.4)]
predictions = predict (update empty userData) userInfo
predictions' = predict' (update2 empty' userData) userInfo