Collaborative filtering
This page was added to discuss different versions of the code for collaborative filtering at Bryan's blog.
Chris' version
I renamed the variables and then reorganized the code a bit.
The predict' function replaces predict.
module WeightedSlopeOne (Rating, SlopeOne, empty, predict, update) where
import Data.List (foldl',foldl1')
import qualified Data.Map as M
-- The item type is a polymorphic parameter. Since it goes into a Map
-- it must be able to be compared, so item must be an instance of Ord.
type Count = Int
type RatingValue = Double
-- The Rating is the known (item,Rating) information for a particular "user"
type Rating item = M.Map item RatingValue
-- The SlopeOne matrix is indexed by pairs of items and is implmeneted
-- as a sparse map of maps. If the item type is an instance of Show
-- then so is the (SlopeOne item) type.
newtype SlopeOne item = SlopeOne (M.Map item (M.Map item (Count,RatingValue)))
deriving (Show)
empty = SlopeOne M.empty
-- This performs a strict addition on pairs made of two nuumeric types
addT (a,b) (c,d) = let (l,r) = (a+c, b+d) in l `seq` r `seq` (l, r)
-- There is never an entry for the "diagonal" elements with equal
-- items in the pair: (foo,foo) is never in the SlopeOne.
update :: Ord item => SlopeOne item -> [Rating item] -> SlopeOne item
update (SlopeOne matrixIn) usersRatings = SlopeOne . M.map (M.map norm) . foldl' update' matrixIn $ usersRatings
where update' oldMatrix userRatings =
foldl' (\oldMatrix (itemPair, rating) -> insert oldMatrix itemPair rating)
oldMatrix itemCombos
where itemCombos = [ ((item1, item2), (1, rating1 - rating2))
| (item1, rating1) <- ratings
, (item2, rating2) <- ratings
, item1 /= item2]
ratings = M.toList userRatings
insert outerMap (item1, item2) newRating = M.insertWith' outer item1 newOuterEntry outerMap
where newOuterEntry = M.singleton item2 newRating
outer _ innerMap = M.insertWith' addT item2 newRating innerMap
norm (count,total_rating) = (count, total_rating / fromIntegral count)
predict :: Ord a => SlopeOne a -> Rating a -> Rating a
predict (SlopeOne matrixIn) userRatings =
let freqM = foldl' insert M.empty
[ (item1,found_rating,user_rating)
| (item1,innerMap) <- M.assocs matrixIn
, M.notMember item1 userRatings
, (user_item, user_rating) <- M.toList userRatings
, item1 /= user_item
, found_rating <- M.lookup user_item innerMap
]
insert oldM (item1,found_rating,user_rating) =
let (count,norm_rating) = found_rating
total_rating = fromIntegral count * (norm_rating + user_rating)
in M.insertWith' addT item1 (count,total_rating) oldM
normM = M.map (\(count, total_rating) -> total_rating / fromIntegral count) freqM
in M.filter (\norm_rating -> norm_rating > 0) normM
predict' :: Ord a => SlopeOne a -> Rating a -> Rating a
predict' (SlopeOne matrixIn) userRatings = M.mapMaybeWithKey calcItem matrixIn
where calcItem item1 innerMap | M.member item1 userRatings = Nothing
| M.null combined = Nothing
| norm_rating <= 0 = Nothing
| otherwise = Just norm_rating
where combined = M.intersectionWith weight innerMap userRatings
(total_count,total_rating) = foldl1' addT (M.elems combined)
norm_rating = total_rating / fromIntegral total_count
weight (count,norm_rating) user_rating =
(count,fromIntegral count * (norm_rating + user_rating))
freqs (SlopeOne matrixIn) userRatings =
let freqs = [ (item1,item,rating,find (item1,item))
| item1 <- M.keys matrixIn
, (item, rating) <- M.toList userRatings]
find (item1,item) = M.findWithDefault (0,0) item (matrixIn M.! item1)
in freqs
userData :: [Rating String]
userData = map M.fromList [
[("squid", 1.0), ("cuttlefish", 0.5), ("octopus", 0.2)],
[("squid", 1.0), ("octopus", 0.5), ("nautilus", 0.2)],
[("squid", 0.2), ("octopus", 1.0), ("cuttlefish", 0.4), ("nautilus", 0.4)],
[("cuttlefish", 0.9), ("octopus", 0.4), ("nautilus", 0.5)]
]
matrix = update empty userData
userInfo = M.fromList [("squid", 0.4)]
predictions = predict matrix userInfo
predictions' = predict' matrix userInfo