diff --git a/apriori.hs b/apriori.hs index 8bdfb49..dd74c78 100644 --- a/apriori.hs +++ b/apriori.hs @@ -12,20 +12,22 @@ import Text.ParserCombinators.Parsec data Item = Item String deriving (Eq, Ord) instance Show Item where - show (Item s) = "Item " ++ s + show (Item s) = s --"Item " ++ s data ItemSet = ItemSet (Set Item) deriving (Eq, Ord) instance Show ItemSet where - show (ItemSet x) = let y = foldl (++) "" $ map ((\x -> x ++ ",") . show) $ Set.toList x - in "{" ++ y ++ "}" + show (ItemSet x) = let y = foldl (++) "" $ map ((\x -> " " ++ x) . show) $ Set.toList x + in y type Support = Double type Confidence = Double +type Frequency = Int + +frequency :: [ItemSet] -> ItemSet -> Frequency +frequency iss (ItemSet is) = length $ filter (Set.isSubsetOf is) $ map (\(ItemSet x) -> x) iss support :: [ItemSet] -> ItemSet -> Support -support iss is = (fromIntegral $ count iss is) / (fromIntegral $ length iss) - where - count iss (ItemSet is) = length $ filter (Set.isSubsetOf is) $ map (\(ItemSet x) -> x) iss +support iss is = (fromIntegral $ frequency iss is) / (fromIntegral $ length iss) confidence :: [ItemSet] -> ItemSet -> Item -> Confidence confidence iss i@(ItemSet is) j = (support iss $ ItemSet $ Set.union is $ Set.singleton j) / (support iss i) @@ -48,8 +50,8 @@ parseCSV :: String -> Either ParseError [ItemSet] parseCSV input = parse csvFile ("unknown") input -- frequent items -frequentItems :: [ItemSet] -> Support -> Map Item Integer -frequentItems x y = pi where +frequentItems :: [ItemSet] -> Support -> [Item] +frequentItems x y = Set.elems $ Map.keysSet pi where pi = Map.filter (>= frequency) m where frequency = ceiling(y * (fromIntegral $ length x)) where m = foldl count Map.empty x where @@ -57,19 +59,14 @@ frequentItems x y = pi where f m i = Map.insertWith acc i 1 m where acc new old = old + 1 -displayFrequentItems :: Map Item Integer -> String + +displayFrequentItems :: Map ItemSet Frequency -> String displayFrequentItems i = Map.foldrWithKey (\key val old -> old ++ (itemToString key val) ++ "\n" ) "" i where - itemToString (Item str) freq = str++" ("++(show freq)++")" - --- frequentItemSets :: [ItemSet] -> Support -> Map ItemSet Integer --- frequentItemSets x y = pi where --- pi = Map.filter (>= frequency) candidates where --- frequency = ceiling(y * (fromIntegral $ length x)) where --- candidates = foldl count Map.empty x where --- count m (ItemSet is) = Set.foldl f m is where --- f m i = Map.insertWith acc i 1 m where --- acc new old = old + 1 + itemToString is freq = "("++(show freq)++")" ++ (show is) +frequentItemSets :: [ItemSet] -> [ItemSet] -> Support -> Map ItemSet Frequency +frequentItemSets r candidates threshold = Map.fromList $ map (\c -> (c, frequency r c)) filtered + where filtered = filter (\c -> support r c >= threshold) candidates main :: IO () main = do @@ -80,8 +77,9 @@ main = do Left e -> do putStrLn "Error parsing input: " print e Right r -> do - let i = frequentItems r (read s) - putStrLn $ displayFrequentItems i + let is = frequentItems r (read s) + let iss = frequentItemSets r (candidates is) (read s) + putStrLn $ displayFrequentItems iss -- helper functions defined here @@ -89,5 +87,14 @@ allItems :: [ItemSet] -> [Item] allItems iss = Set.toList $ foldl add Set.empty iss where add s (ItemSet is) = Set.union s is -candidate :: [ItemSet] -> [Item] -> [ItemSet] -candidate x y = List.nub $ [ItemSet (Set.union (Set.singleton b) a) | (ItemSet a) <- x, b <- y, Set.notMember b a] +candidates :: [Item] -> [ItemSet] +candidates x = map (\old -> ItemSet (Set.fromList old)) (powerset x) + +powerset :: [a] -> [[a]] +powerset [] = [[]] +powerset (x:xs) = xss /\/ map (x:) xss + where xss = powerset xs + +(/\/) :: [a] -> [a] -> [a] +[] /\/ ys = ys +(x:xs) /\/ ys = x : (ys /\/ xs)