finally implemented algorithm properly
This commit is contained in:
53
apriori.hs
53
apriori.hs
@@ -12,20 +12,22 @@ import Text.ParserCombinators.Parsec
|
|||||||
|
|
||||||
data Item = Item String deriving (Eq, Ord)
|
data Item = Item String deriving (Eq, Ord)
|
||||||
instance Show Item where
|
instance Show Item where
|
||||||
show (Item s) = "Item " ++ s
|
show (Item s) = s --"Item " ++ s
|
||||||
|
|
||||||
data ItemSet = ItemSet (Set Item) deriving (Eq, Ord)
|
data ItemSet = ItemSet (Set Item) deriving (Eq, Ord)
|
||||||
instance Show ItemSet where
|
instance Show ItemSet where
|
||||||
show (ItemSet x) = let y = foldl (++) "" $ map ((\x -> x ++ ",") . show) $ Set.toList x
|
show (ItemSet x) = let y = foldl (++) "" $ map ((\x -> " " ++ x) . show) $ Set.toList x
|
||||||
in "{" ++ y ++ "}"
|
in y
|
||||||
|
|
||||||
type Support = Double
|
type Support = Double
|
||||||
type Confidence = Double
|
type Confidence = Double
|
||||||
|
type Frequency = Int
|
||||||
|
|
||||||
|
frequency :: [ItemSet] -> ItemSet -> Frequency
|
||||||
|
frequency iss (ItemSet is) = length $ filter (Set.isSubsetOf is) $ map (\(ItemSet x) -> x) iss
|
||||||
|
|
||||||
support :: [ItemSet] -> ItemSet -> Support
|
support :: [ItemSet] -> ItemSet -> Support
|
||||||
support iss is = (fromIntegral $ count iss is) / (fromIntegral $ length iss)
|
support iss is = (fromIntegral $ frequency iss is) / (fromIntegral $ length iss)
|
||||||
where
|
|
||||||
count iss (ItemSet is) = length $ filter (Set.isSubsetOf is) $ map (\(ItemSet x) -> x) iss
|
|
||||||
|
|
||||||
confidence :: [ItemSet] -> ItemSet -> Item -> Confidence
|
confidence :: [ItemSet] -> ItemSet -> Item -> Confidence
|
||||||
confidence iss i@(ItemSet is) j = (support iss $ ItemSet $ Set.union is $ Set.singleton j) / (support iss i)
|
confidence iss i@(ItemSet is) j = (support iss $ ItemSet $ Set.union is $ Set.singleton j) / (support iss i)
|
||||||
@@ -48,8 +50,8 @@ parseCSV :: String -> Either ParseError [ItemSet]
|
|||||||
parseCSV input = parse csvFile ("unknown") input
|
parseCSV input = parse csvFile ("unknown") input
|
||||||
|
|
||||||
-- frequent items
|
-- frequent items
|
||||||
frequentItems :: [ItemSet] -> Support -> Map Item Integer
|
frequentItems :: [ItemSet] -> Support -> [Item]
|
||||||
frequentItems x y = pi where
|
frequentItems x y = Set.elems $ Map.keysSet pi where
|
||||||
pi = Map.filter (>= frequency) m where
|
pi = Map.filter (>= frequency) m where
|
||||||
frequency = ceiling(y * (fromIntegral $ length x)) where
|
frequency = ceiling(y * (fromIntegral $ length x)) where
|
||||||
m = foldl count Map.empty x where
|
m = foldl count Map.empty x where
|
||||||
@@ -57,19 +59,14 @@ frequentItems x y = pi where
|
|||||||
f m i = Map.insertWith acc i 1 m where
|
f m i = Map.insertWith acc i 1 m where
|
||||||
acc new old = old + 1
|
acc new old = old + 1
|
||||||
|
|
||||||
displayFrequentItems :: Map Item Integer -> String
|
|
||||||
|
displayFrequentItems :: Map ItemSet Frequency -> String
|
||||||
displayFrequentItems i = Map.foldrWithKey (\key val old -> old ++ (itemToString key val) ++ "\n" ) "" i where
|
displayFrequentItems i = Map.foldrWithKey (\key val old -> old ++ (itemToString key val) ++ "\n" ) "" i where
|
||||||
itemToString (Item str) freq = str++" ("++(show freq)++")"
|
itemToString is freq = "("++(show freq)++")" ++ (show is)
|
||||||
|
|
||||||
-- frequentItemSets :: [ItemSet] -> Support -> Map ItemSet Integer
|
|
||||||
-- frequentItemSets x y = pi where
|
|
||||||
-- pi = Map.filter (>= frequency) candidates where
|
|
||||||
-- frequency = ceiling(y * (fromIntegral $ length x)) where
|
|
||||||
-- candidates = foldl count Map.empty x where
|
|
||||||
-- count m (ItemSet is) = Set.foldl f m is where
|
|
||||||
-- f m i = Map.insertWith acc i 1 m where
|
|
||||||
-- acc new old = old + 1
|
|
||||||
|
|
||||||
|
frequentItemSets :: [ItemSet] -> [ItemSet] -> Support -> Map ItemSet Frequency
|
||||||
|
frequentItemSets r candidates threshold = Map.fromList $ map (\c -> (c, frequency r c)) filtered
|
||||||
|
where filtered = filter (\c -> support r c >= threshold) candidates
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main = do
|
main = do
|
||||||
@@ -80,8 +77,9 @@ main = do
|
|||||||
Left e -> do putStrLn "Error parsing input: "
|
Left e -> do putStrLn "Error parsing input: "
|
||||||
print e
|
print e
|
||||||
Right r -> do
|
Right r -> do
|
||||||
let i = frequentItems r (read s)
|
let is = frequentItems r (read s)
|
||||||
putStrLn $ displayFrequentItems i
|
let iss = frequentItemSets r (candidates is) (read s)
|
||||||
|
putStrLn $ displayFrequentItems iss
|
||||||
|
|
||||||
-- helper functions defined here
|
-- helper functions defined here
|
||||||
|
|
||||||
@@ -89,5 +87,14 @@ allItems :: [ItemSet] -> [Item]
|
|||||||
allItems iss = Set.toList $ foldl add Set.empty iss where
|
allItems iss = Set.toList $ foldl add Set.empty iss where
|
||||||
add s (ItemSet is) = Set.union s is
|
add s (ItemSet is) = Set.union s is
|
||||||
|
|
||||||
candidate :: [ItemSet] -> [Item] -> [ItemSet]
|
candidates :: [Item] -> [ItemSet]
|
||||||
candidate x y = List.nub $ [ItemSet (Set.union (Set.singleton b) a) | (ItemSet a) <- x, b <- y, Set.notMember b a]
|
candidates x = map (\old -> ItemSet (Set.fromList old)) (powerset x)
|
||||||
|
|
||||||
|
powerset :: [a] -> [[a]]
|
||||||
|
powerset [] = [[]]
|
||||||
|
powerset (x:xs) = xss /\/ map (x:) xss
|
||||||
|
where xss = powerset xs
|
||||||
|
|
||||||
|
(/\/) :: [a] -> [a] -> [a]
|
||||||
|
[] /\/ ys = ys
|
||||||
|
(x:xs) /\/ ys = x : (ys /\/ xs)
|
||||||
|
|||||||
Reference in New Issue
Block a user