finally implemented algorithm properly

This commit is contained in:
IGI-111
2015-04-02 12:25:42 +02:00
parent f09aeaa3fb
commit 99613bf333

View File

@@ -12,20 +12,22 @@ import Text.ParserCombinators.Parsec
data Item = Item String deriving (Eq, Ord) data Item = Item String deriving (Eq, Ord)
instance Show Item where instance Show Item where
show (Item s) = "Item " ++ s show (Item s) = s --"Item " ++ s
data ItemSet = ItemSet (Set Item) deriving (Eq, Ord) data ItemSet = ItemSet (Set Item) deriving (Eq, Ord)
instance Show ItemSet where instance Show ItemSet where
show (ItemSet x) = let y = foldl (++) "" $ map ((\x -> x ++ ",") . show) $ Set.toList x show (ItemSet x) = let y = foldl (++) "" $ map ((\x -> " " ++ x) . show) $ Set.toList x
in "{" ++ y ++ "}" in y
type Support = Double type Support = Double
type Confidence = Double type Confidence = Double
type Frequency = Int
frequency :: [ItemSet] -> ItemSet -> Frequency
frequency iss (ItemSet is) = length $ filter (Set.isSubsetOf is) $ map (\(ItemSet x) -> x) iss
support :: [ItemSet] -> ItemSet -> Support support :: [ItemSet] -> ItemSet -> Support
support iss is = (fromIntegral $ count iss is) / (fromIntegral $ length iss) support iss is = (fromIntegral $ frequency iss is) / (fromIntegral $ length iss)
where
count iss (ItemSet is) = length $ filter (Set.isSubsetOf is) $ map (\(ItemSet x) -> x) iss
confidence :: [ItemSet] -> ItemSet -> Item -> Confidence confidence :: [ItemSet] -> ItemSet -> Item -> Confidence
confidence iss i@(ItemSet is) j = (support iss $ ItemSet $ Set.union is $ Set.singleton j) / (support iss i) confidence iss i@(ItemSet is) j = (support iss $ ItemSet $ Set.union is $ Set.singleton j) / (support iss i)
@@ -48,8 +50,8 @@ parseCSV :: String -> Either ParseError [ItemSet]
parseCSV input = parse csvFile ("unknown") input parseCSV input = parse csvFile ("unknown") input
-- frequent items -- frequent items
frequentItems :: [ItemSet] -> Support -> Map Item Integer frequentItems :: [ItemSet] -> Support -> [Item]
frequentItems x y = pi where frequentItems x y = Set.elems $ Map.keysSet pi where
pi = Map.filter (>= frequency) m where pi = Map.filter (>= frequency) m where
frequency = ceiling(y * (fromIntegral $ length x)) where frequency = ceiling(y * (fromIntegral $ length x)) where
m = foldl count Map.empty x where m = foldl count Map.empty x where
@@ -57,19 +59,14 @@ frequentItems x y = pi where
f m i = Map.insertWith acc i 1 m where f m i = Map.insertWith acc i 1 m where
acc new old = old + 1 acc new old = old + 1
displayFrequentItems :: Map Item Integer -> String
displayFrequentItems :: Map ItemSet Frequency -> String
displayFrequentItems i = Map.foldrWithKey (\key val old -> old ++ (itemToString key val) ++ "\n" ) "" i where displayFrequentItems i = Map.foldrWithKey (\key val old -> old ++ (itemToString key val) ++ "\n" ) "" i where
itemToString (Item str) freq = str++" ("++(show freq)++")" itemToString is freq = "("++(show freq)++")" ++ (show is)
-- frequentItemSets :: [ItemSet] -> Support -> Map ItemSet Integer
-- frequentItemSets x y = pi where
-- pi = Map.filter (>= frequency) candidates where
-- frequency = ceiling(y * (fromIntegral $ length x)) where
-- candidates = foldl count Map.empty x where
-- count m (ItemSet is) = Set.foldl f m is where
-- f m i = Map.insertWith acc i 1 m where
-- acc new old = old + 1
frequentItemSets :: [ItemSet] -> [ItemSet] -> Support -> Map ItemSet Frequency
frequentItemSets r candidates threshold = Map.fromList $ map (\c -> (c, frequency r c)) filtered
where filtered = filter (\c -> support r c >= threshold) candidates
main :: IO () main :: IO ()
main = do main = do
@@ -80,8 +77,9 @@ main = do
Left e -> do putStrLn "Error parsing input: " Left e -> do putStrLn "Error parsing input: "
print e print e
Right r -> do Right r -> do
let i = frequentItems r (read s) let is = frequentItems r (read s)
putStrLn $ displayFrequentItems i let iss = frequentItemSets r (candidates is) (read s)
putStrLn $ displayFrequentItems iss
-- helper functions defined here -- helper functions defined here
@@ -89,5 +87,14 @@ allItems :: [ItemSet] -> [Item]
allItems iss = Set.toList $ foldl add Set.empty iss where allItems iss = Set.toList $ foldl add Set.empty iss where
add s (ItemSet is) = Set.union s is add s (ItemSet is) = Set.union s is
candidate :: [ItemSet] -> [Item] -> [ItemSet] candidates :: [Item] -> [ItemSet]
candidate x y = List.nub $ [ItemSet (Set.union (Set.singleton b) a) | (ItemSet a) <- x, b <- y, Set.notMember b a] candidates x = map (\old -> ItemSet (Set.fromList old)) (powerset x)
powerset :: [a] -> [[a]]
powerset [] = [[]]
powerset (x:xs) = xss /\/ map (x:) xss
where xss = powerset xs
(/\/) :: [a] -> [a] -> [a]
[] /\/ ys = ys
(x:xs) /\/ ys = x : (ys /\/ xs)