From 32309510a862fdcf0ba8521c56d722b4eb91393e Mon Sep 17 00:00:00 2001 From: IGI-111 Date: Sun, 5 Apr 2015 15:45:51 +0200 Subject: [PATCH 1/5] changed indent method according to ghc standard --- CSVParser.hs | 26 +++++++++++++------------- DataModel.hs | 20 ++++++++++++-------- FrequentPatterns.hs | 18 +++++++++--------- phase1.hs | 18 +++++++++--------- 4 files changed, 43 insertions(+), 39 deletions(-) diff --git a/CSVParser.hs b/CSVParser.hs index 0fd325d..2027677 100644 --- a/CSVParser.hs +++ b/CSVParser.hs @@ -1,31 +1,31 @@ module CSVParser ( - parseCSV -)where + parseCSV + )where import Text.ParserCombinators.Parsec csvFile :: GenParser Char st [[String]] csvFile = do - result <- many line - eof - return result + result <- many line + eof + return result line :: GenParser Char st [String] line = do - result <- cells - _ <- eol - return result + result <- cells + _ <- eol + return result cells :: GenParser Char st [String] cells = do - first <- cellContent - next <- remainingCells - return (first : next) + first <- cellContent + next <- remainingCells + return (first : next) remainingCells :: GenParser Char st [String] remainingCells = - (char ',' >> cells) -- Found comma? More cells coming - <|> return [] -- No comma? Return [], no more cells + (char ',' >> cells) -- Found comma? More cells coming + <|> return [] -- No comma? Return [], no more cells cellContent :: GenParser Char st String cellContent = many (noneOf ",\n") diff --git a/DataModel.hs b/DataModel.hs index eff9c54..7e438f6 100644 --- a/DataModel.hs +++ b/DataModel.hs @@ -8,31 +8,35 @@ type Frequency = Double type Confidence = Double class Freq a where - frequency :: [ItemSet] -> a -> Frequency + frequency :: [ItemSet] -> a -> Frequency data Item = Item String deriving (Eq, Ord) instance Show Item where - show (Item s) = s --"Item " ++ s + show (Item s) = s --"Item " ++ s data ItemSet = ItemSet (Set Item) deriving (Eq, Ord) instance Show ItemSet where - show (ItemSet x) = foldr ((\y old -> y ++ " " ++ old).show) "" (Set.toList x) + show (ItemSet x) = + foldr ((\y old -> y ++ " " ++ old).show) "" (Set.toList x) instance Freq ItemSet where - frequency table (ItemSet set) = setCount / fromIntegral (length table) where - setCount = fromIntegral $ count table (ItemSet set) + frequency table (ItemSet set) = + setCount / fromIntegral (length table) where + setCount = fromIntegral $ count table (ItemSet set) count :: [ItemSet] -> ItemSet -> Count -count table (ItemSet set) = length (filter (\(ItemSet row) -> set `Set.isSubsetOf` row) table) +count table (ItemSet set) = + length (filter isSuperSet table) where + isSuperset (ItemSet row) = set `Set.isSubsetOf` row data Rule = Rule ItemSet ItemSet deriving (Eq) instance Show Rule where - show (Rule a b) = show a ++ "-> " ++ show b + show (Rule a b) = show a ++ "-> " ++ show b instance Freq Rule where - frequency table (Rule (ItemSet set1) (ItemSet set2)) = frequency table $ ItemSet (set1 `Set.union` set2) + frequency table (Rule (ItemSet set1) (ItemSet set2)) = frequency table $ ItemSet (set1 `Set.union` set2) confidence :: [ItemSet] -> Rule -> Confidence confidence table (Rule x y) = frequency table (Rule x y) / frequency table x diff --git a/FrequentPatterns.hs b/FrequentPatterns.hs index 1ee7eeb..2715887 100644 --- a/FrequentPatterns.hs +++ b/FrequentPatterns.hs @@ -1,6 +1,6 @@ module FrequentPatterns ( - frequentPatterns -) where + frequentPatterns + ) where import DataModel import qualified Data.Set as Set import Debug.Trace (traceShow) @@ -8,8 +8,8 @@ import qualified Data.List as List semiUnion :: ItemSet -> ItemSet -> ItemSet semiUnion (ItemSet set1) (ItemSet set2) = ItemSet (if max1 <= max2 && Set.delete max1 set1 == Set.delete max2 set2 then set1 `Set.union` set2 else Set.empty) where - max1 = Set.findMax set1 - max2 = Set.findMax set2 + max1 = Set.findMax set1 + max2 = Set.findMax set2 -- generate all possible combinations from a set of singletons -- generateLevels :: [Item] -> [[ItemSet]] @@ -20,14 +20,14 @@ semiUnion (ItemSet set1) (ItemSet set2) = ItemSet (if max1 <= max2 && Set.delete -- generate the next level in a bottom-up route generateNextLevel :: [ItemSet] -> [ItemSet] generateNextLevel level = traceShow ("Computing level " ++ show (isSize (head level))) $ foldr (\value old -> generate value ++ old) [] level where - generate value = takeWhile (/= empty) (foldr (\x old -> semiUnion value x : old) [] (tail $ List.dropWhile (/= value) level)) - empty = ItemSet $ Set.fromList [] - isSize (ItemSet set) = Set.size set + generate value = takeWhile (/= empty) (foldr (\x old -> semiUnion value x : old) [] (tail $ List.dropWhile (/= value) level)) + empty = ItemSet $ Set.fromList [] + isSize (ItemSet set) = Set.size set singletons :: [ItemSet] -> [Item] singletons table = Set.toList $ foldr (\(ItemSet row) old -> old `Set.union` row) (Set.fromList []) table where frequentPatterns :: Frequency -> [ItemSet] -> [[ItemSet]] frequentPatterns thresh table = until (\x -> [] == head x) (\x -> filterByFrequency (generateNextLevel (head x)) : x) [firstLevel] where - firstLevel = filterByFrequency $ map (\x -> ItemSet $ Set.fromList [x]) (singletons table) - filterByFrequency = filter (\x -> frequency table x >= thresh) + firstLevel = filterByFrequency $ map (\x -> ItemSet $ Set.fromList [x]) (singletons table) + filterByFrequency = filter (\x -> frequency table x >= thresh) diff --git a/phase1.hs b/phase1.hs index 74eb09d..8491cac 100644 --- a/phase1.hs +++ b/phase1.hs @@ -7,12 +7,12 @@ import Control.Monad(when) main :: IO() main = do - args <- getArgs - when (2 /= length args) (error "Usage: Main ") - let filename = head args - let threshold = read $ last args - file <- readFile filename - case parseCSV file of - Left _ -> error "Could not parse out.csv" - Right val -> mapM_ (\x -> putStrLn (show x ++ "(" ++ show (count table x) ++ ")")) (concat (frequentPatterns threshold table)) where - table = map (ItemSet. Set.fromList .map Item) val + args <- getArgs + when (2 /= length args) (error "Usage: Main ") + let filename = head args + let threshold = read $ last args + file <- readFile filename + case parseCSV file of + Left _ -> error "Could not parse out.csv" + Right val -> mapM_ (\x -> putStrLn (show x ++ "(" ++ show (count table x) ++ ")")) (concat (frequentPatterns threshold table)) where + table = map (ItemSet. Set.fromList .map Item) val From c108e32c8947e4fdbfac645e6e1a70efd2f68e34 Mon Sep 17 00:00:00 2001 From: IGI-111 Date: Sun, 5 Apr 2015 17:55:16 +0200 Subject: [PATCH 2/5] implemented phase 2 --- DataModel.hs | 14 +++++++++++--- ExtractRules.hs | 32 ++++++++++++++++++++++++++++++++ FrequentPatterns.hs | 28 +++++++++++++++++----------- phase1.hs | 6 +++--- phase2.hs | 20 ++++++++++++++++++++ 5 files changed, 83 insertions(+), 17 deletions(-) create mode 100644 ExtractRules.hs create mode 100644 phase2.hs diff --git a/DataModel.hs b/DataModel.hs index 7e438f6..ae175a1 100644 --- a/DataModel.hs +++ b/DataModel.hs @@ -11,8 +11,9 @@ class Freq a where frequency :: [ItemSet] -> a -> Frequency data Item = Item String deriving (Eq, Ord) + instance Show Item where - show (Item s) = s --"Item " ++ s + show (Item a) = show a data ItemSet = ItemSet (Set Item) deriving (Eq, Ord) @@ -27,16 +28,23 @@ instance Freq ItemSet where count :: [ItemSet] -> ItemSet -> Count count table (ItemSet set) = - length (filter isSuperSet table) where + length (filter isSuperset table) where isSuperset (ItemSet row) = set `Set.isSubsetOf` row +difference :: ItemSet -> ItemSet -> ItemSet +difference (ItemSet set1) (ItemSet set2) = ItemSet (Set.difference set1 set2) + +empty :: ItemSet +empty = ItemSet (Set.fromList []) + data Rule = Rule ItemSet ItemSet deriving (Eq) instance Show Rule where show (Rule a b) = show a ++ "-> " ++ show b instance Freq Rule where - frequency table (Rule (ItemSet set1) (ItemSet set2)) = frequency table $ ItemSet (set1 `Set.union` set2) + frequency table (Rule (ItemSet set1) (ItemSet set2)) = frequency table $ + ItemSet (set1 `Set.union` set2) confidence :: [ItemSet] -> Rule -> Confidence confidence table (Rule x y) = frequency table (Rule x y) / frequency table x diff --git a/ExtractRules.hs b/ExtractRules.hs new file mode 100644 index 0000000..50b3727 --- /dev/null +++ b/ExtractRules.hs @@ -0,0 +1,32 @@ +module ExtractRules ( + extractRules +) where +import DataModel +import qualified Data.Set as Set +import qualified Data.Map as Map +import Data.Map(Map) + +extractRules :: Confidence -> [ItemSet] -> [Rule] +extractRules threshold patterns = filter (\x -> threshold <= confidence patterns x) rules where + rules = Map.foldrWithKey (\k v old -> ruleFromSubset v k : old) [] subsets + subsets = foldr (\x old -> insertMultiple (filteredPowerset x) x old) Map.empty patterns + filteredPowerset (ItemSet set) = map (ItemSet . Set.fromList) $ + filter (\val -> val /= Set.toList set && val /= []) $ powerset $ Set.toList set + +ruleFromSubset :: ItemSet -> ItemSet -> Rule +ruleFromSubset set subset = Rule subset (difference set subset) + + + +insertMultiple :: Ord k => [k] -> a -> Map k a -> Map k a +insertMultiple keys value m = foldr (\x old -> Map.insert x value old) m keys + +powerset :: [a] -> [[a]] +powerset [] = [[]] +powerset (x:xs) = xss /\/ map (x:) xss where + xss = powerset xs + +(/\/) :: [a] -> [a] -> [a] +[] /\/ ys = ys +(x:xs) /\/ ys = x : (ys /\/ xs) + diff --git a/FrequentPatterns.hs b/FrequentPatterns.hs index 2715887..09e0ba1 100644 --- a/FrequentPatterns.hs +++ b/FrequentPatterns.hs @@ -7,9 +7,12 @@ import Debug.Trace (traceShow) import qualified Data.List as List semiUnion :: ItemSet -> ItemSet -> ItemSet -semiUnion (ItemSet set1) (ItemSet set2) = ItemSet (if max1 <= max2 && Set.delete max1 set1 == Set.delete max2 set2 then set1 `Set.union` set2 else Set.empty) where - max1 = Set.findMax set1 - max2 = Set.findMax set2 +semiUnion (ItemSet set1) (ItemSet set2) = ItemSet $ + if max1 <= max2 && Set.delete max1 set1 == Set.delete max2 set2 + then set1 `Set.union` set2 + else Set.empty where + max1 = Set.findMax set1 + max2 = Set.findMax set2 -- generate all possible combinations from a set of singletons -- generateLevels :: [Item] -> [[ItemSet]] @@ -19,15 +22,18 @@ semiUnion (ItemSet set1) (ItemSet set2) = ItemSet (if max1 <= max2 && Set.delete -- generate the next level in a bottom-up route generateNextLevel :: [ItemSet] -> [ItemSet] -generateNextLevel level = traceShow ("Computing level " ++ show (isSize (head level))) $ foldr (\value old -> generate value ++ old) [] level where - generate value = takeWhile (/= empty) (foldr (\x old -> semiUnion value x : old) [] (tail $ List.dropWhile (/= value) level)) - empty = ItemSet $ Set.fromList [] - isSize (ItemSet set) = Set.size set +generateNextLevel level = traceShow ("Computing level " ++ show (isSize (head level))) $ + foldr (\value old -> generate value ++ old) [] level where + generate value = takeWhile (/= empty) + (foldr (\x old -> semiUnion value x : old) [] (tail $ List.dropWhile (/= value) level)) + isSize (ItemSet set) = Set.size set singletons :: [ItemSet] -> [Item] -singletons table = Set.toList $ foldr (\(ItemSet row) old -> old `Set.union` row) (Set.fromList []) table where +singletons table = Set.toList $ foldr union (Set.fromList []) table where + union (ItemSet row) old = old `Set.union` row frequentPatterns :: Frequency -> [ItemSet] -> [[ItemSet]] -frequentPatterns thresh table = until (\x -> [] == head x) (\x -> filterByFrequency (generateNextLevel (head x)) : x) [firstLevel] where - firstLevel = filterByFrequency $ map (\x -> ItemSet $ Set.fromList [x]) (singletons table) - filterByFrequency = filter (\x -> frequency table x >= thresh) +frequentPatterns thresh table = until (\x -> [] == head x) + (\x -> filterByFrequency (generateNextLevel (head x)) : x) [firstLevel] where + firstLevel = filterByFrequency $ map (\x -> ItemSet $ Set.fromList [x]) (singletons table) + filterByFrequency = filter (\x -> frequency table x >= thresh) diff --git a/phase1.hs b/phase1.hs index 8491cac..628ae72 100644 --- a/phase1.hs +++ b/phase1.hs @@ -13,6 +13,6 @@ main = do let threshold = read $ last args file <- readFile filename case parseCSV file of - Left _ -> error "Could not parse out.csv" - Right val -> mapM_ (\x -> putStrLn (show x ++ "(" ++ show (count table x) ++ ")")) (concat (frequentPatterns threshold table)) where - table = map (ItemSet. Set.fromList .map Item) val + Left _ -> error "Could not parse out.csv" + Right val -> mapM_ (\x -> putStrLn (show x ++ "(" ++ show (count table x) ++ ")")) (concat (frequentPatterns threshold table)) where + table = map (ItemSet. Set.fromList .map Item) val diff --git a/phase2.hs b/phase2.hs new file mode 100644 index 0000000..2226135 --- /dev/null +++ b/phase2.hs @@ -0,0 +1,20 @@ +import CSVParser +import FrequentPatterns +import DataModel +import ExtractRules +import qualified Data.Set as Set +import System.Environment(getArgs) +import Control.Monad(when) + +main :: IO() +main = do + args <- getArgs + when (2 /= length args) (error "Usage: phase2 ") + let filename = head args + let threshold = read $ last args + file <- readFile filename + case parseCSV file of + Left _ -> error "Could not parse out.csv" + Right val -> print $ extractRules threshold $ + filter (/= empty) $ concat $ frequentPatterns 0.01 table where + table = map (ItemSet. Set.fromList .map Item) val From ac068094a45f781db2d80d6579c78de2bab3132d Mon Sep 17 00:00:00 2001 From: IGI-111 Date: Sun, 5 Apr 2015 18:57:56 +0200 Subject: [PATCH 3/5] phase2 finished --- DataModel.hs | 7 ++++--- FreqParser.hs | 11 +++++++++++ phase1.hs | 17 +++++++++++++---- phase2.hs | 7 +++---- 4 files changed, 31 insertions(+), 11 deletions(-) create mode 100644 FreqParser.hs diff --git a/DataModel.hs b/DataModel.hs index ae175a1..7ce2e48 100644 --- a/DataModel.hs +++ b/DataModel.hs @@ -13,13 +13,14 @@ class Freq a where data Item = Item String deriving (Eq, Ord) instance Show Item where - show (Item a) = show a + show (Item a) = a + data ItemSet = ItemSet (Set Item) deriving (Eq, Ord) instance Show ItemSet where show (ItemSet x) = - foldr ((\y old -> y ++ " " ++ old).show) "" (Set.toList x) + init $ foldr ((\y old -> y ++ " " ++ old).show) "" (Set.toList x) instance Freq ItemSet where frequency table (ItemSet set) = @@ -40,7 +41,7 @@ empty = ItemSet (Set.fromList []) data Rule = Rule ItemSet ItemSet deriving (Eq) instance Show Rule where - show (Rule a b) = show a ++ "-> " ++ show b + show (Rule a b) = show a ++ "->" ++ show b instance Freq Rule where frequency table (Rule (ItemSet set1) (ItemSet set2)) = frequency table $ diff --git a/FreqParser.hs b/FreqParser.hs new file mode 100644 index 0000000..62a8a1d --- /dev/null +++ b/FreqParser.hs @@ -0,0 +1,11 @@ +module ItemsParser ( + parseItems, + parseItemsWithFreq + )where +import qualified Data.Map as Map +import Data.Map(Map) +import DataModel + +parseItems :: String -> Either ParseError [ItemSet] + +parseItemsWithFreq :: String -> Either ParseError Map ItemSet Frequency diff --git a/phase1.hs b/phase1.hs index 628ae72..afb0f5a 100644 --- a/phase1.hs +++ b/phase1.hs @@ -8,11 +8,20 @@ import Control.Monad(when) main :: IO() main = do args <- getArgs - when (2 /= length args) (error "Usage: Main ") + when (2 > length args) + (error "Usage: Main ") let filename = head args - let threshold = read $ last args + let threshold = read $ args !! 1 file <- readFile filename case parseCSV file of Left _ -> error "Could not parse out.csv" - Right val -> mapM_ (\x -> putStrLn (show x ++ "(" ++ show (count table x) ++ ")")) (concat (frequentPatterns threshold table)) where - table = map (ItemSet. Set.fromList .map Item) val + Right val -> do + let table = map (ItemSet. Set.fromList .map Item) val + let freqPats = concat (frequentPatterns threshold table) + mapM_ (\x -> putStrLn (show x ++ "(" ++ show (count table x) ++ ")")) freqPats + when (length args > 2) $ + writeFile (args !! 2) $ formatToCSV freqPats + +formatToCSV :: [ItemSet] -> String +formatToCSV = foldr (\x old -> old ++ formatRow x ++ "\n") "" where + formatRow (ItemSet set) = init $ Set.foldr (\x old -> old ++ show x ++ ",") "" set diff --git a/phase2.hs b/phase2.hs index 2226135..7240594 100644 --- a/phase2.hs +++ b/phase2.hs @@ -1,5 +1,4 @@ import CSVParser -import FrequentPatterns import DataModel import ExtractRules import qualified Data.Set as Set @@ -15,6 +14,6 @@ main = do file <- readFile filename case parseCSV file of Left _ -> error "Could not parse out.csv" - Right val -> print $ extractRules threshold $ - filter (/= empty) $ concat $ frequentPatterns 0.01 table where - table = map (ItemSet. Set.fromList .map Item) val + Right val -> do + let table = map (ItemSet. Set.fromList .map Item) val + print $ extractRules threshold table From a468e946e38610438e364ba17f53bbd5a6843305 Mon Sep 17 00:00:00 2001 From: IGI-111 Date: Sun, 5 Apr 2015 19:28:57 +0200 Subject: [PATCH 4/5] fixed trace messages --- FrequentPatterns.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/FrequentPatterns.hs b/FrequentPatterns.hs index 09e0ba1..891a14d 100644 --- a/FrequentPatterns.hs +++ b/FrequentPatterns.hs @@ -3,7 +3,7 @@ module FrequentPatterns ( ) where import DataModel import qualified Data.Set as Set -import Debug.Trace (traceShow) +import Debug.Trace (trace) import qualified Data.List as List semiUnion :: ItemSet -> ItemSet -> ItemSet @@ -22,7 +22,7 @@ semiUnion (ItemSet set1) (ItemSet set2) = ItemSet $ -- generate the next level in a bottom-up route generateNextLevel :: [ItemSet] -> [ItemSet] -generateNextLevel level = traceShow ("Computing level " ++ show (isSize (head level))) $ +generateNextLevel level = trace ("Computing level " ++ show (isSize (head level))) $ foldr (\value old -> generate value ++ old) [] level where generate value = takeWhile (/= empty) (foldr (\x old -> semiUnion value x : old) [] (tail $ List.dropWhile (/= value) level)) @@ -35,5 +35,6 @@ singletons table = Set.toList $ foldr union (Set.fromList []) table where frequentPatterns :: Frequency -> [ItemSet] -> [[ItemSet]] frequentPatterns thresh table = until (\x -> [] == head x) (\x -> filterByFrequency (generateNextLevel (head x)) : x) [firstLevel] where - firstLevel = filterByFrequency $ map (\x -> ItemSet $ Set.fromList [x]) (singletons table) + firstLevel = filterByFrequency $ map (\x -> ItemSet $ Set.fromList [x]) $ + trace "Generated Singletons" (singletons table) filterByFrequency = filter (\x -> frequency table x >= thresh) From 56229fb85a8ff1357b0fd1cc586a0bdea03105c7 Mon Sep 17 00:00:00 2001 From: IGI-111 Date: Tue, 7 Apr 2015 12:27:42 +0000 Subject: [PATCH 5/5] phase 1 outputs frequencies to csv --- phase1.hs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/phase1.hs b/phase1.hs index afb0f5a..500617a 100644 --- a/phase1.hs +++ b/phase1.hs @@ -18,10 +18,11 @@ main = do Right val -> do let table = map (ItemSet. Set.fromList .map Item) val let freqPats = concat (frequentPatterns threshold table) - mapM_ (\x -> putStrLn (show x ++ "(" ++ show (count table x) ++ ")")) freqPats + let output = formatToCSV table freqPats + putStrLn output when (length args > 2) $ - writeFile (args !! 2) $ formatToCSV freqPats + writeFile (args !! 2) $ output -formatToCSV :: [ItemSet] -> String -formatToCSV = foldr (\x old -> old ++ formatRow x ++ "\n") "" where - formatRow (ItemSet set) = init $ Set.foldr (\x old -> old ++ show x ++ ",") "" set +formatToCSV :: [ItemSet] -> [ItemSet] -> String +formatToCSV table frequents = foldr (\x old -> old ++ formatRow x ++ "\n") "" frequents where + formatRow (ItemSet set) = init $ Set.foldr (\x old -> old ++ show x ++ ",") (show (count table (ItemSet set)) ++",") set