diff --git a/DataModel.hs b/DataModel.hs index 2aab73a..76bc36d 100644 --- a/DataModel.hs +++ b/DataModel.hs @@ -21,7 +21,7 @@ data ItemSet = ItemSet (Set Item) deriving (Eq, Ord) instance Show ItemSet where show (ItemSet x) = - init $ foldr ((\y old -> y ++ " " ++ old).show) "" (Set.toList x) + init $ foldr ((\y old -> y ++ "," ++ old).show) "" (Set.toList x) instance Freq ItemSet where frequency table (ItemSet set) = diff --git a/FrequentPatterns.hs b/FrequentPatterns.hs index 8e4675d..cf6de82 100644 --- a/FrequentPatterns.hs +++ b/FrequentPatterns.hs @@ -16,12 +16,6 @@ semiUnion (ItemSet set1) (ItemSet set2) = ItemSet $ max1 = Set.findMax set1 max2 = Set.findMax set2 --- generate all possible combinations from a set of singletons --- generateLevels :: [Item] -> [[ItemSet]] --- generateLevels singles = until (\x -> head x == lastLevel) (\x -> generateNextLevel (head x) : x) [firstLevel] where --- firstLevel = map (\x -> ItemSet $ Set.fromList [x]) singles --- lastLevel = [ItemSet $ Set.fromList singles] - -- generate the next level in a bottom-up route generateNextLevel :: [ItemSet] -> [ItemSet] generateNextLevel level = trace ("Computing level " ++ show (isSize (head level))) $ @@ -34,7 +28,7 @@ generateNextLevel level = trace ("Computing level " ++ show (isSize (head level) singletons :: [ItemSet] -> [Item] singletons table = Set.toList $ foldr union (Set.fromList []) table where - union (ItemSet row) old = old `Set.union` row + union (ItemSet row) old = old `Set.union` row frequentPatterns :: Frequency -> [ItemSet] -> [[ItemSet]] frequentPatterns thresh table = until (\x -> [] == head x) diff --git a/phase1.hs b/phase1.hs index cf28748..bd4091a 100644 --- a/phase1.hs +++ b/phase1.hs @@ -9,7 +9,7 @@ main :: IO() main = do args <- getArgs when (2 > length args) - (error "Usage: Main table.csv threshold [outfile.csv]") + (error "Usage: phase1 table.csv threshold [outfile.csv]") let filename = head args let threshold = read $ args !! 1 file <- readFile filename diff --git a/phase2.hs b/phase2.hs index e9a098c..ff0043f 100644 --- a/phase2.hs +++ b/phase2.hs @@ -2,14 +2,13 @@ import CSVParser import DataModel import ExtractRules import qualified Data.Set as Set -import qualified Data.List as List import System.Environment(getArgs) import Control.Monad(when) main :: IO() main = do args <- getArgs - when (3 > length args) (error "Usage: phase2 table.csv frequents.csv threshold [out.assoc]") + when (3 > length args) (error "Usage: phase2 table.csv frequents.csv threshold [rules.csv]") let threshold = read $ args !! 2 tableFile <- readFile $ head args freqFile <- readFile $ args !! 1 @@ -25,6 +24,9 @@ main = do where freqPats = map ((ItemSet. Set.fromList .map Item) . tail) freqFileContent table = map (ItemSet. Set.fromList .map Item) tableFileContent - rules = List.sortBy (\x y -> compare (lift table y) (lift table x)) $ extractRules threshold table freqPats - output = init $ foldr ((\x old -> old ++ x ++ "\n").show) "" $ take 10 rules + rules = extractRules threshold table freqPats + output = formatToCSV rules +formatToCSV :: [Rule] -> String +formatToCSV = foldr (\x old -> old ++ formatRow x ++ "\n") "" where + formatRow (Rule x y) = show x ++ ", ," ++ show y diff --git a/phase3.hs b/phase3.hs new file mode 100644 index 0000000..1a891f0 --- /dev/null +++ b/phase3.hs @@ -0,0 +1,39 @@ +import CSVParser +import DataModel +import ExtractRules +import qualified Data.Set as Set +import qualified Data.List as List +import System.Environment(getArgs) +import Control.Monad(when) + +main :: IO() +main = do + args <- getArgs + when (3 > length args) (error "Usage: phase3 table.csv rules.csv count [bestRules.csv]") + let bestRuleCount = read $ args !! 2 + tableFile <- readFile $ head args + rulesFile <- readFile $ args !! 1 + case parseCSV tableFile of + Left _ -> error "Could not parse table" + Right tableFileContent -> + case parseCSV rulesFile of + Left _ -> error "Could not parse frequent patterns" + Right rulesFileContent -> do + print $ output + when (length args > 3) $ + writeFile (args !! 3) output + where + table = map (ItemSet. Set.fromList .map Item) tableFileContent + rules = map ruleFromRow rulesFileContent + bestRules = take bestRuleCount $ List.sortBy (\x y -> compare (lift table y) (lift table x)) rules + output = formatToCSV bestRules + +formatToCSV :: [Rule] -> String +formatToCSV = foldr (\x old -> old ++ formatRow x ++ "\n") "" where + formatRow (Rule x y) = show x ++ ", ," ++ show y + +ruleFromRow :: [String] -> Rule +ruleFromRow columns = Rule item1 item2 + where + item1 = ItemSet $ Set.fromList $ map Item $ takeWhile (/= " ") columns + item2 = ItemSet $ Set.fromList $ map Item $ tail $ dropWhile (/= " ") columns