diff --git a/CSVParser.hs b/CSVParser.hs index 0fd325d..2027677 100644 --- a/CSVParser.hs +++ b/CSVParser.hs @@ -1,31 +1,31 @@ module CSVParser ( - parseCSV -)where + parseCSV + )where import Text.ParserCombinators.Parsec csvFile :: GenParser Char st [[String]] csvFile = do - result <- many line - eof - return result + result <- many line + eof + return result line :: GenParser Char st [String] line = do - result <- cells - _ <- eol - return result + result <- cells + _ <- eol + return result cells :: GenParser Char st [String] cells = do - first <- cellContent - next <- remainingCells - return (first : next) + first <- cellContent + next <- remainingCells + return (first : next) remainingCells :: GenParser Char st [String] remainingCells = - (char ',' >> cells) -- Found comma? More cells coming - <|> return [] -- No comma? Return [], no more cells + (char ',' >> cells) -- Found comma? More cells coming + <|> return [] -- No comma? Return [], no more cells cellContent :: GenParser Char st String cellContent = many (noneOf ",\n") diff --git a/DataModel.hs b/DataModel.hs index eff9c54..7e438f6 100644 --- a/DataModel.hs +++ b/DataModel.hs @@ -8,31 +8,35 @@ type Frequency = Double type Confidence = Double class Freq a where - frequency :: [ItemSet] -> a -> Frequency + frequency :: [ItemSet] -> a -> Frequency data Item = Item String deriving (Eq, Ord) instance Show Item where - show (Item s) = s --"Item " ++ s + show (Item s) = s --"Item " ++ s data ItemSet = ItemSet (Set Item) deriving (Eq, Ord) instance Show ItemSet where - show (ItemSet x) = foldr ((\y old -> y ++ " " ++ old).show) "" (Set.toList x) + show (ItemSet x) = + foldr ((\y old -> y ++ " " ++ old).show) "" (Set.toList x) instance Freq ItemSet where - frequency table (ItemSet set) = setCount / fromIntegral (length table) where - setCount = fromIntegral $ count table (ItemSet set) + frequency table (ItemSet set) = + setCount / fromIntegral (length table) where + setCount = fromIntegral $ count table (ItemSet set) count :: [ItemSet] -> ItemSet -> Count -count table (ItemSet set) = length (filter (\(ItemSet row) -> set `Set.isSubsetOf` row) table) +count table (ItemSet set) = + length (filter isSuperSet table) where + isSuperset (ItemSet row) = set `Set.isSubsetOf` row data Rule = Rule ItemSet ItemSet deriving (Eq) instance Show Rule where - show (Rule a b) = show a ++ "-> " ++ show b + show (Rule a b) = show a ++ "-> " ++ show b instance Freq Rule where - frequency table (Rule (ItemSet set1) (ItemSet set2)) = frequency table $ ItemSet (set1 `Set.union` set2) + frequency table (Rule (ItemSet set1) (ItemSet set2)) = frequency table $ ItemSet (set1 `Set.union` set2) confidence :: [ItemSet] -> Rule -> Confidence confidence table (Rule x y) = frequency table (Rule x y) / frequency table x diff --git a/FrequentPatterns.hs b/FrequentPatterns.hs index 1ee7eeb..2715887 100644 --- a/FrequentPatterns.hs +++ b/FrequentPatterns.hs @@ -1,6 +1,6 @@ module FrequentPatterns ( - frequentPatterns -) where + frequentPatterns + ) where import DataModel import qualified Data.Set as Set import Debug.Trace (traceShow) @@ -8,8 +8,8 @@ import qualified Data.List as List semiUnion :: ItemSet -> ItemSet -> ItemSet semiUnion (ItemSet set1) (ItemSet set2) = ItemSet (if max1 <= max2 && Set.delete max1 set1 == Set.delete max2 set2 then set1 `Set.union` set2 else Set.empty) where - max1 = Set.findMax set1 - max2 = Set.findMax set2 + max1 = Set.findMax set1 + max2 = Set.findMax set2 -- generate all possible combinations from a set of singletons -- generateLevels :: [Item] -> [[ItemSet]] @@ -20,14 +20,14 @@ semiUnion (ItemSet set1) (ItemSet set2) = ItemSet (if max1 <= max2 && Set.delete -- generate the next level in a bottom-up route generateNextLevel :: [ItemSet] -> [ItemSet] generateNextLevel level = traceShow ("Computing level " ++ show (isSize (head level))) $ foldr (\value old -> generate value ++ old) [] level where - generate value = takeWhile (/= empty) (foldr (\x old -> semiUnion value x : old) [] (tail $ List.dropWhile (/= value) level)) - empty = ItemSet $ Set.fromList [] - isSize (ItemSet set) = Set.size set + generate value = takeWhile (/= empty) (foldr (\x old -> semiUnion value x : old) [] (tail $ List.dropWhile (/= value) level)) + empty = ItemSet $ Set.fromList [] + isSize (ItemSet set) = Set.size set singletons :: [ItemSet] -> [Item] singletons table = Set.toList $ foldr (\(ItemSet row) old -> old `Set.union` row) (Set.fromList []) table where frequentPatterns :: Frequency -> [ItemSet] -> [[ItemSet]] frequentPatterns thresh table = until (\x -> [] == head x) (\x -> filterByFrequency (generateNextLevel (head x)) : x) [firstLevel] where - firstLevel = filterByFrequency $ map (\x -> ItemSet $ Set.fromList [x]) (singletons table) - filterByFrequency = filter (\x -> frequency table x >= thresh) + firstLevel = filterByFrequency $ map (\x -> ItemSet $ Set.fromList [x]) (singletons table) + filterByFrequency = filter (\x -> frequency table x >= thresh) diff --git a/phase1.hs b/phase1.hs index 74eb09d..8491cac 100644 --- a/phase1.hs +++ b/phase1.hs @@ -7,12 +7,12 @@ import Control.Monad(when) main :: IO() main = do - args <- getArgs - when (2 /= length args) (error "Usage: Main ") - let filename = head args - let threshold = read $ last args - file <- readFile filename - case parseCSV file of - Left _ -> error "Could not parse out.csv" - Right val -> mapM_ (\x -> putStrLn (show x ++ "(" ++ show (count table x) ++ ")")) (concat (frequentPatterns threshold table)) where - table = map (ItemSet. Set.fromList .map Item) val + args <- getArgs + when (2 /= length args) (error "Usage: Main ") + let filename = head args + let threshold = read $ last args + file <- readFile filename + case parseCSV file of + Left _ -> error "Could not parse out.csv" + Right val -> mapM_ (\x -> putStrLn (show x ++ "(" ++ show (count table x) ++ ")")) (concat (frequentPatterns threshold table)) where + table = map (ItemSet. Set.fromList .map Item) val