From 46430779b3d9e91491683459e80763a650c7d5e2 Mon Sep 17 00:00:00 2001 From: IGI-111 Date: Fri, 3 Apr 2015 18:03:07 +0200 Subject: [PATCH] display cleaned, ready to split into multiple files --- .gitignore | 4 ++++ CSVParser.hs | 39 +++++++++++++++++++++++++++++++++++++++ apriori.hs => Main.hs | 32 +++++++++++++++++++++++--------- 3 files changed, 66 insertions(+), 9 deletions(-) create mode 100644 .gitignore create mode 100644 CSVParser.hs rename apriori.hs => Main.hs (62%) diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..930ed00 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.hi +*.o +Main +*.csv diff --git a/CSVParser.hs b/CSVParser.hs new file mode 100644 index 0000000..e353ec3 --- /dev/null +++ b/CSVParser.hs @@ -0,0 +1,39 @@ +module CSVParser ( + parseCSV +)where + + +import Text.ParserCombinators.Parsec + +csvFile :: GenParser Char st [[String]] +csvFile = do + result <- many line + eof + return result + +line :: GenParser Char st [String] +line = do + result <- cells + _ <- eol + return result + +cells :: GenParser Char st [String] +cells = do + first <- cellContent + next <- remainingCells + return (first : next) + +remainingCells :: GenParser Char st [String] +remainingCells = + (char ',' >> cells) -- Found comma? More cells coming + <|> return [] -- No comma? Return [], no more cells + +cellContent :: GenParser Char st String +cellContent = + many (noneOf ",\n") + +eol :: GenParser Char st Char +eol = char '\n' + +parseCSV :: String -> Either ParseError [[String]] +parseCSV input = parse csvFile "(unknown)" input diff --git a/apriori.hs b/Main.hs similarity index 62% rename from apriori.hs rename to Main.hs index 9f97805..d9e88a2 100644 --- a/apriori.hs +++ b/Main.hs @@ -3,6 +3,8 @@ module Main where import Data.Set (Set) import qualified Data.Set as Set import qualified Data.List as List +import Debug.Trace (traceShow) +import CSVParser -- data structures defined here @@ -12,9 +14,15 @@ instance Show Item where data ItemSet = ItemSet (Set Item) deriving (Eq, Ord) instance Show ItemSet where - show (ItemSet x) = let y = foldl (++) "" $ map show $ Set.toList x in y + show (ItemSet x) = foldr ((\y old -> y ++ " " ++ old).show) "" (Set.toList x) + +data Rule = Rule ItemSet ItemSet deriving (Eq) +instance Show Rule where + show (Rule a b) = show a ++ "-> " ++ show b + type Frequency = Double +type Count = Int semiUnion :: ItemSet -> ItemSet -> ItemSet semiUnion (ItemSet set1) (ItemSet set2) = ItemSet (if max1 <= max2 && Set.delete max1 set1 == Set.delete max2 set2 then set1 `Set.union` set2 else Set.empty) where @@ -28,18 +36,21 @@ generateLevels singles = until (\x -> head x == lastLevel) (\x -> generateNextLe lastLevel = [ItemSet $ Set.fromList singles] generateNextLevel :: [ItemSet] -> [ItemSet] -generateNextLevel level = foldr (\value old -> generate value ++ old) [] level where - generate value = takeWhile (/= empty) (foldr (\x old -> semiUnion value x : old) [] (tail $ List.dropWhile (/= value) level)) where - empty = ItemSet $ Set.fromList [] +generateNextLevel level = traceShow ("Computing level " ++ show (isSize (head level))) $ foldr (\value old -> generate value ++ old) [] level where + generate value = takeWhile (/= empty) (foldr (\x old -> semiUnion value x : old) [] (tail $ List.dropWhile (/= value) level)) + empty = ItemSet $ Set.fromList [] + isSize (ItemSet set) = Set.size set frequency :: [ItemSet] -> ItemSet -> Frequency frequency table (ItemSet set) = setCount / fromIntegral (length table) where - setCount = fromIntegral $ length (filter (\(ItemSet row) -> set `Set.isSubsetOf` row) table) + setCount = fromIntegral $ count table (ItemSet set) + +count :: [ItemSet] -> ItemSet -> Count +count table (ItemSet set) = length (filter (\(ItemSet row) -> set `Set.isSubsetOf` row) table) singletons :: [ItemSet] -> [Item] singletons table = Set.toList $ foldr (\(ItemSet row) old -> old `Set.union` row) (Set.fromList []) table where - frequentPatterns :: Frequency -> [ItemSet] -> [[ItemSet]] frequentPatterns thresh table = until (\x -> [] == head x) (\x -> filterByFrequency (generateNextLevel (head x)) : x) [firstLevel] where firstLevel = map (\x -> ItemSet $ Set.fromList [x]) (singletons table) @@ -47,6 +58,9 @@ frequentPatterns thresh table = until (\x -> [] == head x) (\x -> filterByFreque main :: IO() -main = print $ frequentPatterns 1 table where - table = [ItemSet (Set.fromList [Item "a", Item "b", Item "c"]), ItemSet (Set.fromList [Item "a", Item "b"])] - +main = do + file <- readFile "out.csv" + case parseCSV file of + Left _ -> putStrLn "Could not parse out.csv" + Right val -> mapM_ (\x -> putStrLn (show x ++ "(" ++ show (count table x) ++ ")")) (concat (frequentPatterns 0.6 table)) where + table = map (ItemSet. Set.fromList .map Item) val