display cleaned, ready to split into multiple files

This commit is contained in:
IGI-111
2015-04-03 18:03:07 +02:00
parent e66fad749f
commit 46430779b3
3 changed files with 66 additions and 9 deletions

4
.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
*.hi
*.o
Main
*.csv

39
CSVParser.hs Normal file
View File

@@ -0,0 +1,39 @@
module CSVParser (
parseCSV
)where
import Text.ParserCombinators.Parsec
csvFile :: GenParser Char st [[String]]
csvFile = do
result <- many line
eof
return result
line :: GenParser Char st [String]
line = do
result <- cells
_ <- eol
return result
cells :: GenParser Char st [String]
cells = do
first <- cellContent
next <- remainingCells
return (first : next)
remainingCells :: GenParser Char st [String]
remainingCells =
(char ',' >> cells) -- Found comma? More cells coming
<|> return [] -- No comma? Return [], no more cells
cellContent :: GenParser Char st String
cellContent =
many (noneOf ",\n")
eol :: GenParser Char st Char
eol = char '\n'
parseCSV :: String -> Either ParseError [[String]]
parseCSV input = parse csvFile "(unknown)" input

View File

@@ -3,6 +3,8 @@ module Main where
import Data.Set (Set) import Data.Set (Set)
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.List as List import qualified Data.List as List
import Debug.Trace (traceShow)
import CSVParser
-- data structures defined here -- data structures defined here
@@ -12,9 +14,15 @@ instance Show Item where
data ItemSet = ItemSet (Set Item) deriving (Eq, Ord) data ItemSet = ItemSet (Set Item) deriving (Eq, Ord)
instance Show ItemSet where instance Show ItemSet where
show (ItemSet x) = let y = foldl (++) "" $ map show $ Set.toList x in y show (ItemSet x) = foldr ((\y old -> y ++ " " ++ old).show) "" (Set.toList x)
data Rule = Rule ItemSet ItemSet deriving (Eq)
instance Show Rule where
show (Rule a b) = show a ++ "-> " ++ show b
type Frequency = Double type Frequency = Double
type Count = Int
semiUnion :: ItemSet -> ItemSet -> ItemSet semiUnion :: ItemSet -> ItemSet -> ItemSet
semiUnion (ItemSet set1) (ItemSet set2) = ItemSet (if max1 <= max2 && Set.delete max1 set1 == Set.delete max2 set2 then set1 `Set.union` set2 else Set.empty) where semiUnion (ItemSet set1) (ItemSet set2) = ItemSet (if max1 <= max2 && Set.delete max1 set1 == Set.delete max2 set2 then set1 `Set.union` set2 else Set.empty) where
@@ -28,18 +36,21 @@ generateLevels singles = until (\x -> head x == lastLevel) (\x -> generateNextLe
lastLevel = [ItemSet $ Set.fromList singles] lastLevel = [ItemSet $ Set.fromList singles]
generateNextLevel :: [ItemSet] -> [ItemSet] generateNextLevel :: [ItemSet] -> [ItemSet]
generateNextLevel level = foldr (\value old -> generate value ++ old) [] level where generateNextLevel level = traceShow ("Computing level " ++ show (isSize (head level))) $ foldr (\value old -> generate value ++ old) [] level where
generate value = takeWhile (/= empty) (foldr (\x old -> semiUnion value x : old) [] (tail $ List.dropWhile (/= value) level)) where generate value = takeWhile (/= empty) (foldr (\x old -> semiUnion value x : old) [] (tail $ List.dropWhile (/= value) level))
empty = ItemSet $ Set.fromList [] empty = ItemSet $ Set.fromList []
isSize (ItemSet set) = Set.size set
frequency :: [ItemSet] -> ItemSet -> Frequency frequency :: [ItemSet] -> ItemSet -> Frequency
frequency table (ItemSet set) = setCount / fromIntegral (length table) where frequency table (ItemSet set) = setCount / fromIntegral (length table) where
setCount = fromIntegral $ length (filter (\(ItemSet row) -> set `Set.isSubsetOf` row) table) setCount = fromIntegral $ count table (ItemSet set)
count :: [ItemSet] -> ItemSet -> Count
count table (ItemSet set) = length (filter (\(ItemSet row) -> set `Set.isSubsetOf` row) table)
singletons :: [ItemSet] -> [Item] singletons :: [ItemSet] -> [Item]
singletons table = Set.toList $ foldr (\(ItemSet row) old -> old `Set.union` row) (Set.fromList []) table where singletons table = Set.toList $ foldr (\(ItemSet row) old -> old `Set.union` row) (Set.fromList []) table where
frequentPatterns :: Frequency -> [ItemSet] -> [[ItemSet]] frequentPatterns :: Frequency -> [ItemSet] -> [[ItemSet]]
frequentPatterns thresh table = until (\x -> [] == head x) (\x -> filterByFrequency (generateNextLevel (head x)) : x) [firstLevel] where frequentPatterns thresh table = until (\x -> [] == head x) (\x -> filterByFrequency (generateNextLevel (head x)) : x) [firstLevel] where
firstLevel = map (\x -> ItemSet $ Set.fromList [x]) (singletons table) firstLevel = map (\x -> ItemSet $ Set.fromList [x]) (singletons table)
@@ -47,6 +58,9 @@ frequentPatterns thresh table = until (\x -> [] == head x) (\x -> filterByFreque
main :: IO() main :: IO()
main = print $ frequentPatterns 1 table where main = do
table = [ItemSet (Set.fromList [Item "a", Item "b", Item "c"]), ItemSet (Set.fromList [Item "a", Item "b"])] file <- readFile "out.csv"
case parseCSV file of
Left _ -> putStrLn "Could not parse out.csv"
Right val -> mapM_ (\x -> putStrLn (show x ++ "(" ++ show (count table x) ++ ")")) (concat (frequentPatterns 0.6 table)) where
table = map (ItemSet. Set.fromList .map Item) val