display cleaned, ready to split into multiple files
This commit is contained in:
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
*.hi
|
||||||
|
*.o
|
||||||
|
Main
|
||||||
|
*.csv
|
||||||
39
CSVParser.hs
Normal file
39
CSVParser.hs
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
module CSVParser (
|
||||||
|
parseCSV
|
||||||
|
)where
|
||||||
|
|
||||||
|
|
||||||
|
import Text.ParserCombinators.Parsec
|
||||||
|
|
||||||
|
csvFile :: GenParser Char st [[String]]
|
||||||
|
csvFile = do
|
||||||
|
result <- many line
|
||||||
|
eof
|
||||||
|
return result
|
||||||
|
|
||||||
|
line :: GenParser Char st [String]
|
||||||
|
line = do
|
||||||
|
result <- cells
|
||||||
|
_ <- eol
|
||||||
|
return result
|
||||||
|
|
||||||
|
cells :: GenParser Char st [String]
|
||||||
|
cells = do
|
||||||
|
first <- cellContent
|
||||||
|
next <- remainingCells
|
||||||
|
return (first : next)
|
||||||
|
|
||||||
|
remainingCells :: GenParser Char st [String]
|
||||||
|
remainingCells =
|
||||||
|
(char ',' >> cells) -- Found comma? More cells coming
|
||||||
|
<|> return [] -- No comma? Return [], no more cells
|
||||||
|
|
||||||
|
cellContent :: GenParser Char st String
|
||||||
|
cellContent =
|
||||||
|
many (noneOf ",\n")
|
||||||
|
|
||||||
|
eol :: GenParser Char st Char
|
||||||
|
eol = char '\n'
|
||||||
|
|
||||||
|
parseCSV :: String -> Either ParseError [[String]]
|
||||||
|
parseCSV input = parse csvFile "(unknown)" input
|
||||||
@@ -3,6 +3,8 @@ module Main where
|
|||||||
import Data.Set (Set)
|
import Data.Set (Set)
|
||||||
import qualified Data.Set as Set
|
import qualified Data.Set as Set
|
||||||
import qualified Data.List as List
|
import qualified Data.List as List
|
||||||
|
import Debug.Trace (traceShow)
|
||||||
|
import CSVParser
|
||||||
|
|
||||||
-- data structures defined here
|
-- data structures defined here
|
||||||
|
|
||||||
@@ -12,9 +14,15 @@ instance Show Item where
|
|||||||
|
|
||||||
data ItemSet = ItemSet (Set Item) deriving (Eq, Ord)
|
data ItemSet = ItemSet (Set Item) deriving (Eq, Ord)
|
||||||
instance Show ItemSet where
|
instance Show ItemSet where
|
||||||
show (ItemSet x) = let y = foldl (++) "" $ map show $ Set.toList x in y
|
show (ItemSet x) = foldr ((\y old -> y ++ " " ++ old).show) "" (Set.toList x)
|
||||||
|
|
||||||
|
data Rule = Rule ItemSet ItemSet deriving (Eq)
|
||||||
|
instance Show Rule where
|
||||||
|
show (Rule a b) = show a ++ "-> " ++ show b
|
||||||
|
|
||||||
|
|
||||||
type Frequency = Double
|
type Frequency = Double
|
||||||
|
type Count = Int
|
||||||
|
|
||||||
semiUnion :: ItemSet -> ItemSet -> ItemSet
|
semiUnion :: ItemSet -> ItemSet -> ItemSet
|
||||||
semiUnion (ItemSet set1) (ItemSet set2) = ItemSet (if max1 <= max2 && Set.delete max1 set1 == Set.delete max2 set2 then set1 `Set.union` set2 else Set.empty) where
|
semiUnion (ItemSet set1) (ItemSet set2) = ItemSet (if max1 <= max2 && Set.delete max1 set1 == Set.delete max2 set2 then set1 `Set.union` set2 else Set.empty) where
|
||||||
@@ -28,18 +36,21 @@ generateLevels singles = until (\x -> head x == lastLevel) (\x -> generateNextLe
|
|||||||
lastLevel = [ItemSet $ Set.fromList singles]
|
lastLevel = [ItemSet $ Set.fromList singles]
|
||||||
|
|
||||||
generateNextLevel :: [ItemSet] -> [ItemSet]
|
generateNextLevel :: [ItemSet] -> [ItemSet]
|
||||||
generateNextLevel level = foldr (\value old -> generate value ++ old) [] level where
|
generateNextLevel level = traceShow ("Computing level " ++ show (isSize (head level))) $ foldr (\value old -> generate value ++ old) [] level where
|
||||||
generate value = takeWhile (/= empty) (foldr (\x old -> semiUnion value x : old) [] (tail $ List.dropWhile (/= value) level)) where
|
generate value = takeWhile (/= empty) (foldr (\x old -> semiUnion value x : old) [] (tail $ List.dropWhile (/= value) level))
|
||||||
empty = ItemSet $ Set.fromList []
|
empty = ItemSet $ Set.fromList []
|
||||||
|
isSize (ItemSet set) = Set.size set
|
||||||
|
|
||||||
frequency :: [ItemSet] -> ItemSet -> Frequency
|
frequency :: [ItemSet] -> ItemSet -> Frequency
|
||||||
frequency table (ItemSet set) = setCount / fromIntegral (length table) where
|
frequency table (ItemSet set) = setCount / fromIntegral (length table) where
|
||||||
setCount = fromIntegral $ length (filter (\(ItemSet row) -> set `Set.isSubsetOf` row) table)
|
setCount = fromIntegral $ count table (ItemSet set)
|
||||||
|
|
||||||
|
count :: [ItemSet] -> ItemSet -> Count
|
||||||
|
count table (ItemSet set) = length (filter (\(ItemSet row) -> set `Set.isSubsetOf` row) table)
|
||||||
|
|
||||||
singletons :: [ItemSet] -> [Item]
|
singletons :: [ItemSet] -> [Item]
|
||||||
singletons table = Set.toList $ foldr (\(ItemSet row) old -> old `Set.union` row) (Set.fromList []) table where
|
singletons table = Set.toList $ foldr (\(ItemSet row) old -> old `Set.union` row) (Set.fromList []) table where
|
||||||
|
|
||||||
|
|
||||||
frequentPatterns :: Frequency -> [ItemSet] -> [[ItemSet]]
|
frequentPatterns :: Frequency -> [ItemSet] -> [[ItemSet]]
|
||||||
frequentPatterns thresh table = until (\x -> [] == head x) (\x -> filterByFrequency (generateNextLevel (head x)) : x) [firstLevel] where
|
frequentPatterns thresh table = until (\x -> [] == head x) (\x -> filterByFrequency (generateNextLevel (head x)) : x) [firstLevel] where
|
||||||
firstLevel = map (\x -> ItemSet $ Set.fromList [x]) (singletons table)
|
firstLevel = map (\x -> ItemSet $ Set.fromList [x]) (singletons table)
|
||||||
@@ -47,6 +58,9 @@ frequentPatterns thresh table = until (\x -> [] == head x) (\x -> filterByFreque
|
|||||||
|
|
||||||
|
|
||||||
main :: IO()
|
main :: IO()
|
||||||
main = print $ frequentPatterns 1 table where
|
main = do
|
||||||
table = [ItemSet (Set.fromList [Item "a", Item "b", Item "c"]), ItemSet (Set.fromList [Item "a", Item "b"])]
|
file <- readFile "out.csv"
|
||||||
|
case parseCSV file of
|
||||||
|
Left _ -> putStrLn "Could not parse out.csv"
|
||||||
|
Right val -> mapM_ (\x -> putStrLn (show x ++ "(" ++ show (count table x) ++ ")")) (concat (frequentPatterns 0.6 table)) where
|
||||||
|
table = map (ItemSet. Set.fromList .map Item) val
|
||||||
Reference in New Issue
Block a user