split filtering into phase 3

This commit is contained in:
IGI-111
2015-04-12 15:40:15 +02:00
parent 57aa8076ac
commit ef4b58158f
5 changed files with 48 additions and 13 deletions

View File

@@ -21,7 +21,7 @@ data ItemSet = ItemSet (Set Item) deriving (Eq, Ord)
instance Show ItemSet where instance Show ItemSet where
show (ItemSet x) = show (ItemSet x) =
init $ foldr ((\y old -> y ++ " " ++ old).show) "" (Set.toList x) init $ foldr ((\y old -> y ++ "," ++ old).show) "" (Set.toList x)
instance Freq ItemSet where instance Freq ItemSet where
frequency table (ItemSet set) = frequency table (ItemSet set) =

View File

@@ -16,12 +16,6 @@ semiUnion (ItemSet set1) (ItemSet set2) = ItemSet $
max1 = Set.findMax set1 max1 = Set.findMax set1
max2 = Set.findMax set2 max2 = Set.findMax set2
-- generate all possible combinations from a set of singletons
-- generateLevels :: [Item] -> [[ItemSet]]
-- generateLevels singles = until (\x -> head x == lastLevel) (\x -> generateNextLevel (head x) : x) [firstLevel] where
-- firstLevel = map (\x -> ItemSet $ Set.fromList [x]) singles
-- lastLevel = [ItemSet $ Set.fromList singles]
-- generate the next level in a bottom-up route -- generate the next level in a bottom-up route
generateNextLevel :: [ItemSet] -> [ItemSet] generateNextLevel :: [ItemSet] -> [ItemSet]
generateNextLevel level = trace ("Computing level " ++ show (isSize (head level))) $ generateNextLevel level = trace ("Computing level " ++ show (isSize (head level))) $
@@ -34,7 +28,7 @@ generateNextLevel level = trace ("Computing level " ++ show (isSize (head level)
singletons :: [ItemSet] -> [Item] singletons :: [ItemSet] -> [Item]
singletons table = Set.toList $ foldr union (Set.fromList []) table singletons table = Set.toList $ foldr union (Set.fromList []) table
where where
union (ItemSet row) old = old `Set.union` row union (ItemSet row) old = old `Set.union` row
frequentPatterns :: Frequency -> [ItemSet] -> [[ItemSet]] frequentPatterns :: Frequency -> [ItemSet] -> [[ItemSet]]
frequentPatterns thresh table = until (\x -> [] == head x) frequentPatterns thresh table = until (\x -> [] == head x)

View File

@@ -9,7 +9,7 @@ main :: IO()
main = do main = do
args <- getArgs args <- getArgs
when (2 > length args) when (2 > length args)
(error "Usage: Main table.csv threshold [outfile.csv]") (error "Usage: phase1 table.csv threshold [outfile.csv]")
let filename = head args let filename = head args
let threshold = read $ args !! 1 let threshold = read $ args !! 1
file <- readFile filename file <- readFile filename

View File

@@ -2,14 +2,13 @@ import CSVParser
import DataModel import DataModel
import ExtractRules import ExtractRules
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.List as List
import System.Environment(getArgs) import System.Environment(getArgs)
import Control.Monad(when) import Control.Monad(when)
main :: IO() main :: IO()
main = do main = do
args <- getArgs args <- getArgs
when (3 > length args) (error "Usage: phase2 table.csv frequents.csv threshold [out.assoc]") when (3 > length args) (error "Usage: phase2 table.csv frequents.csv threshold [rules.csv]")
let threshold = read $ args !! 2 let threshold = read $ args !! 2
tableFile <- readFile $ head args tableFile <- readFile $ head args
freqFile <- readFile $ args !! 1 freqFile <- readFile $ args !! 1
@@ -25,6 +24,9 @@ main = do
where where
freqPats = map ((ItemSet. Set.fromList .map Item) . tail) freqFileContent freqPats = map ((ItemSet. Set.fromList .map Item) . tail) freqFileContent
table = map (ItemSet. Set.fromList .map Item) tableFileContent table = map (ItemSet. Set.fromList .map Item) tableFileContent
rules = List.sortBy (\x y -> compare (lift table y) (lift table x)) $ extractRules threshold table freqPats rules = extractRules threshold table freqPats
output = init $ foldr ((\x old -> old ++ x ++ "\n").show) "" $ take 10 rules output = formatToCSV rules
-- | Render a list of rules as CSV text, one rule per line, each line
-- terminated by a newline.
--
-- Rows are emitted in the REVERSE of the input order: the last rule of the
-- input list becomes the first line of the output.
formatToCSV :: [Rule] -> String
formatToCSV = unlines . reverse . map formatRow
  where
    -- The two sides of a rule are joined by a column that holds a single
    -- space, i.e. the text ", ," between the two shown item sets.
    formatRow (Rule left right) = show left ++ ", ," ++ show right

39
phase3.hs Normal file
View File

@@ -0,0 +1,39 @@
import CSVParser
import DataModel
import ExtractRules
import qualified Data.Set as Set
import qualified Data.List as List
import System.Environment(getArgs)
import Control.Monad(when)
-- | Entry point of phase 3.
--
-- Command line: @phase3 table.csv rules.csv count [bestRules.csv]@
--
-- Reads the table and the rules file, sorts the rules by descending
-- @lift@ (computed against the table), keeps the first @count@ of them,
-- prints the CSV rendering of those rules and, when a fourth argument is
-- given, also writes it to that file.
--
-- Aborts with 'error' when fewer than three arguments are given, when
-- @count@ is not an integer, or when either file fails to parse.
main :: IO ()
main = do
  args <- getArgs
  -- Destructure the argument list once instead of using head / (!!).
  case args of
    (tablePath : rulesPath : countArg : optionalOut) -> do
      -- Validate the count up front so a bad value yields a clear message
      -- instead of a lazy "Prelude.read: no parse" later on.
      bestRuleCount <- case reads countArg :: [(Int, String)] of
        [(n, "")] -> return n
        _ -> error ("phase3: count must be an integer, got " ++ show countArg)
      tableFile <- readFile tablePath
      rulesFile <- readFile rulesPath
      case parseCSV tableFile of
        Left _ -> error "Could not parse table"
        Right tableFileContent ->
          case parseCSV rulesFile of
            Left _ -> error "Could not parse rules"
            Right rulesFileContent -> do
              let table = map (ItemSet . Set.fromList . map Item) tableFileContent
                  rules = map ruleFromRow rulesFileContent
                  -- Arguments are swapped on purpose: highest lift first.
                  byLiftDescending x y = compare (lift table y) (lift table x)
                  bestRules = take bestRuleCount (List.sortBy byLiftDescending rules)
                  output = formatToCSV bestRules
              -- NOTE(review): print shows the String as a quoted, escaped
              -- literal; putStr may have been intended - confirm before
              -- changing, since it alters what is written to stdout.
              print output
              case optionalOut of
                (outPath : _) -> writeFile outPath output
                [] -> return ()
    _ -> error "Usage: phase3 table.csv rules.csv count [bestRules.csv]"
-- | Render a list of rules as CSV text, one rule per line, each line
-- terminated by a newline.
--
-- Rows are emitted in the REVERSE of the input order: the last rule of the
-- input list becomes the first line of the output.
formatToCSV :: [Rule] -> String
formatToCSV = unlines . reverse . map formatRow
  where
    -- The two sides of a rule are joined by a column that holds a single
    -- space, i.e. the text ", ," between the two shown item sets.
    formatRow (Rule left right) = show left ++ ", ," ++ show right
-- | Rebuild a 'Rule' from one parsed CSV row.
--
-- The columns before the first column equal to @" "@ (a single space)
-- become the first item set; the columns after it become the second.
-- Presumably this mirrors the @", ,"@ separator written by 'formatToCSV';
-- confirm that the CSV parser keeps the space-only column intact.
--
-- Calls 'error' with the offending row when no separator column exists
-- (previously this surfaced as an opaque "Prelude.tail: empty list").
ruleFromRow :: [String] -> Rule
ruleFromRow columns = Rule item1 item2
  where
    toItemSet = ItemSet . Set.fromList . map Item
    -- Split once at the separator instead of scanning the row twice.
    (beforeSeparator, fromSeparator) = break (== " ") columns
    item1 = toItemSet beforeSeparator
    item2 = case fromSeparator of
      (_separator : afterSeparator) -> toItemSet afterSeparator
      [] ->
        error
          ("ruleFromRow: row has no \" \" separator column: " ++ show columns)