split filtering into phase 3

This commit is contained in:
IGI-111
2015-04-12 15:40:15 +02:00
parent 57aa8076ac
commit ef4b58158f
5 changed files with 48 additions and 13 deletions

View File

@@ -21,7 +21,7 @@ data ItemSet = ItemSet (Set Item) deriving (Eq, Ord)
instance Show ItemSet where
show (ItemSet x) =
init $ foldr ((\y old -> y ++ " " ++ old).show) "" (Set.toList x)
init $ foldr ((\y old -> y ++ "," ++ old).show) "" (Set.toList x)
instance Freq ItemSet where
frequency table (ItemSet set) =

View File

@@ -16,12 +16,6 @@ semiUnion (ItemSet set1) (ItemSet set2) = ItemSet $
max1 = Set.findMax set1
max2 = Set.findMax set2
-- generate all possible combinations from a set of singletons
-- generateLevels :: [Item] -> [[ItemSet]]
-- generateLevels singles = until (\x -> head x == lastLevel) (\x -> generateNextLevel (head x) : x) [firstLevel] where
-- firstLevel = map (\x -> ItemSet $ Set.fromList [x]) singles
-- lastLevel = [ItemSet $ Set.fromList singles]
-- generate the next level in a bottom-up route
generateNextLevel :: [ItemSet] -> [ItemSet]
generateNextLevel level = trace ("Computing level " ++ show (isSize (head level))) $

View File

@@ -9,7 +9,7 @@ main :: IO()
main = do
args <- getArgs
when (2 > length args)
(error "Usage: Main table.csv threshold [outfile.csv]")
(error "Usage: phase1 table.csv threshold [outfile.csv]")
let filename = head args
let threshold = read $ args !! 1
file <- readFile filename

View File

@@ -2,14 +2,13 @@ import CSVParser
import DataModel
import ExtractRules
import qualified Data.Set as Set
import qualified Data.List as List
import System.Environment(getArgs)
import Control.Monad(when)
main :: IO()
main = do
args <- getArgs
when (3 > length args) (error "Usage: phase2 table.csv frequents.csv threshold [out.assoc]")
when (3 > length args) (error "Usage: phase2 table.csv frequents.csv threshold [rules.csv]")
let threshold = read $ args !! 2
tableFile <- readFile $ head args
freqFile <- readFile $ args !! 1
@@ -25,6 +24,9 @@ main = do
where
freqPats = map ((ItemSet. Set.fromList .map Item) . tail) freqFileContent
table = map (ItemSet. Set.fromList .map Item) tableFileContent
rules = List.sortBy (\x y -> compare (lift table y) (lift table x)) $ extractRules threshold table freqPats
output = init $ foldr ((\x old -> old ++ x ++ "\n").show) "" $ take 10 rules
rules = extractRules threshold table freqPats
output = formatToCSV rules
-- | Render a list of rules as CSV text, one rule per line.
--
-- Rows are emitted in the reverse of the input order (the last rule of the
-- list becomes the first line) and every row ends with a newline. A row is
-- the 'show' output of the rule's first component, a column holding a single
-- space, then the 'show' output of the second component.
--
-- The text is built with 'unlines' over the reversed list rather than by
-- appending each row to the end of an accumulator, which keeps the work
-- linear in the size of the output.
formatToCSV :: [Rule] -> String
formatToCSV = unlines . map formatRow . reverse
  where
    -- ", ," yields the single-space column that separates the two halves.
    formatRow (Rule x y) = show x ++ ", ," ++ show y

39
phase3.hs Normal file
View File

@@ -0,0 +1,39 @@
import CSVParser
import DataModel
import ExtractRules
import qualified Data.Set as Set
import qualified Data.List as List
import System.Environment(getArgs)
import Control.Monad(when)
-- | Entry point of phase 3: rank the rules of a rules file by their lift
-- against the table and output the highest-ranked ones as CSV.
--
-- Usage: @phase3 table.csv rules.csv count [bestRules.csv]@
--
-- * @table.csv@     – every row is turned into an 'ItemSet'.
-- * @rules.csv@     – every row is turned into a 'Rule' by 'ruleFromRow'.
-- * @count@         – how many rules to keep after sorting by descending lift.
-- * @bestRules.csv@ – optional; when present the CSV text is also written there.
--
-- Calls 'error' when fewer than three arguments are given, when @count@ is
-- not an integer, or when either file cannot be parsed.
main :: IO()
main = do
    args <- getArgs
    case args of
        (tablePath : rulesPath : countArg : extraArgs) -> do
            -- Validate the count up front so a typo is reported before any
            -- file is read, with a message naming the offending argument.
            bestRuleCount <- case reads countArg :: [(Int, String)] of
                [(n, rest)] | null (words rest) -> return n
                _ -> error ("Could not parse rule count: " ++ countArg)
            tableFile <- readFile tablePath
            rulesFile <- readFile rulesPath
            case (parseCSV tableFile, parseCSV rulesFile) of
                (Left _, _) -> error "Could not parse table"
                (_, Left _) -> error "Could not parse rules"
                (Right tableFileContent, Right rulesFileContent) -> do
                    let table = map (ItemSet . Set.fromList . map Item) tableFileContent
                        rules = map ruleFromRow rulesFileContent
                        -- Pair every rule with its lift once, so the sort
                        -- does not recompute it on each comparison.
                        scored = [(lift table rule, rule) | rule <- rules]
                        -- Comparing b to a (flipped) sorts by descending lift;
                        -- sortBy is stable, so ties keep their file order.
                        ranked = List.sortBy (\(a, _) (b, _) -> compare b a) scored
                        bestRules = take bestRuleCount (map snd ranked)
                        output = formatToCSV bestRules
                    -- NOTE(review): 'print' goes through 'show', so the
                    -- console gets a quoted, escaped one-line literal; kept
                    -- to preserve the existing output -- confirm whether
                    -- putStr was intended.
                    print output
                    -- Any argument after the count is the output path; only
                    -- the first one is used.
                    case extraArgs of
                        (outPath : _) -> writeFile outPath output
                        [] -> return ()
        _ -> error "Usage: phase3 table.csv rules.csv count [bestRules.csv]"
-- | Render a list of rules as CSV text, one rule per line.
--
-- Rows are emitted in the reverse of the input order (the last rule of the
-- list becomes the first line) and every row ends with a newline. A row is
-- the 'show' output of the rule's first component, a column holding a single
-- space, then the 'show' output of the second component.
--
-- The text is built with 'unlines' over the reversed list rather than by
-- appending each row to the end of an accumulator, which keeps the work
-- linear in the size of the output.
formatToCSV :: [Rule] -> String
formatToCSV = unlines . map formatRow . reverse
  where
    -- ", ," yields the single-space column that separates the two halves.
    formatRow (Rule x y) = show x ++ ", ," ++ show y
-- | Rebuild a 'Rule' from one parsed CSV row.
--
-- The row is split at the first column that is exactly a single space: the
-- columns before it become the items of the rule's first 'ItemSet', the
-- columns after it the items of the second. Later single-space columns are
-- not treated as separators; they end up as items of the second set.
--
-- Calls 'error' with a descriptive message when the row contains no
-- separator column at all.
ruleFromRow :: [String] -> Rule
ruleFromRow columns =
    case break (== " ") columns of
        (lhs, _separator : rhs) -> Rule (toItemSet lhs) (toItemSet rhs)
        (_, []) -> error "ruleFromRow: row has no \" \" separator column"
  where
    toItemSet = ItemSet . Set.fromList . map Item