split filtering into phase 3
This commit is contained in:
@@ -21,7 +21,7 @@ data ItemSet = ItemSet (Set Item) deriving (Eq, Ord)
|
||||
|
||||
instance Show ItemSet where
|
||||
show (ItemSet x) =
|
||||
init $ foldr ((\y old -> y ++ " " ++ old).show) "" (Set.toList x)
|
||||
init $ foldr ((\y old -> y ++ "," ++ old).show) "" (Set.toList x)
|
||||
|
||||
instance Freq ItemSet where
|
||||
frequency table (ItemSet set) =
|
||||
|
||||
@@ -16,12 +16,6 @@ semiUnion (ItemSet set1) (ItemSet set2) = ItemSet $
|
||||
max1 = Set.findMax set1
|
||||
max2 = Set.findMax set2
|
||||
|
||||
-- generate all possible combinations from a set of singletons
|
||||
-- generateLevels :: [Item] -> [[ItemSet]]
|
||||
-- generateLevels singles = until (\x -> head x == lastLevel) (\x -> generateNextLevel (head x) : x) [firstLevel] where
|
||||
-- firstLevel = map (\x -> ItemSet $ Set.fromList [x]) singles
|
||||
-- lastLevel = [ItemSet $ Set.fromList singles]
|
||||
|
||||
-- generate the next level in a bottom-up route
|
||||
generateNextLevel :: [ItemSet] -> [ItemSet]
|
||||
generateNextLevel level = trace ("Computing level " ++ show (isSize (head level))) $
|
||||
@@ -34,7 +28,7 @@ generateNextLevel level = trace ("Computing level " ++ show (isSize (head level)
|
||||
singletons :: [ItemSet] -> [Item]
|
||||
singletons table = Set.toList $ foldr union (Set.fromList []) table
|
||||
where
|
||||
union (ItemSet row) old = old `Set.union` row
|
||||
union (ItemSet row) old = old `Set.union` row
|
||||
|
||||
frequentPatterns :: Frequency -> [ItemSet] -> [[ItemSet]]
|
||||
frequentPatterns thresh table = until (\x -> [] == head x)
|
||||
|
||||
@@ -9,7 +9,7 @@ main :: IO()
|
||||
main = do
|
||||
args <- getArgs
|
||||
when (2 > length args)
|
||||
(error "Usage: Main table.csv threshold [outfile.csv]")
|
||||
(error "Usage: phase1 table.csv threshold [outfile.csv]")
|
||||
let filename = head args
|
||||
let threshold = read $ args !! 1
|
||||
file <- readFile filename
|
||||
|
||||
10
phase2.hs
10
phase2.hs
@@ -2,14 +2,13 @@ import CSVParser
|
||||
import DataModel
|
||||
import ExtractRules
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.List as List
|
||||
import System.Environment(getArgs)
|
||||
import Control.Monad(when)
|
||||
|
||||
main :: IO()
|
||||
main = do
|
||||
args <- getArgs
|
||||
when (3 > length args) (error "Usage: phase2 table.csv frequents.csv threshold [out.assoc]")
|
||||
when (3 > length args) (error "Usage: phase2 table.csv frequents.csv threshold [rules.csv]")
|
||||
let threshold = read $ args !! 2
|
||||
tableFile <- readFile $ head args
|
||||
freqFile <- readFile $ args !! 1
|
||||
@@ -25,6 +24,9 @@ main = do
|
||||
where
|
||||
freqPats = map ((ItemSet. Set.fromList .map Item) . tail) freqFileContent
|
||||
table = map (ItemSet. Set.fromList .map Item) tableFileContent
|
||||
rules = List.sortBy (\x y -> compare (lift table y) (lift table x)) $ extractRules threshold table freqPats
|
||||
output = init $ foldr ((\x old -> old ++ x ++ "\n").show) "" $ take 10 rules
|
||||
rules = extractRules threshold table freqPats
|
||||
output = formatToCSV rules
|
||||
|
||||
formatToCSV :: [Rule] -> String
|
||||
formatToCSV = foldr (\x old -> old ++ formatRow x ++ "\n") "" where
|
||||
formatRow (Rule x y) = show x ++ ", ," ++ show y
|
||||
|
||||
39
phase3.hs
Normal file
39
phase3.hs
Normal file
@@ -0,0 +1,39 @@
|
||||
import CSVParser
|
||||
import DataModel
|
||||
import ExtractRules
|
||||
import qualified Data.Set as Set
|
||||
import qualified Data.List as List
|
||||
import System.Environment(getArgs)
|
||||
import Control.Monad(when)
|
||||
|
||||
main :: IO()
|
||||
main = do
|
||||
args <- getArgs
|
||||
when (3 > length args) (error "Usage: phase3 table.csv rules.csv count [bestRules.csv]")
|
||||
let bestRuleCount = read $ args !! 2
|
||||
tableFile <- readFile $ head args
|
||||
rulesFile <- readFile $ args !! 1
|
||||
case parseCSV tableFile of
|
||||
Left _ -> error "Could not parse table"
|
||||
Right tableFileContent ->
|
||||
case parseCSV rulesFile of
|
||||
Left _ -> error "Could not parse frequent patterns"
|
||||
Right rulesFileContent -> do
|
||||
print $ output
|
||||
when (length args > 3) $
|
||||
writeFile (args !! 3) output
|
||||
where
|
||||
table = map (ItemSet. Set.fromList .map Item) tableFileContent
|
||||
rules = map ruleFromRow rulesFileContent
|
||||
bestRules = take bestRuleCount $ List.sortBy (\x y -> compare (lift table y) (lift table x)) rules
|
||||
output = formatToCSV bestRules
|
||||
|
||||
formatToCSV :: [Rule] -> String
|
||||
formatToCSV = foldr (\x old -> old ++ formatRow x ++ "\n") "" where
|
||||
formatRow (Rule x y) = show x ++ ", ," ++ show y
|
||||
|
||||
ruleFromRow :: [String] -> Rule
|
||||
ruleFromRow columns = Rule item1 item2
|
||||
where
|
||||
item1 = ItemSet $ Set.fromList $ map Item $ takeWhile (/= " ") columns
|
||||
item2 = ItemSet $ Set.fromList $ map Item $ tail $ dropWhile (/= " ") columns
|
||||
Reference in New Issue
Block a user