split filtering into phase 3
This commit is contained in:
@@ -21,7 +21,7 @@ data ItemSet = ItemSet (Set Item) deriving (Eq, Ord)
|
|||||||
|
|
||||||
-- | Render an 'ItemSet' as its items' 'show' output joined by commas,
-- in the ascending order produced by 'Set.toList'.
--
-- An empty set renders as the empty string. The earlier formulation
-- stripped a trailing separator with 'init', which is partial and
-- crashed on an empty set.
instance Show ItemSet where
  show (ItemSet x) =
    case map show (Set.toList x) of
      [] -> ""
      -- foldr1 only places the separator *between* elements, so there
      -- is no trailing comma to strip afterwards.
      rendered -> foldr1 (\y old -> y ++ "," ++ old) rendered
|
||||||
|
|
||||||
instance Freq ItemSet where
|
instance Freq ItemSet where
|
||||||
frequency table (ItemSet set) =
|
frequency table (ItemSet set) =
|
||||||
|
|||||||
@@ -16,12 +16,6 @@ semiUnion (ItemSet set1) (ItemSet set2) = ItemSet $
|
|||||||
max1 = Set.findMax set1
|
max1 = Set.findMax set1
|
||||||
max2 = Set.findMax set2
|
max2 = Set.findMax set2
|
||||||
|
|
||||||
-- generate all possible combinations from a set of singletons
|
|
||||||
-- generateLevels :: [Item] -> [[ItemSet]]
|
|
||||||
-- generateLevels singles = until (\x -> head x == lastLevel) (\x -> generateNextLevel (head x) : x) [firstLevel] where
|
|
||||||
-- firstLevel = map (\x -> ItemSet $ Set.fromList [x]) singles
|
|
||||||
-- lastLevel = [ItemSet $ Set.fromList singles]
|
|
||||||
|
|
||||||
-- generate the next level in a bottom-up route
|
-- generate the next level in a bottom-up route
|
||||||
generateNextLevel :: [ItemSet] -> [ItemSet]
|
generateNextLevel :: [ItemSet] -> [ItemSet]
|
||||||
generateNextLevel level = trace ("Computing level " ++ show (isSize (head level))) $
|
generateNextLevel level = trace ("Computing level " ++ show (isSize (head level))) $
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ main :: IO()
|
|||||||
main = do
|
main = do
|
||||||
args <- getArgs
|
args <- getArgs
|
||||||
when (2 > length args)
|
when (2 > length args)
|
||||||
(error "Usage: Main table.csv threshold [outfile.csv]")
|
(error "Usage: phase1 table.csv threshold [outfile.csv]")
|
||||||
let filename = head args
|
let filename = head args
|
||||||
let threshold = read $ args !! 1
|
let threshold = read $ args !! 1
|
||||||
file <- readFile filename
|
file <- readFile filename
|
||||||
|
|||||||
10
phase2.hs
10
phase2.hs
@@ -2,14 +2,13 @@ import CSVParser
|
|||||||
import DataModel
|
import DataModel
|
||||||
import ExtractRules
|
import ExtractRules
|
||||||
import qualified Data.Set as Set
|
import qualified Data.Set as Set
|
||||||
import qualified Data.List as List
|
|
||||||
import System.Environment(getArgs)
|
import System.Environment(getArgs)
|
||||||
import Control.Monad(when)
|
import Control.Monad(when)
|
||||||
|
|
||||||
main :: IO()
|
main :: IO()
|
||||||
main = do
|
main = do
|
||||||
args <- getArgs
|
args <- getArgs
|
||||||
when (3 > length args) (error "Usage: phase2 table.csv frequents.csv threshold [out.assoc]")
|
when (3 > length args) (error "Usage: phase2 table.csv frequents.csv threshold [rules.csv]")
|
||||||
let threshold = read $ args !! 2
|
let threshold = read $ args !! 2
|
||||||
tableFile <- readFile $ head args
|
tableFile <- readFile $ head args
|
||||||
freqFile <- readFile $ args !! 1
|
freqFile <- readFile $ args !! 1
|
||||||
@@ -25,6 +24,9 @@ main = do
|
|||||||
where
|
where
|
||||||
freqPats = map ((ItemSet. Set.fromList .map Item) . tail) freqFileContent
|
freqPats = map ((ItemSet. Set.fromList .map Item) . tail) freqFileContent
|
||||||
table = map (ItemSet. Set.fromList .map Item) tableFileContent
|
table = map (ItemSet. Set.fromList .map Item) tableFileContent
|
||||||
rules = List.sortBy (\x y -> compare (lift table y) (lift table x)) $ extractRules threshold table freqPats
|
rules = extractRules threshold table freqPats
|
||||||
output = init $ foldr ((\x old -> old ++ x ++ "\n").show) "" $ take 10 rules
|
output = formatToCSV rules
|
||||||
|
|
||||||
|
-- | Serialise a list of rules as CSV text, one rule per line, every
-- line (including the last) terminated by a newline.
--
-- Each row is the 'show' of the rule's first item set, the literal
-- separator @", ,"@, and the 'show' of the second item set.
--
-- Rows are emitted in the reverse of the input order. That is the order
-- this function has always produced; it is kept so existing output does
-- not change. Reversing once and concatenating is linear, whereas
-- appending each row to the right of the accumulated string re-copied
-- the accumulator for every rule.
formatToCSV :: [Rule] -> String
formatToCSV = concatMap formatLine . reverse
  where
    formatLine rule = formatRow rule ++ "\n"
    formatRow (Rule x y) = show x ++ ", ," ++ show y
|
||||||
|
|||||||
39
phase3.hs
Normal file
39
phase3.hs
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
import CSVParser
|
||||||
|
import DataModel
|
||||||
|
import ExtractRules
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
import qualified Data.List as List
|
||||||
|
import System.Environment(getArgs)
|
||||||
|
import Control.Monad(when)
|
||||||
|
|
||||||
|
-- | Entry point of phase 3.
--
-- Command line: @phase3 table.csv rules.csv count [bestRules.csv]@
--
-- Reads the table and the rules file, parses both with 'parseCSV',
-- orders the rules by descending @lift table@ value, keeps the first
-- @count@ of them and prints the CSV rendering. When a fourth argument
-- is given, the same text is also written to that path.
--
-- Fails with 'error' when fewer than three arguments are supplied, when
-- either file cannot be parsed, or when @count@ is not an integer.
main :: IO()
main = do
  args <- getArgs
  -- Matching on the argument list replaces the separate length check
  -- and the partial head / (!!) lookups.
  case args of
    (tablePath : rulesPath : countArg : optionalOut) -> do
      let bestRuleCount = parseCount countArg
      tableFile <- readFile tablePath
      rulesFile <- readFile rulesPath
      case parseCSV tableFile of
        Left _ -> error "Could not parse table"
        Right tableFileContent ->
          case parseCSV rulesFile of
            -- This branch concerns the rules file, so the message says so.
            Left _ -> error "Could not parse rules"
            Right rulesFileContent -> do
              let table = map (ItemSet . Set.fromList . map Item) tableFileContent
                  rules = map ruleFromRow rulesFileContent
                  -- Arguments to compare are swapped to sort descending.
                  bestRules =
                    take bestRuleCount $
                      List.sortBy (\x y -> compare (lift table y) (lift table x)) rules
                  output = formatToCSV bestRules
              -- NOTE(review): print shows the string with quotes and
              -- escaped newlines; putStr may be what is wanted -- confirm.
              print output
              case optionalOut of
                (outPath : _) -> writeFile outPath output
                [] -> return ()
    _ -> error usage
  where
    usage = "Usage: phase3 table.csv rules.csv count [bestRules.csv]"

    -- Accepts exactly the inputs 'read' accepts (surrounding whitespace
    -- allowed) but reports a readable message instead of
    -- "Prelude.read: no parse".
    parseCount :: String -> Int
    parseCount raw =
      case [n | (n, rest) <- reads raw, ("", "") <- lex rest] of
        [n] -> n
        _ -> error ("count must be an integer, got: " ++ show raw ++ "\n" ++ usage)
|
||||||
|
|
||||||
|
-- | Serialise a list of rules as CSV text, one rule per line, every
-- line (including the last) terminated by a newline.
--
-- Each row is the 'show' of the rule's first item set, the literal
-- separator @", ,"@, and the 'show' of the second item set.
--
-- Rows are emitted in the reverse of the input order. That is the order
-- this function has always produced; it is kept so existing output does
-- not change. Reversing once and concatenating is linear, whereas
-- appending each row to the right of the accumulated string re-copied
-- the accumulator for every rule.
formatToCSV :: [Rule] -> String
formatToCSV = concatMap formatLine . reverse
  where
    formatLine rule = formatRow rule ++ "\n"
    formatRow (Rule x y) = show x ++ ", ," ++ show y
|
||||||
|
|
||||||
|
-- | Rebuild a 'Rule' from one parsed CSV row.
--
-- The columns before the first column equal to @" "@ become the rule's
-- first item set; the columns after it become the second item set. The
-- separator column itself is discarded.
--
-- A row without a separator column (for example a blank trailing row)
-- yields a rule whose second item set is empty instead of crashing.
ruleFromRow :: [String] -> Rule
ruleFromRow columns = Rule item1 item2
  where
    -- break splits at the first separator column in a single pass.
    (beforeSeparator, fromSeparator) = break (== " ") columns
    item1 = toItemSet beforeSeparator
    -- drop 1 rather than tail: total when no separator was found.
    item2 = toItemSet (drop 1 fromSeparator)
    toItemSet = ItemSet . Set.fromList . map Item
|
||||||
Reference in New Issue
Block a user