implemented phase 2
This commit is contained in:
14
DataModel.hs
14
DataModel.hs
@@ -11,8 +11,9 @@ class Freq a where
|
|||||||
frequency :: [ItemSet] -> a -> Frequency
|
frequency :: [ItemSet] -> a -> Frequency
|
||||||
|
|
||||||
data Item = Item String deriving (Eq, Ord)
|
data Item = Item String deriving (Eq, Ord)
|
||||||
|
|
||||||
instance Show Item where
|
instance Show Item where
|
||||||
show (Item s) = s --"Item " ++ s
|
show (Item a) = show a
|
||||||
|
|
||||||
data ItemSet = ItemSet (Set Item) deriving (Eq, Ord)
|
data ItemSet = ItemSet (Set Item) deriving (Eq, Ord)
|
||||||
|
|
||||||
@@ -27,16 +28,23 @@ instance Freq ItemSet where
|
|||||||
|
|
||||||
count :: [ItemSet] -> ItemSet -> Count
|
count :: [ItemSet] -> ItemSet -> Count
|
||||||
count table (ItemSet set) =
|
count table (ItemSet set) =
|
||||||
length (filter isSuperSet table) where
|
length (filter isSuperset table) where
|
||||||
isSuperset (ItemSet row) = set `Set.isSubsetOf` row
|
isSuperset (ItemSet row) = set `Set.isSubsetOf` row
|
||||||
|
|
||||||
|
difference :: ItemSet -> ItemSet -> ItemSet
|
||||||
|
difference (ItemSet set1) (ItemSet set2) = ItemSet (Set.difference set1 set2)
|
||||||
|
|
||||||
|
empty :: ItemSet
|
||||||
|
empty = ItemSet (Set.fromList [])
|
||||||
|
|
||||||
data Rule = Rule ItemSet ItemSet deriving (Eq)
|
data Rule = Rule ItemSet ItemSet deriving (Eq)
|
||||||
|
|
||||||
instance Show Rule where
|
instance Show Rule where
|
||||||
show (Rule a b) = show a ++ "-> " ++ show b
|
show (Rule a b) = show a ++ "-> " ++ show b
|
||||||
|
|
||||||
instance Freq Rule where
|
instance Freq Rule where
|
||||||
frequency table (Rule (ItemSet set1) (ItemSet set2)) = frequency table $ ItemSet (set1 `Set.union` set2)
|
frequency table (Rule (ItemSet set1) (ItemSet set2)) = frequency table $
|
||||||
|
ItemSet (set1 `Set.union` set2)
|
||||||
|
|
||||||
confidence :: [ItemSet] -> Rule -> Confidence
|
confidence :: [ItemSet] -> Rule -> Confidence
|
||||||
confidence table (Rule x y) = frequency table (Rule x y) / frequency table x
|
confidence table (Rule x y) = frequency table (Rule x y) / frequency table x
|
||||||
|
|||||||
32
ExtractRules.hs
Normal file
32
ExtractRules.hs
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
module ExtractRules (
|
||||||
|
extractRules
|
||||||
|
) where
|
||||||
|
import DataModel
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
import qualified Data.Map as Map
|
||||||
|
import Data.Map(Map)
|
||||||
|
|
||||||
|
extractRules :: Confidence -> [ItemSet] -> [Rule]
|
||||||
|
extractRules threshold patterns = filter (\x -> threshold <= confidence patterns x) rules where
|
||||||
|
rules = Map.foldrWithKey (\k v old -> ruleFromSubset v k : old) [] subsets
|
||||||
|
subsets = foldr (\x old -> insertMultiple (filteredPowerset x) x old) Map.empty patterns
|
||||||
|
filteredPowerset (ItemSet set) = map (ItemSet . Set.fromList) $
|
||||||
|
filter (\val -> val /= Set.toList set && val /= []) $ powerset $ Set.toList set
|
||||||
|
|
||||||
|
ruleFromSubset :: ItemSet -> ItemSet -> Rule
|
||||||
|
ruleFromSubset set subset = Rule subset (difference set subset)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
insertMultiple :: Ord k => [k] -> a -> Map k a -> Map k a
|
||||||
|
insertMultiple keys value m = foldr (\x old -> Map.insert x value old) m keys
|
||||||
|
|
||||||
|
powerset :: [a] -> [[a]]
|
||||||
|
powerset [] = [[]]
|
||||||
|
powerset (x:xs) = xss /\/ map (x:) xss where
|
||||||
|
xss = powerset xs
|
||||||
|
|
||||||
|
(/\/) :: [a] -> [a] -> [a]
|
||||||
|
[] /\/ ys = ys
|
||||||
|
(x:xs) /\/ ys = x : (ys /\/ xs)
|
||||||
|
|
||||||
@@ -7,9 +7,12 @@ import Debug.Trace (traceShow)
|
|||||||
import qualified Data.List as List
|
import qualified Data.List as List
|
||||||
|
|
||||||
semiUnion :: ItemSet -> ItemSet -> ItemSet
|
semiUnion :: ItemSet -> ItemSet -> ItemSet
|
||||||
semiUnion (ItemSet set1) (ItemSet set2) = ItemSet (if max1 <= max2 && Set.delete max1 set1 == Set.delete max2 set2 then set1 `Set.union` set2 else Set.empty) where
|
semiUnion (ItemSet set1) (ItemSet set2) = ItemSet $
|
||||||
max1 = Set.findMax set1
|
if max1 <= max2 && Set.delete max1 set1 == Set.delete max2 set2
|
||||||
max2 = Set.findMax set2
|
then set1 `Set.union` set2
|
||||||
|
else Set.empty where
|
||||||
|
max1 = Set.findMax set1
|
||||||
|
max2 = Set.findMax set2
|
||||||
|
|
||||||
-- generate all possible combinations from a set of singletons
|
-- generate all possible combinations from a set of singletons
|
||||||
-- generateLevels :: [Item] -> [[ItemSet]]
|
-- generateLevels :: [Item] -> [[ItemSet]]
|
||||||
@@ -19,15 +22,18 @@ semiUnion (ItemSet set1) (ItemSet set2) = ItemSet (if max1 <= max2 && Set.delete
|
|||||||
|
|
||||||
-- generate the next level in a bottom-up route
|
-- generate the next level in a bottom-up route
|
||||||
generateNextLevel :: [ItemSet] -> [ItemSet]
|
generateNextLevel :: [ItemSet] -> [ItemSet]
|
||||||
generateNextLevel level = traceShow ("Computing level " ++ show (isSize (head level))) $ foldr (\value old -> generate value ++ old) [] level where
|
generateNextLevel level = traceShow ("Computing level " ++ show (isSize (head level))) $
|
||||||
generate value = takeWhile (/= empty) (foldr (\x old -> semiUnion value x : old) [] (tail $ List.dropWhile (/= value) level))
|
foldr (\value old -> generate value ++ old) [] level where
|
||||||
empty = ItemSet $ Set.fromList []
|
generate value = takeWhile (/= empty)
|
||||||
isSize (ItemSet set) = Set.size set
|
(foldr (\x old -> semiUnion value x : old) [] (tail $ List.dropWhile (/= value) level))
|
||||||
|
isSize (ItemSet set) = Set.size set
|
||||||
|
|
||||||
singletons :: [ItemSet] -> [Item]
|
singletons :: [ItemSet] -> [Item]
|
||||||
singletons table = Set.toList $ foldr (\(ItemSet row) old -> old `Set.union` row) (Set.fromList []) table where
|
singletons table = Set.toList $ foldr union (Set.fromList []) table where
|
||||||
|
union (ItemSet row) old = old `Set.union` row
|
||||||
|
|
||||||
frequentPatterns :: Frequency -> [ItemSet] -> [[ItemSet]]
|
frequentPatterns :: Frequency -> [ItemSet] -> [[ItemSet]]
|
||||||
frequentPatterns thresh table = until (\x -> [] == head x) (\x -> filterByFrequency (generateNextLevel (head x)) : x) [firstLevel] where
|
frequentPatterns thresh table = until (\x -> [] == head x)
|
||||||
firstLevel = filterByFrequency $ map (\x -> ItemSet $ Set.fromList [x]) (singletons table)
|
(\x -> filterByFrequency (generateNextLevel (head x)) : x) [firstLevel] where
|
||||||
filterByFrequency = filter (\x -> frequency table x >= thresh)
|
firstLevel = filterByFrequency $ map (\x -> ItemSet $ Set.fromList [x]) (singletons table)
|
||||||
|
filterByFrequency = filter (\x -> frequency table x >= thresh)
|
||||||
|
|||||||
@@ -13,6 +13,6 @@ main = do
|
|||||||
let threshold = read $ last args
|
let threshold = read $ last args
|
||||||
file <- readFile filename
|
file <- readFile filename
|
||||||
case parseCSV file of
|
case parseCSV file of
|
||||||
Left _ -> error "Could not parse out.csv"
|
Left _ -> error "Could not parse out.csv"
|
||||||
Right val -> mapM_ (\x -> putStrLn (show x ++ "(" ++ show (count table x) ++ ")")) (concat (frequentPatterns threshold table)) where
|
Right val -> mapM_ (\x -> putStrLn (show x ++ "(" ++ show (count table x) ++ ")")) (concat (frequentPatterns threshold table)) where
|
||||||
table = map (ItemSet. Set.fromList .map Item) val
|
table = map (ItemSet. Set.fromList .map Item) val
|
||||||
|
|||||||
20
phase2.hs
Normal file
20
phase2.hs
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
import CSVParser
|
||||||
|
import FrequentPatterns
|
||||||
|
import DataModel
|
||||||
|
import ExtractRules
|
||||||
|
import qualified Data.Set as Set
|
||||||
|
import System.Environment(getArgs)
|
||||||
|
import Control.Monad(when)
|
||||||
|
|
||||||
|
main :: IO()
|
||||||
|
main = do
|
||||||
|
args <- getArgs
|
||||||
|
when (2 /= length args) (error "Usage: phase2 <file.csv> <threshold>")
|
||||||
|
let filename = head args
|
||||||
|
let threshold = read $ last args
|
||||||
|
file <- readFile filename
|
||||||
|
case parseCSV file of
|
||||||
|
Left _ -> error "Could not parse out.csv"
|
||||||
|
Right val -> print $ extractRules threshold $
|
||||||
|
filter (/= empty) $ concat $ frequentPatterns 0.01 table where
|
||||||
|
table = map (ItemSet. Set.fromList .map Item) val
|
||||||
Reference in New Issue
Block a user