Merge remote-tracking branch 'origin/master'

This commit is contained in:
Xawirses
2015-04-07 14:36:22 +02:00
7 changed files with 136 additions and 44 deletions

View File

@@ -1,6 +1,6 @@
module CSVParser ( module CSVParser (
parseCSV parseCSV
)where )where
import Text.ParserCombinators.Parsec import Text.ParserCombinators.Parsec

View File

@@ -11,28 +11,41 @@ class Freq a where
frequency :: [ItemSet] -> a -> Frequency frequency :: [ItemSet] -> a -> Frequency
data Item = Item String deriving (Eq, Ord) data Item = Item String deriving (Eq, Ord)
instance Show Item where instance Show Item where
show (Item s) = s --"Item " ++ s show (Item a) = a
data ItemSet = ItemSet (Set Item) deriving (Eq, Ord) data ItemSet = ItemSet (Set Item) deriving (Eq, Ord)
instance Show ItemSet where instance Show ItemSet where
show (ItemSet x) = foldr ((\y old -> y ++ " " ++ old).show) "" (Set.toList x) show (ItemSet x) =
init $ foldr ((\y old -> y ++ " " ++ old).show) "" (Set.toList x)
instance Freq ItemSet where instance Freq ItemSet where
frequency table (ItemSet set) = setCount / fromIntegral (length table) where frequency table (ItemSet set) =
setCount / fromIntegral (length table) where
setCount = fromIntegral $ count table (ItemSet set) setCount = fromIntegral $ count table (ItemSet set)
count :: [ItemSet] -> ItemSet -> Count count :: [ItemSet] -> ItemSet -> Count
count table (ItemSet set) = length (filter (\(ItemSet row) -> set `Set.isSubsetOf` row) table) count table (ItemSet set) =
length (filter isSuperset table) where
isSuperset (ItemSet row) = set `Set.isSubsetOf` row
difference :: ItemSet -> ItemSet -> ItemSet
difference (ItemSet set1) (ItemSet set2) = ItemSet (Set.difference set1 set2)
empty :: ItemSet
empty = ItemSet (Set.fromList [])
data Rule = Rule ItemSet ItemSet deriving (Eq) data Rule = Rule ItemSet ItemSet deriving (Eq)
instance Show Rule where instance Show Rule where
show (Rule a b) = show a ++ "-> " ++ show b show (Rule a b) = show a ++ "->" ++ show b
instance Freq Rule where instance Freq Rule where
frequency table (Rule (ItemSet set1) (ItemSet set2)) = frequency table $ ItemSet (set1 `Set.union` set2) frequency table (Rule (ItemSet set1) (ItemSet set2)) = frequency table $
ItemSet (set1 `Set.union` set2)
confidence :: [ItemSet] -> Rule -> Confidence confidence :: [ItemSet] -> Rule -> Confidence
confidence table (Rule x y) = frequency table (Rule x y) / frequency table x confidence table (Rule x y) = frequency table (Rule x y) / frequency table x

32
ExtractRules.hs Normal file
View File

@@ -0,0 +1,32 @@
module ExtractRules (
extractRules
) where
import DataModel
import qualified Data.Set as Set
import qualified Data.Map as Map
import Data.Map(Map)
extractRules :: Confidence -> [ItemSet] -> [Rule]
extractRules threshold patterns = filter (\x -> threshold <= confidence patterns x) rules where
rules = Map.foldrWithKey (\k v old -> ruleFromSubset v k : old) [] subsets
subsets = foldr (\x old -> insertMultiple (filteredPowerset x) x old) Map.empty patterns
filteredPowerset (ItemSet set) = map (ItemSet . Set.fromList) $
filter (\val -> val /= Set.toList set && val /= []) $ powerset $ Set.toList set
ruleFromSubset :: ItemSet -> ItemSet -> Rule
ruleFromSubset set subset = Rule subset (difference set subset)
insertMultiple :: Ord k => [k] -> a -> Map k a -> Map k a
insertMultiple keys value m = foldr (\x old -> Map.insert x value old) m keys
powerset :: [a] -> [[a]]
powerset [] = [[]]
powerset (x:xs) = xss /\/ map (x:) xss where
xss = powerset xs
(/\/) :: [a] -> [a] -> [a]
[] /\/ ys = ys
(x:xs) /\/ ys = x : (ys /\/ xs)

11
FreqParser.hs Normal file
View File

@@ -0,0 +1,11 @@
module ItemsParser (
parseItems,
parseItemsWithFreq
)where
import qualified Data.Map as Map
import Data.Map(Map)
import DataModel
parseItems :: String -> Either ParseError [ItemSet]
parseItemsWithFreq :: String -> Either ParseError Map ItemSet Frequency

View File

@@ -1,13 +1,16 @@
module FrequentPatterns ( module FrequentPatterns (
frequentPatterns frequentPatterns
) where ) where
import DataModel import DataModel
import qualified Data.Set as Set import qualified Data.Set as Set
import Debug.Trace (traceShow) import Debug.Trace (trace)
import qualified Data.List as List import qualified Data.List as List
semiUnion :: ItemSet -> ItemSet -> ItemSet semiUnion :: ItemSet -> ItemSet -> ItemSet
semiUnion (ItemSet set1) (ItemSet set2) = ItemSet (if max1 <= max2 && Set.delete max1 set1 == Set.delete max2 set2 then set1 `Set.union` set2 else Set.empty) where semiUnion (ItemSet set1) (ItemSet set2) = ItemSet $
if max1 <= max2 && Set.delete max1 set1 == Set.delete max2 set2
then set1 `Set.union` set2
else Set.empty where
max1 = Set.findMax set1 max1 = Set.findMax set1
max2 = Set.findMax set2 max2 = Set.findMax set2
@@ -19,15 +22,19 @@ semiUnion (ItemSet set1) (ItemSet set2) = ItemSet (if max1 <= max2 && Set.delete
-- generate the next level in a bottom-up route -- generate the next level in a bottom-up route
generateNextLevel :: [ItemSet] -> [ItemSet] generateNextLevel :: [ItemSet] -> [ItemSet]
generateNextLevel level = traceShow ("Computing level " ++ show (isSize (head level))) $ foldr (\value old -> generate value ++ old) [] level where generateNextLevel level = trace ("Computing level " ++ show (isSize (head level))) $
generate value = takeWhile (/= empty) (foldr (\x old -> semiUnion value x : old) [] (tail $ List.dropWhile (/= value) level)) foldr (\value old -> generate value ++ old) [] level where
empty = ItemSet $ Set.fromList [] generate value = takeWhile (/= empty)
(foldr (\x old -> semiUnion value x : old) [] (tail $ List.dropWhile (/= value) level))
isSize (ItemSet set) = Set.size set isSize (ItemSet set) = Set.size set
singletons :: [ItemSet] -> [Item] singletons :: [ItemSet] -> [Item]
singletons table = Set.toList $ foldr (\(ItemSet row) old -> old `Set.union` row) (Set.fromList []) table where singletons table = Set.toList $ foldr union (Set.fromList []) table where
union (ItemSet row) old = old `Set.union` row
frequentPatterns :: Frequency -> [ItemSet] -> [[ItemSet]] frequentPatterns :: Frequency -> [ItemSet] -> [[ItemSet]]
frequentPatterns thresh table = until (\x -> [] == head x) (\x -> filterByFrequency (generateNextLevel (head x)) : x) [firstLevel] where frequentPatterns thresh table = until (\x -> [] == head x)
firstLevel = filterByFrequency $ map (\x -> ItemSet $ Set.fromList [x]) (singletons table) (\x -> filterByFrequency (generateNextLevel (head x)) : x) [firstLevel] where
firstLevel = filterByFrequency $ map (\x -> ItemSet $ Set.fromList [x]) $
trace "Generated Singletons" (singletons table)
filterByFrequency = filter (\x -> frequency table x >= thresh) filterByFrequency = filter (\x -> frequency table x >= thresh)

View File

@@ -8,11 +8,21 @@ import Control.Monad(when)
main :: IO() main :: IO()
main = do main = do
args <- getArgs args <- getArgs
when (2 /= length args) (error "Usage: Main <file.csv> <threshold>") when (2 > length args)
(error "Usage: Main <table.csv> <threshold> <outfile.csv>")
let filename = head args let filename = head args
let threshold = read $ last args let threshold = read $ args !! 1
file <- readFile filename file <- readFile filename
case parseCSV file of case parseCSV file of
Left _ -> error "Could not parse out.csv" Left _ -> error "Could not parse out.csv"
Right val -> mapM_ (\x -> putStrLn (show x ++ "(" ++ show (count table x) ++ ")")) (concat (frequentPatterns threshold table)) where Right val -> do
table = map (ItemSet. Set.fromList .map Item) val let table = map (ItemSet. Set.fromList .map Item) val
let freqPats = concat (frequentPatterns threshold table)
let output = formatToCSV table freqPats
putStrLn output
when (length args > 2) $
writeFile (args !! 2) $ output
formatToCSV :: [ItemSet] -> [ItemSet] -> String
formatToCSV table frequents = foldr (\x old -> old ++ formatRow x ++ "\n") "" frequents where
formatRow (ItemSet set) = init $ Set.foldr (\x old -> old ++ show x ++ ",") (show (count table (ItemSet set)) ++",") set

19
phase2.hs Normal file
View File

@@ -0,0 +1,19 @@
import CSVParser
import DataModel
import ExtractRules
import qualified Data.Set as Set
import System.Environment(getArgs)
import Control.Monad(when)
main :: IO()
main = do
args <- getArgs
when (2 /= length args) (error "Usage: phase2 <file.csv> <threshold>")
let filename = head args
let threshold = read $ last args
file <- readFile filename
case parseCSV file of
Left _ -> error "Could not parse out.csv"
Right val -> do
let table = map (ItemSet. Set.fromList .map Item) val
print $ extractRules threshold table