Merge remote-tracking branch 'origin/master'

This commit is contained in:
Xawirses
2015-04-07 14:36:22 +02:00
7 changed files with 136 additions and 44 deletions

View File

@@ -1,31 +1,31 @@
module CSVParser (
parseCSV
)where
parseCSV
)where
import Text.ParserCombinators.Parsec
csvFile :: GenParser Char st [[String]]
csvFile = do
result <- many line
eof
return result
result <- many line
eof
return result
line :: GenParser Char st [String]
line = do
result <- cells
_ <- eol
return result
result <- cells
_ <- eol
return result
cells :: GenParser Char st [String]
cells = do
first <- cellContent
next <- remainingCells
return (first : next)
first <- cellContent
next <- remainingCells
return (first : next)
remainingCells :: GenParser Char st [String]
remainingCells =
(char ',' >> cells) -- Found comma? More cells coming
<|> return [] -- No comma? Return [], no more cells
(char ',' >> cells) -- Found comma? More cells coming
<|> return [] -- No comma? Return [], no more cells
cellContent :: GenParser Char st String
cellContent = many (noneOf ",\n")

View File

@@ -8,31 +8,44 @@ type Frequency = Double
type Confidence = Double
class Freq a where
frequency :: [ItemSet] -> a -> Frequency
frequency :: [ItemSet] -> a -> Frequency
data Item = Item String deriving (Eq, Ord)
instance Show Item where
show (Item s) = s --"Item " ++ s
show (Item a) = a
data ItemSet = ItemSet (Set Item) deriving (Eq, Ord)
instance Show ItemSet where
show (ItemSet x) = foldr ((\y old -> y ++ " " ++ old).show) "" (Set.toList x)
show (ItemSet x) =
init $ foldr ((\y old -> y ++ " " ++ old).show) "" (Set.toList x)
instance Freq ItemSet where
frequency table (ItemSet set) = setCount / fromIntegral (length table) where
setCount = fromIntegral $ count table (ItemSet set)
frequency table (ItemSet set) =
setCount / fromIntegral (length table) where
setCount = fromIntegral $ count table (ItemSet set)
count :: [ItemSet] -> ItemSet -> Count
count table (ItemSet set) = length (filter (\(ItemSet row) -> set `Set.isSubsetOf` row) table)
count table (ItemSet set) =
length (filter isSuperset table) where
isSuperset (ItemSet row) = set `Set.isSubsetOf` row
difference :: ItemSet -> ItemSet -> ItemSet
difference (ItemSet set1) (ItemSet set2) = ItemSet (Set.difference set1 set2)
empty :: ItemSet
empty = ItemSet (Set.fromList [])
data Rule = Rule ItemSet ItemSet deriving (Eq)
instance Show Rule where
show (Rule a b) = show a ++ "-> " ++ show b
show (Rule a b) = show a ++ "->" ++ show b
instance Freq Rule where
frequency table (Rule (ItemSet set1) (ItemSet set2)) = frequency table $ ItemSet (set1 `Set.union` set2)
frequency table (Rule (ItemSet set1) (ItemSet set2)) = frequency table $
ItemSet (set1 `Set.union` set2)
confidence :: [ItemSet] -> Rule -> Confidence
confidence table (Rule x y) = frequency table (Rule x y) / frequency table x

32
ExtractRules.hs Normal file
View File

@@ -0,0 +1,32 @@
module ExtractRules (
extractRules
) where
import DataModel
import qualified Data.Set as Set
import qualified Data.Map as Map
import Data.Map(Map)
extractRules :: Confidence -> [ItemSet] -> [Rule]
extractRules threshold patterns = filter (\x -> threshold <= confidence patterns x) rules where
rules = Map.foldrWithKey (\k v old -> ruleFromSubset v k : old) [] subsets
subsets = foldr (\x old -> insertMultiple (filteredPowerset x) x old) Map.empty patterns
filteredPowerset (ItemSet set) = map (ItemSet . Set.fromList) $
filter (\val -> val /= Set.toList set && val /= []) $ powerset $ Set.toList set
ruleFromSubset :: ItemSet -> ItemSet -> Rule
ruleFromSubset set subset = Rule subset (difference set subset)
insertMultiple :: Ord k => [k] -> a -> Map k a -> Map k a
insertMultiple keys value m = foldr (\x old -> Map.insert x value old) m keys
powerset :: [a] -> [[a]]
powerset [] = [[]]
powerset (x:xs) = xss /\/ map (x:) xss where
xss = powerset xs
(/\/) :: [a] -> [a] -> [a]
[] /\/ ys = ys
(x:xs) /\/ ys = x : (ys /\/ xs)

11
FreqParser.hs Normal file
View File

@@ -0,0 +1,11 @@
module ItemsParser (
parseItems,
parseItemsWithFreq
)where
import qualified Data.Map as Map
import Data.Map(Map)
import DataModel
parseItems :: String -> Either ParseError [ItemSet]
parseItemsWithFreq :: String -> Either ParseError Map ItemSet Frequency

View File

@@ -1,15 +1,18 @@
module FrequentPatterns (
frequentPatterns
) where
frequentPatterns
) where
import DataModel
import qualified Data.Set as Set
import Debug.Trace (traceShow)
import Debug.Trace (trace)
import qualified Data.List as List
semiUnion :: ItemSet -> ItemSet -> ItemSet
semiUnion (ItemSet set1) (ItemSet set2) = ItemSet (if max1 <= max2 && Set.delete max1 set1 == Set.delete max2 set2 then set1 `Set.union` set2 else Set.empty) where
max1 = Set.findMax set1
max2 = Set.findMax set2
semiUnion (ItemSet set1) (ItemSet set2) = ItemSet $
if max1 <= max2 && Set.delete max1 set1 == Set.delete max2 set2
then set1 `Set.union` set2
else Set.empty where
max1 = Set.findMax set1
max2 = Set.findMax set2
-- generate all possible combinations from a set of singletons
-- generateLevels :: [Item] -> [[ItemSet]]
@@ -19,15 +22,19 @@ semiUnion (ItemSet set1) (ItemSet set2) = ItemSet (if max1 <= max2 && Set.delete
-- generate the next level in a bottom-up route
generateNextLevel :: [ItemSet] -> [ItemSet]
generateNextLevel level = traceShow ("Computing level " ++ show (isSize (head level))) $ foldr (\value old -> generate value ++ old) [] level where
generate value = takeWhile (/= empty) (foldr (\x old -> semiUnion value x : old) [] (tail $ List.dropWhile (/= value) level))
empty = ItemSet $ Set.fromList []
isSize (ItemSet set) = Set.size set
generateNextLevel level = trace ("Computing level " ++ show (isSize (head level))) $
foldr (\value old -> generate value ++ old) [] level where
generate value = takeWhile (/= empty)
(foldr (\x old -> semiUnion value x : old) [] (tail $ List.dropWhile (/= value) level))
isSize (ItemSet set) = Set.size set
singletons :: [ItemSet] -> [Item]
singletons table = Set.toList $ foldr (\(ItemSet row) old -> old `Set.union` row) (Set.fromList []) table where
singletons table = Set.toList $ foldr union (Set.fromList []) table where
union (ItemSet row) old = old `Set.union` row
frequentPatterns :: Frequency -> [ItemSet] -> [[ItemSet]]
frequentPatterns thresh table = until (\x -> [] == head x) (\x -> filterByFrequency (generateNextLevel (head x)) : x) [firstLevel] where
firstLevel = filterByFrequency $ map (\x -> ItemSet $ Set.fromList [x]) (singletons table)
filterByFrequency = filter (\x -> frequency table x >= thresh)
frequentPatterns thresh table = until (\x -> [] == head x)
(\x -> filterByFrequency (generateNextLevel (head x)) : x) [firstLevel] where
firstLevel = filterByFrequency $ map (\x -> ItemSet $ Set.fromList [x]) $
trace "Generated Singletons" (singletons table)
filterByFrequency = filter (\x -> frequency table x >= thresh)

View File

@@ -7,12 +7,22 @@ import Control.Monad(when)
main :: IO()
main = do
args <- getArgs
when (2 /= length args) (error "Usage: Main <file.csv> <threshold>")
let filename = head args
let threshold = read $ last args
file <- readFile filename
case parseCSV file of
Left _ -> error "Could not parse out.csv"
Right val -> mapM_ (\x -> putStrLn (show x ++ "(" ++ show (count table x) ++ ")")) (concat (frequentPatterns threshold table)) where
table = map (ItemSet. Set.fromList .map Item) val
args <- getArgs
when (2 > length args)
(error "Usage: Main <table.csv> <threshold> <outfile.csv>")
let filename = head args
let threshold = read $ args !! 1
file <- readFile filename
case parseCSV file of
Left _ -> error "Could not parse out.csv"
Right val -> do
let table = map (ItemSet. Set.fromList .map Item) val
let freqPats = concat (frequentPatterns threshold table)
let output = formatToCSV table freqPats
putStrLn output
when (length args > 2) $
writeFile (args !! 2) $ output
formatToCSV :: [ItemSet] -> [ItemSet] -> String
formatToCSV table frequents = foldr (\x old -> old ++ formatRow x ++ "\n") "" frequents where
formatRow (ItemSet set) = init $ Set.foldr (\x old -> old ++ show x ++ ",") (show (count table (ItemSet set)) ++",") set

19
phase2.hs Normal file
View File

@@ -0,0 +1,19 @@
import CSVParser
import DataModel
import ExtractRules
import qualified Data.Set as Set
import System.Environment(getArgs)
import Control.Monad(when)
main :: IO()
main = do
args <- getArgs
when (2 /= length args) (error "Usage: phase2 <file.csv> <threshold>")
let filename = head args
let threshold = read $ last args
file <- readFile filename
case parseCSV file of
Left _ -> error "Could not parse out.csv"
Right val -> do
let table = map (ItemSet. Set.fromList .map Item) val
print $ extractRules threshold table