Apriori Powaa

This commit is contained in:
Xawirses
2015-04-01 17:39:35 +02:00
parent 80b360fcb1
commit ffd45a82bc

93
apriori.hs Normal file
View File

@@ -0,0 +1,93 @@
module Main where
import System.Environment
import qualified Data.List as List
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Map (Map)
import qualified Data.Map as Map
import Text.ParserCombinators.Parsec
-- data structures defined here
data Item = Item String deriving (Eq, Ord)
instance Show Item where
show (Item s) = "Item " ++ s
data ItemSet = ItemSet (Set Item) deriving (Eq, Ord)
instance Show ItemSet where
show (ItemSet x) = let y = foldl (++) "" $ map ((\x -> x ++ ",") . show) $ Set.toList x
in "{" ++ y ++ "}"
type Support = Double
type Confidence = Double
support :: [ItemSet] -> ItemSet -> Support
support iss is = (fromIntegral $ count iss is) / (fromIntegral $ length iss)
where
count iss (ItemSet is) = length $ filter (Set.isSubsetOf is) $ map (\(ItemSet x) -> x) iss
confidence :: [ItemSet] -> ItemSet -> Item -> Confidence
confidence iss i@(ItemSet is) j = (support iss $ ItemSet $ Set.union is $ Set.singleton j) / (support iss i)
-- parse CSV
csvFile = endBy line eol
line = do
cells <- sepBy cell $ char ','
return $ ItemSet $ Set.fromList cells
cell = do
c <- many $ noneOf ",\r\n"
return $ Item $ c
eol = try (string "\n\r")
<|> try (string "\r\n")
<|> string "\n"
<|> string "\r"
parseCSV :: String -> Either ParseError [ItemSet]
parseCSV input = parse csvFile ("unknown") input
-- frequent items
frequentItems :: [ItemSet] -> Support -> [(Item, Integer)]
frequentItems x y = Set.toList $ Map.keysSet pi frequency where
pi = Map.filter (>= frequency) m where
frequency = ceiling(y * (fromIntegral $ length x)) where
m = foldl count Map.empty x where
count m (ItemSet is) = Set.foldl f m is where
f m i = Map.insertWith acc i 1 m where
acc new old = old + 1
-- frequent itemsets
-- frequentItemSets :: [ItemSet] -> [Item] -> Support -> [ItemSet]
main :: IO ()
main = do
putStrLn "Apriori Algorithm for frequent itemsets mining"
[f, s] <- getArgs
c <- readFile f
case parse csvFile "(stdin)" c of
Left e -> do putStrLn "Error parsing input: "
print e
Right r -> do
let i = frequentItems r (read s)
mapM_ print $ i
-- helper functions defined here
trim :: [Char] -> [Char]
trim = trimFront . trimBack
trimFront :: [Char] -> [Char]
trimFront (x:xs) = case x of
' ' -> trim xs
_ -> (x:xs)
trimBack :: [Char] -> [Char]
trimBack x = reverse $ trimFront $ reverse x
allItems :: [ItemSet] -> [Item]
allItems iss = Set.toList $ foldl add Set.empty iss where
add s (ItemSet is) = Set.union s is
candidate :: [ItemSet] -> [Item] -> [ItemSet]
candidate x y = List.nub $ [ItemSet (Set.union (Set.singleton b) a) | (ItemSet a) <- x, b <- y, Set.notMember b a]