Apriori Powaa
This commit is contained in:
93
apriori.hs
Normal file
93
apriori.hs
Normal file
@@ -0,0 +1,93 @@
|
||||
module Main where
|
||||
|
||||
import System.Environment
|
||||
import qualified Data.List as List
|
||||
import Data.Set (Set)
|
||||
import qualified Data.Set as Set
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map
|
||||
import Text.ParserCombinators.Parsec
|
||||
|
||||
-- data structures defined here
|
||||
|
||||
data Item = Item String deriving (Eq, Ord)
|
||||
instance Show Item where
|
||||
show (Item s) = "Item " ++ s
|
||||
|
||||
data ItemSet = ItemSet (Set Item) deriving (Eq, Ord)
|
||||
instance Show ItemSet where
|
||||
show (ItemSet x) = let y = foldl (++) "" $ map ((\x -> x ++ ",") . show) $ Set.toList x
|
||||
in "{" ++ y ++ "}"
|
||||
|
||||
type Support = Double
|
||||
type Confidence = Double
|
||||
|
||||
support :: [ItemSet] -> ItemSet -> Support
|
||||
support iss is = (fromIntegral $ count iss is) / (fromIntegral $ length iss)
|
||||
where
|
||||
count iss (ItemSet is) = length $ filter (Set.isSubsetOf is) $ map (\(ItemSet x) -> x) iss
|
||||
|
||||
confidence :: [ItemSet] -> ItemSet -> Item -> Confidence
|
||||
confidence iss i@(ItemSet is) j = (support iss $ ItemSet $ Set.union is $ Set.singleton j) / (support iss i)
|
||||
|
||||
-- parse CSV
|
||||
|
||||
csvFile = endBy line eol
|
||||
line = do
|
||||
cells <- sepBy cell $ char ','
|
||||
return $ ItemSet $ Set.fromList cells
|
||||
cell = do
|
||||
c <- many $ noneOf ",\r\n"
|
||||
return $ Item $ c
|
||||
eol = try (string "\n\r")
|
||||
<|> try (string "\r\n")
|
||||
<|> string "\n"
|
||||
<|> string "\r"
|
||||
|
||||
parseCSV :: String -> Either ParseError [ItemSet]
|
||||
parseCSV input = parse csvFile ("unknown") input
|
||||
|
||||
-- frequent items
|
||||
frequentItems :: [ItemSet] -> Support -> [(Item, Integer)]
|
||||
frequentItems x y = Set.toList $ Map.keysSet pi frequency where
|
||||
pi = Map.filter (>= frequency) m where
|
||||
frequency = ceiling(y * (fromIntegral $ length x)) where
|
||||
m = foldl count Map.empty x where
|
||||
count m (ItemSet is) = Set.foldl f m is where
|
||||
f m i = Map.insertWith acc i 1 m where
|
||||
acc new old = old + 1
|
||||
|
||||
-- frequent itemsets
|
||||
-- frequentItemSets :: [ItemSet] -> [Item] -> Support -> [ItemSet]
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
putStrLn "Apriori Algorithm for frequent itemsets mining"
|
||||
[f, s] <- getArgs
|
||||
c <- readFile f
|
||||
case parse csvFile "(stdin)" c of
|
||||
Left e -> do putStrLn "Error parsing input: "
|
||||
print e
|
||||
Right r -> do
|
||||
let i = frequentItems r (read s)
|
||||
mapM_ print $ i
|
||||
|
||||
-- helper functions defined here
|
||||
|
||||
trim :: [Char] -> [Char]
|
||||
trim = trimFront . trimBack
|
||||
|
||||
trimFront :: [Char] -> [Char]
|
||||
trimFront (x:xs) = case x of
|
||||
' ' -> trim xs
|
||||
_ -> (x:xs)
|
||||
|
||||
trimBack :: [Char] -> [Char]
|
||||
trimBack x = reverse $ trimFront $ reverse x
|
||||
|
||||
allItems :: [ItemSet] -> [Item]
|
||||
allItems iss = Set.toList $ foldl add Set.empty iss where
|
||||
add s (ItemSet is) = Set.union s is
|
||||
|
||||
candidate :: [ItemSet] -> [Item] -> [ItemSet]
|
||||
candidate x y = List.nub $ [ItemSet (Set.union (Set.singleton b) a) | (ItemSet a) <- x, b <- y, Set.notMember b a]
|
||||
Reference in New Issue
Block a user