40 lines
1.5 KiB
Haskell
40 lines
1.5 KiB
Haskell
module FrequentPatterns (
|
|
frequentPatterns
|
|
) where
|
|
import DataModel
|
|
import qualified Data.Set as Set
|
|
import Debug.Trace (trace)
|
|
import qualified Data.List as List
|
|
import Control.Parallel.Strategies(parMap, rpar)
|
|
|
|
semiUnion :: ItemSet -> ItemSet -> ItemSet
|
|
semiUnion (ItemSet set1) (ItemSet set2) = ItemSet $
|
|
if max1 <= max2 && Set.delete max1 set1 == Set.delete max2 set2
|
|
then set1 `Set.union` set2
|
|
else Set.empty
|
|
where
|
|
max1 = Set.findMax set1
|
|
max2 = Set.findMax set2
|
|
|
|
-- generate the next level in a bottom-up route
|
|
generateNextLevel :: [ItemSet] -> [ItemSet]
|
|
generateNextLevel level = trace ("Computing level " ++ show (isSize (head level))) $
|
|
foldr (\value old -> generate value ++ old) [] level
|
|
where
|
|
generate value = takeWhile (/= empty) $
|
|
parMap rpar (semiUnion value) (tail $ List.dropWhile (/= value) level) -- FIXME: this could be a better strategy
|
|
isSize (ItemSet set) = Set.size set
|
|
|
|
singletons :: [ItemSet] -> [Item]
|
|
singletons table = Set.toList $ foldr union (Set.fromList []) table
|
|
where
|
|
union (ItemSet row) old = old `Set.union` row
|
|
|
|
frequentPatterns :: Frequency -> [ItemSet] -> [[ItemSet]]
|
|
frequentPatterns thresh table = until (\x -> [] == head x)
|
|
(\x -> filterByFrequency (generateNextLevel (head x)) : x) [firstLevel]
|
|
where
|
|
firstLevel = filterByFrequency $ map (\x -> ItemSet $ Set.fromList [x]) $
|
|
trace "Generated Singletons" (singletons table)
|
|
filterByFrequency = filter (\x -> frequency table x >= thresh)
|