From 24d0dcba96a320e6f76582c191a3d28bd57eec66 Mon Sep 17 00:00:00 2001 From: IGI-111 Date: Sat, 4 Apr 2015 02:15:06 +0200 Subject: [PATCH] reworked modules to allow for multiple executables --- .gitignore | 8 +++--- DataModel.hs | 39 +++++++++++++++++++++++++++++ Apriori.hs => FrequentPatterns.hs | 41 +++++++------------------------ Main.hs => phase1.hs | 9 +++---- 4 files changed, 56 insertions(+), 41 deletions(-) create mode 100644 DataModel.hs rename Apriori.hs => FrequentPatterns.hs (56%) rename Main.hs => phase1.hs (83%) diff --git a/.gitignore b/.gitignore index 930ed00..dcff96a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -*.hi -*.o -Main -*.csv +#Ignore anything but haskell source and shell scripts +* +!*.hs +!*.sh diff --git a/DataModel.hs b/DataModel.hs new file mode 100644 index 0000000..eff9c54 --- /dev/null +++ b/DataModel.hs @@ -0,0 +1,39 @@ +module DataModel where +import Data.Set (Set) +import qualified Data.Set as Set + +type Count = Int + +type Frequency = Double +type Confidence = Double + +class Freq a where + frequency :: [ItemSet] -> a -> Frequency + +data Item = Item String deriving (Eq, Ord) +instance Show Item where + show (Item s) = s --"Item " ++ s + +data ItemSet = ItemSet (Set Item) deriving (Eq, Ord) + +instance Show ItemSet where + show (ItemSet x) = foldr ((\y old -> y ++ " " ++ old).show) "" (Set.toList x) + +instance Freq ItemSet where + frequency table (ItemSet set) = setCount / fromIntegral (length table) where + setCount = fromIntegral $ count table (ItemSet set) + +count :: [ItemSet] -> ItemSet -> Count +count table (ItemSet set) = length (filter (\(ItemSet row) -> set `Set.isSubsetOf` row) table) + +data Rule = Rule ItemSet ItemSet deriving (Eq) + +instance Show Rule where + show (Rule a b) = show a ++ "-> " ++ show b + +instance Freq Rule where + frequency table (Rule (ItemSet set1) (ItemSet set2)) = frequency table $ ItemSet (set1 `Set.union` set2) + +confidence :: [ItemSet] -> Rule -> Confidence +confidence table (Rule x y) = frequency table (Rule x y) / frequency table x + diff --git a/Apriori.hs b/FrequentPatterns.hs similarity index 56% rename from Apriori.hs rename to FrequentPatterns.hs index 45e1e2d..1ee7eeb 100644 --- a/Apriori.hs +++ b/FrequentPatterns.hs @@ -1,30 +1,10 @@ -module Apriori where - -import Data.Set (Set) +module FrequentPatterns ( + frequentPatterns +) where +import DataModel import qualified Data.Set as Set -import qualified Data.List as List import Debug.Trace (traceShow) - -data Item = Item String deriving (Eq, Ord) -instance Show Item where - show (Item s) = s --"Item " ++ s - -data ItemSet = ItemSet (Set Item) deriving (Eq, Ord) -instance Show ItemSet where - show (ItemSet x) = foldr ((\y old -> y ++ " " ++ old).show) "" (Set.toList x) - -data Rule = Rule ItemSet ItemSet deriving (Eq) -instance Show Rule where - show (Rule a b) = show a ++ "-> " ++ show b - -type Frequency = Double - -type Count = Int - -frequency :: [ItemSet] -> ItemSet -> Frequency -frequency table (ItemSet set) = setCount / fromIntegral (length table) where - setCount = fromIntegral $ count table (ItemSet set) - +import qualified Data.List as List semiUnion :: ItemSet -> ItemSet -> ItemSet semiUnion (ItemSet set1) (ItemSet set2) = ItemSet (if max1 <= max2 && Set.delete max1 set1 == Set.delete max2 set2 then set1 `Set.union` set2 else Set.empty) where @@ -32,10 +12,10 @@ semiUnion (ItemSet set1) (ItemSet set2) = ItemSet (if max1 <= max2 && Set.delete max2 = Set.findMax set2 -- generate all possible combinations from a set of singletons -generateLevels :: [Item] -> [[ItemSet]] -generateLevels singles = until (\x -> head x == lastLevel) (\x -> generateNextLevel (head x) : x) [firstLevel] where - firstLevel = map (\x -> ItemSet $ Set.fromList [x]) singles - lastLevel = [ItemSet $ Set.fromList singles] +-- generateLevels :: [Item] -> [[ItemSet]] +-- generateLevels singles = until (\x -> head x == lastLevel) (\x -> generateNextLevel (head x) : x) [firstLevel] where +-- firstLevel = map (\x -> ItemSet $ Set.fromList [x]) singles +-- lastLevel = [ItemSet $ Set.fromList singles] -- generate the next level in a bottom-up route generateNextLevel :: [ItemSet] -> [ItemSet] @@ -44,9 +24,6 @@ generateNextLevel level = traceShow ("Computing level " ++ show (isSize (head le empty = ItemSet $ Set.fromList [] isSize (ItemSet set) = Set.size set -count :: [ItemSet] -> ItemSet -> Count -count table (ItemSet set) = length (filter (\(ItemSet row) -> set `Set.isSubsetOf` row) table) - singletons :: [ItemSet] -> [Item] singletons table = Set.toList $ foldr (\(ItemSet row) old -> old `Set.union` row) (Set.fromList []) table where diff --git a/Main.hs b/phase1.hs similarity index 83% rename from Main.hs rename to phase1.hs index 011200c..74eb09d 100644 --- a/Main.hs +++ b/phase1.hs @@ -1,10 +1,9 @@ -module Main where - import CSVParser -import Apriori +import FrequentPatterns +import DataModel import qualified Data.Set as Set -import System.Environment (getArgs) -import Control.Monad +import System.Environment(getArgs) +import Control.Monad(when) main :: IO() main = do