package ca.pfv.spmf.algorithms.sequential_rules.cmrules;
/* This file is copyright (c) 2008-2013 Philippe Fournier-Viger
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*/
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import ca.pfv.spmf.datastructures.triangularmatrix.TriangularMatrix;
import ca.pfv.spmf.input.transaction_database_list_integers.TransactionDatabase;
import ca.pfv.spmf.patterns.itemset_array_integers_with_tids.Itemset;
import ca.pfv.spmf.patterns.itemset_array_integers_with_tids.Itemsets;
/**
* This is an implementation of the AprioriTID algorithm that is
* modified to be used with the CMRules algorithm. AprioriTID was
* proposed in:
* <br/><br/>
* Agrawal R, Srikant R. "Fast Algorithms for Mining Association Rules", VLDB. Sep 12-15 1994, Chile, 487-99,
* <br/><br/>
*
* The Apriori algorithm finds all the frequents itemsets and their support
* in a binary context.
*
* @author Philippe Fournier-Viger
*/
public class AlgoAprioriTID_forCMRules {
// frequent itemsets found by the algorithm
protected Itemsets frequentItemsets = new Itemsets("FREQUENT ITEMSETS");
// transaction database
protected TransactionDatabase database;
// the current level
protected int k;
// a triangular matrix for efficiently counting the support of pairs of items
// (received as parameter and optional)
TriangularMatrix matrix;
// the minimum support threshold
int minSuppRelative;
// Special parameter to set the maximum size of itemsets to be discovered
int maxItemsetSize = Integer.MAX_VALUE;
/**
* Constructor
* @param database the transaction database
* @param matrix the triangular matrix
*/
public AlgoAprioriTID_forCMRules(TransactionDatabase database, TriangularMatrix matrix) {
this.database = database;
this.matrix = matrix;
}
/**
* Run the algorithm.
* @param minsuppRelative the minimum support threshold
* @param listFrequentsSize1 the list of frequent itemsets of size 1
* @param mapItemCount a map of items (key) and their tidset (value).
* @return the frequent itemsets
*/
public Itemsets runAlgorithm(int minsuppRelative, List<Integer> listFrequentsSize1,
Map<Integer, Set<Integer>> mapItemCount) {
// save the minimum suppor threshold
this.minSuppRelative = minsuppRelative;
// To build level 1, we keep only the frequent candidates.
// We scan the database one time to calculate the support of each candidate.
k=1;
List<Itemset> level = createLevelWithFrequentItemsetsSize1(listFrequentsSize1, mapItemCount);
// Generate candidates with size k = 1 (all itemsets of size 1)
k = 2;
// While the level is not empty
while (!level.isEmpty() && k <= maxItemsetSize) {
// We build the level k+1 with all the candidates that have
// a support higher than the minsup threshold.
level = generateCandidateSizeK(level);;
k++;
}
// return frequent itemsets
return frequentItemsets; // Return all frequent itemsets found!
}
/**
* Generate frequents itemsets of size 1
* @param listFrequentsSize1 list of frequent items of size 1
* @param mapItemCount a map indicating the tidset (value) of each item (key)
* @return the itemsets of size1
*/
protected List<Itemset> createLevelWithFrequentItemsetsSize1(List<Integer> listFrequentsSize1, Map<Integer, Set<Integer>> mapItemCount) {
// create the structure to store itemsets of size 1
List<Itemset> levelK = new ArrayList<Itemset>();
// for each item in the list of frequent items
for(Integer item : listFrequentsSize1){
// create an itemset
Itemset itemset = new Itemset(item);
itemset.setTIDs(mapItemCount.get(item));
// add it to the level k that will be used for generating k+1 later on...
levelK.add(itemset);
// add the itemset to frequent itemsets
frequentItemsets.addItemset(itemset, k);
}
// return itemsets of size k
return levelK;
}
/**
* Generate candidate itemsets of size K by using itemsets of size k-1
* @param levelK_1 itemsets of size k-1
* @return candidates of size K
*/
protected List<Itemset> generateCandidateSizeK(List<Itemset> levelK_1) {
// a set to store candidates of size K
List<Itemset> candidates = new ArrayList<Itemset>();
// For each itemset I1 and I2 of level k-1
loop1: for(int i=0; i< levelK_1.size(); i++){
Itemset itemset1 = levelK_1.get(i);
loop2: for(int j=i+1; j< levelK_1.size(); j++){
Itemset itemset2 = levelK_1.get(j);
// we compare items of itemset1 and itemset2.
// If they have all the same k-1 items and the last item of
// itemset1 is smaller than
// the last item of itemset2, we will combine them to generate a
// candidate
for(int k=0; k< itemset1.size(); k++){
// if they are the last items
if(k == itemset1.size()-1){
// the one from itemset1 should be smaller (lexical order)
// and different from the one of itemset2
if(itemset1.getItems()[k] >= itemset2.get(k)){
continue loop1;
}
}
// if the k-th items is smalle rinn itemset1
else if(itemset1.getItems()[k] < itemset2.get(k)){
continue loop2; // we continue searching
}
else if(itemset1.getItems()[k]> itemset2.get(k)){
continue loop1; // we stop searching: because of lexical order
}
}
// create list of common tids
Set<Integer> list = new HashSet<Integer>();
// for each tid from the tidset of itemset1
for(Integer val1 : itemset1.getTransactionsIds()){
// if it appears also in the tidset of itemset2
if(itemset2.getTransactionsIds().contains(val1)){
// add it to common tids
list.add(val1);
}
}
// if the combination of itemset1 and itemset2 is frequent
if(list.size() >= minSuppRelative){
// Create a new candidate by combining itemset1 and itemset2
int newItemset[] = new int[itemset1.size()+1];
System.arraycopy(itemset1.itemset, 0, newItemset, 0, itemset1.size());
newItemset[itemset1.size()] = itemset2.getItems()[itemset2.size() -1];
Itemset candidate = new Itemset(newItemset);
candidate.setTIDs(list);
// add it to the list of candidates
candidates.add(candidate);
// add it to the list of frequent itemsets
frequentItemsets.addItemset(candidate, k);
}
}
}
// return the list of candidates
return candidates;
}
/**
* Get the frequent itemsets.
* @return an object Itemsets containing the frequent itemsets.
*/
public Itemsets getItemsets() {
return frequentItemsets;
}
/**
* Set the maximum frequent itemset size to be discovered.
* @param maxItemsetSize the maximum size as a int.
*/
public void setMaxItemsetSize(int maxItemsetSize) {
this.maxItemsetSize = maxItemsetSize;
}
}