package ca.pfv.spmf.algorithms.frequentpatterns.clostream;
/* This file is copyright (c) 2008-2013 Philippe Fournier-Viger
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*/
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import ca.pfv.spmf.patterns.itemset_array_integers_with_count.Itemset;
/**
* This is an implementation of the CloStream algorithm for mining
* closed itemsets from a stream as proposed
* by S.J Yen et al.(2009)
* in the proceedings of the IEA-AIE 2009 conference, pp.773.
* <br/><br/>
*
* It is a very simple algorithm that do not use a minimum support threshold.
* It thus finds all closed itemsets.
*
*@see Itemset
*@author Philippe Fournier-Viger
*/
public class AlgoCloSteam {
// a table to store the closed itemsets
List<Itemset> tableClosed = new ArrayList<Itemset>();
//
Map<Integer, List<Integer>> cidListMap = new HashMap<Integer, List<Integer>>();
/**
* Constructor that also initialize the algorithm
*/
public AlgoCloSteam() {
// create the empty set with a support of 0
Itemset emptySet = new Itemset(new int[] {});
emptySet.setAbsoluteSupport(0);
// add the empty set in the list of closed sets
tableClosed.add(emptySet);
}
/**
* This method process a new transaction from a stream to update
* the set of closed itemsets.
* @param transaction a transaction (Itemset)
*/
public void processNewTransaction(Itemset transaction){
// a temporary table (as described in the paper) to
// associate itemsets with cids.
Map<Itemset, Integer> tableTemp = new HashMap<Itemset, Integer>();
// Line 02 of the pseudocode in the article
// We add the transaction in a temporary table
tableTemp.put(transaction, 0);
// Line 03 of the pseudocode in the article
// Create a set to store the combined cidlist of items in the transaction
Set<Integer> cidset = new HashSet<Integer>();
// For each item in the transaction
for(Integer item : transaction.getItems()){
// get the cid list of that item
List<Integer> cidlist = cidListMap.get(item);
if(cidlist != null){
// add the cid list to the combined cid list
cidset.addAll(cidlist);
}
}
// Line 04 of the pseudocode in the article
// For each cid in the combined set of cids
for(Integer cid : cidset){
// Get the closed itemset corresponding to this cid
Itemset cti = tableClosed.get(cid);
// create the intersection of this closed itemset
// and the transaction.
Itemset intersectionS = (Itemset) transaction.intersection(cti);
// Check if the intersection calculated in the previous step is in Temp
boolean found = false;
// for each entry in temp
for(Map.Entry<Itemset, Integer> entry : tableTemp.entrySet()){
// if it is equal to the intersection
if(entry.getKey().isEqualTo(intersectionS)){
// we found it
found = true;
// Get the corresponding closed itemsetitemset
Itemset ctt = tableClosed.get(entry.getValue());
// if the support of cti is higher than ctt
if(cti.getAbsoluteSupport() > ctt.getAbsoluteSupport()){
// set the value as "cid".
entry.setValue(cid);
}
break;
}
}
// If the search was unsuccessful
if(found == false){
// add the instersection to the temporary table with "cid".
tableTemp.put(intersectionS, cid);
}
}
// For each entry in the temporary table
for(Map.Entry<Itemset, Integer> xc : tableTemp.entrySet()){
// get the itemset
Itemset x = xc.getKey();
// get the cid
Integer c = xc.getValue();
// get the closed itemset for that cid
Itemset ctc = tableClosed.get(c);
// if the itemset is the same as the closed itemset
if(x.isEqualTo(ctc)){
// we have to increase its support
ctc.increaseTransactionCount();
}else{
// otherwise the itemset "x" is added to the table of closed itemsets
tableClosed.add(x);
// its support count is set to the support of ctc + 1.
x.setAbsoluteSupport(ctc.getAbsoluteSupport()+1);
// Finally, we loop over each item in the transaction again
for(Integer item : transaction.getItems()){
// we get the cidlist of the current item
List<Integer> cidlist = cidListMap.get(item);
// if null
if(cidlist == null){
cidlist = new ArrayList<Integer>();
// we create one
cidListMap.put(item, cidlist);
}
// then we add x to the cidlist
cidlist.add(tableClosed.size()-1);
}
}
}
}
/**
* Get the current list of closed itemsets without the empty set.
* @return a List of closed itemsets
*/
public List<Itemset> getClosedItemsets() {
// if the empty set is here
if(tableClosed.get(0).size() ==0){
// remove it
tableClosed.remove(0);
}
// return the remaining closed itemsets
return tableClosed;
}
}