package ca.pfv.spmf.algorithms.frequentpatterns.eclat;
/* This file is copyright (c) 2008-2013 Philippe Fournier-Viger
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*/
import java.util.HashSet;
import java.util.Set;
import ca.pfv.spmf.datastructures.triangularmatrix.TriangularMatrix;
import ca.pfv.spmf.input.transaction_database_list_integers.TransactionDatabase;
import ca.pfv.spmf.patterns.itemset_array_integers_with_count.Itemset;
import ca.pfv.spmf.patterns.itemset_array_integers_with_count.Itemsets;
import ca.pfv.spmf.tools.MemoryLogger;
/**
* This is a version of the dECLAT algorithm. It uses sets of integers to represent tidsets. It
* extends the class AlgoEclat to avoid redundancy of common code.
* Note that unlike Eclat, dEclat returns itemsets annotated with diffsets instead of tidsets.
* About implementation details, note that this implementation uses tidsets initially for single items,
* then it uses diffsets starting from itemsets containing two items (2-itemsets).
*
* See this article for details about dECLAT:
* <br/><br/>
*
* Zaki, M.J., Gouda, K.: Fast vertical mining using diffsets. Technical Report 01-1, Computer Science Dept., Rensselaer Polytechnic Institute (March 2001) 10
* <br/><br/>
*
* This version saves the result to a file,
* or keeps it in memory if no output path is provided
* by the user to the runAlgorithm() method.
*
* @see TriangularMatrix
* @see TransactionDatabase
* @see Itemset
* @see Itemsets
* @author Philippe Fournier-Viger
*/
public class AlgoDEclat extends AlgoEclat{

    /**
     * Prints a summary of the algorithm execution to System.out.
     * <p>
     * The summary shows the size of the database, the number of frequent
     * itemsets, the difference between the end and start timestamps, and the
     * maximum memory usage reported by the MemoryLogger singleton. The fields
     * read here (database, itemsetCount, startTimestamp, endTime) are not
     * declared in this class.
     */
    public void printStats() {
        // difference between the recorded end and start timestamps (printed with an "ms" label)
        final long elapsedMillis = endTime - startTimestamp;

        System.out.println("============= dECLAT v0.96r6 - STATS =============");
        System.out.println(" Transactions count from database : " + database.size());
        System.out.println(" Frequent itemsets count : " + itemsetCount);
        System.out.println(" Total time ~ " + elapsedMillis + " ms");
        System.out.println(" Maximum memory usage : "
                + MemoryLogger.getInstance().getMaxMemory() + " mb");
        System.out.println("===================================================");
    }

    /**
     * Builds the first diffset of a pair of items from their two tidsets.
     * <p>
     * The result contains every tid of {@code tidsetI} that does not appear in
     * {@code tidsetJ}, i.e. the set difference tidsetI \ tidsetJ. Neither input
     * set is modified.
     *
     * @param tidsetI  the tidset of the first item (the set that is iterated)
     * @param supportI the cardinality of the first tidset (not used by this implementation)
     * @param tidsetJ  the tidset of the second item (the set whose tids are excluded)
     * @param supportJ the cardinality of the second tidset (not used by this implementation)
     * @return a new set holding the tids that are in tidsetI but not in tidsetJ
     */
    Set<Integer> performANDFirstTime(Set<Integer> tidsetI, int supportI,
            Set<Integer> tidsetJ, int supportJ) {
        // keep the tids of I that are absent from J
        return collectTidsMissingFrom(tidsetI, tidsetJ);
    }

    /**
     * Builds a new diffset from two existing diffsets.
     * <p>
     * The result contains every tid of {@code tidsetJ} that does not appear in
     * {@code tidsetI}, i.e. the set difference tidsetJ \ tidsetI. Note that the
     * direction is the opposite of {@link #performANDFirstTime}. Neither input
     * set is modified.
     *
     * @param tidsetI  the first diffset (the set whose tids are excluded)
     * @param supportI the cardinality of the first diffset (not used by this implementation)
     * @param tidsetJ  the second diffset (the set that is iterated)
     * @param supportJ the cardinality of the second diffset (not used by this implementation)
     * @return a new set holding the tids that are in tidsetJ but not in tidsetI
     */
    Set<Integer> performAND(Set<Integer> tidsetI, int supportI,
            Set<Integer> tidsetJ, int supportJ) {
        // keep the tids of J that are absent from I
        return collectTidsMissingFrom(tidsetJ, tidsetI);
    }

    /**
     * Computes the support of an itemset X from its tidset or diffset.
     * <p>
     * When {@code lengthOfX} is 1 the given set is treated as a tidset and its
     * size is the support. Otherwise the set is treated as a diffset and the
     * support is the support of the prefix minus the size of the diffset.
     *
     * @param lengthOfX     the length value supplied by the caller; the value 1 selects the tidset case
     * @param supportPrefix the support of the prefix (only used in the diffset case)
     * @param tidsetX       the tidset (tidset case) or diffset (diffset case) of X
     * @return the support of X
     */
    int calculateSupport(int lengthOfX, int supportPrefix, Set<Integer> tidsetX) {
        final int cardinality = tidsetX.size();
        // tidset case: the size is the support; diffset case: subtract it from the prefix support
        return (lengthOfX == 1) ? cardinality : supportPrefix - cardinality;
    }

    /**
     * Collects, into a new HashSet, each element of {@code source} that is not
     * contained in {@code excluded}.
     *
     * @param source   the set whose elements are examined
     * @param excluded the set of elements to leave out
     * @return a new set containing the elements of source that are absent from excluded
     */
    private static Set<Integer> collectTidsMissingFrom(Set<Integer> source, Set<Integer> excluded) {
        Set<Integer> remaining = new HashSet<Integer>();
        for (Integer tid : source) {
            if (!excluded.contains(tid)) {
                remaining.add(tid);
            }
        }
        return remaining;
    }
}