package ca.pfv.spmf.algorithms.frequentpatterns.eclat;
/* This file is copyright (c) 2008-2013 Philippe Fournier-Viger
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*/
import java.util.BitSet;
import java.util.Map;
import ca.pfv.spmf.datastructures.triangularmatrix.TriangularMatrix;
import ca.pfv.spmf.input.transaction_database_list_integers.TransactionDatabase;
import ca.pfv.spmf.patterns.itemset_array_integers_with_tids_bitset.Itemset;
import ca.pfv.spmf.patterns.itemset_array_integers_with_tids_bitset.Itemsets;
import ca.pfv.spmf.tools.MemoryLogger;
/**
* This is an implementation of the dECLAT algorithm. The difference between DECLAT
* and ECLAT is that dECLAT utilizes diffsets instead of tidsets.
* In this implementation, diffsets are represented as bitsets.
* Note that this class is a subclass of the ECLAT algorithm because a lot of
* code is the same and we wanted to avoid redundancy.
* Note also that implementing diffsets using bitsets may not provide optimal performance for
* dEclat: even if diffsets are smaller than tidsets, as bitsets they may not be much
* smaller. It is thus recommended to also try the regular implementation.
*
* IMPORTANT: dEclat returns Itemsets annotated with their diffsets
* rather than tidsets when the user chooses to keep the result in memory.
*
* DEclat was proposed by ZAKI (2000).
* <br/><br/>
*
* See these articles for details about ECLAT and dEclat:
* <br/><br/>
*
* Zaki, M. J. (2000). Scalable algorithms for association mining. Knowledge and Data Engineering, IEEE Transactions on, 12(3), 372-390.
* <br/><br/>
*
* and: <br/><br/>
*
* M. J. Zaki and K. Gouda. Fast vertical mining using Diffsets. Technical Report 01-1, Computer Science
* Dept., Rensselaer Polytechnic Institute, March 2001.
*
* This version saves the result to a file,
* or keeps it in memory if no output path is provided
* by the user to the runAlgorithm() method.
*
* @see TriangularMatrix
* @see TransactionDatabase
* @see Itemset
* @see Itemsets
* @author Philippe Fournier-Viger
*/
public class AlgoDEclat_Bitset extends AlgoEclat_Bitset {

    /**
     * Writes a short execution summary to System.out: the number of
     * transactions in the database, the number of frequent itemsets found,
     * the elapsed time and the maximum memory usage reported by the
     * MemoryLogger.
     */
    public void printStats() {
        final long elapsedMillis = endTime - startTimestamp;
        System.out.println("============= DECLAT vALTERNATE-Bitset v0.96r6- STATS =============");
        System.out.println(" Transactions count from database : " + database.size());
        System.out.println(" Frequent itemsets count : " + itemsetCount);
        System.out.println(" Total time ~ " + elapsedMillis + " ms");
        System.out.println(" Maximum memory usage : "
                + MemoryLogger.getInstance().getMaxMemory() + " mb");
        System.out.println("===================================================");
    }

    /**
     * Scans the database once and builds, for every single item, its diffset
     * (one bit per transaction, set when the transaction does NOT contain
     * the item) together with its support.
     *
     * @param database    the transaction database to scan
     * @param mapItemTIDS the map that is filled with the diffset and support of each item
     * @return the largest item id encountered, or 0 if no item was seen
     */
    int calculateSupportSingleItems(TransactionDatabase database,
            final Map<Integer, BitSetSupport> mapItemTIDS) {
        int largestItem = 0;

        // The index of a transaction in the database is used as its id.
        for (int tid = 0; tid < database.size(); tid++) {
            for (Integer item : database.getTransactions().get(tid)) {
                BitSetSupport diffset = mapItemTIDS.get(item);

                if (diffset == null) {
                    // First occurrence of this item: start from a diffset that
                    // contains every transaction; bits are cleared below as
                    // the transactions holding the item are discovered.
                    diffset = new BitSetSupport();
                    diffset.bitset.set(0, database.size());
                    mapItemTIDS.put(item, diffset);

                    // An item can only raise the maximum the first time it is seen.
                    largestItem = Math.max(largestItem, item);
                }

                // This transaction contains the item, so it leaves the diffset
                // and counts towards the support.
                diffset.bitset.clear(tid);
                diffset.support += 1;
            }
        }
        return largestItem;
    }

    /**
     * Combines the diffsets of two itemsets containing more than one item.
     * The resulting diffset holds the bits that are set in the second diffset
     * but not in the first; the resulting support is the support of the first
     * operand minus the cardinality of that diffset.
     *
     * @param tidsetI the first diffset (with its support)
     * @param tidsetJ the second diffset
     * @return a new object holding the resulting diffset and its support
     */
    BitSetSupport performAND(BitSetSupport tidsetI, BitSetSupport tidsetJ) {
        BitSetSupport combined = new BitSetSupport();

        // Work on a copy so that neither operand is modified.
        BitSet difference = (BitSet) tidsetJ.bitset.clone();
        difference.andNot(tidsetI.bitset);
        combined.bitset = difference;

        combined.support = tidsetI.support - difference.cardinality();
        return combined;
    }

    /**
     * Combines the diffsets of two single items. The computation is the same
     * as in {@code performAND}: the resulting diffset holds the bits set in
     * the second diffset but not in the first, and the support is derived
     * from the cardinality of that diffset.
     *
     * @param tidsetI   the first diffset (with its support)
     * @param tidsetJ   the second diffset
     * @param supportIJ the already known support of the combination; this
     *                  implementation does not read it and recomputes the
     *                  support from the diffset instead
     * @return a new object holding the resulting diffset and its support
     */
    BitSetSupport performANDFirstTime(BitSetSupport tidsetI,
            BitSetSupport tidsetJ, int supportIJ) {
        BitSetSupport combined = new BitSetSupport();

        // Work on a copy so that neither operand is modified.
        BitSet difference = (BitSet) tidsetJ.bitset.clone();
        difference.andNot(tidsetI.bitset);
        combined.bitset = difference;

        combined.support = tidsetI.support - difference.cardinality();
        return combined;
    }
}