package ca.pfv.spmf.algorithms.sequentialpatterns.spade_spam_AGP;
import java.util.ArrayList;
import java.util.List;
import ca.pfv.spmf.algorithms.sequentialpatterns.spade_spam_AGP.dataStructures.abstractions.ItemAbstractionPair;
import ca.pfv.spmf.algorithms.sequentialpatterns.spade_spam_AGP.dataStructures.creators.AbstractionCreator_Qualitative;
import ca.pfv.spmf.algorithms.sequentialpatterns.spade_spam_AGP.dataStructures.creators.ItemAbstractionPairCreator;
import ca.pfv.spmf.algorithms.sequentialpatterns.spade_spam_AGP.dataStructures.patterns.Pattern;
import ca.pfv.spmf.algorithms.sequentialpatterns.spade_spam_AGP.dataStructures.patterns.PatternCreator;
import ca.pfv.spmf.algorithms.sequentialpatterns.spade_spam_AGP.idLists.IDList;
import ca.pfv.spmf.algorithms.sequentialpatterns.spade_spam_AGP.savers.Saver;
/**
* This is an implementation of the main methods of SPAM algorithm. We keep open
* the decision of which IdList to use. In the original paper, the authors use a
* bitmap implementation. We have such implementation (IDListFatBitmap) but we
* also have other two ones (both based on hash maps, one with bitsets (IDListBitmap)
* and another with arraylists (IDListStandard_Map)).
*
* NOTE: This implementation saves the pattern to a file as soon as they are
* found or can keep the pattern into memory, depending on what the user choose.
*
* Copyright Antonio Gomariz PeƱalver 2013
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*
* @author agomariz
*/
public class FrequentPatternEnumeration_SPAM{
/**
* The absolute minimum support threshold, i.e. the minimum number of
* sequences where the patterns have to be
*/
private double minSupportAbsolute;
/**
* Number of frequent patterns found by the algorithm. Initially set to zero.
*/
private int frequentPatterns = 0;
/**
* The pattern creator.
*/
private final PatternCreator patternCreator;
/**
* Saver variable to decide where the user want to save the results, if it
* the case
*/
private Saver saver;
/**
* Standard constructor of the class.
* @param minSupportAbsolute The absolute minimum support threshold
* @param saver Saver object to decide where the user want to save the results, if it
* the case
*/
public FrequentPatternEnumeration_SPAM(double minSupportAbsolute, Saver saver) {
this.minSupportAbsolute = minSupportAbsolute;
this.patternCreator = PatternCreator.getInstance();
this.saver=saver;
}
/**
* Execution of the search of frequent patterns.
* @param equivalenceClass The equivalence class from we start to search for.
* @param keepPatterns Flag to indicate if we want to keep the patterns found.
* @param verbose Flag for debugging purposes
*/
public void execute(EquivalenceClass equivalenceClass, boolean keepPatterns, boolean verbose) {
int numberOfMembersInEC = equivalenceClass.getClassMembers().size();
for (int i = 0; i < numberOfMembersInEC; i++) {
//For each member of the given equivalence class
EquivalenceClass ec = equivalenceClass.getIthMember(i);
//We call to the main method of the algorithm for that equivalence class
dfs_pruning(ec, equivalenceClass.getClassMembers(), equivalenceClass.getClassMembers(),i+1, keepPatterns);
}
}
/**
* Main method of SPAM algorithm. For each processed patterns, the algorithm
* tries to make a s-extension first, and then, once it found all the possible
* s-extension, it tries to make an i-extension. The method receives two set
* of equivalence classes, one with the frequent items that can be used for
* making s-extensions and another one for making i-extensions.
* @param currentClass The current class whose identifier we are trying to extend.
* @param sequenceExtensions The set of equivalence classes, with frequent
* items as identifiers, that we use as possible s-extensions of the current
* pattern that we are processing.
* @param itemsetsExtensions The set of equivalence classes, with frequent
* items as identifiers, that we use as possible i-extensions of the current
* pattern that we are processing.
* @param beginning The beginning index from where we can use the elements of
* itemsetsExtensions set
* @param keepPatterns Flag indicating if we are interesting in saving the
* frequent patterns that we find.
*/
private void dfs_pruning(EquivalenceClass currentClass, List<EquivalenceClass> sequenceExtensions, List<EquivalenceClass> itemsetsExtensions,int beginning, boolean keepPatterns) {
//We start increasing the number of frequent patterns
frequentPatterns++;
//We get the class identifier
Pattern classIdentifier = currentClass.getClassIdentifier();
//Initialization of new sets
List<EquivalenceClass> new_sequenceExtension = new ArrayList<EquivalenceClass>();
List<EquivalenceClass> new_itemsetExtension = new ArrayList<EquivalenceClass>();
List<EquivalenceClass> new_classes = new ArrayList<EquivalenceClass>();
//Clone for the class identifier
Pattern clone = classIdentifier.clonePattern();
//For all the elements of sequenceExtensions
for (EquivalenceClass eq : sequenceExtensions) {
//We create a new pattern based in the elements of the clone
Pattern extension = patternCreator.createPattern(new ArrayList<ItemAbstractionPair>(clone.getElements()));
//And we extend it with the only element of the eq class identifier
ItemAbstractionPair newPair = eq.getClassIdentifier().getLastElement();
extension.add(newPair);
/*
* We make the join operation between both patterns in order to know
* the appearances of the new pattern and its support.
*/
IDList newIdList = currentClass.getIdList().join(eq.getIdList(), false, (int) minSupportAbsolute);
//If the new pattern is frequent
if (newIdList.getSupport() >= minSupportAbsolute) {
//We insert it its appearances
newIdList.setAppearingSequences(extension);
// and we keep the pattern if the flag is activated
if (keepPatterns) {
saver.savePattern(extension);
}
//We create a new class for the new pattern
EquivalenceClass newClass = new EquivalenceClass(extension);
//we inserted the IdList that we computed
newClass.setIdList(newIdList);
//And we insert the new class in the set of new classes
new_classes.add(newClass);
/*Normally, in the original algorithm we would insert in the new
*set of sequence extensions the same eq class that we used for
* obtaining the new pattern. The problem with this is that if we
* do it, the Idlist it remains the same. Since a IdList is denser
* (with more values) with shorter patterns (because is easier to
* find an appearance in a sequence), we can put to the pattern
* of the equivalence class of eq, the new idlist generated. Note
* that for the items in future s-extensions of this new pattern,
* we can directly pass the IdList recently computed since all
* the appearances of eq identifier will be after an appearance of
* the current class.
* Therefore, in order to shrink the IdLists and their computations
* we create a new equivalence class with the same eq identifier,
* and the idlist recently created.
*/
EquivalenceClass newEq = new EquivalenceClass(eq.getClassIdentifier(), newIdList);
/* And we add this new class as a possible future s-extension of
* the new pattern.
*/
new_sequenceExtension.add(newEq);
}
}
int sequenceExtensionSize = new_sequenceExtension.size();
//For all the elements valuables as future s-extensions
for (int i = 0; i < sequenceExtensionSize; i++) {
//we get the new pattern
EquivalenceClass newClass = new_classes.get(i);
/* And we make a recursive call to dfs_pruning with the new sequence
* extension. Besides we establish the same set as the set which we will
* make the i-extensions, but beginning from the (i+1)-th element
*/
dfs_pruning(newClass, new_sequenceExtension, new_sequenceExtension,i+1, keepPatterns);
/* Once we had finished the search for this patterns and their children,
* we can to remove that class (and its desdendants) from the memory
*/
newClass.clear();
}
/* We clear the set of the new classes discovered since we need to store
* those that we will find making i-extensions
*/
new_classes.clear();
/*
* From the beginning index to the last equivalence class appearing in
* the itemset extension set
*/
for (int k = beginning; k < itemsetsExtensions.size(); k++) {
EquivalenceClass eq = itemsetsExtensions.get(k);
//We create a new pattern with the elements of the current class identifier
Pattern extension = patternCreator.createPattern(new ArrayList<ItemAbstractionPair>(clone.getElements()));
//And we add it the current item of itemset extension set
ItemAbstractionPair newPair = ItemAbstractionPairCreator.getInstance().getItemAbstractionPair(eq.getClassIdentifier().getLastElement().getItem(), AbstractionCreator_Qualitative.getInstance().createAbstraction(true));
extension.add(newPair);
/*
* We make the join operation between both patterns in order to know
* the appearances of the new pattern and its support.
*/
IDList newIdList = currentClass.getIdList().join(eq.getIdList(), true, (int) minSupportAbsolute);
//If the new pattern is frequent
if (newIdList.getSupport() >= minSupportAbsolute) {
//We insert it its appearances
newIdList.setAppearingSequences(extension);
// and we keep the pattern if the flag is activated
if (keepPatterns) {
saver.savePattern(extension);
}
//We create a new class for the new pattern
EquivalenceClass newClass = new EquivalenceClass(extension);
//we inserted the IdList that we computed
newClass.setIdList(newIdList);
//And we insert the new class in the set of new classes
new_classes.add(newClass);
/*Normally, in the original algorithm we would insert in the new
*set of itemset extensions the same eq class that we used for
* obtaining the new pattern. The problem with this is that if we
* do it, the Idlist it remains the same. Since a IdList is denser
* (with more values) with shorter patterns (because is easier to
* find an appearance in a sequence), we can put to the pattern
* of the equivalence class of eq, the new idlist generated. Note
* that for the items in future i-extensions of this new pattern,
* we can directly pass the IdList recently computed since all
* the appearances of eq identifier will be after an appearance of
* the current class.
* Therefore, in order to shrink the IdLists and their computations
* we create a new equivalence class with the same eq identifier,
* and the idlist recently created.
*/
EquivalenceClass newEq = new EquivalenceClass(eq.getClassIdentifier(), newIdList);
/* And we add this new class as a possible future s-extension of
* the new pattern.
*/
new_itemsetExtension.add(newEq);
}
}
int itemsetExtensionSize = new_itemsetExtension.size();
//For all the elements valuables as future i-extensions
for (int i = 0; i < itemsetExtensionSize; i++) {
//we get the new pattern
EquivalenceClass newClass = new_classes.get(i);
/* And we make a recursive call to dfs_pruning with the new itemset
* extension. The beginning of the itemset extension set will be
* starting from the (i+1)-th element.
*/
dfs_pruning(newClass, new_sequenceExtension, new_itemsetExtension,i+1, keepPatterns);
newClass.clear();
}
}
/**
* It returns the number of frequent patterns found by the last execution of
* the algorithm.
* @return the number of frequent patterns found.
*/
public int getFrequentPatterns() {
return frequentPatterns;
}
public void setFrequentPatterns(int frequentPatterns) {
this.frequentPatterns = frequentPatterns;
}
}