package ca.pfv.spmf.algorithms.sequentialpatterns.prefixSpan_AGP;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import ca.pfv.spmf.algorithms.sequentialpatterns.prefixSpan_AGP.items.Item;
import ca.pfv.spmf.algorithms.sequentialpatterns.prefixSpan_AGP.items.Pair;
import ca.pfv.spmf.algorithms.sequentialpatterns.prefixSpan_AGP.items.PseudoSequence;
import ca.pfv.spmf.algorithms.sequentialpatterns.prefixSpan_AGP.items.PseudoSequenceDatabase;
import ca.pfv.spmf.algorithms.sequentialpatterns.prefixSpan_AGP.items.abstractions.Abstraction_Generic;
import ca.pfv.spmf.algorithms.sequentialpatterns.prefixSpan_AGP.items.abstractions.ItemAbstractionPair;
import ca.pfv.spmf.algorithms.sequentialpatterns.prefixSpan_AGP.items.creators.AbstractionCreator;
import ca.pfv.spmf.algorithms.sequentialpatterns.prefixSpan_AGP.items.creators.ItemAbstractionPairCreator;
import ca.pfv.spmf.algorithms.sequentialpatterns.prefixSpan_AGP.items.patterns.Pattern;
import ca.pfv.spmf.algorithms.sequentialpatterns.prefixSpan_AGP.savers.Saver;
/**
* This is an the real execution of PrefixSpan algorithm.
* The main methods of this class are called from class AlgoPrefixSpan_AGP, and
* the main loop of the algorithm is executed here.
*
* NOTE: This implementation saves the pattern to a file as soon
* as they are found or can keep the pattern into memory, depending
* on what the user choose.
*
* Copyright Antonio Gomariz Peñalver 2013
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*
* @author agomariz
*/
class RecursionPrefixSpan_AGP {
/**
* Abstraction creator
*/
private AbstractionCreator abstractionCreator;
/**
* Saver, got from Class AlgoPrefixSpan where the user has already chosen
* where he wants to keep the results.
*/
private Saver saver;
/**
* absolute minimum support.
*/
private long minSupportAbsolute;
/**
* Original pseudosequence database (without infrequent items)
*/
private PseudoSequenceDatabase pseudoDatabase;
/**
* Map which match the frequent items with their appearances
*/
private Map<Item, BitSet> mapSequenceID;
/**
* Number of frequent items found by PrefixSpan
*/
private int numberOfFrequentPatterns = 0;
/**
* Standard constructor
* @param abstractionCreator the abstraction creator
* @param saver The saver for correctly save the results where the user wants
* @param minSupportAbsolute The absolue minimum support
* @param pseudoDatabase The original pseudoSequence database (without frequent items)
* @param mapSequenceID Map which match the frequent items with their appearances
*/
public RecursionPrefixSpan_AGP(AbstractionCreator abstractionCreator, Saver saver, long minSupportAbsolute, PseudoSequenceDatabase pseudoDatabase, Map<Item, BitSet> mapSequenceID) {
this.abstractionCreator = abstractionCreator;
this.saver = saver;
this.minSupportAbsolute = minSupportAbsolute;
this.pseudoDatabase = pseudoDatabase;
this.mapSequenceID = mapSequenceID;
}
/**
* It executes the actual PrefixSpan Algorithm
* @param keepPatterns Flag indicating if the user wants to keep the results
* or he is just interested in the number of frequent patterns
* @param verbose Flag for debugging purposes
*/
public void execute(boolean keepPatterns, boolean verbose) {
//We get all the frequent items and we sort them
List<Item> keySetList = new ArrayList<Item>(mapSequenceID.keySet());
Collections.sort(keySetList);
if (verbose) {
System.out.println(keySetList.size() + " frequent items");
}
int numberOfFrequentItems = keySetList.size();
int cont = 0;
//For each frequent item
for (Item item : keySetList) {
cont++;
if (verbose) {
System.out.println("Projecting item = " + item + " (" + cont + "/" + numberOfFrequentItems + ")");
}
// We make a projection in the original database
PseudoSequenceDatabase projectedContext = makePseudoProjections(item, pseudoDatabase, abstractionCreator.CreateDefaultAbstraction(), true);
// And we create a new 1-pattern with that frequent item
ItemAbstractionPair pair = new ItemAbstractionPair(item, abstractionCreator.CreateDefaultAbstraction());
Pattern prefix = new Pattern(pair);
//And we insert it its appearances
prefix.setAppearingIn((BitSet) ((mapSequenceID.get(item).clone())));
if (keepPatterns) {
//We keep the 1-patterns if the flag is active
saver.savePattern(prefix);
}
//We update the number of frequent patterns
numberOfFrequentPatterns++;
if (projectedContext != null && projectedContext.size() >= minSupportAbsolute) {
//And we call the main loop
prefixSpanLoop(prefix, 2, projectedContext, keepPatterns, verbose);
}
}
}
/**
* It projects the database given as parameter
* @param item The item from which we make the projection
* @param database The database where we make the projection
* @param abstraction Abstraction associated with the item to project
* @param firstTime Flag that points out if it the first time that
* @return The new projected database
*/
private PseudoSequenceDatabase makePseudoProjections(Item item, PseudoSequenceDatabase database, Abstraction_Generic abstraction, boolean firstTime) {
// The projected pseudo-database
PseudoSequenceDatabase newProjectedDatabase = new PseudoSequenceDatabase();
List<PseudoSequence> pseudoSequences = database.getPseudoSequences();
for (int sequenceIndex = 0; sequenceIndex < pseudoSequences.size(); sequenceIndex++) { // for each sequence
PseudoSequence sequence = pseudoSequences.get(sequenceIndex);
/* We guess the maximum size that our new projected database can
* achieve. If its number of potential sequences is less than the
* minimum support, we can stop projecting
*/
int potentialSize = newProjectedDatabase.size() + pseudoSequences.size() - sequenceIndex;
if (potentialSize < minSupportAbsolute) {
return null;
}
/*Flag indicating if the current sequence has already been projected
* for the new projected database
*/
boolean alreadyProjected = false;
//Initialization of the new projected sequence for the current one
PseudoSequence newSequence = null;
//Initialization of the number of projections done in the current sequence
int numberOfProjections = 0;
//Set keeping the projections already done
Set<Integer> projectionsAlreadyMade = new HashSet<Integer>();
//For all the existing projections in the current sequence
for (int k = 0; k < sequence.numberOfProjectionsIncluded(); k++) {
int sequenceSize = sequence.size(k);
// for each itemset of the sequence
for (int i = 0; i < sequenceSize; i++) {
// we get the index ofthe given item to project in current the itemset
int index = sequence.indexOf(k, i, item);
//If the item has been found and either is the first projection or the method compute is true
if (index != -1 && (firstTime || (abstraction.compute(sequence, k, i)))) {
int itemsetSize = sequence.getSizeOfItemsetAt(k, i);
// if the found item is not the last item of the itemset
if (index != itemsetSize - 1) {
//If this sequence has not been yet projected
if (!alreadyProjected) {
//A new pseudosequence is created starting from the next point to the found item
newSequence = new PseudoSequence(sequence.getRelativeTimeStamp(i, k), sequence, i, index + 1, k);
//We keep the projection point
projectionsAlreadyMade.add(sequence.getFirstItemset(k) + i);
//If the new pseudosequence has more than one item
if (newSequence.size(numberOfProjections) > 0) {
//we increase the number of projections
numberOfProjections++;
//And we add the new projected sequence to the new database
newProjectedDatabase.addSequence(newSequence);
}
/*We set the flag to true, indicating that the
* current sequence has been already projected
*/
alreadyProjected = true;
} else {
/*If the sequence is already projected and the
projection point has not been previously used*/
if (projectionsAlreadyMade.add(sequence.getFirstItemset(k) + i)) {
/*We make another projection in the same
* sequence previously projected, adding a
* new projection point*/
newSequence.addProjectionPoint(k, sequence.getRelativeTimeStamp(i, k), sequence, i, index + 1);
}
}
/* if the found item is the last item of the sequence
* and the item where it is, it is not the last itemset
* of the sequence*/
} else if ((i != sequenceSize - 1)) {
//and has not been yet projected
if (!alreadyProjected) {
/*We create a new projected sequence starting
* in the next itemset to where the item appeared*/
newSequence = new PseudoSequence(sequence.getRelativeTimeStamp(i, k), sequence, i + 1, 0, k);
//And we count the projection
projectionsAlreadyMade.add(sequence.getFirstItemset(k) + i);
//If there is any item in the new sequence
if (itemsetSize > 0 && newSequence.size(numberOfProjections) > 0) {
//we increase the number of projections
numberOfProjections++;
//And we add the new projected sequence to the new database
newProjectedDatabase.addSequence(newSequence);
}
/*We set the flag to true, indicating that the
* current sequence has been already projected
*/
alreadyProjected = true;
} else {
/*If the sequence is already projected and the
projection point has not been previously used*/
if (projectionsAlreadyMade.add(sequence.getFirstItemset(k) + i)) {
/*We make another projection in the same
* sequence previously projected, adding a
* new projection point*/
newSequence.addProjectionPoint(k, sequence.getRelativeTimeStamp(i, k), sequence, i + 1, 0);
}
}
}
}
}
}
}
return newProjectedDatabase;
}
/**
* Method that executes the main loop of prefixSpan for all the patterns
* with a size greater than 1
* @param prefix prefix from which we made the projected database and where
* the frequent items that we find will be added
* @param k size of patterns that are going to be generated
* @param context prefix-projected databases
* @param keepPatterns flag indicating if we want to keep the output or we
* are interesting in just the number of frequent patterns
* @param verbose flag for debuggin purposes
*/
private void prefixSpanLoop(Pattern prefix, int k, PseudoSequenceDatabase context, boolean keepPatterns, boolean verbose) {
// find frequent items that appear in the given pseudosequence database.
Set<Pair> pairs = abstractionCreator.findAllFrequentPairs(context.getPseudoSequences());
ItemAbstractionPairCreator pairCreator = ItemAbstractionPairCreator.getInstance();
if (verbose) {
StringBuilder tab = new StringBuilder();
for (int i = 0; i < k - 2; i++) {
tab.append('\t');
}
System.out.println(tab + "Projecting prefix = " + prefix);
System.out.print(tab + "\tFound " + pairs.size() + " frequent items in this projection\n");
}
// For each pair found,
for (Pair pair : pairs) {
// if the item is frequent.
if (pair.getSupport() >= minSupportAbsolute) {
// create the new pattern
Pattern newPrefix = prefix.clonePattern();
ItemAbstractionPair newPair = pairCreator.getItemAbstractionPair(pair.getPair().getItem(), abstractionCreator.createAbstractionFromAPrefix(prefix, pair.getPair().getAbstraction()));
newPrefix.add(newPair);
// build the projected database with respect to this frequent item (the item which forms the prefix)
PseudoSequenceDatabase projection = makePseudoProjections(pair.getPair().getItem(), context, pair.getPair().getAbstraction(), false);
// We add its set of sequences where the prefix appear
newPrefix.setAppearingIn((BitSet) (pair.getSequencesID().clone()));
// if the flag of keeping patterns if active, we keep this new pattern
if (keepPatterns) {
saver.savePattern(newPrefix);
}
//update the number of frequent patterns
numberOfFrequentPatterns++;
//If the projection exists and has more sequences than the absolute minimum support
if (projection != null && projection.size() >= minSupportAbsolute) {
//We make a recursive call to the main method
prefixSpanLoop(newPrefix, k + 1, projection, keepPatterns, verbose); // r�cursion
}
}
}
}
/**
* It returns the number of frequent patterns.
* @return the number of frequent patterns.
*/
public int numberOfFrequentPatterns() {
return numberOfFrequentPatterns;
}
/**
* It clears the attributes of this class.
*/
public void clear() {
if (saver != null) {
saver.clear();
saver = null;
}
if (pseudoDatabase != null) {
pseudoDatabase.clear();
pseudoDatabase = null;
}
if (mapSequenceID != null) {
mapSequenceID.clear();
mapSequenceID = null;
}
}
}