package ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP;
import java.io.IOException;
import java.util.List;
import java.util.Map.Entry;
import ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.dataStructures.creators.AbstractionCreator;
import ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.dataStructures.database.SequenceDatabase;
import ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.dataStructures.patterns.Pattern;
import ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.savers.Saver;
import ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.savers.SaverIntoFile;
import ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.savers.SaverIntoMemory;
import ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.tries.ShowTrie;
import ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.tries.Trie;
import ca.pfv.spmf.tools.MemoryLogger;
/**
* This is an implementation of the ClaSP algorithm. ClaSP was proposed by A.
* Gomariz et al. in 2013.
*
* NOTE: This implementation either saves the patterns to a file as soon as they
* are found or keeps them in memory, depending on what the user chooses.
*
* Copyright Antonio Gomariz Peñalver 2013
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*
* @author agomariz
*/
public class AlgoClaSP {

    /**
     * The absolute minimum support threshold, i.e. the minimum number of
     * sequences in which a pattern has to appear.
     */
    protected double minSupAbsolute;
    /**
     * Saver that receives the results. It is created by
     * {@link #runAlgorithm}: an in-memory saver when no output path is given,
     * a file saver otherwise. It is null until runAlgorithm has been called.
     */
    Saver saver = null;
    /**
     * Start and end points used to calculate the overall time taken by the
     * algorithm
     */
    public long overallStart, overallEnd;
    /**
     * Start and end points used to calculate the time taken by the main
     * part of the ClaSP algorithm
     */
    protected long mainMethodStart, mainMethodEnd;
    /**
     * Start and end points used to calculate the time taken by the
     * post-processing method of the ClaSP algorithm
     */
    protected long postProcessingStart, postProcessingEnd;
    /**
     * Trie root that starts with the empty pattern and from which we are
     * able to access all the frequent patterns generated by ClaSP. It is null
     * until the algorithm has been run.
     */
    protected Trie frequentAtomsTrie;
    /**
     * The abstraction creator
     */
    private AbstractionCreator abstractionCreator;
    /**
     * Number of frequent patterns found by the algorithm
     */
    private int numberOfFrequentPatterns;
    /**
     * Flag indicating whether we are only interested in the closed sequences.
     * When set, the post-processing step that removes the non-closed patterns
     * is executed after the main search.
     */
    private boolean findClosedPatterns;
    /**
     * Flag forwarded to the pattern enumeration class.
     * NOTE(review): presumably it enables the pruning methods during the
     * search -- confirm against FrequentPatternEnumeration_ClaSP.
     */
    private boolean executePruningMethods;
    /**
     * Join count reported by the pattern enumeration after a run.
     */
    public long joinCount; // PFV 2013

    /**
     * Constructor of the class that calls the ClaSP algorithm.
     *
     * @param minSupAbsolute Absolute minimum support
     * @param abstractionCreator the abstraction creator
     * @param findClosedPatterns flag to indicate if we are only interested in
     * the closed patterns
     * @param executePruningMethods flag forwarded to the pattern enumeration
     * class
     */
    public AlgoClaSP(double minSupAbsolute, AbstractionCreator abstractionCreator, boolean findClosedPatterns, boolean executePruningMethods) {
        this.minSupAbsolute = minSupAbsolute;
        this.abstractionCreator = abstractionCreator;
        this.findClosedPatterns = findClosedPatterns;
        this.executePruningMethods = executePruningMethods;
    }

    /**
     * Actual call to the ClaSP algorithm. The output can be either kept or
     * ignored. Whenever we choose to keep the patterns found, we can keep
     * them in a file or in the main memory.
     *
     * @param database Original database in which we want to search for the
     * frequent patterns.
     * @param keepPatterns Flag indicating if we want to keep the output or not
     * @param verbose Flag for debugging purposes
     * @param outputFilePath Path of the file in which we want to store the
     * frequent patterns. If this value is null, we keep the patterns in the
     * main memory.
     * @param outputSequenceIdentifiers indicates if sequence ids should be
     * output with each pattern found.
     * @throws IOException declared for the creation of the output saver
     */
    public void runAlgorithm(SequenceDatabase database, boolean keepPatterns, boolean verbose, String outputFilePath, boolean outputSequenceIdentifiers) throws IOException {
        if (outputFilePath == null) {
            // No file path: the user wants to keep the results in memory
            saver = new SaverIntoMemory(outputSequenceIdentifiers);
        } else {
            // Otherwise, the results go to the given file
            saver = new SaverIntoFile(outputFilePath, outputSequenceIdentifiers);
        }
        // reset the stats about memory usage
        MemoryLogger.getInstance().reset();
        // keeping the starting time
        overallStart = System.currentTimeMillis();
        try {
            // Starting the ClaSP algorithm. The cast truncates any fractional
            // part of the configured support threshold.
            claSP(database, (long) minSupAbsolute, keepPatterns, verbose, findClosedPatterns, executePruningMethods);
        } finally {
            // Record the ending time and finish the saver even when the
            // search fails, so that the saver is never left unfinished.
            overallEnd = System.currentTimeMillis();
            saver.finish();
        }
    }

    /**
     * The actual method for extracting frequent sequences.
     *
     * @param database The original database; it is cleared once the initial
     * trie of frequent items has been obtained from it
     * @param minSupAbsolute the absolute minimum support
     * @param keepPatterns flag indicating if we are interested in keeping the
     * output of the algorithm
     * @param verbose Flag for debugging purposes
     * @param findClosedPatterns flag indicating if the post-processing step
     * that removes the non-closed patterns has to be executed
     * @param executePruningMethods flag forwarded to the pattern enumeration
     * class
     */
    protected void claSP(SequenceDatabase database, long minSupAbsolute, boolean keepPatterns, boolean verbose, boolean findClosedPatterns, boolean executePruningMethods) {
        // We get the initial trie whose children are the frequent 1-patterns
        frequentAtomsTrie = database.frequentItems();
        database.clear();

        // Initialization of the class that is in charge of finding the frequent patterns
        FrequentPatternEnumeration_ClaSP frequentPatternEnumeration = new FrequentPatternEnumeration_ClaSP(abstractionCreator, minSupAbsolute, saver, findClosedPatterns, executePruningMethods);

        // We run the search, starting from the empty pattern
        this.mainMethodStart = System.currentTimeMillis();
        frequentPatternEnumeration.dfsPruning(new Pattern(), frequentAtomsTrie, verbose, null, null);
        this.mainMethodEnd = System.currentTimeMillis();

        // Once we have finished, we keep the number of frequent patterns that we found
        numberOfFrequentPatterns = frequentPatternEnumeration.getFrequentPatterns();

        // check the memory usage for statistics
        MemoryLogger.getInstance().checkMemory();

        if (verbose) {
            System.out.println("ClaSP: The algorithm takes " + (mainMethodEnd - mainMethodStart) / 1000 + " seconds and finds " + numberOfFrequentPatterns + " patterns");
        }

        if (findClosedPatterns) {
            // We are interested in closed patterns: run the post-processing step
            List<Entry<Pattern, Trie>> outputPatternsFromMainMethod = frequentAtomsTrie.preorderTraversal(null);

            this.postProcessingStart = System.currentTimeMillis();
            frequentPatternEnumeration.removeNonClosedPatterns(outputPatternsFromMainMethod, keepPatterns);
            this.postProcessingEnd = System.currentTimeMillis();

            numberOfFrequentPatterns = frequentPatternEnumeration.getFrequentPatterns();
            if (verbose) {
                System.out.println("ClaSP:The post-processing algorithm to remove the non-Closed patterns takes " + (postProcessingEnd - postProcessingStart) / 1000 + " seconds and finds " + numberOfFrequentPatterns + " Closed patterns");
            }
        } else if (keepPatterns) {
            // No post-processing: every pattern reachable from the trie is handed to the saver
            List<Entry<Pattern, Trie>> outputPatternsFromMainMethod = frequentAtomsTrie.preorderTraversal(null);
            for (Entry<Pattern, Trie> entry : outputPatternsFromMainMethod) {
                saver.savePattern(entry.getKey());
            }
        }

        // The final count is read before the enumeration is cleared
        numberOfFrequentPatterns = frequentPatternEnumeration.getFrequentPatterns();
        frequentPatternEnumeration.clear();

        // check the memory usage for statistics
        MemoryLogger.getInstance().checkMemory();

        joinCount = frequentPatternEnumeration.joinCount;
    }

    /**
     * Method to show the outlined information about the search for frequent
     * sequences by means of the ClaSP algorithm.
     *
     * @return a text with the running time, the number of patterns, the join
     * count, the maximum memory usage and, if the algorithm has been run, the
     * content printed by the saver
     */
    public String printStatistics() {
        StringBuilder r = new StringBuilder(200);
        r.append("============= Algorithm - STATISTICS =============\n Total time ~ ");
        r.append(getRunningTime());
        r.append(" ms\n");
        r.append(" Frequent sequences count : ");
        r.append(numberOfFrequentPatterns);
        r.append('\n');
        r.append(" Join count : ");
        r.append(joinCount);
        r.append('\n');
        r.append(" Max memory (mb):");
        r.append(MemoryLogger.getInstance().getMaxMemory());
        r.append('\n');
        // The saver only exists once runAlgorithm has been called
        if (saver != null) {
            r.append(saver.print());
        }
        r.append("\n===================================================\n");
        return r.toString();
    }

    /**
     * It gets the number of frequent patterns found by the last execution.
     *
     * @return the number of frequent patterns
     */
    public int getNumberOfFrequentPatterns() {
        return numberOfFrequentPatterns;
    }

    /**
     * It gets the time spent by the algorithm in its execution.
     *
     * @return the difference between the overall end and start points, in
     * milliseconds
     */
    public long getRunningTime() {
        return (overallEnd - overallStart);
    }

    /**
     * It empties the trie of frequent patterns (if the algorithm has been
     * run) and releases the abstraction creator.
     */
    public void clear() {
        // The trie only exists once the algorithm has been run
        if (frequentAtomsTrie != null) {
            frequentAtomsTrie.removeAll();
        }
        abstractionCreator = null;
    }

    /**
     * Get the trie (internal structure used by ClaSP).
     *
     * @return the trie, or null if the algorithm has not been run yet
     */
    public Trie getFrequentAtomsTrie() {
        return frequentAtomsTrie;
    }
}