package ca.pfv.spmf.algorithms.sequentialpatterns.spam;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import ca.pfv.spmf.patterns.itemset_list_integers_without_support.Itemset;
import ca.pfv.spmf.tools.MemoryLogger;
/**
* *
* This is an implementation of the VGEN algorithm.
* <br/><br/>
*
* Copyright (c) 2014 Philippe Fournier-Viger, Antonio Gomariz
* <br/><br/>
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
* <br/><br/>
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
* <br/><br/>
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* <br/><br/>
*
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*
* @see Bitmap
* @see PrefixVGEN
* @see PatternVGEN
* @author Philippe Fournier-Viger & Antonio Gomariz
*/
public class AlgoVGEN {
// Statistics about the last execution. The timestamps come from
// System.currentTimeMillis(); patternCount is the number of patterns currently
// stored in generatorPatterns.
public long startTime;
public long endTime;
public int patternCount;
// Absolute minimum support (a number of sequences), computed from the
// relative threshold given to runAlgorithm().
private int minsup = 0;
// object to write to a file
BufferedWriter writer = null;
// Vertical database: key = item, value = bitmap of that item
Map<Integer, Bitmap> verticalDB = new HashMap<Integer, Bitmap>();
// For each sequence (in file order), the index of its first bit in the bitmaps,
// i.e. the number of itemsets that precede the sequence in the database.
List<Integer> sequencesSize = null;
int lastBitIndex = 0; // the last bit position that is used in bitmaps
// maximum pattern length in terms of item count
private int maximumPatternLength = Integer.MAX_VALUE;
// Co-occurrence maps (CMAP): key = item, value = map from another item to the
// number of sequences in which that other item co-occurs with the key.
// coocMapAfter counts items found in a later itemset of the same sequence,
// coocMapEquals counts items found later in the same itemset.
// (could be replaced with a triangular matrix...)
Map<Integer, Map<Integer, Integer>> coocMapAfter = null;
Map<Integer, Map<Integer, Integer>> coocMapEquals = null;
// If true, the depth-first search skips an extension when the co-occurrence
// count of the candidate item with the last appended item is below minsup.
boolean useCMAPPruning = true;
// ========= VARIABLES THAT ARE SPECIFIC TO VGEN ===================
// GENERATOR PATTERNS - The list contains patterns of size k at position k in the list.
// A map has the sum of sids as key and lists of patterns as value.
List<Map<Integer, List<PatternVGEN>>> generatorPatterns = null;
// variables to enable/disable strategies
private boolean useImmediateBackwardChecking = true;
private boolean useBackwardPruning = false;
// if enabled, the result will be verified to see if some patterns found are not generators.
boolean DEBUG_MODE = false;
// the number of sequences in the database (used as the support of the empty pattern)
int transactionCount = 0;
//========= END OF VARIABLES THAT ARE SPECIFIC TO VGEN ===================
// the max gap between two itemsets of a pattern.
// It is an optional parameter that the user can set.
private int maxGap = Integer.MAX_VALUE;
/**
* Default constructor. It performs no work: every field keeps the default
* value given at its declaration until runAlgorithm() is called.
*/
public AlgoVGEN() {
}
/**
 * Runs the algorithm on a sequence database file and writes the generator
 * patterns that are found to an output file.
 * <p>
 * The output file is opened (and therefore truncated) before mining starts and
 * is always closed when this method returns, even if mining or writing fails.
 *
 * @param input path to an input file
 * @param outputFilePath path for writing the output file
 * @param minsupRel the minimum support as a relative value
 * @return the generator patterns found; position k of the list holds the
 *         patterns containing k items, grouped by the sid-sum of their bitmap
 * @throws IOException exception if error while writing the file or reading
 */
public List<Map<Integer, List<PatternVGEN>>> runAlgorithm(String input, String outputFilePath, double minsupRel) throws IOException {
    if (DEBUG_MODE) {
        System.out.println(" %%%%%%%%%% DEBUG MODE %%%%%%%%%%");
    }
    Bitmap.INTERSECTION_COUNT = 0;

    // try-with-resources guarantees that the output file is closed on every exit path
    try (BufferedWriter output = new BufferedWriter(new FileWriter(outputFilePath))) {
        // writeResultTofile() writes through this field
        writer = output;
        // initialize the number of patterns found
        patternCount = 0;
        // to log the memory used
        MemoryLogger.getInstance().reset();

        // the measured time covers the mining only, not the writing of the result
        startTime = System.currentTimeMillis();
        vgen(input, minsupRel);
        endTime = System.currentTimeMillis();

        writeResultTofile(outputFilePath);
    }

    if (DEBUG_MODE) {
        printPatternsThatAreNotGenerators();
    }
    return generatorPatterns;
}

/**
 * Debugging aid: prints every stored pattern that is not a generator, i.e. that
 * has the support of the empty pattern or of another stored pattern it contains.
 */
private void printPatternsThatAreNotGenerators() {
    System.out.println("minsup absolute : " + minsup);

    // flatten the result structure into a single list
    List<PatternVGEN> listPatterns = new ArrayList<PatternVGEN>();
    for (Map<Integer, List<PatternVGEN>> mapSizeI : generatorPatterns) {
        if (mapSizeI == null) {
            continue;
        }
        for (List<PatternVGEN> listpattern : mapSizeI.values()) {
            listPatterns.addAll(listpattern);
        }
    }

    for (PatternVGEN pat1 : listPatterns) {
        // a non-empty pattern having the support of the empty pattern is not a generator
        if (pat1.prefix.size() > 0 && pat1.getAbsoluteSupport() == transactionCount) {
            System.out.println("NOT A GENERATOR !!!!!!!!! " + pat1.prefix + " sup: " + pat1.bitmap.getSupport() + " because of empty set");
        }
        // otherwise we have to compare with every other pattern
        for (PatternVGEN pat2 : listPatterns) {
            if (pat1 == pat2) {
                continue;
            }
            if (pat1.getAbsoluteSupport() == pat2.getAbsoluteSupport()
                    && strictlyContains(pat1.prefix, pat2.prefix)) {
                System.out.println("NOT A GENERATOR !!!!!!!!! " + pat1.prefix + " " + pat2.prefix + " sup: " + pat1.bitmap.getSupport());
                System.out.println(pat1.bitmap.sidsum + " " + pat2.bitmap.sidsum);
            }
        }
    }
}
/**
 * This is the main method for the VGEN algorithm. It loads the database,
 * builds the vertical (bitmap) representation and the co-occurrence maps,
 * stores the single-item generators, launches the depth-first search from each
 * frequent item and finally stores the empty pattern.
 * <p>
 * Empty lines and lines starting with '#', '%' or '@' are ignored by both
 * passes over the file, so that sequence identifiers and the sequence count
 * stay consistent between the two passes.
 *
 * @param input path to the input file
 * @param minsupRel the minimum support as a relative value
 * @throws IOException if the input file cannot be opened or read
 * @throws NumberFormatException if a token of a sequence is not an integer
 */
private void vgen(String input, double minsupRel) throws IOException {
    // position 0 receives the empty pattern, position 1 the single items
    generatorPatterns = new ArrayList<Map<Integer, List<PatternVGEN>>>(20);
    generatorPatterns.add(new HashMap<Integer, List<PatternVGEN>>());
    generatorPatterns.add(new HashMap<Integer, List<PatternVGEN>>());

    // the structure to store the vertical database
    // key: an item    value : bitmap
    verticalDB = new HashMap<Integer, Bitmap>();
    // structure to store the horizontal database
    List<int[]> inMemoryDB = new ArrayList<int[]>();

    // STEP 0: SCAN THE DATABASE TO STORE THE FIRST BIT POSITION OF EACH SEQUENCE
    // AND CALCULATE THE TOTAL NUMBER OF BITS FOR EACH BITMAP
    sequencesSize = new ArrayList<Integer>();
    lastBitIndex = 0;
    try (BufferedReader reader = openSequenceFile(input)) {
        String thisLine;
        int bitIndex = 0;
        while ((thisLine = reader.readLine()) != null) {
            if (isCommentOrMetadataLine(thisLine)) {
                continue;
            }
            // record the position of the first bit of the current sequence
            sequencesSize.add(bitIndex);
            String[] tokens = thisLine.split(" ");
            int[] transactionArray = new int[tokens.length];
            inMemoryDB.add(transactionArray);
            for (int i = 0; i < tokens.length; i++) {
                int item = Integer.parseInt(tokens[i]);
                transactionArray[i] = item;
                // -1 ends an itemset: one more bit is needed in each bitmap
                if (item == -1) {
                    bitIndex++;
                }
            }
        }
        // record the last bit position for the bitmaps
        lastBitIndex = bitIndex - 1;
    }

    // Calculate the absolute minimum support by multiplying the percentage
    // with the number of sequences in this database
    minsup = (int) Math.ceil(minsupRel * sequencesSize.size());
    if (minsup == 0) {
        minsup = 1;
    }

    // STEP1: SCAN THE DATABASE TO CREATE THE BITMAP VERTICAL DATABASE REPRESENTATION
    transactionCount = 0;
    try (BufferedReader reader = openSequenceFile(input)) {
        String thisLine;
        int sid = 0; // to know which sequence we are scanning
        int tid = 0; // to know which itemset we are scanning
        while ((thisLine = reader.readLine()) != null) {
            // must skip exactly the lines skipped by STEP 0
            if (isCommentOrMetadataLine(thisLine)) {
                continue;
            }
            for (String token : thisLine.split(" ")) {
                if (token.equals("-1")) { // indicate the end of an itemset
                    tid++;
                } else if (token.equals("-2")) { // indicate the end of a sequence
                    sid++;
                    tid = 0;
                } else { // indicate an item
                    // Get the bitmap for this item. If none, create one.
                    Integer item = Integer.parseInt(token);
                    Bitmap bitmapItem = verticalDB.get(item);
                    if (bitmapItem == null) {
                        bitmapItem = new Bitmap(lastBitIndex);
                        verticalDB.put(item, bitmapItem);
                    }
                    // Register the bit in the bitmap for this item
                    bitmapItem.registerBit(sid, tid, sequencesSize);
                }
            }
            transactionCount++;
        }
    }

    // STEP2: REMOVE INFREQUENT ITEMS FROM THE DATABASE BECAUSE THEY WILL NOT
    // APPEAR IN ANY FREQUENT SEQUENTIAL PATTERNS
    List<Integer> frequentItems = new ArrayList<Integer>();
    Iterator<Entry<Integer, Bitmap>> iter = verticalDB.entrySet().iterator();
    while (iter.hasNext()) {
        Entry<Integer, Bitmap> entry = iter.next();
        if (entry.getValue().getSupport() < minsup) {
            iter.remove();
        } else {
            frequentItems.add(entry.getKey());
        }
    }

    // STEP 2.1: SORT ITEMS BY ASCENDING SUPPORT (least frequent item first)
    Collections.sort(frequentItems, new Comparator<Integer>() {
        @Override
        public int compare(Integer arg0, Integer arg1) {
            return Integer.compare(verticalDB.get(arg0).getSupport(), verticalDB.get(arg1).getSupport());
        }
    });

    // STEP 3.1 CREATE CMAP
    buildCooccurrenceMaps(inMemoryDB, frequentItems.size());

    if (DEBUG_MODE) {
        System.out.println("transaction count = " + transactionCount);
    }

    // SAVE ALL SINGLE FREQUENT ITEMS FIRST BEFORE PERFORMING DEPTH FIRST SEARCH
    List<PatternVGEN> prefixSingleItems = new ArrayList<PatternVGEN>(verticalDB.entrySet().size());
    for (Entry<Integer, Bitmap> entry : verticalDB.entrySet()) {
        // We create a prefix with that item
        PrefixVGEN prefix = new PrefixVGEN();
        prefix.addItemset(new Itemset(entry.getKey()));
        boolean itemIsEven = entry.getKey() % 2 == 0;
        if (itemIsEven) {
            prefix.sumOfEvenItems = (Integer) entry.getKey();
            prefix.sumOfOddItems = 0;
        } else {
            prefix.sumOfEvenItems = 0;
            prefix.sumOfOddItems = (Integer) entry.getKey();
        }
        PatternVGEN pattern = new PatternVGEN(prefix, entry.getValue());
        prefixSingleItems.add(pattern);

        // An item appearing in every sequence has the support of the empty
        // pattern, so it is not a generator and is not output. It is still
        // used as a starting point of the depth-first search below.
        if (transactionCount != entry.getValue().getSupport()) {
            List<PatternVGEN> listPatterns = generatorPatterns.get(1).get(pattern.bitmap.sidsum);
            if (listPatterns == null) {
                listPatterns = new ArrayList<PatternVGEN>();
                generatorPatterns.get(1).put(pattern.bitmap.sidsum, listPatterns);
            }
            listPatterns.add(pattern);
            patternCount++;
        }
    }

    // STEP3: PERFORM THE RECURSIVE DEPTH FIRST SEARCH to find longer patterns.
    // With a maximum length of 1 there is nothing longer to look for, but the
    // single items above and the empty pattern below are still part of the result.
    if (maximumPatternLength > 1) {
        for (PatternVGEN pattern : prefixSingleItems) {
            int item = pattern.prefix.get(0).get(0);
            dfsPruning(pattern.prefix, pattern.bitmap, frequentItems, frequentItems, item, 2, item);
        }
    }

    // THE EMPTY SET IS ALWAYS GENERATOR, SO ADD IT TO THE RESULT SET
    Bitmap bitmap = new Bitmap(0);
    bitmap.setSupport(transactionCount);
    PatternVGEN pat = new PatternVGEN(new PrefixVGEN(), bitmap);
    List<PatternVGEN> listLevel0 = new ArrayList<PatternVGEN>();
    listLevel0.add(pat);
    generatorPatterns.get(0).put(0, listLevel0);
    patternCount++;
}

/** Opens the sequence database file for line-by-line reading. */
private static BufferedReader openSequenceFile(String path) throws IOException {
    return new BufferedReader(new InputStreamReader(new FileInputStream(new File(path))));
}

/** Tells if a line of the input file is empty, a comment or metadata rather than a sequence. */
private static boolean isCommentOrMetadataLine(String line) {
    if (line.isEmpty()) {
        return true;
    }
    char first = line.charAt(0);
    return first == '#' || first == '%' || first == '@';
}

/** Tells if an item survived the removal of infrequent items from the vertical database. */
private boolean isFrequentItem(Integer item) {
    Bitmap bitmapOfItem = verticalDB.get(item);
    return bitmapOfItem != null && bitmapOfItem.getSupport() >= minsup;
}

/** Adds one to the co-occurrence count of the pair (itemI, itemJ) in the given map. */
private static void incrementCooccurrence(Map<Integer, Map<Integer, Integer>> coocMap, Integer itemI, Integer itemJ) {
    Map<Integer, Integer> map = coocMap.get(itemI);
    if (map == null) {
        map = new HashMap<Integer, Integer>();
        coocMap.put(itemI, map);
    }
    Integer support = map.get(itemJ);
    map.put(itemJ, support == null ? 1 : support + 1);
}

/** Fills coocMapEquals and coocMapAfter, counting each pair of frequent items at most once per sequence. */
private void buildCooccurrenceMaps(List<int[]> inMemoryDB, int frequentItemCount) {
    coocMapEquals = new HashMap<Integer, Map<Integer, Integer>>(frequentItemCount);
    coocMapAfter = new HashMap<Integer, Map<Integer, Integer>>(frequentItemCount);

    for (int[] sequence : inMemoryDB) {
        // items whose "after" pairs were already counted for this sequence
        Set<Integer> alreadyProcessed = new HashSet<Integer>();
        // for each item, the items already counted with it in a same itemset of this sequence
        Map<Integer, Set<Integer>> equalProcessed = new HashMap<Integer, Set<Integer>>();

        loopI:
        for (int i = 0; i < sequence.length; i++) {
            Integer itemI = sequence[i];
            // negative values are the -1 / -2 separators
            if (itemI < 0 || !isFrequentItem(itemI)) {
                continue;
            }
            Set<Integer> equalSet = equalProcessed.get(itemI);
            if (equalSet == null) {
                equalSet = new HashSet<Integer>();
                equalProcessed.put(itemI, equalSet);
            }

            // items already counted as appearing after this occurrence of itemI
            Set<Integer> alreadyProcessedB = new HashSet<Integer>();
            boolean sameItemset = true;
            for (int j = i + 1; j < sequence.length; j++) {
                Integer itemJ = sequence[j];
                if (itemJ < 0) {
                    // a separator was crossed: what follows is in a later itemset
                    sameItemset = false;
                    continue;
                }
                if (!isFrequentItem(itemJ)) {
                    continue;
                }
                if (sameItemset) {
                    if (equalSet.add(itemJ)) {
                        incrementCooccurrence(coocMapEquals, itemI, itemJ);
                    }
                } else if (!alreadyProcessedB.contains(itemJ)) {
                    // an earlier occurrence of itemI already covered every item after it
                    if (alreadyProcessed.contains(itemI)) {
                        continue loopI;
                    }
                    incrementCooccurrence(coocMapAfter, itemI, itemJ);
                    alreadyProcessedB.add(itemJ);
                }
            }
            alreadyProcessed.add(itemI);
        }
    }
}
/**
 * Recursive depth-first search, adapted from the dfsPruning procedure of the
 * SPAM algorithm. The prefix is extended by s-steps (a new itemset holding one
 * item) and by i-steps (one more item in the last itemset); every frequent
 * extension is submitted to the generator check and then extended in turn.
 * <p>
 * Immediate backward checking: an extension that has the same support as its
 * prefix cannot be a generator, because the prefix is one of its sub-patterns.
 * When the strategy is enabled such an extension is not stored. It is still
 * extended, because its own extensions can have a lower support than all of
 * their sub-patterns and thus be generators.
 *
 * @param prefix the current prefix
 * @param prefixBitmap the bitmap corresponding to the current prefix
 * @param sn a list of items to be considered for s-steps
 * @param in a list of items to be considered for i-steps
 * @param hasToBeGreaterThanForIStep an item is considered for an i-step only if
 *        it is greater than this value
 * @param m the number of items of the patterns created by this call (one more
 *        than the number of items of the prefix)
 * @param lastAppendedItem the last appended item to the prefix
 * @throws IOException declared because pattern saving is declared to throw it
 */
void dfsPruning(PrefixVGEN prefix, Bitmap prefixBitmap, List<Integer> sn, List<Integer> in, int hasToBeGreaterThanForIStep, int m, Integer lastAppendedItem) throws IOException {
    // ====== S-STEPS ======
    // Temporary variables (as described in the paper)
    List<Integer> sTemp = new ArrayList<Integer>();
    List<Bitmap> sTempBitmaps = new ArrayList<Bitmap>();
    // for CMAP pruning, we will only check against the last appended item
    Map<Integer, Integer> mapSupportItemsAfter = coocMapAfter.get(lastAppendedItem);

    for (Integer i : sn) {
        if (isPrunedByCmap(mapSupportItemsAfter, i)) {
            continue;
        }
        // perform the S-STEP with that item to get a new bitmap
        Bitmap.INTERSECTION_COUNT++;
        Bitmap newBitmap = prefixBitmap.createNewBitmapSStep(verticalDB.get(i), sequencesSize, lastBitIndex, maxGap);
        if (newBitmap.getSupport() >= minsup) {
            sTemp.add(i);
            sTempBitmaps.add(newBitmap);
        }
    }

    // for each pattern recorded for the s-step
    for (int k = 0; k < sTemp.size(); k++) {
        int item = sTemp.get(k);
        // create the new prefix
        PrefixVGEN prefixSStep = prefix.cloneSequence();
        prefixSStep.addItemset(new Itemset(item));
        copySumsAddingItem(prefix, prefixSStep, item);
        Bitmap newBitmap = sTempBitmaps.get(k);

        if (maximumPatternLength > m) {
            boolean hasBackWardExtension = false;
            if (passesImmediateBackwardCheck(prefixBitmap, newBitmap)) {
                hasBackWardExtension = savePatternMultipleItems(prefixSStep, newBitmap, m);
            }
            // IF BACKWARD EXTENSION, THEN WE DON'T CONTINUE...
            if (!hasBackWardExtension) {
                dfsPruning(prefixSStep, newBitmap, sTemp, sTemp, item, m + 1, item);
            }
        }
    }

    // ======== I STEPS =======
    Map<Integer, Integer> mapSupportItemsEquals = coocMapEquals.get(lastAppendedItem);
    List<Integer> iTemp = new ArrayList<Integer>();
    List<Bitmap> iTempBitmaps = new ArrayList<Bitmap>();

    for (Integer i : in) {
        // the item has to be greater than the largest item
        // already in the last itemset of prefix.
        if (i <= hasToBeGreaterThanForIStep || isPrunedByCmap(mapSupportItemsEquals, i)) {
            continue;
        }
        // Perform an i-step with this item and the current prefix.
        Bitmap.INTERSECTION_COUNT++;
        Bitmap newBitmap = prefixBitmap.createNewBitmapIStep(verticalDB.get(i), sequencesSize, lastBitIndex);
        if (newBitmap.getSupport() >= minsup) {
            iTemp.add(i);
            iTempBitmaps.add(newBitmap);
        }
    }

    // for each pattern recorded for the i-step
    for (int k = 0; k < iTemp.size(); k++) {
        int item = iTemp.get(k);
        // create the new prefix
        PrefixVGEN prefixIStep = prefix.cloneSequence();
        prefixIStep.getItemsets().get(prefixIStep.size() - 1).addItem(item);
        copySumsAddingItem(prefix, prefixIStep, item);
        Bitmap newBitmap = iTempBitmaps.get(k);

        if (maximumPatternLength > m) {
            boolean hasBackWardExtension = false;
            if (passesImmediateBackwardCheck(prefixBitmap, newBitmap)) {
                hasBackWardExtension = savePatternMultipleItems(prefixIStep, newBitmap, m);
            }
            // IF NO BACKWARD EXTENSION, THEN WE TRY TO EXTEND THAT PATTERN
            if (!hasBackWardExtension) {
                dfsPruning(prefixIStep, newBitmap, sTemp, iTemp, item, m + 1, item);
            }
        }
    }

    // check the memory usage
    MemoryLogger.getInstance().checkMemory();
}

/** Tells if CMAP pruning is enabled and the item does not co-occur often enough with the last appended item. */
private boolean isPrunedByCmap(Map<Integer, Integer> cooccurrences, Integer item) {
    if (!useCMAPPruning) {
        return false;
    }
    if (cooccurrences == null) {
        return true;
    }
    Integer support = cooccurrences.get(item);
    return support == null || support < minsup;
}

/** Tells if an extension may be a generator with respect to its prefix, i.e. the check is disabled or the supports differ. */
private boolean passesImmediateBackwardCheck(Bitmap prefixBitmap, Bitmap newBitmap) {
    return !useImmediateBackwardChecking
            || prefixBitmap.getSupport() != newBitmap.getSupport();
}

/** Sets the even/odd item sums of an extended prefix from those of its parent plus the appended item. */
private static void copySumsAddingItem(PrefixVGEN parent, PrefixVGEN extended, int item) {
    if (item % 2 == 0) {
        extended.sumOfEvenItems = item + parent.sumOfEvenItems;
        extended.sumOfOddItems = parent.sumOfOddItems;
    } else {
        extended.sumOfEvenItems = parent.sumOfEvenItems;
        extended.sumOfOddItems = item + parent.sumOfOddItems;
    }
}
/**
 * Submits a pattern of more than one item to the generator check and, if it
 * passes, stores it in the result structure (nothing is written to the file
 * by this method).
 * <p>
 * The pattern is rejected if it has the support of the empty pattern or if a
 * stored pattern with fewer items, the same sid-sum and the same support is
 * contained in it. When the pattern is kept, stored patterns with more items
 * that contain it and have the same support are removed.
 *
 * @param prefix the pattern
 * @param bitmap its bitmap
 * @param length the number of items of the pattern, used as its level in the
 *        result structure
 * @throws IOException never thrown by the current implementation
 * @return true IF THE PATTERN HAS A BACKWARD EXTENSION WITH THE SAME PROJECTED
 *         DATABASE (only possible when backward pruning is enabled)
 */
private boolean savePatternMultipleItems(PrefixVGEN prefix, Bitmap bitmap, int length) throws IOException {
    // a pattern as frequent as the empty pattern is never a generator
    if (transactionCount == bitmap.getSupport()) {
        return false;
    }

    // only patterns sharing this key can cover the same set of sequences
    final int key = bitmap.sidsum;

    // ---- sub-pattern checking against the levels 1 .. length-1 ----
    boolean coveredBySmallerPattern = false;
    final int firstLevelNotSmaller = Math.min(length, generatorPatterns.size());
    for (int level = 1; level < firstLevelNotSmaller; level++) {
        List<PatternVGEN> bucket = generatorPatterns.get(level).get(key);
        if (bucket == null) {
            continue;
        }
        for (PatternVGEN smaller : bucket) {
            // cheap tests on the item sums and the support come before the containment test
            boolean isSubPatternWithSameSupport =
                    prefix.sumOfEvenItems >= smaller.prefix.sumOfEvenItems
                    && prefix.sumOfOddItems >= smaller.prefix.sumOfOddItems
                    && bitmap.getSupport() == smaller.getAbsoluteSupport()
                    && strictlyContains(prefix, smaller.prefix);
            if (!isSubPatternWithSameSupport) {
                continue;
            }
            if (!useBackwardPruning) {
                // not a generator; without backward pruning there is nothing more to learn
                return false;
            }
            if (isThereBackwardExtension(bitmap, smaller.bitmap)) {
                // the caller may prune the extensions of this pattern
                return true;
            }
            // not a generator, but another stored pattern may still allow pruning
            coveredBySmallerPattern = true;
        }
    }
    if (coveredBySmallerPattern) {
        return false;
    }

    // ---- super-pattern checking against the levels above length ----
    for (int level = generatorPatterns.size() - 1; level > length; level--) {
        List<PatternVGEN> bucket = generatorPatterns.get(level).get(key);
        if (bucket == null) {
            continue;
        }
        for (Iterator<PatternVGEN> it = bucket.iterator(); it.hasNext();) {
            PatternVGEN larger = it.next();
            if (prefix.sumOfEvenItems <= larger.prefix.sumOfEvenItems
                    && prefix.sumOfOddItems <= larger.prefix.sumOfOddItems
                    && bitmap.getSupport() == larger.getAbsoluteSupport()
                    && strictlyContains(larger.prefix, prefix)) {
                // the larger pattern is no longer a generator
                patternCount--;
                it.remove();
            }
        }
    }

    // ---- the pattern may be a generator: keep it ----
    while (generatorPatterns.size() <= length) {
        generatorPatterns.add(new HashMap<Integer, List<PatternVGEN>>());
    }
    Map<Integer, List<PatternVGEN>> ownLevel = generatorPatterns.get(length);
    List<PatternVGEN> ownBucket = ownLevel.get(key);
    if (ownBucket == null) {
        ownBucket = new ArrayList<PatternVGEN>();
        ownLevel.put(key, ownBucket);
    }
    patternCount++;
    ownBucket.add(new PatternVGEN(prefix, bitmap));
    return false;
}
/**
 * Check if there is a backward extension by comparing the bitmaps of two patterns
 * P1 and P2, such that P1 is a superset of P2.
 * <p>
 * The set bits of both bitmaps are walked pairwise in increasing order. The
 * answer is false as soon as a bit of P1 lies after the bit of P2 it is paired
 * with, which includes the case where P2 has no bit left.
 *
 * @param bitmap1 bitmap of P1
 * @param bitmap2 bitmap of P2
 * @return true if there is a backward extension
 */
private boolean isThereBackwardExtension(Bitmap bitmap1, Bitmap bitmap2) {
    BitSet bitsOfP1 = bitmap1.bitmap;
    BitSet bitsOfP2 = bitmap2.bitmap;

    int position1 = bitsOfP1.nextSetBit(0);
    int position2 = bitsOfP2.nextSetBit(0);
    while (true) {
        if (position1 > position2) {
            return false;
        }
        // nextSetBit returns -1 when there is no further bit, so "+ 1" restarts at 0
        position1 = bitsOfP1.nextSetBit(position1 + 1);
        position2 = bitsOfP2.nextSetBit(position2 + 1);
        if (position1 <= 0) {
            return true;
        }
    }
}
/**
 * This method checks if a seq. pattern "pattern2" is contained in a seq. pattern
 * "pattern1", i.e. if the itemsets of pattern2 can be matched, in order, to
 * distinct itemsets of pattern1 that include them.
 * <p>
 * The sizes of the two patterns are not compared beforehand, so two identical
 * patterns are reported as contained in each other. The empty pattern is
 * contained in every pattern, and an empty pattern1 contains only the empty
 * pattern.
 *
 * @param pattern1 a sequential pattern
 * @param pattern2 another sequential pattern
 * @return true if the pattern1 contains pattern2.
 */
boolean strictlyContains(PrefixVGEN pattern1, PrefixVGEN pattern2) {
    // Guards for empty patterns: the matching loop below reads the first
    // itemset of both patterns and would fail on an empty one.
    if (pattern2.size() == 0) {
        return true;
    }
    if (pattern1.size() == 0) {
        return false;
    }

    int i = 0; // position in pattern2
    int j = 0; // position in pattern1
    while (true) {
        // if the itemset at current position in pattern1 contains the itemset
        // at current position in pattern2
        if (pattern1.get(j).containsAll(pattern2.get(i))) {
            // go to next itemset in pattern2
            i++;
            // if we reached the end of pattern2, then return true
            if (i == pattern2.size()) {
                return true;
            }
        }
        // go to next itemset in pattern1
        j++;
        // if we reached the end of pattern1, then pattern2 is not included in it
        if (j >= pattern1.size()) {
            return false;
        }
        // optimization: if fewer itemsets are left in pattern1 than in pattern2,
        // a total match has become impossible
        if ((pattern1.size() - j) < pattern2.size() - i) {
            return false;
        }
    }
}
/**
 * Print the statistics of the last execution to System.out: running time,
 * number of patterns found, maximum memory usage, absolute minimum support
 * and number of bitmap intersections.
 */
public void printStatistics() {
    StringBuilder r = new StringBuilder(200);
    r.append("============= Algorithm VGEN - STATISTICS =============\n Total time ~ ");
    r.append(endTime - startTime);
    r.append(" ms\n");
    r.append(" Frequent sequences count : " + patternCount);
    r.append('\n');
    r.append(" Max memory (mb) : ");
    // the memory value ends the line; the pattern count is already printed above
    r.append(MemoryLogger.getInstance().getMaxMemory());
    r.append('\n');
    r.append("minsup " + minsup);
    r.append('\n');
    r.append("Intersection count " + Bitmap.INTERSECTION_COUNT + " \n");
    r.append("===================================================\n");
    System.out.println(r.toString());
}
/**
* Get the maximum length of patterns to be found. The length is counted in
* items (not itemsets): the depth compared against this limit during the
* search grows by one for every item appended to a pattern.
*
* @return the maximumPatternLength
*/
public int getMaximumPatternLength() {
return maximumPatternLength;
}
/**
* Set the maximum length of patterns to be found. The length is counted in
* items (not itemsets). The value is stored as given, without validation, and
* is read by the next call to runAlgorithm().
*
* @param maximumPatternLength the maximumPatternLength to set
*/
public void setMaximumPatternLength(int maximumPatternLength) {
this.maximumPatternLength = maximumPatternLength;
}
/**
 * Write the patterns stored in the result structure through the writer of
 * this object, one pattern per line: the items of each itemset separated by
 * single spaces, "-1 " after each itemset, then "SUP: " and the absolute
 * support.
 *
 * @param path the output file path (not used: the output goes to the writer
 *        that is already open)
 * @throws IOException exception if an error occur when writing the file.
 */
public void writeResultTofile(String path) throws IOException {
    // levels group the patterns by size, buckets group them by hash value
    for (Map<Integer, List<PatternVGEN>> level : generatorPatterns) {
        for (List<PatternVGEN> bucket : level.values()) {
            for (PatternVGEN pattern : bucket) {
                StringBuilder line = new StringBuilder();
                for (Itemset itemset : pattern.prefix.getItemsets()) {
                    for (Integer item : itemset.getItems()) {
                        line.append(item.toString()).append(' ');
                    }
                    // itemset separator
                    line.append("-1 ");
                }
                line.append("SUP: ").append(pattern.getAbsoluteSupport());

                writer.write(line.toString());
                writer.newLine();
            }
        }
    }
}
/**
* This method allows to specify the maximum gap
* between itemsets of patterns found by the algorithm.
* The value is stored as given and passed to the s-step bitmap operation
* during the search; it is not used for i-steps.
* NOTE(review): the gap semantics live in Bitmap.createNewBitmapSStep, which is
* not part of this file. According to the original documentation, if set to 1,
* only patterns of contiguous itemsets will be found (no gap) - confirm there.
* @param maxGap the maximum gap (an integer)
*/
public void setMaxGap(int maxGap) {
this.maxGap = maxGap;
}
}