package ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.dataStructures.Item;
import ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.dataStructures.abstracciones.ItemAbstractionPair;
import ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.dataStructures.creators.AbstractionCreator;
import ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.dataStructures.creators.AbstractionCreator_Qualitative;
import ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.dataStructures.creators.ItemAbstractionPairCreator;
import ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.dataStructures.patterns.Pattern;
import ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.idlists.IDList;
import ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.savers.Saver;
import ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.tries.Trie;
import ca.pfv.spmf.algorithms.sequentialpatterns.clasp_AGP.tries.TrieNode;
/**
* This is an implementation of the main method of the ClaSP algorithm. Several
* kinds of IdList can be used, although we only provide one implementation:
* IDListStandard_Mapkeep. However, a different one can be plugged in by
* creating a new IdList class that implements the IDList interface.
*
* NOTE: This implementation saves the patterns to a file as soon as they are
* found, or keeps them in memory, depending on what the user chooses.
*
* Copyright Antonio Gomariz Peñalver 2013
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*
* @author agomariz
*/
public class FrequentPatternEnumeration_ClaSP {
public long joinCount = 0; // PFV 2013 - counts the number of IdList join (intersection) operations performed
/**
 * The abstraction creator, used to compare patterns in the pruning methods.
 */
private AbstractionCreator abstractionCreator;
/**
 * The absolute minimum support threshold, i.e. the minimum number of
 * sequences in which a pattern has to appear to be frequent.
 */
private double minSupAbsolute;
/**
 * Number of frequent patterns found by the algorithm. Initially set to
 * zero.
 */
private int numberOfFrequentPatterns = 0;
/**
 * Map in which we store the already generated patterns in order to know
 * which ones can be skipped because they are summarized by other ones (the
 * closed patterns). First key: sum of the sequence ids where the pattern
 * appears; second key: the value computed by key2().
 */
private Map<Integer, Map<Integer, List<Entry<Pattern, Trie>>>> matchingMap;
/**
 * Saver used to decide where the user wants to keep the results (file or
 * memory), if it is the case.
 */
private Saver saver;
/**
 * Flag indicating that we are only interested in finding the closed
 * sequences.
 */
private boolean findClosedPatterns;
/**
 * Flag indicating that the closed sequences are found through the
 * pruning/postprocessing step.
 */
private boolean executePruningMethods;
/**
 * Standard constructor.
 *
 * @param abstractionCreator the abstraction creator
 * @param minSupAbsolute the absolute minimum support
 * @param saver the saver that correctly stores the results where the user
 * wants
 * @param findClosedPatterns flag to indicate if we are only interested in
 * finding the closed sequences
 * @param executePruningMethods flag to indicate if the backward
 * subpattern/superpattern pruning methods are executed
 */
public FrequentPatternEnumeration_ClaSP(AbstractionCreator abstractionCreator, double minSupAbsolute, Saver saver, boolean findClosedPatterns, boolean executePruningMethods) {
this.abstractionCreator = abstractionCreator;
this.minSupAbsolute = minSupAbsolute;
this.saver = saver;
// Created eagerly; released again in clear()
this.matchingMap = new HashMap<Integer, Map<Integer, List<Entry<Pattern, Trie>>>>();
this.findClosedPatterns = findClosedPatterns;
this.executePruningMethods = executePruningMethods;
}
/**
 * Entry point of the search for frequent patterns. Each child of the root
 * Trie corresponds to a frequent single item; for every one of them we
 * launch the recursive depth-first exploration.
 *
 * @param patron the starting pattern
 * @param trie the root Trie whose children are the frequent single items
 * @param verbose flag for debugging purposes
 * @param coocMapAfter co-occurrence map used to prune sequence extensions
 * (may be null)
 * @param coocMapEquals co-occurrence map used to prune itemset extensions
 * (may be null)
 */
public void dfsPruning(Pattern patron, Trie trie, boolean verbose, Map<Integer, Map<Integer, Integer>> coocMapAfter, Map<Integer, Map<Integer, Integer>> coocMapEquals) {
    final int childCount = trie.levelSize();
    for (int index = 0; index < childCount; index++) {
        // The index-th frequent item (a child of the root Trie)
        TrieNode child = trie.getNode(index);
        // Launch the main recursion for the Trie associated with that item;
        // extensions are drawn from the root's children, i-extensions
        // starting just after this item.
        exploreChildren(new Pattern(child.getPair()), child, trie.getNodes(), trie.getNodes(), index + 1, coocMapAfter, coocMapEquals, child.getPair().getItem());
    }
}
/**
 * Main recursive method of ClaSP. It extends the given pattern by means of
 * sequence extensions (s-step, a new itemset appended to the sequence) and
 * itemset extensions (i-step, a new item appended to the last itemset),
 * keeps the frequent extensions, and explores them depth-first.
 *
 * @param pattern            the current frequent pattern being extended
 * @param currentNode        the TrieNode whose child Trie holds the IdList of pattern
 * @param sequenceExtensions candidate nodes for s-extensions
 * @param itemsetsExtensions candidate nodes for i-extensions
 * @param beginning          index in itemsetsExtensions from which i-extensions are tried
 * @param coocMapAfter       co-occurrence counts used to prune s-extensions (may be null)
 * @param coocMapEquals      co-occurrence counts used to prune i-extensions (may be null)
 * @param lastAppendedItem   the item last appended to pattern, used as key in the co-occurrence maps
 */
private void exploreChildren(Pattern pattern, TrieNode currentNode, List<TrieNode> sequenceExtensions, List<TrieNode> itemsetsExtensions, int beginning, Map<Integer, Map<Integer, Integer>> coocMapAfter, Map<Integer, Map<Integer, Integer>> coocMapEquals, Item lastAppendedItem) {
    //We get the current trie
    Trie currentTrie = currentNode.getChild();
    /*
     * If we are interested in finding the closed patterns, we check if the
     * generation of the Trie of the current pattern can be avoided
     */
    if (findClosedPatterns && executePruningMethods) {
        if (isAvoidable(pattern, currentTrie)) {
            return;
        }
    }
    //We count the current pattern as frequent
    numberOfFrequentPatterns++;
    //Initialization of the sets holding the new frequent extensions
    List<TrieNode> new_sequenceExtension = new ArrayList<TrieNode>();
    List<TrieNode> new_itemsetExtension = new ArrayList<TrieNode>();
    List<Pattern> newPatterns = new ArrayList<Pattern>();
    List<TrieNode> newNodesToExtends = new ArrayList<TrieNode>();
    //Clone of the current pattern, base for every extension below
    Pattern clone = pattern.clonePatron();
    //s-step: for all the nodes of sequenceExtensions
    loops:
    for (TrieNode node : sequenceExtensions) {
        // ====== PFV 2013 =========================
        // Co-occurrence pruning: skip the candidate when it does not appear
        // frequently enough after the last appended item.
        if (coocMapAfter != null) {
            Map<Integer, Integer> map = coocMapAfter.get(lastAppendedItem.getId());
            if (map != null) {
                Integer coocurenceCount = map.get(node.getPair().getItem().getId());
                if (coocurenceCount == null || coocurenceCount < minSupAbsolute) {
                    continue loops;
                }
            } else {
                continue loops;
            }
        }
        // ====== FIN PFV 2013 =========================
        //We create a new pattern based on the elements of the clone
        Pattern extension = new Pattern(new ArrayList<ItemAbstractionPair>(clone.getElements()));
        //And we extend it with the pair of the candidate node
        ItemAbstractionPair newPair = node.getPair();
        extension.add(newPair);
        /*
         * We make the join operation between the IdLists of both patterns in
         * order to know the appearances of the new pattern and its support.
         */
        joinCount++;
        IDList newIdList = currentTrie.getIdList().join(node.getChild().getIdList(), false, (int) minSupAbsolute);
        //If the new pattern is frequent
        if (newIdList.getSupport() >= minSupAbsolute) {
            //We create a new trie for it
            Trie newTrie = new Trie(null, newIdList);
            //and we register its appearances in the IdList
            newIdList.setAppearingIn(newTrie);
            //we put in a TrieNode the new pair and the new Trie created
            TrieNode newTrieNode = new TrieNode(newPair, newTrie);
            //And we merge the new Trie with the current one
            currentTrie.mergeWithTrie(newTrieNode);
            /*
             * Finally we add the new pattern and trie node to the sets that
             * are needed for future patterns
             */
            newPatterns.add(extension);
            newNodesToExtends.add(newTrieNode);
            new_sequenceExtension.add(newTrieNode);
        }
    }
    int sequenceExtensionSize = new_sequenceExtension.size();
    //Recursive exploration of every frequent s-extension
    for (int i = 0; i < sequenceExtensionSize; i++) {
        //we get the new pattern and the trie node associated with it
        Pattern newPattern = newPatterns.get(i);
        TrieNode nodeToExtend = newNodesToExtends.remove(0);
        Item last = newPattern.getIthElement(newPattern.size() - 1).getItem(); // PFV 2013
        /* Recursive call with the new sequence extension. We establish the
         * same set as the set from which we will make the i-extensions, but
         * beginning from the (i+1)-th element.
         */
        exploreChildren(newPattern, nodeToExtend, new_sequenceExtension, new_sequenceExtension, i + 1, coocMapAfter, coocMapEquals, last);
    }
    /* We clear the sets of new patterns and nodes since we reuse them to
     * store those found by the i-extensions below.
     */
    newPatterns.clear();
    newNodesToExtends.clear();
    /*
     * i-step: from the beginning index to the last candidate appearing in
     * the itemset extension set
     */
    loopi:
    for (int k = beginning; k < itemsetsExtensions.size(); k++) {
        TrieNode eq = itemsetsExtensions.get(k);
        // ====== PFV 2013 =========================
        // Co-occurrence pruning for items appearing in the same itemset.
        if (coocMapEquals != null) {
            Map<Integer, Integer> map = coocMapEquals.get(lastAppendedItem.getId());
            if (map != null) {
                Integer coocurenceCount = map.get(eq.getPair().getItem().getId());
                if (coocurenceCount == null || coocurenceCount < minSupAbsolute) {
                    continue loopi;
                }
            } else {
                continue loopi;
            }
        }
        // ====== FIN PFV 2013 =========================
        //We create a new pattern with the elements of the current pattern
        Pattern extension = new Pattern(new ArrayList<ItemAbstractionPair>(clone.getElements()));
        //And we add to it the current item with an "equals" abstraction
        ItemAbstractionPair newPair = ItemAbstractionPairCreator.getInstance().getItemAbstractionPair(eq.getPair().getItem(), AbstractionCreator_Qualitative.getInstance().crearAbstraccion(true));
        extension.add(newPair);
        /*
         * We make the join operation between the IdLists of both patterns in
         * order to know the appearances of the new pattern and its support.
         */
        joinCount++;
        IDList newIdList = currentTrie.getIdList().join(eq.getChild().getIdList(), true, (int) minSupAbsolute);
        //If the new pattern is frequent
        if (newIdList.getSupport() >= minSupAbsolute) {
            //We create a new trie for it
            Trie newTrie = new Trie(null, newIdList);
            //And we register its appearances in the IdList
            newIdList.setAppearingIn(newTrie);
            //we put in a TrieNode the new pair and the new Trie created
            TrieNode newTrieNode = new TrieNode(newPair, newTrie);
            //And we merge the new Trie with the current one
            currentTrie.mergeWithTrie(newTrieNode);
            /*
             * Finally we add the new pattern and trie node to the sets that
             * are needed for future patterns
             */
            newPatterns.add(extension);
            newNodesToExtends.add(newTrieNode);
            new_itemsetExtension.add(newTrieNode);
        }
    }
    int itemsetExtensionSize = new_itemsetExtension.size();
    //Recursive exploration of every frequent i-extension
    for (int i = 0; i < itemsetExtensionSize; i++) {
        //we get the new pattern and the trie node associated with it
        Pattern newPattern = newPatterns.get(i);
        TrieNode nodeToExtend = newNodesToExtends.remove(0);
        Item last = newPattern.getIthElement(newPattern.size() - 1).getItem(); // PFV 2013
        /* Recursive call with the new itemset extension, beginning from the
         * (i+1)-th element of the new i-extension set.
         */
        exploreChildren(newPattern, nodeToExtend, new_sequenceExtension, new_itemsetExtension, i + 1, coocMapAfter, coocMapEquals, last);
        //The IdList of this i-extension is no longer needed after its recursion
        nodeToExtend.getChild().setIdList(null);
        /*
         * Once the last i-extension has been explored, no further join needs
         * the IdLists of the s-extension children, so we release them too.
         * FIX: the original condition (i == itemsetExtensionSize) could never
         * hold inside this loop (i stops at itemsetExtensionSize - 1), so
         * this cleanup was dead code and the IdLists were never freed here.
         */
        if (i == itemsetExtensionSize - 1) {
            for (TrieNode nodo : new_sequenceExtension) {
                nodo.getChild().setIdList(null);
            }
        }
    }
}
/**
 * It returns the number of frequent patterns found by the last execution of
 * the algorithm.
 *
 * @return the number of frequent patterns
 */
public int getFrequentPatterns() {
return numberOfFrequentPatterns;
}
/**
 * Sets the number of frequent patterns ("patrones frecuentes" is Spanish
 * for "frequent patterns"; the name is kept for caller compatibility).
 *
 * @param patronesFrecuentes the new frequent-pattern count
 */
public void setPatronesFrecuentes(int patronesFrecuentes) {
this.numberOfFrequentPatterns = patronesFrecuentes;
}
/**
 * Method that checks if the prefix given as parameter can be skipped by
 * means of the prune methods backward subpattern or backward superpattern.
 * The method uses a map where the different patterns are kept in order to
 * check both pruning methods. The hash keys used can vary, and we give some
 * approaches by the methods:
 *
 * key_standard() key_standardAndSupport() key_standardAndSumIDs()
 *
 * @param prefix Current pattern which is going to be checked
 * @param trie Trie associated with prefix
 * @return true if the prefix can be skipped (it was pruned), false otherwise
 */
private boolean isAvoidable(Pattern prefix, Trie trie) {
    //We get the support of the pattern
    int support = trie.getSupport();
    //We get the IdList of the pattern
    IDList idList = trie.getIdList();
    /*
     * First key: the sum of all the sequence identifiers where the current
     * prefix appears
     */
    int key1 = trie.getSumIdSequences();
    int prefixSize = prefix.size();
    /*
     * Second key; different approaches can be used (see key2)
     */
    int key2 = key2(idList, trie);
    /*
     * New entry associating the current prefix with its corresponding Trie
     */
    Entry<Pattern, Trie> newEntry = new AbstractMap.SimpleEntry<Pattern, Trie>(prefix, trie);
    /*
     * Map with all the patterns that share the key1 value of the current
     * prefix, indexed by their key2 value
     */
    Map<Integer, List<Entry<Pattern, Trie>>> associatedMap = matchingMap.get(key1);
    /*
     * If there is no pattern with the same key1 value, we add the current
     * prefix as a new entry, and we also insert it in the matching map
     */
    if (associatedMap == null) {
        associatedMap = new HashMap<Integer, List<Entry<Pattern, Trie>>>();
        // FIX: the original used a raw-typed reference over a wrongly
        // parameterized list (List entryList = new ArrayList<Pattern>())
        // even though it stores Entry<Pattern, Trie> elements; this only
        // compiled because the reference was raw.
        List<Entry<Pattern, Trie>> entryList = new ArrayList<Entry<Pattern, Trie>>();
        entryList.add(newEntry);
        associatedMap.put(key2, entryList);
        matchingMap.put(key1, associatedMap);
    } else {
        /*
         * If, conversely, there are some patterns with the same key1 value,
         * we check if we can apply backward subpattern or backward
         * superpattern pruning
         */
        //We get the list of entries sharing both keys
        List<Entry<Pattern, Trie>> associatedList = associatedMap.get(key2);
        //If it does not exist yet, we create one
        if (associatedList == null) {
            associatedList = new ArrayList<Entry<Pattern, Trie>>();
            associatedList.add(newEntry);
            associatedMap.put(key2, associatedList);
        } else {
            int superPattern = 0;
            for (int i = 0; i < associatedList.size(); i++) {
                //For all the elements of the associated list
                Entry<Pattern, Trie> storedEntry = associatedList.get(i);
                //We get both pattern and trie from the entry
                Pattern p = storedEntry.getKey();
                Trie t = storedEntry.getValue();
                //If the supports of the current prefix and of p are equal
                if (support == t.getSupport()) {
                    //We keep the size of the pattern
                    int pSize = p.size();
                    if (pSize != prefixSize) {
                        //if the prefix size is less than the size of p
                        if (prefixSize < pSize) {
                            //and prefix is a subpattern of p
                            if (prefix.isSubpattern(abstractionCreator, p)) {
                                /*
                                 * We apply backward subpattern pruning and
                                 * establish as new nodes the nodes of the
                                 * trie of p
                                 */
                                trie.setNodes(t.getNodes());
                                /*
                                 * We end the method since we have already
                                 * done the prune
                                 */
                                return true;
                            }
                        } else if (p.isSubpattern(abstractionCreator, prefix)) {
                            /*
                             * if, conversely, the prefix size is greater
                             * than the size of p and prefix is a
                             * superpattern of p
                             */
                            //we update a counter of superpatterns
                            superPattern++;
                            /*
                             * and we make the prefix trie point to the nodes
                             * of the trie of p
                             */
                            trie.setNodes(t.getNodes());
                            //t.setNodes(null);
                            //And we remove the entry from the list,
                            //compensating the index for the removal
                            associatedList.remove(i);
                            i--;
                        }
                    }
                }
            }
            //At this point we add the new entry of the current prefix
            associatedList.add(newEntry);
            //If we found any superpattern, the prefix can be skipped
            if (superPattern > 0) {
                /*if (superPattern > 1) {
                 System.out.println("We removed more than one pattern!!");
                 }*/
                return true;
            }
        }
    }
    /*
     * We did not find any subpattern or superpattern, so the generation of
     * the current prefix cannot be skipped
     */
    return false;
}
/**
 * Computes the value used as the second key of matchingMap.
 *
 * @param idlist the IdList of the pattern under consideration
 * @param t the Trie of the pattern under consideration
 * @return the second hash key for the pattern
 */
private int key2(IDList idlist, Trie t) {
    /*
     * To change the hashing strategy, swap the active return statement for
     * one of the commented alternatives below.
     */
    return key_standardAndSupport(idlist, t);
    //return key_standard(idlist);
    //return key_standardAndSumIDs(idlist, t);
}
/**
 * One of the strategies usable by key2 in isAvoidable: the number of
 * elements that appear in the projected database of the prefix.
 *
 * @param idList IdList of the prefix to consider
 * @return the element count after the prefix appearances
 */
private static int key_standard(IDList idList) {
    final int elementCount = idList.getTotalElementsAfterPrefixes();
    return elementCount;
}
/**
 * One of the strategies usable by key2 in isAvoidable: the number of
 * elements appearing in the projected database plus the support of the
 * related prefix.
 *
 * @param projection IdList of the prefix to consider
 * @param trie Trie of the pattern to consider
 * @return the element count plus the pattern support
 */
private static int key_standardAndSupport(IDList projection, Trie trie) {
    final int elementCount = projection.getTotalElementsAfterPrefixes();
    final int support = trie.getSupport();
    return elementCount + support;
}
/**
 * One of the strategies usable by key2 in isAvoidable: the number of
 * elements that appear after each appearance of the pattern plus the sum of
 * the sequence identifiers where the given prefix appears.
 *
 * @param idList IdList of the prefix to consider
 * @param trie Trie of the pattern to consider
 * @return the element count plus the sum of sequence ids
 */
private static int key_standardAndSumIDs(IDList idList, Trie trie) {
    final int elementCount = idList.getTotalElementsAfterPrefixes();
    final int idSum = trie.getSumIdSequences();
    return elementCount + idSum;
}
/**
 * It removes the non-closed patterns from the list of patterns given as
 * parameter. Patterns are grouped by the sum of the sequence ids where they
 * appear; within each group, a pattern with the same support as a longer
 * superpattern of it is removed (it is not closed).
 *
 * @param frequentPatterns List of patterns from which we want to remove the
 * non-closed patterns
 * @param keepPatterns Flag indicating if we want to keep the final output
 */
void removeNonClosedPatterns(List<Entry<Pattern, Trie>> frequentPatterns, boolean keepPatterns) {
System.err.println("Before removing NonClosed patterns there are " + numberOfFrequentPatterns + " patterns");
numberOfFrequentPatterns = 0;
/*
 * We make a map that groups the patterns by the sum of the identifiers of
 * the sequences where they appear
 */
Map<Integer, List<Pattern>> totalPatterns = new HashMap<Integer, List<Pattern>>();
//and we classify the patterns there by their sumIdSequences number
for (Entry<Pattern, Trie> entrada : frequentPatterns) {
Pattern p = entrada.getKey();
Trie t = entrada.getValue();
//copy the appearance set from the trie to the pattern itself
p.setAppearingIn(t.getAppearingIn());
List<Pattern> listaPatrones = totalPatterns.get(t.getSumIdSequences());
if (listaPatrones == null) {
//first pattern for this sumIdSequences value: create its bucket
listaPatrones = new LinkedList<Pattern>();
totalPatterns.put(t.getSumIdSequences(), listaPatrones);
}
listaPatrones.add(p);
}
//For every list associated with the different sumIdSequences values
for (List<Pattern> lista : totalPatterns.values()) {
//For every pair of their patterns (indices are adjusted after removals)
for (int i = 0; i < lista.size(); i++) {
for (int j = i + 1; j < lista.size(); j++) {
Pattern p1 = lista.get(i);
Pattern p2 = lista.get(j);
//If the patterns have the same support (same appearance-set size)
if (p1.getAppearingIn().size() == p2.getAppearingIn().size()) {
if (p1.size() != p2.size()) {
/*
 * And one is a subpattern of the other, we remove the
 * shorter pattern and keep the longer one
 */
if (p1.size() < p2.size()) {
if (p1.isSubpattern(abstractionCreator, p2)) {
//p1 is not closed: drop it and restart the inner scan at the new i-th element
lista.remove(i);
i--;
break;
}
} else {
if (p2.isSubpattern(abstractionCreator, p1)) {
//p2 is not closed: drop it; j-- compensates the shift caused by the removal
lista.remove(j);
j--;
}
}
}
}
}
}
}
/*
 * We count the remaining (closed) patterns and store them in the chosen
 * output if the flag is activated
 */
for (List<Pattern> list : totalPatterns.values()) {
numberOfFrequentPatterns += list.size();
if (keepPatterns) {
for (Pattern p : list) {
saver.savePattern(p);
}
}
}
}
/**
 * Releases the internal matching map used by the pruning methods. Safe to
 * call more than once.
 */
public void clear() {
    if (matchingMap == null) {
        return;
    }
    matchingMap.clear();
    matchingMap = null;
}
}