package ca.pfv.spmf.algorithms.frequentpatterns.fpgrowth;
/* This file is copyright (c) 2008-2015 Philippe Fournier-Viger
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*/
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import ca.pfv.spmf.patterns.itemset_array_integers_with_count.Itemset;
/**
* This is an implementation of a MFITree as used by the FPMax algorithm.
*
* @see MFINode
* @see Itemset
* @see AlgoFPMax
* @author Philippe Fournier-Viger
*/
public class MFITree {
// List of pairs (item, frequency) of the header table
Map<Integer, MFINode> mapItemNodes = new HashMap<Integer, MFINode>();
// Map that indicates the last node for each item using the node links
// key: item value: an fp tree node
Map<Integer, MFINode> mapItemLastNode = new HashMap<Integer, MFINode>();
// root of the tree
MFINode root = new MFINode(); // null node
// last added itemset
MFINode lastAddedItemsetNode = null;
/**
* Constructor
*/
public MFITree(){
}
/**
* Method to fix the node link for an item after inserting a new node.
* @param item the item of the new node
* @param newNode the new node thas has been inserted.
*/
private void fixNodeLinks(Integer item, MFINode newNode) {
// get the latest node in the tree with this item
MFINode lastNode = mapItemLastNode.get(item);
if(lastNode != null) {
// if not null, then we add the new node to the node link of the last node
lastNode.nodeLink = newNode;
}
// Finally, we set the new node as the last node
mapItemLastNode.put(item, newNode);
MFINode headernode = mapItemNodes.get(item);
if(headernode == null){ // there is not
mapItemNodes.put(item, newNode);
}
}
/**
* Add an itemset to the MFI-Tree
* @param itemset the itemset
* @param itemsetLength the length of the itemset
* @param support the support of the itemset
*/
public void addMFI(int[] itemset, int itemsetLength, int support) {
MFINode currentNode = root;
// For each item in the itemset
for(int i=0; i < itemsetLength; i++){
int item = itemset[i];
// look if there is a node already in the FP-Tree
MFINode child = currentNode.getChildWithID(item);
if(child == null){
// there is no node, we create a new one
MFINode newNode = new MFINode();
newNode.itemID = item;
newNode.parent = currentNode;
// remember at which level in the tree that node appears
newNode.level = i+1;
// we link the new node to its parrent
currentNode.childs.add(newNode);
// we take this node as the current node for the next for loop iteration
currentNode = newNode;
// We update the header table.
// We check if there is already a node with this id in the header table
fixNodeLinks(item, newNode);
}else{
// there is a node already, we update it
// child.counter++;
currentNode = child;
}
}
// SET THE SUPPORT OF THE MFI (the last item)
// currentNode.counter = support;
// remember that this is the last added itemset
lastAddedItemsetNode = currentNode;
}
/**
* Perform the subset test to see if an itemset is a subset of an already
* found MFI
* @param headWithP the itemset to be tested
* @return true if the itemset is not a subset of an already found MFI.
*/
public boolean passSubsetChecking(List<Integer> headWithP) {
// Find the node list for the last item of the itemset
Integer lastItem = headWithP.get(headWithP.size()-1);
// OPTIMIZATION:
// We first check against the last added itemset
if(lastAddedItemsetNode != null) {
boolean isSubset = issASubsetOfPrefixPath(headWithP, lastAddedItemsetNode);
// if the itemset is a subset of the last added itemset, we do not need to check further
if(isSubset) {
return false;
}
}
// OTHERWISE, WE NEED TO COMPARE "headwithP" with all the patterns in the MFI-tree.
MFINode node = mapItemNodes.get(lastItem);
// if that last item is not yet in the MFI-tree, it means that "itemset" is not a subset
// of some itemset already in the tree
if(node == null) {
return true;
}
// we will loop over each node by following node links
do {
// for a node, we will check if "headwithP" is a subset of the path ending at node
boolean isSubset = issASubsetOfPrefixPath(headWithP, node);
// if it is a subset, then "headWithP" is in the MFI-tree, we return false
if(isSubset) {
return false;
}
// go to the next itemset to test
node = node.nodeLink;
}while(node != null);
// the itemset is not in the MFI-TREE. Itemset passed the test!
return true;
}
/**
* Check if the itemset headwithP is contained in the path ending at "node" in the MFI-tree
* @param headWithP the itemset headwithP
* @param node the node
* @return true if "headwithP" is contained in the path ending at "node" in the MFI-Tree. Otherwise, false.
*/
private boolean issASubsetOfPrefixPath(List<Integer> headWithP, MFINode node) {
// optimization proposed in the fpmax* paper: if there is less than itemset node in that branch,
// we don't need to check it
if(node.level >= headWithP.size()) {
// check if "itemset" is contained in the prefix path ending at "node"
// We will start comparing from the parent of "node" in the prefix path since
// the last item of itemset is "node".
MFINode nodeToCheck = node;
int positionInItemset = headWithP.size()-1;
int itemToLookFor = headWithP.get(positionInItemset);
// for each item in itemset
do {
if(nodeToCheck.itemID == itemToLookFor) {
positionInItemset--;
// we found the itemset completely, so the subset check test is failed
if(positionInItemset <0) {
return true;
}
itemToLookFor = headWithP.get(positionInItemset);
}
nodeToCheck = nodeToCheck.parent;
}while(nodeToCheck != null);
}
return false;
}
@Override
/**
* Method for getting a string representation of the CP-tree
* (to be used for debugging purposes).
* @return a string
*/
public String toString() {
return "M"+root.toString("");
}
}