package ca.pfv.spmf.algorithms.frequentpatterns.fpgrowth;
/* This file is copyright (c) 2008-2015 Philippe Fournier-Viger
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*/
import java.util.HashMap;
import java.util.Map;
import ca.pfv.spmf.patterns.itemset_array_integers_with_count.Itemset;
/**
* This is an implementation of a CFITree as used by the FPClose algorithm.
*
* @see CFINode
* @see Itemset
* @see AlgoFPClose
* @author Philippe Fournier-Viger
*/
public class CFITree {

    /**
     * Header table: for each item, the first node that was created for that
     * item. Following {@code nodeLink} from that node visits every node of
     * the item in creation order.
     */
    Map<Integer, CFINode> mapItemNodes = new HashMap<Integer, CFINode>();

    /**
     * For each item, the most recently created node for that item, i.e. the
     * tail of the item's node-link chain.
     */
    Map<Integer, CFINode> mapItemLastNode = new HashMap<Integer, CFINode>();

    /** Root of the tree (a node created with the default constructor). */
    CFINode root = new CFINode();

    /** Last node of the path of the most recently added itemset, or null if none was added yet. */
    CFINode lastAddedItemsetNode = null;

    /**
     * Constructor
     */
    public CFITree() {
    }

    /**
     * Update the node-link chain and the header table for an item after a
     * new node has been inserted.
     *
     * @param item    the item of the new node
     * @param newNode the new node that has been inserted
     */
    private void fixNodeLinks(Integer item, CFINode newNode) {
        // append the new node at the end of the node-link chain of this item
        CFINode lastNode = mapItemLastNode.get(item);
        if (lastNode != null) {
            lastNode.nodeLink = newNode;
        }
        // the new node becomes the tail of the chain
        mapItemLastNode.put(item, newNode);

        // if this is the first node for the item, it is also the head of the chain
        if (mapItemNodes.get(item) == null) {
            mapItemNodes.put(item, newNode);
        }
    }

    /**
     * Add an itemset to the CFI-tree.
     * Only the first {@code itemsetLength} entries of {@code itemset} are read.
     *
     * @param itemset       the array containing the itemset
     * @param itemsetLength the number of items of the itemset stored in the array
     * @param support       the support of the itemset
     */
    public void addCFI(int[] itemset, int itemsetLength, int support) {
        CFINode currentNode = root;

        for (int i = 0; i < itemsetLength; i++) {
            int item = itemset[i];

            // look for an existing child of the current node carrying this item
            CFINode child = currentNode.getChildWithID(item);
            if (child == null) {
                // no such child: create a new node below the current node
                CFINode newNode = new CFINode();
                newNode.itemID = item;
                newNode.parent = currentNode;
                // remember the depth of the node (first item is at level 1)
                newNode.level = i + 1;
                newNode.counter = support;
                currentNode.childs.add(newNode);

                // register the node in the header table / node-link chain
                fixNodeLinks(item, newNode);
                currentNode = newNode;
            } else {
                // FPCLOSE: a shared node keeps the maximum support among
                // all the itemsets whose path goes through it
                child.counter = Math.max(support, child.counter);
                currentNode = child;
            }
        }

        // remember the end of the path of this itemset for the fast check
        // performed first by passSubsetChecking()
        lastAddedItemsetNode = currentNode;
    }

    /**
     * Perform the subset test to see if an itemset is a subset of an already
     * found CFI with the same support.
     * Only the first {@code headWithPLength} entries of {@code headWithP}
     * are considered; the array may be longer than the itemset.
     *
     * @param headWithP        the array containing the itemset to be tested
     * @param headWithPLength  the number of items of the itemset stored in the array
     * @param headWithPSupport the support of the itemset
     * @return true if the itemset is not a subset of an already found CFI
     *         (the test is passed), false otherwise
     */
    public boolean passSubsetChecking(int[] headWithP, int headWithPLength, int headWithPSupport) {
        // OPTIMIZATION: first compare with the last added itemset only
        if (lastAddedItemsetNode != null
                && issASubsetOfPrefixPath(headWithP, headWithPLength, lastAddedItemsetNode, headWithPSupport)) {
            return false;
        }

        // The last item of the itemset selects the node-link chain to scan.
        // It is located at headWithPLength - 1 and not at the end of the
        // array, since the array may hold unused entries after the itemset.
        Integer lastItem = headWithP[headWithPLength - 1];

        // Compare the itemset with every path ending at a node of that item.
        // If the item is not in the tree yet, the loop is skipped and the
        // itemset cannot be a subset of an itemset already in the tree.
        for (CFINode node = mapItemNodes.get(lastItem); node != null; node = node.nodeLink) {
            if (issASubsetOfPrefixPath(headWithP, headWithPLength, node, headWithPSupport)) {
                return false;
            }
        }

        // no path contains the itemset with the same support: test passed
        return true;
    }

    /**
     * Check if the itemset is contained in the path going from "node" up to
     * the root of the CFI-tree, where "node" has the same support.
     *
     * @param headWithP       the array containing the itemset
     * @param headWithPLength the number of items of the itemset stored in the array
     * @param node            the node ending the path to be checked
     * @param support         the support of the itemset
     * @return true if the itemset is contained in the path ending at "node"
     *         and the counter of "node" is equal to the support; otherwise false
     */
    private boolean issASubsetOfPrefixPath(int[] headWithP, int headWithPLength, CFINode node, int support) {
        // Optimization proposed in the fpmax* paper: a path having fewer
        // nodes than the number of items of the itemset cannot contain it.
        if (node.counter != support || node.level < headWithPLength) {
            return false;
        }

        // Walk from "node" toward the root while matching the items of the
        // itemset from the last one to the first one.
        int positionInItemset = headWithPLength - 1;
        int itemToLookFor = headWithP[positionInItemset];
        for (CFINode nodeToCheck = node; nodeToCheck != null; nodeToCheck = nodeToCheck.parent) {
            if (nodeToCheck.itemID == itemToLookFor) {
                positionInItemset--;
                // every item has been matched: the itemset is in the path
                if (positionInItemset < 0) {
                    return true;
                }
                itemToLookFor = headWithP[positionInItemset];
            }
        }
        return false;
    }

    /**
     * Method for getting a string representation of the CFI-tree
     * (to be used for debugging purposes). The result is the letter "M"
     * followed by the string produced by the root node.
     *
     * @return a string
     */
    @Override
    public String toString() {
        return "M" + root.toString("");
    }
}