package ca.pfv.spmf.algorithms.frequentpatterns.fpgrowth_with_strings;
/* This file is copyright (c) 2008-2013 Philippe Fournier-Viger
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*/
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* This is an implementation of an FP-Tree for the version of the FP-Growth
* algorithm where items are represented by Strings rather than Integers.
*
*
* @see FPNode_Strings
* @see FPTree_Strings
* @see AlgoFPGrowth_Strings
* @author Philippe Fournier-Viger
*/
public class FPTree_Strings {
	// List of items in the header table.
	// Stays null until createHeaderList() has been called.
	List<String> headerList = null;

	// Header table: for each item, the FIRST node that was created for this item
	// (i.e. the starting point of the node-link chain of that item).
	Map<String, FPNode_Strings> mapItemNodes = new HashMap<String, FPNode_Strings>();

	// flag that indicates if the tree has more than one path
	boolean hasMoreThanOnePath = false;

	// Map that indicates the last node for each item using the node links
	// key: item   value: the most recently created FP-tree node for this item
	Map<String, FPNode_Strings> mapItemLastNode = new HashMap<String, FPNode_Strings>();

	// root of the tree (a node created with the default constructor; no item is assigned to it here)
	FPNode_Strings root = new FPNode_Strings();

	/**
	 * Constructor. Creates an empty tree containing only the root node.
	 */
	FPTree_Strings() {
	}

	/**
	 * Method to fix the node links for an item after inserting a new node.
	 * The new node is appended to the node-link chain of the item, recorded as
	 * the last node of that item and, if the item has no entry in the header
	 * table yet, registered as the first node of that item.
	 *
	 * @param item    the item of the new node
	 * @param newNode the new node that has been inserted
	 */
	private void fixNodeLinks(String item, FPNode_Strings newNode) {
		// get the latest node in the tree with this item
		FPNode_Strings lastNode = mapItemLastNode.get(item);
		if (lastNode != null) {
			// if not null, then we chain the new node after the last node
			lastNode.nodeLink = newNode;
		}
		// Finally, we set the new node as the last node
		mapItemLastNode.put(item, newNode);

		// If the header table has no node for this item yet,
		// the new node becomes the head of the node-link chain.
		FPNode_Strings headerNode = mapItemNodes.get(item);
		if (headerNode == null) {
			mapItemNodes.put(item, newNode);
		}
	}

	/**
	 * Creates a new node for an item, attaches it as a child of the given parent
	 * and updates the multi-path flag and the node links accordingly.
	 * The counter of the new node is left at the value assigned by the
	 * FPNode_Strings default constructor; callers may overwrite it.
	 *
	 * @param parent the node under which the new node is inserted
	 * @param item   the item of the new node
	 * @return the newly created node
	 */
	private FPNode_Strings attachNewChild(FPNode_Strings parent, String item) {
		FPNode_Strings newNode = new FPNode_Strings();
		newNode.itemID = item;
		newNode.parent = parent;
		// we link the new node to its parent
		parent.childs.add(newNode);
		// a node with more than one child means that the tree has several paths
		if (!hasMoreThanOnePath && parent.childs.size() > 1) {
			hasMoreThanOnePath = true;
		}
		// We update the header table and the node links.
		fixNodeLinks(item, newNode);
		return newNode;
	}

	/**
	 * Method for adding a transaction to the fp-tree (for the initial construction
	 * of the FP-Tree). The items are inserted in the order of the list, starting
	 * from the root: an existing child with the same item has its counter
	 * incremented by one, otherwise a new child node is created.
	 *
	 * @param transaction the items of the transaction, in insertion order
	 */
	public void addTransaction(List<String> transaction) {
		FPNode_Strings currentNode = root;
		// For each item in the transaction
		for (String item : transaction) {
			// look if there is a node already in the FP-Tree
			FPNode_Strings child = currentNode.getChildWithID(item);
			if (child == null) {
				// there is no node, we create a new one and continue from it
				currentNode = attachNewChild(currentNode, item);
			} else {
				// there is a node already, we update it
				child.counter++;
				currentNode = child;
			}
		}
	}

	/**
	 * Method for adding a prefix path to a fp-tree.
	 * The first element of the prefix path only provides the support of the path
	 * (its counter) and is not inserted. The remaining elements are inserted from
	 * the last one down to the second one, skipping the items whose support in
	 * mapSupportBeta is lower than relativeMinsupp.
	 *
	 * @param prefixPath      the prefix path (must contain at least one node)
	 * @param mapSupportBeta  the frequencies of items in the prefix paths; it is
	 *                        expected to contain an entry for every item of the path
	 * @param relativeMinsupp the minimum support that an item must reach to be inserted
	 */
	void addPrefixPath(List<FPNode_Strings> prefixPath, Map<String, Integer> mapSupportBeta, int relativeMinsupp) {
		// the first element of the prefix path contains the path support
		int pathCount = prefixPath.get(0).counter;

		FPNode_Strings currentNode = root;
		// For each item in the prefix path (in backward order)
		// (and we ignore the first element of the prefix path)
		for (int i = prefixPath.size() - 1; i >= 1; i--) {
			String item = prefixPath.get(i).itemID;
			// if the item is not frequent we skip it
			if (mapSupportBeta.get(item) < relativeMinsupp) {
				continue;
			}

			// look if there is a node already in the FP-Tree
			FPNode_Strings child = currentNode.getChildWithID(item);
			if (child == null) {
				// there is no node, we create a new one
				FPNode_Strings newNode = attachNewChild(currentNode, item);
				// SPECIAL: the new node starts with the support of the whole path
				newNode.counter = pathCount;
				currentNode = newNode;
			} else {
				// there is a node already, we add the path support to it
				child.counter += pathCount;
				currentNode = child;
			}
		}
	}

	/**
	 * Method for creating the list of items in the header table, in descending
	 * order of frequency. Items having the same frequency are ordered according
	 * to the natural (lexical) ordering of Strings.
	 *
	 * @param mapSupport the frequencies of each item; it must contain an entry for
	 *                   every item that has a node in the tree
	 */
	void createHeaderList(final Map<String, Integer> mapSupport) {
		// create a list to store the header list with
		// all the items that have at least one node in the tree
		headerList = new ArrayList<String>(mapItemNodes.keySet());

		// sort the header table by decreasing order of support
		Collections.sort(headerList, new Comparator<String>() {
			@Override
			public int compare(String id1, String id2) {
				// compare the supports in descending order; compareTo() is used
				// rather than a subtraction, which could overflow
				int compare = mapSupport.get(id2).compareTo(mapSupport.get(id1));
				// if the same support, we check the lexical ordering!
				if (compare == 0) {
					return id1.compareTo(id2);
				}
				// otherwise use the support
				return compare;
			}
		});
	}
}