/***********************************************************************
This file is part of KEEL-software, the Data Mining tool for regression,
classification, clustering, pattern mining and so on.
Copyright (C) 2004-2010
F. Herrera (herrera@decsai.ugr.es)
L. Sánchez (luciano@uniovi.es)
J. Alcalá-Fdez (jalcala@decsai.ugr.es)
S. García (sglopez@ujaen.es)
A. Fernández (alberto.fernandez@ujaen.es)
J. Luengo (julianlm@decsai.ugr.es)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see http://www.gnu.org/licenses/
**********************************************************************/
/* ------------------------------------------------------------------------- */
/* */
/* TOTAL SUPPORT TREE BODE */
/* Frans Coenen */
/* */
/* Wednesday 2 July 2003 */
/* */
/* Department of Computer Science */
/* The University of Liverpool */
/* */
/* ------------------------------------------------------------------------- */
/**
* <p>
* @author Written by Frans Coenen (University of Liverpool) 09/01/2003
* @author Modified by Frans Coenen (University of Liverpool) 03/02/2005
* @author Modified by Nicola Flugy Papa (Politecnico di Milano) 24/03/2009
* @version 1.0
* @since JDK1.6
* </p>
*/
package keel.Algorithms.Subgroup_Discovery.SDMap.FPTree;
import keel.Algorithms.Subgroup_Discovery.SDMap.SDMap.myDataset;
public class FPtree extends TotalSupportTree {
/**
* <p>
* Implementation of Han's FP-growth ARM algorithm
* </p>
*/
/* ------ FIELDS ------ */
/** Structural node of the FP-tree. <P> Pairs one
<TT>FPgrowthItemPrefixSubtreeNode</TT> (item label, support count and links)
with the array of child nodes hanging beneath it. */
protected class FPtreeNode {
    /** Payload holding the item label and counts; left <TT>null</TT> by the
    default constructor. */
    private FPgrowthItemPrefixSubtreeNode node;
    /** Child nodes one level further down the branch; <TT>null</TT> until a
    first child is attached. */
    private FPtreeNode[] childRefs;

    /** Creates a node with neither payload nor children. */
    protected FPtreeNode() {
    }

    /** Creates a childless node around the given payload.
    @param newNode the prefix subtree node to be held by this FP-tree
    node. */
    protected FPtreeNode(FPgrowthItemPrefixSubtreeNode newNode) {
        this.node = newNode;
    }
}
/** Payload of an FP-tree node. <P> Records an item label together with its
support count, a backward link to the parent payload and a forward link to the
next payload carrying the same item label. */
private class FPgrowthItemPrefixSubtreeNode {
    /** Item (attribute) label. */
    private short itemName;
    /** Support accumulated at this node. */
    private int itemCount;
    /** Parent payload in the FP-tree; <TT>null</TT> when none was given. */
    private FPgrowthItemPrefixSubtreeNode parentRef;
    /** Next payload in the chain of nodes sharing this item label (the chain
    starts from an element of a header table); <TT>null</TT> at the end of
    the chain. */
    private FPgrowthItemPrefixSubtreeNode nodeLink;

    /** Creates a payload with default field values. */
    private FPgrowthItemPrefixSubtreeNode() {
    }

    /** Creates a payload with the given label, count and parent.
    @param name the item identifier.
    @param support the initial support value.
    @param backRef the backward link to the parent payload. */
    private FPgrowthItemPrefixSubtreeNode(short name, int support,
            FPgrowthItemPrefixSubtreeNode backRef) {
        this.itemName = name;
        this.itemCount = support;
        this.parentRef = backRef;
    }
}
/** Header table element. <P> An array of these elements is used to link into
an FP-tree: every FP-tree node carrying a given item label is reachable by
following the chain that starts at the element with that label. It is this
"cross" linking that gives the FP-tree its most significant advantage. */
protected class FPgrowthHeaderTable {
    /** The 1-itemset (attribute) label this element stands for. */
    protected short itemName;
    /** First node in the chain of FP-tree nodes with this label;
    <TT>null</TT> while no such node has been linked. */
    protected FPgrowthItemPrefixSubtreeNode nodeLink;

    /** Creates an element for the given label with an empty chain.
    @param columnNum the item (attribute) label. */
    protected FPgrowthHeaderTable(short columnNum) {
        this.itemName = columnNum;
    }
}
/** Linked-list cell holding one ancestor itemSet, i.e. the labels of the
FP-tree nodes that precede a node reached by following the chain of links from
a particular element of the header table, together with a support value. */
private class FPgrowthSupportedSets {
    /** The itemSet label. */
    private short[] itemSet;
    /** Support value associated with the itemSet. */
    private int support;
    /** Next cell in the linked list; <TT>null</TT> at the end. */
    private FPgrowthSupportedSets nodeLink;

    /** Creates a cell placed in front of an existing list.
    @param newitemSet the itemSet label to store.
    @param newSupport the support value associated with the itemSet.
    @param newNodeLink the cell that is to follow this one (may be
    <TT>null</TT>). */
    private FPgrowthSupportedSets(short[] newitemSet, int newSupport,
            FPgrowthSupportedSets newNodeLink) {
        this.itemSet = newitemSet;
        this.support = newSupport;
        this.nodeLink = newNodeLink;
    }
}
/** Support counter for a single column/attribute. */
private class FPgrowthColumnCounts {
    /** The column/attribute ID number (narrowed to <TT>short</TT>). */
    private short columnNum;
    /** The support counted so far for that column. */
    private int support;

    /** Creates a counter with a support of zero.
    @param column the column/attribute ID number. */
    private FPgrowthColumnCounts(int column) {
        this(column, 0);
    }

    /** Creates a counter with a preset support.
    @param column the column/attribute ID number.
    @param sup the associated support value. */
    private FPgrowthColumnCounts(int column, int sup) {
        this.columnNum = (short) column;
        this.support = sup;
    }
}
// Data structures
/** Start reference for FP-tree. */
protected FPtreeNode rootNode = null;
/** Start reference for header table. */
protected FPgrowthHeaderTable[] headerTable;
/** Start reference for supportedSets linked list (temporary storage only).
<P> Held per instance rather than statically: the list is made of inner-class
objects belonging to one tree and is only read and written by instance
methods, so a static field would let separate <TT>FPtree</TT> objects
overwrite each other's working list and would keep the last list reachable
after mining has finished. */
private FPgrowthSupportedSets startTempSets = null;
// Other fields
/** Temporary storage for an index into an array of FP-tree nodes. <P>
Set by <TT>reallocFPtreeChildRefs</TT> to the position at which the new node
was placed, and read by the caller straight afterwards. */
private int tempIndex = 0;
/* ------ CONSTRUCTORS ------ */
/** Constructor to process dataset and parameters.
* @param ds The instance of the dataset for dealing with its records
* @param sup The user-specified minimum support for the mined association rules
* @param conf The user-specified minimum confidence for the mined association rules */
public FPtree(myDataset ds, double sup, double conf) {
super(ds, sup, conf);
// Initialise root node
rootNode = new FPtreeNode();
// Create header table
headerTable = new FPgrowthHeaderTable[numOneItemSets+1];
// Populate header table
for (int index=1;index<headerTable.length;index++) {
headerTable[index] = new FPgrowthHeaderTable((short) index);
}
}
/* ------ METHODS ------ */
/*-------------------------------------------------------------------*/
/* */
/* GENERATE FP-TREE */
/* */
/*-------------------------------------------------------------------*/
/* CREATE FP-TREE */
/** Top level method to commence the construction of the FP-Tree. <P>
Replaces the header table with a fresh one (one element per label in
<TT>1..numOneItemSets</TT>) and then inserts every non-null record of the data
array below <TT>rootNode</TT> with a support of 1.
<P> NOTE(review): <TT>rootNode</TT> is not re-created here, so this method is
presumably meant to be called once per instance - confirm against callers. */
public void createFPtree() {
    headerTable = new FPgrowthHeaderTable[numOneItemSets + 1];
    for (int slot = 1; slot < headerTable.length; slot++) {
        headerTable[slot] = new FPgrowthHeaderTable((short) slot);
    }
    // Records may be null if the initial data set has been reordered and
    // pruned; those are skipped.
    for (short[] record : dataArray) {
        if (record == null) {
            continue;
        }
        addToFPtree(rootNode, 0, record, 1, headerTable);
    }
}
/* ADD TO FP-TREE */
/** Inserts the remainder of an itemset below the given FP-tree node. <P>
Does nothing once <TT>place</TT> has reached the end of the itemset.
Otherwise an existing child carrying the current item is updated and followed
(<TT>addToFPtree1</TT>); if there is none, a new branch is created
(<TT>addToFPtree2</TT>).
@param ref the current location in the FP-tree (<TT>rootNode</TT> at start).
@param place the current index in the given itemset.
@param itemSet the given itemset.
@param support the associated support value for the given itemset.
@param headerRef the link to the appropriate place in the header table. */
private void addToFPtree(FPtreeNode ref, int place, short[] itemSet,
        int support, FPgrowthHeaderTable[] headerRef) {
    if (place >= itemSet.length) {
        return;
    }
    boolean merged = addToFPtree1(ref, place, itemSet, support, headerRef);
    if (!merged) {
        addToFPtree2(ref, place, itemSet, support, headerRef);
    }
}
/* ADD TO FP TREE 1 */
/** Tries to merge the current item into an existing child of the given node.
<P> When a child carrying <TT>itemSet[place]</TT> exists, its count is raised
by <TT>support</TT>, <TT>numUpdates</TT> is incremented and insertion continues
below that child with the next item.
@param ref the current FP-tree node reference.
@param place the current index in the given itemset.
@param itemSet the given itemset.
@param support the associated support value for the given itemset.
@param headerRef the link to the appropriate place in the header table.
@return true if a matching child was found (and updated), and false
otherwise. */
private boolean addToFPtree1(FPtreeNode ref, int place, short[] itemSet,
        int support, FPgrowthHeaderTable[] headerRef) {
    final FPtreeNode[] children = ref.childRefs;
    if (children == null) {
        return false;
    }
    for (FPtreeNode child : children) {
        final short wanted = itemSet[place];
        final FPgrowthItemPrefixSubtreeNode payload = child.node;
        if (wanted == payload.itemName) {
            payload.itemCount += support;
            numUpdates++;
            addToFPtree(child, place + 1, itemSet, support, headerRef);
            return true;
        }
        // Children are kept in ascending label order, so a larger label
        // means the wanted item cannot appear further along.
        if (wanted < payload.itemName) {
            return false;
        }
    }
    return false;
}
/* ADD TO FP TREE 2 */
/** Starts a new branch below the given node. <P> Creates a node for
<TT>itemSet[place]</TT> whose parent link is the payload of <TT>ref</TT>,
links it from the header table, inserts it among the children of <TT>ref</TT>
and then appends the rest of the itemset beneath it.
@param ref the current FP-tree node reference.
@param place the current index in the given itemset.
@param itemSet the given itemset.
@param support the associated support value for the given itemset.
@param headerRef the link to the appropriate place in the header table. */
private void addToFPtree2(FPtreeNode ref, int place, short[] itemSet,
        int support, FPgrowthHeaderTable[] headerRef) {
    final short label = itemSet[place];
    final FPgrowthItemPrefixSubtreeNode payload =
            new FPgrowthItemPrefixSubtreeNode(label, support, ref.node);
    final FPtreeNode branchHead = new FPtreeNode(payload);
    addRefToFPgrowthHeaderTable(label, payload, headerRef);
    ref.childRefs = reallocFPtreeChildRefs(ref.childRefs, branchHead);
    // tempIndex was just set to the slot that received the new child.
    final FPtreeNode inserted = ref.childRefs[tempIndex];
    addRestOfitemSet(inserted, payload, place + 1, itemSet, support,
            headerRef);
}
/* ADD REST OF ITEMSET */
/** Appends the remaining items of the current itemset as a chain of new
nodes. <P> For each item from <TT>place</TT> onwards a node is created with
the previous payload as its parent, linked from the header table and attached
as a child of the node created before it.
@param ref the current FP-tree node reference.
@param backRef the backwards link to the previous node.
@param place the current index in the given itemset.
@param itemSet the given itemset.
@param support the associated support value for the given itemset.
@param headerRef the link to the appropriate place in the header table. */
private void addRestOfitemSet(FPtreeNode ref, FPgrowthItemPrefixSubtreeNode backRef,
        int place, short[] itemSet, int support,
        FPgrowthHeaderTable[] headerRef) {
    FPtreeNode attachTo = ref;
    FPgrowthItemPrefixSubtreeNode parentPayload = backRef;
    for (int pos = place; pos < itemSet.length; pos++) {
        final FPgrowthItemPrefixSubtreeNode payload =
                new FPgrowthItemPrefixSubtreeNode(itemSet[pos], support,
                        parentPayload);
        final FPtreeNode wrapper = new FPtreeNode(payload);
        addRefToFPgrowthHeaderTable(itemSet[pos], payload, headerRef);
        attachTo.childRefs =
                reallocFPtreeChildRefs(attachTo.childRefs, wrapper);
        // Step down to the slot reported by the reallocation.
        attachTo = attachTo.childRefs[tempIndex];
        parentPayload = payload;
    }
}
/* ADD REF TO HEADER TABLE */
/** Puts a new FP-tree node at the front of the chain kept by its header
table element. <P> The element whose label equals <TT>columnNumber</TT> is
looked up by a scan from slot 1; its previous chain head becomes the successor
of the new node. If no element carries the label, nothing is changed.
@param columnNumber the given attribute.
@param newNode the newly created FP-tree node.
@param headerRef the reference to the header table (array). */
private void addRefToFPgrowthHeaderTable(short columnNumber,
        FPgrowthItemPrefixSubtreeNode newNode,
        FPgrowthHeaderTable[] headerRef) {
    for (int slot = 1; slot < headerRef.length; slot++) {
        final FPgrowthHeaderTable entry = headerRef[slot];
        if (entry.itemName != columnNumber) {
            continue;
        }
        final FPgrowthItemPrefixSubtreeNode previousHead = entry.nodeLink;
        entry.nodeLink = newNode;
        newNode.nodeLink = previousHead;
        return;
    }
}
/* ---------------------------------------------------------- */
/* */
/* FP-TREE MINING */
/* */
/* ---------------------------------------------------------- */
/* Methodology:
1) Step through header table from end to start (least common single
attribute to most common single attribute). For each item.
a) Count support by following node links and add to linked list of
supported sets.
b) Determine the "ancestor trails" connected to the nodes linked to the
current item in the header table.
c) Treat the list of ancestor itemSets as a new set of input data and
create a new header table based on the accumulated supported counts of
the single items in the ancestor itemSets
d) Prune the ancestor itemSets so as to remove unsupported items.
e) Repeat (1) with local header table and list of pruned ancestor itemSets
as input */
/* START MINING */
/** Top level "FP-growth method" to mine the FP tree. <P> Mines the tree
reachable from the instance header table, starting with no item set prefix,
and then calls <TT>generateARs()</TT>. */
public void startMining() {
    final short[] noPrefix = null;
    startMining(headerTable, noPrefix);
    generateARs();
}
/* START MINING */
/** Commences process of mining the FP tree. <P> Walks the given header
table from its last element down to the element at index 1 and, for every
element that has a link to an FP-tree node, mines with respect to that
element. The per-element work is carried out by the three argument
<TT>startMining</TT> overload:
<OL>
<LI> Count the support.
<LI> Build up itemSet sofar.
<LI> Add to supported sets.
<LI> Build a new FP tree: (i) create a new local root, (ii) create a
new local header table and (iii) populate with ancestors.
<LI> If new local FP tree is not empty repeat mining operation.
</OL>
Elements without a link are skipped.
@param tableRef the reference to the header table to process (index 0 is
not visited).
@param itemSetSofar the label for the current item sets as generated to
date (null at start). */
private void startMining(FPgrowthHeaderTable[] tableRef,
        short[] itemSetSofar) {
    // Loop through header table from end to start, item by item
    for (int index = tableRef.length - 1; index >= 1; index--) {
        FPgrowthHeaderTable entry = tableRef[index];
        // Only elements that link to at least one FP-tree node are mined
        if (entry.nodeLink != null) {
            startMining(entry.nodeLink, entry.itemName, itemSetSofar);
        }
    }
}
/** Mines the FP tree with respect to a single element of a header table.
<P> The support of the element is summed along its chain of links and stored,
together with the extended item set, through <TT>addToTtree</TT>. The
ancestor itemSets of the chained nodes are then collected; if there are any
and at least one of their items is supported, they are pruned, turned into a
local FP tree and mined in turn with the extended item set as prefix.
@param nodeLink the first link from the header table pointing to an FP-tree
node.
@param itemName the label associated with the element of interest in the
header table.
@param itemSetSofar the item set represented by the current FP-tree. */
protected void startMining(FPgrowthItemPrefixSubtreeNode nodeLink,
        short itemName, short[] itemSetSofar) {
    final int itemSupport = genSupHeadTabItem(nodeLink);
    final short[] extendedSet = realloc2(itemSetSofar, itemName);
    addToTtree(extendedSet, itemSupport);

    // Rebuild the temporary list of ancestor itemSets for this element.
    startTempSets = null;
    generateAncestorCodes(nodeLink);
    if (startTempSets == null) {
        return;
    }

    final FPgrowthColumnCounts[] singles = countFPgrowthSingles();
    final FPgrowthHeaderTable[] conditionalTable =
            createLocalHeaderTable(singles);
    if (conditionalTable == null) {
        return;
    }

    pruneAncestorCodes(singles);
    // The returned root is not needed: the local tree is reached through
    // the links stored in the local header table.
    generateLocalFPtree(conditionalTable);
    startMining(conditionalTable, extendedSet);
}
/* ---------------------------------------------------------------------- */
/* */
/* PROCESS CURRENT HEADER TABLE */
/* */
/* ---------------------------------------------------------------------- */
/* GENERATE SUPPORT FOR HEADER TABLE SINGLE ITEM: */
/** Sums the counts of all FP-tree nodes on one chain of node links. <P>
<TT>numUpdates</TT> is incremented once per node visited.
@param nodeLink the start link from the header table.
@return the support value for the item indicated by the header table
element (0 for an empty chain). */
private int genSupHeadTabItem(FPgrowthItemPrefixSubtreeNode nodeLink) {
    int total = 0;
    for (FPgrowthItemPrefixSubtreeNode cursor = nodeLink; cursor != null;
            cursor = cursor.nodeLink) {
        total += cursor.itemCount;
        numUpdates++;
    }
    return total;
}
/* ---------------------------------------------------------------------- */
/* */
/* ANCESTOR CODES */
/* */
/* ---------------------------------------------------------------------- */
/* GENERATE ANCESTOR CODES */
/** Collects the ancestor itemSets of every node on a chain of node links.
<P> For each node the labels of its parents are gathered with
<TT>getAncestorCode</TT>; when the node has at least one parent, the
resulting itemSet is put, with the node's own count as support, at the front
of the linked list pointed at by <TT>startTempSets</TT>.
@param ref the reference to the first node of the chain in the prefix tree
containing itemsets together with support values. */
private void generateAncestorCodes(FPgrowthItemPrefixSubtreeNode ref) {
    for (FPgrowthItemPrefixSubtreeNode cursor = ref; cursor != null;
            cursor = cursor.nodeLink) {
        final int nodeSupport = cursor.itemCount;
        final short[] ancestors = getAncestorCode(cursor.parentRef);
        if (ancestors == null) {
            continue; // no parents, nothing to record
        }
        startTempSets = new FPgrowthSupportedSets(ancestors, nodeSupport,
                startTempSets);
    }
}
/* GET ANCESTOR CODE */
/** Generates the ancestor itemSet for a given node. <P> Starting at
<TT>ref</TT> and following parent links until there are none left, each label
met is added to the result through <TT>realloc2</TT>.
@param ref the reference to the current node in the prefix tree containing
itemsets together with support values.
@return the collected itemSet, or <TT>null</TT> when <TT>ref</TT> is
<TT>null</TT>. */
private short[] getAncestorCode(FPgrowthItemPrefixSubtreeNode ref) {
    short[] collected = null;
    // A null start leaves the loop untouched and null is returned.
    for (FPgrowthItemPrefixSubtreeNode cursor = ref; cursor != null;
            cursor = cursor.parentRef) {
        collected = realloc2(collected, cursor.itemName);
    }
    return collected;
}
/* PRUNE ANCESTOR CODES */
/** Removes elements in ancestor itemSets (pointed at by
<TT>startTempSets</TT>) which are not supported by referring to count
array (which contains the support of all the current 1 itemsets). <P> An
item is removed when the support recorded for its label is below
<TT>minSupport</TT>. Each itemSet is scanned from its last position down to
its first: a removal then only moves elements that have already been
examined. A front-to-back scan that advances after every removal would never
examine the element that slides into the freed position, so the second of
two adjacent unsupported items would survive.
<P> Assumes <TT>removeElementN</TT> returns the itemSet without the element
at the given index - confirm in the superclass.
@param countArray the array of <TT>FPgrowthColumnCounts</TT> structures
describing the single item sets (in terms of labels and associated
support), indexed by item label, contained in a linked list of
<TT>FPgrowthSupportedSets</TT> which in turn describe the ancestor nodes in
an FP-tree that precede the nodes identified by following a trail of links
from a particular item in the header table. */
private void pruneAncestorCodes(FPgrowthColumnCounts[] countArray) {
    // Loop through linked list of ancestor paths
    for (FPgrowthSupportedSets ref = startTempSets; ref != null;
            ref = ref.nodeLink) {
        // Back to front, so that removals cannot cause an element to be
        // skipped.
        for (int index = ref.itemSet.length - 1; index >= 0; index--) {
            if (countArray[ref.itemSet[index]].support < minSupport) {
                ref.itemSet = removeElementN(ref.itemSet, index);
            }
        }
    }
}
/* ---------------------------------------------------------------------- */
/* */
/* CREATE NEW HEADER TABLE FROM SINGLE ITEMS IN ANCESTOR CODES */
/* */
/* ---------------------------------------------------------------------- */
/* COUNT SINGLES */
/** Counts the support of every single item occurring in the ancestor
itemSets linked list. <P> The result has one counter per label in
<TT>1..numOneItemSets</TT> (slot 0 is left <TT>null</TT>), so that an item
label can be used directly as an index. Every occurrence of an item adds the
support of the itemSet it occurs in and increments <TT>numUpdates</TT>.
@return array of <TT>FPgrowthColumnCounts</TT> structures describing the
single item sets (in terms of labels and associated support), contained in
a linked list of <TT>FPgrowthSupportedSets</TT> which in turn describe the
ancestor nodes in an FP-tree that precede the nodes identified by following
a trail of links from a particular item in the header table. */
private FPgrowthColumnCounts[] countFPgrowthSingles() {
    final FPgrowthColumnCounts[] counts =
            new FPgrowthColumnCounts[numOneItemSets + 1];
    for (int column = 1; column < counts.length; column++) {
        counts[column] = new FPgrowthColumnCounts(column);
    }
    for (FPgrowthSupportedSets entry = startTempSets; entry != null;
            entry = entry.nodeLink) {
        for (short item : entry.itemSet) {
            counts[item].support += entry.support;
            numUpdates++;
        }
    }
    return counts;
}
/* CREATE LOCAL HEADER TABLE */
/** Creates a local header table comprising those items that are supported
in the count array. <P> The table is produced by
<TT>localHeadTabUnordered</TT> and returned as is; no ordering by single item
support is applied.
@param countArray the support for the 1 item sets.
@return a FPgrowth header table, or <TT>null</TT> when no item is
supported. */
private FPgrowthHeaderTable[]
createLocalHeaderTable(FPgrowthColumnCounts[] countArray) {
    return localHeadTabUnordered(countArray);
}
/* CREATE NEW LOCAL HEADER TABLE (UNORDERED) */
/** Creates a new local header table, left in count array order. <P> The
table holds one element per entry of the count array (from index 1 on) whose
support reaches <TT>minSupport</TT>; elements start at slot 1 and slot 0 is
left <TT>null</TT>.
@param countArray the support for the 1 item sets.
@return a FPgrowth header table, or <TT>null</TT> when no entry is
supported. */
private FPgrowthHeaderTable[]
localHeadTabUnordered(FPgrowthColumnCounts[] countArray) {
    // First pass: how many items are supported?
    int supported = 0;
    for (int column = 1; column < countArray.length; column++) {
        if (countArray[column].support >= minSupport) {
            supported++;
        }
    }
    if (supported == 0) {
        return null;
    }
    // Second pass: one element per supported item, starting at slot 1.
    final FPgrowthHeaderTable[] table =
            new FPgrowthHeaderTable[supported + 1];
    int slot = 1;
    for (int column = 1; column < countArray.length; column++) {
        final FPgrowthColumnCounts entry = countArray[column];
        if (entry.support >= minSupport) {
            table[slot] = new FPgrowthHeaderTable(entry.columnNum);
            slot++;
        }
    }
    return table;
}
/* ---------------------------------------------------------------------- */
/* */
/* GENERATE NEW FP-TREE */
/* */
/* ---------------------------------------------------------------------- */
/* GENERATE LOCAL FP-tree */
/** Generates a local FP tree from the ancestor itemSets linked list. <P>
Every cell of the list pointed at by <TT>startTempSets</TT> whose itemSet is
not <TT>null</TT> is inserted, with the support stored in the cell, below a
newly created root.
@param tableRef reference to the header table that is to receive the links
to the nodes of the generated FP-tree.
@return reference to the start of the generated FP-tree. */
private FPtreeNode generateLocalFPtree(FPgrowthHeaderTable[] tableRef) {
    final FPtreeNode freshRoot = new FPtreeNode();
    for (FPgrowthSupportedSets cell = startTempSets; cell != null;
            cell = cell.nodeLink) {
        if (cell.itemSet == null) {
            continue;
        }
        addToFPtree(freshRoot, 0, cell.itemSet, cell.support, tableRef);
    }
    return freshRoot;
}
/* ---------------------------------------------------------- */
/* */
/* FP-TREE UTILITIES */
/* */
/* ---------------------------------------------------------- */
/* REALLOC 1 FP-TREE */
/** Returns a copy of the given array of FP-tree nodes, one element longer,
with the new node inserted. <P> The new node is placed in front of the first
existing node whose label is greater than its own, or at the end when there
is none. The position used is left in <TT>tempIndex</TT> for the caller.
@param oldArray the given array of FP-tree nodes (<TT>null</TT> when there
are no nodes yet).
@param newNode the given node to be added to the FP-tree
@return The revised array of FP-tree nodes. */
private FPtreeNode[] reallocFPtreeChildRefs(FPtreeNode[] oldArray,
        FPtreeNode newNode) {
    if (oldArray == null) {
        tempIndex = 0;
        return new FPtreeNode[] {newNode};
    }
    final int oldLength = oldArray.length;
    // Find the first existing node with a label above the new one.
    int insertAt = 0;
    while (insertAt < oldLength
            && newNode.node.itemName >= oldArray[insertAt].node.itemName) {
        insertAt++;
    }
    final FPtreeNode[] grown = new FPtreeNode[oldLength + 1];
    System.arraycopy(oldArray, 0, grown, 0, insertAt);
    grown[insertAt] = newNode;
    System.arraycopy(oldArray, insertAt, grown, insertAt + 1,
            oldLength - insertAt);
    tempIndex = insertAt;
    return grown;
}
}