/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /* -------------------------------------------------------------------------- */ /* */ /* P A R T I A L S U P P O R T T R E E */ /* */ /* Frans Coenen */ /* */ /* Wednesday 9 January 2003 */ /* (Revised 5/7/2003) */ /* */ /* Department of Computer Science */ /* The University of Liverpool */ /* */ /* -------------------------------------------------------------------------- */ /* Structure: AssocRuleMining | +-- TotalSupportTree | +-- PartialSupportTree */ /* Java packages */ package keel.Algorithms.Associative_Classification.ClassifierCMAR; import java.io.*; import java.util.*; import javax.swing.*; /** * Methods to implement the "Apriori-TFP" (Total From Partial) ARM algorithm using both the T-tree (Total support tree) and P-tree (Partial support tree data structures. * * @author Frans Coenen 5 July 2003 * @author Modified by Jesus Alcala (University of Granada) 09/02/2010 * @author Modified by Sergio Almecija (University of Granada) 23/05/2010 * @version 1.0 * @since JDK1.5 */ public class PartialSupportTree extends TotalSupportTree { /*------------------------------------------------------------------------*/ /* */ /* FIELDS */ /* */ /*------------------------------------------------------------------------*/ /* ------ NESTED CLASSES ------ */ /** Structurte to contain P-tree data in tabular form for improved computational efficiency when creating T-tree. <P> A 2-D array of these structures is created in which to store the Ptree. */ protected class PtreeRecord { /** Label for P-tree node. */ private short[] pTreeNodeLabel = null; /** Uunion of a pTree node label (<TT>pTreeNodeLabel</TT>) and all its ancestor node labels. */ private short[] pTreeItemSet = null; /** Partial support count. */ private int support = 0; /** Creaste P-tree record for inclusion in table. @param nodeLabel the label for P-tree node. @param itemSet the uunion of a pTree node label (<TT>nodeLabel</TT>) and all its ancestor node labels. @param sup the partial support count. */ private PtreeRecord(short[] nodeLabel, short[] itemSet, int sup) { pTreeNodeLabel = nodeLabel; pTreeItemSet = itemSet; support = sup; } } /* Array data structures for P tree */ /** Null array describing an "emty" itemset. */ private short[] zeroitemSet = null; /** Reference variable pointing to start of P-tree. */ private PtreeNodeTop[] startPtreeRef = null; /* ------ P-TREE DATA TABLE ----- */ /** Array of arrays data structures for P-tree table (used as a computational efficiency measure). */ protected PtreeRecord[][] startPtreeTable = null; /** Array for holding the number of P-tree nodes for each possible cardinality given the number of attributes, maximum is equal to number of columns. */ private int[] pTreeNodesOfCardinalityN = null; /** Array of "markers" used during the generation of the P-tree, and contains "current index" values for each level of cardinality. */ private int[] pTreeTableMarker = null; /* Other fields */ /** Number of node updates (used for diagnostic purposes). */ private int numberOfNodeUpdates = 0; /*---------------------------------------------------------------------*/ /* */ /* CONSTRUCTORS */ /* */ /*---------------------------------------------------------------------*/ /** Processes command line arguments. <P> The <TT>numOneItemSets</TT> is * incremented by 1 so that indexes match the column numbers * @param minConf double Minimum confidence threshold * @param minSup double Minimum support threshold * @param delta int Minimum coverage threshold */ public PartialSupportTree(double minConf, double minSup, int delta) { super(minConf, minSup, delta); } /*-------------------------------------------------------------------*/ /* */ /* TREE BUILDING METHODS */ /* */ /*-------------------------------------------------------------------*/ /* CREATE P-TREE */ /** Processes data set causing each row to be added to P-Tree. */ public void createPtree() { System.out.println("GENERATING P-TREE\n------------------"); // Dimension top line of P-tree startPtreeRef = new PtreeNodeTop[numOneItemSets+1]; // Dimension P-tree table startPtreeTable = new PtreeRecord[numOneItemSets+1][]; pTreeNodesOfCardinalityN = new int[numOneItemSets+1]; pTreeTableMarker = new int[numOneItemSets+1]; // Initilalise top level of Ptree with nulls for(int index=0; index<startPtreeRef.length; index++) startPtreeRef[index] = null; // Process input data, loop through input (stored in data array) // For each entry add the entry to the P-tree. for (int index=0; index<dataArray.length; index++) { if (dataArray[index] != null) addToPtreeTopLevel(dataArray[index]); } // Create P-tree table System.out.println("Creating P-tree table"); createPtreeTable(); } /* ADD TO P-TREE TOP LEVEL */ /** Commences process to add an itemset to the P-tree starting with the top level of the tree. <P> Note that the top level is an array. @param itemset the given item set. */ private void addToPtreeTopLevel(short[] itemSet) { int index = itemSet[0]; // Calculate index int itemSetLength = itemSet.length; // If single attibute itemSet create or update element, otherwise // create or update element and proceed down child branch (flag = 1) if (itemSetLength == 1) { // Top level if (startPtreeRef[index] == null) { startPtreeRef[index] = new PtreeNodeTop(); pTreeNodesOfCardinalityN[1]++; } else startPtreeRef[index].support = startPtreeRef[index].support+1; numberOfNodeUpdates++; } // itemSet length greater than 1 therefore proceed down rest of tree else { // If no top level node create one. if (startPtreeRef[index] == null) { startPtreeRef[index] = new PtreeNodeTop(); pTreeNodesOfCardinalityN[1]++; } else startPtreeRef[index].support = startPtreeRef[index].support+1; numberOfNodeUpdates++; // Descend from top level node addToPtree(0,2,itemSetLength,startPtreeRef[index].childRef, realloc3(itemSet),index,null); } } /* ADD TO P-TREE */ /** Inserts given itemset into P-tree. <P> Operates as follows: If found leaf node create new node and stop. Otherwise: <UL> <LI> code = 1, Increment support <LI> code = 2, Itemset before and subset of current node (parent) <LI> code = 3, Itemset before and not subset of current node (elder sibling) <LI> code = 4, Itemset after and superset of current node (child) <LI> code = 5, Itemset after and not superset of current node (younger sibling) </UL> <P>Codes generated by call the checkitemSet function. Arguments as follows: @param flag the type of branch currently under consideration: code 0 = root, 1 = child, 2 = sibling. @param parentLength the number of elements represented by the parent node of the current node. Used only when adding new "dummy" nodes to maintain count of number of nodes of a given size for when the Ptree table is generated. @param itemSetLength the number of elements in the current itemSet. Used only when adding new nodes to maintain count of number of nodes of a given size for when the Ptree table is generated. @param linkRef the reference (pointer) to current location in the P-tree. @param itemSet the row itemSet in the input file currently under consideration. @param topIndex the index of the element in the array marking the top level of the P-tree, used only when inserting new nodes hanging from this top level otherwise ignored. @param oldRef the reference (pointer) to the previous location in the P-tree, used when inserting new nodes. */ public void addToPtree(int flag, int parentLength, int itemSetLength, PtreeNode linkRef, short[] itemSet, int topIndex, PtreeNode oldRef) { // No child node hanging of previous level array therefore add new // node here. if (linkRef == null) { PtreeNode newRef = createPtreeNode(itemSet,itemSetLength); addSupport2(flag,newRef,topIndex,oldRef); } // Otherwise process tree else { switch (checkItemSets(itemSet,linkRef.itemSet)) { case 1: /* Rule 1: Same */ numberOfNodeUpdates++; linkRef.support++; break; case 2: /* Rule 2: Before and subset (parent) */ beforeAndSubset(flag,itemSetLength,linkRef,itemSet,topIndex, oldRef); break; case 3: /* Rule 3: Before and not subset (elder sibling) */ beforeAndNotSubset(flag,parentLength,itemSetLength,linkRef, itemSet,topIndex,oldRef); break; case 4: /* Rule 4: After and superset (child) */ afterAndSuperset(parentLength,itemSetLength,linkRef, itemSet); break; case 5: /* Rule 5: After and not superset (younger sibling) */ afterAndNotSuperset(flag,parentLength,itemSetLength,linkRef, itemSet,topIndex,oldRef); break; default: /* Default: Error */ } } } /* BEFORE AND SUBSET */ /** Adds new node into the P-tree on a parent/child link so that the new node is the parent of the existing child branch and the child of the previous "parent"; also checks if any siblings need to be "moved up". <P> Possibilities: <OL> <LI>Connect to top level node with no siblings moved up ({1 2 3} {1 2}) <LI>Connect to top level node with siblings moved up ({1 2 3} {1 4} {1 2}) <LI>Connect to child ref with no siblings moved up ({1 2 3 4} {1 2} {1 2 3}) <LI>Connect to child ref with siblings moved up ({1 2 3 4} {1 2 4 5} {1 2} {1 2 3}) <LI>connect to sibling ref with no siblings moved up ({1 2} {1 3 4} {1 3}) <LI>connect to sibling ref with siblings moved up ({1 2} {1 3 4} (1 4) {1 3}) </OL> @param flag the type of branch currently under consideration: code 0 = root, 1 = child, 2 = sibling. @param itemSetLength the number of elements in the current itemSet. Used only when adding new nodes to maintain count of number of nodes of a given size for when the Ptree table is generated. @param linkRef the reference (pointer) to current location in the P-tree. @param currentItemSet the row itemSet in the input file currently under consideration. @param topIndex the index of the element in the array marking the top level of the P-tree, used only when inserting new nodes hanging from this top level otherwise ignored. @param oldRef the reference (pointer) to the previous location in the P-tree, used when inserting new nodes.*/ private void beforeAndSubset(int flag, int itemSetLength, PtreeNode linkRef, short[] currentitemSet, int topIndex, PtreeNode oldRef) { // Create new node with support of current node added in; PtreeNode newRef = createPtreeNode(currentitemSet,itemSetLength); newRef.support = newRef.support+linkRef.support; numberOfNodeUpdates++; // Link in existing branch newRef.childRef = linkRef; // Connect new node into tree structure and adjust existing current // node itemSet so that it does not include the new parent node itemSet addSupport2(flag,newRef,topIndex,oldRef); linkRef.itemSet = realloc4(linkRef.itemSet,currentitemSet); // Check whether any siblings of the existing node need to be // "moved up" to become a sibling of the new node checkSiblingBranch(linkRef,currentitemSet,newRef); } /* BEFORE AND NOT SUBSET */ /** Insets node into P-tree where new itemset is an elder sibling of the "current" node. <P> First checks for leading substring with existing node; if found and leading substring is not same as current parent creates a new P-tree node for this substring and then adds in the new node. Possibilities: <OL> <LI>Connect to top level node with no common leading substring with current node ({1 3} {1 2}). <LI>Connect to top level node with common leading substring with current node and no siblings moved up ({1 2 4} {1 2 3}). <LI>Connect to top level node with common leading substring with current node and siblings moved up ({1 2 4} (1 3} {1 2 3}). <LI>Connect to child ref with no common leading substring with current node ({1 2} {1 2 4} {1 2 3}). <LI>Connect to child ref with common leading substring with current node and no siblings moved up ({1 2} {1 2 4 5 7} {1 2 4 5 6}). <LI>Connect to child ref with common leading substring with current node and siblings moved up ({1 2} {1 2 4 5 7} {1 3} {1 2 4 5 6}). <LI>Connect to sibling ref with no common leading substring with current node ({1 2} {1 4} {1 3}). <LI>Connect to sibling ref with common leading substring with current node and no siblings moved up ({1 2} {1 4 5 7} {1 4 5 6}). <LI>Connect to sibling ref with common leading substring with current node and siblings moved up ({1 2} {1 4 5 7} {1 6} {1 4 5 6}). </OL> @param flag the type of branch currently under consideration: code 0 = root, 1 = child, 2 = sibling. @param parentLength the number of elements represented by the parent node of the current node. Used only when adding new "dummy" nodes to maintain count of number of nodes of a given size for when the Ptree table is generated. @param itemSetLength the number of elements in the current itemSet. Used only when adding new nodes to maintain count of number of nodes of a given size for when the Ptree table is generated. @param linkRef the reference (pointer) to current location in the P-tree. @param currentItemSet the row itemSet in the input file currently under consideration. @param topIndex the index of the element in the array marking the top level of the P-tree, used only when inserting new nodes hanging from this top level otherwise ignored. @param oldRef the reference (pointer) to the previous location in the P-tree, used when inserting new nodes. */ private void beforeAndNotSubset(int flag, int parentLength, int itemSetLength, PtreeNode linkRef, short[] currentItemSet, int topIndex, PtreeNode oldRef) { // Find leading common ellements in row and sibling itemSets if any short[] subsetItemSet = checkForLeadingSubString(currentItemSet, linkRef.itemSet); // If leading common ellements exists create new node representing // common elements and add current itemSet as child of this new node. // Otherwise add new itemSet as elder sibling. if (subsetItemSet != null) { // Leading substring exists // Create new parent representing subset PtreeNode newParentRef = createPtreeNode(subsetItemSet, subsetItemSet.length+parentLength-1); // Add support made up of existing support + 1 for current row newParentRef.support = linkRef.support+1; // Insert new parent node into tree addSupport2(flag,newParentRef,topIndex,oldRef); // Remove leading substring from row itemSet currentItemSet = realloc4(currentItemSet,subsetItemSet); // Attach as child and add on sibling, newParentRef.childRef = createPtreeNode(currentItemSet, itemSetLength); newParentRef.childRef.siblingRef = linkRef; // Check whether any siblings need to be "moved up" checkSiblingBranch(newParentRef.childRef,subsetItemSet,newParentRef); } else { // Create new node PtreeNode newSiblingRef = createPtreeNode(currentItemSet,itemSetLength); // Attach existing node as younger sibling newSiblingRef.siblingRef = linkRef; // Insert into tree addSupport2(flag,newSiblingRef,topIndex,oldRef); } } /* AFTER AND SUPERSET */ /** Insets node into P-tree where new itemset is an child of the "current" node. <P> If no more child nodes add new node to "current" node as child, else carry on down the tree with flag set to 1 to indicate we are following a child branch. Possibilities: <OL> <LI> Add to top level node ({1 2}). <LI> Add to child ref ({1 2} {1 2 3}). </OL> @param parentLength the number of elements represented by the parent node of the current node. Used only when adding new "dummy" nodes to maintain count of number of nodes of a given size for when the Ptree table is generated. @param itemSetLength the number of elements in the current itemSet. Used only when adding new nodes to maintain count of number of nodes of a given size for when the Ptree table is generated. @param linkRef the reference (pointer) to current location in the P-tree. @param currentItemSet the row itemSet in the input file currently under consideration. */ private void afterAndSuperset(int parentLength, int itemSetLength, PtreeNode linkRef, short[] currentItemSet) { numberOfNodeUpdates++; linkRef.support = linkRef.support+1; // Increment support // End of child branch if (linkRef.childRef == null) { // Remove existing parent itemSet from currentItemSet PtreeNode newRef = createPtreeNode(realloc4(currentItemSet, linkRef.itemSet),itemSetLength); // Add to existing node as child linkRef.childRef = newRef; } // More children, remove existing current itemSet from row itemSet and // continue down child branch else addToPtree(1,parentLength+linkRef.itemSet.length,itemSetLength, linkRef.childRef,realloc4(currentItemSet, linkRef.itemSet),0,linkRef); } /* AFTER AND NOT SUPERSET */ /** Commeences process of inserting node into P-tree where new itemset is a younger sibling of the "current" node. <P> Possible actions: <OL> <LI> There are NO more sibling nodes (call <TT>afterAndNotSuperset1</TT>). <LI> There are more sibling nodes (call <TT>afterAndNotSuperset2</TT>). </OL> @param flag the type of branch currently under consideration: code 0 = root, 1 = child, 2 = sibling. @param parentLength the number of elements represented by the parent node of the current node. Used only when adding new "dummy" nodes to maintain count of number of nodes of a given size for when the Ptree table is generated. @param itemSetLength the number of elements in the current itemSet. Used only when adding new nodes to maintain count of number of nodes of a given size for when the Ptree table is generated. @param linkRef the reference (pointer) to current location in the P-tree. @param currentItemSet the row itemSet in the input file currently under consideration. @param topIndex the index of the element in the array marking the top level of the P-tree, used only when inserting new nodes hanging from this top level otherwise ignored. @param oldRef the reference (pointer) to the previous location in the P-tree, used when inserting new nodes. */ private void afterAndNotSuperset(int flag, int parentLength, int itemSetLength, PtreeNode linkRef, short[] currentItemSet, int topIndex, PtreeNode oldRef) { // Test if end of sibling branch, if not continue if (linkRef.siblingRef == null) afterAndNotSuperset1(flag,parentLength,itemSetLength, linkRef,currentItemSet,topIndex,oldRef); // Not end of sibling branch else afterAndNotSuperset2(flag,parentLength,itemSetLength,linkRef, currentItemSet,topIndex,oldRef); } /* AFTER AND NOT SUPERSET 1 */ /** Inserts node into P-tree where new itemset is a younger sibling of the "current" node and there are no more younger siblings on current existing node therefore new node added as sibling. <P> Also tests for leading substring. If found and this is not equal to an existing parent itemSet method causes a dummy node to represent the substring to be inserted (call to <TT>addSupport2</TT>). Possibilities: <OL> <LI>Add to siblingRef with no common leading substring ({1 2} {1 3}). <LI>Add to siblingRef with common leading substring ({1 2} {1 3 5} {1 3 4}). </OL> @param flag the type of branch currently under consideration: code 0 = root, 1 = child, 2 = sibling. @param parentLength the number of elements represented by the parent node of the current node. Used only when adding new "dummy" nodes to maintain count of number of nodes of a given size for when the Ptree table is generated. @param itemSetLength the number of elements in the current itemSet. Used only when adding new nodes to maintain count of number of nodes of a given size for when the Ptree table is generated. @param linkRef the reference (pointer) to current location in the P-tree. @param currentItemSet the row itemSet in the input file currently under consideration. @param topIndex the index of the element in the array marking the top level of the P-tree, used only when inserting new nodes hanging from this top level otherwise ignored. @param oldRef the reference (pointer) to the previous location in the P-tree, used when inserting new nodes. */ private void afterAndNotSuperset1(int flag, int parentLength, int itemSetLength, PtreeNode linkRef, short[] currentItemSet, int topIndex, PtreeNode oldRef) { // Find leading common ellements in row and sibling itemSets if any short[] subsetItemSet = checkForLeadingSubString(currentItemSet, linkRef.itemSet); // If leading common ellements exists create new node representing // common elements and add current itemSet as child of this new node. // Otherwise add new itemSet as elder sibling. if (subsetItemSet != null) { // Create new parent representing subset PtreeNode newParent = createPtreeNode(subsetItemSet, subsetItemSet.length+parentLength-1); // Add support made up of existing support + 1 for current row newParent.support = linkRef.support+1; // Insert new parent node into tree addSupport2(flag,newParent,topIndex,oldRef); // Remove leading substring from current existing node linkRef.itemSet = realloc4(linkRef.itemSet,subsetItemSet); // Attach existing branch as child of new parent and new node // as sibling newParent.childRef = linkRef; linkRef.siblingRef = createPtreeNode(realloc4(currentItemSet, subsetItemSet),itemSetLength); } // No leading substring else linkRef.siblingRef = createPtreeNode(currentItemSet, itemSetLength); } /* AFTER AND NOT SUPERSET 2 */ /** Inserts node into P-tree where new itemset is a younger sibling of the "current" node and there are more younger sibling on current existing node. <P> Possible actions: <OL> <LI> If there are more sibling nodes and the current row itemSet shares a leading substring with the current P-tree node and this is not equal to an existing parent itemSet then add a dummy node to represent the substring (call <TT>addSupport2</TT>). Then add new node. <LI> If more sibling nodes and no shared leading substring continue down sibling branch. </OL> Possibility: <OL> <LI>Add in dummy node ({1 2 3} {1 3 5} {1 6} {1 3 6}). </OL> @param flag the type of branch currently under consideration: code 0 = root, 1 = child, 2 = sibling. @param parentLength the number of elements represented by the parent node of the current node. Used only when adding new "dummy" nodes to maintain count of number of nodes of a given size for when the Ptree table is generated. @param itemSetLength the number of elements in the current itemSet. Used only when adding new nodes to maintain count of number of nodes of a given size for when the Ptree table is generated. @param linkRef the reference (pointer) to current location in the P-tree. @param currentItemSet the row itemSet in the input file currently under consideration. @param topIndex the index of the element in the array marking the top level of the P-tree, used only when inserting new nodes hanging from this top level otherwise ignored. @param oldRef the reference (pointer) to the previous location in the P-tree, used when inserting new nodes. */ private void afterAndNotSuperset2(int flag, int parentLength, int itemSetLength, PtreeNode linkRef, short[] currentItemSet, int topIndex, PtreeNode oldRef) { // Find leading common ellements in row and sibling itemSets if any short[] subsetItemSet = checkForLeadingSubString(currentItemSet, linkRef.itemSet); // If leading common ellements exists create new node representing // common elements and add current itemSet as child of this new node. // Otherwise add new itemSet as elder sibling. if (subsetItemSet != null) { // Create new parent representing leading common elements PtreeNode newParentRef = createPtreeNode(subsetItemSet, subsetItemSet.length+parentLength-1); // Add support made up of existing support + 1 for current row newParentRef.support = linkRef.support+1; // Insert new parent node into tree addSupport2(flag,newParentRef,topIndex,oldRef); // Remove leading substring from current existing node and add // as child of new parent node linkRef.itemSet = realloc4(linkRef.itemSet,subsetItemSet); newParentRef.childRef = linkRef; // Store reference to existing branch of current existing node // in temporary variable PtreeNode tempRef = linkRef.siblingRef; // Create new node representing row and add as sibling of // current existing ref (NOTE: leading sub string will be // removed when checking siblings (checkSiblingBranch) linkRef.siblingRef = createPtreeNode(currentItemSet,itemSetLength); // Now add in previous siblings linkRef.siblingRef.siblingRef = tempRef; // Check whether any siblings need to be "moved up" checkSiblingBranch(newParentRef.childRef,subsetItemSet,newParentRef); } // Otherwise carry on along sibling branch else addToPtree(2,parentLength,itemSetLength,linkRef.siblingRef, currentItemSet,0,linkRef); } /* ------ ADD SUPPORT 2 ------ */ /** Adds new node where "before and subset" or "after and not superset". <P> The flag argument indicates which type of branch is currently under consideration: 0 = root, 1 = child, 2 = sibling. @param flag the type of branch currently under consideration: code 0 = root, 1 = child, 2 = sibling. @param newRef the reference (pointer) to newly created parent indicating current laction in the P-tree. @param topIndex the index of the element in the array marking the top level of the P-tree, used only when inserting new nodes hanging from this top level otherwise ignored. @param oldRef the reference (pointer) to the previous location in the P-tree, used when inserting new nodes. */ private void addSupport2(int flag, PtreeNode newRef, int topIndex, PtreeNode oldRef) { // Add node switch (flag) { case 0: startPtreeRef[topIndex].childRef = newRef; break; case 1: oldRef.childRef = newRef; break; case 2: oldRef.siblingRef = newRef; break; default: System.out.println("ERROR: Unidentified flag in addSupport\n"); } } /* ------ CHECK SIBLING BRANCH ------ */ /** Checks sibling branch to determine whether the siblings are all supersets of the parent and readjusts P-tree accordingly. <P> Possibilities: <OL> <LI>Sibling branch is empty (do nothing). <LI>No nodes in sibling branch are supersets of parent there for move them all up to be siblings of the parent <LI>All nodes in sibling branch are supersets of parent, thus do nothing. <LI>Some nodes in sibling branch are supersets of parent, others are not; therefore move those that are not up to be siblings of the parent. </OL> Note: when a node is found that is not a superset of the parent we do not need to keep on checking. @param linkRef the reference (pointer) to the current node. @param parentItemSet the itemset label represented by the parent. @param newRef tghe reference (pointer) to the newly created parent node.*/ private void checkSiblingBranch(PtreeNode linkRef, short[] parentItemSet, PtreeNode newRef) { // Check if first node in sibling branch is a superset of parent // itemSet. If not move the entire branch up. if (linkRef.siblingRef != null) { if (! isSubset(parentItemSet,linkRef.siblingRef.itemSet)) { newRef.siblingRef = linkRef.siblingRef; linkRef.siblingRef = null; } // Check rest. Branch starts of with supersets of parent itemSet // (which are OK where they are), must find the point where this is // no longer the case, i.e. the part of the branch that needs to be // moved up (if any). else { // Remove leading substring linkRef.siblingRef.itemSet = realloc4(linkRef.siblingRef.itemSet, parentItemSet); // Set reference varibales PtreeNode markerRef = linkRef.siblingRef; PtreeNode localLinkRef = linkRef.siblingRef.siblingRef; while (localLinkRef != null) { if (! isSubset(parentItemSet,localLinkRef.itemSet)) { newRef.siblingRef = localLinkRef; markerRef.siblingRef = null; break; } else { localLinkRef.siblingRef.itemSet = realloc4(localLinkRef.siblingRef.itemSet, parentItemSet); markerRef = localLinkRef; localLinkRef = localLinkRef.siblingRef; } } } } } /* CREAT P-TREE NODE */ /** Creates a P-tree node (other than a top level node). @param newItemSet the itemset to be stored at the node. @param level the cardinality (length) of the item set to be stored in the node. */ private PtreeNode createPtreeNode(short[] itemSet, int level) { pTreeNodesOfCardinalityN[level]++; return(new PtreeNode(itemSet)); } /* GET START OF P-TREE */ /** Gets reference to start of P-tree. * @return PtreeNodeTop[] Reference to start of P-tree. */ public PtreeNodeTop[] getStartOfPtree() { return(startPtreeRef); } /* GET NUMBER P-TREE NODES */ /** Gets number of nodes in P-tree. * @return int Number of nodes in P-tree. */ public int getNumPtreeNodes() { return(calculateNumNodes(startPtreeRef)); } /*----------------------------------------------------------------------- */ /* */ /* P-TREE TABLE */ /* */ /*----------------------------------------------------------------------- */ /* CREATE P-TREE TABLE */ /** Creates P-tree table starting with top level in P-tree. <P> Proceed as follows. <OL> <LI>Create an array of arrays. <LI>Add top level of Ptree. <LI>Add remaining nodes in sub-branches. </OL> */ public void createPtreeTable() { // Set up array of arrays for (int index=1;index<pTreeNodesOfCardinalityN.length;index++) { // There may be no itemSets in the Ptree of a particular size if (pTreeNodesOfCardinalityN[index] == 0) startPtreeTable[index] = null; else startPtreeTable[index] = new PtreeRecord[pTreeNodesOfCardinalityN[index]]; } // Process Ptree for (int index=0;index < startPtreeRef.length;index++) { // Check if valid node (non-null) if (startPtreeRef[index] != null) { // Create a label short[] itemSet = new short[1]; itemSet[0] = (short) index; // Add to P-tree table addToPtreeArray(null,itemSet,startPtreeRef[index].support,1); // Process child branch createPtreeTable2(startPtreeRef[index].childRef,itemSet,1); startPtreeRef[index] = null; } } } /* CREATE P-TREE TABLE 2 */ /** Process child branch hanging from a top level P-tree node. @param linkPtreeRef the reference/pointer to the current location in the P-tree. @param totalpTreeItemSet the union of all the parent labels sofar. @param currentLevel the current levl in the P-tree, initially set to 1. */ private void createPtreeTable2(PtreeNode linkPtreeRef, short[] totalpTreeItemSet, int currentLevel) { if (linkPtreeRef != null) { // Not referencing null node // Calculate level represented by node int lastElementIndex = linkPtreeRef.itemSet.length-1; int level = currentLevel+lastElementIndex+1; // OAdd to Ptree table addToPtreeArray(linkPtreeRef.itemSet,totalpTreeItemSet, linkPtreeRef.support,level); // Search through child branch createPtreeTable2(linkPtreeRef.childRef, append(totalpTreeItemSet, linkPtreeRef.itemSet),level); linkPtreeRef.childRef = null; // Search through sibling branch createPtreeTable2(linkPtreeRef.siblingRef,totalpTreeItemSet, currentLevel); linkPtreeRef.siblingRef= null; } } /* ADD TO P-TREE ARRAY */ /** Adds data associated with a P-tree node to the P-tree 2-D table. @param pTreeNodeLabel the union of all the parent labels. @param pTreeItemSet the node label. @param support the support associated with the node @param level the current levl in the P-tree. */ private void addToPtreeArray(short[] pTreeNodeLabel, short[] pTreeItemSet, int support, int level) { if (pTreeNodeLabel == null) { startPtreeTable[level][pTreeTableMarker[level]] = new PtreeRecord(pTreeItemSet,pTreeItemSet,support); } else startPtreeTable[level][pTreeTableMarker[level]] = new PtreeRecord(pTreeNodeLabel,append(pTreeItemSet, pTreeNodeLabel),support); pTreeTableMarker[level]++; } /*----------------------------------------------------------------------- */ /* */ /* T-TREE BUILDING METHODS */ /* */ /*----------------------------------------------------------------------- */ /* CREATE TOTAL SUPPORT TREE */ /** Commences process of generating a total support tree (T-tree) from a P-tree. */ public void createTotalSupportTree() { System.out.println("APRIORI-TFP WITH X-CHECKING\n" + "---------------------------"); System.out.println("Minimum support threshold = " + twoDecPlaces(support) + "% " + "(" + twoDecPlaces(minSupport) + " (records)"); // If no data (possibly as a result of an order and pruning operation) // return if (numOneItemSets==0) return; // Create Top level of T-tree (First pass of dataset) startTtreeRef=null; numFrequentsets = 0; createTtreeTopLevel(); // Generate level 2 generateLevel2(); // Further passes of the dataset createTtreeLevelN(); } /* Set of methods for creating T-tree from P-tree which overide methods of smae name in parent class. */ /* GENERATE T-TREE TOP LEVEL 2 */ /** Commences process to generate top level (singletons) of Ttree by looping through table level by level (row by row). */ protected void createTtreeTopLevel2() { // Step through Ptree table for(int index=1;index<startPtreeTable.length;index++) { // Check if level exists if (startPtreeTable[index] != null) { createTtreeTopLevel3(startPtreeTable[index]); } } // Destroy top level of P-tree table startPtreeTable[1] = null; } /** Processes level (row) in P-tree table to generate top level of T-tree. @param pTreeTableLevel the given level (row) of P-tree table records. */ protected void createTtreeTopLevel3(PtreeRecord[] pTreeTableLevel) { // Loop through level in P-tree table level for(int index=0;index<pTreeTableLevel.length;index++) { createTtreeTopLevel4(pTreeTableLevel[index].pTreeNodeLabel, pTreeTableLevel[index].support); } } /** Increments support counts in T-tree top level given a P-tree table label and an associated support value. @param pTreeNodeLabel the label associated with a P-tree node (not the union of its parent labels). ¶m pTreeNodeSupport the associated support value.*/ private void createTtreeTopLevel4(short[] pTreeNodeLabel, int pTreeNodeSupport) { // Loop through node label for (int index=0;index<pTreeNodeLabel.length;index++) { // Increment support for T-tree singleton node startTtreeRef[pTreeNodeLabel[index]].support = startTtreeRef[pTreeNodeLabel[index]].support+pTreeNodeSupport; numUpdates++; } } /* CREATE T-TREE LEVEL N */ /** Commences process of adding support values to further levels of the T-tree (not the top level). */ protected void createTtreeLevelN() { int nextLevel=2; // Loop while a further level exists while (nextLevelExists) { // Add support addSupportToTtreeLevelN(nextLevel); // Destroy current level of P-tree table startPtreeTable[nextLevel] = null; // Prune unsupported candidate sets pruneLevelN(startTtreeRef,nextLevel); // Attempt to generate next level nextLevelExists=false; generateLevelN(startTtreeRef,nextLevel,null); nextLevel++; } //End System.out.println("Levels in T-tree = " + nextLevel); } /* ADD SUPPORT VALUES TO T-TREE LEVEL N */ /** Continues process of adding support alues to further levels of the T-tree (not the top level) by stepping through the Ptree table from the current required level upto the maximum level that may be contained in the table. @param level the (start) current level. */ protected void addSupportToTtreeLevelN(int level) { // Nested loop to step through P-tree table for (int index1=level;index1<startPtreeTable.length;index1++) { // Check that there are records in the table at current level if (startPtreeTable[index1] != null) { // step through records at currentd level in loop for(int index2=0;index2<pTreeNodesOfCardinalityN[index1]; index2++) { addSupportToTtreeLevelN(startTtreeRef,level, startPtreeTable[index1][index2].pTreeNodeLabel, startPtreeTable[index1][index2].pTreeItemSet, startPtreeTable[index1][index2].support); } } } } /* ADD SUPPORT VALUES TO T-TREE LEVEL N */ /** Adds support to to appropriate nodes in T-tree at a given level and given a record from the P-tree table. @param linkRef the reference (pointer) to the current branch in the T-tree (top at start) @param level the desired level in T-tree @param pTreeNodeLabel the actual P-tree node itemSet label (not the union of its parent labels). @param pTreeItemSet the Uunion of the pTreeNodeLabel and all parent node itemSets of the current node. @param support the upport count */ private void addSupportToTtreeLevelN(TtreeNode[] linkRef, int level, short[] pTreeNodeLabel, short[] pTreeItemSet, int support) { int index; int tTreeLength = linkRef.length; // At right leve; if (level == 1) { // Step through P-tree table itemSet for (index=0;index < pTreeItemSet.length;index++) { // Check that index is within Ttree array if (pTreeItemSet[index] >= tTreeLength) break; // If valid node, i.e. index is within Ttree array therfore // node subsets are supported elsewhere, update if (linkRef[pTreeItemSet[index]] != null) { linkRef[pTreeItemSet[index]].support = linkRef[pTreeItemSet[index]].support + support; numUpdates++; } } } // At wrong level else { // Step through search itemSet int scLength = pTreeNodeLabel.length; for (index=0;index < scLength;index++) { //for (index=0;index < scLength;index++) { // Check that index is within Ttree array if (pTreeNodeLabel[index] >= tTreeLength) break; // If there is a node with a child reference follow that // reference if (linkRef[pTreeNodeLabel[index]] != null) { if (linkRef[pTreeNodeLabel[index]].childRef != null) addSupportToTtreeLevelN(linkRef[pTreeNodeLabel[index]].childRef, level-1,pTreeItemSet,pTreeItemSet,support); } } } } /*----------------------------------------------------------------------- */ /* */ /* PUBLIC OUTPUT METHODS */ /* */ /*----------------------------------------------------------------------- */ /* Fout types of output: 1) Output P-tree 2) Output P-tree Statistics 3) Output P-tree Table 4) Output P-tree Table statistics */ /* ---------------- */ /* 1. OUTPUT P TREE */ /* ---------------- */ /** Commences process to output P-tree. */ public void outputPtree() { System.out.println(); outputPtree1(startPtreeRef); } /** Continues process to output P-tree. @param linkPtreeRef the reference to the start of the P-tree. */ public void outputPtree1(PtreeNodeTop[] linkPtreeRef) { String newNode; int counter = 1; // Start by processing top level for (int index=0;index<linkPtreeRef.length;index++) { if (linkPtreeRef[index] != null) { outputPtree2(index,linkPtreeRef[index],counter); counter++; } } } /** Outputs top-level node of P-tree. @param index the current index in the top-level (array) of tghe P-tree. @param linkRef the reference to the P-tree top level node in question. @param counter the node counter (not necesserily the same as the index if some nodes are absent). */ private void outputPtree2(int index, PtreeNodeTop linkRef, int counter) { String newNode = Integer.toString(counter); // Outputnode number and support System.out.print("(" + newNode + ")"); short[] itemSet = new short[1]; itemSet[0] = (short) index; outputItemSet(itemSet); System.out.println("support = " + linkRef.support); // Continue outputPtree3(linkRef.childRef,newNode,1); } /** Outputs remainder of P-tree (not the top level). @param linkRef the reference to the current location in the P-tree. @param node the identifier for the current node (for output purposes only). @param counter the node counter (used to generate a new node identifier). */ private void outputPtree3(PtreeNode linkRef, String node, int counter) { String newNode; if (linkRef != null) { // Outputnode number if (node == "start") newNode = Integer.toString(counter); else { newNode = node.concat("."); newNode = newNode.concat(Integer.toString(counter)); } System.out.print("(" + newNode + ")"); outputItemSet(linkRef.itemSet); System.out.println("support = " + linkRef.support); // Continue outputPtree3(linkRef.childRef,newNode,1); counter++; outputPtree3(linkRef.siblingRef,node,counter); } } /* ---------------------- */ /* 2. OUTPUT P TREE STATS */ /* ---------------------- */ /** Commences the process of outputting P-tree statistics (for diagnostic purposes): (a) Storage, (b) Number of nodes on P-tree, (c) number of partial support increments (updates) and (d) generation time. */ public void outputPtreeStats() { System.out.println("P-TREE STATISTICS\n-----------------"); System.out.println(calculateStorage(startPtreeRef) + " (Bytes) storage"); System.out.println(calculateNumNodes(startPtreeRef) + " nodess"); System.out.println(numberOfNodeUpdates + " support value increments"); } /* OUTPUT P TREE STORAGE: */ /** Outputs P-tree storgae requirements in Bytes. */ public void outputPtreeStorage() { int storage; storage = calculateStorage(startPtreeRef); System.out.println("P-tree storage = " + storage + " (Bytes)"); } /** Commences process to calculate P-tree storage requirements. @param linkPtreeRef the reference to the current portion of the P-tree. @return total required storage in bytes. */ private int calculateStorage(PtreeNodeTop[] linkPtreeRef) { int storage = 4; // For start reference // Start by processing top level for (int index=1;index<linkPtreeRef.length;index++) { if (linkPtreeRef[index] != null) storage = calculateStorage(storage, linkPtreeRef[index]); storage = storage+4; } // Return return(storage); } /** Commences process to calculates storage requirements for a branch of the P-tree eminating from the top level. @param storage the required storage sofar. @param linkref the reference to the start of the branch. @return total required storage in bytes for branch of P-tree. */ private int calculateStorage(int storage, PtreeNodeTop linkRef) { storage = storage+8; // For top level node // Continue return(calculateStorage(storage,linkRef.childRef)); } /** Calculates recursivly the storage requirements for a sub-branch of the P-tree. @param storage the required storage sofar. @param linkref the reference to current location in the P-tree branch. @return total required storage in bytes for sub-branch of P-tree. */ private int calculateStorage(int storage, PtreeNode linkRef) { if (linkRef != null) { // 4 each foe childRef, siblingRef and support count storage = storage+12+(linkRef.itemSet.length*2); // Continue storage = calculateStorage(storage,linkRef.childRef); storage = calculateStorage(storage,linkRef.siblingRef); } // Return return(storage); } /* OUTPUT NUMBER OF P TREE NODES: */ /** Outputs total number of P-tree nodes (and the number of support value increments). */ public void outputNumNodes() { int num = 0; num = calculateNumNodes(startPtreeRef); System.out.println("Number of P-tree nodes = " + num); System.out.println("Number of P-tree support value increments = " + numberOfNodeUpdates); } /** Commence process of determining total number of nodes in P-tree. @param linkPtreeRef the reference to the start of the P-tree. @return total number of nodes. */ private int calculateNumNodes(PtreeNodeTop[] linkPtreeRef) { int num = 0; // For start reference // Start by processing top level for (int index=1;index<linkPtreeRef.length;index++) { if (linkPtreeRef[index] != null) num = 1 + calculateNumNodes(num, linkPtreeRef[index].childRef); } // Return return(num); } /** Commence process of determining total number of nodes in (sub-) branch of P-tree. @param linkPtreeRef the reference to the current location in the P-tree. @param the node count so far @return total number of nodes. */ private int calculateNumNodes(int num, PtreeNode linkRef) { if (linkRef != null) { num++; // Continue num = calculateNumNodes(num,linkRef.childRef); num = calculateNumNodes(num,linkRef.siblingRef); } // Return return(num); } /* ---------------------- */ /* 3. OUTPUT P-TREE TABLE */ /* ---------------------- */ /** Outputs P-tree table. */ public void outputPtreeTable() { int index1,index2; System.out.println("P-ree Nodes of cardinality [N]: "); for (index1=1;index1<pTreeNodesOfCardinalityN.length;index1++) System.out.println("[" + index1 + "] " + pTreeNodesOfCardinalityN[index1]); System.out.println("Marker values on completion of P-tree table " + "generation: "); for (index1=1;index1<pTreeTableMarker.length;index1++) System.out.println("[" + index1 + "] " + pTreeTableMarker[index1]); // Step through Ptree table for(index1=1;index1<startPtreeTable.length;index1++) { System.out.println("LEVEL = " + index1); if (startPtreeTable[index1] == null) System.out.print("null"); else { for(index2=0;index2<pTreeTableMarker[index1];index2++) { System.out.print("Node label = "); outputItemSet(startPtreeTable[index1][index2].pTreeNodeLabel); System.out.print(" Itemset = "); outputItemSet(startPtreeTable[index1][index2].pTreeItemSet); System.out.println(" sup = " + startPtreeTable[index1][index2].support); } } System.out.println(); } } /* ---------------------------- */ /* 4. OUTPUT P-TREE TABLE STATS */ /* ---------------------------- */ /** Outputs storage requirements for P-tree table. */ public void outputPtreeTableStats() { int index1,index2; int storage=0, nodeCounter, nodeTotal=0; // For each level in the P-tree table for(index1=1;index1<startPtreeTable.length;index1++) { nodeCounter=0; // If null we still need a pointer so 4 bytes if (startPtreeTable[index1] == null) storage = storage+4; // Else step through records at this level else { for(index2=0;index2<pTreeTableMarker[index1];index2++) { nodeCounter++; // 4 for support and 2 for each item in search itemSet storage = storage + 4 + (startPtreeTable[index1][index2].pTreeNodeLabel.length*2); // 2 for each item in pTree itemSet storage = storage + (startPtreeTable[index1][index2].pTreeItemSet.length*2); } } nodeTotal = nodeTotal+nodeCounter; } System.out.println("P-tree Table Storage = " + storage + " (" + nodeTotal + " nodes)"); } }