/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.fpm.pfpgrowth.fpgrowth;

import java.util.Arrays;
import java.util.Collection;
import java.util.TreeSet;

/**
 * The Frequent Pattern Tree data structure used for mining patterns using the
 * {@link FPGrowth} algorithm.
 *
 * <p>The tree is stored in parallel primitive arrays indexed by node id
 * ({@code parent}, {@code next}, {@code attribute}, {@code nodeCount}, ...).
 * Node ids are handed out sequentially starting at {@link #ROOTNODEID}, and
 * the arrays grow by {@code GROWTH_RATE} when they fill up. A header table
 * keeps, per attribute, an accumulated count and a chain of the nodes that
 * carry that attribute (linked through {@link #next(int)}).
 *
 * <p>Unless stated otherwise, methods do not validate node ids or attribute
 * values; out-of-range arguments surface as
 * {@link ArrayIndexOutOfBoundsException}.
 */
public class FPTree {

  /** Id of the root node; the root is always the first node created. */
  public static final int ROOTNODEID = 0;

  private static final int DEFAULT_CHILDREN_INITIAL_SIZE = 2;
  private static final int DEFAULT_HEADER_TABLE_INITIAL_SIZE = 4;
  private static final int DEFAULT_INITIAL_SIZE = 8;
  private static final float GROWTH_RATE = 1.5f;

  /** Number of slots in each header table property block. */
  private static final int HEADERTABLEBLOCKSIZE = 2;
  /** Slot holding the most recently linked node of an attribute's chain. */
  private static final int HT_LAST = 1;
  /** Slot holding the first node of an attribute's chain, or -1 if empty. */
  private static final int HT_NEXT = 0;

  // Per-node storage, indexed by node id; only the first 'nodes' entries are live.
  private int[] attribute;
  private int[] childCount;
  private int[] conditional;
  private int[] next;
  private int[][] nodeChildren;
  private long[] nodeCount;
  private int[] parent;
  private int nodes;

  // Header table storage, indexed by header index (see getHeaderIndex).
  private long[] headerTableAttributeCount;
  private int[] headerTableAttributes;
  private int headerTableCount;
  /** Maps an attribute value to its header index, or -1 when not registered. */
  private int[] headerTableLookup;
  private int[][] headerTableProperties;

  private boolean singlePath;

  /** Every attribute registered in the header table, in ascending order. */
  private final Collection<Integer> sortedSet = new TreeSet<Integer>();

  /** Creates a tree with the default initial node capacity. */
  public FPTree() {
    this(DEFAULT_INITIAL_SIZE);
  }

  /**
   * Creates a tree containing only the root node.
   *
   * @param size initial node capacity; values below the default capacity are
   *        raised to it
   */
  public FPTree(int size) {
    int capacity = Math.max(size, DEFAULT_INITIAL_SIZE);

    parent = new int[capacity];
    next = new int[capacity];
    childCount = new int[capacity];
    attribute = new int[capacity];
    nodeCount = new long[capacity];
    nodeChildren = new int[capacity][];
    conditional = new int[capacity];

    headerTableAttributes = new int[DEFAULT_HEADER_TABLE_INITIAL_SIZE];
    headerTableAttributeCount = new long[DEFAULT_HEADER_TABLE_INITIAL_SIZE];
    headerTableLookup = new int[DEFAULT_HEADER_TABLE_INITIAL_SIZE];
    Arrays.fill(headerTableLookup, -1);
    headerTableProperties = new int[DEFAULT_HEADER_TABLE_INITIAL_SIZE][];

    singlePath = true;
    createRootNode();
  }

  /**
   * Appends {@code childnodeId} to the children of {@code parentNodeId} and
   * clears the single-path flag once the parent has more than one child.
   * The child's own parent pointer is not modified.
   *
   * @param parentNodeId node receiving the child
   * @param childnodeId node id to append
   */
  public final void addChild(int parentNodeId, int childnodeId) {
    if (appendChild(parentNodeId, childnodeId) > 1) {
      singlePath = false;
    }
  }

  /**
   * Adds {@code count} to the count stored at {@code nodeId}.
   *
   * @return {@code false} (and nothing changes) when {@code nodeId} is not
   *         below the number of created nodes, {@code true} otherwise
   */
  public final boolean addCount(int nodeId, long count) {
    if (nodeId >= nodes) {
      return false;
    }
    nodeCount[nodeId] += count;
    return true;
  }

  /**
   * Adds {@code count} to the header table count of {@code attributeValue},
   * registering the attribute in the header table first if necessary.
   */
  public final void addHeaderCount(int attributeValue, long count) {
    headerTableAttributeCount[getHeaderIndex(attributeValue)] += count;
  }

  /**
   * Links {@code nodeId} at the end of the header chain of
   * {@code attributeValue}, registering the attribute if necessary. If the
   * chain is empty the node becomes its head; otherwise the current last
   * node's {@code next} pointer is set to {@code nodeId}.
   */
  public final void addHeaderNext(int attributeValue, int nodeId) {
    int[] chain = headerTableProperties[getHeaderIndex(attributeValue)];
    if (chain[HT_NEXT] == -1) {
      chain[HT_NEXT] = nodeId;
    } else {
      setNext(chain[HT_LAST], nodeId);
    }
    chain[HT_LAST] = nodeId;
  }

  /** Returns the attribute stored at {@code nodeId}. */
  public final int attribute(int nodeId) {
    return attribute[nodeId];
  }

  /**
   * Returns the id of the {@code index}-th child of {@code nodeId}.
   *
   * @return the child id, or -1 when {@code index} is outside
   *         {@code [0, childCount(nodeId))}
   */
  public final int childAtIndex(int nodeId, int index) {
    // Slots at or beyond childCount hold stale/default data (or do not exist),
    // so they must never be handed out as child ids.
    if (index < 0 || index >= childCount[nodeId]) {
      return -1;
    }
    return nodeChildren[nodeId][index];
  }

  /** Returns the number of children of {@code nodeId}. */
  public final int childCount(int nodeId) {
    return childCount[nodeId];
  }

  /**
   * Finds the first child of {@code nodeId} carrying {@code childAttribute}.
   *
   * @return the child id, or -1 when no child has that attribute
   */
  public final int childWithAttribute(int nodeId, int childAttribute) {
    int[] children = nodeChildren[nodeId];
    int length = childCount[nodeId];
    for (int i = 0; i < length; i++) {
      int child = children[i];
      if (attribute[child] == childAttribute) {
        return child;
      }
    }
    return -1;
  }

  /**
   * Resets the tree to contain only a fresh root node and an empty header
   * table. Already allocated arrays are kept and reused.
   */
  public final void clear() {
    nodes = 0;
    headerTableCount = 0;
    singlePath = true;
    Arrays.fill(headerTableLookup, -1);
    sortedSet.clear();
    createRootNode();
  }

  /** Resets the conditional link of every created node to 0. */
  public final void clearConditional() {
    Arrays.fill(conditional, 0, nodes, 0);
  }

  /** Returns the conditional link stored at {@code nodeId}. */
  public final int conditional(int nodeId) {
    return conditional[nodeId];
  }

  /** Returns the count stored at {@code nodeId}. */
  public final long count(int nodeId) {
    return nodeCount[nodeId];
  }

  /**
   * Creates a detached node: its parent is -1, it is not added to any child
   * list and it is not linked into the header table.
   *
   * @return the id of the new node
   */
  public final int createConditionalNode(int attributeValue, long count) {
    return allocateNode(-1, attributeValue, count);
  }

  /**
   * Creates a node, appends it to the children of {@code parentNodeId} and
   * links it at the end of the header chain of {@code attributeValue}.
   *
   * @return the id of the new node
   */
  public final int createNode(int parentNodeId, int attributeValue, long count) {
    int childNodeId = allocateNode(parentNodeId, attributeValue, count);
    addChild(parentNodeId, childNodeId);
    addHeaderNext(attributeValue, childNodeId);
    return childNodeId;
  }

  /**
   * Creates a root-style node: parent 0, attribute -1 and count 0. It is
   * called by the constructors and by {@link #clear()}, where the node
   * receives id {@link #ROOTNODEID}.
   *
   * @return the id of the new node
   */
  public final int createRootNode() {
    return allocateNode(0, -1, 0);
  }

  /**
   * Returns the attribute stored at position {@code index} of the header
   * table attribute list.
   */
  public final int getAttributeAtIndex(int index) {
    return headerTableAttributes[index];
  }

  /**
   * Returns the first node of the header chain of {@code attributeValue}, or
   * -1 when the chain is empty. Registers the attribute if necessary.
   */
  public final int getHeaderNext(int attributeValue) {
    return headerTableProperties[getHeaderIndex(attributeValue)][HT_NEXT];
  }

  /**
   * Returns the header table count of {@code attributeValue}. Registers the
   * attribute (with count 0) if necessary.
   */
  public final long getHeaderSupportCount(int attributeValue) {
    return headerTableAttributeCount[getHeaderIndex(attributeValue)];
  }

  /** Returns a copy of the registered header table attributes. */
  public final int[] getHeaderTableAttributes() {
    return Arrays.copyOf(headerTableAttributes, headerTableCount);
  }

  /** Returns the number of attributes registered in the header table. */
  public final int getHeaderTableCount() {
    return headerTableCount;
  }

  /** Returns {@code true} when at most one node (the root) exists. */
  public final boolean isEmpty() {
    return nodes <= 1;
  }

  /** Returns the {@code next} pointer of {@code nodeId}; new nodes start at -1. */
  public final int next(int nodeId) {
    return next[nodeId];
  }

  /** Returns the parent pointer stored at {@code nodeId}. */
  public final int parent(int nodeId) {
    return parent[nodeId];
  }

  /**
   * Marks the header chain of {@code attributeValue} as empty by resetting
   * its head to -1. The {@code next} pointers of the nodes are untouched.
   */
  public final void removeHeaderNext(int attributeValue) {
    headerTableProperties[getHeaderIndex(attributeValue)][HT_NEXT] = -1;
  }

  /**
   * Rewrites the header table attribute list in ascending attribute order.
   * Only the list read by {@link #getAttributeAtIndex(int)} and
   * {@link #getHeaderTableAttributes()} is rewritten; the counts and chains
   * are looked up by attribute value and are not moved.
   */
  public final void reorderHeaderTable() {
    int position = 0;
    for (int attr : sortedSet) {
      headerTableAttributes[position++] = attr;
    }
  }

  /**
   * Replaces every occurrence of {@code replacableNode} among the children of
   * {@code parentNodeId} with {@code childnodeId}, and points the parent of
   * {@code childnodeId} at {@code parentNodeId} for each replacement made.
   */
  public void replaceChild(int parentNodeId, int replacableNode, int childnodeId) {
    int[] children = nodeChildren[parentNodeId];
    int max = childCount[parentNodeId];
    for (int i = 0; i < max; i++) {
      if (children[i] == replacableNode) {
        children[i] = childnodeId;
        parent[childnodeId] = parentNodeId;
      }
    }
  }

  /**
   * Sets the conditional link of {@code nodeId}.
   *
   * @return {@code false} (and nothing changes) when {@code nodeId} is not
   *         below the number of created nodes, {@code true} otherwise
   */
  public final boolean setConditional(int nodeId, int conditionalNode) {
    if (nodeId >= nodes) {
      return false;
    }
    conditional[nodeId] = conditionalNode;
    return true;
  }

  /**
   * Sets the {@code next} pointer of {@code nodeId}.
   *
   * @return {@code false} (and nothing changes) when {@code nodeId} is not
   *         below the number of created nodes, {@code true} otherwise
   */
  public final boolean setNext(int nodeId, int nextNode) {
    if (nodeId >= nodes) {
      return false;
    }
    next[nodeId] = nextNode;
    return true;
  }

  /**
   * Sets the parent of {@code nodeId} and appends {@code nodeId} to the
   * children of {@code parentNode}. Unlike {@link #addChild(int, int)} this
   * does not update the single-path flag.
   *
   * @return {@code false} (and nothing changes) when {@code nodeId} is not
   *         below the number of created nodes, {@code true} otherwise
   */
  public final boolean setParent(int nodeId, int parentNode) {
    if (nodeId >= nodes) {
      return false;
    }
    parent[nodeId] = parentNode;
    appendChild(parentNode, nodeId);
    return true;
  }

  /** Overrides the single-path flag. */
  public final void setSinglePath(boolean bit) {
    singlePath = bit;
  }

  /**
   * Returns the single-path flag: {@code true} until some node receives a
   * second child through {@link #addChild(int, int)}, unless overridden via
   * {@link #setSinglePath(boolean)}.
   */
  public final boolean singlePath() {
    return singlePath;
  }

  /**
   * Claims the next free node id and initialises every per-node field,
   * growing the node arrays first when they are full. A child array left over
   * from before a {@link #clear()} is reused.
   */
  private int allocateNode(int parentNodeId, int attributeValue, long count) {
    if (nodes >= attribute.length) {
      resize();
    }
    childCount[nodes] = 0;
    next[nodes] = -1;
    parent[nodes] = parentNodeId;
    conditional[nodes] = 0;
    attribute[nodes] = attributeValue;
    nodeCount[nodes] = count;
    if (nodeChildren[nodes] == null) {
      nodeChildren[nodes] = new int[DEFAULT_CHILDREN_INITIAL_SIZE];
    }
    return nodes++;
  }

  /**
   * Appends {@code childNodeId} to the child array of {@code parentNodeId},
   * growing that array when full, and returns the new child count.
   */
  private int appendChild(int parentNodeId, int childNodeId) {
    int length = childCount[parentNodeId];
    if (length >= nodeChildren[parentNodeId].length) {
      resizeChildren(parentNodeId);
    }
    nodeChildren[parentNodeId][length] = childNodeId;
    childCount[parentNodeId] = length + 1;
    return length + 1;
  }

  /**
   * Returns the header index of {@code attributeValue}, registering the
   * attribute (count 0, empty chain) when it has not been seen since the
   * last {@link #clear()}.
   */
  private int getHeaderIndex(int attributeValue) {
    if (attributeValue >= headerTableLookup.length) {
      resizeHeaderLookup(attributeValue);
    }
    int index = headerTableLookup[attributeValue];
    if (index != -1) {
      return index;
    }

    // First sighting of this attribute: claim the next header slot.
    if (headerTableCount >= headerTableAttributes.length) {
      resizeHeaderTable();
    }
    index = headerTableCount++;
    headerTableAttributes[index] = attributeValue;
    headerTableAttributeCount[index] = 0;
    if (headerTableProperties[index] == null) {
      headerTableProperties[index] = new int[HEADERTABLEBLOCKSIZE];
    }
    headerTableProperties[index][HT_NEXT] = -1;
    headerTableProperties[index][HT_LAST] = -1;
    headerTableLookup[attributeValue] = index;
    sortedSet.add(attributeValue);
    return index;
  }

  /** Grows all per-node arrays by {@code GROWTH_RATE}, keeping their contents. */
  private void resize() {
    int size = Math.max((int) (GROWTH_RATE * nodes), DEFAULT_INITIAL_SIZE);

    childCount = Arrays.copyOf(childCount, size);
    attribute = Arrays.copyOf(attribute, size);
    nodeCount = Arrays.copyOf(nodeCount, size);
    parent = Arrays.copyOf(parent, size);
    next = Arrays.copyOf(next, size);
    nodeChildren = Arrays.copyOf(nodeChildren, size);
    conditional = Arrays.copyOf(conditional, size);
  }

  /** Grows the child array of {@code nodeId} by {@code GROWTH_RATE}. */
  private void resizeChildren(int nodeId) {
    int size = Math.max((int) (GROWTH_RATE * childCount[nodeId]), DEFAULT_CHILDREN_INITIAL_SIZE);
    nodeChildren[nodeId] = Arrays.copyOf(nodeChildren[nodeId], size);
  }

  /**
   * Grows the attribute-to-index lookup so that {@code attributeValue} fits,
   * marking all newly added slots as unregistered (-1).
   */
  private void resizeHeaderLookup(int attributeValue) {
    int size = (int) (attributeValue * GROWTH_RATE);
    int oldLength = headerTableLookup.length;
    headerTableLookup = Arrays.copyOf(headerTableLookup, size);
    Arrays.fill(headerTableLookup, oldLength, size, -1);
  }

  /** Grows the header table arrays by {@code GROWTH_RATE}, keeping their contents. */
  private void resizeHeaderTable() {
    int size = Math.max((int) (GROWTH_RATE * headerTableCount), DEFAULT_HEADER_TABLE_INITIAL_SIZE);

    headerTableAttributes = Arrays.copyOf(headerTableAttributes, size);
    headerTableAttributeCount = Arrays.copyOf(headerTableAttributeCount, size);
    headerTableProperties = Arrays.copyOf(headerTableProperties, size);
  }

  /**
   * Appends the subtree rooted at {@code currNode} to {@code sb}. Each leaf
   * ends a line; the first child continues on its parent's line and later
   * children are indented with spaces to the same column.
   */
  private void toStringHelper(StringBuilder sb, int currNode, String prefix) {
    String label = prefix + "-{attr:" + attribute[currNode]
        + ", id: " + currNode
        + ", cnt:" + nodeCount[currNode] + '}';

    if (childCount[currNode] == 0) {
      sb.append(label).append('\n');
      return;
    }

    char[] blanks = new char[label.length()];
    Arrays.fill(blanks, ' ');
    String padding = new String(blanks);

    for (int i = 0; i < childCount[currNode]; i++) {
      toStringHelper(sb, nodeChildren[currNode][i], (i == 0 ? label : padding) + '-' + i + "->");
    }
  }

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder("[FPTree\n");
    toStringHelper(sb, 0, " ");
    sb.append("\n]\n");
    return sb.toString();
  }
}