/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* ADNode.java
* Copyright (C) 2002 Remco Bouckaert
*
*/
package weka.classifiers.bayes;
import weka.core.*;
import java.util.Vector;
/**
* The ADNode class implements the ADTree data structure, which increases
* the speed with which sub-contingency tables can be constructed from
* a data set in an Instances object. For details, see
*
* Cached Sufficient Statistics for Efficient Machine Learning with Large Datasets
* Andrew Moore and Mary Soon Lee
* Journal of Artificial Intelligence Research 8 (1998) 67-91
*
* @author Remco Bouckaert (rrb@xm.co.nz)
* @version $Revision: 1.1.1.1 $
*/
public class ADNode {

  /**
   * Record threshold used by {@link #MakeADTree(int, FastVector, Instances)}:
   * a node built from fewer records than this stores the instances
   * themselves instead of expanding VaryNode children.
   */
  static final int MIN_RECORD_SIZE = 5;

  /** list of VaryNode children (null when m_Instances is used instead) **/
  public VaryNode[] m_VaryNodes;

  /** list of Instance children (either m_Instances or m_VaryNodes is instantiated) **/
  public Instance[] m_Instances;

  /** count: number of records this node was built from **/
  public int m_nCount;

  /** first node in VaryNode array: m_VaryNodes[i] belongs to node (m_nStartNode + i) **/
  public int m_nStartNode;

  /** Creates new ADNode */
  public ADNode() {
  }

  /**
   * Creates the VaryNode for a single node (attribute) over a set of records.
   * The records are partitioned by their value for the node; every partition
   * except the most common value and the empty ones gets its own ADNode
   * sub tree, the others are stored as null.
   *
   * @param iNode index of the node (attribute) the VaryNode is created for
   * @param nRecords indexes (as Integer objects) of the records in instances
   *          to be considered
   * @param instances data set
   * @return VaryNode representing part of an ADTree
   * @throws ArrayIndexOutOfBoundsException if the attribute reports zero
   *           values, since the first value is always inspected
   */
  public static VaryNode MakeVaryNode(int iNode, FastVector nRecords, Instances instances) {
    VaryNode _VaryNode = new VaryNode(iNode);
    int nValues = instances.attribute(iNode).numValues();

    // one (initially empty) record list per value of the node
    FastVector[] nChildRecords = new FastVector[nValues];
    for (int iChild = 0; iChild < nValues; iChild++) {
      nChildRecords[iChild] = new FastVector();
    }

    // divide the records among the children according to their value
    int nRecordCount = nRecords.size();
    for (int iRecord = 0; iRecord < nRecordCount; iRecord++) {
      int iInstance = ((Integer) nRecords.elementAt(iRecord)).intValue();
      int iValue = (int) instances.instance(iInstance).value(iNode);
      nChildRecords[iValue].addElement(new Integer(iInstance));
    }

    // find most common value; ties are resolved in favour of the lowest index
    int nMCV = 0;
    int nCount = nChildRecords[0].size();
    for (int iChild = 1; iChild < nValues; iChild++) {
      int nChildCount = nChildRecords[iChild].size();
      if (nChildCount > nCount) {
        nCount = nChildCount;
        nMCV = iChild;
      }
    }
    _VaryNode.m_nMCV = nMCV;

    // determine child nodes: no sub tree for the most common value or for
    // values without any records
    _VaryNode.m_ADNodes = new ADNode[nValues];
    for (int iChild = 0; iChild < nValues; iChild++) {
      if (iChild == nMCV || nChildRecords[iChild].size() == 0) {
        _VaryNode.m_ADNodes[iChild] = null;
      } else {
        _VaryNode.m_ADNodes[iChild] = MakeADTree(iNode + 1, nChildRecords[iChild], instances);
      }
    }
    return _VaryNode;
  } // MakeVaryNode

  /**
   * Creates the sub tree covering the nodes iNode and higher for a set of
   * records. With fewer than MIN_RECORD_SIZE records the instances are stored
   * in the node directly, otherwise one VaryNode per remaining node is built.
   *
   * @param iNode index of the lowest node in the tree
   * @param nRecords indexes (as Integer objects) of the records in instances
   *          to be considered
   * @param instances data set
   * @return ADNode representing an ADTree
   */
  public static ADNode MakeADTree(int iNode, FastVector nRecords, Instances instances) {
    ADNode _ADNode = new ADNode();
    int nRecordCount = nRecords.size();
    _ADNode.m_nCount = nRecordCount;
    _ADNode.m_nStartNode = iNode;

    if (nRecordCount < MIN_RECORD_SIZE) {
      // too few records to be worth expanding: keep the instances themselves
      _ADNode.m_Instances = new Instance[nRecordCount];
      for (int iInstance = 0; iInstance < nRecordCount; iInstance++) {
        int iRecord = ((Integer) nRecords.elementAt(iInstance)).intValue();
        _ADNode.m_Instances[iInstance] = instances.instance(iRecord);
      }
    } else {
      int nNodes = instances.numAttributes();
      _ADNode.m_VaryNodes = new VaryNode[nNodes - iNode];
      for (int iNode2 = iNode; iNode2 < nNodes; iNode2++) {
        _ADNode.m_VaryNodes[iNode2 - iNode] = MakeVaryNode(iNode2, nRecords, instances);
      }
    }
    return _ADNode;
  } // MakeADTree

  /**
   * Creates an AD tree from all instances in a data set.
   *
   * @param instances data set
   * @return ADNode representing an ADTree
   */
  public static ADNode MakeADTree(Instances instances) {
    int nInstances = instances.numInstances();
    FastVector nRecords = new FastVector(nInstances);
    for (int iRecord = 0; iRecord < nInstances; iRecord++) {
      nRecords.addElement(new Integer(iRecord));
    }
    return MakeADTree(0, nRecords, instances);
  } // MakeADTree

  /**
   * Gets counts for specific instantiation of a set of nodes. The counts are
   * accumulated into nCounts, so the caller is responsible for initializing
   * that array.
   *
   * @param nCounts array for storing counts
   * @param nNodes array of node indexes; when this node holds VaryNodes,
   *          nNodes[iNode] is used relative to m_nStartNode to select one
   * @param nOffsets offset for nodes in nNodes in nCounts
   * @param iNode index into nNodes indicating current node
   * @param iOffset offset into nCounts due to nodes below iNode
   * @param bSubstract indicates whether counts should be added (false) or
   *          subtracted (true)
   */
  public void getCounts(
      int[] nCounts,
      int[] nNodes,
      int[] nOffsets,
      int iNode,
      int iOffset,
      boolean bSubstract
  ) {
    if (iNode >= nNodes.length) {
      // all requested nodes are instantiated: every record of this node counts
      if (bSubstract) {
        nCounts[iOffset] -= m_nCount;
      } else {
        nCounts[iOffset] += m_nCount;
      }
      return;
    }

    if (m_VaryNodes != null) {
      m_VaryNodes[nNodes[iNode] - m_nStartNode].getCounts(
          nCounts, nNodes, nOffsets, iNode, iOffset, this, bSubstract);
      return;
    }

    // leaf list: derive the cell of every stored instance from its values
    // for the remaining nodes
    for (int iInstance = 0; iInstance < m_Instances.length; iInstance++) {
      Instance instance = m_Instances[iInstance];
      int iOffset2 = iOffset;
      for (int iNode2 = iNode; iNode2 < nNodes.length; iNode2++) {
        iOffset2 = iOffset2 + nOffsets[iNode2] * (int) instance.value(nNodes[iNode2]);
      }
      // honour bSubstract here as well; unconditionally incrementing would
      // add counts that the caller asked to have removed
      if (bSubstract) {
        nCounts[iOffset2]--;
      } else {
        nCounts[iOffset2]++;
      }
    }
  } // getCounts

  /**
   * print is used for debugging only and shows the ADTree in ASCII graphics.
   * Output goes to System.out, indented by one space per m_nStartNode.
   */
  public void print() {
    StringBuffer tabBuffer = new StringBuffer();
    for (int i = 0; i < m_nStartNode; i++) {
      tabBuffer.append(" ");
    }
    String sTab = tabBuffer.toString();

    System.out.println(sTab + "Count = " + m_nCount);
    if (m_VaryNodes != null) {
      for (int iNode = 0; iNode < m_VaryNodes.length; iNode++) {
        System.out.println(sTab + "Node " + (iNode + m_nStartNode));
        m_VaryNodes[iNode].print(sTab);
      }
    } else if (m_Instances != null) {
      // leaf list: there are no VaryNodes to descend into, so show the
      // stored instances instead of failing on the null array
      for (int iInstance = 0; iInstance < m_Instances.length; iInstance++) {
        System.out.println(sTab + m_Instances[iInstance]);
      }
    }
  }
} // class ADNode