/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* BottomUpConstructor.java
* Copyright (C) 2007-2012 University of Waikato, Hamilton, New Zealand
*/
package weka.core.neighboursearch.balltrees;
import java.util.ArrayList;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
/**
<!-- globalinfo-start -->
* The class that constructs a ball tree bottom up.
* <p/>
<!-- globalinfo-end -->
*
<!-- technical-bibtex-start -->
* BibTeX:
* <pre>
* @techreport{Omohundro1989,
* author = {Stephen M. Omohundro},
* institution = {International Computer Science Institute},
* month = {December},
* number = {TR-89-063},
* title = {Five Balltree Construction Algorithms},
* year = {1989}
* }
* </pre>
* <p/>
<!-- technical-bibtex-end -->
*
<!-- options-start -->
* Valid options are: <p/>
*
* <pre> -N <value>
* Set maximum number of instances in a leaf node
* (default: 40)</pre>
*
* <pre> -R
* Set internal nodes' radius to the sum
* of the child balls radii. So that it
* contains the child balls.</pre>
*
<!-- options-end -->
*
* @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
* @version $Revision: 8034 $
*/
public class BottomUpConstructor
  extends BallTreeConstructor
  implements TechnicalInformationHandler {

  /** for serialization. */
  private static final long serialVersionUID = 5864250777657707687L;

  /**
   * Returns a string describing this nearest neighbour search algorithm.
   *
   * @return a description of the algorithm for displaying in the
   *         explorer/experimenter gui
   */
  public String globalInfo() {
    return "The class that constructs a ball tree bottom up.";
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing detailed
   * information about the technical background of this class, e.g., paper
   * reference or book this class is based on.
   *
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result = new TechnicalInformation(Type.TECHREPORT);
    result.setValue(Field.AUTHOR, "Stephen M. Omohundro");
    result.setValue(Field.YEAR, "1989");
    result.setValue(Field.TITLE, "Five Balltree Construction Algorithms");
    result.setValue(Field.MONTH, "December");
    result.setValue(Field.NUMBER, "TR-89-063");
    result.setValue(Field.INSTITUTION, "International Computer Science Institute");
    return result;
  }

  /**
   * Creates a new instance of BottomUpConstructor.
   */
  public BottomUpConstructor() {
  }

  /**
   * Builds the ball tree bottom up. Every index in the master index array
   * becomes a single-point temp node of radius zero; those nodes are then
   * handed to {@link #mergeNodes(ArrayList, int, int, int[])}.
   *
   * @return The root node of the tree.
   * @throws Exception If there is problem building the tree.
   */
  public BallNode buildTree() throws Exception {
    ArrayList<TempNode> list = new ArrayList<TempNode>(m_InstList.length);
    for (int i = 0; i < m_InstList.length; i++) {
      TempNode n = new TempNode();
      n.points = new int[] { m_InstList[i] };
      n.anchor = m_Instances.instance(m_InstList[i]);
      n.radius = 0.0;
      list.add(n);
    }
    return mergeNodes(list, 0, m_InstList.length - 1, m_InstList);
  }

  /**
   * Merges nodes into one top node. In every step the pair of nodes with the
   * smallest combined radius (see {@link #calcRadius(TempNode, TempNode)}) is
   * replaced by a new parent node, until a single node remains. That node is
   * then converted into a tree of {@link BallNode}s.
   * <p/>
   * Side effects: progress is written to <code>System.err</code>, the point
   * indices of the final temp root are copied into <code>m_InstList</code>,
   * and the node/leaf/depth counters are reset before the final tree is made.
   *
   * @param list List of bottom most nodes (the actual instances). The list
   *          is consumed: on return it holds only the temp root.
   * @param startIdx The index marking the start of the portion of master
   *          index array containing instances that need to be merged.
   * @param endIdx The index marking the end of the portion of master index
   *          array containing instances that need to be merged.
   * @param instList The master index array.
   * @return The root node of the tree resulting from merging of bottom most
   *         nodes.
   * @throws Exception If the list is empty, if no pair of nodes with a
   *           finite combined radius can be found, or if there is some other
   *           problem merging the nodes.
   */
  protected BallNode mergeNodes(ArrayList<TempNode> list, int startIdx, int endIdx,
    int[] instList) throws Exception {
    if (list.isEmpty()) {
      throw new Exception("Cannot build a ball tree: there are no nodes to merge.");
    }

    int merge = 1;
    while (list.size() > 1) { // main merging loop
      System.err.print("merge step: " + merge++ + " \r");

      // find the pair whose enclosing ball would be smallest
      double minRadius = Double.POSITIVE_INFINITY;
      int min1 = -1;
      int min2 = -1;
      for (int i = 0; i < list.size(); i++) {
        TempNode first = list.get(i);
        for (int j = i + 1; j < list.size(); j++) {
          double tmpRadius = calcRadius(first, list.get(j));
          if (tmpRadius < minRadius) {
            minRadius = tmpRadius;
            min1 = i;
            min2 = j;
          }
        }
      }
      if (min1 < 0) {
        // every candidate radius was NaN or infinite, so no pair was selected
        throw new Exception("Cannot merge nodes: no pair of nodes has a finite "
          + "combined radius.");
      }

      TempNode parent = new TempNode();
      parent.left = list.get(min1);
      parent.right = list.get(min2);

      // the pivot is only needed for the winning pair, so it is computed once
      // here rather than for every candidate pair in the search above
      parent.anchor = calcPivot(parent.left, parent.right, m_Instances);

      // parent's points are the left child's points followed by the right
      // child's; makeBallTree relies on this ordering when it splits ranges
      int leftLen = parent.left.points.length;
      int rightLen = parent.right.points.length;
      int[] mergedPoints = new int[leftLen + rightLen];
      System.arraycopy(parent.left.points, 0, mergedPoints, 0, leftLen);
      System.arraycopy(parent.right.points, 0, mergedPoints, leftLen, rightLen);
      parent.points = mergedPoints;
      parent.radius = BallNode.calcRadius(parent.points, m_Instances,
        parent.anchor, m_DistanceFunction);

      // min1 < min2 always holds; removing the higher index first keeps the
      // lower index valid
      list.remove(min2);
      list.remove(min1);
      list.add(parent);
    } // end while
    System.err.println("");

    TempNode tmpRoot = list.get(0);
    if (m_InstList.length != tmpRoot.points.length) {
      throw new Exception("Root nodes instance list is of irregular length. " +
        "Please check code.");
    }
    System.arraycopy(tmpRoot.points, 0, m_InstList, 0, tmpRoot.points.length);

    m_NumNodes = m_MaxDepth = m_NumLeaves = 0;
    double rootRadius = BallNode.calcRadius(instList, m_Instances,
      tmpRoot.anchor, m_DistanceFunction);
    return makeBallTree(tmpRoot, startIdx, endIdx, instList, 0, rootRadius);
  }

  /**
   * Makes ball tree nodes of temp nodes that were used in the merging
   * process. A temp node becomes an internal node only if it holds more
   * points than allowed in a leaf, its radius relative to the root radius is
   * at least the maximum relative leaf radius, and it has both children;
   * otherwise it becomes a leaf.
   *
   * @param node The temp root node.
   * @param startidx The index marking the start of the portion of master
   *          index array containing instances to be merged.
   * @param endidx The index marking the end of the portion of master index
   *          array containing instances to be merged.
   * @param instList The master index array.
   * @param depth The depth of the provided temp node.
   * @param rootRadius The smallest ball enclosing all data points.
   * @return The proper top BallTreeNode.
   * @throws Exception If there is some problem.
   */
  protected BallNode makeBallTree(TempNode node, int startidx, int endidx,
    int[] instList, int depth, final double rootRadius) throws Exception {
    if (m_MaxDepth < depth) {
      m_MaxDepth = depth;
    }

    // internal and leaf nodes are constructed the same way
    Instance pivot = BallNode.calcCentroidPivot(startidx, endidx, instList,
      m_Instances);
    double radius = BallNode.calcRadius(startidx, endidx, instList,
      m_Instances, pivot, m_DistanceFunction);
    BallNode ball = new BallNode(startidx, endidx, m_NumNodes, pivot, radius);
    m_NumNodes += 1;

    // a zero root radius never yields an internal node (avoids dividing by 0)
    boolean makeInternal = node.points.length > m_MaxInstancesInLeaf
      && rootRadius != 0
      && node.radius / rootRadius >= m_MaxRelLeafRadius
      && node.left != null
      && node.right != null;

    if (makeInternal) {
      int leftLen = node.left.points.length;
      ball.m_Left = makeBallTree(node.left, startidx, startidx + leftLen - 1,
        instList, depth + 1, rootRadius);
      ball.m_Right = makeBallTree(node.right, startidx + leftLen, endidx,
        instList, depth + 1, rootRadius);
    } else {
      m_NumLeaves++;
    }
    return ball;
  }

  /**
   * Adds an instance to the ball tree.
   *
   * @param node The root node of the tree.
   * @param inst The instance to add to the tree.
   * @return The new master index array after adding the instance.
   * @throws Exception Always as BottomUpConstructor does not allow addition
   *           of instances after batch construction.
   */
  public int[] addInstance(BallNode node, Instance inst) throws Exception {
    throw new Exception("BottomUpConstruction method does not allow addition " +
      "of new Instances.");
  }

  /**
   * Calculates the centroid pivot of a node based on its two child nodes.
   * The pivot is the average of the two child anchors, each weighted by the
   * fraction of the combined points that its node holds. The class attribute
   * is skipped and therefore stays 0 in the result.
   * <p/>
   * Note that the class index is read from <code>m_Instances</code> while the
   * number of attributes is read from <code>insts</code>; within this class
   * the method is only called with <code>m_Instances</code>.
   *
   * @param node1 The first child node.
   * @param node2 The second child node.
   * @param insts The instances on which the tree is to be built.
   * @return The centre/pivot of the node.
   * @throws Exception If there is some problem calculating the centre/pivot
   *           of the node.
   */
  public Instance calcPivot(TempNode node1, TempNode node2, Instances insts)
    throws Exception {
    int classIdx = m_Instances.classIndex();
    double[] attrVals = new double[insts.numAttributes()];
    int totalPoints = node1.points.length + node2.points.length;

    addWeightedAnchor(attrVals, node1.anchor,
      (double) node1.points.length / totalPoints, classIdx);
    addWeightedAnchor(attrVals, node2.anchor,
      (double) node2.points.length / totalPoints, classIdx);

    return new DenseInstance(1.0, attrVals);
  }

  /**
   * Adds the stored values of an anchor, scaled by a weight, onto an
   * attribute value array, skipping the class attribute.
   *
   * @param attrVals The array, indexed by attribute index, to accumulate
   *          into.
   * @param anchor The anchor whose values are added.
   * @param weight The factor each value is multiplied by.
   * @param classIdx The index of the class attribute to skip.
   */
  private static void addWeightedAnchor(double[] attrVals, Instance anchor,
    double weight, int classIdx) {
    for (int k = 0; k < anchor.numValues(); k++) {
      // k runs over the stored values; index(k) maps it to the attribute
      // index, which differs from k for sparse instances
      int attIdx = anchor.index(k);
      if (attIdx == classIdx) {
        continue;
      }
      attrVals[attIdx] += anchor.valueSparse(k) * weight;
    }
  }

  /**
   * Calculates the radius of a node based on its two child nodes: half of
   * the sum of both child radii and the distance between their anchors.
   *
   * @param n1 The first child node.
   * @param n2 The second child node.
   * @return The calculated radius of the node.
   * @throws Exception If there is some problem in calculating the radius.
   */
  public double calcRadius(TempNode n1, TempNode n2) throws Exception {
    double anchorDistance = m_DistanceFunction.distance(n1.anchor, n2.anchor);
    return (n1.radius + anchorDistance + n2.radius) / 2;
  }

  /**
   * Temp class to represent either a leaf node or an internal node. Should only
   * have two children (could be the case one child is an instance and the
   * other another node).
   *
   * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
   * @version $Revision: 8034 $
   */
  protected class TempNode
    implements RevisionHandler {

    /** The centre/pivot of the node. */
    Instance anchor;

    /** The radius of the node. */
    double radius;

    /** Indices of the points in the node. */
    int[] points;

    /** The node's left child. */
    TempNode left = null;

    /** The node's right child. */
    TempNode right = null;

    /**
     * Prints the node as "p: " followed by its point indices, separated by
     * ", ".
     *
     * @return The node as a string.
     */
    public String toString() {
      StringBuilder bf = new StringBuilder("p: ");
      for (int i = 0; i < points.length; i++) {
        if (i != 0) {
          bf.append(", ");
        }
        bf.append(points[i]);
      }
      return bf.toString();
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    public String getRevision() {
      return RevisionUtils.extract("$Revision: 8034 $");
    }
  }

  /**
   * Returns the revision string.
   *
   * @return the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 8034 $");
  }
}