/* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* * KDTreeNodeSplitter.java * Copyright (C) 1999-2012 University of Waikato */ package weka.core.neighboursearch.kdtrees; import java.io.Serializable; import java.util.Enumeration; import java.util.Vector; import weka.core.EuclideanDistance; import weka.core.Instances; import weka.core.OptionHandler; import weka.core.RevisionHandler; import weka.core.RevisionUtils; /** * Class that splits up a KDTreeNode. * * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) * @version $Revision: 8034 $ */ public abstract class KDTreeNodeSplitter implements Serializable, OptionHandler, RevisionHandler { /** The instances that'll be used for tree construction. */ protected Instances m_Instances; /** The distance function used for building the tree. */ protected EuclideanDistance m_EuclideanDistance; /** * The master index array that'll be reshuffled as nodes * are split and the tree is constructed. */ protected int[] m_InstList; /** * Stores whether if the width of a KDTree * node is normalized or not. */ protected boolean m_NormalizeNodeWidth; // Constants /** Index of min value in an array of attributes' range. */ public static final int MIN = EuclideanDistance.R_MIN; /** Index of max value in an array of attributes' range. */ public static final int MAX = EuclideanDistance.R_MAX; /** Index of width value (max-min) in an array of attributes' range. */ public static final int WIDTH = EuclideanDistance.R_WIDTH; /** * default constructor. */ public KDTreeNodeSplitter() { } /** * Creates a new instance of KDTreeNodeSplitter. * @param instList Reference of the master index array. * @param insts The set of training instances on which * the tree is built. * @param e The EuclideanDistance object that is used * in tree contruction. */ public KDTreeNodeSplitter(int[] instList, Instances insts, EuclideanDistance e) { m_InstList = instList; m_Instances = insts; m_EuclideanDistance = e; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { return new Vector().elements(); } /** * Parses a given list of options. * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { } /** * Gets the current settings of the object. * * @return an array of strings suitable for passing to setOptions */ public String[] getOptions() { return new String[0]; } /** * Checks whether an object of this class has been correctly * initialized. Performs checks to see if all the necessary * things (master index array, training instances, distance * function) have been supplied or not. * @throws Exception If the object has not been correctly * initialized. */ protected void correctlyInitialized() throws Exception { if(m_Instances==null) throw new Exception("No instances supplied."); else if(m_InstList==null) throw new Exception("No instance list supplied."); else if(m_EuclideanDistance==null) throw new Exception("No Euclidean distance function supplied."); else if(m_Instances.numInstances() != m_InstList.length) throw new Exception("The supplied instance list doesn't seem to match " + "the supplied instances"); } /** * Splits a node into two. After splitting two new nodes are created * and correctly initialised. And, node.left and node.right are * set appropriately. * @param node The node to split. * @param numNodesCreated The number of nodes that so far have been * created for the tree, so that the newly created nodes are * assigned correct/meaningful node numbers/ids. * @param nodeRanges The attributes' range for the points inside * the node that is to be split. * @param universe The attributes' range for the whole * point-space. * @throws Exception If there is some problem in splitting the * given node. */ public abstract void splitNode(KDTreeNode node, int numNodesCreated, double[][] nodeRanges, double[][] universe) throws Exception; /** * Sets the training instances on which the tree is (or is * to be) built. * @param inst The training instances. */ public void setInstances(Instances inst) { m_Instances = inst; } /** * Sets the master index array containing indices of the * training instances. This array will be rearranged as * the tree is built, so that each node is assigned a * portion in this array which contain the instances * insides the node's region. * @param instList The master index array. */ public void setInstanceList(int[] instList) { m_InstList = instList; } /** * Sets the EuclideanDistance object to use for * splitting nodes. * @param func The EuclideanDistance object. */ public void setEuclideanDistanceFunction(EuclideanDistance func) { m_EuclideanDistance = func; } /** * Sets whether if a nodes region is normalized * or not. If set to true then, when selecting * the widest attribute/dimension for splitting, * the width of each attribute/dimension, * of the points inside the node's region, is * divided by the width of that * attribute/dimension for the whole point-space. * Thus, each attribute/dimension of that node * is normalized. * * @param normalize Should be true if * normalization is required. */ public void setNodeWidthNormalization(boolean normalize) { m_NormalizeNodeWidth = normalize; } /** * Returns the widest dimension. The width of each * dimension (for the points inside the node) is * normalized, if m_NormalizeNodeWidth is set to * true. * @param nodeRanges The attributes' range of the * points inside the node that is to be split. * @param universe The attributes' range for the * whole point-space. * @return The index of the attribute/dimension * in which the points of the node have widest * spread. */ protected int widestDim(double[][] nodeRanges, double[][] universe) { final int classIdx = m_Instances.classIndex(); double widest = 0.0; int w = -1; if (m_NormalizeNodeWidth) { for (int i = 0; i < nodeRanges.length; i++) { double newWidest = nodeRanges[i][WIDTH] / universe[i][WIDTH]; if (newWidest > widest) { if (i == classIdx) continue; widest = newWidest; w = i; } } } else { for (int i = 0; i < nodeRanges.length; i++) { if (nodeRanges[i][WIDTH] > widest) { if (i == classIdx) continue; widest = nodeRanges[i][WIDTH]; w = i; } } } return w; } /** * Returns the revision string. * * @return the revision */ public String getRevision() { return RevisionUtils.extract("$Revision: 8034 $"); } }