/* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* * LinearNNSearch.java * Copyright (C) 1999-2012 University of Waikato */ package weka.core.neighboursearch; import java.util.Enumeration; import java.util.Vector; import weka.core.Instance; import weka.core.Instances; import weka.core.Option; import weka.core.RevisionUtils; import weka.core.Utils; /** <!-- globalinfo-start --> * Class implementing the brute force search algorithm for nearest neighbour search. * <p/> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -S * Skip identical instances (distances equal to zero). * </pre> * <!-- options-end --> * * @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz) * @version $Revision: 8034 $ */ public class LinearNNSearch extends NearestNeighbourSearch { /** for serialization. */ private static final long serialVersionUID = 1915484723703917241L; /** Array holding the distances of the nearest neighbours. It is filled up * both by nearestNeighbour() and kNearestNeighbours(). */ protected double[] m_Distances; /** Whether to skip instances from the neighbours that are identical to the query instance. */ protected boolean m_SkipIdentical = false; /** * Constructor. Needs setInstances(Instances) * to be called before the class is usable. */ public LinearNNSearch() { super(); } /** * Constructor that uses the supplied set of * instances. * * @param insts the instances to use */ public LinearNNSearch(Instances insts) { super(insts); m_DistanceFunction.setInstances(insts); } /** * Returns a string describing this nearest neighbour search algorithm. * * @return a description of the algorithm for displaying in the * explorer/experimenter gui */ public String globalInfo() { return "Class implementing the brute force search algorithm for nearest " + "neighbour search."; } /** * Returns an enumeration describing the available options. * * @return an enumeration of all the available options. */ public Enumeration listOptions() { Vector<Option> result = new Vector<Option>(); result.add(new Option( "\tSkip identical instances (distances equal to zero).\n", "S", 1,"-S")); return result.elements(); } /** * Parses a given list of options. <p/> * <!-- options-start --> * Valid options are: <p/> * * <pre> -S * Skip identical instances (distances equal to zero). * </pre> * <!-- options-end --> * * @param options the list of options as an array of strings * @throws Exception if an option is not supported */ public void setOptions(String[] options) throws Exception { super.setOptions(options); setSkipIdentical(Utils.getFlag('S', options)); } /** * Gets the current settings. * * @return an array of strings suitable for passing to setOptions() */ public String[] getOptions() { Vector<String> result; String[] options; int i; result = new Vector<String>(); options = super.getOptions(); for (i = 0; i < options.length; i++) result.add(options[i]); if (getSkipIdentical()) result.add("-S"); return result.toArray(new String[result.size()]); } /** * Returns the tip text for this property. * * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String skipIdenticalTipText() { return "Whether to skip identical instances (with distance 0 to the target)"; } /** * Sets the property to skip identical instances (with distance zero from * the target) from the set of neighbours returned. * * @param skip if true, identical intances are skipped */ public void setSkipIdentical(boolean skip) { m_SkipIdentical = skip; } /** * Gets whether if identical instances are skipped from the neighbourhood. * * @return true if identical instances are skipped */ public boolean getSkipIdentical() { return m_SkipIdentical; } /** * Returns the nearest instance in the current neighbourhood to the supplied * instance. * * @param target The instance to find the nearest neighbour for. * @return the nearest instance * @throws Exception if the nearest neighbour could not be found. */ public Instance nearestNeighbour(Instance target) throws Exception { return (kNearestNeighbours(target, 1)).instance(0); } /** * Returns k nearest instances in the current neighbourhood to the supplied * instance. * * @param target The instance to find the k nearest neighbours for. * @param kNN The number of nearest neighbours to find. * @return the k nearest neighbors * @throws Exception if the neighbours could not be found. */ public Instances kNearestNeighbours(Instance target, int kNN) throws Exception { //debug boolean print=false; if(m_Stats!=null) m_Stats.searchStart(); MyHeap heap = new MyHeap(kNN); double distance; int firstkNN=0; for(int i=0; i<m_Instances.numInstances(); i++) { if(target == m_Instances.instance(i)) //for hold-one-out cross-validation continue; if(m_Stats!=null) m_Stats.incrPointCount(); if(firstkNN<kNN) { if(print) System.out.println("K(a): "+(heap.size()+heap.noOfKthNearest())); distance = m_DistanceFunction.distance(target, m_Instances.instance(i), Double.POSITIVE_INFINITY, m_Stats); if(distance == 0.0 && m_SkipIdentical) if(i<m_Instances.numInstances()-1) continue; else heap.put(i, distance); heap.put(i, distance); firstkNN++; } else { MyHeapElement temp = heap.peek(); if(print) System.out.println("K(b): "+(heap.size()+heap.noOfKthNearest())); distance = m_DistanceFunction.distance(target, m_Instances.instance(i), temp.distance, m_Stats); if(distance == 0.0 && m_SkipIdentical) continue; if(distance < temp.distance) { heap.putBySubstitute(i, distance); } else if(distance == temp.distance) { heap.putKthNearest(i, distance); } } } Instances neighbours = new Instances(m_Instances, (heap.size()+heap.noOfKthNearest())); m_Distances = new double[heap.size()+heap.noOfKthNearest()]; int [] indices = new int[heap.size()+heap.noOfKthNearest()]; int i=1; MyHeapElement h; while(heap.noOfKthNearest()>0) { h = heap.getKthNearest(); indices[indices.length-i] = h.index; m_Distances[indices.length-i] = h.distance; i++; } while(heap.size()>0) { h = heap.get(); indices[indices.length-i] = h.index; m_Distances[indices.length-i] = h.distance; i++; } m_DistanceFunction.postProcessDistances(m_Distances); for(int k=0; k<indices.length; k++) { neighbours.add(m_Instances.instance(indices[k])); } if(m_Stats!=null) m_Stats.searchFinish(); return neighbours; } /** * Returns the distances of the k nearest neighbours. The kNearestNeighbours * or nearestNeighbour must always be called before calling this function. If * this function is called before calling either the kNearestNeighbours or * the nearestNeighbour, then it throws an exception. If, however, if either * of the nearestNeighbour functions are called at any point in the * past then no exception is thrown and the distances of the training set from * the last supplied target instance (to either one of the nearestNeighbour * functions) is/are returned. * * @return array containing the distances of the * nearestNeighbours. The length and ordering of the * array is the same as that of the instances returned * by nearestNeighbour functions. * @throws Exception if called before calling kNearestNeighbours * or nearestNeighbours. */ public double[] getDistances() throws Exception { if(m_Distances==null) throw new Exception("No distances available. Please call either "+ "kNearestNeighbours or nearestNeighbours first."); return m_Distances; } /** * Sets the instances comprising the current neighbourhood. * * @param insts The set of instances on which the nearest neighbour * search is carried out. Usually this set is the * training set. * @throws Exception if setting of instances fails */ public void setInstances(Instances insts) throws Exception { m_Instances = insts; m_DistanceFunction.setInstances(insts); } /** * Updates the LinearNNSearch to cater for the new added instance. This * implementation only updates the ranges of the DistanceFunction class, * since our set of instances is passed by reference and should already have * the newly added instance. * * @param ins The instance to add. Usually this is the instance that * is added to our neighbourhood i.e. the training * instances. * @throws Exception if the given instances are null */ public void update(Instance ins) throws Exception { if(m_Instances==null) throw new Exception("No instances supplied yet. Cannot update without"+ "supplying a set of instances first."); m_DistanceFunction.update(ins); } /** * Adds the given instance info. This implementation updates the range * datastructures of the DistanceFunction class. * * @param ins The instance to add the information of. Usually this is * the test instance supplied to update the range of * attributes in the distance function. */ public void addInstanceInfo(Instance ins) { if(m_Instances!=null) try{ update(ins); } catch(Exception ex) { ex.printStackTrace(); } } /** * Returns the revision string. * * @return the revision */ public String getRevision() { return RevisionUtils.extract("$Revision: 8034 $"); } }