/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * Metric.java * Copyright (C) 2001 Mikhail Bilenko * */ package weka.core.metrics; import java.util.ArrayList; import java.io.Serializable; import weka.core.*; /** * Abstract Metric class * * @author Mikhail Bilenko (mbilenko@cs.utexas.edu) * @version $Revision: 1.4 $ */ public abstract class Metric implements Cloneable, Serializable { /** number of attributes */ protected int m_numAttributes = 0; /** indeces of attributes which the metric works on */ protected int [] m_attrIdxs = null; /** index of the class attribute */ protected int m_classIndex = -1; // =============== // Public methods. // =============== /** * Generates a new Metric with a specified number of * attributes. Has to initialize all fields of the metric with * default values. * * @param numAttributes the number of attributes that the metric will work on * @exception Exception if the distance metric has not been * generated successfully. */ public abstract void buildMetric(int numAttributes) throws Exception; /** * Generates a new Metric. Has to initialize all fields of the metric * with default values * * @param numAttributes the number of attributes that the metric will work on * @param options an array of options suitable for passing to setOptions. * May be null. * @exception Exception if the distance metric has not been * generated successfully. */ public abstract void buildMetric(int numAttributes, String[] options) throws Exception; /** * Create a new metric for operating on specified instances * @param data instances that the metric will be used on */ public abstract void buildMetric(Instances data) throws Exception; public void build(Instances data)throws Exception{ buildMetric(data); } /** * Specifies a list of attributes which will be used by the metric * * @param attrs an array of attribute indices */ public void setAttrIdxs (int[] attrIdxs) { m_attrIdxs = new int[attrIdxs.length]; System.arraycopy(attrIdxs, 0, m_attrIdxs, 0, attrIdxs.length); } /** * Returns an array of attribute incece which will be used by the metric * * @return an array of attribute indices */ public int[] getAttrIndxs () { return m_attrIdxs; } /** * Specifies an interval of attributes which will be used by the metric * * @param begin_index beginning of attribute index interval * @param end_index end of attribute index interval */ public void setAttrIdxs (int startIdx, int endIdx) { m_attrIdxs = new int[endIdx - startIdx + 1]; for (int i = startIdx; i <= endIdx; i++) m_attrIdxs[i - startIdx] = i; } /** * Returns a distance value between two instances. * @param instance1 First instance. * @param instance2 Second instance. * @exception Exception if distance could not be estimated. */ public abstract double distance(Instance instance1, Instance instance2) throws Exception; /** * Returns a similarity estimate between two instances. * @param instance1 First instance. * @param instance2 Second instance. * @exception Exception if similarity could not be estimated. */ public abstract double similarity(Instance instance1, Instance instance2) throws Exception; /** Return the penalty contribution - * typically equivalent to distance, distance^2, or -similarity */ public abstract double penalty(Instance instance1, Instance instance2) throws Exception; /** Return the penalty contribution that must be symmetric; * typically equivalent to penalty, except for KL<>JS */ public abstract double penaltySymmetric(Instance instance1, Instance instance2) throws Exception; /** * Returns similarity value between two instances without using the weights. * @param instance1 First instance. * @param instance2 Second instance. * @exception Exception if similarity could not be estimated. */ public abstract double similarityNonWeighted(Instance instance1, Instance instance2) throws Exception; /** * Returns distance between two instances without using the weights. * @param instance1 First instance. * @param instance2 Second instance. * @exception Exception if similarity could not be estimated. */ public abstract double distanceNonWeighted(Instance instance1, Instance instance2) throws Exception; /** * It is often the case that last attribute of the data is the class. * This function takes instances, and returns an array of integers * 0..(num_attributes-1 - 1) to exclude the class attribute * * @return array of integer indeces of attributes, excluding * last one which is the class index */ public int[] getAttrIdxsWithoutLastClass(Instances instances) { int [] attrIdxs; attrIdxs = new int[instances.numAttributes() - 1]; for (int i = 0; i < attrIdxs.length; i++) { attrIdxs[i] = i; } return attrIdxs; } /** * This function takes instances, and returns an array of integers * 0..(num_attributes-1) * * @return array of integer indeces of attributes */ public int[] getAttrIdxs(Instances instances) { int [] attrIdxs; attrIdxs = new int[instances.numAttributes()]; for (int i = 0; i < attrIdxs.length; i++) { attrIdxs[i] = i; } return attrIdxs ; } /** Specify which attribute is the class attribute * @param classAttrIdx the index of the class attribute */ public void setClassIndex(int classIndex) { m_classIndex = classIndex; } /** Get the index of the attribute is the class attribute * @returns the index of the class attribute */ public int getClassIndex(int classIndex) { return m_classIndex; } /** Get the number of attributes that the metric uses * @returns the number of attributes that the metric uses */ public int getNumAttributes() { return m_numAttributes; } /** The computation of a metric can be either based on distance, or on similarity * @returns true if the underlying metric computes distance, false if similarity */ public abstract boolean isDistanceBased(); /** Get the norm-2 length of an instance assuming all attributes are numeric * @returns norm-2 length of an instance */ public static double length(Instance instance) { int classIndex = instance.classIndex(); double length = 0; if (instance instanceof SparseInstance) { // remap classIndex to an internal index if (classIndex >= 0) { classIndex = ((SparseInstance)instance).locateIndex(classIndex); } for (int i = 0; i < instance.numValues(); i++) { if (i != classIndex) { double value = instance.valueSparse(i); length += value * value; } } } else { // non-sparse instance double[] values = instance.toDoubleArray(); for (int i = 0; i < values.length; i++) { if (i != classIndex) { length += values[i] * values[i]; } } } return Math.sqrt(length); } /** Normalizes the values of a normal Instance * * @author Sugato Basu * @param inst Instance to be normalized */ public void normalizeInstance(Instance inst) throws Exception{ double norm = 0; double values [] = inst.toDoubleArray(); if (inst instanceof SparseInstance) { throw new Exception("Use normalizeSparseInstance function"); } for (int i=0; i<values.length; i++) { if (i != inst.classIndex()) { // don't normalize the class index norm += values[i] * values[i]; } } norm = Math.sqrt(norm); for (int i=0; i<values.length; i++) { if (i != inst.classIndex()) { // don't normalize the class index values[i] /= norm; } } inst.setValueArray(values); } /** * Creates a new instance of a metric given it's class name and * (optional) arguments to pass to it's setOptions method. If the * classifier implements OptionHandler and the options parameter is * non-null, the classifier will have it's options set. * * @param metricName the fully qualified class name of the metric * @param options an array of options suitable for passing to setOptions. May * be null. * @return the newly created metric ready for use. * @exception Exception if the metric name is invalid, or the options * supplied are not acceptable to the metric */ public static Metric forName(String metricName, String [] options) throws Exception { return (Metric)Utils.forName(Metric.class, metricName, options); } /** Create a copy of this metric */ public Object clone() { Metric m = null; try { m = (Metric) super.clone(); } catch (CloneNotSupportedException e) { System.err.println("Metric can't clone"); } // clone the fields if (m_attrIdxs != null) { m.m_attrIdxs = (int []) m_attrIdxs.clone(); } return m; } }