/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ /** * <p> * @author Written by Cristobal Romero (Universidad de C�rdoba) 10/10/2007 * @version 0.1 * @since JDK 1.5 *</p> */ package keel.Algorithms.Decision_Trees.M5; import java.util.*; import java.io.*; /** * Class for handling an instance. All values (numeric, nominal, or * string) are internally stored as floating-point numbers. If an * attribute is nominal (or a string), the stored value is the index * of the corresponding nominal (or string) value in the attribute's * definition. We have chosen this approach in favor of a more elegant * object-oriented approach because it is much faster. */ public class M5Instance implements Serializable { /** Constant representing a missing value. */ protected final static double MISSING_VALUE = Double.NaN; /** * The dataset the instance has access to. Null if the instance * doesn't have access to any dataset. Only if an instance has * access to a dataset, it knows about the actual attribute types. */ protected M5Instances m_Dataset; /** The instance's attribute values. */ protected double[] m_AttValues; /** The instance's weight. */ protected double m_Weight; /** * Constructor that copies the attribute values and the weight from * the given instance. Reference to the dataset is set to null. * (ie. the instance doesn't have access to information about the * attribute types) * * @param instance the instance from which the attribute * values and the weight are to be copied */ public M5Instance(M5Instance instance) { m_AttValues = instance.m_AttValues; m_Weight = instance.m_Weight; m_Dataset = null; } /** * Constructor that inititalizes instance variable with given * values. Reference to the dataset is set to null. (ie. the instance * doesn't have access to information about the attribute types) * * @param weight the instance's weight * @param attValues a vector of attribute values */ public M5Instance(double weight, double[] attValues) { m_AttValues = attValues; m_Weight = weight; m_Dataset = null; } /** * Constructor of an instance that sets weight to one, all values to * be missing, and the reference to the dataset to null. (ie. the instance * doesn't have access to information about the attribute types) * * @param numAttributes the size of the instance */ public M5Instance(int numAttributes) { m_AttValues = new double[numAttributes]; for (int i = 0; i < m_AttValues.length; i++) { m_AttValues[i] = MISSING_VALUE; } m_Weight = 1; m_Dataset = null; } /** * Returns the attribute with the given index. * * @param index the attribute's index * @return the attribute at the given position * @throws Exception * @exception UnassignedDatasetException if instance doesn't have access to a * dataset */ public M5Attribute attribute(int index) throws Exception { if (m_Dataset == null) { throw new Exception("M5Instace doesn't have access to a dataset!"); } return m_Dataset.attribute(index); } /** * Returns the attribute with the given index. Does the same * thing as attribute(). * * @param indexOfIndex the index of the attribute's index * @return the attribute at the given position * @throws Exception * @exception UnassignedDatasetException if instance doesn't have access to a * dataset */ public M5Attribute attributeSparse(int indexOfIndex) throws Exception { if (m_Dataset == null) { throw new Exception("M5Instace doesn't have access to a dataset!"); } return m_Dataset.attribute(indexOfIndex); } /** * Returns class attribute. * * @return the class attribute * @throws Exception * @exception UnassignedDatasetException if the class is not set or the * instance doesn't have access to a dataset */ public M5Attribute classAttribute() throws Exception { if (m_Dataset == null) { throw new Exception("M5Instace doesn't have access to a dataset!"); } return m_Dataset.classAttribute(); } /** * Returns the class attribute's index. * * @return the class index as an integer * @throws Exception * @exception UnassignedDatasetException if instance doesn't have access to a dataset */ public int classIndex() throws Exception { if (m_Dataset == null) { throw new Exception("M5Instace doesn't have access to a dataset!"); } return m_Dataset.classIndex(); } /** * Tests if an instance's class is missing. * * @return true if the instance's class is missing * @throws Exception * @exception UnassignedClassException if the class is not set or the instance doesn't * have access to a dataset */ public boolean classIsMissing() throws Exception { if (classIndex() < 0) { throw new Exception("Class is not set!"); } return isMissing(classIndex()); } /** * Returns an instance's class value in internal format. (ie. as a * floating-point number) * * @return the corresponding value as a double (If the * corresponding attribute is nominal (or a string) then it returns the * value's index as a double). * @throws Exception * @exception UnassignedClassException if the class is not set or the instance doesn't * have access to a dataset */ public double classValue() throws Exception { if (classIndex() < 0) { throw new Exception("Class is not set!"); } return value(classIndex()); } /** * Produces a shallow copy of this instance. The copy has * access to the same dataset. (if you want to make a copy * that doesn't have access to the dataset, use * <code>new M5Instace(instance)</code> * * @return the shallow copy */ public Object copy() { M5Instance result = new M5Instance(this); result.m_Dataset = m_Dataset; return result; } /** * Returns the dataset this instance has access to. (ie. obtains * information about attribute types from) Null if the instance * doesn't have access to a dataset. * * @return the dataset the instance has accesss to */ public M5Instances dataset() { return m_Dataset; } /** * Deletes an attribute at the given position (0 to * numAttributes() - 1). Only succeeds if the instance does not * have access to any dataset because otherwise inconsistencies * could be introduced. * * @param position the attribute's position * @exception RuntimeException if the instance has access to a * dataset */ public void deleteAttributeAt(int position) { if (m_Dataset != null) { throw new RuntimeException("M5Instace has access to a dataset!"); } forceDeleteAttributeAt(position); } /** * Returns an enumeration of all the attributes. * * @return enumeration of all the attributes * @throws Exception * @exception UnassignedDatasetException if the instance doesn't * have access to a dataset */ public Enumeration enumerateAttributes() throws Exception { if (m_Dataset == null) { throw new Exception("M5Instace doesn't have access to a dataset!"); } return m_Dataset.enumerateAttributes(); } /** * Tests if the headers of two instances are equivalent. * * @param inst another instance * @return true if the header of the given instance is * equivalent to this instance's header * @throws Exception * @exception UnassignedDatasetException if instance doesn't have access to any * dataset */ public boolean equalHeaders(M5Instance inst) throws Exception { if (m_Dataset == null) { throw new Exception("M5Instace doesn't have access to a dataset!"); } return m_Dataset.equalHeaders(inst.m_Dataset); } /** * Returns the index of the attribute stored at the given position. * Just returns the given value. * * @param position the position * @return the index of the attribute stored at the given position */ public int index(int position) { return position; } /** * Inserts an attribute at the given position (0 to * numAttributes()). Only succeeds if the instance does not * have access to any dataset because otherwise inconsistencies * could be introduced. * * @param position the attribute's position */ public void insertAttributeAt(int position) { if (m_Dataset != null) { throw new RuntimeException("M5Instace has accesss to a dataset!"); } if ((position < 0) || (position > numAttributes())) { throw new IllegalArgumentException( "Can't insert attribute: index out " + "of range"); } forceInsertAttributeAt(position); } /** * Tests if a specific value is "missing". * * @param attIndex the attribute's index */ public boolean isMissing(int attIndex) { if (Double.isNaN(m_AttValues[attIndex])) { return true; } return false; } /** * Tests if a specific value is "missing". Does * the same thing as isMissing() if applied to an Instance. * * @param indexOfIndex the index of the attribute's index */ public boolean isMissingSparse(int indexOfIndex) { if (Double.isNaN(m_AttValues[indexOfIndex])) { return true; } return false; } /** * Tests if a specific value is "missing". * The given attribute has to belong to a dataset. * * @param att the attribute */ public boolean isMissing(M5Attribute att) { return isMissing(att.index()); } /** * Tests if the given value codes "missing". * * @param val the value to be tested * @return true if val codes "missing" */ public static boolean isMissingValue(double val) { return Double.isNaN(val); } /** * Merges this instance with the given instance and returns * the result. Dataset is set to null. * * @param inst the instance to be merged with this one * @return the merged instances */ public M5Instance mergeInstance(M5Instance inst) { int m = 0; double[] newVals = new double[numAttributes() + inst.numAttributes()]; for (int j = 0; j < numAttributes(); j++, m++) { newVals[m] = value(j); } for (int j = 0; j < inst.numAttributes(); j++, m++) { newVals[m] = inst.value(j); } return new M5Instance(1.0, newVals); } /** * Returns the double that codes "missing". * * @return the double that codes "missing" */ public static double missingValue() { return MISSING_VALUE; } /** * Returns the number of attributes. * * @return the number of attributes as an integer */ public int numAttributes() { return m_AttValues.length; } /** * Returns the number of class labels. * * @return the number of class labels as an integer if the * class attribute is nominal, 1 otherwise. * @throws Exception * @exception UnassignedDatasetException if instance doesn't have access to any * dataset */ public int numClasses() throws Exception { if (m_Dataset == null) { throw new Exception("M5Instace doesn't have access to a dataset!"); } return m_Dataset.numClasses(); } /** * Returns the number of values present. Always the same as numAttributes(). * * @return the number of values */ public int numValues() { return m_AttValues.length; } /** * Replaces all missing values in the instance with the * values contained in the given array. A deep copy of * the vector of attribute values is performed before the * values are replaced. * * @param array containing the means and modes * @exception IllegalArgumentException if numbers of attributes are unequal */ public void replaceMissingValues(double[] array) { if ((array == null) || (array.length != m_AttValues.length)) { throw new IllegalArgumentException("Unequal number of attributes!"); } freshAttributeVector(); for (int i = 0; i < m_AttValues.length; i++) { if (isMissing(i)) { m_AttValues[i] = array[i]; } } } /** * Sets the class value of an instance to be "missing". A deep copy of * the vector of attribute values is performed before the * value is set to be missing. * @throws Exception * * @exception UnassignedClassException if the class is not set * @exception UnassignedDatasetException if the instance doesn't * have access to a dataset */ public void setClassMissing() throws Exception { if (classIndex() < 0) { throw new Exception("Class is not set!"); } setMissing(classIndex()); } /** * Sets the class value of an instance to the given value (internal * floating-point format). A deep copy of the vector of attribute * values is performed before the value is set. * * @param value the new attribute value (If the corresponding * attribute is nominal (or a string) then this is the new value's * index as a double). * @throws Exception * @exception UnassignedClassException if the class is not set * @exception UnaddignedDatasetException if the instance doesn't * have access to a dataset */ public void setClassValue(double value) throws Exception { if (classIndex() < 0) { throw new Exception("Class is not set!"); } setValue(classIndex(), value); } /** * Sets the class value of an instance to the given value. A deep * copy of the vector of attribute values is performed before the * value is set. * * @param value the new class value (If the class * is a string attribute and the value can't be found, * the value is added to the attribute). * @throws Exception * @exception UnassignedClassException if the class is not set * @exception UnassignedDatasetException if the dataset is not set * @exception IllegalArgumentException if the attribute is not * nominal or a string, or the value couldn't be found for a nominal * attribute */ public final void setClassValue(String value) throws Exception { if (classIndex() < 0) { throw new Exception("Class is not set!"); } setValue(classIndex(), value); } /** * Sets the reference to the dataset. Does not check if the instance * is compatible with the dataset. Note: the dataset does not know * about this instance. If the structure of the dataset's header * gets changed, this instance will not be adjusted automatically. * * @param instances the reference to the dataset */ public final void setDataset(M5Instances instances) { m_Dataset = instances; } /** * Sets a specific value to be "missing". Performs a deep copy * of the vector of attribute values before the value is set to * be missing. * * @param attIndex the attribute's index */ public final void setMissing(int attIndex) { setValue(attIndex, MISSING_VALUE); } /** * Sets a specific value to be "missing". Performs a deep copy * of the vector of attribute values before the value is set to * be missing. The given attribute has to belong to a dataset. * * @param att the attribute */ public final void setMissing(M5Attribute att) { setMissing(att.index()); } /** * Sets a specific value in the instance to the given value * (internal floating-point format). Performs a deep copy * of the vector of attribute values before the value is set. * * @param attIndex the attribute's index * @param value the new attribute value (If the corresponding * attribute is nominal (or a string) then this is the new value's * index as a double). */ public void setValue(int attIndex, double value) { freshAttributeVector(); m_AttValues[attIndex] = value; } /** * Sets a specific value in the instance to the given value * (internal floating-point format). Performs a deep copy * of the vector of attribute values before the value is set. * Does exactly the same thing as setValue(). * * @param indexOfIndex the index of the attribute's index * @param value the new attribute value (If the corresponding * attribute is nominal (or a string) then this is the new value's * index as a double). */ public void setValueSparse(int indexOfIndex, double value) { freshAttributeVector(); m_AttValues[indexOfIndex] = value; } /** * Sets a value of a nominal or string attribute to the given * value. Performs a deep copy of the vector of attribute values * before the value is set. * * @param attIndex the attribute's index * @param value the new attribute value (If the attribute * is a string attribute and the value can't be found, * the value is added to the attribute). * @throws Exception * @exception UnassignedDatasetException if the dataset is not set * @exception IllegalArgumentException if the selected * attribute is not nominal or a string, or the supplied value couldn't * be found for a nominal attribute */ public final void setValue(int attIndex, String value) throws Exception { int valIndex; if (m_Dataset == null) { throw new Exception("M5Instace doesn't have access to a dataset!"); } if (!attribute(attIndex).isNominal() && !attribute(attIndex).isString()) { throw new IllegalArgumentException( "Attribute neither nominal nor string!"); } valIndex = attribute(attIndex).indexOfValue(value); if (valIndex == -1) { if (attribute(attIndex).isNominal()) { throw new IllegalArgumentException( "Value not defined for given nominal attribute!"); } else { attribute(attIndex).forceAddValue(value); valIndex = attribute(attIndex).indexOfValue(value); } } setValue(attIndex, (double) valIndex); } /** * Sets a specific value in the instance to the given value * (internal floating-point format). Performs a deep copy of the * vector of attribute values before the value is set, so if you are * planning on calling setValue many times it may be faster to * create a new instance using toDoubleArray. The given attribute * has to belong to a dataset. * * @param att the attribute * @param value the new attribute value (If the corresponding * attribute is nominal (or a string) then this is the new value's * index as a double). */ public final void setValue(M5Attribute att, double value) { setValue(att.index(), value); } /** * Sets a value of an nominal or string attribute to the given * value. Performs a deep copy of the vector of attribute values * before the value is set, so if you are planning on calling setValue many * times it may be faster to create a new instance using toDoubleArray. * The given attribute has to belong to a dataset. * * @param att the attribute * @param value the new attribute value (If the attribute * is a string attribute and the value can't be found, * the value is added to the attribute). * @exception IllegalArgumentException if the the attribute is not * nominal or a string, or the value couldn't be found for a nominal * attribute */ public final void setValue(M5Attribute att, String value) { if (!att.isNominal() && !att.isString()) { throw new IllegalArgumentException( "Attribute neither nominal nor string!"); } int valIndex = att.indexOfValue(value); if (valIndex == -1) { if (att.isNominal()) { throw new IllegalArgumentException( "Value not defined for given nominal attribute!"); } else { att.forceAddValue(value); valIndex = att.indexOfValue(value); } } setValue(att.index(), (double) valIndex); } /** * Sets the weight of an instance. * * @param weight the weight */ public final void setWeight(double weight) { m_Weight = weight; } /** * Returns the value of a nominal (or string) attribute * for the instance. * * @param attIndex the attribute's index * @return the value as a string * @throws Exception * @exception IllegalArgumentException if the attribute is not a nominal * (or string) attribute. * @exception UnassignedDatasetException if the instance doesn't belong * to a dataset. */ public final String stringValue(int attIndex) throws Exception { if (m_Dataset == null) { throw new Exception("M5Instace doesn't have access to a dataset!"); } if (!m_Dataset.attribute(attIndex).isNominal() && !m_Dataset.attribute(attIndex).isString()) { throw new IllegalArgumentException( "Attribute neither nominal nor string!"); } return m_Dataset.attribute(attIndex). value((int) value(attIndex)); } /** * Returns the value of a nominal (or string) attribute * for the instance. * * @param att the attribute * @return the value as a string * @throws Exception * @exception IllegalArgumentException if the attribute is not a nominal * (or string) attribute. * @exception UnassignedDatasetException if the instance doesn't belong * to a dataset. */ public final String stringValue(M5Attribute att) throws Exception { return stringValue(att.index()); } /** * Returns the values of each attribute as an array of doubles. * * @return an array containing all the instance attribute values */ public double[] toDoubleArray() { double[] newValues = new double[m_AttValues.length]; System.arraycopy(m_AttValues, 0, newValues, 0, m_AttValues.length); return newValues; } /** * Returns the description of one instance. If the instance * doesn't have access to a dataset, it returns the internal * floating-point values. Quotes string * values that contain whitespace characters. * * @return the instance's description as a string */ public String toString() { StringBuffer text = new StringBuffer(); for (int i = 0; i < m_AttValues.length; i++) { if (i > 0) { text.append(","); } try { text.append(toString(i)); } catch (Exception e) { e.printStackTrace(); } } return text.toString(); } /** * Returns the description of one value of the instance as a * string. If the instance doesn't have access to a dataset, it * returns the internal floating-point value. Quotes string * values that contain whitespace characters, or if they * are a question mark. * * @param attIndex the attribute's index * @return the value's description as a string * @throws Exception */ public final String toString(int attIndex) throws Exception { StringBuffer text = new StringBuffer(); if (isMissing(attIndex)) { text.append("?"); } else { if (m_Dataset == null) { text.append(M5StaticUtils.doubleToString(m_AttValues[attIndex], 6)); } else { if (m_Dataset.attribute(attIndex).isNominal() || m_Dataset.attribute(attIndex).isString()) { text.append(M5StaticUtils.quote(stringValue(attIndex))); } else { text.append(M5StaticUtils.doubleToString(value(attIndex), 6)); } } } return text.toString(); } /** * Returns the description of one value of the instance as a * string. If the instance doesn't have access to a dataset it * returns the internal floating-point value. Quotes string * values that contain whitespace characters, or if they * are a question mark. * The given attribute has to belong to a dataset. * * @param att the attribute * @return the value's description as a string * @throws Exception */ public final String toString(M5Attribute att) throws Exception { return toString(att.index()); } /** * Returns an instance's attribute value in internal format. * * @param attIndex the attribute's index * @return the specified value as a double (If the corresponding * attribute is nominal (or a string) then it returns the value's index as a * double). */ public double value(int attIndex) { return m_AttValues[attIndex]; } /** * Returns an instance's attribute value in internal format. * Does exactly the same thing as value() if applied to an M5Instace. * * @param indexOfIndex the index of the attribute's index * @return the specified value as a double (If the corresponding * attribute is nominal (or a string) then it returns the value's index as a * double). */ public double valueSparse(int indexOfIndex) { return m_AttValues[indexOfIndex]; } /** * Returns an instance's attribute value in internal format. * The given attribute has to belong to a dataset. * * @param att the attribute * @return the specified value as a double (If the corresponding * attribute is nominal (or a string) then it returns the value's index as a * double). */ public double value(M5Attribute att) { return value(att.index()); } /** * Returns the instance's weight. * * @return the instance's weight as a double */ public final double weight() { return m_Weight; } /** * Deletes an attribute at the given position (0 to * numAttributes() - 1). * * @param pos the attribute's position */ void forceDeleteAttributeAt(int position) { double[] newValues = new double[m_AttValues.length - 1]; System.arraycopy(m_AttValues, 0, newValues, 0, position); if (position < m_AttValues.length - 1) { System.arraycopy(m_AttValues, position + 1, newValues, position, m_AttValues.length - (position + 1)); } m_AttValues = newValues; } /** * Inserts an attribute at the given position * (0 to numAttributes()) and sets its value to be missing. * * @param pos the attribute's position */ void forceInsertAttributeAt(int position) { double[] newValues = new double[m_AttValues.length + 1]; System.arraycopy(m_AttValues, 0, newValues, 0, position); newValues[position] = MISSING_VALUE; System.arraycopy(m_AttValues, position, newValues, position + 1, m_AttValues.length - position); m_AttValues = newValues; } /** * Private constructor for subclasses. Does nothing. */ protected M5Instance() { } /** * Clones the attribute vector of the instance and * overwrites it with the clone. */ private void freshAttributeVector() { m_AttValues = toDoubleArray(); } }