/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    Instance.java
 *    Copyright (C) 1999-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.core;

import java.util.Enumeration;

/**
 * Interface representing an instance. All values (numeric, date, nominal,
 * string or relational) are internally stored as floating-point numbers in the
 * original concrete class implementations (now called DenseInstance.java and
 * SparseInstance.java), and the methods in this interface reflect this. If an
 * attribute is nominal (or a string or relational), the stored value is the
 * index of the corresponding nominal (or string or relational) value in the
 * attribute's definition. We have chosen this approach in favor of a more
 * elegant object-oriented approach because it is much faster.
 * <p>
 *
 * Typical usage:
 * <p>
 *
 * <pre>
 * ...
 *
 * // Create empty instance with three attribute values
 * Instance inst = new DenseInstance(3);
 *
 * // Set instance's values for the attributes "length", "weight", and "position"
 * inst.setValue(length, 5.3);
 * inst.setValue(weight, 300);
 * inst.setValue(position, "first");
 *
 * // Set instance's dataset to be the dataset "race"
 * inst.setDataset(race);
 *
 * // Print the instance
 * System.out.println("The instance: " + inst);
 *
 * ...
 * </pre>
 * <p>
 *
 * All methods that change an instance's attribute values must be safe, i.e. a
 * change of an instance's attribute values must not affect any other instances.
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @version $Revision: 9028 $
 */
public interface Instance extends Copyable {

  /**
   * Returns the attribute with the given index.
   *
   * @param index the attribute's index
   * @return the attribute at the given position
   * @throws UnassignedDatasetException if instance doesn't have access to a
   *           dataset
   */
  public Attribute attribute(int index);

  /**
   * Returns the attribute with the given index in the sparse representation.
   * Same as attribute(int) for a DenseInstance.
   *
   * @param indexOfIndex the index of the attribute's index
   * @return the attribute at the given position
   * @throws UnassignedDatasetException if instance doesn't have access to a
   *           dataset
   */
  public Attribute attributeSparse(int indexOfIndex);

  /**
   * Returns class attribute.
   *
   * @return the class attribute
   * @throws UnassignedDatasetException if the class is not set or the instance
   *           doesn't have access to a dataset
   */
  public Attribute classAttribute();

  /**
   * Returns the class attribute's index.
   *
   * @return the class index as an integer
   * @throws UnassignedDatasetException if instance doesn't have access to a
   *           dataset
   */
  public int classIndex();

  /**
   * Tests if an instance's class is missing.
   *
   * @return true if the instance's class is missing
   * @throws UnassignedClassException if the class is not set or the instance
   *           doesn't have access to a dataset
   */
  public boolean classIsMissing();

  /**
   * Returns an instance's class value as a floating-point number.
   *
   * @return the corresponding value as a double (If the corresponding attribute
   *         is nominal (or a string) then it returns the value's index as a
   *         double).
   * @throws UnassignedClassException if the class is not set or the instance
   *           doesn't have access to a dataset
   */
  public double classValue();

  /**
   * Returns the dataset this instance has access to (i.e. the dataset it
   * obtains information about attribute types from). Null if the instance
   * doesn't have access to a dataset.
   *
   * @return the dataset the instance has access to
   */
  public Instances dataset();

  /**
   * Deletes an attribute at the given position (0 to numAttributes() - 1). Only
   * succeeds if the instance does not have access to any dataset because
   * otherwise inconsistencies could be introduced.
   *
   * @param position the attribute's position
   * @throws RuntimeException if the instance has access to a dataset
   */
  public void deleteAttributeAt(int position);

  /**
   * Returns an enumeration of all the attributes.
   *
   * @return enumeration of all the attributes
   * @throws UnassignedDatasetException if the instance doesn't have access to a
   *           dataset
   */
  public Enumeration<Attribute> enumerateAttributes();

  /**
   * Tests if the headers of two instances are equivalent.
   *
   * @param inst another instance
   * @return true if the header of the given instance is equivalent to this
   *         instance's header
   * @throws UnassignedDatasetException if instance doesn't have access to any
   *           dataset
   */
  public boolean equalHeaders(Instance inst);

  /**
   * Checks if the headers of two instances are equivalent. If not, then returns
   * a message why they differ.
   *
   * @param inst another instance
   * @return null if the header of the given instance is equivalent to this
   *         instance's header, otherwise a message with details on why they
   *         differ
   */
  public String equalHeadersMsg(Instance inst);

  /**
   * Tests whether an instance has a missing value. Skips the class attribute if
   * set.
   *
   * @return true if instance has a missing value.
   * @throws UnassignedDatasetException if instance doesn't have access to any
   *           dataset
   */
  public boolean hasMissingValue();

  /**
   * Returns the index of the attribute stored at the given position in the
   * sparse representation. Identity function for an instance of type
   * DenseInstance.
   *
   * @param position the position
   * @return the index of the attribute stored at the given position
   */
  public int index(int position);

  /**
   * Inserts an attribute at the given position (0 to numAttributes()). Only
   * succeeds if the instance does not have access to any dataset because
   * otherwise inconsistencies could be introduced.
   *
   * @param position the attribute's position
   * @throws RuntimeException if the instance has access to a dataset
   * @throws IllegalArgumentException if the position is out of range
   */
  public void insertAttributeAt(int position);

  /**
   * Tests if a specific value is "missing".
   *
   * @param attIndex the attribute's index
   * @return true if the value is "missing"
   */
  public boolean isMissing(int attIndex);

  /**
   * Tests if a specific value is "missing" in the sparse representation. Same
   * as isMissing(int) for a DenseInstance.
   *
   * @param indexOfIndex the index of the attribute's index
   * @return true if the value is "missing"
   */
  public boolean isMissingSparse(int indexOfIndex);

  /**
   * Tests if a specific value is "missing". The given attribute has to belong
   * to a dataset.
   *
   * @param att the attribute
   * @return true if the value is "missing"
   */
  public boolean isMissing(Attribute att);

  /**
   * Merges this instance with the given instance and returns the result.
   * Dataset is set to null. The returned instance is of the same type as this
   * instance.
   *
   * @param inst the instance to be merged with this one
   * @return the merged instances
   */
  public Instance mergeInstance(Instance inst);

  /**
   * Returns the number of attributes.
   *
   * @return the number of attributes as an integer
   */
  public int numAttributes();

  /**
   * Returns the number of class labels.
   *
   * @return the number of class labels as an integer if the class attribute is
   *         nominal, 1 otherwise.
   * @throws UnassignedDatasetException if instance doesn't have access to any
   *           dataset
   */
  public int numClasses();

  /**
   * Returns the number of values present in a sparse representation.
   *
   * @return the number of values
   */
  public int numValues();

  /**
   * Replaces all missing values in the instance with the values contained in
   * the given array. A deep copy of the vector of attribute values is performed
   * before the values are replaced.
   *
   * @param array the array containing the means and modes
   * @throws IllegalArgumentException if numbers of attributes are unequal
   */
  public void replaceMissingValues(double[] array);

  /**
   * Sets the class value of an instance to be "missing". A deep copy of the
   * vector of attribute values is performed before the value is set to be
   * missing.
   *
   * @throws UnassignedClassException if the class is not set
   * @throws UnassignedDatasetException if the instance doesn't have access to a
   *           dataset
   */
  public void setClassMissing();

  /**
   * Sets the class value of an instance to the given value (internal
   * floating-point format). A deep copy of the vector of attribute values is
   * performed before the value is set.
   *
   * @param value the new attribute value (If the corresponding attribute is
   *          nominal (or a string) then this is the new value's index as a
   *          double).
   * @throws UnassignedClassException if the class is not set
   * @throws UnassignedDatasetException if the instance doesn't have access to a
   *           dataset
   */
  public void setClassValue(double value);

  /**
   * Sets the class value of an instance to the given value. A deep copy of the
   * vector of attribute values is performed before the value is set.
   *
   * @param value the new class value (If the class is a string attribute and
   *          the value can't be found, the value is added to the attribute).
   * @throws UnassignedClassException if the class is not set
   * @throws UnassignedDatasetException if the dataset is not set
   * @throws IllegalArgumentException if the attribute is not nominal or a
   *           string, or the value couldn't be found for a nominal attribute
   */
  public void setClassValue(String value);

  /**
   * Sets the reference to the dataset. Does not check if the instance is
   * compatible with the dataset. Note: the dataset does not know about this
   * instance. If the structure of the dataset's header gets changed, this
   * instance will not be adjusted automatically.
   *
   * @param instances the reference to the dataset
   */
  public void setDataset(Instances instances);

  /**
   * Sets a specific value to be "missing". Performs a deep copy of the vector
   * of attribute values before the value is set to be missing.
   *
   * @param attIndex the attribute's index
   */
  public void setMissing(int attIndex);

  /**
   * Sets a specific value to be "missing". Performs a deep copy of the vector
   * of attribute values before the value is set to be missing. The given
   * attribute has to belong to a dataset.
   *
   * @param att the attribute
   */
  public void setMissing(Attribute att);

  /**
   * Sets a specific value in the instance to the given value (internal
   * floating-point format). Performs a deep copy of the vector of attribute
   * values before the value is set.
   *
   * @param attIndex the attribute's index
   * @param value the new attribute value (If the corresponding attribute is
   *          nominal (or a string) then this is the new value's index as a
   *          double).
   */
  public void setValue(int attIndex, double value);

  /**
   * Sets a specific value in the instance to the given value (internal
   * floating-point format), given an index into the sparse representation.
   * Performs a deep copy of the vector of attribute values before the value is
   * set. Same as setValue(int, double) for a DenseInstance.
   *
   * @param indexOfIndex the index of the attribute's index
   * @param value the new attribute value (If the corresponding attribute is
   *          nominal (or a string) then this is the new value's index as a
   *          double).
   */
  public void setValueSparse(int indexOfIndex, double value);

  /**
   * Sets a value of a nominal or string attribute to the given value. Performs
   * a deep copy of the vector of attribute values before the value is set.
   *
   * @param attIndex the attribute's index
   * @param value the new attribute value (If the attribute is a string
   *          attribute and the value can't be found, the value is added to the
   *          attribute).
   * @throws UnassignedDatasetException if the dataset is not set
   * @throws IllegalArgumentException if the selected attribute is not nominal
   *           or a string, or the supplied value couldn't be found for a
   *           nominal attribute
   */
  public void setValue(int attIndex, String value);

  /**
   * Sets a specific value in the instance to the given value (internal
   * floating-point format). Performs a deep copy of the vector of attribute
   * values before the value is set, so if you are planning on calling setValue
   * many times it may be faster to create a new instance using toDoubleArray.
   * The given attribute has to belong to a dataset.
   *
   * @param att the attribute
   * @param value the new attribute value (If the corresponding attribute is
   *          nominal (or a string) then this is the new value's index as a
   *          double).
   */
  public void setValue(Attribute att, double value);

  /**
   * Sets a value of a nominal or string attribute to the given value. Performs
   * a deep copy of the vector of attribute values before the value is set, so
   * if you are planning on calling setValue many times it may be faster to
   * create a new instance using toDoubleArray. The given attribute has to
   * belong to a dataset.
   *
   * @param att the attribute
   * @param value the new attribute value (If the attribute is a string
   *          attribute and the value can't be found, the value is added to the
   *          attribute).
   * @throws IllegalArgumentException if the attribute is not nominal or a
   *           string, or the value couldn't be found for a nominal attribute
   */
  public void setValue(Attribute att, String value);

  /**
   * Sets the weight of an instance.
   *
   * @param weight the weight
   */
  public void setWeight(double weight);

  /**
   * Returns the relational value of a relational attribute.
   *
   * @param attIndex the attribute's index
   * @return the corresponding relation as an Instances object
   * @throws IllegalArgumentException if the attribute is not a relation-valued
   *           attribute
   * @throws UnassignedDatasetException if the instance doesn't belong to a
   *           dataset.
   */
  public Instances relationalValue(int attIndex);

  /**
   * Returns the relational value of a relational attribute.
   *
   * @param att the attribute
   * @return the corresponding relation as an Instances object
   * @throws IllegalArgumentException if the attribute is not a relation-valued
   *           attribute
   * @throws UnassignedDatasetException if the instance doesn't belong to a
   *           dataset.
   */
  public Instances relationalValue(Attribute att);

  /**
   * Returns the value of a nominal, string, date, or relational attribute for
   * the instance as a string.
   *
   * @param attIndex the attribute's index
   * @return the value as a string
   * @throws IllegalArgumentException if the attribute is not a nominal, string,
   *           date, or relation-valued attribute.
   * @throws UnassignedDatasetException if the instance doesn't belong to a
   *           dataset.
   */
  public String stringValue(int attIndex);

  /**
   * Returns the value of a nominal, string, date, or relational attribute for
   * the instance as a string.
   *
   * @param att the attribute
   * @return the value as a string
   * @throws IllegalArgumentException if the attribute is not a nominal, string,
   *           date, or relation-valued attribute.
   * @throws UnassignedDatasetException if the instance doesn't belong to a
   *           dataset.
   */
  public String stringValue(Attribute att);

  /**
   * Returns the values of each attribute as an array of doubles.
   *
   * @return an array containing all the instance attribute values
   */
  public double[] toDoubleArray();

  /**
   * Returns the description of one instance (without weight appended). If the
   * instance doesn't have access to a dataset, it returns the internal
   * floating-point values. Quotes string values that contain whitespace
   * characters.
   *
   * This method is used by getRandomNumberGenerator() in Instances.java in
   * order to maintain backwards compatibility with weka 3.4.
   *
   * @param afterDecimalPoint maximum number of digits after the decimal point
   *          for numeric values
   *
   * @return the instance's description as a string
   */
  public String toStringNoWeight(int afterDecimalPoint);

  /**
   * Returns the description of one instance (without weight appended). If the
   * instance doesn't have access to a dataset, it returns the internal
   * floating-point values. Quotes string values that contain whitespace
   * characters.
   *
   * This method is used by getRandomNumberGenerator() in Instances.java in
   * order to maintain backwards compatibility with weka 3.4.
   *
   * @return the instance's description as a string
   */
  public String toStringNoWeight();

  /**
   * Returns the description of one instance with any numeric values printed at
   * the supplied maximum number of decimal places. If the instance doesn't have
   * access to a dataset, it returns the internal floating-point values. Quotes
   * string values that contain whitespace characters.
   *
   * @param afterDecimalPoint the maximum number of digits permitted after the
   *          decimal point for a numeric value
   *
   * @return the instance's description as a string
   */
  public String toStringMaxDecimalDigits(int afterDecimalPoint);

  /**
   * Returns the description of one value of the instance as a string. If the
   * instance doesn't have access to a dataset, it returns the internal
   * floating-point value. Quotes string values that contain whitespace
   * characters, or if they are a question mark.
   *
   * @param attIndex the attribute's index
   * @param afterDecimalPoint the maximum number of digits permitted after the
   *          decimal point for numeric values
   * @return the value's description as a string
   */
  public String toString(int attIndex, int afterDecimalPoint);

  /**
   * Returns the description of one value of the instance as a string. If the
   * instance doesn't have access to a dataset, it returns the internal
   * floating-point value. Quotes string values that contain whitespace
   * characters, or if they are a question mark.
   *
   * @param attIndex the attribute's index
   * @return the value's description as a string
   */
  public String toString(int attIndex);

  /**
   * Returns the description of one value of the instance as a string. If the
   * instance doesn't have access to a dataset it returns the internal
   * floating-point value. Quotes string values that contain whitespace
   * characters, or if they are a question mark. The given attribute has to
   * belong to a dataset.
   *
   * @param att the attribute
   * @param afterDecimalPoint the maximum number of decimal places to print
   * @return the value's description as a string
   */
  public String toString(Attribute att, int afterDecimalPoint);

  /**
   * Returns the description of one value of the instance as a string. If the
   * instance doesn't have access to a dataset it returns the internal
   * floating-point value. Quotes string values that contain whitespace
   * characters, or if they are a question mark. The given attribute has to
   * belong to a dataset.
   *
   * @param att the attribute
   * @return the value's description as a string
   */
  public String toString(Attribute att);

  /**
   * Returns an instance's attribute value in internal format.
   *
   * @param attIndex the attribute's index
   * @return the specified value as a double (If the corresponding attribute is
   *         nominal (or a string) then it returns the value's index as a
   *         double).
   */
  public double value(int attIndex);

  /**
   * Returns an instance's attribute value in internal format, given an index in
   * the sparse representation. Same as value(int) for a DenseInstance.
   *
   * @param indexOfIndex the index of the attribute's index
   * @return the specified value as a double (If the corresponding attribute is
   *         nominal (or a string) then it returns the value's index as a
   *         double).
   */
  public double valueSparse(int indexOfIndex);

  /**
   * Returns an instance's attribute value in internal format. The given
   * attribute has to belong to a dataset.
   *
   * @param att the attribute
   * @return the specified value as a double (If the corresponding attribute is
   *         nominal (or a string) then it returns the value's index as a
   *         double).
   */
  public double value(Attribute att);

  /**
   * Returns the instance's weight.
   *
   * @return the instance's weight as a double
   */
  public double weight();
}