/*********************************************************************** This file is part of KEEL-software, the Data Mining tool for regression, classification, clustering, pattern mining and so on. Copyright (C) 2004-2010 F. Herrera (herrera@decsai.ugr.es) L. S�nchez (luciano@uniovi.es) J. Alcal�-Fdez (jalcala@decsai.ugr.es) S. Garc�a (sglopez@ujaen.es) A. Fern�ndez (alberto.fernandez@ujaen.es) J. Luengo (julianlm@decsai.ugr.es) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/ **********************************************************************/ package keel.Algorithms.Genetic_Rule_Learning.M5Rules; /** * Class to manipulate an itemset. */ public class Itemset { /** The dataset which the itemset has access to. */ protected MyDataset dataset; /** Values of the itemset. */ protected double[] values; /** The weight of the itemset. */ protected double weight; /** Constant that represents the missing value. */ protected final static double MISSING_VALUE = Double.NaN; /** Constructor that copies the values and the weight. * * @param itemset The itemset to copy. */ public Itemset( Itemset itemset ) { values = itemset.values; weight = itemset.weight; dataset = null; } /** Constructor that sets the values and the weight. * * @param w The weight. * @param attributeValues The values. */ public Itemset( double w, double[] attributeValues ) { values = attributeValues; weight = w; dataset = null; } /** * Constructor of an instance that sets weight to one, all values to * be missing, and the reference to the dataset to null. (ie. the instance * doesn't have access to information about the attribute types) * * @param numAttributes the size of the instance */ public Itemset(int numAttributes) { values = new double[numAttributes]; for (int i = 0; i < values.length; i++) { values[i] = MISSING_VALUE; } weight = 1; dataset = null; } /** * Returns the index of the attribute stored at the given position. * Just returns the given value. * * @param position the position * @return the index of the attribute stored at the given position */ public int index(int position) { return position; } /** Returns the index of the class attribute. * @return the index of the class attribute. */ public int classIndex() { if ( dataset == null ) { throw new RuntimeException("Itemset doesn't have access to a dataset!"); //System.err.println("dataset in itemset is null"); //return(-1); } else return dataset.getClassIndex(); } /** Function to test if the class attribute is missing. * * @return True if the value of the class attribute is missing. */ public boolean classIsMissing() { if ( classIndex() < 0 ) throw new RuntimeException( "Class is not set." ); else return isMissing( classIndex() ); } /** Returns the index of the value of the class. * @return the index of the value of the class. */ public double getClassValue() { if ( classIndex() < 0 ) { throw new RuntimeException("dataset in itemset is null"); //return (-1); } else return getValue( classIndex() ); } /** * Returns class attribute. * * @return the class attribute * @throws Exception * @exception UnassignedDatasetException if the class is not set or the * instance doesn't have access to a dataset */ public MyAttribute getClassAttribute() throws Exception { if (dataset == null) { throw new Exception("Itemset doesn't have access to a dataset!"); } return dataset.getClassAttribute(); } /** Returns the number of class values. * @return the number of class values. */ public int numClasses() { if ( dataset == null ) { throw new RuntimeException("dataset in itemset is null"); //return (-1); } else return dataset.numClasses(); } /** Returns the attribute with the given index. * @param index the attribute's index * @return the attribute with the given index. */ public MyAttribute getAttribute( int index ) { if ( dataset == null ) { throw new RuntimeException("dataset in itemset is null"); //return null; } else return dataset.getAttribute( index ); } /** Returns the number of attributes. * @return the number of attributes. */ public int numAttributes() { return values.length; } /** Returns the number of values. Always the same as numAttributes(). * @return the number of values. */ public int numValues() { return values.length; } /** * Returns the attribute with the given index. Does the same * thing as attribute(). * * @param indexOfIndex the index of the attribute's index * @return the attribute at the given position * @throws Exception * @exception UnassignedDatasetException if instance doesn't have access to a * dataset */ public MyAttribute attributeSparse(int indexOfIndex) throws Exception { if (dataset == null) { throw new Exception("Itemset doesn't have access to a dataset!"); } return dataset.getAttribute(indexOfIndex); } /** Function to set a value. * * @param value The value. */ public void setClassValue( double value ) { if ( classIndex() < 0 ) throw new RuntimeException( "Class is not set." ); else setValue( classIndex(),value ); } /** Function to set a value. * * @param value The value. * @throws Exception If the index of the class is not set. */ public void setClassValue( String value ) throws Exception { if ( classIndex() < 0 ) throw new RuntimeException( "Class is not set." ); else setValue( classIndex(),value ); } /** Function to set a value. * * @param index The index of the attribute. * @param value The value. */ public void setValue( int index, double value ) { double[] help = new double[values.length]; System.arraycopy( values, 0, help, 0, values.length ); values = help; values[index] = value; } /** * Sets a specific value in the instance to the given value * (internal floating-point format). Performs a deep copy * of the vector of attribute values before the value is set. * Does exactly the same thing as setValue(). * * @param indexOfIndex the index of the attribute's index * @param value the new attribute value (If the corresponding * attribute is nominal (or a string) then this is the new value's * index as a double). */ public void setValueSparse(int indexOfIndex, double value) { double[] help = new double[values.length]; System.arraycopy( values, 0, help, 0, values.length ); values = help; values[indexOfIndex] = value; } /** * Sets a value of a nominal or string attribute to the given * value. Performs a deep copy of the vector of attribute values * before the value is set. * * @param attIndex the attribute's index * @param value the new attribute value (If the attribute * is a string attribute and the value can't be found, * the value is added to the attribute). * @throws Exception * @exception UnassignedDatasetException if the dataset is not set * @exception IllegalArgumentException if the selected * attribute is not nominal or a string, or the supplied value couldn't * be found for a nominal attribute */ public final void setValue(int attIndex, String value) throws Exception { int valIndex; if (dataset == null) { throw new Exception("Itemset doesn't have access to a dataset!"); } if (!getAttribute(attIndex).isDiscret()) { throw new IllegalArgumentException("Attribute neither nominal nor string!"); } valIndex = getAttribute(attIndex).valueIndex(value); if (valIndex == -1) { if (getAttribute(attIndex).isDiscret()) { throw new IllegalArgumentException("Value not defined for given nominal attribute!"); } else { //getAttribute(attIndex).forceAddValue(value); valIndex = getAttribute(attIndex).valueIndex(value); } } setValue(attIndex, (double) valIndex); } /** * Sets a specific value in the instance to the given value * (internal floating-point format). Performs a deep copy of the * vector of attribute values before the value is set, so if you are * planning on calling setValue many times it may be faster to * create a new instance using toDoubleArray. The given attribute * has to belong to a dataset. * * @param att the attribute * @param value the new attribute value (If the corresponding * attribute is nominal (or a string) then this is the new value's * index as a double). */ public final void setValue(MyAttribute att, double value) { setValue(att.index(), value); } /** * Sets a value of an nominal or string attribute to the given * value. Performs a deep copy of the vector of attribute values * before the value is set, so if you are planning on calling setValue many * times it may be faster to create a new instance using toDoubleArray. * The given attribute has to belong to a dataset. * * @param att the attribute * @param value the new attribute value (If the attribute * is a string attribute and the value can't be found, * the value is added to the attribute). * @throws Exception if the the attribute is not * nominal or a string, or the value couldn't be found for a nominal * attribute */ public final void setValue(MyAttribute att, String value) throws Exception{ setValue(att.index(), value); } /** * Returns an instance's attribute value in internal format. * Does exactly the same thing as value() if applied to an Itemset. * * @param indexOfIndex the index of the attribute's index * @return the specified value as a double (If the corresponding * attribute is nominal (or a string) then it returns the value's index as a * double). */ public double getValueSparse(int indexOfIndex) { return values[indexOfIndex]; } /** Returns the value of the given attribute. * @param index the attribute's index * @return the value of the given attribute. */ public double getValue( int index ) { return values[index]; } /** Returns the value of the given attribute. * @param index the attribute's index * @return the value of the given attribute. * @throws Exception if the Itemset doesn't have access to a dataset or * the attribute neither nominal nor string. */ public String getStringValue( int index ) throws Exception { if (dataset == null) { throw new Exception("Itemset doesn't have access to a dataset!"); } if (!dataset.getAttribute(index).isDiscret()) { throw new IllegalArgumentException("Attribute neither nominal nor string!"); } return dataset.getAttribute(index).value(index); } /** Returns the value of the given attribute. * @param att the attribute * @return the value of the given attribute. * @throws Exception if the Itemset doesn't have access to a dataset or * the attribute neither nominal nor string. */ public String getStringValue( MyAttribute att ) throws Exception { return getStringValue(att.index()); } /** Returns the value of the given attribute. * @param att the attribute * @return the value of the given attribute. */ public double getValue(MyAttribute att) { return values[att.index()]; } /** Function to set the weight. * * @param w The weight. */ public final void setWeight( double w ) { weight = w; } /** Returns the itemset weight. * @return the itemset weight. */ public final double getWeight() { return weight; } /** Returns the dataset of this itemset. * @return the dataset of this itemset. */ public MyDataset getDataset() { return dataset; } /** Function to set the dataset. * * @param data The dataset. */ public final void setDataset( MyDataset data ) { dataset = data; } /** Function to check if a value is missing. * * @param index The index of the attribute to check. * * @return True is the value of the attribute is missing. False otherwise. */ public boolean isMissing( int index ) { if ( Double.isNaN( values[index] ) ) return true; else return false; } /** * Tests if a specific value is "missing". * The given attribute has to belong to a dataset. * * @param att the attribute * * @return True is the value of the attribute is missing. False otherwise. */ public boolean isMissing(MyAttribute att) { return isMissing(att.index()); } /** * Tests if a specific value is "missing". Does * the same thing as isMissing() if applied to an Instance. * * @param indexOfIndex the index of the attribute's index * @return True is the value of the attribute is missing. False otherwise. */ public boolean isMissingSparse(int indexOfIndex) { if (Double.isNaN(values[indexOfIndex])) return true; else return false; } /** Function to check if the value given is the missing value. * * @param val The value to check. * * @return True if the value given is the missing value. False otherwise. */ public static boolean isMissingValue( double val ) { return Double.isNaN( val ); } /** Returns the missing value. * @return the missing value. */ public static double getMissingValue() { return MISSING_VALUE; } /** Function to set as missing the class value. * */ public void setClassMissing() { if ( classIndex() < 0 ) throw new RuntimeException( "Class is not set." ); else setMissing( classIndex() ); } /** Function to set a value as missing. * * @param index The index of the attribute. */ public final void setMissing( int index ) { setValue( index, MISSING_VALUE ); } /** * Sets a specific value to be "missing". Performs a deep copy * of the vector of attribute values before the value is set to * be missing. The given attribute has to belong to a dataset. * * @param att the attribute */ public final void setMissing(MyAttribute att) { setMissing(att.index()); } /** Function to copy an itemset. * * @return The itemset created. */ public Object copy() { Itemset result = new Itemset( this ); result.dataset = dataset; return result; } /** * Inserts an attribute at the given position (0 to * numAttributes()). Only succeeds if the instance does not * have access to any dataset because otherwise inconsistencies * could be introduced. * * @param position the attribute's position * @exception RuntimeException if the instance has accesss to a * dataset * @exception IllegalArgumentException if the position is out of range */ public void insertAttributeAt(int position) { if (dataset != null) { throw new RuntimeException("Itemset has accesss to a dataset!"); } if ((position < 0) ||(position > numAttributes())) { throw new IllegalArgumentException("Can't insert attribute: index out of range"); } forceInsertAttributeAt(position); } /** * Merges this instance with the given instance and returns * the result. Dataset is set to null. * * @param inst the instance to be merged with this one * @return the merged instances */ public Itemset mergeInstance(Itemset inst) { int m = 0; double[] newVals = new double[numAttributes() + inst.numAttributes()]; for (int j = 0; j < numAttributes(); j++, m++) { newVals[m] = getValue(j); } for (int j = 0; j < inst.numAttributes(); j++, m++) { newVals[m] = inst.getValue(j); } return new Itemset(1.0, newVals); } /** * Deletes an attribute at the given position (0 to * numAttributes() - 1). Only succeeds if the instance does not * have access to any dataset because otherwise inconsistencies * could be introduced. * * @param position the attribute's position * @exception RuntimeException if the instance has access to a * dataset */ public void deleteAttributeAt(int position) { if (dataset != null) { throw new RuntimeException("Itemset has access to a dataset!"); } forceDeleteAttributeAt(position); } /** * Replaces all missing values in the instance with the * values contained in the given array. A deep copy of * the vector of attribute values is performed before the * values are replaced. * * @param array containing the means and modes * @exception IllegalArgumentException if numbers of attributes are unequal */ public void replaceMissingValues(double[] array) { if ((array == null) || (array.length != values.length)) { throw new IllegalArgumentException("Unequal number of attributes!"); } double[] help = new double[values.length]; System.arraycopy( values, 0, help, 0, values.length ); values = help; for (int i = 0; i < values.length; i++) { if (isMissing(i)) { values[i] = array[i]; } } } /** * Tests if the headers of two instances are equivalent. * * @param inst another instance * @return true if the header of the given instance is * equivalent to this instance's header * @throws Exception * @exception UnassignedDatasetException if instance doesn't have access to any * dataset */ public boolean equalHeaders(Itemset inst) throws Exception { if (dataset == null) { throw new Exception("Itemset doesn't have access to a dataset!"); } return dataset.equalHeaders(inst.dataset); } /** * Returns an enumeration of all the attributes. * * @return enumeration of all the attributes * @throws Exception * @exception UnassignedDatasetException if the instance doesn't * have access to a dataset */ public java.util.Enumeration enumerateAttributes() throws Exception { if (dataset == null) { throw new Exception("M5Instace doesn't have access to a dataset!"); } return dataset.enumerateAttributes(); } /** Function to print the itemset. * @return the string representation of this itemset. */ public String toString() { String result = ""; for ( int i = 0; i < dataset.numAttributes(); i++ ) { MyAttribute att = dataset.getAttribute( i ); if ( att.isContinuous() ) result += att.name() + "=" + values[i] + " "; else result += att.name() + "=" + att.value( (int)values[i] ) + " "; } return result; } /** * Function to print of one value of the itemset. * * @param att the attribute * @return the value's description as a string * @throws Exception */ public final String toString(MyAttribute att) throws Exception { return Integer.toString(att.index()); } /** * Returns the description of one value of the instance as a * string. If the instance doesn't have access to a dataset, it * returns the internal floating-point value. Quotes string * values that contain whitespace characters, or if they * are a question mark. * * @param attIndex the attribute's index * @return the value's description as a string * @throws Exception */ public final String toString(int attIndex) throws Exception { StringBuffer text = new StringBuffer(); if (isMissing(attIndex)) { text.append("?"); } else { if (dataset == null) { text.append(M5StaticUtils.doubleToString(values[attIndex],6)); } else { if (dataset.getAttribute(attIndex).isDiscret()) { text.append(M5StaticUtils.quote(getStringValue(attIndex))); } else { text.append(M5StaticUtils.doubleToString(getValue(attIndex), 6)); } } } return text.toString(); } /** * Deletes an attribute at the given position (0 to * numAttributes() - 1). * * @param position the attribute's position */ private void forceDeleteAttributeAt(int position) { double[] newValues = new double[values.length - 1]; System.arraycopy(values, 0, newValues, 0, position); if (position < values.length - 1) { System.arraycopy(values, position + 1,newValues, position, values.length - (position + 1)); } values = newValues; } /** * Inserts an attribute at the given position * (0 to numAttributes()) and sets its value to be missing. * * @param position the attribute's position */ private void forceInsertAttributeAt(int position) { double[] newValues = new double[values.length + 1]; System.arraycopy(values, 0, newValues, 0, position); newValues[position] = MISSING_VALUE; System.arraycopy(values, position, newValues,position + 1, values.length - position); values = newValues; } protected Itemset(){} }