/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.example; import java.io.Serializable; import java.util.Date; import java.util.Iterator; import com.rapidminer.example.table.DataRow; import com.rapidminer.example.table.ExampleTable; import com.rapidminer.example.table.NumericalAttribute; import com.rapidminer.example.table.SparseFormatDataRowReader; import com.rapidminer.operator.similarity.attributebased.uncertain.ProbabilityDensityFunction; import com.rapidminer.tools.Ontology; import de.tud.inf.example.set.attributevalues.AbstractMatrixValue; import de.tud.inf.example.set.attributevalues.ComplexValue; import de.tud.inf.example.set.attributevalues.DataMapValue; import de.tud.inf.example.set.attributevalues.MapValue; import de.tud.inf.example.table.ComplexAttribute; import de.tud.inf.example.table.DataMapAttribute; import de.tud.inf.example.table.MapAttribute; import de.tud.inf.example.table.MatrixAttribute; import de.tud.inf.example.table.UncertainAttribute; /** * An example consists of a DataRow and some convenience methods to access the data. * Hence, all values are actually doubles, symbolic values are * mapped to integers stored in doubles.<br> * Since {@link ExampleSet}s are only a view on {@link ExampleTable}s, * Examples are generated on the fly by {@link ExampleReader}s. Since they only * contain the currently selected attributes operators need not to consider * attribute selections or example subsets (samplings). * * @author Ingo Mierswa * @version $Id: Example.java,v 1.8 2008/05/28 10:52:03 ingomierswa Exp $ */ public class Example implements Serializable { private static final long serialVersionUID = 7761687908683290928L; /** Separator used in the getAttributesAsString() method (tab). */ public static final String SEPARATOR = " "; /** Separates indices from values in sparse format (colon). */ public static final String SPARSE_SEPARATOR = ":"; /** The data for this example. */ private DataRow data; /** The parent example set holding all attribute information for this data row. */ private ExampleSet parentExampleSet; /** * Creates a new Example that uses the data stored in a DataRow. The attributes * correspond to the regular and special attributes. */ public Example(DataRow data, ExampleSet parentExampleSet) { this.data = data; this.parentExampleSet = parentExampleSet; } /** Returns the data row which backs up the example in the example table. */ public DataRow getDataRow() { return this.data; } /** Delivers the attributes. */ public Attributes getAttributes() { return this.parentExampleSet.getAttributes(); } // -------------------------------------------------------------------------------- /** * Returns the value of attribute a. In the case of nominal attributes, the delivered double value * corresponds to an internal index */ public double getValue(Attribute a) { return data.get(a); } /** Returns the nominal value for the given attribute. * * @throws AttributeTypeException if the given attribute has the wrong value type */ public String getNominalValue(Attribute a) { if (!a.isNominal()) { throw new AttributeTypeException("Extraction of nominal example value for non-nominal attribute '" + a.getName() + "' is not possible."); } double value = getValue(a); if (Double.isNaN(value)) return Attribute.MISSING_NOMINAL_VALUE; else return a.getMapping().mapIndex((int)value); } /** Returns the numerical value for the given attribute. * * @throws AttributeTypeException if the given attribute has the wrong value type */ public double getNumericalValue(Attribute a) { if (!a.isNumerical()) { throw new AttributeTypeException("Extraction of numerical example value for non-numerical attribute '" + a.getName() + "' is not possible."); } return getValue(a); } /** Returns the date value for the given attribute. * * @throws AttributeTypeException if the given attribute has the wrong value type */ public Date getDateValue(Attribute a) { if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(a.getValueType(), Ontology.DATE_TIME)) { throw new AttributeTypeException("Extraction of date example value for non-date attribute '" + a.getName() + "' is not possible."); } return new Date((long)getValue(a)); } /** * get complex value as complex value object * @param a * @return */ public ComplexValue getComplexValue(Attribute a){ if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(a.getValueType(), Ontology.COMPLEX_VALUE)) { throw new AttributeTypeException("Extraction of complex example value for non-uncertain attribute '" + a.getName() + "' is not possible."); } return ((ComplexAttribute)a).getComplexValue(this.getDataRow()); } /** * get complex value as array of inner attribute values * @param a * @return */ public double[] getComplexValueAsArray(Attribute a){ if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(a.getValueType(), Ontology.COMPLEX_VALUE)) { throw new AttributeTypeException("Extraction of complex example value for non-uncertain attribute '" + a.getName() + "' is not possible."); } return ((ComplexAttribute)a).getComplexValueAsArray(this.getDataRow()); } /** * * @param Attribute a * @return Abstract pdf */ public ProbabilityDensityFunction getUncertainValue(Attribute a){ if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(a.getValueType(), Ontology.UNCERTAIN)) { throw new AttributeTypeException("Extraction of uncertain example value for non-uncertain attribute '" + a.getName() + "' is not possible."); } return ((UncertainAttribute)a).getComplexValue(this.getDataRow()); } public AbstractMatrixValue getMatrixValue(Attribute a){ if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(a.getValueType(), Ontology.MATRIX)) { throw new AttributeTypeException("Extraction of matrix example value for non-matrix attribute '" + a.getName() + "' is not possible."); } return ((MatrixAttribute)a).getComplexValue(this.getDataRow()); } public MapValue getMapValue(Attribute a){ if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(a.getValueType(), Ontology.MAP)) throw new AttributeTypeException("Extraction of map example value for non-map attribute '" + a.getName() + "' is not possible."); return ((MapAttribute)a).getComplexValue(this.getDataRow()); } public double[][] getRelativeValue(Attribute a){ if(a.isRelational()) return data.getRelativeValuesFor(a.getTableIndex()); else throw new AttributeTypeException("Extraction of relational example value for non-relational attribute '" + a.getName() + "' is not possible."); } public DataMapValue getDataMapValue(Attribute a){ if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(a.getValueType(), Ontology.DATA_MAP)) throw new AttributeTypeException("Extraction of dataMap example value for non-dataMap attribute '" + a.getName() + "' is not possible."); return ((DataMapAttribute)a).getComplexValue(this.getDataRow()); } /** * Sets the value of attribute a. The attribute a need not necessarily be * part of the example set the example is taken from, although this is no * good style. */ public void setValue(Attribute a, double value) { data.set(a, value); } /** * adds a complex value to an exampleSet * @param a * @param cv */ public void setComplexValue(ComplexAttribute a, ComplexValue cv){ data.set(a, cv); } /** * Sets the value of attribute a which must be a nominal attribute. The * attribute a need not necessarily be part of the example set the example * is taken from, although this is no good style. */ public void setValue(Attribute a, String str) { if (!a.isNominal()) throw new AttributeTypeException("setValue(Attribute, String) only supported for nominal values!"); if (str != null) setValue(a, a.getMapping().mapString(str)); else setValue(a, Double.NaN); } /** Returns true if both nominal values are the same (if both attributes * are nominal) or if both real values are the same (if both attributes * are real values) or false otherwise. */ public boolean equalValue(Attribute first, Attribute second) { if (first.isNominal() && second.isNominal()) { return getValueAsString(first).equals(getValueAsString(second)); } else if ((!first.isNominal()) && (!second.isNominal())) { return com.rapidminer.tools.Tools.isEqual(getValue(first), getValue(second)); } else { return false; } } // --------------------------------------------------------------------------------- public double getLabel() { return getValue(getAttributes().getLabel()); } public void setLabel(double value) { setValue(getAttributes().getLabel(), value); } public double getPredictedLabel() { return getValue(getAttributes().getPredictedLabel()); } public void setPredictedLabel(double value) { setValue(getAttributes().getPredictedLabel(), value); } public double getId() { return getValue(getAttributes().getId()); } public void setId(double value) { setValue(getAttributes().getId(), value); } public double getWeight() { return getValue(getAttributes().getWeight()); } public void setWeight(double value) { setValue(getAttributes().getWeight(), value); } public double getConfidence(String classValue) { return getValue(getAttributes().getSpecial(Attributes.CONFIDENCE_NAME + "_" + classValue)); } public void setConfidence(String classValue, double confidence) { setValue(getAttributes().getSpecial(Attributes.CONFIDENCE_NAME + "_" + classValue), confidence); } // -------------------------------------------------------------------------------- /** * <p>Returns the value of this attribute as string representation, i.e. the * number as string for numerical attributes and the correctly mapped * categorical value for nominal values. The used number of fraction * digits is unlimited * (see {@link NumericalAttribute#DEFAULT_NUMBER_OF_DIGITS}). * Nominal values containing whitespaces will not be quoted.</p> * * <p>Please note that this method should not be used in order to get the * nominal values, please use * {@link #getNominalValue(Attribute)} instead.</p> */ public String getValueAsString(Attribute attribute) { return getValueAsString(attribute, NumericalAttribute.UNLIMITED_NUMBER_OF_DIGITS, false); } /** * <p>Returns the value of this attribute as string representation, i.e. the * number as string for numerical attributes and the correctly mapped * categorical value for nominal values. If the value is numerical the given * number of fraction digits is used. If the value is numerical, the given * number of fraction digits is used. This value must be either one out of * {@link NumericalAttribute#DEFAULT_NUMBER_OF_DIGITS} or * {@link NumericalAttribute#UNLIMITED_NUMBER_OF_DIGITS} or a number greater * or equal to 0. The boolean flag indicates if nominal values containing * whitespaces should be quoted with double quotes.</p> * * <p>Please note that this method should not be used in order to get the * nominal values, please use * {@link #getNominalValue(Attribute)} instead.</p> */ public String getValueAsString(Attribute attribute, int fractionDigits, boolean quoteWhitespace) { double value = getValue(attribute); return attribute.getAsString(value, fractionDigits, quoteWhitespace); } public String getComplexValueAsString(Attribute attribute, int fractionDigits, boolean quoteWhitespace) { ComplexValue value = getComplexValue(attribute); return ((ComplexAttribute)attribute).getAsString(value, fractionDigits, quoteWhitespace); } /** Returns a dense string representation with all possible fraction digits. * Nominal values containing whitespaces will be quoted with double * quotes. */ public String toString() { return toDenseString(NumericalAttribute.UNLIMITED_NUMBER_OF_DIGITS, true); } /** * This method returns a dense string representation of the example. It * first returns the values of all special attributes and then the values of * all regular attributes. */ public String toDenseString(int fractionDigits, boolean quoteWhitespace) { StringBuffer result = new StringBuffer(); Iterator<Attribute> a = getAttributes().allAttributes(); while (a.hasNext()) { result.append(getValueAsString(a.next(), fractionDigits, quoteWhitespace) + SEPARATOR); } return result.toString(); } /** * Returns regular and some special attributes (label, id, and example * weight) in sparse format. * * @param format * one of the formats specified in * {@link SparseFormatDataRowReader} */ public String toSparseString(int format, int fractionDigits, boolean quoteWhitespace) { StringBuffer str = new StringBuffer(); // label Attribute labelAttribute = getAttributes().getSpecial(Attributes.LABEL_NAME); if ((format == SparseFormatDataRowReader.FORMAT_YX) && (labelAttribute != null)) { str.append(getValueAsString(labelAttribute, fractionDigits, quoteWhitespace) + " "); } // id Attribute idAttribute = getAttributes().getSpecial(Attributes.ID_NAME); if (idAttribute != null) { str.append("id:" + getValueAsString(idAttribute, fractionDigits, quoteWhitespace) + " "); } // weight Attribute weightAttribute = getAttributes().getSpecial(Attributes.WEIGHT_NAME); if (weightAttribute != null) { str.append("w:" + getValueAsString(weightAttribute, fractionDigits, quoteWhitespace) + " "); } // attributes str.append(getAttributesAsSparseString(SEPARATOR, SPARSE_SEPARATOR, fractionDigits, quoteWhitespace) + " "); // label (format xy & prefix) if ((format == SparseFormatDataRowReader.FORMAT_PREFIX) && (labelAttribute != null)) { str.append("l:" + getValueAsString(labelAttribute, fractionDigits, quoteWhitespace)); } if ((format == SparseFormatDataRowReader.FORMAT_XY) && (labelAttribute != null)) { str.append(getValueAsString(labelAttribute, fractionDigits, quoteWhitespace)); } return str.toString(); } /** * Returns the attribute values in the format <br> * <center>index:value index:value</center><br> * Index starts with 1. * * @param separator * separates attributes * @param indexValueSeparator * separates index and value. * @param fractionDigits the number of fraction digits used, if -1 all possible digits are used */ /* pp */ String getAttributesAsSparseString(String separator, String indexValueSeparator, int fractionDigits, boolean quoteWhitespace) { StringBuffer str = new StringBuffer(); boolean first = true; int counter = 1; for (Attribute attribute : getAttributes()) { double value = getValue(attribute); if (!Tools.isDefault(attribute.getDefault(), value)) { if (!first) { str.append(separator); } first = false; str.append(counter + indexValueSeparator + getValueAsString(attribute, fractionDigits, quoteWhitespace)); } counter++; } return str.toString(); } }