/* * RapidMiner * * Copyright (C) 2001-2011 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.example; import java.io.Serializable; import java.util.Collection; import java.util.Date; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Set; import com.rapidminer.example.table.DataRow; import com.rapidminer.example.table.ExampleTable; import com.rapidminer.example.table.NumericalAttribute; import com.rapidminer.example.table.SparseFormatDataRowReader; import com.rapidminer.tools.Ontology; /** * An example consists of a DataRow and some convenience methods to access the data. * Hence, all values are actually doubles, symbolic values are * mapped to integers stored in doubles.<br> * Since {@link ExampleSet}s are only a view on {@link ExampleTable}s, * Examples are generated on the fly by {@link ExampleReader}s. Since they only * contain the currently selected attributes operators need not to consider * attribute selections or example subsets (samplings). * * @author Ingo Mierswa */ public class Example implements Serializable, Map<String, Object> { private static final long serialVersionUID = 7761687908683290928L; /** Separator used in the getAttributesAsString() method (tab). */ public static final String SEPARATOR = " "; /** Separates indices from values in sparse format (colon). */ public static final String SPARSE_SEPARATOR = ":"; /** The data for this example. */ private DataRow data; /** The parent example set holding all attribute information for this data row. */ private ExampleSet parentExampleSet; /** * Creates a new Example that uses the data stored in a DataRow. The attributes * correspond to the regular and special attributes. */ public Example(DataRow data, ExampleSet parentExampleSet) { this.data = data; this.parentExampleSet = parentExampleSet; } /** Returns the data row which backs up the example in the example table. */ public DataRow getDataRow() { return this.data; } /** Delivers the attributes. */ public Attributes getAttributes() { return this.parentExampleSet.getAttributes(); } // -------------------------------------------------------------------------------- /** * Returns the value of attribute a. In the case of nominal attributes, the delivered double value * corresponds to an internal index */ public double getValue(Attribute a) { return data.get(a); } /** Returns the nominal value for the given attribute. * * @throws AttributeTypeException if the given attribute has the wrong value type */ public String getNominalValue(Attribute a) { if (!a.isNominal()) { throw new AttributeTypeException("Extraction of nominal example value for non-nominal attribute '" + a.getName() + "' is not possible."); } double value = getValue(a); if (Double.isNaN(value)) return Attribute.MISSING_NOMINAL_VALUE; else return a.getMapping().mapIndex((int)value); } /** Returns the numerical value for the given attribute. * * @throws AttributeTypeException if the given attribute has the wrong value type */ public double getNumericalValue(Attribute a) { if (!a.isNumerical()) { throw new AttributeTypeException("Extraction of numerical example value for non-numerical attribute '" + a.getName() + "' is not possible."); } return getValue(a); } /** Returns the date value for the given attribute. * * @throws AttributeTypeException if the given attribute has the wrong value type */ public Date getDateValue(Attribute a) { if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(a.getValueType(), Ontology.DATE_TIME)) { throw new AttributeTypeException("Extraction of date example value for non-date attribute '" + a.getName() + "' is not possible."); } return new Date((long)getValue(a)); } /** * Sets the value of attribute a. The attribute a need not necessarily be * part of the example set the example is taken from, although this is no * good style. */ public void setValue(Attribute a, double value) { data.set(a, value); } /** * Sets the value of attribute a which must be a nominal attribute. The * attribute a need not necessarily be part of the example set the example * is taken from, although this is no good style. Missing values might be given by * passing null as second argument. */ public void setValue(Attribute a, String str) { if (!a.isNominal()) throw new AttributeTypeException("setValue(Attribute, String) only supported for nominal values!"); if (str != null) setValue(a, a.getMapping().mapString(str)); else setValue(a, Double.NaN); } /** Returns true if both nominal values are the same (if both attributes * are nominal) or if both real values are the same (if both attributes * are real values) or false otherwise. */ public boolean equalValue(Attribute first, Attribute second) { if (first.isNominal() && second.isNominal()) { return getValueAsString(first).equals(getValueAsString(second)); } else if ((!first.isNominal()) && (!second.isNominal())) { return com.rapidminer.tools.Tools.isEqual(getValue(first), getValue(second)); } else { return false; } } // --------------------------------------------------------------------------------- public double getLabel() { return getValue(getAttributes().getLabel()); } public void setLabel(double value) { setValue(getAttributes().getLabel(), value); } public double getPredictedLabel() { return getValue(getAttributes().getPredictedLabel()); } public void setPredictedLabel(double value) { setValue(getAttributes().getPredictedLabel(), value); } public double getId() { return getValue(getAttributes().getId()); } public void setId(double value) { setValue(getAttributes().getId(), value); } public double getWeight() { return getValue(getAttributes().getWeight()); } public void setWeight(double value) { setValue(getAttributes().getWeight(), value); } public double getConfidence(String classValue) { return getValue(getAttributes().getSpecial(Attributes.CONFIDENCE_NAME + "_" + classValue)); } public void setConfidence(String classValue, double confidence) { setValue(getAttributes().getSpecial(Attributes.CONFIDENCE_NAME + "_" + classValue), confidence); } // -------------------------------------------------------------------------------- /** * <p>Returns the value of this attribute as string representation, i.e. the * number as string for numerical attributes and the correctly mapped * categorical value for nominal values. The used number of fraction * digits is unlimited * (see {@link NumericalAttribute#DEFAULT_NUMBER_OF_DIGITS}). * Nominal values containing whitespaces will not be quoted.</p> * * <p>Please note that this method should not be used in order to get the * nominal values, please use * {@link #getNominalValue(Attribute)} instead.</p> */ public String getValueAsString(Attribute attribute) { return getValueAsString(attribute, NumericalAttribute.UNLIMITED_NUMBER_OF_DIGITS, false); } /** * <p>Returns the value of this attribute as string representation, i.e. the * number as string for numerical attributes and the correctly mapped * categorical value for nominal values. If the value is numerical the given * number of fraction digits is used. If the value is numerical, the given * number of fraction digits is used. This value must be either one out of * {@link NumericalAttribute#DEFAULT_NUMBER_OF_DIGITS} or * {@link NumericalAttribute#UNLIMITED_NUMBER_OF_DIGITS} or a number greater * or equal to 0. The boolean flag indicates if nominal values containing * whitespaces should be quoted with double quotes.</p> * * <p>Please note that this method should not be used in order to get the * nominal values, please use * {@link #getNominalValue(Attribute)} instead.</p> */ public String getValueAsString(Attribute attribute, int fractionDigits, boolean quoteNominal) { double value = getValue(attribute); return attribute.getAsString(value, fractionDigits, quoteNominal); } /** Returns a dense string representation with all possible fraction digits. * Nominal values will be quoted with double quotes. */ @Override public String toString() { return toDenseString(NumericalAttribute.UNLIMITED_NUMBER_OF_DIGITS, true); } /** * This method returns a dense string representation of the example. It * first returns the values of all special attributes and then the values of * all regular attributes. */ public String toDenseString(int fractionDigits, boolean quoteNominal) { StringBuffer result = new StringBuffer(); Iterator<Attribute> a = getAttributes().allAttributes(); boolean first = true; while (a.hasNext()) { if (first) { first = false; } else { result.append(SEPARATOR); } result.append(getValueAsString(a.next(), fractionDigits, quoteNominal)); } return result.toString(); } /** * Returns regular and some special attributes (label, id, and example * weight) in sparse format. * * @param format * one of the formats specified in * {@link SparseFormatDataRowReader} */ public String toSparseString(int format, int fractionDigits, boolean quoteNominal) { StringBuffer str = new StringBuffer(); // label Attribute labelAttribute = getAttributes().getSpecial(Attributes.LABEL_NAME); if ((format == SparseFormatDataRowReader.FORMAT_YX) && (labelAttribute != null)) { str.append(getValueAsString(labelAttribute, fractionDigits, quoteNominal) + " "); } // id Attribute idAttribute = getAttributes().getSpecial(Attributes.ID_NAME); if (idAttribute != null) { str.append("id:" + getValueAsString(idAttribute, fractionDigits, quoteNominal) + " "); } // weight Attribute weightAttribute = getAttributes().getSpecial(Attributes.WEIGHT_NAME); if (weightAttribute != null) { str.append("w:" + getValueAsString(weightAttribute, fractionDigits, quoteNominal) + " "); } // batch Attribute batchAttribute = getAttributes().getSpecial(Attributes.BATCH_NAME); if (batchAttribute != null) { str.append("b:" + getValueAsString(batchAttribute, fractionDigits, quoteNominal) + " "); } // attributes str.append(getAttributesAsSparseString(SEPARATOR, SPARSE_SEPARATOR, fractionDigits, quoteNominal) + " "); // label (format xy & prefix) if ((format == SparseFormatDataRowReader.FORMAT_PREFIX) && (labelAttribute != null)) { str.append("l:" + getValueAsString(labelAttribute, fractionDigits, quoteNominal)); } if ((format == SparseFormatDataRowReader.FORMAT_XY) && (labelAttribute != null)) { str.append(getValueAsString(labelAttribute, fractionDigits, quoteNominal)); } return str.toString(); } /** * Returns the attribute values in the format <br> * <center>index:value index:value</center><br> * Index starts with 1. * * @param separator * separates attributes * @param indexValueSeparator * separates index and value. * @param fractionDigits the number of fraction digits used, if -1 all possible digits are used */ /* pp */ String getAttributesAsSparseString(String separator, String indexValueSeparator, int fractionDigits, boolean quoteNominal) { StringBuffer str = new StringBuffer(); boolean first = true; int counter = 1; for (Attribute attribute : getAttributes()) { double value = getValue(attribute); if (!Tools.isDefault(attribute.getDefault(), value)) { if (!first) { str.append(separator); } first = false; str.append(counter + indexValueSeparator + getValueAsString(attribute, fractionDigits, quoteNominal)); } counter++; } return str.toString(); } // =================================== // The following methods implement // the map interface for easy // access of values in scripts // =================================== public Object get(Object key) { Attribute attribute = null; if (key instanceof String) { attribute = parentExampleSet.getAttributes().get((String)key); } double value = getValue(attribute); if (Double.isNaN(value)) { return "?"; } if (attribute == null) { return null; } else if (attribute.isNominal()) { return getValueAsString(attribute); } else if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.INTEGER)) { return (int)getValue(attribute); } else if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) { return new Date((long)getValue(attribute)); } else { return getValue(attribute); } } public Object put(String attributeName, Object value) { Attribute attribute = parentExampleSet.getAttributes().get(attributeName); if (attribute == null) { throw new IllegalArgumentException("Unknown attribute name: '" + attributeName + "'"); } else if (attribute.isNumerical()) { if (value == null) { setValue(attribute, Double.NaN); } else { try { double doubleValue = Double.parseDouble(value.toString()); setValue(attribute, doubleValue); } catch (NumberFormatException e) { throw new IllegalArgumentException("Only numerical values are allowed for numerical attribute: '" + attributeName + "', was '" + value + "'"); } } } else { if (value == null) { setValue(attribute, Double.NaN); } else { setValue(attribute, attribute.getMapping().mapString(value.toString())); } } return value; } public void clear() { throw new UnsupportedOperationException("Clear is not supported by Example."); } public boolean containsKey(Object key) { Attribute attribute = null; if (key instanceof String) { attribute = parentExampleSet.getAttributes().get((String)key); } return attribute != null; } public boolean containsValue(Object value) { throw new UnsupportedOperationException("ContainsValue is not supported by Example."); } public Set<java.util.Map.Entry<String, Object>> entrySet() { throw new UnsupportedOperationException("EntrySet is not supported by Example."); } public boolean isEmpty() { return parentExampleSet.getAttributes().allSize() == 0; } public Set<String> keySet() { Set<String> allKeys = new HashSet<String>(); Iterator<Attribute> a = parentExampleSet.getAttributes().allAttributes(); while (a.hasNext()) { allKeys.add(a.next().getName()); } return allKeys; } public void putAll(Map<? extends String, ? extends Object> m) { throw new UnsupportedOperationException("PutAll is not supported by Example."); } public String remove(Object key) { throw new UnsupportedOperationException("Remove is not supported by Example."); } public int size() { return parentExampleSet.getAttributes().allSize(); } public Collection<Object> values() { throw new UnsupportedOperationException("Values is not supported by Example."); } }