/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.example;
import java.io.Serializable;
import java.util.Collection;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.example.table.ExampleTable;
import com.rapidminer.example.table.NumericalAttribute;
import com.rapidminer.example.table.SparseFormatDataRowReader;
import com.rapidminer.tools.Ontology;
/**
 * An example consists of a {@link DataRow} and some convenience methods to access the data.
 * All values are actually doubles; symbolic (nominal) values are mapped to
 * integer indices which are stored in doubles.<br>
 * Since {@link ExampleSet}s are only a view on {@link ExampleTable}s,
 * Examples are generated on the fly by {@link ExampleReader}s. Since they only
 * contain the currently selected attributes, operators need not consider
 * attribute selections or example subsets (samplings).
 *
 * <p>In addition, this class offers a partial {@code Map<String, Object>} view
 * (attribute name to value) for easy access of values in scripts. Several
 * map operations are not supported and throw an
 * {@link UnsupportedOperationException}.</p>
 *
 * @author Ingo Mierswa
 */
public class Example implements Serializable, Map<String, Object> {

    private static final long serialVersionUID = 7761687908683290928L;

    /**
     * Separator placed between the values in {@link #toDenseString(int, boolean)} and
     * between the index:value pairs in {@link #toSparseString(int, int, boolean)}.
     */
    public static final String SEPARATOR = " ";

    /** Separates indices from values in sparse format (colon). */
    public static final String SPARSE_SEPARATOR = ":";

    /** The data for this example. */
    private final DataRow data;

    /** The parent example set holding all attribute information for this data row. */
    private final ExampleSet parentExampleSet;

    /**
     * Creates a new Example that uses the data stored in a DataRow. The attributes
     * correspond to the regular and special attributes of the given parent example set.
     *
     * @param data the data row backing this example
     * @param parentExampleSet the example set providing the attribute information
     */
    public Example(DataRow data, ExampleSet parentExampleSet) {
        this.data = data;
        this.parentExampleSet = parentExampleSet;
    }

    /** Returns the data row which backs up the example in the example table. */
    public DataRow getDataRow() {
        return this.data;
    }

    /** Delivers the attributes of the parent example set. */
    public Attributes getAttributes() {
        return this.parentExampleSet.getAttributes();
    }

    // --------------------------------------------------------------------------------

    /**
     * Returns the value of attribute a. In the case of nominal attributes, the delivered
     * double value corresponds to an internal index.
     */
    public double getValue(Attribute a) {
        return data.get(a);
    }

    /**
     * Returns the nominal value for the given attribute. A missing value (NaN) is
     * reported as {@link Attribute#MISSING_NOMINAL_VALUE}.
     *
     * @throws AttributeTypeException if the given attribute has the wrong value type
     */
    public String getNominalValue(Attribute a) {
        if (!a.isNominal()) {
            throw new AttributeTypeException("Extraction of nominal example value for non-nominal attribute '" + a.getName() + "' is not possible.");
        }
        double value = getValue(a);
        if (Double.isNaN(value)) {
            return Attribute.MISSING_NOMINAL_VALUE;
        }
        return a.getMapping().mapIndex((int) value);
    }

    /**
     * Returns the numerical value for the given attribute.
     *
     * @throws AttributeTypeException if the given attribute has the wrong value type
     */
    public double getNumericalValue(Attribute a) {
        if (!a.isNumerical()) {
            throw new AttributeTypeException("Extraction of numerical example value for non-numerical attribute '" + a.getName() + "' is not possible.");
        }
        return getValue(a);
    }

    /**
     * Returns the date value for the given attribute. The stored double is cast
     * to a long and used as the argument of {@link Date#Date(long)}.
     * Note that no check for missing values is performed here: the cast of
     * NaN to long yields 0, so a missing value results in {@code new Date(0)}.
     *
     * @throws AttributeTypeException if the given attribute has the wrong value type
     */
    public Date getDateValue(Attribute a) {
        if (!Ontology.ATTRIBUTE_VALUE_TYPE.isA(a.getValueType(), Ontology.DATE_TIME)) {
            throw new AttributeTypeException("Extraction of date example value for non-date attribute '" + a.getName() + "' is not possible.");
        }
        return new Date((long) getValue(a));
    }

    /**
     * Sets the value of attribute a. The attribute a need not necessarily be
     * part of the example set the example is taken from, although this is no
     * good style.
     */
    public void setValue(Attribute a, double value) {
        data.set(a, value);
    }

    /**
     * Sets the value of attribute a which must be a nominal attribute. The
     * attribute a need not necessarily be part of the example set the example
     * is taken from, although this is no good style. Missing values might be given by
     * passing null as second argument.
     *
     * @throws AttributeTypeException if the given attribute is not nominal
     */
    public void setValue(Attribute a, String str) {
        if (!a.isNominal()) {
            throw new AttributeTypeException("setValue(Attribute, String) only supported for nominal values!");
        }
        if (str == null) {
            // null encodes a missing value
            setValue(a, Double.NaN);
        } else {
            setValue(a, a.getMapping().mapString(str));
        }
    }

    /**
     * Returns true if both nominal values are the same (if both attributes
     * are nominal) or if both real values are the same (if both attributes
     * are not nominal) or false otherwise. Nominal values are compared via their
     * string representation.
     */
    public boolean equalValue(Attribute first, Attribute second) {
        boolean firstNominal = first.isNominal();
        if (firstNominal && second.isNominal()) {
            return getValueAsString(first).equals(getValueAsString(second));
        }
        if ((!firstNominal) && (!second.isNominal())) {
            return com.rapidminer.tools.Tools.isEqual(getValue(first), getValue(second));
        }
        // mixed value types are never regarded as equal
        return false;
    }

    // ---------------------------------------------------------------------------------

    /** Returns the value of the label attribute delivered by {@link #getAttributes()}. */
    public double getLabel() {
        return getValue(getAttributes().getLabel());
    }

    /** Sets the value of the label attribute delivered by {@link #getAttributes()}. */
    public void setLabel(double value) {
        setValue(getAttributes().getLabel(), value);
    }

    /** Returns the value of the predicted label attribute delivered by {@link #getAttributes()}. */
    public double getPredictedLabel() {
        return getValue(getAttributes().getPredictedLabel());
    }

    /** Sets the value of the predicted label attribute delivered by {@link #getAttributes()}. */
    public void setPredictedLabel(double value) {
        setValue(getAttributes().getPredictedLabel(), value);
    }

    /** Returns the value of the id attribute delivered by {@link #getAttributes()}. */
    public double getId() {
        return getValue(getAttributes().getId());
    }

    /** Sets the value of the id attribute delivered by {@link #getAttributes()}. */
    public void setId(double value) {
        setValue(getAttributes().getId(), value);
    }

    /** Returns the value of the weight attribute delivered by {@link #getAttributes()}. */
    public double getWeight() {
        return getValue(getAttributes().getWeight());
    }

    /** Sets the value of the weight attribute delivered by {@link #getAttributes()}. */
    public void setWeight(double value) {
        setValue(getAttributes().getWeight(), value);
    }

    /**
     * Returns the value of the special attribute whose name is built from
     * {@link Attributes#CONFIDENCE_NAME}, an underscore and the given class value.
     */
    public double getConfidence(String classValue) {
        return getValue(getAttributes().getSpecial(Attributes.CONFIDENCE_NAME + "_" + classValue));
    }

    /**
     * Sets the value of the special attribute whose name is built from
     * {@link Attributes#CONFIDENCE_NAME}, an underscore and the given class value.
     */
    public void setConfidence(String classValue, double confidence) {
        setValue(getAttributes().getSpecial(Attributes.CONFIDENCE_NAME + "_" + classValue), confidence);
    }

    // --------------------------------------------------------------------------------

    /**
     * <p>Returns the value of this attribute as string representation, i.e. the
     * number as string for numerical attributes and the correctly mapped
     * categorical value for nominal values. The used number of fraction
     * digits is unlimited
     * (see {@link NumericalAttribute#UNLIMITED_NUMBER_OF_DIGITS}).
     * Nominal values containing whitespaces will not be quoted.</p>
     *
     * <p>Please note that this method should not be used in order to get the
     * nominal values, please use
     * {@link #getNominalValue(Attribute)} instead.</p>
     */
    public String getValueAsString(Attribute attribute) {
        return getValueAsString(attribute, NumericalAttribute.UNLIMITED_NUMBER_OF_DIGITS, false);
    }

    /**
     * <p>Returns the value of this attribute as string representation, i.e. the
     * number as string for numerical attributes and the correctly mapped
     * categorical value for nominal values. If the value is numerical, the given
     * number of fraction digits is used. This value must be either one out of
     * {@link NumericalAttribute#DEFAULT_NUMBER_OF_DIGITS} or
     * {@link NumericalAttribute#UNLIMITED_NUMBER_OF_DIGITS} or a number greater
     * or equal to 0. The boolean flag indicates if nominal values containing
     * whitespaces should be quoted with double quotes.</p>
     *
     * <p>Please note that this method should not be used in order to get the
     * nominal values, please use
     * {@link #getNominalValue(Attribute)} instead.</p>
     */
    public String getValueAsString(Attribute attribute, int fractionDigits, boolean quoteNominal) {
        return attribute.getAsString(getValue(attribute), fractionDigits, quoteNominal);
    }

    /**
     * Returns a dense string representation with all possible fraction digits
     * and with quoting of nominal values enabled.
     */
    @Override
    public String toString() {
        return toDenseString(NumericalAttribute.UNLIMITED_NUMBER_OF_DIGITS, true);
    }

    /**
     * This method returns a dense string representation of the example: the
     * string representations of the values of all attributes (regular and
     * special, in the order delivered by {@link Attributes#allAttributes()}),
     * separated by {@link #SEPARATOR}.
     */
    public String toDenseString(int fractionDigits, boolean quoteNominal) {
        StringBuilder result = new StringBuilder();
        Iterator<Attribute> attributes = getAttributes().allAttributes();
        boolean first = true;
        while (attributes.hasNext()) {
            if (!first) {
                result.append(SEPARATOR);
            }
            first = false;
            result.append(getValueAsString(attributes.next(), fractionDigits, quoteNominal));
        }
        return result.toString();
    }

    /**
     * Returns regular and some special attributes (label, id, example
     * weight, and batch) in sparse format. Id, weight and batch are written
     * with the prefixes "id:", "w:" and "b:" if the corresponding special
     * attribute exists. The position and prefix of the label depend on the format.
     *
     * @param format
     *            one of the formats specified in
     *            {@link SparseFormatDataRowReader}
     * @param fractionDigits the number of fraction digits used for numerical values
     * @param quoteNominal passed on to {@link #getValueAsString(Attribute, int, boolean)}
     */
    public String toSparseString(int format, int fractionDigits, boolean quoteNominal) {
        StringBuilder str = new StringBuilder();

        // label (format yx: label comes first)
        Attribute labelAttribute = getAttributes().getSpecial(Attributes.LABEL_NAME);
        if ((format == SparseFormatDataRowReader.FORMAT_YX) && (labelAttribute != null)) {
            str.append(getValueAsString(labelAttribute, fractionDigits, quoteNominal)).append(" ");
        }

        // id, weight, batch
        appendPrefixedSpecial(str, "id:", Attributes.ID_NAME, fractionDigits, quoteNominal);
        appendPrefixedSpecial(str, "w:", Attributes.WEIGHT_NAME, fractionDigits, quoteNominal);
        appendPrefixedSpecial(str, "b:", Attributes.BATCH_NAME, fractionDigits, quoteNominal);

        // attributes
        str.append(getAttributesAsSparseString(SEPARATOR, SPARSE_SEPARATOR, fractionDigits, quoteNominal)).append(" ");

        // label (format xy & prefix: label comes last)
        if ((format == SparseFormatDataRowReader.FORMAT_PREFIX) && (labelAttribute != null)) {
            str.append("l:").append(getValueAsString(labelAttribute, fractionDigits, quoteNominal));
        }
        if ((format == SparseFormatDataRowReader.FORMAT_XY) && (labelAttribute != null)) {
            str.append(getValueAsString(labelAttribute, fractionDigits, quoteNominal));
        }
        return str.toString();
    }

    /**
     * Appends prefix, value and a trailing blank for the special attribute with the
     * given name to the target, or nothing if no such special attribute exists.
     */
    private void appendPrefixedSpecial(StringBuilder target, String prefix, String specialName, int fractionDigits, boolean quoteNominal) {
        Attribute special = getAttributes().getSpecial(specialName);
        if (special != null) {
            target.append(prefix).append(getValueAsString(special, fractionDigits, quoteNominal)).append(" ");
        }
    }

    /**
     * Returns the attribute values in the format <br>
     * <center>index:value index:value</center><br>
     * Index starts with 1. Attributes whose value is regarded as default by
     * {@code Tools.isDefault(attribute.getDefault(), value)} are omitted, but
     * they still count for the index.
     *
     * @param separator
     *            separates attributes
     * @param indexValueSeparator
     *            separates index and value.
     * @param fractionDigits the number of fraction digits used, if -1 all possible digits are used
     * @param quoteNominal passed on to {@link #getValueAsString(Attribute, int, boolean)}
     */
    /* pp */ String getAttributesAsSparseString(String separator, String indexValueSeparator, int fractionDigits, boolean quoteNominal) {
        StringBuilder str = new StringBuilder();
        boolean first = true;
        int counter = 1;
        for (Attribute attribute : getAttributes()) {
            double value = getValue(attribute);
            if (!Tools.isDefault(attribute.getDefault(), value)) {
                if (!first) {
                    str.append(separator);
                }
                first = false;
                str.append(counter).append(indexValueSeparator).append(getValueAsString(attribute, fractionDigits, quoteNominal));
            }
            // skipped default values still consume an index
            counter++;
        }
        return str.toString();
    }

    // ===================================
    // The following methods implement
    // the map interface for easy
    // access of values in scripts
    // ===================================

    /**
     * Returns the value of the attribute with the given name.
     *
     * @param key the attribute name
     * @return {@code null} if the key is not a String or no attribute with this
     *         name exists; "?" if the value is missing (NaN); the string representation
     *         for nominal attributes; an Integer for integer attributes; a {@link Date}
     *         for date attributes; a Double otherwise
     */
    public Object get(Object key) {
        if (!(key instanceof String)) {
            return null;
        }
        Attribute attribute = parentExampleSet.getAttributes().get((String) key);
        // resolve the attribute BEFORE touching the data row: an unknown key
        // must yield null and must never be passed on as a null attribute
        if (attribute == null) {
            return null;
        }
        double value = getValue(attribute);
        if (Double.isNaN(value)) {
            return "?";
        }
        if (attribute.isNominal()) {
            return getValueAsString(attribute);
        }
        if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.INTEGER)) {
            return Integer.valueOf((int) value);
        }
        if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) {
            return new Date((long) value);
        }
        return Double.valueOf(value);
    }

    /**
     * Sets the value of the attribute with the given name. A {@code null} value
     * is stored as missing value (NaN). For numerical attributes the string
     * representation of the value is parsed as double; for all other attributes
     * the string representation is mapped by the attribute's mapping.
     *
     * @return the given value (note: not the previously stored value)
     * @throws IllegalArgumentException if no attribute with the given name exists or
     *         if the value cannot be parsed as number for a numerical attribute
     */
    public Object put(String attributeName, Object value) {
        Attribute attribute = parentExampleSet.getAttributes().get(attributeName);
        if (attribute == null) {
            throw new IllegalArgumentException("Unknown attribute name: '" + attributeName + "'");
        }
        if (value == null) {
            // null encodes a missing value, independent of the attribute type
            setValue(attribute, Double.NaN);
        } else if (attribute.isNumerical()) {
            double doubleValue;
            try {
                doubleValue = Double.parseDouble(value.toString());
            } catch (NumberFormatException e) {
                // keep the original exception as cause for easier debugging
                throw new IllegalArgumentException("Only numerical values are allowed for numerical attribute: '" + attributeName + "', was '" + value + "'", e);
            }
            setValue(attribute, doubleValue);
        } else {
            setValue(attribute, attribute.getMapping().mapString(value.toString()));
        }
        return value;
    }

    /** Not supported, always throws an {@link UnsupportedOperationException}. */
    public void clear() {
        throw new UnsupportedOperationException("Clear is not supported by Example.");
    }

    /** Returns true if the key is a String and an attribute with this name exists. */
    public boolean containsKey(Object key) {
        return (key instanceof String) && (parentExampleSet.getAttributes().get((String) key) != null);
    }

    /** Not supported, always throws an {@link UnsupportedOperationException}. */
    public boolean containsValue(Object value) {
        throw new UnsupportedOperationException("ContainsValue is not supported by Example.");
    }

    /** Not supported, always throws an {@link UnsupportedOperationException}. */
    public Set<java.util.Map.Entry<String, Object>> entrySet() {
        throw new UnsupportedOperationException("EntrySet is not supported by Example.");
    }

    /** Returns true if the parent example set has no attributes at all (regular or special). */
    public boolean isEmpty() {
        return parentExampleSet.getAttributes().allSize() == 0;
    }

    /**
     * Returns the names of all attributes (regular and special) of the parent
     * example set. The result is a new set; changes to it do not affect this example.
     */
    public Set<String> keySet() {
        Set<String> allKeys = new HashSet<String>();
        Iterator<Attribute> attributes = parentExampleSet.getAttributes().allAttributes();
        while (attributes.hasNext()) {
            allKeys.add(attributes.next().getName());
        }
        return allKeys;
    }

    /** Not supported, always throws an {@link UnsupportedOperationException}. */
    public void putAll(Map<? extends String, ? extends Object> m) {
        throw new UnsupportedOperationException("PutAll is not supported by Example.");
    }

    /** Not supported, always throws an {@link UnsupportedOperationException}. */
    public String remove(Object key) {
        throw new UnsupportedOperationException("Remove is not supported by Example.");
    }

    /** Returns the number of all attributes (regular and special) of the parent example set. */
    public int size() {
        return parentExampleSet.getAttributes().allSize();
    }

    /** Not supported, always throws an {@link UnsupportedOperationException}. */
    public Collection<Object> values() {
        throw new UnsupportedOperationException("Values is not supported by Example.");
    }
}