/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.preprocessing.filter;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Statistics;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.tools.Ontology;
/**
* Replaces missing values in examples. If a value is missing, it is replaced by
* the result of one of the functions "minimum", "maximum",
* "average", "zero", "value", and "none". The functions "minimum", "maximum"
* and "average" are applied to the non-missing attribute values of the example
* set. "none" means that the value is not replaced. The function can be
* selected using the parameter list
* <code>columns</code>. If an attribute's name appears in this list as a
* key, the value is used as the function name. If the attribute's name is not
* in the list, the function specified by the <code>default</code> parameter
* is used. For nominal attributes the mode is used for the average, i.e. the
* nominal value which occurs most often in the data. For nominal attributes and
* the replacement type "zero", the first nominal value defined for this
* attribute is used. The replenishment "value" indicates that the
* user defined parameter should be used for the replacement.
*
* @author Ingo Mierswa, Simon Fischer
* @version $Id: MissingValueReplenishment.java,v 1.7 2006/03/21 15:35:52
* ingomierswa Exp $
*/
public class MissingValueReplenishment extends ValueReplenishment {

    /** Function index for "none": the current value is returned unchanged. */
    private static final int NONE = 0;

    /** Function index for "minimum": the attribute's minimum statistic is used. */
    private static final int MINIMUM = 1;

    /** Function index for "maximum": the attribute's maximum statistic is used. */
    private static final int MAXIMUM = 2;

    /** Function index for "average": average, or mode for nominal attributes. */
    private static final int AVERAGE = 3;

    /** Function index for "zero": the constant 0.0 is used. */
    private static final int ZERO = 4;

    /** Function index for "value": the user supplied value string is used. */
    private static final int VALUE = 5;

    /** Function names; the position of each name equals its function index above. */
    private static final String[] REPLENISHMENT_NAMES = { "none", "minimum", "maximum", "average", "zero", "value" };

    /** Pattern expected for user supplied values of date attributes. */
    private static final String DATE_FORMAT = "MM/dd/yyyy";

    /** Pattern expected for user supplied values of time attributes. */
    private static final String TIME_FORMAT = "hh.mm a";

    /** Pattern expected for user supplied values of date-time attributes. */
    private static final String DATE_TIME_FORMAT = "MM/dd/yyyy hh.mm a";

    /**
     * Creates the operator.
     *
     * @param description the operator description, passed on to the super class
     */
    public MissingValueReplenishment(OperatorDescription description) {
        super(description);
    }

    /**
     * Returns the names of the supported replenishment functions. The index
     * of a name in the returned array is the function index understood by
     * {@link #getReplenishmentValue(int, ExampleSet, Attribute, double, String)}.
     *
     * @return a fresh copy of the function names, so that callers cannot
     *         modify the shared constant array
     */
    public String[] getFunctionNames() {
        return REPLENISHMENT_NAMES.clone();
    }

    /**
     * Returns the index of the default function, which is "average".
     *
     * @return the function index of "average"
     */
    public int getDefaultFunction() {
        return AVERAGE;
    }

    /**
     * Returns the index of the default column function, which is "average".
     *
     * @return the function index of "average"
     */
    public int getDefaultColumnFunction() {
        return AVERAGE;
    }

    /**
     * Decides whether the given value has to be replaced.
     *
     * @param currentValue the value to check
     * @return <code>true</code> if and only if the value is NaN
     */
    public boolean replenishValue(double currentValue) {
        return Double.isNaN(currentValue);
    }

    /**
     * Computes the replacement for a value of the given attribute.
     * <ul>
     * <li>"none": returns <code>currentValue</code>.</li>
     * <li>"minimum" / "maximum": returns the corresponding statistic of the
     * attribute as delivered by the example set.</li>
     * <li>"average": returns <code>currentValue</code> for date-time
     * attributes (no replacement is computed for them), the mode for nominal
     * attributes and the average otherwise.</li>
     * <li>"zero": returns 0.0.</li>
     * <li>"value": interprets <code>valueString</code>. For date-time
     * attributes it is parsed with the pattern <code>MM/dd/yyyy</code>
     * (date), <code>hh.mm a</code> (time) or <code>MM/dd/yyyy hh.mm a</code>
     * (date-time) using the US locale, and the resulting time in
     * milliseconds is returned. For nominal attributes the string is mapped
     * via the attribute's nominal mapping. Otherwise the string is parsed as
     * a double.</li>
     * </ul>
     *
     * @param functionIndex index of the function, see {@link #getFunctionNames()}
     * @param exampleSet    the example set providing the attribute statistics
     * @param attribute     the attribute whose value is replaced
     * @param currentValue  the value which is about to be replaced
     * @param valueString   the user defined replacement, only read for "value"
     * @return the replacement value
     * @throws RuntimeException if the function index is unknown or if the
     *         value string does not match the expected date pattern; in the
     *         latter case the {@link ParseException} is attached as cause
     * @throws NumberFormatException if "value" is requested for a numerical
     *         attribute and the value string is not a parsable double
     */
    public double getReplenishmentValue(int functionIndex, ExampleSet exampleSet, Attribute attribute, double currentValue, String valueString) {
        switch (functionIndex) {
            case NONE:
                return currentValue;
            case MINIMUM:
                return exampleSet.getStatistics(attribute, Statistics.MINIMUM);
            case MAXIMUM:
                return exampleSet.getStatistics(attribute, Statistics.MAXIMUM);
            case AVERAGE:
                if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) {
                    // no average is computed for date-time attributes: keep the value as it is
                    return currentValue;
                } else if (attribute.isNominal()) {
                    return exampleSet.getStatistics(attribute, Statistics.MODE);
                } else {
                    return exampleSet.getStatistics(attribute, Statistics.AVERAGE);
                }
            case ZERO:
                return 0.0d;
            case VALUE:
                if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) {
                    return parseDateValue(getDateFormatString(attribute), valueString);
                } else if (attribute.isNominal()) {
                    return attribute.getMapping().mapString(valueString);
                } else {
                    return Double.parseDouble(valueString);
                }
            default:
                throw new RuntimeException("Illegal value functionIndex: " + functionIndex);
        }
    }

    /**
     * Selects the date pattern for an attribute which is already known to be
     * of a date-time value type. The more specific types are checked first.
     */
    private static String getDateFormatString(Attribute attribute) {
        if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE)) {
            return DATE_FORMAT;
        } else if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.TIME)) {
            return TIME_FORMAT;
        } else {
            return DATE_TIME_FORMAT;
        }
    }

    /**
     * Parses the value string with the given pattern (US locale) and returns
     * the time in milliseconds as delivered by {@link Date#getTime()}.
     */
    private static double parseDateValue(String formatString, String valueString) {
        // SimpleDateFormat is not thread-safe, hence a new instance per call
        SimpleDateFormat dateFormat = new SimpleDateFormat(formatString, Locale.US);
        try {
            Date date = dateFormat.parse(valueString);
            return date.getTime();
        } catch (ParseException e) {
            // keep the original exception as cause so that the parse position is not lost
            throw new RuntimeException("Illegal date format, must be '" + formatString + "'", e);
        }
    }
}