/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.example.set;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeRole;
import com.rapidminer.example.AttributeTransformation;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Statistics;
import com.rapidminer.example.table.ExampleTable;
import com.rapidminer.operator.Annotations;
/**
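* An implementation of ExampleSet that allows the replacement of missing values on the fly. Unless
* an explicit replacement map is given, missing values are replaced by the mode (for nominal
* attributes) or by the average (for numerical attributes) of the respective attribute.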
*
* @author Ingo Mierswa
*/
public class ReplaceMissingExampleSet extends AbstractExampleSet {

    private static final long serialVersionUID = -5662936146589379273L;

    /**
     * Maps attribute names to their replacement value. The map is handed to the
     * {@link AttributeTransformationReplaceMissing} instances attached to the parent's attributes.
     */
    private final Map<String, Double> replacementMap;

    /** The parent example set (a clone of the example set passed to the constructor). */
    private final ExampleSet parent;

    /**
     * Creates a view on a clone of the given example set, computing the replacement values from
     * the data (mode for nominal attributes, average for all others).
     * <p>
     * It is recommended to use {@link #create(ExampleSet, Map)} instead if no later access to the
     * replacementMap is needed.
     *
     * @param exampleSet
     *            the example set for which missing values should be replaced
     */
    public ReplaceMissingExampleSet(ExampleSet exampleSet) {
        this(exampleSet, null);
    }

    /**
     * Creates a view on a clone of the given example set using the given replacement map. If the
     * map is {@code null}, a new map is filled with the mode (nominal attributes) or the average
     * (all other attributes) of every regular attribute. A non-null map is stored by reference,
     * not copied, so it is the same instance later returned by {@link #getReplacementMap()}.
     * <p>
     * It is recommended to use {@link #create(ExampleSet, Map)} instead if no later access to the
     * replacementMap is needed.
     *
     * @param exampleSet
     *            the example set for which missing values should be replaced
     * @param replacementMap
     *            the map from attribute name to replacement value, or {@code null}
     */
    public ReplaceMissingExampleSet(ExampleSet exampleSet, Map<String, Double> replacementMap) {
        this.parent = (ExampleSet) exampleSet.clone();
        if (replacementMap == null) {
            this.replacementMap = new HashMap<String, Double>();
            // The statistics are calculated before the replacement transformations are attached.
            for (Attribute attribute : parent.getAttributes()) {
                addReplacement(attribute);
            }
        } else {
            this.replacementMap = replacementMap;
        }
        addReplacementTransformations(this.parent, this.replacementMap);
    }

    /**
     * Creates a new example set based on exampleSet where missing values are replaced as specified
     * by replacementMap. If replacementMap is {@code null} then the mode (for nominal attributes)
     * or the average (for numerical attributes) is taken as replacement. The returned set is a
     * clone of exampleSet with the replacement transformations attached to its attributes; it is
     * not wrapped in a {@link ReplaceMissingExampleSet}.
     *
     * @param exampleSet
     *            the exampleSet for which missing values should be replaced
     * @param replacementMap
     *            the map specifying the replacement
     * @return an example set without missing values
     * @since 7.5.1
     */
    public static ExampleSet create(ExampleSet exampleSet, Map<String, Double> replacementMap) {
        ExampleSet newSet = (ExampleSet) exampleSet.clone();
        if (replacementMap == null) {
            replacementMap = new HashMap<String, Double>();
            for (Attribute attribute : newSet.getAttributes()) {
                addReplacement(newSet, attribute, replacementMap);
            }
        }
        addReplacementTransformations(newSet, replacementMap);
        return newSet;
    }

    /**
     * Adds a {@link AttributeTransformationReplaceMissing} for the given replacementMap to all
     * attributes of the exampleSet (all attribute roles, not only the regular attributes).
     */
    private static void addReplacementTransformations(ExampleSet exampleSet, Map<String, Double> replacementMap) {
        Iterator<AttributeRole> roles = exampleSet.getAttributes().allAttributeRoles();
        while (roles.hasNext()) {
            Attribute currentAttribute = roles.next().getAttribute();
            currentAttribute.addTransformation(new AttributeTransformationReplaceMissing(replacementMap));
        }
    }

    /**
     * Clone constructor. Clones the parent of the given example set, copies its replacement map
     * and points the last transformation of every cloned attribute to the copied map, provided
     * that transformation is an {@link AttributeTransformationReplaceMissing}.
     *
     * @param exampleSet
     *            the example set to copy
     */
    public ReplaceMissingExampleSet(ReplaceMissingExampleSet exampleSet) {
        this.parent = (ExampleSet) exampleSet.parent.clone();
        // Double is immutable, so copying the entries yields an independent map.
        this.replacementMap = new HashMap<String, Double>(exampleSet.replacementMap);

        Iterator<AttributeRole> roles = this.parent.getAttributes().allAttributeRoles();
        while (roles.hasNext()) {
            AttributeTransformation transformation = roles.next().getAttribute().getLastTransformation();
            // instanceof is false for null, so no separate null check is required
            if (transformation instanceof AttributeTransformationReplaceMissing) {
                ((AttributeTransformationReplaceMissing) transformation).setReplacementMap(this.replacementMap);
            }
        }
    }

    /**
     * Returns the map from attribute name to replacement value used by this example set. The
     * internal map itself is returned, not a copy.
     */
    public Map<String, Double> getReplacementMap() {
        return this.replacementMap;
    }

    /**
     * Recalculates the statistics of the given attribute on this example set and stores its mode
     * (nominal attributes) or its average (all other attributes) in the replacement map.
     *
     * @param attribute
     *            the attribute for which a replacement value should be registered
     */
    public void addReplacement(Attribute attribute) {
        addReplacement(this, attribute, replacementMap);
    }

    /**
     * Adds a mapping from the name of the given attribute to its mode (for nominal attributes) or
     * its average (for numerical attributes) to the replacementMap.
     */
    private static void addReplacement(ExampleSet exampleSet, Attribute attribute, Map<String, Double> replacementMap) {
        exampleSet.recalculateAttributeStatistics(attribute);
        String statisticsName = attribute.isNominal() ? Statistics.MODE : Statistics.AVERAGE;
        replacementMap.put(attribute.getName(), exampleSet.getStatistics(attribute, statisticsName));
    }

    /** Returns the attributes of the parent example set. */
    @Override
    public Attributes getAttributes() {
        return this.parent.getAttributes();
    }

    /** Returns the annotations of the parent example set. */
    @Override
    public Annotations getAnnotations() {
        return parent.getAnnotations();
    }

    /**
     * Two instances are equal if the superclass considers them equal, the other object is also a
     * {@link ReplaceMissingExampleSet} and both replacement maps contain the same mappings.
     */
    @Override
    public boolean equals(Object o) {
        if (!super.equals(o)) {
            return false;
        }
        if (!(o instanceof ReplaceMissingExampleSet)) {
            return false;
        }
        Map<String, Double> otherMap = ((ReplaceMissingExampleSet) o).replacementMap;
        // Map.equals compares size and every key/value pair and tolerates null values.
        return this.replacementMap.equals(otherMap);
    }

    /** Combines the superclass hash code with the hash code of the replacement map. */
    @Override
    public int hashCode() {
        return super.hashCode() ^ replacementMap.hashCode();
    }

    /**
     * Creates a new example set reader: an {@link AttributesExampleReader} wrapping the parent's
     * iterator and this example set.
     */
    @Override
    public Iterator<Example> iterator() {
        return new AttributesExampleReader(parent.iterator(), this);
    }

    /** Returns the example at the given index as delivered by the parent example set. */
    @Override
    public Example getExample(int index) {
        return this.parent.getExample(index);
    }

    /** Returns the example table of the parent example set. */
    @Override
    public ExampleTable getExampleTable() {
        return parent.getExampleTable();
    }

    /** Returns the number of examples of the parent example set. */
    @Override
    public int size() {
        return parent.size();
    }

    /** Delegates the cleanup to the parent example set. */
    @Override
    public void cleanup() {
        parent.cleanup();
    }
}