/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.preprocessing.transformation.aggregation;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.table.DataRow;
import java.util.HashSet;
/**
* This is an implementation of a Aggregator for numerical attributes. It takes over the handling of
* missing values.
*
* @author Sebastian Land
*/
public abstract class NumericalAggregator implements Aggregator {
private Attribute sourceAttribute;
private boolean ignoreMissings;
private boolean isMissing = false;
private boolean isCountingOnlyDistinct = false;
private HashSet<Double> distinctValueSet = null;
public NumericalAggregator(AggregationFunction function) {
this.sourceAttribute = function.getSourceAttribute();
this.ignoreMissings = function.isIgnoringMissings();
this.isCountingOnlyDistinct = function.isCountingOnlyDistinct();
if (isCountingOnlyDistinct) {
distinctValueSet = new HashSet<Double>();
}
}
@Override
public final void count(Example example) {
// check whether we have to count at all
if (!isMissing || ignoreMissings) {
double value = example.getValue(sourceAttribute);
if (isMissing && !ignoreMissings || Double.isNaN(value)) {
isMissing = true;
} else {
if (!isCountingOnlyDistinct || distinctValueSet.add(value)) {
count(value);
}
}
}
}
@Override
public final void count(Example example, double weight) {
// check whether we have to count at all
if (!isMissing || ignoreMissings) {
double value = example.getValue(sourceAttribute);
if (isMissing && !ignoreMissings || Double.isNaN(value)) {
isMissing = true;
} else {
if (!isCountingOnlyDistinct || distinctValueSet.add(value)) {
count(value, weight);
}
}
}
}
/**
* This method will count the given numerical value. This method will not be called in cases,
* where the examples value for the given source Attribute is unknown. Subclasses of this class
* will in this cases return either NaN if ignoreMissings is false, or will return the value as
* if the examples with the missing aren't present at all.
*
* Please see {@link #count(double, double)} for taking weights into account. You may not mix
* both methods within one aggregation run, as subclasses might implement more memory efficient
* data structures when not using weights.
*/
protected abstract void count(double value);
/**
* Same as {@link #count(double)}, but taking the weight into account. You may not mix both
* methods within one aggregation run, as subclasses might implement more memory efficient data
* structures when not using weights.
*/
protected abstract void count(double value, double weight);
@Override
public final void set(Attribute attribute, DataRow row) {
if (isMissing && !ignoreMissings) {
row.set(attribute, Double.NaN);
} else {
row.set(attribute, getValue());
}
}
/**
* This method has to return the numerical value of this aggregator.
*/
protected abstract double getValue();
/**
* Explicitly sets the value of this aggregator. The only place where it makes sense to use this
* function is in {@link AggregationFunction#postProcessing(java.util.List)}.
*
* The default implementation does nothing.
*
* @param value
*/
protected void setValue(double value) {
// do nothing
}
}