NumericalAggregator.java example

Explorer
rapidminer-studio-master
- doc
  - doc
- src
/**
 * Copyright (C) 2001-2017 by RapidMiner and the contributors
 * 
 * Complete list of developers available at our web site:
 * 
 * http://rapidminer.com
 * 
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.preprocessing.transformation.aggregation;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.table.DataRow;

import java.util.HashSet;


/**
 * This is an implementation of an {@link Aggregator} for numerical attributes. It takes over the
 * handling of missing values (represented as NaN) and of counting only distinct values, so that
 * subclasses only receive the values that actually have to be aggregated.
 * 
 * @author Sebastian Land
 */
public abstract class NumericalAggregator implements Aggregator {

	/** The attribute whose values are read from each counted example. */
	private final Attribute sourceAttribute;

	/** If true, NaN values are skipped instead of turning the whole result into NaN. */
	private final boolean ignoreMissings;

	/** If true, each distinct value is passed to the subclass only once. */
	private final boolean isCountingOnlyDistinct;

	/** Values seen so far; only allocated when counting distinct values, otherwise null. */
	private final HashSet<Double> distinctValueSet;

	/** Becomes true as soon as a NaN value has been read from the source attribute. */
	private boolean isMissing = false;

	/**
	 * Creates an aggregator that is configured from the given function: its source attribute, its
	 * missing value policy and whether only distinct values are counted.
	 * 
	 * @param function
	 *            the aggregation function this aggregator belongs to
	 */
	public NumericalAggregator(AggregationFunction function) {
		this.sourceAttribute = function.getSourceAttribute();
		this.ignoreMissings = function.isIgnoringMissings();
		this.isCountingOnlyDistinct = function.isCountingOnlyDistinct();
		this.distinctValueSet = isCountingOnlyDistinct ? new HashSet<Double>() : null;
	}

	/**
	 * Reads the source attribute's value from the example and forwards it to
	 * {@link #count(double)}, unless it is missing or, in distinct mode, has been counted before.
	 */
	@Override
	public final void count(Example example) {
		if (isResultAlreadyMissing()) {
			// the result is NaN anyway, so the example does not have to be read at all
			return;
		}
		double value = example.getValue(sourceAttribute);
		if (registerValue(value)) {
			count(value);
		}
	}

	/**
	 * Same as {@link #count(Example)}, but forwards the value together with the given weight to
	 * {@link #count(double, double)}.
	 */
	@Override
	public final void count(Example example, double weight) {
		if (isResultAlreadyMissing()) {
			// the result is NaN anyway, so the example does not have to be read at all
			return;
		}
		double value = example.getValue(sourceAttribute);
		if (registerValue(value)) {
			count(value, weight);
		}
	}

	/**
	 * Returns true if a missing value has been seen and missings are not ignored, i.e. if
	 * {@link #set(Attribute, DataRow)} will write NaN regardless of any further examples.
	 */
	private boolean isResultAlreadyMissing() {
		return isMissing && !ignoreMissings;
	}

	/**
	 * Applies the missing value and distinct value bookkeeping to a freshly read value.
	 * 
	 * @param value
	 *            the value read from the source attribute
	 * @return true if the value has to be passed on to the subclass; false if it is NaN or, in
	 *         distinct mode, has already been counted
	 */
	private boolean registerValue(double value) {
		if (Double.isNaN(value)) {
			isMissing = true;
			return false;
		}
		// HashSet#add returns false for values that are already contained in the set
		return !isCountingOnlyDistinct || distinctValueSet.add(value);
	}

	/**
	 * This method will count the given numerical value. It is not called for examples whose value
	 * of the source attribute is missing (NaN). In that case {@link #set(Attribute, DataRow)}
	 * writes NaN if missings are not ignored; otherwise it writes {@link #getValue()}, which then
	 * is based only on the values that were passed to this method.
	 * 
	 * Please see {@link #count(double, double)} for taking weights into account. You may not mix
	 * both methods within one aggregation run, as subclasses might implement more memory efficient
	 * data structures when not using weights.
	 * 
	 * @param value
	 *            the non-missing value to count
	 */
	protected abstract void count(double value);

	/**
	 * Same as {@link #count(double)}, but taking the weight into account. You may not mix both
	 * methods within one aggregation run, as subclasses might implement more memory efficient data
	 * structures when not using weights.
	 * 
	 * @param value
	 *            the non-missing value to count
	 * @param weight
	 *            the weight passed along with the counted example
	 */
	protected abstract void count(double value, double weight);

	/**
	 * Writes the result of this aggregator into the given row: NaN if a missing value has been
	 * seen and missings are not ignored, otherwise the result of {@link #getValue()}.
	 */
	@Override
	public final void set(Attribute attribute, DataRow row) {
		double result = isResultAlreadyMissing() ? Double.NaN : getValue();
		row.set(attribute, result);
	}

	/**
	 * This method has to return the numerical value of this aggregator.
	 */
	protected abstract double getValue();

	/**
	 * Explicitly sets the value of this aggregator. The only place where it makes sense to use this
	 * function is in {@link AggregationFunction#postProcessing(java.util.List)}.
	 * 
	 * The default implementation does nothing.
	 * 
	 * @param value
	 *            the value to set; ignored by this default implementation
	 */
	protected void setValue(double value) {
		// do nothing
	}
}