/* * RapidMiner * * Copyright (C) 2001-2011 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.preprocessing.transformation.aggregation; import java.util.HashSet; import com.rapidminer.example.Attribute; import com.rapidminer.example.Example; import com.rapidminer.example.table.DataRow; /** * This is an implementation of a Aggregator for numerical attributes. It takes over * the handling of missing values. * * @author Sebastian Land */ public abstract class NumericalAggregator implements Aggregator { private Attribute sourceAttribute; private boolean ignoreMissings; private boolean isMissing = false; private boolean isCountingOnlyDistinct = false; private HashSet<Double> distinctValueSet = null; public NumericalAggregator(AggregationFunction function) { this.sourceAttribute = function.getSourceAttribute(); this.ignoreMissings = function.isIgnoringMissings(); this.isCountingOnlyDistinct = function.isCountingOnlyDistinct(); if (isCountingOnlyDistinct) distinctValueSet = new HashSet<Double>(); } @Override public final void count(Example example) { // check whether we have to count at all if (!isMissing || ignoreMissings) { double value = example.getValue(sourceAttribute); if (isMissing && !ignoreMissings || Double.isNaN(value)) { isMissing = true; } else { if (!isCountingOnlyDistinct || distinctValueSet.add(value)) { count(value); } } } } @Override public final void count(Example example, double weight) { // check whether we have to count at all if (!isMissing || ignoreMissings) { double value = example.getValue(sourceAttribute); if (isMissing && !ignoreMissings || Double.isNaN(value)) { isMissing = true; } else { if (!isCountingOnlyDistinct || distinctValueSet.add(value)) { count(value, weight); } } } } /** * This method will count the given numerical value. This method will not be called in * cases, where the examples value for the given source Attribute is unknown. * Subclasses of this class will in this cases return either NaN if ignoreMissings is false, * or will return the value as if the examples with the missing aren't present at all. * * Please see {@link #count(double, double)} for taking weights into account. You may not mix both methods * within one aggregation run, as subclasses might implement more memory efficient data structures * when not using weights. */ protected abstract void count(double value); /** * Same as {@link #count(double)}, but taking the weight into account. You may not mix both methods * within one aggregation run, as subclasses might implement more memory efficient data structures * when not using weights. */ protected abstract void count(double value, double weight); @Override public final void set(Attribute attribute, DataRow row) { if (isMissing && !ignoreMissings) row.set(attribute, Double.NaN); else row.set(attribute, getValue()); } /** * This method has to return the numerical value of this aggregator. */ protected abstract double getValue(); }