/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.tools.math.function.aggregation;
import java.util.Map;
import java.util.TreeMap;
import com.rapidminer.example.Attribute;
/**
* Calculates the (weighted) median of some values.
*
* @author Tobias Malbrecht
*
*/
public class MedianFunction extends AbstractAggregationFunction {

    /**
     * Accumulated weight per distinct value. The map is sorted by value, which is what
     * {@link #getValue()} relies on when scanning for the middle of the weight mass.
     */
    private final TreeMap<Double, Double> valueWeightMap = new TreeMap<Double, Double>();

    /** Sum of all weights accumulated since the last {@link #reset()}. */
    private double totalWeight;

    /** Creates a median function using the inherited default for missing value handling. */
    public MedianFunction() {
        this(DEFAULT_IGNORE_MISSINGS);
    }

    /**
     * Creates a median function.
     *
     * @param ignoreMissings passed on unchanged to the superclass constructor
     */
    public MedianFunction(Boolean ignoreMissings) {
        super(ignoreMissings);
    }

    /** @return the constant name {@code "median"} */
    @Override
    public String getName() {
        return "median";
    }

    /** Clears the missing flag, the total weight and all accumulated value weights. */
    @Override
    protected void reset() {
        foundMissing = false;
        totalWeight = 0;
        // NOTE(review): the null check is presumably needed because the superclass constructor
        // calls reset() before this class' field initializers have run -- confirm before removing.
        if (valueWeightMap != null) {
            valueWeightMap.clear();
        }
    }

    /**
     * Adds a value with the given weight. A NaN value is not stored; it only sets the
     * inherited {@code foundMissing} flag.
     *
     * @param value  the value to add
     * @param weight the weight of the value
     */
    @Override
    public void update(double value, double weight) {
        addWeight(value, weight);
    }

    /**
     * Adds a value with weight 1. A NaN value is not stored; it only sets the inherited
     * {@code foundMissing} flag.
     *
     * @param value the value to add
     */
    @Override
    public void update(double value) {
        addWeight(value, 1.0d);
    }

    /** Shared accumulation step of both update variants. */
    private void addWeight(double value, double weight) {
        if (Double.isNaN(value)) {
            foundMissing = true;
            return;
        }
        Double previous = valueWeightMap.get(value);
        double accumulated = (previous == null) ? weight : previous.doubleValue() + weight;
        valueWeightMap.put(value, Double.valueOf(accumulated));
        totalWeight += weight;
    }

    /**
     * Returns the (weighted) median of the values added since the last reset.
     * <p>
     * The distinct values are scanned in ascending order while their weights are summed up.
     * The first value at which the running sum exceeds half of the total weight is the median.
     * If the running sum hits half of the total weight exactly, the median is the midpoint
     * between that value and the next larger value carrying weight. For unit weights this is
     * the usual median of an even number of values, e.g. 1.5 for the values 1, 1, 2, 3.
     * Values whose accumulated weight is zero carry no mass and are skipped.
     *
     * @return the median, or NaN if a missing value was seen and missings are not ignored,
     *         or if no median could be determined (e.g. no values were added)
     */
    @Override
    public double getValue() {
        if (foundMissing && !ignoreMissings) {
            return Double.NaN;
        }
        final double halfWeight = totalWeight / 2;
        double cumulativeWeight = 0;
        boolean exactlyAtMiddle = false;
        double lowerMiddleValue = Double.NaN;
        for (Map.Entry<Double, Double> entry : valueWeightMap.entrySet()) {
            double weight = entry.getValue().doubleValue();
            if (weight == 0) {
                // a value without weight must neither become the median nor a tie partner
                continue;
            }
            double value = entry.getKey().doubleValue();
            if (exactlyAtMiddle) {
                // The previous value closed exactly half of the weight mass, so the middle lies
                // between it and this value. Use the plain midpoint: weighting by the accumulated
                // per-value weights would let duplicates pull the result towards one side.
                return (lowerMiddleValue + value) / 2;
            }
            cumulativeWeight += weight;
            if (cumulativeWeight > halfWeight) {
                return value;
            }
            if (cumulativeWeight == halfWeight) {
                exactlyAtMiddle = true;
                lowerMiddleValue = value;
            }
        }
        return Double.NaN;
    }

    /**
     * @param attribute the attribute to check
     * @return the result of {@code attribute.isNumerical()}
     */
    @Override
    public boolean supportsAttribute(Attribute attribute) {
        return attribute.isNumerical();
    }
}