/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.bayes;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map.Entry;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.table.NominalMapping;
import com.rapidminer.tools.Tools;
/**
* DiscreteDistribution is a distribution for nominal values. To calculate a probability it counts the weight/frequency of every value and returns
* the weight of the given value divided by the total weight of all examples.
* If one or more values have never been counted, a total mass of totalWeight / (numberOfValues)^2 is
* distributed equally over these missing values to prevent their probability from becoming 0. This weight mass is added to
* the total weight.
* @author Sebastian Land, Ingo Mierswa
* @version $Id: DiscreteDistribution.java,v 1.10 2008/05/10 18:28:58 stiefelolm Exp $
*/
public class DiscreteDistribution implements Distribution {

    private static final long serialVersionUID = 7573474548080998479L;

    /**
     * Weight per value, keyed by the numerical representation of the value.
     * This is the very map handed to the constructor (no copy is made).
     */
    private final HashMap<Double, Double> valueWeights;

    /** Sum of all weights, including the smoothing mass if any was distributed. */
    private final double totalWeight;

    /** The attribute whose values are described by this distribution. */
    private final Attribute attribute;

    /** The mapping obtained from the attribute at construction time. */
    private final NominalMapping mapping;

    /**
     * Creates the distribution from already counted weights.
     * <p>
     * To make sure that every value stays possible, entries with a weight of exactly 0 share a
     * total mass of <code>totalWeight / size^2</code> (size being the number of entries in the map)
     * in equal parts, and this mass is added to the total weight. If no entry has a weight of 0,
     * neither the weights nor the total weight are changed, so that the probabilities still sum
     * up to 1.
     * <p>
     * The given map is stored without copying and its zero entries are overwritten in place.
     *
     * @param attribute    the attribute, its mapping is used to resolve value names
     * @param valueWeights the weight of each value; entries with a <code>null</code> weight are left untouched
     * @param totalWeight  the total weight of all counted examples
     */
    public DiscreteDistribution(Attribute attribute, HashMap<Double, Double> valueWeights, double totalWeight) {
        this.attribute = attribute;
        this.valueWeights = valueWeights;
        this.mapping = attribute.getMapping();

        int unseenValues = 0;
        for (Double weight : valueWeights.values()) {
            if (isUnseen(weight)) {
                unseenValues++;
            }
        }

        double smoothedTotalWeight = totalWeight;
        // Only smooth if there is something to smooth: otherwise the extra mass would be added to
        // the total without being assigned to any value (and the division below would be by zero).
        if (unseenValues > 0) {
            double squaredSize = Math.pow(valueWeights.size(), 2);
            double weightPerUnseenValue = totalWeight / (squaredSize * unseenValues);
            for (Entry<Double, Double> entry : valueWeights.entrySet()) {
                if (isUnseen(entry.getValue())) {
                    entry.setValue(weightPerUnseenValue);
                }
            }
            smoothedTotalWeight += totalWeight / squaredSize;
        }
        this.totalWeight = smoothedTotalWeight;
    }

    /** Returns true if the weight is present and exactly 0, i.e. the value was never counted. */
    private static boolean isUnseen(Double weight) {
        return weight != null && weight.doubleValue() == 0;
    }

    /**
     * Returns the weight of the given value divided by the total weight.
     *
     * @param x the numerical representation of the value
     * @return the probability, or 0 if no weight is stored for the value
     */
    public double getProbability(double x) {
        Double weight = valueWeights.get(x);
        if (weight == null) {
            return 0;
        }
        return weight / totalWeight;
    }

    /**
     * Renders two tab separated lines: the value names followed by the corresponding probabilities.
     * A NaN key is printed as "unkown", a missing weight as "?".
     */
    @Override
    public String toString() {
        StringBuilder result = new StringBuilder();
        NominalMapping nameMapping = attribute.getMapping();

        // first line: names of the values
        for (Double valueKey : valueWeights.keySet()) {
            String valueName;
            if (Double.isNaN(valueKey)) {
                valueName = "unkown";
            } else {
                valueName = nameMapping.mapIndex(valueKey.intValue());
            }
            result.append(valueName).append("\t");
        }
        result.append(Tools.getLineSeparator());

        // second line: probabilities, the map is not modified in between so the order matches the names
        for (Entry<Double, Double> entry : valueWeights.entrySet()) {
            Double weight = entry.getValue();
            if (weight != null) {
                result.append(Tools.formatIntegerIfPossible(weight.doubleValue() / totalWeight)).append("\t");
            } else {
                result.append("?\t");
            }
        }
        return result.toString();
    }

    /** Always returns NaN. */
    public double getLowerBound() {
        return Double.NaN;
    }

    /** Always returns NaN. */
    public double getUpperBound() {
        return Double.NaN;
    }

    /**
     * Returns the values for which a weight is stored. The result is the key set of the
     * internal map and therefore a view on it, not a copy.
     */
    public Collection<Double> getValues() {
        return valueWeights.keySet();
    }

    /** Returns the total weight including the smoothing mass added during construction, if any. */
    public double getTotalWeight() {
        return this.totalWeight;
    }

    /**
     * Returns the name which the mapping delivers for the given value.
     * The value is cast to int for the lookup, hence NaN is looked up as index 0.
     *
     * @param value the numerical representation of the value
     */
    public String mapValue(double value) {
        return mapping.mapIndex((int) value);
    }
}