/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* DistributionUtils.java
* Copyright (C) 2004 Stijn Lievens
*
*/
package weka.classifiers.misc.monotone;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.estimators.DiscreteEstimator;
import java.util.Arrays;
/**
* Class with some simple methods acting on
* <code> CumulativeDiscreteDistribution. </code>
* All of the methods in this class are very easily implemented
* and the main use of this class is to gather all these methods
* in a single place. It could be argued that some of the methods
* should be implemented in the class
* <code> CumulativeDiscreteDistribution </code> itself.
* <p>
* This implementation is part of the master's thesis: "Studie
* en implementatie van instantie-gebaseerde algoritmen voor gesuperviseerd
* rangschikken", Stijn Lievens, Ghent University, 2004.
* </p>
*
* @author Stijn Lievens (stijn.lievens@ugent.be)
* @version $Revision: 5922 $
*/
public class DistributionUtils
  implements RevisionHandler {

  /**
   * Constant indicating the maximal number of classes
   * for which there is a minimal and maximal distribution
   * present in the pool.
   * One of the purposes of this class is to serve as a factory
   * for minimal and maximal cumulative probability distributions.
   * The pooled instances are created once, when this class is
   * initialised, and handed out again every time one is needed.
   */
  private static final int MAX_CLASSES = 20;

  /**
   * Array filled with minimal cumulative discrete probability
   * distributions. This means that probability one is given to the
   * first element, i.e. every cumulative probability equals one.
   * Entry <code> i </code> holds the distribution over <code> i </code>
   * elements; entry 0 is unused. This array serves as a pool for the method
   * <code> getMinimalCumulativeDiscreteDistribution. </code>
   */
  private static final CumulativeDiscreteDistribution[] m_minimalDistributions;

  /**
   * Array filled with maximal cumulative discrete probability
   * distributions. This means that probability one is given to the
   * largest element, i.e. only the last cumulative probability is one.
   * Entry <code> i </code> holds the distribution over <code> i </code>
   * elements; entry 0 is unused. This array serves as a pool for the method
   * <code> getMaximalCumulativeDiscreteDistribution. </code>
   */
  private static final CumulativeDiscreteDistribution[] m_maximalDistributions;

  // Fill both static pools. Every distribution is built from its own
  // freshly allocated array, so the pools stay correct regardless of
  // whether the CumulativeDiscreteDistribution constructor copies its
  // argument or keeps a reference to it.
  static {
    m_minimalDistributions = new CumulativeDiscreteDistribution[MAX_CLASSES + 1];
    m_maximalDistributions = new CumulativeDiscreteDistribution[MAX_CLASSES + 1];
    for (int i = 1; i <= MAX_CLASSES; i++) {
      m_maximalDistributions[i] =
        new CumulativeDiscreteDistribution(maximalCdfArray(i));
      m_minimalDistributions[i] =
        new CumulativeDiscreteDistribution(minimalCdfArray(i));
    }
  }

  /**
   * Builds the cumulative probabilities of the minimal distribution:
   * an array of the given length filled with ones.
   *
   * @param numClasses the (positive) number of elements
   * @return a new array of length <code> numClasses </code> filled with 1
   */
  private static double[] minimalCdfArray(int numClasses) {
    double[] cdf = new double[numClasses];
    Arrays.fill(cdf, 1);
    return cdf;
  }

  /**
   * Builds the cumulative probabilities of the maximal distribution:
   * an array of the given length that is zero everywhere except for
   * the last entry, which is one.
   *
   * @param numClasses the (positive) number of elements
   * @return a new array of length <code> numClasses </code>
   */
  private static double[] maximalCdfArray(int numClasses) {
    double[] cdf = new double[numClasses];
    cdf[numClasses - 1] = 1;
    return cdf;
  }

  /**
   * Checks whether an interpolation parameter lies in [0,1].
   * The test is written positively so that <code> NaN </code>, for which
   * every comparison is false, is reported as being out of range.
   *
   * @param s the interpolation parameter to check
   * @return <code> true </code> if and only if 0 &lt;= s &lt;= 1
   */
  private static boolean inUnitInterval(double s) {
    return s >= 0 && s <= 1;
  }

  /**
   * Compute a linear interpolation between the two given
   * <code> CumulativeDiscreteDistribution. </code>
   *
   * @param cdf1 the first <code> CumulativeDiscreteDistribution </code>
   * @param cdf2 the second <code> CumulativeDiscreteDistribution </code>
   * @param s the interpolation parameter
   * @return (1 - s) &times; cdf1 + s &times; cdf2
   * @throws IllegalArgumentException if the two distributions
   * don't have the same size or if the parameter <code> s </code>
   * is not in the range [0,1] (this includes <code> NaN </code>)
   */
  public static CumulativeDiscreteDistribution interpolate(
      CumulativeDiscreteDistribution cdf1,
      CumulativeDiscreteDistribution cdf2, double s) throws IllegalArgumentException {

    int n = cdf1.getNumSymbols();
    if (n != cdf2.getNumSymbols()) {
      throw new IllegalArgumentException
      ("CumulativeDiscreteDistributions don't have "
          + "the same size");
    }

    if (!inUnitInterval(s)) {
      throw new IllegalArgumentException
      ("Parameter s exceeds bounds");
    }

    double[] res = new double[n];
    for (int i = 0; i < n; i++) {
      res[i] = (1 - s) * cdf1.getCumulativeProbability(i)
        + s * cdf2.getCumulativeProbability(i);
    }
    return new CumulativeDiscreteDistribution(res);
  }

  /**
   * Compute a linear interpolation between the two given
   * <code> CumulativeDiscreteDistribution, </code> using a separate
   * interpolation parameter for every element.
   *
   * @param cdf1 the first <code> CumulativeDiscreteDistribution </code>
   * @param cdf2 the second <code> CumulativeDiscreteDistribution </code>
   * @param s the interpolation parameters, only the relevant number
   * of entries is used, so the array may be longer than the common
   * length of <code> cdf1 </code> and <code> cdf2 </code>
   * @return (1 - s) &times; cdf1 + s &times; cdf2, or more specifically
   * a distribution cd such that <code>
   * cd.getCumulativeProbability(i) =
   * (1-s[i]) &times; cdf1.getCumulativeProbability(i) +
   * s[i] &times; cdf2.getCumulativeProbability(i) </code>
   * @throws IllegalArgumentException if the two distributions
   * don't have the same size, if the array <code> s </code> is shorter
   * than that size, or if one of the used entries of <code> s </code>
   * is not in the range <code> [0,1] </code>
   * (this includes <code> NaN </code>)
   */
  public static CumulativeDiscreteDistribution interpolate(
      CumulativeDiscreteDistribution cdf1,
      CumulativeDiscreteDistribution cdf2, double[] s) throws IllegalArgumentException {

    int n = cdf1.getNumSymbols();
    if (n != cdf2.getNumSymbols()) {
      throw new IllegalArgumentException
      ("CumulativeDiscreteDistributions don't have "
          + "the same size");
    }

    if (n > s.length) {
      throw new IllegalArgumentException
      ("Array with interpolation parameters is not "
          + " long enough");
    }

    double[] res = new double[n];
    for (int i = 0; i < n; i++) {
      // only the first n entries of s are used, so only those are validated
      if (!inUnitInterval(s[i])) {
        throw new IllegalArgumentException
        ("Interpolation parameter exceeds bounds");
      }
      res[i] = (1 - s[i]) * cdf1.getCumulativeProbability(i)
        + s[i] * cdf2.getCumulativeProbability(i);
    }
    return new CumulativeDiscreteDistribution(res);
  }

  /**
   * Compute a linear interpolation between the two given
   * <code> DiscreteDistribution. </code>
   *
   * @param ddf1 the first <code> DiscreteDistribution </code>
   * @param ddf2 the second <code> DiscreteDistribution </code>
   * @param s the interpolation parameter
   * @return <code> (1 - s) &times; ddf1 + s &times; ddf2 </code>
   * @throws IllegalArgumentException if the two distributions
   * don't have the same size or if the parameter <code> s </code>
   * is not in the range [0,1] (this includes <code> NaN </code>)
   */
  public static DiscreteDistribution interpolate(
      DiscreteDistribution ddf1,
      DiscreteDistribution ddf2, double s) throws IllegalArgumentException {

    int n = ddf1.getNumSymbols();
    if (n != ddf2.getNumSymbols()) {
      throw new IllegalArgumentException
      ("DiscreteDistributions don't have "
          + "the same size");
    }

    if (!inUnitInterval(s)) {
      throw new IllegalArgumentException
      ("Parameter s exceeds bounds");
    }

    double[] res = new double[n];
    for (int i = 0; i < n; i++) {
      res[i] = (1 - s) * ddf1.getProbability(i)
        + s * ddf2.getProbability(i);
    }
    return new DiscreteDistribution(res);
  }

  /**
   * Create a new <code> CumulativeDiscreteDistribution </code>
   * that is the minimum of the two given <code>
   * CumulativeDiscreteDistribution. </code>
   * Each component of the resulting probability distribution
   * is the minimum of the two corresponding components. <br/>
   * Note: despite of its name, the returned cumulative probability
   * distribution dominates both the arguments of this method.
   *
   * @param cdf1 first <code> CumulativeDiscreteDistribution </code>
   * @param cdf2 second <code> CumulativeDiscreteDistribution </code>
   * @return the minimum of the two distributions
   * @throws IllegalArgumentException if the two distributions
   * don't have the same length
   */
  public static CumulativeDiscreteDistribution takeMin(
      CumulativeDiscreteDistribution cdf1,
      CumulativeDiscreteDistribution cdf2) throws IllegalArgumentException {

    int n = cdf1.getNumSymbols();
    if (n != cdf2.getNumSymbols()) {
      throw new IllegalArgumentException
      ("Cumulative distributions don't have the same length");
    }

    double[] cdf = new double[n];
    for (int i = 0; i < n; i++) {
      cdf[i] = Math.min(cdf1.getCumulativeProbability(i),
          cdf2.getCumulativeProbability(i));
    }
    return new CumulativeDiscreteDistribution(cdf);
  }

  /**
   * Create a new <code> CumulativeDiscreteDistribution </code>
   * that is the maximum of the two given <code>
   * CumulativeDiscreteDistribution. </code>
   * Each component of the resulting probability distribution
   * is the maximum of the two corresponding components. <br/>
   * Note: despite of its name, the returned cumulative probability
   * distribution is dominated by both the arguments of this method.
   *
   * @param cdf1 first <code> CumulativeDiscreteDistribution </code>
   * @param cdf2 second <code> CumulativeDiscreteDistribution </code>
   * @return the maximum of the two distributions
   * @throws IllegalArgumentException if the two distributions
   * don't have the same length
   */
  public static CumulativeDiscreteDistribution takeMax(
      CumulativeDiscreteDistribution cdf1,
      CumulativeDiscreteDistribution cdf2) throws IllegalArgumentException {

    int n = cdf1.getNumSymbols();
    if (n != cdf2.getNumSymbols()) {
      throw new IllegalArgumentException
      ("Cumulative distributions don't have the same length");
    }

    double[] cdf = new double[n];
    for (int i = 0; i < n; i++) {
      cdf[i] = Math.max(cdf1.getCumulativeProbability(i),
          cdf2.getCumulativeProbability(i));
    }
    return new CumulativeDiscreteDistribution(cdf);
  }

  /**
   * Converts a <code> DiscreteEstimator </code> to an array of
   * doubles, where entry <code> i </code> is the value returned by
   * <code> df.getProbability(i). </code>
   *
   * @param df the <code> DiscreteEstimator </code> to be converted
   * @return an array of doubles of length <code> df.getNumSymbols() </code>
   * representing the <code> DiscreteEstimator </code>
   */
  public static double[] getDistributionArray(DiscreteEstimator df) {
    double[] dfa = new double[df.getNumSymbols()];
    for (int i = 0; i < dfa.length; i++) {
      dfa[i] = df.getProbability(i);
    }
    return dfa;
  }

  /**
   * Get the minimal <code> CumulativeDiscreteDistribution </code>
   * over <code> numClasses </code> elements. This means that
   * a probability of one is assigned to the first element.
   * For up to <code> MAX_CLASSES </code> elements a pooled instance
   * is returned, otherwise a new instance is created.
   *
   * @param numClasses the number of elements
   * @return the minimal <code> CumulativeDiscreteDistribution </code>
   * over the requested number of elements
   * @throws IllegalArgumentException if <code> numClasses </code>
   * is smaller or equal than 0
   */
  public static CumulativeDiscreteDistribution getMinimalCumulativeDiscreteDistribution(
      int numClasses) throws IllegalArgumentException {

    if (numClasses <= 0) {
      throw new IllegalArgumentException
      ("Number of elements must be positive");
    }
    if (numClasses <= MAX_CLASSES) {
      return m_minimalDistributions[numClasses];
    }
    return new CumulativeDiscreteDistribution(minimalCdfArray(numClasses));
  }

  /**
   * Get the maximal <code> CumulativeDiscreteDistribution </code>
   * over <code> numClasses </code> elements. This means that
   * a probability of one is assigned to the last class.
   * For up to <code> MAX_CLASSES </code> elements a pooled instance
   * is returned, otherwise a new instance is created.
   *
   * @param numClasses the number of elements
   * @return the maximal <code> CumulativeDiscreteDistribution </code>
   * over the requested number of elements
   * @throws IllegalArgumentException if <code> numClasses </code>
   * is smaller or equal than 0
   */
  public static CumulativeDiscreteDistribution getMaximalCumulativeDiscreteDistribution(
      int numClasses) throws IllegalArgumentException {

    if (numClasses <= 0) {
      throw new IllegalArgumentException
      ("Number of elements must be positive");
    }
    if (numClasses <= MAX_CLASSES) {
      return m_maximalDistributions[numClasses];
    }
    return new CumulativeDiscreteDistribution(maximalCdfArray(numClasses));
  }

  /**
   * Returns the revision string.
   *
   * @return the revision
   */
  public String getRevision() {
    return RevisionUtils.extract("$Revision: 5922 $");
  }
}