/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/**
* MixedGaussianGenerator.java
* Copyright (C) 2008 K.Hempstalk, University of Waikato, Hamilton, New Zealand.
*/
package weka.classifiers.meta.generators;
import weka.core.Option;
import weka.core.Utils;
import java.util.Enumeration;
import java.util.Vector;
/**
<!-- globalinfo-start -->
* A mixed Gaussian artificial data generator.<br/>
* <br/>
* This generator only has two Gaussians, each sitting 3 standard deviations (by default) away from the mean of the main distribution. Each model has half of the probability. The idea is that the two sub-models form a boundary either side of the main distribution.
* <p/>
<!-- globalinfo-end -->
*
<!-- options-start -->
* Valid options are: <p/>
*
* <pre> -D
* If set, generator is run in debug mode and
* may output additional info to the console</pre>
*
* <pre> -S <seed>
* Sets the seed of the random number generator of the generator (default: 1)</pre>
*
* <pre> -M <num>
* Sets the mean of the generator
* (default: 0)</pre>
*
* <pre> -SD <num>
* Sets the standard deviation of the generator
* (default: 1)</pre>
*
* <pre> -di <distance>
* Sets the difference between the mean and what will be used
* on the lower and higher distributions for the generator. (default: 3)</pre>
*
* <pre> -da
* If set, the generator will use the absolute value of the
* difference. If not set, it will multiply the difference by
* the standard deviation.</pre>
*
<!-- options-end -->
*
* @author Kathryn Hempstalk (kah18 at cs.waikato.ac.nz)
* @version $Revision: 5793 $
*/
public class MixedGaussianGenerator
extends RandomizableDistributionGenerator
implements NumericAttributeGenerator {
/** for serialization. */
private static final long serialVersionUID = 1516470615315381362L;
/**
* The distance between the main distribution and each model.
*/
protected double m_Distance = 3;
/**
* Whether the difference is absolute, or a modifier to the
* standard deviation.
*/
protected boolean m_DistanceAbsolute = false;
/**
* Returns a string describing this class' ability.
*
* @return A description of the class.
*/
public String globalInfo() {
return
"A mixed Gaussian artificial data generator.\n"
+ "\n"
+ "This generator only has two Gaussians, each sitting "
+ "3 standard deviations (by default) away from the mean "
+ "of the main distribution. Each model has half of the "
+ "probability. The idea is that the two sub-models "
+ "form a boundary either side of the main distribution.";
}
/**
* Returns an enumeration describing the available options.
*
* @return an enumeration of all the available options.
*/
public Enumeration listOptions() {
Vector result = new Vector();
Enumeration enu = super.listOptions();
while (enu.hasMoreElements())
result.addElement(enu.nextElement());
result.addElement(new Option(
"\tSets the difference between the mean and what will be used\n"
+ "\ton the lower and higher distributions for the generator."
+ "\t(default: 3)",
"di", 1, "-di <distance>"));
result.addElement(new Option(
"\tIf set, the generator will use the absolute value of the\n"
+ "\tdifference. If not set, it will multiply the difference by\n"
+ "\tthe standard deviation.",
"da", 0, "-da"));
return result.elements();
}
/**
* Parses a given list of options. <p/>
*
<!-- options-start -->
* Valid options are: <p/>
*
* <pre> -D
* If set, generator is run in debug mode and
* may output additional info to the console</pre>
*
* <pre> -S <seed>
* Sets the seed of the random number generator of the generator (default: 1)</pre>
*
* <pre> -M <num>
* Sets the mean of the generator
* (default: 0)</pre>
*
* <pre> -SD <num>
* Sets the standard deviation of the generator
* (default: 1)</pre>
*
* <pre> -di <distance>
* Sets the difference between the mean and what will be used
* on the lower and higher distributions for the generator. (default: 3)</pre>
*
* <pre> -da
* If set, the generator will use the absolute value of the
* difference. If not set, it will multiply the difference by
* the standard deviation.</pre>
*
<!-- options-end -->
*
* @param options the list of options as an array of strings
* @throws Exception if an option is not supported
*/
public void setOptions(String[] options) throws Exception {
String tmpStr;
super.setOptions(options);
setDistanceAbsolute(Utils.getFlag("da", options));
tmpStr = Utils.getOption("di", options);
if (tmpStr.length() != 0)
setDistance(Double.parseDouble(tmpStr));
else
setDistance(3.0);
}
/**
* Gets the current settings of the generator.
*
* @return an array of strings suitable for passing to setOptions
*/
public String [] getOptions() {
Vector<String> result;
String[] options;
int i;
result = new Vector<String>();
options = super.getOptions();
for (i = 0; i < options.length; i++)
result.add(options[i]);
if (getDistanceAbsolute())
result.add("-da");
result.add("-di");
result.add("" + m_Distance);
return result.toArray(new String[result.size()]);
}
/**
* Gets the difference between the main distribution and each
* of the models. The default difference is 3, and will
* cause each model to be 3 standard deviations away from the mean.
* One model is created either side of the mean.
*
* @return The difference between the main distribution and a model.
*/
public double getDistance() {
return m_Distance;
}
/**
* Sets the difference between the main distribution and the models.
* See getDistance() for a longer explanation.
*
* @param diff The new difference.
*/
public void setDistance(double diff) {
m_Distance = diff;
}
/**
* Returns the tip text for this property.
*
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String distanceTipText() {
return "The difference between the main distribution and the models.";
}
/**
* Gets whether the difference will be an absolute value,
* or something that is used as a multiplier to the
* standard deviation.
*
* @return Whether the difference will be absolute or not.
*/
public boolean getDistanceAbsolute() {
return m_DistanceAbsolute;
}
/**
* Sets the difference to be absolute (or not).
*
* @param newdiff Whether the difference should be absolute or
* a standard deviation modifier.
*/
public void setDistanceAbsolute(boolean newdiff) {
m_DistanceAbsolute = newdiff;
}
/**
* Returns the tip text for this property.
*
* @return tip text for this property suitable for
* displaying in the explorer/experimenter gui
*/
public String distanceAbsoluteTipText() {
return "If true, then the distance is absolute.";
}
/**
* Generates a value that falls under this distribution.
*
* @return A generated value.
*/
public double generate() {
double difference = m_Distance;
if(!m_DistanceAbsolute)
difference = m_StandardDeviation * m_Distance;
if(m_Random.nextBoolean()) {
//lower distribution
double gaussian = m_Random.nextGaussian();
double value = (m_Mean - difference) + (gaussian * m_StandardDeviation);
return value;
} else {
//higher distribution
double gaussian = m_Random.nextGaussian();
double value = (m_Mean + difference) + (gaussian * m_StandardDeviation);
return value;
}
}
/**
* Gets the probability that a value falls under
* this distribution.
*
*
* @param valuex The value to get the probability of.
* @return The probability of the given value.
*/
public double getProbabilityOf(double valuex) {
double difference = m_Distance;
if(!m_DistanceAbsolute)
difference = m_StandardDeviation * m_Distance;
double prob1 = 0.5 * this.getProbability(valuex, m_Mean - difference, m_StandardDeviation);
double prob2 = 0.5 * this.getProbability(valuex, m_Mean + difference, m_StandardDeviation);
return prob1 + prob2;
}
/**
* Gets the probability that a value falls under
* a given Gaussian distribution.
*
*
* @param valuex The value to get the probability of.
* @param mean The mean of the Gaussian distribution.
* @param stddev The standard deviation of the Gaussian distribution.
* @return The probability of the given value.
*/
public double getProbability(double valuex, double mean, double stddev) {
double twopisqrt = Math.sqrt(2 * Math.PI);
double left = 1 / (stddev * twopisqrt);
double diffsquared = Math.pow((valuex - mean), 2);
double bottomright = 2 * Math.pow(stddev, 2);
double brackets = -1 * (diffsquared / bottomright);
double probx = left * Math.exp(brackets);
return probx;
}
/**
* Gets the (natural) log of the probability of a given value.
*
* @param valuex The value to get the log probability of.
* @return The (natural) log of the probability.
*/
public double getLogProbabilityOf(double valuex) {
return Math.log(this.getProbabilityOf(valuex));
}
}