/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /** * DiscreteGenerator.java * Copyright (C) 2008 K.Hempstalk, University of Waikato, Hamilton, New Zealand. */ package weka.classifiers.meta.generators; import weka.core.Capabilities; import weka.core.Capabilities.Capability; import weka.core.Instance; import weka.core.Instances; import java.util.Arrays; /** <!-- globalinfo-start --> * An artificial data generator that uses discrete buckets for values.<br/> * <br/> * In this discrete generator, values are ranked according to how often they appear. This is not to be confused with the discrete uniform generator which gives every bucket the same probability. * <p/> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -D * If set, generator is run in debug mode and * may output additional info to the console</pre> * * <pre> -S <seed> * Sets the seed of the random number generator of the generator (default: 1)</pre> * <!-- options-end --> * * @author Kathryn Hempstalk (kah18 at cs.waikato.ac.nz) * @version $Revision: 5793 $ * @see DiscreteUniformGenerator */ public class DiscreteGenerator extends RandomizableGenerator implements InstanceHandler, NumericAttributeGenerator { /** for serialization. */ private static final long serialVersionUID = -2990312384506940726L; /** * The array of probabilities for this generator. */ protected double[][] m_Probabilities; /** * The probability of an unseen event. */ protected double m_Unseen = Double.MIN_VALUE; /** * Returns a string describing this class' ability. * * @return A description of the class. */ public String globalInfo() { return "An artificial data generator that uses discrete buckets " + "for values.\n" + "\n" + "In this discrete generator, values are ranked according to " + "how often they appear. This is not to be confused with the " + "discrete uniform generator which gives every bucket the " + "same probability."; } /** * Returns the Capabilities of this object * * @return the capabilities of this object * @see Capabilities */ public Capabilities getCapabilities() { Capabilities result = new Capabilities(this); result.enable(Capability.NUMERIC_ATTRIBUTES); result.enableAllClasses(); result.enable(Capability.MISSING_CLASS_VALUES); result.enable(Capability.NO_CLASS); return result; } /** * Builds the generator with a given set of instances. * * @param someinstances The instances that will be used to * build up the probabilities for this generator. * @throws Exception if data cannot be handled */ public void buildGenerator(Instances someinstances) throws Exception { // can generator handle the data? getCapabilities().testWithFail(someinstances); someinstances = new Instances(someinstances); someinstances.deleteWithMissing(0); //put all the values in an array double[] values = new double[someinstances.numInstances()]; for(int i = 0; i < someinstances.numInstances(); i++) { Instance aninst = someinstances.instance(i); values[i] = aninst.value(0); } Arrays.sort(values); double count = 1; for(int i = 1; i < values.length; i++) { if(values[i] != values[i - 1]) count++; } //now we know how many values we have double[][] allvals = new double[(int)count][2]; int position = 0; allvals [0][0] = values[0]; allvals [0][1] = 1; for(int i = 1; i < values.length; i++) { if(values[i] != values[i - 1]) { position++; allvals[position][0] = values[i]; allvals[position][1] = 1; } else allvals[position][1]++; } //turn the counts into probabilities for(int i = 0; i < count; i++) { allvals[i][1] /= ((double)values.length + 1); } m_Probabilities = allvals; m_Unseen = 1 / ((double)values.length + 1); } /** * Generates a value that falls under this distribution. * * @return A generated value. */ public double generate() { double aprob = m_Random.nextDouble(); double currentprob = 0; for(int i = 0; i < m_Probabilities.length; i++) { if(currentprob + m_Probabilities[i][1] >= aprob) { return m_Probabilities[i][0]; } else { currentprob += m_Probabilities[i][1]; } } return 0; } /** * Gets the probability that a value falls under * this distribution. * * * @param valuex The value to get the probability of. * @return The probability of the given value. */ public double getProbabilityOf(double valuex) { for(int i = 0; i < m_Probabilities.length; i++) { if(valuex == m_Probabilities[i][0]) return m_Probabilities[i][1]; } return m_Unseen; } /** * Gets the (natural) log of the probability of a given value. * * @param valuex The value to get the log probability of. * @return The (natural) log of the probability. */ public double getLogProbabilityOf(double valuex) { return Math.log(this.getProbabilityOf(valuex)); } }