/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/**
 *    NominalGenerator.java
 *    Copyright (C) 2008 K.Hempstalk, University of Waikato, Hamilton, New Zealand.
 */

package weka.classifiers.meta.generators;

import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;

import java.util.Enumeration;

/**
 <!-- globalinfo-start -->
 * A generator for nominal attributes.<br/>
 * <br/>
 * Generates artificial data for nominal attributes.  Each attribute value is considered to be possible, i.e. the probability of any value is always non-zero.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -D
 *  If set, generator is run in debug mode and
 *  may output additional info to the console</pre>
 *
 * <pre> -S <seed>
 *  Sets the seed of the random number generator of the generator (default: 1)</pre>
 *
 <!-- options-end -->
 *
 * @author Kathryn Hempstalk (kah18 at cs.waikato.ac.nz)
 * @version $Revision: 5793 $
 */
public class NominalGenerator
  extends RandomizableGenerator
  implements NominalAttributeGenerator {

  /** for serialization. */
  private static final long serialVersionUID = 5254947213887016283L;

  /**
   * Per-value probabilities, indexed by nominal value index.
   * Filled with counts and then normalised by {@link #buildGenerator};
   * it is <code>null</code> until that method has been called.
   */
  protected double[] m_AttCounts;

  /**
   * Returns a string describing this class' ability.
   *
   * @return A description of the class.
   */
  public String globalInfo() {
    return
        "A generator for nominal attributes.\n"
      + "\n"
      + "Generates artificial data for nominal attributes.  Each attribute value "
      + "is considered to be possible, i.e. the probability of any value is "
      + "always non-zero.";
  }

  /**
   * Sets up the generator with the counts required for generation.
   * <p/>
   * Every attribute value starts with a count of one, so a value that never
   * occurs in the data still receives a non-zero probability.  Instances
   * whose value for the attribute is missing are ignored.
   *
   * @param someinstances The instances to count up.
   * @param att The attribute to count up with.
   */
  public void buildGenerator(Instances someinstances, Attribute att) {
    m_AttCounts = new double[att.numValues()];

    // start every value at one so that no probability ends up zero
    for (int i = 0; i < m_AttCounts.length; i++) {
      m_AttCounts[i] = 1;
    }

    // the initial ones are part of the total, hence the starting value
    int totalCounts = m_AttCounts.length;

    // count up the occurrences of each value
    Enumeration<?> instancesEnum = someinstances.enumerateInstances();
    while (instancesEnum.hasMoreElements()) {
      Instance aninst = (Instance) instancesEnum.nextElement();
      if (!aninst.isMissing(att)) {
        m_AttCounts[(int) aninst.value(att)] += 1;
        totalCounts++;
      }
    }

    // turn the counts into probabilities
    for (int i = 0; i < m_AttCounts.length; i++) {
      m_AttCounts[i] /= (double) totalCounts;
    }
  }

  /**
   * Generates an index of a nominal attribute as artificial data.
   * <p/>
   * A number is drawn from <code>m_Random.nextDouble()</code> and the
   * first value index whose cumulative probability reaches that number is
   * returned.  {@link #buildGenerator} must have been called beforehand,
   * since this method reads the probabilities it computes.
   *
   * @return The index of the nominal attribute's value.
   */
  public double generate() {
    double prob = m_Random.nextDouble();

    // find the index of the attribute value with this position
    double probSoFar = 0;
    for (int i = 0; i < m_AttCounts.length; i++) {
      probSoFar += m_AttCounts[i];
      if (prob <= probSoFar) {
        return i;
      }
    }

    // Only reached when floating-point rounding leaves the summed
    // probabilities slightly below the drawn number (or when there are no
    // values at all).  The draw then lies in the top tail of the
    // distribution, so the last index is the matching one; falling back to
    // the first index would bias generation towards value 0.
    return Math.max(0, m_AttCounts.length - 1);
  }

  /**
   * Gets the probability of a given attribute value (provided as an index).
   * <p/>
   * The index is truncated to an <code>int</code> and used directly as an
   * array index; no range check is performed here.
   *
   * @param valuex The index to the attribute value.
   * @return The probability of this value.
   */
  public double getProbabilityOf(double valuex) {
    return m_AttCounts[(int) valuex];
  }

  /**
   * Gets the (natural) log of the probability of a given value.
   *
   * @param valuex The index of the nominal value.
   * @return The natural log of the probability of valuex.
   */
  public double getLogProbabilityOf(double valuex) {
    return Math.log(this.getProbabilityOf(valuex));
  }
}