/*
 * File:                DiscreteNaiveBayesCategorizer.java
 * Authors:             Kevin R. Dixon
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 *
 * Copyright Oct 20, 2009, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
 * license for use of this work by or on behalf of the U.S. Government.
 * Export of this program may require a license from the United States
 * Government. See CopyrightHistory.txt for complete details.
 *
 */

package gov.sandia.cognition.learning.algorithm.bayes;

import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationReferences;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.learning.algorithm.SupervisedBatchLearner;
import gov.sandia.cognition.learning.data.DefaultWeightedValueDiscriminant;
import gov.sandia.cognition.learning.data.InputOutputPair;
import gov.sandia.cognition.learning.function.categorization.DiscriminantCategorizer;
import gov.sandia.cognition.statistics.distribution.DefaultDataDistribution;
import gov.sandia.cognition.util.AbstractCloneableSerializable;
import gov.sandia.cognition.util.ObjectUtil;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Implementation of a Naive Bayes Classifier for Discrete Data.  That is,
 * the categorizer takes a Collection of input attributes and infers the
 * most-likely category with the assumption that each input attribute is
 * independent of all others given the category.  In other words,
 * <BR>
 * Cml = arg max(c) P(C=c | X=inputs)
 * <BR>
 * = arg max(c) P(X=inputs AND C=c) / P(X=inputs)
 * <BR>
 * = arg max(c) P(X=inputs AND C=c) (since P(X=inputs) doesn't depend on the category).
 * <BR>
 * P(X=inputs AND C=c) = P(X=inputs|C=c) * P(C=c)
 * <BR>
 * (Naive Bayes assumption:) = P(X1=x1|C=c) * P(X2=x2|C=c) * ... * P(Xn=xn|C=c) * P(C=c).
 * <BR><BR>
 * While the DiscreteNaiveBayesCategorizer class assumes that all inputs have
 * the same dimensionality, it handles missing (unknown) data: insert a
 * "null" into the given input Collection in place of any unknown attribute.
 * Furthermore, the DiscreteNaiveBayesCategorizer class can also compute the
 * probabilities of various quantities.
 *
 * @param <InputType> Type of inputs to the categorizer.
 * @param <CategoryType> Type of the categories of the categorizer.
 * @author Kevin R. Dixon
 * @since 3.0
 */
@PublicationReferences(
    references={
        @PublicationReference(
            author={
                "Richard O. Duda",
                "Peter E. Hart",
                "David G. Stork"
            },
            title="Pattern Classification: Second Edition",
            type=PublicationType.Book,
            year=2001,
            pages={56,62}
        ),
        @PublicationReference(
            author="Wikipedia",
            title="Naive Bayes classifier",
            type=PublicationType.WebPage,
            year=2009,
            url="http://en.wikipedia.org/wiki/Naive_bayes"
        )
    }
)
public class DiscreteNaiveBayesCategorizer<InputType,CategoryType>
    extends AbstractCloneableSerializable
    implements DiscriminantCategorizer<Collection<InputType>,CategoryType,Double>
{

    /**
     * Class conditional probability table.
     */
    private Map<CategoryType,List<DefaultDataDistribution<InputType>>> conditionalProbabilities;

    /**
     * Table of category priors.
     */
    private DefaultDataDistribution<CategoryType> priorProbabilities;

    /**
     * Assumed dimensionality of the inputs.
     */
    private int inputDimensionality;
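
    // Illustrative sketch (not part of the original class; hypothetical
    // data): for a two-attribute weather example with categories "play" and
    // "noPlay", the tables above would conceptually hold
    //
    //   priorProbabilities:       { play -> 9, noPlay -> 5 }
    //   conditionalProbabilities: { play   -> [ {sunny -> 2, rainy -> 3},
    //                                           {hot -> 2, mild -> 4} ],
    //                               noPlay -> [ ... ] }
    //
    // Each DefaultDataDistribution stores raw counts, and its getFraction()
    // method turns a count into a probability by dividing by the total.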

    /**
     * Creates a new instance of DiscreteNaiveBayesCategorizer.
     */
    public DiscreteNaiveBayesCategorizer()
    {
        this( 0 );
    }

    /**
     * Creates a new instance of DiscreteNaiveBayesCategorizer.
     * @param inputDimensionality
     * Assumed dimensionality of the inputs.
     */
    public DiscreteNaiveBayesCategorizer(
        final int inputDimensionality )
    {
        this.setInputDimensionality(inputDimensionality);
    }

    /**
     * Creates a new instance of DiscreteNaiveBayesCategorizer.
     * @param inputDimensionality
     * Assumed dimensionality of the inputs.
     * @param priorProbabilities
     * Table of category priors.
     * @param conditionalProbabilities
     * Class conditional probability table.
     */
    protected DiscreteNaiveBayesCategorizer(
        final int inputDimensionality,
        final DefaultDataDistribution<CategoryType> priorProbabilities,
        final Map<CategoryType,List<DefaultDataDistribution<InputType>>> conditionalProbabilities )
    {
        this.setInputDimensionality(inputDimensionality);
        this.priorProbabilities = priorProbabilities;
        this.conditionalProbabilities = conditionalProbabilities;
    }

    @Override
    public DiscreteNaiveBayesCategorizer<InputType,CategoryType> clone()
    {
        @SuppressWarnings("unchecked")
        DiscreteNaiveBayesCategorizer<InputType,CategoryType> clone =
            (DiscreteNaiveBayesCategorizer<InputType,CategoryType>) super.clone();
        clone.conditionalProbabilities =
            new LinkedHashMap<CategoryType,List<DefaultDataDistribution<InputType>>>();
        for( CategoryType category : this.getCategories() )
        {
            clone.conditionalProbabilities.put( category,
                ObjectUtil.cloneSmartElementsAsArrayList(
                    this.conditionalProbabilities.get(category) ) );
        }
        clone.priorProbabilities = ObjectUtil.cloneSafe( this.priorProbabilities );
        return clone;
    }

    @Override
    public Set<CategoryType> getCategories()
    {
        return this.priorProbabilities.getDomain();
    }

    /**
     * Computes the probability of the given inputs.  In other words,
     * P(X=inputs) = sum over all C=c ( P(X=inputs|C=c) * P(C=c) ).
     * @param inputs
     * Inputs for which to compute the probability.
     * @return
     * Probability of the inputs, P(X=inputs).
     */
    public double computeEvidenceProbabilty(
        final Collection<InputType> inputs )
    {
        double prob = 0.0;
        for( CategoryType category : this.getCategories() )
        {
            prob += this.computeConjuctiveProbability(inputs, category);
        }
        return prob;
    }

    /**
     * Computes the posterior probability of the inputs for the given category.
     * This is quite expensive, as the denominator of Bayes rule is computed
     * by evaluating the numerator probability for each category and then
     * summing them up.  If you're only interested in the most likely
     * category, then I would STRONGLY suggest using
     * computeConjuctiveProbability, which is much cheaper.  In other words,
     * P(C=category|X=inputs) = P(X=inputs|C=category)*P(C=category)/P(X=inputs).
     * @param inputs
     * Inputs to compute the posterior.
     * @param category
     * Category to compute the posterior.
     * @return
     * Posterior probability, P(C=category|X=inputs).
     */
    public double computePosterior(
        final Collection<InputType> inputs,
        final CategoryType category )
    {
        // This is REALLY expensive, as it computes the proportionate
        // posterior for all categories and then sums them up.
        double evidenceProbability = this.computeEvidenceProbabilty(inputs);
        if( evidenceProbability > 0.0 )
        {
            double numerator = this.computeConjuctiveProbability(inputs,category);
            return numerator / evidenceProbability;
        }
        else
        {
            return 0.0;
        }
    }
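
    // A minimal usage sketch (hypothetical data; java.util.Arrays assumed
    // imported): the cheap versus the expensive way to score a category.
    //
    //   DiscreteNaiveBayesCategorizer<String,String> nbc =
    //       new DiscreteNaiveBayesCategorizer<String,String>(2);
    //   nbc.update(Arrays.asList("sunny", "hot"),  "noPlay");
    //   nbc.update(Arrays.asList("rainy", "mild"), "play");
    //
    //   // Cheap: only proportional to the posterior, but enough for arg max.
    //   double joint = nbc.computeConjuctiveProbability(
    //       Arrays.asList("sunny", "hot"), "noPlay");
    //
    //   // Expensive: normalizes by summing the joint over every category.
    //   double posterior = nbc.computePosterior(
    //       Arrays.asList("sunny", "hot"), "noPlay");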

    /**
     * Computes the class-conditional probability of the given inputs for the
     * given category, assuming that each input feature is conditionally
     * independent of all other features given the category.  In other words,
     * P(X=inputs|C=category) = P(X1=x1|C=category) * P(X2=x2|C=category) * ... * P(Xn=xn|C=category).
     * @param inputs
     * Inputs to compute the class conditional.
     * @param category
     * Category to compute the class conditional.
     * @return
     * Class conditional probability, P(X=inputs|C=category).
     */
    public double computeConditionalProbability(
        final Collection<InputType> inputs,
        final CategoryType category )
    {
        if( inputs.size() != this.getInputDimensionality() )
        {
            throw new IllegalArgumentException(
                "Input dimensionality doesn't match " + this.getInputDimensionality() );
        }

        Iterator<DefaultDataDistribution<InputType>> conditionalPMFIterator =
            this.conditionalProbabilities.get( category ).iterator();
        double conditionalProbability = 1.0;
        for( InputType input : inputs )
        {
            DefaultDataDistribution<InputType> conditionalPMF =
                conditionalPMFIterator.next();

            // A null input marks a missing feature, which is simply skipped.
            if( input != null )
            {
                conditionalProbability *= conditionalPMF.getFraction(input);
            }

            // Once the product hits zero it can never recover, so stop early.
            if( conditionalProbability <= 0.0 )
            {
                break;
            }
        }

        return conditionalProbability;
    }

    /**
     * Updates the probability tables from observing the sample inputs and
     * category.  If the tables are empty, then this observation sets the
     * assumed input dimensionality.
     * @param inputs
     * Inputs to update.
     * @param category
     * Category to update.
     */
    public void update(
        final Collection<InputType> inputs,
        final CategoryType category )
    {
        // If we have no expected input dimensionality, then assume that
        // all future inputs will be the same dimension as this one.
        if( this.getInputDimensionality() <= 0 )
        {
            this.setInputDimensionality( inputs.size() );
        }

        if( inputs.size() != this.getInputDimensionality() )
        {
            throw new IllegalArgumentException(
                "Input dimensionality doesn't match " + this.getInputDimensionality() );
        }

        // Need to make a new conditional probability table for this category.
        if( !this.getCategories().contains( category ) )
        {
            ArrayList<DefaultDataDistribution<InputType>> conditional =
                new ArrayList<DefaultDataDistribution<InputType>>(
                    this.getInputDimensionality() );
            for( int i = 0; i < this.getInputDimensionality(); i++ )
            {
                conditional.add( new DefaultDataDistribution<InputType>() );
            }
            this.conditionalProbabilities.put( category, conditional );
        }

        this.priorProbabilities.increment( category );
        Iterator<DefaultDataDistribution<InputType>> conditionalPMFIterator =
            this.conditionalProbabilities.get(category).iterator();
        for( InputType input : inputs )
        {
            DefaultDataDistribution<InputType> conditionalPMF =
                conditionalPMFIterator.next();

            // Null inputs (missing features) are not counted.
            if( input != null )
            {
                conditionalPMF.increment( input );
            }
        }
    }
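
    // Sketch of missing-data handling (hypothetical data): a null entry
    // marks an unknown attribute.  update() skips null when counting and
    // computeConditionalProbability() skips null when multiplying, so the
    // lines below train and score on the second attribute only.
    //
    //   nbc.update(Arrays.asList((String) null, "mild"), "play");
    //   double p = nbc.computeConditionalProbability(
    //       Arrays.asList((String) null, "mild"), "play");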

    /**
     * Computes the conjunctive probability of the inputs and the category.
     * This is the numerator of Bayes rule.
     * In other words,
     * <BR>
     * P( X=inputs AND C=category ) = P(X=inputs|C=category) * P(C=category).
     * <BR><BR>
     * Under the Naive Bayes assumption, each input feature is assumed to be
     * independent of all others given the category.  So, we compute the
     * above probability as
     * <BR>
     * P(X=inputs|C=c) = P(X1=x1|C=c) * P(X2=x2|C=c) * ... * P(Xn=xn|C=c).
     * <BR><BR>
     * If we're just interested in finding the most-likely category, then
     * the conjunctive probability is sufficient.
     * @param inputs
     * Inputs for which to compute the conjunctive probability.
     * @param category
     * Category for which to compute the conjunctive probability.
     * @return
     * The conjunctive probability, which is the numerator of Bayes rule.
     */
    public double computeConjuctiveProbability(
        final Collection<InputType> inputs,
        final CategoryType category )
    {
        double categoryPrior = this.getPriorProbability(category);
        if( categoryPrior > 0.0 )
        {
            double conditionalProbability =
                this.computeConditionalProbability(inputs, category);
            return conditionalProbability * categoryPrior;
        }
        else
        {
            return 0.0;
        }
    }

    @Override
    public CategoryType evaluate(
        final Collection<InputType> inputs )
    {
        return this.evaluateWithDiscriminant(inputs).getValue();
    }

    @Override
    public DefaultWeightedValueDiscriminant<CategoryType> evaluateWithDiscriminant(
        final Collection<InputType> input )
    {
        // Compute the product of the class conditionals and the prior for
        // each category, keeping the largest.
        double maxPosterior = -1.0;
        CategoryType maxCategory = null;
        for (CategoryType category : this.getCategories())
        {
            // Actually, this is only proportionate to the posterior.
            // We would need to divide by the unconditional probability
            // of the inputs to compute the according-to-Hoyle posterior.
            double posterior = this.computeConjuctiveProbability(input, category);
            if (maxPosterior < posterior)
            {
                maxPosterior = posterior;
                maxCategory = category;
            }
        }
        return DefaultWeightedValueDiscriminant.create(maxCategory, maxPosterior);
    }

    /**
     * Gets the conditional probability for the given input and category.  In
     * other words,
     * <BR>
     * P(Xindex=input|C=category).
     * @param index
     * Index to compute.
     * @param input
     * Input value to assume.
     * @param category
     * Category value to assume.
     * @return
     * Class conditional probability of the given input and category.
     */
    public double getConditionalProbability(
        final int index,
        final InputType input,
        final CategoryType category )
    {
        return this.conditionalProbabilities.get(category).get(index).getFraction(input);
    }

    /**
     * Returns the prior probability of the given category.  In other words,
     * <BR>
     * P(C=category).
     * @param category
     * Category to return the prior probability of.
     * @return
     * Prior probability of the given category.
     */
    public double getPriorProbability(
        final CategoryType category )
    {
        return this.priorProbabilities.getFraction(category);
    }

    /**
     * Getter for inputDimensionality.
     * @return
     * Assumed dimensionality of the inputs.
     */
    public int getInputDimensionality()
    {
        return this.inputDimensionality;
    }

    /**
     * Setter for inputDimensionality.  Also resets the probability tables.
     * @param inputDimensionality
     * Assumed dimensionality of the inputs.
     */
    public void setInputDimensionality(
        final int inputDimensionality )
    {
        this.conditionalProbabilities =
            new LinkedHashMap<CategoryType, List<DefaultDataDistribution<InputType>>>();
        this.priorProbabilities = new DefaultDataDistribution<CategoryType>();
        this.inputDimensionality = inputDimensionality;
    }
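
    // Sketch (hypothetical data): evaluate() returns only the winning
    // category, while evaluateWithDiscriminant() also exposes the winning
    // unnormalized conjunctive probability as the discriminant weight.
    //
    //   String best = nbc.evaluate(Arrays.asList("sunny", "hot"));
    //   DefaultWeightedValueDiscriminant<String> result =
    //       nbc.evaluateWithDiscriminant(Arrays.asList("sunny", "hot"));
    //   String category = result.getValue();  // same as best
    //   double weight = result.getWeight();   // P(X=inputs AND C=category)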

    /**
     * Learner for a DiscreteNaiveBayesCategorizer.
     * @param <InputType> Type of inputs to the categorizer.
     * @param <CategoryType> Type of the categories of the categorizer.
     */
    public static class Learner<InputType,CategoryType>
        extends AbstractCloneableSerializable
        implements SupervisedBatchLearner<Collection<InputType>,CategoryType,DiscreteNaiveBayesCategorizer<InputType,CategoryType>>
    {

        /**
         * Default constructor.
         */
        public Learner()
        {
        }

        @Override
        public DiscreteNaiveBayesCategorizer<InputType, CategoryType> learn(
            final Collection<? extends InputOutputPair<? extends Collection<InputType>, CategoryType>> data )
        {
            DiscreteNaiveBayesCategorizer<InputType,CategoryType> nbc =
                new DiscreteNaiveBayesCategorizer<InputType, CategoryType>();
            for( InputOutputPair<? extends Collection<InputType>,CategoryType> sample : data )
            {
                nbc.update(sample.getInput(), sample.getOutput());
            }
            return nbc;
        }

    }

}
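
// Batch-training sketch (not part of the original file; hypothetical data,
// and it assumes gov.sandia.cognition.learning.data.DefaultInputOutputPair):
// the nested Learner simply calls update() once per labeled example.
//
//   List<InputOutputPair<Collection<String>,String>> data =
//       new ArrayList<InputOutputPair<Collection<String>,String>>();
//   data.add(DefaultInputOutputPair.create(
//       (Collection<String>) Arrays.asList("sunny", "hot"), "noPlay"));
//   data.add(DefaultInputOutputPair.create(
//       (Collection<String>) Arrays.asList("rainy", "mild"), "play"));
//   DiscreteNaiveBayesCategorizer<String,String> nbc =
//       new DiscreteNaiveBayesCategorizer.Learner<String,String>().learn(data);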