/*
 * File:                BinaryCategorizerSelector.java
 * Authors:             Justin Basilico
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 *
 * Copyright October 8, 2007, Sandia Corporation.  Under the terms of Contract
 * DE-AC04-94AL85000, there is a non-exclusive license for use of this work by
 * or on behalf of the U.S. Government. Export of this program may require a
 * license from the United States Government. See CopyrightHistory.txt for
 * complete details.
 *
 */

package gov.sandia.cognition.learning.algorithm.ensemble;

import gov.sandia.cognition.annotation.CodeReview;
import gov.sandia.cognition.learning.algorithm.SupervisedBatchLearner;
import gov.sandia.cognition.learning.data.DatasetUtil;
import gov.sandia.cognition.learning.data.InputOutputPair;
import gov.sandia.cognition.learning.function.categorization.BinaryCategorizer;
import gov.sandia.cognition.util.AbstractCloneableSerializable;
import java.util.Collection;
import java.util.LinkedList;

/**
 * A "weak learner", intended for boosting algorithms, that does no fitting of
 * its own: given a fixed collection of candidate {@code BinaryCategorizer}
 * objects, it returns the candidate whose weighted error on the training data
 * is lowest.
 *
 * @param   <InputType> The type of the input.
 * @author  Justin Basilico
 * @since   2.0
 */
@CodeReview(
    reviewer="Kevin R. Dixon",
    date="2008-07-23",
    changesNeeded=false,
    comments={
        "Removed implements Serializable, since BatchLearner already does.",
        "Otherwise, looks fine."
    }
)
public class BinaryCategorizerSelector<InputType>
    extends AbstractCloneableSerializable
    implements SupervisedBatchLearner<InputType,Boolean,BinaryCategorizer<? super InputType>>
{

    /** The candidate categorizers that are evaluated and selected from. */
    protected Collection<BinaryCategorizer<? super InputType>> categorizers;

    /**
     * Creates a new {@code BinaryCategorizerSelector} whose candidate
     * collection starts out as an empty {@code LinkedList}.
     */
    public BinaryCategorizerSelector()
    {
        this(new LinkedList<BinaryCategorizer<? super InputType>>());
    }

    /**
     * Creates a new {@code BinaryCategorizerSelector} that selects from the
     * given candidates.
     *
     * @param   categorizers
     *      The categorizers to use. The collection is handed to
     *      {@link #setCategorizers(Collection)} as-is.
     */
    public BinaryCategorizerSelector(
        final Collection<BinaryCategorizer<? super InputType>> categorizers)
    {
        super();

        this.setCategorizers(categorizers);
    }

    /**
     * Picks the candidate categorizer with the smallest weighted error on the
     * given data. Candidates are visited in the iteration order of
     * {@link #getCategorizers()}; because a later candidate replaces the
     * current choice only when its error is strictly smaller, the first of
     * several tied candidates is the one returned.
     *
     * @param   data
     *      The weighted input-output pairs used to score each candidate.
     * @return
     *      The candidate with the lowest weighted error on {@code data}, or
     *      {@code null} when there are no candidates to choose from.
     */
    public BinaryCategorizer<? super InputType> learn(
        Collection<? extends InputOutputPair<? extends InputType, Boolean>> data )
    {
        BinaryCategorizer<? super InputType> selected = null;
        double lowestError = Double.MAX_VALUE;

        for ( BinaryCategorizer<? super InputType> candidate
            : this.getCategorizers() )
        {
            final double candidateError =
                this.computeWeightedError(candidate, data);

            // The first candidate is always accepted; afterwards only a
            // strictly smaller error displaces the current selection.
            final boolean improvement =
                (selected == null) || (candidateError < lowestError);
            if ( improvement )
            {
                selected = candidate;
                lowestError = candidateError;
            }
        }

        return selected;
    }

    /**
     * Sums the weights of the examples that the given categorizer gets wrong.
     * The weight of each example is obtained from
     * {@code DatasetUtil.getWeight}; examples whose weight is exactly zero
     * are skipped without being evaluated.
     *
     * @param   categorizer
     *      The categorizer to score.
     * @param   data
     *      The weighted input-output pairs to score it on.
     * @return
     *      The total weight of the examples whose estimated output differs
     *      from the actual output.
     */
    private double computeWeightedError(
        final BinaryCategorizer<? super InputType> categorizer,
        final Collection<? extends InputOutputPair<? extends InputType, Boolean>> data )
    {
        double errorSum = 0.0;

        for ( InputOutputPair<? extends InputType, Boolean> pair : data )
        {
            final double pairWeight = DatasetUtil.getWeight(pair);

            if ( pairWeight != 0.0 )
            {
                // Both values are unboxed to primitives before comparing so
                // that the comparison is by value, not by reference.
                final boolean predicted =
                    categorizer.evaluate(pair.getInput());
                final boolean expected = pair.getOutput();

                if ( predicted != expected )
                {
                    // Misclassified: this example's weight counts as error.
                    errorSum += pairWeight;
                }
            }
            // else - A zero-weight example cannot change the error.
        }

        return errorSum;
    }

    /**
     * Gets the collection of categorizers that the learner selects from.
     *
     * @return  The collection of categorizers that the learner selects from.
     */
    public Collection<BinaryCategorizer<? super InputType>> getCategorizers()
    {
        return this.categorizers;
    }

    /**
     * Sets the collection of categorizers that the learner selects from. The
     * given collection is stored by reference; no copy is made.
     *
     * @param   categorizers
     *      The collection of categorizers that the learner selects from.
     */
    public void setCategorizers(
        final Collection<BinaryCategorizer<? super InputType>> categorizers)
    {
        this.categorizers = categorizers;
    }

}