/*
 * File:                Forgetron.java
 * Authors:             Justin Basilico
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry Learning Core
 *
 * Copyright May 10, 2011, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
 * license for use of this work by or on behalf of the U.S. Government. Export
 * of this program may require a license from the United States Government.
 *
 */

package gov.sandia.cognition.learning.algorithm.perceptron.kernel;

import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.learning.function.categorization.DefaultKernelBinaryCategorizer;
import gov.sandia.cognition.learning.function.kernel.Kernel;
import gov.sandia.cognition.learning.function.kernel.KernelUtil;
import gov.sandia.cognition.util.DefaultWeightedValue;
import gov.sandia.cognition.util.WeightedValue;
import java.util.LinkedList;

/**
 * An implementation of the "self-tuned" Forgetron algorithm, which is an online
 * budgeted kernel binary categorizer learner.
 *
 * Each time the learner makes a mistake it adds the example as a new support.
 * When the number of supports exceeds the budget, the {@link #shrink} step
 * scales all weights by a computed factor and then removes the oldest support.
 *
 * Note that this class requires its own extension of the
 * {@code DefaultKernelBinaryCategorizer} to keep some extra information about
 * learning (the error count and the running value Q).
 *
 * @param   <InputType>
 *      The type of input to learn over. Passed to the kernel function.
 * @author  Justin Basilico
 * @since   3.3.0
 */
@PublicationReference(
    author={"Ofer Dekel", "Shai Shalev-Shwartz", "Yoram Singer"},
    title="The Forgetron: A Kernel-based Perceptron on a Budget",
    year=2008,
    type=PublicationType.Journal,
    publication="SIAM Journal on Computing",
    pages={1342, 1372},
    url="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.134.7604&rep=rep1&type=pdf",
    notes="This is the self-tuned version.")
public class Forgetron<InputType>
    extends AbstractOnlineBudgetedKernelBinaryCategorizerLearner<InputType>
{

    /**
     * Creates a new {@code Forgetron} with a null kernel and the default
     * budget.
     */
    public Forgetron()
    {
        this(null, DEFAULT_BUDGET);
    }

    /**
     * Creates a new {@code Forgetron} with the given kernel and budget.
     *
     * @param   kernel
     *      The kernel to use.
     * @param   budget
     *      The budget for the maximum number of supports.
     */
    public Forgetron(
        final Kernel<? super InputType> kernel,
        final int budget)
    {
        super(kernel, budget);
    }

    /**
     * Creates the initial learned object, which is a {@link Result} so that
     * the error count and Q can be tracked alongside the supports.
     *
     * @return
     *      A new, empty {@code Result} using this learner's kernel.
     */
    @Override
    public DefaultKernelBinaryCategorizer<InputType> createInitialLearnedObject()
    {
        // We need to use a special result type to keep track of the error
        // count and Q.
        return new Result<InputType>(this.getKernel());
    }

    /**
     * Performs one online update. If the target already categorizes the input
     * with a positive margin nothing changes. Otherwise the example is added
     * as a support and the shrinking step is applied until the number of
     * supports is within the budget.
     *
     * @param   target
     *      The categorizer to update. It is cast to {@link Result} when an
     *      error is made, so it must have been created by
     *      {@link #createInitialLearnedObject()}.
     * @param   input
     *      The input example.
     * @param   label
     *      The true label of the example.
     */
    @Override
    public void update(
        final DefaultKernelBinaryCategorizer<InputType> target,
        final InputType input,
        final boolean label)
    {
        // Predict the output as a double (negative values are false, positive
        // are true).
        final double prediction = target.evaluateAsDouble(input);
        final double actual = label ? +1.0 : -1.0;
        final double margin = prediction * actual;

        if (margin > 0.0)
        {
            // Did not make an error.
            return;
        }

        // Cast the result type.
        final Result<InputType> result = (Result<InputType>) target;

        // We've made an error.
        result.errorCount++;

        // Add the new example with a sigma of 1. This value will be adjusted
        // later, if needed.
        result.add(input, actual);

        while (target.getExampleCount() > this.getBudget())
        {
            // We've violated the budget. Normally this loop is only executed
            // once, though if the budget is changed, we do it multiple times
            // to bring the number of supports back down to the budget.
            shrink(result);
        }
    }

    /**
     * Apply the shrinking step of the algorithm: compute the scaling factor
     * (phi), scale all of the weights by it, add the cost of removing the
     * oldest support to Q, and then remove the oldest support.
     *
     * @param   result
     *      The result object to shrink. Its weights, its Q value, and its
     *      list of supports are modified.
     */
    protected void shrink(
        final Result<InputType> result)
    {
        // Get the oldest support. We will later remove it.
        final WeightedValue<InputType> oldest = result.get(0);

        // Get the weight of the oldest, its sigma value (remove the
        // sign, which encodes the label), and its label.
        final double oldestWeight = oldest.getWeight();
        double sigmaOldest = Math.abs(oldestWeight);
        final double yOldest = oldestWeight >= 0.0 ? +1.0 : -1.0;

        // Evaluate the function on the oldest.
        double fOldest = result.evaluateAsDouble(oldest.getValue());

        // Compute the a, b, c, and d values to figure out how to make the
        // update. These are the coefficients (and discriminant) of a
        // quadratic in the scaling factor.
        final double a = sigmaOldest * sigmaOldest
            - 2.0 * sigmaOldest * yOldest * fOldest;
        final double b = 2.0 * sigmaOldest;
        final double c = result.q - 15.0 / 32.0 * result.errorCount;
        final double d = b * b - 4.0 * a * c;

        // Now compute the update value (phi) that all of the weights will
        // be scaled by.
        // NOTE(review): when a > 0.0 and d < 0.0, Math.sqrt(d) is NaN and the
        // weights would be scaled by NaN. Presumably the algorithm keeps
        // c <= 0 so that d >= 0 whenever a > 0 -- confirm.
        final double update;
        if (a > 0.0
            || (a < 0.0 && d > 0.0 && (-b - Math.sqrt(d)) / (2.0 * a) > 1.0))
        {
            update = Math.min(1.0, (-b + Math.sqrt(d)) / (2.0 * a));
        }
        else if (a == 0.0)
        {
            update = Math.min(1.0, -c / b);
        }
        else
        {
            // Remove-oldest Perceptron update.
            update = 1.0;
        }

        // Perform the scaling as long as the update is not 1.0.
        if (update != 1.0)
        {
            KernelUtil.scaleEquals(result, update);
        }

        // Update Q with the cost of removing the oldest support. All of the
        // weights were scaled by the update, so we scale sigma and f of
        // the oldest instead of recomputing fOldest.
        sigmaOldest *= update;
        fOldest *= update;
        result.q += sigmaOldest * sigmaOldest + 2.0 * sigmaOldest
            - 2.0 * sigmaOldest * yOldest * fOldest;

        // Remove the oldest.
        result.remove(0);
    }

    /**
     * The result object learned by the {@code Forgetron}, which extends
     * the {@code DefaultKernelBinaryCategorizer} with some additional state
     * information needed in the update step.
     *
     * @param   <InputType>
     *      The input type to categorize, which is passed to the kernel
     *      function.
     */
    public static class Result<InputType>
        extends DefaultKernelBinaryCategorizer<InputType>
    {

        /** The number of errors that the categorizer has made in the learning
         *  step. */
        protected long errorCount;

        /** The value of Q for the algorithm. */
        protected double q;

        /**
         * Creates a new {@code Result} with a null kernel.
         */
        public Result()
        {
            this(null);
        }

        /**
         * Creates a new {@code Result} with the given kernel, no supports,
         * a bias of zero, an error count of zero, and a Q of zero.
         *
         * @param   kernel
         *      The kernel to use.
         */
        public Result(
            final Kernel<? super InputType> kernel)
        {
            // Use a linked list to deal with the fact that we're going to
            // enforce a budget and thus will continuously remove the first
            // element when we need to add new supports.
            super(kernel, new LinkedList<DefaultWeightedValue<InputType>>(),
                0.0);

            // Start with no errors and a Q of zero. These are stated
            // explicitly rather than passing the not-yet-assigned fields
            // back to their own setters.
            this.setErrorCount(0L);
            this.setQ(0.0);
        }

        /**
         * Gets the error count.
         *
         * @return
         *      The error count.
         */
        public long getErrorCount()
        {
            return this.errorCount;
        }

        /**
         * Sets the error count.
         *
         * @param   errorCount
         *      The error count.
         */
        protected void setErrorCount(
            final long errorCount)
        {
            this.errorCount = errorCount;
        }

        /**
         * Gets the value Q updated by the algorithm.
         *
         * @return
         *      The Q value.
         */
        protected double getQ()
        {
            return this.q;
        }

        /**
         * Sets the value Q updated by the algorithm.
         *
         * @param   q
         *      The Q value.
         */
        protected void setQ(
            final double q)
        {
            this.q = q;
        }

    }

    /**
     * An implementation of the "basic" Forgetron algorithm, which is an online
     * budgeted kernel binary categorizer learner. Note that this appears to
     * be somewhat for illustrative purposes only since it uses the worst-case
     * definition of the shrinking coefficients and thus tends to perform
     * poorly.
     *
     * @param   <InputType>
     *      The type of input to learn over. Passed to the kernel function.
     * @author  Justin Basilico
     * @since   3.3.0
     */
    @PublicationReference(
        author={"Ofer Dekel", "Shai Shalev-Shwartz", "Yoram Singer"},
        title="The Forgetron: A Kernel-based Perceptron on a Budget",
        year=2008,
        type=PublicationType.Journal,
        publication="SIAM Journal on Computing",
        pages={1342, 1372},
        url="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.134.7604&rep=rep1&type=pdf",
        notes="This is the basic version.")
    public static class Basic<InputType>
        extends AbstractOnlineBudgetedKernelBinaryCategorizerLearner<InputType>
    {

        /**
         * Creates a new {@code Forgetron.Basic} with a null kernel and default
         * budget.
         */
        public Basic()
        {
            this(null, DEFAULT_BUDGET);
        }

        /**
         * Creates a new {@code Forgetron.Basic} with the given kernel and
         * budget.
         *
         * @param   kernel
         *      The kernel to use.
         * @param   budget
         *      The budget for the maximum number of supports.
         */
        public Basic(
            final Kernel<? super InputType> kernel,
            final int budget)
        {
            super(kernel, budget);
        }

        /**
         * Creates the initial learned object: an empty categorizer with this
         * learner's kernel and a bias of zero.
         *
         * @return
         *      A new, empty categorizer backed by a linked list of supports.
         */
        @Override
        public DefaultKernelBinaryCategorizer<InputType> createInitialLearnedObject()
        {
            // Use a linked list underneath to make sure removing the oldest
            // element is fast.
            return new DefaultKernelBinaryCategorizer<InputType>(
                this.getKernel(),
                new LinkedList<DefaultWeightedValue<InputType>>(), 0.0);
        }

        /**
         * Performs one online update. If the target already categorizes the
         * input with a positive margin nothing changes. Otherwise the example
         * is added as a support, all weights are scaled by the shrinking
         * coefficient, and the oldest supports are removed until the budget
         * is met.
         *
         * @param   target
         *      The categorizer to update.
         * @param   input
         *      The input example.
         * @param   label
         *      The true label of the example.
         */
        @Override
        public void update(
            final DefaultKernelBinaryCategorizer<InputType> target,
            final InputType input,
            final boolean label)
        {
            // Predict the output as a double (negative values are false,
            // positive are true).
            final double prediction = target.evaluateAsDouble(input);
            final double actual = label ? +1.0 : -1.0;
            final double margin = prediction * actual;

            if (margin > 0.0)
            {
                // Did not make an error.
                return;
            }

            // Add the new example.
            target.add(input, actual);

// TODO: It may be possible to cache the norm to avoid the O(b^2) computation,
// with b=budget, that occurs from this call to create an O(b) algorithm.
// However, this algorithm doesn't perform very well, so it is not currently
// worth the time to do the optimization.
// -- jdbasil (2011-05-11)
            // Compute the norm.
            final double norm = KernelUtil.norm2(target);

            // We use the B + 1 in the following equations, so cache it.
            final double b = this.budget + 1;

            // Compute minimum of (b + 1)^(-1 / (2 * (b + 1)) and
            // U / ||f't|| to get the update, phi.
            final double r = Math.pow(b, -1.0 / (2.0 * b));
            final double u = 0.25 * Math.sqrt(b / Math.log(b));
            final double update = Math.min(r, u / norm);

            // Scale the sigmas by phi.
            KernelUtil.scaleEquals(target, update);

            // If we've violated the budget, remove the oldest. Normally this
            // loop is only executed once, though if the budget is changed, we
            // do it multiple times to bring the number of supports back down
            // to the budget.
            while (target.getExampleCount() > this.budget)
            {
                target.remove(0);
            }
        }

    }

    /**
     * An implementation of the "greedy" Forgetron algorithm, which is an online
     * budgeted kernel binary categorizer learner. It is an extension of the
     * "self-tuned" algorithm that first looks, starting from the oldest
     * support, for a support whose psi value is at most 15/32. If one is
     * found, it is removed and no shrinking is done; otherwise the normal
     * self-tuned shrinking step is applied.
     *
     * @param   <InputType>
     *      The type of input to learn over. Passed to the kernel function.
     * @author  Justin Basilico
     * @since   3.3.0
     */
    @PublicationReference(
        author={"Ofer Dekel", "Shai Shalev-Shwartz", "Yoram Singer"},
        title="The Forgetron: A Kernel-based Perceptron on a Budget",
        year=2008,
        type=PublicationType.Journal,
        publication="SIAM Journal on Computing",
        pages={1342, 1372},
        url="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.134.7604&rep=rep1&type=pdf",
        notes="This is the greedy version.")
    public static class Greedy<InputType>
        extends Forgetron<InputType>
    {

        /**
         * Creates a new {@code Forgetron.Greedy} with a null kernel and default
         * budget.
         */
        public Greedy()
        {
            this(null, DEFAULT_BUDGET);
        }

        /**
         * Creates a new {@code Forgetron.Greedy} with the given kernel and
         * budget.
         *
         * @param   kernel
         *      The kernel to use.
         * @param   budget
         *      The budget for the maximum number of supports.
         */
        public Greedy(
            final Kernel<? super InputType> kernel,
            final int budget)
        {
            super(kernel, budget);
        }

        /**
         * Applies the greedy shrinking step: removes the first support whose
         * psi value is at or below the threshold, or falls back to the
         * self-tuned shrinking step when there is no such support.
         *
         * @param   result
         *      The result object to shrink. Exactly one support is removed.
         */
        @Override
        protected void shrink(
            final Result<InputType> result)
        {
            // If we find an entry whose psi is below this threshold, starting
            // from the beginning, then we will remove that one rather than
            // doing a full shrinking using the oldest value.
            final double threshold = 15.0 / 32.0;

// TODO: This is an O(b^2) implementation, where b is the budget of the
// algorithm. It should be possible to do an O(b) implementation if the
// f values are cached and updated whenever a support is added/removed or the
// weights are scaled.
// -- jdbasil (2011-05-11)
            int i = 0;
            for (DefaultWeightedValue<InputType> example
                : result.getExamples())
            {
                final double weight = example.getWeight();
                final double sigma = Math.abs(weight);
                final double y = weight >= 0.0 ? +1.0 : -1.0;
                final double f = result.evaluateAsDouble(example.getValue());
                final double psi = sigma * sigma + 2.0 * sigma
                    - 2.0 * sigma * y * f;

                if (psi <= threshold)
                {
                    // Less than the threshold, so remove it and avoid doing
                    // the shrinking step. Returning immediately also means
                    // the iteration never continues after the removal.
                    result.remove(i);
                    return;
                }

                i++;
            }

            // Do the normal shrinking step.
            super.shrink(result);
        }

    }

}