Forgetron.java example

Explorer
Foundry-master
- Components
/*
 * File:                Forgetron.java
 * Authors:             Justin Basilico
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry Learning Core
 * 
 * Copyright May 10, 2011, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive 
 * license for use of this work by or on behalf of the U.S. Government. Export 
 * of this program may require a license from the United States Government.
 *
 */

package gov.sandia.cognition.learning.algorithm.perceptron.kernel;

import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.learning.function.categorization.DefaultKernelBinaryCategorizer;
import gov.sandia.cognition.learning.function.kernel.Kernel;
import gov.sandia.cognition.learning.function.kernel.KernelUtil;
import gov.sandia.cognition.util.DefaultWeightedValue;
import gov.sandia.cognition.util.WeightedValue;
import java.util.LinkedList;

/**
 * An implementation of the "self-tuned" Forgetron algorithm, which is an online
 * budgeted kernel binary categorizer learner.
 *
 * Note that this class requires its own extension of the
 * {@code DefaultKernelBinaryCategorizer} to keep some extra information about
 * learning.
 *
 * @param   <InputType>
 *      The type of input to learn over. Passed to the kernel function.
 * @author  Justin Basilico
 * @since   3.3.0
 */
@PublicationReference(
    author={"Ofer Dekel", "Shai Shalev-Shwartz", "Yoram Singer"},
    title="The Forgetron: A Kernel-based Perceptron on a Budget",
    year=2008,
    type=PublicationType.Journal,
    publication="SIAM Journal on Computing",
    pages={1342, 1372},
    url="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.134.7604&rep=rep1&type=pdf",
    notes="This is the self-tuned version.")
public class Forgetron<InputType>
    extends AbstractOnlineBudgetedKernelBinaryCategorizerLearner<InputType>
{
    
    /**
     * Creates a new {@code Forgetron} with a null kernel and the default
     * budget ({@code DEFAULT_BUDGET}). This simply delegates to
     * {@link #Forgetron(Kernel, int)}.
     */
    public Forgetron()
    {
        // No kernel is supplied here; null is passed through as the kernel.
        this(null, DEFAULT_BUDGET);
    }

    /**
     * Creates a new {@code Forgetron} with the given kernel and budget. Both
     * values are passed unchanged to the superclass constructor.
     *
     * @param   kernel
     *      The kernel to use.
     * @param   budget
     *      The budget for the maximum number of supports. The update step
     *      shrinks the learned categorizer whenever its number of supports
     *      exceeds this value.
     */
    public Forgetron(
        final Kernel<? super InputType> kernel,
        final int budget)
    {
        super(kernel, budget);
    }

    /**
     * Creates the initial learned object for this learner, which is an empty
     * {@link Result} built on this learner's kernel.
     *
     * @return
     *      A new {@code Result} that uses the kernel of this learner.
     */
    @Override
    public DefaultKernelBinaryCategorizer<InputType> createInitialLearnedObject()
    {
        // The plain categorizer has no place to store the error count or Q,
        // so the Forgetron-specific result type is handed back instead.
        final Result<InputType> initial =
            new Result<InputType>(this.getKernel());
        return initial;
    }

    /**
     * Performs one online update of the given categorizer using a single
     * labeled example. If the example is predicted with a positive margin,
     * nothing changes. Otherwise the error count is incremented, the example
     * is added as a new support, and the shrinking step is applied until the
     * number of supports is back within the budget.
     *
     * @param   target
     *      The categorizer to update. When a mistake is made it is cast to
     *      {@link Result}, so it must be an instance of that class (as
     *      returned by {@link #createInitialLearnedObject()}).
     * @param   input
     *      The input of the example.
     * @param   label
     *      The label of the example.
     * @throws  ClassCastException
     *      If a mistake is made and {@code target} is not a {@code Result}.
     */
    @Override
    public void update(
        final DefaultKernelBinaryCategorizer<InputType> target,
        final InputType input,
        final boolean label)
    {
        // Encode the label as +1 / -1 so that the product with the
        // real-valued prediction is positive exactly when the signs agree.
        final double actual;
        if (label)
        {
            actual = +1.0;
        }
        else
        {
            actual = -1.0;
        }

        final double prediction = target.evaluateAsDouble(input);
        if (prediction * actual > 0.0)
        {
            // Positive margin: the example was categorized correctly, so
            // there is nothing to learn from it.
            return;
        }

        // A mistake was made. From here on the Forgetron-specific state is
        // needed, so work with the target as a Result.
        final Result<InputType> learned = (Result<InputType>) target;
        learned.errorCount += 1;

        // Add the new example with a sigma of 1 (the weight only carries
        // the sign of the label). The value is adjusted later by the
        // shrinking step, if needed.
        learned.add(input, actual);

        // Shrink while the budget is violated. Normally this runs at most
        // once, but if the budget has been changed it may take several
        // passes to bring the number of supports back down to the budget.
        while (learned.getExampleCount() > this.getBudget())
        {
            this.shrink(learned);
        }
    }

    /**
     * Applies the shrinking step of the algorithm. A scaling factor (phi) is
     * computed from the oldest support, the current value of Q, and the error
     * count. If the factor is not 1.0, the weights of all of the supports are
     * scaled by it. The value of Q on the result is then updated and the
     * oldest support (the one at index 0) is removed.
     *
     * @param   result
     *      The result object to shrink. Its supports may be rescaled, its
     *      value of Q is updated, and its oldest support is removed.
     */
    protected void shrink(
        final Result<InputType> result)
    {
        // Get the oldest support. We will later remove it.
        final WeightedValue<InputType> oldest = result.get(0);
        // Get the weight of the oldest, its sigma value (remove the
        // sign, which encodes the label), and its label.
        final double oldestWeight = oldest.getWeight();
        double sigmaOldest = Math.abs(oldestWeight);
        final double yOldest = oldestWeight >= 0.0 ? +1.0 : -1.0;
        // Evaluate the function on the oldest.
        double fOldest = result.evaluateAsDouble(oldest.getValue());
        // Compute the a, b, c, and d values to figure out how to make the
        // update. Below, a, b, and c are used as the coefficients of a
        // quadratic in the scaling factor, and d is its discriminant
        // (b^2 - 4ac).
        final double a =
            sigmaOldest * sigmaOldest - 2.0 * sigmaOldest * yOldest * fOldest;
        final double b = 2.0 * sigmaOldest;
        final double c = result.q - 15.0 / 32.0 * result.errorCount;
        final double d = b * b - 4.0 * a * c;
        // Now compute the update value (phi) that all of the weights will
        // be scaled by. It is never allowed to be larger than 1.0.
        // NOTE(review): when a > 0.0 this branch is taken without checking d,
        // so Math.sqrt(d) would be NaN for a negative d; presumably c <= 0.0
        // always holds in that case -- confirm.
        final double update;
        if (a > 0.0 ||
            (a < 0.0 && d > 0.0 &&
            (-b - Math.sqrt(d)) / (2.0 * a) > 1.0))
        {
            // Use the root (-b + sqrt(d)) / (2a) of the quadratic, capped
            // at 1.0.
            update =
                Math.min(1.0, (-b + Math.sqrt(d)) / (2.0 * a));
        }
        else if (a == 0.0)
        {
            // The quadratic term vanishes, so solve the linear equation
            // b * phi + c = 0 instead, again capped at 1.0.
            // NOTE(review): if sigmaOldest is 0.0 then b is also 0.0 and
            // this divides by zero -- confirm that cannot occur.
            update = Math.min(1.0, -c / b);
        }
        else
        {
            // Remove-oldest Perceptron update.
            update = 1.0;
        }
        // Perform the scaling as long as the update is not 1.0.
        if (update != 1.0)
        {
            KernelUtil.scaleEquals(result, update);
        }
        // Update Q. We use the scaled sigma and the scaled function value of
        // the oldest instead of recomputing fOldest.
        sigmaOldest *= update;
        fOldest *= update;
        result.q +=
            sigmaOldest * sigmaOldest + 2.0 * sigmaOldest -
            2.0 * sigmaOldest * yOldest * fOldest;
        // Remove the oldest.
        result.remove(0);
    }

    /**
     * The result object learned by the {@code Forgetron}, which extends
     * the {@code DefaultKernelBinaryCategorizer} with some additional state
     * information needed in the update step: the number of errors made during
     * learning and the running value of Q.
     *
     * @param   <InputType>
     *      The input type to categorize, which is passed to the kernel
     *      function.
     */
    public static class Result<InputType>
        extends DefaultKernelBinaryCategorizer<InputType>
    {

        /** The number of errors that the categorizer has made in the learning
         *  step. */
        protected long errorCount;

        /** The value of Q for the algorithm. */
        protected double q;

        /**
         * Creates a new {@code Result} with a null kernel.
         */
        public Result()
        {
            this(null);
        }

        /**
         * Creates a new {@code Result} with the given kernel, no supports,
         * an error count of zero, and a Q value of zero.
         *
         * @param   kernel
         *      The kernel to use.
         */
        public Result(
            final Kernel<? super InputType> kernel)
        {
            // Use a linked list to deal with the fact that we're going to
            // enforce a budget and thus will continuously remove the first
            // element when we need to add new supports.
            super(kernel, new LinkedList<DefaultWeightedValue<InputType>>(),
                0.0);

            // Learning starts with no errors and with Q at zero. The values
            // are spelled out explicitly rather than passing the object's
            // own (still default) fields back to their setters.
            this.setErrorCount(0L);
            this.setQ(0.0);
        }

        /**
         * Gets the error count.
         *
         * @return
         *      The error count.
         */
        public long getErrorCount()
        {
            return this.errorCount;
        }

        /**
         * Sets the error count.
         *
         * @param   errorCount
         *      The error count.
         */
        protected void setErrorCount(
            final long errorCount)
        {
            this.errorCount = errorCount;
        }

        /**
         * Gets the value Q updated by the algorithm.
         *
         * @return
         *      The Q value.
         */
        protected double getQ()
        {
            return this.q;
        }

        /**
         * Sets the value Q updated by the algorithm.
         *
         * @param   q
         *      The Q value.
         */
        protected void setQ(
            final double q)
        {
            this.q = q;
        }

    }

    /**
     * An implementation of the "basic" Forgetron algorithm, which is an online
     * budgeted kernel binary categorizer learner. Note that this variant
     * appears to be mostly for illustrative purposes, since it uses the
     * worst-case definition of the shrinking coefficients and thus tends to
     * perform poorly.
     *
     * @param   <InputType>
     *      The type of input to learn over. Passed to the kernel function.
     * @author  Justin Basilico
     * @since   3.3.0
     */
    @PublicationReference(
        author={"Ofer Dekel", "Shai Shalev-Shwartz", "Yoram Singer"},
        title="The Forgetron: A Kernel-based Perceptron on a Budget",
        year=2008,
        type=PublicationType.Journal,
        publication="SIAM Journal on Computing",
        pages={1342, 1372},
        url="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.134.7604&rep=rep1&type=pdf",
        notes="This is the basic version.")
    public static class Basic<InputType>
        extends AbstractOnlineBudgetedKernelBinaryCategorizerLearner<InputType>
    {

        /**
         * Creates a new {@code Forgetron.Basic} with a null kernel and the
         * default budget ({@code DEFAULT_BUDGET}).
         */
        public Basic()
        {
            this(null, DEFAULT_BUDGET);
        }

        /**
         * Creates a new {@code Forgetron.Basic} with the given kernel and
         * budget.
         *
         * @param   kernel
         *      The kernel to use.
         * @param   budget
         *      The budget for the maximum number of supports.
         */
        public Basic(
            final Kernel<? super InputType> kernel,
            final int budget)
        {
            super(kernel, budget);
        }

        /**
         * Creates the initial learned object: a
         * {@code DefaultKernelBinaryCategorizer} that uses this learner's
         * kernel and starts with an empty list of supports.
         *
         * @return
         *      A new, empty categorizer backed by a linked list.
         */
        @Override
        public DefaultKernelBinaryCategorizer<InputType> createInitialLearnedObject()
        {
            // A linked list is used underneath so that removing the oldest
            // (first) element is fast.
            final LinkedList<DefaultWeightedValue<InputType>> supports =
                new LinkedList<DefaultWeightedValue<InputType>>();
            return new DefaultKernelBinaryCategorizer<InputType>(
                this.getKernel(), supports, 0.0);
        }

        /**
         * Performs one online update of the given categorizer using a single
         * labeled example. If the example is predicted with a positive
         * margin, nothing changes. Otherwise the example is added as a new
         * support, all of the weights are scaled by the shrinking factor
         * (phi), and the oldest supports are removed until the number of
         * supports is within the budget.
         *
         * @param   target
         *      The categorizer to update.
         * @param   input
         *      The input of the example.
         * @param   label
         *      The label of the example.
         */
        @Override
        public void update(
            final DefaultKernelBinaryCategorizer<InputType> target,
            final InputType input,
            final boolean label)
        {
            // Encode the label as +1 / -1 so that the product with the
            // real-valued prediction is positive exactly when the signs
            // agree.
            final double actual;
            if (label)
            {
                actual = +1.0;
            }
            else
            {
                actual = -1.0;
            }

            final double prediction = target.evaluateAsDouble(input);
            if (prediction * actual > 0.0)
            {
                // Positive margin, so no error was made.
                return;
            }

            // A mistake was made, so the example becomes a new support.
            target.add(input, actual);

            // TODO: It may be possible to cache the norm to avoid the O(b^2)
            // computation, with b=budget, that occurs from this call to
            // create an O(b) algorithm. However, this algorithm doesn't
            // perform very well, so it is not currently worth the time to do
            // the optimization.
            // -- jdbasil (2011-05-11)
            final double functionNorm = KernelUtil.norm2(target);

            // The equations below are written in terms of B + 1, where B is
            // the budget.
            final double budgetPlusOne = this.budget + 1;

            // phi is the minimum of (B + 1)^(-1 / (2 * (B + 1))) and
            // U / ||f't||, where U = 0.25 * sqrt((B + 1) / log(B + 1)).
            final double exponent = -1.0 / (2.0 * budgetPlusOne);
            final double shrinkCap = Math.pow(budgetPlusOne, exponent);
            final double normBound =
                0.25 * Math.sqrt(budgetPlusOne / Math.log(budgetPlusOne));
            final double phi = Math.min(shrinkCap, normBound / functionNorm);

            // Scale the sigmas by phi.
            KernelUtil.scaleEquals(target, phi);

            // Drop the oldest supports while the budget is violated.
            // Normally this runs at most once, but if the budget has been
            // changed it may take several passes to bring the number of
            // supports back down to the budget.
            while (target.getExampleCount() > this.budget)
            {
                target.remove(0);
            }
        }

    }

    /**
     * An implementation of the "greedy" Forgetron algorithm, which is an online
     * budgeted kernel binary categorizer learner. It is an extension of the
     * "self-tuned" algorithm that first looks, starting from the oldest
     * support, for a support whose psi value is at most 15/32. If one is
     * found, only that support is removed; otherwise the normal self-tuned
     * shrinking step is applied.
     *
     * @param   <InputType>
     *      The type of input to learn over. Passed to the kernel function.
     * @author  Justin Basilico
     * @since   3.3.0
     */
    @PublicationReference(
        author={"Ofer Dekel", "Shai Shalev-Shwartz", "Yoram Singer"},
        title="The Forgetron: A Kernel-based Perceptron on a Budget",
        year=2008,
        type=PublicationType.Journal,
        publication="SIAM Journal on Computing",
        pages={1342, 1372},
        url="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.134.7604&rep=rep1&type=pdf",
        notes="This is the greedy version.")
    public static class Greedy<InputType>
        extends Forgetron<InputType>
    {

        /**
         * Creates a new {@code Forgetron.Greedy} with a null kernel and the
         * default budget ({@code DEFAULT_BUDGET}).
         */
        public Greedy()
        {
            this(null, DEFAULT_BUDGET);
        }

        /**
         * Creates a new {@code Forgetron.Greedy} with the given kernel and
         * budget.
         *
         * @param   kernel
         *      The kernel to use.
         * @param   budget
         *      The budget for the maximum number of supports.
         */
        public Greedy(
            final Kernel<? super InputType> kernel,
            final int budget)
        {
            super(kernel, budget);
        }

        /**
         * Applies the greedy shrinking step. The supports are scanned in
         * order and the first one whose psi value is less than or equal to
         * 15/32 is removed, without scaling any weights. If no support
         * qualifies, the shrinking step of the superclass is applied.
         *
         * @param   result
         *      The result object to shrink.
         */
        @Override
        protected void shrink(
            final Result<InputType> result)
        {
            // A support whose psi does not exceed this threshold is removed
            // on its own, in place of a full shrinking step based on the
            // oldest support.
            final double threshold = 15.0 / 32.0;

            // TODO: This is an O(b^2) implementation, where b is the budget
            // of the algorithm. It should be possible to do an O(b)
            // implementation if the f values are cached and updated whenever
            // a support is added/removed or the weights are scaled.
            // -- jdbasil (2011-05-11)
            int index = 0;
            for (DefaultWeightedValue<InputType> support : result.getExamples())
            {
                // Split the weight into its magnitude (sigma) and its sign,
                // which encodes the label.
                final double weight = support.getWeight();
                final double sigma = Math.abs(weight);
                final double y;
                if (weight >= 0.0)
                {
                    y = +1.0;
                }
                else
                {
                    y = -1.0;
                }

                // Evaluate the current function on the support itself.
                final double f = result.evaluateAsDouble(support.getValue());
                final double psi =
                    sigma * sigma + 2.0 * sigma - 2.0 * sigma * y * f;

                if (psi <= threshold)
                {
                    // Not greater than the threshold, so remove just this
                    // support and skip the shrinking step.
                    result.remove(index);
                    return;
                }

                index++;
            }

            // No support qualified, so do the normal shrinking step.
            super.shrink(result);
        }

    }

}