/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */

/** @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a> */

package cc.mallet.optimize.tests;

import junit.framework.*;
import java.util.logging.*;
import java.io.*;
import java.util.Random;
import cc.mallet.classify.*;
import cc.mallet.optimize.LineOptimizer;
import cc.mallet.optimize.Optimizable;
import cc.mallet.pipe.*;
import cc.mallet.pipe.iterator.*;
import cc.mallet.types.*;
import cc.mallet.util.*;

/**
 * Contains static methods for testing subclasses of
 * Maximizable and Maximizable.ByGradient.  Especially
 * useful are methods that verify the consistency of the value
 * and gradient functions of an instance of
 * Maximizable.ByGradient.
 */
public class TestOptimizable extends TestCase
{
	private static Logger logger = MalletLogger.getLogger(TestOptimizable.class.getName());

	public TestOptimizable (String name) {
		super (name);
	}

	// When positive, only every (length/numComponents)-th gradient component
	// is checked by testValueAndGradientCurrentParameters; when negative,
	// every component is checked.
	static private int numComponents = -1;

	/**
	 * Sets the number of gradient components that will be checked.
	 * If negative, all will be checked.
	 */
	public static void setNumComponents (int n) {
		numComponents = n;
	}

	/**
	 * Tests that parameters set by setParameters can be retrieved by
	 * getParameters.
	 * @param maxable Instance of a Maximizable that should be tested.
	 *  Its current parameters will be overwritten.
	 */
	public static boolean testGetSetParameters (Optimizable maxable)
	{
		System.out.println ("TestMaximizable testGetSetParameters");
		// Set all the parameters to unique values using setParameters()
		double[] parameters = new double [maxable.getNumParameters()];
		maxable.getParameters (parameters);
		for (int i = 0; i < parameters.length; i++)
			parameters[i] = (double)i;
		maxable.setParameters (parameters);
		// Test to make sure those parameters are there
		MatrixOps.setAll (parameters, 0.0);
		maxable.getParameters (parameters);
		for (int i = 0; i < parameters.length; i++)
			assertTrue (parameters[i] == (double)i);
		return true;
	}

	/**
	 * Tests that the analytic directional derivative (the dot product of the
	 * gradient with the given direction) agrees with the empirical slope of
	 * the value function along that direction.  The optimizable's parameters
	 * are restored before returning.
	 * @param maxable the function to test; its gradient and value must agree.
	 * @param direction the direction along which to perturb the parameters;
	 *  must have the same length as the parameter vector.
	 * @return the absolute difference between the empirical and analytic slope.
	 */
	public static double testValueAndGradientInDirection (Optimizable.ByGradientValue maxable, double[] direction)
	{
		int numParameters = maxable.getNumParameters();
		assert (numParameters == direction.length);
		double[] oldParameters = new double[numParameters];
		double[] parameters = new double[numParameters];
		// Work on a unit-absNorm copy so epsilon scales the step correctly.
		double[] normalizedDirection = direction.clone();
		MatrixOps.absNormalize(normalizedDirection);
		double value = maxable.getValue();
		// the gradient from the optimizable function
		double[] analyticGradient = new double[numParameters];
		maxable.getParameters (parameters);
		maxable.getParameters (oldParameters);
		maxable.getValueGradient (analyticGradient);
		// the gradient calculate from the slope of the value
		// This setting of epsilon should make the individual elements of
		// the analytical gradient and the empirical gradient equal.  This
		// simplifies the comparison of the individual dimensions of the
		// gradient and thus makes debugging easier.
		double directionGradient = MatrixOps.dotProduct (analyticGradient, normalizedDirection);
		double epsilon = 0.1 / MatrixOps.absNorm(analyticGradient);
		// Take the absolute value so the tolerance stays positive even when
		// the directional derivative is negative; a negative tolerance would
		// make the assertion below unsatisfiable.
		double tolerance = 0.00001 * Math.abs (directionGradient); // this was "epsilon * 5";
		System.out.println ("epsilon = "+epsilon+" tolerance="+tolerance);
		MatrixOps.plusEquals (parameters, normalizedDirection, epsilon);
		//logger.fine ("Parameters:"); parameters.print();
		maxable.setParameters (parameters);
		double epsValue = maxable.getValue();
		// Finite-difference (forward) estimate of the directional derivative.
		double slope = (epsValue - value) / epsilon;
		System.out.println ("value="+value+" epsilon="+epsilon+" epsValue="+
				epsValue+" slope = "+slope+" gradient="+directionGradient);
		assert (!Double.isNaN (slope));
		double slopeDifference = Math.abs(slope - directionGradient);
		logger.info ("TestMaximizable "+
				": slope tolerance = "+tolerance+
				": gradient slope = "+directionGradient+
				", value+epsilon slope = "+slope+
				": slope difference = "+slopeDifference);
		// Restore the original parameters before returning or asserting.
		maxable.setParameters (oldParameters);
		assert (Math.abs(slopeDifference) < tolerance)
			: "Slope difference "+slopeDifference+" is greater than tolerance "+tolerance;
		return slopeDifference;
	}

	/**
	 * Tests that the value and gradient function are consistent
	 * at the current parameters.
	 * Computes both the analytic gradient (the one given by
	 * <tt>maxable.getValueGradient</tt>) and the empirical gradient,
	 * which is (if x are the current parameters and f the function
	 * computed by maxable) <tt>f(x + epsilon) - f(x)</tt>.  Verifies
	 * that the angle between the empirical and analytic gradients
	 * are close to 0.
	 * @see #testValueAndGradient testValueAndGradient
	 * @see #testValueAndGradientRandomParameters testValueAndGradientRandomParameters
	 * @throws IllegalStateException If the angle is above the tolerance
	 */
	public static double testValueAndGradientCurrentParameters (Optimizable.ByGradientValue maxable)
	{
		double[] parameters = new double [maxable.getNumParameters()];
		double value = maxable.getValue();
		// the gradient from the maximizable function
		double[] analyticGradient = new double[maxable.getNumParameters()];
		double[] empiricalGradient = new double[maxable.getNumParameters()];
		maxable.getParameters (parameters);
		maxable.getValueGradient (analyticGradient);
		// Seed the empirical gradient with the analytic values so that any
		// components skipped by the sampling below compare as equal.
		maxable.getValueGradient (empiricalGradient);
		// This setting of epsilon should make the individual elements of
		// the analytical gradient and the empirical gradient equal.  This
		// simplifies the comparison of the individual dimensions of the
		// gradient and thus makes debugging easier.
		// cas: However, avoid huge epsilon if norm of analytic gradient is
		// close to 0.
		// Next line used to be: double norm = Math.max (0.1, MatrixOps.twoNorm(analyticGradient));
		// but if all the components of the analyticalGradient are very small, the squaring in the
		// twoNorm causes epsilon to be too large.  -AKM
		double norm = Math.max (0.1, MatrixOps.absNorm(analyticGradient));
		double epsilon = 0.1 / norm;
		double tolerance = epsilon * 5;
		System.out.println ("epsilon = "+epsilon+" tolerance="+tolerance);

		// Optionally check only every k-th component (see setNumComponents);
		// when numComponents <= 0, sampleParameterInterval stays -1 and
		// (i % -1 != 0) is always false, so every component is checked.
		int sampleParameterInterval = -1;
		if (numComponents > 0) {
			sampleParameterInterval = Math.max (1, parameters.length / numComponents);
			logger.info ("Will check every "+sampleParameterInterval+"-th component.");
		}

		// Check each direction, perturb it, measure new value, and make
		// sure it agrees with the gradient from
		// maxable.getValueGradient()
		for (int i = 0; i < parameters.length; i++) {
		// { int i = 0;  // Uncomment this line to debug one parameter at a time -cas
			if ((parameters.length >= sampleParameterInterval)
					&& (i % sampleParameterInterval != 0))
				continue;
			double param = parameters[i];
			parameters[i] = param + epsilon;
			//logger.fine ("Parameters:"); parameters.print();
			maxable.setParameters (parameters);
			double epsValue = maxable.getValue();
			// Forward finite-difference slope along component i.
			double slope = (epsValue - value) / epsilon;
			System.out.println ("value="+value+" epsValue="+epsValue+" slope["+i+"] = "+slope+" gradient[]="+analyticGradient[i]);
			assert (!Double.isNaN (slope));
			logger.info ("TestMaximizable checking singleIndex "+i+
					": gradient slope = "+analyticGradient[i]+
					", value+epsilon slope = "+slope+
					": slope difference = "+(slope - analyticGradient[i]));
			// No negative below because the gradient points in the direction
			// of maximizing the function.
			empiricalGradient[i] = slope;
			parameters[i] = param;
		}
		// Normalize the matrices to have the same L2 length
		System.out.println ("analyticGradient.twoNorm = "+ MatrixOps.twoNorm(analyticGradient));
		System.out.println ("empiricalGradient.twoNorm = "+ MatrixOps.twoNorm(empiricalGradient));
		MatrixOps.timesEquals (analyticGradient, 1.0/MatrixOps.twoNorm(analyticGradient));
		MatrixOps.timesEquals (empiricalGradient, 1.0/MatrixOps.twoNorm(empiricalGradient));

		/*
		System.out.println("N   ANA   EMP");
		for (int i = 0; i < analyticGradient.length; i++) {
			System.out.println(i+"  "+analyticGradient[i]+"  "+empiricalGradient[i]);
		}
		*/

		// Return the angle between the two vectors, in radians
		double dot = MatrixOps.dotProduct (analyticGradient,empiricalGradient);
		if (Maths.almostEquals (dot, 1.0)) {
			logger.info ("TestMaximizable angle is zero.");
			return 0.0;
		} else {
			double angle = Math.acos (dot);
			logger.info ("TestMaximizable angle = "+angle);
			if (Math.abs(angle) > tolerance)
				throw new IllegalStateException ("Gradient/Value mismatch: angle="+ angle + " tol: " + tolerance);
			if (Double.isNaN (angle))
				throw new IllegalStateException ("Gradient/Value error: angle is NaN!");
			return angle;
		}
	}

	/**
	 * Tests that getValue and getValueGradient are consistent.
	 * Tests for consistency at <tt>params = 0</tt> and at
	 * <tt> params = -0.0001 * grad(f)</tt>
	 * @see #testValueAndGradientCurrentParameters testValueAndGradientCurrentParameters
	 * @throws IllegalStateException If the test fails.
	 */
	public static boolean testValueAndGradient (Optimizable.ByGradientValue maxable)
	{
		double[] parameters = new double [maxable.getNumParameters()];
		MatrixOps.setAll (parameters, 0.0);
		maxable.setParameters (parameters);
		testValueAndGradientCurrentParameters (maxable);
		// Take a small step against the gradient and re-check consistency there.
		MatrixOps.setAll (parameters, 0.0);
		double[] delta = new double[maxable.getNumParameters()];
		maxable.getValueGradient (delta);
		logger.info ("Gradient two-Norm = "+MatrixOps.twoNorm(delta));
		logger.info (" max parameter change = "+(MatrixOps.infinityNorm(delta) * -0.001));
		MatrixOps.timesEquals (delta, -0.0001);
		MatrixOps.plusEquals (parameters, delta);
		maxable.setParameters (parameters);
		testValueAndGradientCurrentParameters (maxable);
		return true;
	}

	/**
	 * Tests that getValue and getValueGradient are consistent
	 * at a random parameter setting.
	 * @see #testValueAndGradientCurrentParameters testValueAndGradientCurrentParameters
	 * @throws IllegalStateException If the test fails.
	 */
	public static boolean testValueAndGradientRandomParameters (Optimizable.ByGradientValue maxable, Random r)
	{
		double[] params = new double [maxable.getNumParameters()];
		// Draw each parameter uniformly from (-1, 1).
		for (int i = 0; i < params.length; i++) {
			params[i] = r.nextDouble ();
			if (r.nextBoolean ())
				params [i] = -params[i];
		}
		maxable.setParameters (params);
		testValueAndGradientCurrentParameters (maxable);
		return true;
	}

	/** Maximizable for f(x) = 3x^2 - 5x + 2, with gradient f'(x) = 6x - 5. */
	static class SimplePoly implements Optimizable.ByGradientValue {

		double[] params = new double [1];

		public void getParameters(double[] doubleArray) {
			doubleArray [0] = params [0];
		}

		public int getNumParameters() { return 1; }

		public double getParameter(int n) { return params [0]; }

		public void setParameters(double[] doubleArray) {
			params [0] = doubleArray [0];
		}

		public void setParameter(int n, double d) { params[n] = d; }

		public double getValue () {
			return 3*params[0]*params[0] - 5 * params[0] + 2;
		}

		public void getValueGradient (double[] buffer) {
			// d/dx (3x^2 - 5x + 2) = 6x - 5.
			// (Was "3*params[0] - 5", which mis-differentiated the x^2 term.)
			buffer [0] = 6*params [0] - 5;
		}
	}

	/** A deliberately broken SimplePoly whose gradient must fail the tests. */
	static class WrongSimplePoly extends SimplePoly {
		public void getValueGradient (double[] buffer) {
			buffer [0] = 6*params [0]; // WRONG: Missing -5
		}
	}

	/** Verifies that a correct gradient passes and a broken one is rejected. */
	public void testTestValueAndGradient ()
	{
		SimplePoly maxable = new SimplePoly ();
		testValueAndGradient (maxable);
		try {
			WrongSimplePoly badMaxable = new WrongSimplePoly ();
			testValueAndGradient (badMaxable);
			fail ("WrongSimplyPoly should fail testMaxmiziable!");
		} catch (Exception e) {
			// Expected: the consistency check throws for the broken gradient.
		}
	}

	public static Test suite ()
	{
		return new TestSuite (TestOptimizable.class);
	}

	protected void setUp ()
	{
	}

	public static void main (String[] args)
	{
		junit.textui.TestRunner.run (suite());
	}
}