/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. */ package cc.mallet.classify; import java.util.logging.*; import java.util.*; import java.io.*; import cc.mallet.classify.Classifier; import cc.mallet.optimize.LimitedMemoryBFGS; import cc.mallet.optimize.Optimizable; import cc.mallet.optimize.Optimizer; import cc.mallet.optimize.tests.*; import cc.mallet.pipe.Pipe; import cc.mallet.types.Alphabet; import cc.mallet.types.ExpGain; import cc.mallet.types.FeatureInducer; import cc.mallet.types.FeatureSelection; import cc.mallet.types.FeatureVector; import cc.mallet.types.GradientGain; import cc.mallet.types.InfoGain; import cc.mallet.types.Instance; import cc.mallet.types.InstanceList; import cc.mallet.types.Label; import cc.mallet.types.LabelAlphabet; import cc.mallet.types.LabelVector; import cc.mallet.types.Labeling; import cc.mallet.types.MatrixOps; import cc.mallet.types.RankedFeatureVector; import cc.mallet.types.Vector; import cc.mallet.util.CommandOption; import cc.mallet.util.MalletLogger; import cc.mallet.util.MalletProgressMessageLogger; import cc.mallet.util.Maths; // Does not currently handle instances that are labeled with distributions // instead of a single label. /** * The trainer for a Maximum Entropy classifier. @author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a> */ public class MCMaxEntTrainer extends ClassifierTrainer<MCMaxEnt> implements Boostable, Serializable //implements CommandOption.ListProviding { private static Logger logger = MalletLogger.getLogger(MCMaxEntTrainer.class.getName()); private static Logger progressLogger = MalletProgressMessageLogger.getLogger(MCMaxEntTrainer.class.getName()+"-pl"); int numGetValueCalls = 0; int numGetValueGradientCalls = 0; int numIterations = 10; public static final String EXP_GAIN = "exp"; public static final String GRADIENT_GAIN = "grad"; public static final String INFORMATION_GAIN = "info"; // xxx Why does TestMaximizable fail when this variance is very small? static final double DEFAULT_GAUSSIAN_PRIOR_VARIANCE = .1; // note used to be 1 static final double DEFAULT_HYPERBOLIC_PRIOR_SLOPE = 0.2; static final double DEFAULT_HYPERBOLIC_PRIOR_SHARPNESS = 10.0; static final Class DEFAULT_MAXIMIZER_CLASS = LimitedMemoryBFGS.class; // CPAL boolean usingMultiConditionalTraining = true; boolean usingHyperbolicPrior = false; double gaussianPriorVariance = DEFAULT_GAUSSIAN_PRIOR_VARIANCE; double hyperbolicPriorSlope = DEFAULT_HYPERBOLIC_PRIOR_SLOPE; double hyperbolicPriorSharpness = DEFAULT_HYPERBOLIC_PRIOR_SHARPNESS; Class maximizerClass = DEFAULT_MAXIMIZER_CLASS; double generativeWeighting = 1.0; MaximizableTrainer mt; MCMaxEnt initialClassifier; // CPAL static CommandOption.Boolean usingMultiConditionalTrainingOption = new CommandOption.Boolean (MCMaxEntTrainer.class, "useMCTraining", "true|false", true, true, "Use MultiConditional Training", null); static CommandOption.Boolean usingHyperbolicPriorOption = new CommandOption.Boolean (MCMaxEntTrainer.class, "useHyperbolicPrior", "true|false", false, false, "Use hyperbolic (close to L1 penalty) prior over parameters", null); static CommandOption.Double gaussianPriorVarianceOption = new CommandOption.Double (MCMaxEntTrainer.class, "gaussianPriorVariance", "FLOAT", true, 10.0, "Variance of the gaussian prior over parameters", null); static CommandOption.Double hyperbolicPriorSlopeOption = new CommandOption.Double (MCMaxEntTrainer.class, "hyperbolicPriorSlope", "FLOAT", true, 0.2, "Slope of the (L1 penalty) hyperbolic prior over parameters", null); static CommandOption.Double hyperbolicPriorSharpnessOption = new CommandOption.Double (MCMaxEntTrainer.class, "hyperbolicPriorSharpness", "FLOAT", true, 10.0, "Sharpness of the (L1 penalty) hyperbolic prior over parameters", null); static final CommandOption.List commandOptions = new CommandOption.List ( "MCMaximum Entropy Classifier", new CommandOption[] { usingHyperbolicPriorOption, gaussianPriorVarianceOption, hyperbolicPriorSlopeOption, hyperbolicPriorSharpnessOption, usingMultiConditionalTrainingOption, // CPAL }); public static CommandOption.List getCommandOptionList () { return commandOptions; } /* public MCMaxEntTrainer(Maximizer.ByGradient maximizer) { this.maximizerByGradient = maximizer; this.usingHyperbolicPrior = false; } */ public MCMaxEntTrainer (CommandOption.List col) { this.usingHyperbolicPrior = usingHyperbolicPriorOption.value; this.gaussianPriorVariance = gaussianPriorVarianceOption.value; this.hyperbolicPriorSlope = hyperbolicPriorSlopeOption.value; this.hyperbolicPriorSharpness = hyperbolicPriorSharpnessOption.value; this.usingMultiConditionalTraining = usingMultiConditionalTrainingOption.value; } public MCMaxEntTrainer (MCMaxEnt initialClassifier) { this.initialClassifier = initialClassifier; } public MCMaxEntTrainer () { this (false); } public MCMaxEntTrainer (boolean useHyperbolicPrior) { this.usingHyperbolicPrior = useHyperbolicPrior; } /** Constructs a trainer with a parameter to avoid overtraining. 1.0 is * usually a reasonable default value. */ public MCMaxEntTrainer (double gaussianPriorVariance) { this.usingHyperbolicPrior = false; this.gaussianPriorVariance = gaussianPriorVariance; } // CPAL - added this to do MultiConditionalTraining public MCMaxEntTrainer (double gaussianPriorVariance, boolean useMultiConditionalTraining ) { this.usingHyperbolicPrior = false; this.usingMultiConditionalTraining = useMultiConditionalTraining; this.gaussianPriorVariance = gaussianPriorVariance; } public MCMaxEntTrainer (double hyperbolicPriorSlope, double hyperbolicPriorSharpness) { this.usingHyperbolicPrior = true; this.hyperbolicPriorSlope = hyperbolicPriorSlope; this.hyperbolicPriorSharpness = hyperbolicPriorSharpness; } public Optimizable.ByGradientValue getMaximizableTrainer (InstanceList ilist) { if (ilist == null) return new MaximizableTrainer (); return new MaximizableTrainer (ilist, null); } /** * Specifies the maximum number of iterations to run during a single call * to <code>train</code> or <code>trainWithFeatureInduction</code>. Not * currently functional. * @return This trainer */ // XXX Since we maximize before using numIterations, this doesn't work. // Is that a bug? If so, should the default numIterations be higher? public MCMaxEntTrainer setNumIterations (int i) { numIterations = i; return this; } public MCMaxEntTrainer setUseHyperbolicPrior (boolean useHyperbolicPrior) { this.usingHyperbolicPrior = useHyperbolicPrior; return this; } /** * Sets a parameter to prevent overtraining. A smaller variance for the prior * means that feature weights are expected to hover closer to 0, so extra * evidence is required to set a higher weight. * @return This trainer */ public MCMaxEntTrainer setGaussianPriorVariance (double gaussianPriorVariance) { this.usingHyperbolicPrior = false; this.gaussianPriorVariance = gaussianPriorVariance; return this; } public MCMaxEntTrainer setHyperbolicPriorSlope(double hyperbolicPriorSlope) { this.usingHyperbolicPrior = true; this.hyperbolicPriorSlope = hyperbolicPriorSlope; return this; } public MCMaxEntTrainer setHyperbolicPriorSharpness (double hyperbolicPriorSharpness) { this.usingHyperbolicPrior = true; this.hyperbolicPriorSharpness = hyperbolicPriorSharpness; return this; } public MCMaxEnt getClassifier () { return mt.getClassifier(); } public MCMaxEnt train (InstanceList trainingSet) { logger.fine ("trainingSet.size() = "+trainingSet.size()); mt = new MaximizableTrainer (trainingSet, (MCMaxEnt)initialClassifier); Optimizer maximizer = new LimitedMemoryBFGS(mt); // CPAL - change the tolerance for large vocab experiments ((LimitedMemoryBFGS)maximizer).setTolerance(.00001); // std is .0001; maximizer.optimize (); // XXX given the loop below, this seems wrong. logger.info("MCMaxEnt ngetValueCalls:"+getValueCalls()+"\nMCMaxEnt ngetValueGradientCalls:"+getValueGradientCalls()); // boolean converged; // // for (int i = 0; i < numIterations; i++) { // converged = maximizer.maximize (mt, 1); // if (converged) // break; // else if (evaluator != null) // if (!evaluator.evaluate (mt.getClassifier(), converged, i, mt.getValue(), // trainingSet, validationSet, testSet)) // break; // } // TestMaximizable.testValueAndGradient (mt); progressLogger.info("\n"); // progess messages are on one line; move on. return mt.getClassifier (); } /** * <p>Like the other version of <code>trainWithFeatureInduction</code>, but * allows some default options to be changed.</p> * * @param maxent An initial partially-trained classifier (default <code>null</code>). * This classifier may be modified during training. * @param gainName The estimate of gain (log-likelihood increase) we want our chosen * features to maximize. * Should be one of <code>MaxEntTrainer.EXP_GAIN</code>, * <code>MaxEntTrainer.GRADIENT_GAIN</code>, or * <code>MaxEntTrainer.INFORMATION_GAIN</code> (default <code>EXP_GAIN</code>). * * @return The trained <code>MaxEnt</code> classifier */ /* public Classifier trainWithFeatureInduction (InstanceList trainingData, InstanceList validationData, InstanceList testingData, ClassifierEvaluating evaluator, MCMaxEnt maxent, int totalIterations, int numIterationsBetweenFeatureInductions, int numFeatureInductions, int numFeaturesPerFeatureInduction, String gainName) { // XXX This ought to be a parameter, except that setting it to true can // crash training ("Jump too small"). boolean saveParametersDuringFI = false; Alphabet inputAlphabet = trainingData.getDataAlphabet(); Alphabet outputAlphabet = trainingData.getTargetAlphabet(); if (maxent == null) maxent = new MCMaxEnt(trainingData.getPipe(), new double[(1+inputAlphabet.size()) * outputAlphabet.size()]); int trainingIteration = 0; int numLabels = outputAlphabet.size(); // Initialize feature selection FeatureSelection globalFS = trainingData.getFeatureSelection(); if (globalFS == null) { // Mask out all features; some will be added later by FeatureInducer.induceFeaturesFor(.) globalFS = new FeatureSelection (trainingData.getDataAlphabet()); trainingData.setFeatureSelection (globalFS); } if (validationData != null) validationData.setFeatureSelection (globalFS); if (testingData != null) testingData.setFeatureSelection (globalFS); maxent = new MCMaxEnt(maxent.getInstancePipe(), maxent.getParameters(), globalFS); // Run feature induction for (int featureInductionIteration = 0; featureInductionIteration < numFeatureInductions; featureInductionIteration++) { // Print out some feature information logger.info ("Feature induction iteration "+featureInductionIteration); // Train the model a little bit. We don't care whether it converges; we // execute all feature induction iterations no matter what. if (featureInductionIteration != 0) { // Don't train until we have added some features setNumIterations(numIterationsBetweenFeatureInductions); maxent = (MCMaxEnt)this.train (trainingData, validationData, testingData, evaluator, maxent); } trainingIteration += numIterationsBetweenFeatureInductions; logger.info ("Starting feature induction with "+(1+inputAlphabet.size())+ " features over "+numLabels+" labels."); // Create the list of error tokens InstanceList errorInstances = new InstanceList (trainingData.getDataAlphabet(), trainingData.getTargetAlphabet()); // This errorInstances.featureSelection will get examined by FeatureInducer, // so it can know how to add "new" singleton features errorInstances.setFeatureSelection (globalFS); List errorLabelVectors = new ArrayList(); // these are length-1 vectors for (int i = 0; i < trainingData.size(); i++) { Instance instance = trainingData.get(i); FeatureVector inputVector = (FeatureVector) instance.getData(); Label trueLabel = (Label) instance.getTarget(); // Having trained using just the current features, see how we classify // the training data now. Classification classification = maxent.classify(instance); if (!classification.bestLabelIsCorrect()) { errorInstances.add(inputVector, trueLabel, null, null); errorLabelVectors.add(classification.getLabelVector()); } } logger.info ("Error instance list size = "+errorInstances.size()); int s = errorLabelVectors.size(); LabelVector[] lvs = new LabelVector[s]; for (int i = 0; i < s; i++) { lvs[i] = (LabelVector)errorLabelVectors.get(i); } RankedFeatureVector.Factory gainFactory = null; if (gainName.equals (EXP_GAIN)) gainFactory = new ExpGain.Factory (lvs, gaussianPriorVariance); else if (gainName.equals(GRADIENT_GAIN)) gainFactory = new GradientGain.Factory (lvs); else if (gainName.equals(INFORMATION_GAIN)) gainFactory = new InfoGain.Factory (); else throw new IllegalArgumentException("Unsupported gain name: "+gainName); FeatureInducer klfi = new FeatureInducer (gainFactory, errorInstances, numFeaturesPerFeatureInduction, 2*numFeaturesPerFeatureInduction, 2*numFeaturesPerFeatureInduction); // Note that this adds features globally, but not on a per-transition basis klfi.induceFeaturesFor (trainingData, false, false); if (testingData != null) klfi.induceFeaturesFor (testingData, false, false); logger.info ("MCMaxEnt FeatureSelection now includes "+globalFS.cardinality()+" features"); klfi = null; double[] newParameters = new double[(1+inputAlphabet.size()) * outputAlphabet.size()]; // XXX (Executing this block often causes an error during training; I don't know why.) if (saveParametersDuringFI) { // Keep current parameter values // XXX This relies on the implementation detail that the most recent features // added to an Alphabet get the highest indices. // Count parameters per output label int oldParamCount = maxent.parameters.length / outputAlphabet.size(); int newParamCount = 1+inputAlphabet.size(); // Copy params into the proper locations for (int i=0; i<outputAlphabet.size(); i++) { System.arraycopy(maxent.parameters, i*oldParamCount, newParameters, i*newParamCount, oldParamCount); } for (int i=0; i<oldParamCount; i++) if (maxent.parameters[i] != newParameters[i]) { System.out.println(maxent.parameters[i]+" "+newParameters[i]); System.exit(0); } } maxent.parameters = newParameters; maxent.defaultFeatureIndex = inputAlphabet.size(); } // Finished feature induction logger.info("Ended with "+globalFS.cardinality()+" features."); setNumIterations(totalIterations - trainingIteration); return this.train (trainingData, validationData, testingData, evaluator, maxent); } */ // XXX Should these really be public? Why? /** Counts how many times this trainer has computed the gradient of the * log probability of training labels. */ public int getValueGradientCalls() {return numGetValueGradientCalls;} /** Counts how many times this trainer has computed the * log probability of training labels. */ public int getValueCalls() {return numGetValueCalls;} // public int getIterations() {return maximizerByGradient.getIterations();} public String toString() { return "MCMaxEntTrainer" // + "("+maximizerClass.getName()+") " + ",numIterations=" + numIterations + (usingHyperbolicPrior ? (",hyperbolicPriorSlope="+hyperbolicPriorSlope+ ",hyperbolicPriorSharpness="+hyperbolicPriorSharpness) : (",gaussianPriorVariance="+gaussianPriorVariance)); } // A private inner class that wraps up a MCMaxEnt classifier and its training data. // The result is a maximize.Maximizable function. private class MaximizableTrainer implements Optimizable.ByGradientValue { double[] parameters, constraints, cachedGradient; MCMaxEnt theClassifier; InstanceList trainingList; // The expectations are (temporarily) stored in the cachedGradient double cachedValue; boolean cachedValueStale; boolean cachedGradientStale; int numLabels; int numFeatures; int defaultFeatureIndex; // just for clarity FeatureSelection featureSelection; FeatureSelection[] perLabelFeatureSelection; public MaximizableTrainer (){} public MaximizableTrainer (InstanceList ilist, MCMaxEnt initialClassifier) { this.trainingList = ilist; Alphabet fd = ilist.getDataAlphabet(); LabelAlphabet ld = (LabelAlphabet) ilist.getTargetAlphabet(); // Don't fd.stopGrowth, because someone might want to do feature induction ld.stopGrowth(); // Add one feature for the "default feature". this.numLabels = ld.size(); this.numFeatures = fd.size() + 1; this.defaultFeatureIndex = numFeatures-1; this.parameters = new double [numLabels * numFeatures]; this.constraints = new double [numLabels * numFeatures]; this.cachedGradient = new double [numLabels * numFeatures]; Arrays.fill (parameters, 0.0); Arrays.fill (constraints, 0.0); Arrays.fill (cachedGradient, 0.0); this.featureSelection = ilist.getFeatureSelection(); this.perLabelFeatureSelection = ilist.getPerLabelFeatureSelection(); // Add the default feature index to the selection if (featureSelection != null) featureSelection.add (defaultFeatureIndex); if (perLabelFeatureSelection != null) for (int i = 0; i < perLabelFeatureSelection.length; i++) perLabelFeatureSelection[i].add (defaultFeatureIndex); // xxx Later change this to allow both to be set, but select which one to use by a boolean flag? assert (featureSelection == null || perLabelFeatureSelection == null); if (initialClassifier != null) { this.theClassifier = initialClassifier; this.parameters = theClassifier.parameters; this.featureSelection = theClassifier.featureSelection; this.perLabelFeatureSelection = theClassifier.perClassFeatureSelection; this.defaultFeatureIndex = theClassifier.defaultFeatureIndex; assert (initialClassifier.getInstancePipe() == ilist.getPipe()); } else if (this.theClassifier == null) { this.theClassifier = new MCMaxEnt (ilist.getPipe(), parameters, featureSelection, perLabelFeatureSelection); } cachedValueStale = true; cachedGradientStale = true; // Initialize the constraints logger.fine("Number of instances in training list = " + trainingList.size()); for (Instance inst : trainingList) { double instanceWeight = trainingList.getInstanceWeight(inst); Labeling labeling = inst.getLabeling (); //logger.fine ("Instance "+ii+" labeling="+labeling); FeatureVector fv = (FeatureVector) inst.getData (); Alphabet fdict = fv.getAlphabet(); assert (fv.getAlphabet() == fd); int li = labeling.getBestIndex(); // The "2*" below is because there is one copy for the p(y|x)and another for the p(x|y). MatrixOps.rowPlusEquals (constraints, numFeatures, li, fv, 2*instanceWeight); // For the default feature, whose weight is 1.0 assert(!Double.isNaN(instanceWeight)) : "instanceWeight is NaN"; assert(!Double.isNaN(li)) : "bestIndex is NaN"; boolean hasNaN = false; for(int i = 0; i < fv.numLocations(); i++) { if(Double.isNaN(fv.valueAtLocation(i))) { logger.info("NaN for feature " + fdict.lookupObject(fv.indexAtLocation(i)).toString()); hasNaN = true; } } if(hasNaN) logger.info("NaN in instance: " + inst.getName()); // Only p(y|x) uses the default feature; p(x|y) doesn't use it. The default feature value is 1.0. constraints[li*numFeatures + defaultFeatureIndex] += instanceWeight; } //TestMaximizable.testValueAndGradientCurrentParameters (this); } public MCMaxEnt getClassifier () { return theClassifier; } public double getParameter (int index) { return parameters[index]; } public void setParameter (int index, double v) { cachedValueStale = true; cachedGradientStale = true; parameters[index] = v; } public int getNumParameters() { return parameters.length; } public void getParameters (double[] buff) { if (buff == null || buff.length != parameters.length) buff = new double [parameters.length]; System.arraycopy (parameters, 0, buff, 0, parameters.length); } public void setParameters (double [] buff) { assert (buff != null); cachedValueStale = true; cachedGradientStale = true; if (buff.length != parameters.length) parameters = new double[buff.length]; System.arraycopy (buff, 0, parameters, 0, buff.length); } // log probability of the training labels public double getValue () { if (cachedValueStale) { numGetValueCalls++; cachedValue = 0; // We'll store the expectation values in "cachedGradient" for now cachedGradientStale = true; java.util.Arrays.fill (cachedGradient, 0.0); // Incorporate likelihood of data double[] scores = new double[trainingList.getTargetAlphabet().size()]; double value = 0.0; //System.out.println("I Now "+inputAlphabet.size()+" regular features."); Iterator<Instance> iter = trainingList.iterator(); //int ii = 0; // Normalize the parameters to be per-class multinomials double probs[][] = new double[scores.length][numFeatures]; double lprobs[][] = new double[scores.length][numFeatures]; for (int si = 0; si < scores.length; si++) { double sum = 0, max = MatrixOps.max (parameters); for (int fi = 0; fi < numFeatures; fi++) { // TODO Strongly consider some smoothing here. What happens when all parameters are zero? // Oh, this should be no problem, because exp(0) == 1. probs[si][fi] = Math.exp(parameters[si*numFeatures+fi] - max); sum += probs[si][fi]; } assert (sum > 0); for (int fi = 0; fi < numFeatures; fi++) { probs[si][fi] /= sum; lprobs[si][fi] = Math.log(probs[si][fi]); } } while (iter.hasNext()) { Instance instance = iter.next(); double instanceWeight = trainingList.getInstanceWeight(instance); Labeling labeling = instance.getLabeling (); //System.out.println("L Now "+inputAlphabet.size()+" regular features."); this.theClassifier.getClassificationScores (instance, scores); FeatureVector fv = (FeatureVector) instance.getData (); int li = labeling.getBestIndex(); value = - (instanceWeight * Math.log (scores[li])); if(Double.isNaN(value)) { logger.fine ("MCMaxEntTrainer: Instance " + instance.getName() + "has NaN value. log(scores)= " + Math.log(scores[li]) + " scores = " + scores[li] + " has instance weight = " + instanceWeight); } if (Double.isInfinite(value)) { logger.warning ("Instance "+instance.getSource() + " has infinite value; skipping value and gradient"); cachedValue -= value; cachedValueStale = false; return -value; // continue; } cachedValue += value; // CPAL - this is a loop over classes and their scores // - we compute the gradient by taking the dot product of the feature value // and the probability of the class for (int si = 0; si < scores.length; si++) { if (scores[si] == 0) continue; assert (!Double.isInfinite(scores[si])); // CPAL - accumulating the current classifiers expectation of the feature // vector counts for this class label // Current classifier has expectation over class label, not over feature vector MatrixOps.rowPlusEquals (cachedGradient, numFeatures, si, fv, -instanceWeight * scores[si]); cachedGradient[numFeatures*si + defaultFeatureIndex] += (-instanceWeight * scores[si]); } // CPAL - if we wish to do multiconditional training we need another term for this accumulated // expectation if (usingMultiConditionalTraining) { // need something analogous to this // this.theClassifier.getClassificationScores (instance, scores); // this.theClassifier.getFeatureDistributions (instance, // Note: li is the "label" for this instance // Get the sum of the feature vector // which is the number of counts for the document if we use that as input double Ncounts = MatrixOps.sum(fv); // CPAL - get the additional term for the value of our - log probability // - this computation amounts to the dot product of the feature vector and the probability vector cachedValue -= (instanceWeight * fv.dotProduct(lprobs[li])); // CPAL - get the model expectation over features for the given class for (int fi = 0; fi < numFeatures; fi++) { //if(parameters[numFeatures*li + fi] != 0) { // MatrixOps.rowPlusEquals(cachedGradient, numFeatures,li,fv,)) cachedGradient[numFeatures*li + fi] += (-instanceWeight * Ncounts * probs[li][fi]); // } } } } //logger.info ("-Expectations:"); cachedGradient.print(); // Incorporate prior on parameters if (usingHyperbolicPrior) { for (int li = 0; li < numLabels; li++) for (int fi = 0; fi < numFeatures; fi++) cachedValue += (hyperbolicPriorSlope / hyperbolicPriorSharpness * Math.log (Maths.cosh (hyperbolicPriorSharpness * parameters[li *numFeatures + fi]))); } else { for (int li = 0; li < numLabels; li++) for (int fi = 0; fi < numFeatures; fi++) { double param = parameters[li*numFeatures + fi]; cachedValue += param * param / (2 * gaussianPriorVariance); } } cachedValue *= -1.0; // MAXIMIZE, NOT MINIMIZE cachedValueStale = false; progressLogger.info ("Value (loglikelihood) = "+cachedValue); } return cachedValue; } // CPAL first get value, then gradient public void getValueGradient (double [] buffer) { // Gradient is (constraint - expectation - parameters/gaussianPriorVariance) if (cachedGradientStale) { numGetValueGradientCalls++; if (cachedValueStale) // This will fill in the cachedGradient with the "-expectation" getValue (); // cachedGradient contains the negative expectations // expectations are model expectations and constraints are // empirical expectations MatrixOps.plusEquals (cachedGradient, constraints); // CPAL - we need a second copy of the constraints // - actually, we only want this for the feature values // - I've moved this up into getValue //if (usingMultiConditionalTraining){ // MatrixOps.plusEquals(cachedGradient, constraints); //} // Incorporate prior on parameters if (usingHyperbolicPrior) { throw new UnsupportedOperationException ("Hyperbolic prior not yet implemented."); } else { MatrixOps.plusEquals (cachedGradient, parameters, -1.0 / gaussianPriorVariance); } // A parameter may be set to -infinity by an external user. // We set gradient to 0 because the parameter's value can // never change anyway and it will mess up future calculations // on the matrix, such as norm(). MatrixOps.substitute (cachedGradient, Double.NEGATIVE_INFINITY, 0.0); // Set to zero all the gradient dimensions that are not among the selected features if (perLabelFeatureSelection == null) { for (int labelIndex = 0; labelIndex < numLabels; labelIndex++) MatrixOps.rowSetAll (cachedGradient, numFeatures, labelIndex, 0.0, featureSelection, false); } else { for (int labelIndex = 0; labelIndex < numLabels; labelIndex++) MatrixOps.rowSetAll (cachedGradient, numFeatures, labelIndex, 0.0, perLabelFeatureSelection[labelIndex], false); } cachedGradientStale = false; } assert (buffer != null && buffer.length == parameters.length); System.arraycopy (cachedGradient, 0, buffer, 0, cachedGradient.length); } public double sumNegLogProb (double a, double b) { if (a == Double.POSITIVE_INFINITY && b == Double.POSITIVE_INFINITY) return Double.POSITIVE_INFINITY; else if (a > b) return b - Math.log (1 + Math.exp(b-a)); else return a - Math.log (1 + Math.exp(a-b)); } } }