/* Copyright (C) 2011 Univ. of Massachusetts Amherst, Computer Science Dept. This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit). http://www.cs.umass.edu/~mccallum/mallet This software is provided under the terms of the Common Public License, version 1.0, as published by http://www.opensource.org. For further information, see the file `LICENSE' included with this distribution. */ package cc.mallet.classify; import java.util.ArrayList; import java.util.Arrays; import java.util.logging.Logger; import cc.mallet.classify.constraints.pr.MaxEntPRConstraint; import cc.mallet.optimize.Optimizable; import cc.mallet.types.FeatureVector; import cc.mallet.types.Instance; import cc.mallet.types.InstanceList; import cc.mallet.types.MatrixOps; import cc.mallet.util.MalletLogger; import cc.mallet.util.Maths; /** * Optimizable for training auxiliary model (q) for E-step/I-projection in PR training. * * @author Gregory Druck <a href="mailto:gdruck@cs.umass.edu">gdruck@cs.umass.edu</a> */ public class PRAuxClassifierOptimizable implements Optimizable.ByGradientValue { private static Logger logger = MalletLogger.getLogger(PRAuxClassifierOptimizable.class.getName()); private boolean cacheStale; private int numParameters; private double cachedValue; private double[] cachedGradient; private double[][] parameters; private double[][] baseDist; private PRAuxClassifier classifier; private ArrayList<MaxEntPRConstraint> constraints; private InstanceList trainingData; public PRAuxClassifierOptimizable(InstanceList trainingData, double[][] baseDistribution, PRAuxClassifier classifier) { this.trainingData = trainingData; this.baseDist = baseDistribution; this.classifier = classifier; this.parameters = classifier.getParameters(); this.constraints = classifier.getConstraintFeatures(); this.numParameters = 0; for (int i = 0; i < parameters.length; i++) { numParameters += parameters[i].length; } this.cachedValue = Double.NEGATIVE_INFINITY; this.cachedGradient = new double[numParameters]; this.cacheStale = true; } public int getNumParameters() { return numParameters; } public void getParameters(double[] buffer) { int start = 0; for (int i = 0; i < parameters.length; i++) { System.arraycopy(parameters[i], 0, buffer, start, parameters[i].length); start += parameters[i].length; } } public double getParameter(int index) { int start = 0; for (int i = 0; i < parameters.length; i++) { if (start < parameters[i].length) { return parameters[i][start]; } else { start -= parameters[i].length; } } throw new RuntimeException(index + " out of bounds."); } public void setParameters(double[] params) { int start = 0; for (int i = 0; i < parameters.length; i++) { System.arraycopy(params, start, parameters[i], 0, parameters[i].length); start += parameters[i].length; } cacheStale = true; } public void setParameter(int index, double value) { int start = 0; for (int i = 0; i < parameters.length; i++) { if (start < parameters[i].length) { parameters[i][start] = value; } else { start -= parameters[i].length; } } cacheStale = true; } public double getValueAndGradient(double[] gradient) { Arrays.fill(gradient, 0); classifier.zeroExpectations(); int numLabels = trainingData.getTargetAlphabet().size(); double value = 0.; //double sumLogP = 0; for (int ii = 0; ii < trainingData.size(); ii++) { double[] scores = new double[numLabels]; Instance instance = trainingData.get(ii); FeatureVector input = (FeatureVector) instance.getData (); double instanceWeight = trainingData.getInstanceWeight(ii); classifier.getClassificationScores(instance, scores); double logZ = Double.NEGATIVE_INFINITY; for (int li = 0; li < numLabels; li++) { if (baseDist != null && baseDist[ii][li] == 0) { scores[li] = Double.NEGATIVE_INFINITY; } else if (baseDist != null) { double logP = Math.log(baseDist[ii][li]); scores[li] += logP; } logZ = Maths.sumLogProb(logZ, scores[li]); } assert(!Double.isNaN(logZ)); value -= instanceWeight * logZ; if (Double.isNaN(value)) { logger.warning("Instance " + instance.getName() + " has NaN value."); continue; } if (Double.isInfinite(value)) { logger.warning("Instance " + instance.getName() + " has infinite value; skipping value and gradient"); continue; } // exp normalize MatrixOps.expNormalize(scores); // increment expectations for (MaxEntPRConstraint constraint : constraints) { constraint.incrementExpectations(input, scores, 1); } } int ci = 0; int start = 0; for (MaxEntPRConstraint constraint : constraints) { double[] temp = new double[parameters[ci].length]; value += constraint.getAuxiliaryValueContribution(parameters[ci]); constraint.getGradient(parameters[ci], temp); System.arraycopy(temp, 0, gradient, start, temp.length); start += temp.length; ci++; } logger.info("PR auxiliary value = " + value); return value; } public double getValue() { if (cacheStale) { cachedValue = getValueAndGradient(cachedGradient); cacheStale = false; } return cachedValue; } public void getValueGradient(double[] gradient) { if (cacheStale) { cachedValue = getValueAndGradient(cachedGradient); cacheStale = false; } System.arraycopy(cachedGradient, 0, gradient, 0, gradient.length); } }