package edu.stanford.nlp.loglinear.learning;

import edu.stanford.nlp.loglinear.inference.CliqueTree;
import edu.stanford.nlp.loglinear.model.ConcatVector;
import edu.stanford.nlp.loglinear.model.GraphicalModel;

import java.util.Iterator;

/**
 * Created on 8/23/15.
 * @author keenon
 * <p>
 * Generates (potentially noisy, no promises about exactness) gradients from a batch of examples that were provided to
 * the system.
 */
public class LogLikelihoodDifferentiableFunction extends AbstractDifferentiableFunction<GraphicalModel> {
  // This sets a gold observation for a model to use as training gold data
  public static final String VARIABLE_TRAINING_VALUE = "learning.LogLikelihoodDifferentiableFunction.VARIABLE_TRAINING_VALUE";

  /**
   * Gets a summary of the log-likelihood of a single model at a point.
   * <p>
   * It assumes that the models have their gold training observations as metadata in
   * LogLikelihoodDifferentiableFunction.VARIABLE_TRAINING_VALUE. The models can also have observations fixed in
   * CliqueTree.VARIABLE_OBSERVED_VALUE, but these will be considered fixed and will not be learned against.
   *
   * @param model    the model to find the log-likelihood of
   * @param weights  the weights to use
   * @param gradient the vector that the gradient at this point is accumulated into
   * @return the value of the function at that point
   */
  @Override
  public double getSummaryForInstance(GraphicalModel model, ConcatVector weights, ConcatVector gradient) {
    double logLikelihood = 0.0;

    CliqueTree.MarginalResult result = new CliqueTree(model, weights).calculateMarginals();

    // Cache everything in preparation for multiple redundant requests for feature vectors
    for (GraphicalModel.Factor factor : model.factors) {
      factor.featuresTable.cacheVectors();
    }

    // Subtract the log partition function
    logLikelihood -= Math.log(result.partitionFunction);
    // Quit if we have an infinite partition function
    if (Double.isInfinite(logLikelihood)) return 0.0;

    // Add the contribution of the assignment determined by the training values
    for (GraphicalModel.Factor factor : model.factors) {
      // Find the assignment, taking both fixed and training observed variables into account
      int[] assignment = new int[factor.neigborIndices.length];
      for (int i = 0; i < assignment.length; i++) {
        int deterministicValue = getDeterministicAssignment(result.marginals[factor.neigborIndices[i]]);
        if (deterministicValue != -1) {
          assignment[i] = deterministicValue;
        }
        else {
          int trainingObservation = Integer.parseInt(model.getVariableMetaDataByReference(factor.neigborIndices[i])
              .get(LogLikelihoodDifferentiableFunction.VARIABLE_TRAINING_VALUE));
          assignment[i] = trainingObservation;
        }
      }

      ConcatVector features = factor.featuresTable.getAssignmentValue(assignment).get();
      // Add the score of this observed assignment to the log-likelihood
      logLikelihood += features.dotProduct(weights);
      // Add the feature vector from this observation to the gradient
      gradient.addVectorInPlace(features, 1.0);
    }

    // Take expectations over features given marginals
    // NOTE: This is extremely expensive. Not sure what to do about that
    for (GraphicalModel.Factor factor : model.factors) {
      // OPTIMIZATION:
      // Rather than use the standard iterator, which creates lots of int[] arrays on the heap that then need to be
      // GC'd, we use the fast version that just mutates one array. Since each assignment is read only once here,
      // this is ideal.
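      // The loop below accumulates the model-expectation half of the gradient: for a log-linear model,
      // d/dw log p(y|x) = f(x, y_gold) - sum over y' of p(y'|x) * f(x, y'). The gold features were added with
      // weight 1.0 above, so here each assignment's features are subtracted, weighted by its joint marginal.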
      Iterator<int[]> fastPassByReferenceIterator = factor.featuresTable.fastPassByReferenceIterator();
      int[] assignment = fastPassByReferenceIterator.next();
      while (true) {
        // Calculate the probability of this assignment
        double assignmentProb = result.jointMarginals.get(factor).getAssignmentValue(assignment);
        // Subtract this feature set, weighted by the probability of the assignment
        if (assignmentProb > 0) {
          gradient.addVectorInPlace(factor.featuresTable.getAssignmentValue(assignment).get(), -assignmentProb);
        }
        // This mutates the assignment[] array, rather than creating a new one
        if (fastPassByReferenceIterator.hasNext()) fastPassByReferenceIterator.next();
        else break;
      }
    }

    // Uncache everything, now that the computations have completed
    for (GraphicalModel.Factor factor : model.factors) {
      factor.featuresTable.releaseCache();
    }

    return logLikelihood;
  }

  /**
   * Finds the deterministic assignment forced by a distribution, or returns -1 if none exists.
   *
   * @param distribution the potentially deterministic distribution
   * @return the assignment given by the distribution with probability 1, if one exists, else -1
   */
  private static int getDeterministicAssignment(double[] distribution) {
    int assignment = -1;
    for (int i = 0; i < distribution.length; i++) {
      if (distribution[i] == 1.0) {
        // More than one entry with probability 1 is malformed, so treat it as non-deterministic
        if (assignment == -1) assignment = i;
        else return -1;
      }
      else if (distribution[i] != 0.0) return -1;
    }
    return assignment;
  }
}
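// ---------------------------------------------------------------------------
// A minimal usage sketch, not part of the original class. It builds a toy
// model with a single binary variable and one unary factor, marks the gold
// assignment through VARIABLE_TRAINING_VALUE, and reads back the value and
// gradient. The class name is illustrative, and the GraphicalModel.addFactor
// and ConcatVector.setDenseComponent calls are assumed to have the signatures
// used elsewhere in the loglinear package; treat this as a sketch, not as the
// canonical way to drive training (the batch optimizers normally call
// getSummaryForInstance for you).
class LogLikelihoodUsageSketch {
  public static void main(String[] args) {
    GraphicalModel model = new GraphicalModel();

    // One unary factor over variable 0, which takes 2 values; each assignment
    // gets a one-hot feature vector stored in a single dense component
    model.addFactor(new int[]{0}, new int[]{2}, (assignment) -> {
      ConcatVector features = new ConcatVector(1);
      double[] oneHot = new double[2];
      oneHot[assignment[0]] = 1.0;
      features.setDenseComponent(0, oneHot);
      return features;
    });

    // Mark value 1 as the gold training assignment for variable 0
    model.getVariableMetaDataByReference(0)
        .put(LogLikelihoodDifferentiableFunction.VARIABLE_TRAINING_VALUE, "1");

    // Evaluate at a weight vector that slightly prefers value 1
    ConcatVector weights = new ConcatVector(1);
    weights.setDenseComponent(0, new double[]{0.0, 0.5});
    ConcatVector gradient = new ConcatVector(1);

    double logLikelihood =
        new LogLikelihoodDifferentiableFunction().getSummaryForInstance(model, weights, gradient);
    System.out.println("log-likelihood: " + logLikelihood);
  }
}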