package edu.stanford.nlp.loglinear.learning;

import edu.stanford.nlp.loglinear.inference.CliqueTree;
import edu.stanford.nlp.loglinear.model.ConcatVector;
import edu.stanford.nlp.loglinear.model.GraphicalModel;

import java.util.Iterator;

/**
 * Created on 8/23/15.
 * @author keenon
 * <p>
 * Generates (potentially noisy, no promises about exactness) gradients from a batch of examples that were provided to
 * the system.
 */
public class LogLikelihoodDifferentiableFunction extends AbstractDifferentiableFunction<GraphicalModel> {
  // This sets a gold observation for a model to use as training gold data
  public static final String VARIABLE_TRAINING_VALUE = "learning.LogLikelihoodDifferentiableFunction.VARIABLE_TRAINING_VALUE";

  /**
   * Gets a summary of the log-likelihood of a single model at a point.
   * <p>
   * It assumes that the models have their gold training observations as metadata in
   * LogLikelihoodDifferentiableFunction.VARIABLE_TRAINING_VALUE. The models can also have observations fixed in
   * CliqueTree.VARIABLE_OBSERVED_VALUE, but these will be considered fixed and will not be learned against.
   *
   * @param model    the model to find the log-likelihood of
   * @param weights  the weights to use
   * @param gradient the vector that the gradient at this point is accumulated into
   * @return the value of the function at that point
   */
  @Override
  public double getSummaryForInstance(GraphicalModel model, ConcatVector weights, ConcatVector gradient) {
    double logLikelihood = 0.0;

    CliqueTree.MarginalResult result = new CliqueTree(model, weights).calculateMarginals();

    // Cache everything in preparation for multiple redundant requests for feature vectors
    for (GraphicalModel.Factor factor : model.factors) {
      factor.featuresTable.cacheVectors();
    }

    // Subtract the log partition function
    logLikelihood -= Math.log(result.partitionFunction);
    // Quit if we have an infinite partition function
    if (Double.isInfinite(logLikelihood)) return 0.0;

    // Add the contribution of the assignment determined by the training values
    for (GraphicalModel.Factor factor : model.factors) {
      // Find the assignment, taking both fixed and training observed variables into account
      int[] assignment = new int[factor.neigborIndices.length];
      for (int i = 0; i < assignment.length; i++) {
        int deterministicValue = getDeterministicAssignment(result.marginals[factor.neigborIndices[i]]);
        if (deterministicValue != -1) {
          assignment[i] = deterministicValue;
        }
        else {
          int trainingObservation = Integer.parseInt(model.getVariableMetaDataByReference(factor.neigborIndices[i])
              .get(LogLikelihoodDifferentiableFunction.VARIABLE_TRAINING_VALUE));
          assignment[i] = trainingObservation;
        }
      }

      ConcatVector features = factor.featuresTable.getAssignmentValue(assignment).get();
      // Add the score of this observed assignment to the log-likelihood
      logLikelihood += features.dotProduct(weights);
      // Add the feature vector from this observation to the gradient
      gradient.addVectorInPlace(features, 1.0);
    }

    // Take expectations over features given marginals
    // NOTE: This is extremely expensive. Not sure what to do about that
    for (GraphicalModel.Factor factor : model.factors) {
      // OPTIMIZATION:
      // Rather than use the standard iterator, which creates lots of int[] arrays on the heap that then need to be
      // GC'd, we use the fast version that just mutates one array. Since each assignment is read only once here,
      // this is ideal.
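      // The loop below accumulates the model-expectation half of the gradient: for a log-linear model,
      // d/dw log p(y|x) = f(x, y_gold) - sum over y' of p(y'|x) * f(x, y'). The gold features were added with
      // weight 1.0 above, so here each assignment's features are subtracted, weighted by its joint marginal.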
      Iterator<int[]> fastPassByReferenceIterator = factor.featuresTable.fastPassByReferenceIterator();
      int[] assignment = fastPassByReferenceIterator.next();
      while (true) {
        // Calculate the probability of this assignment
        double assignmentProb = result.jointMarginals.get(factor).getAssignmentValue(assignment);
        // Subtract this feature set, weighted by the probability of the assignment
        if (assignmentProb > 0) {
          gradient.addVectorInPlace(factor.featuresTable.getAssignmentValue(assignment).get(), -assignmentProb);
        }
        // This mutates the assignment[] array, rather than creating a new one
        if (fastPassByReferenceIterator.hasNext()) fastPassByReferenceIterator.next();
        else break;
      }
    }

    // Uncache everything, now that the computations have completed
    for (GraphicalModel.Factor factor : model.factors) {
      factor.featuresTable.releaseCache();
    }

    return logLikelihood;
  }

  /**
   * Finds the deterministic assignment forced by a distribution, or returns -1 if none exists.
   *
   * @param distribution the potentially deterministic distribution
   * @return the assignment given by the distribution with probability 1, if one exists, else -1
   */
  private static int getDeterministicAssignment(double[] distribution) {
    int assignment = -1;
    for (int i = 0; i < distribution.length; i++) {
      if (distribution[i] == 1.0) {
        // More than one entry with probability 1 is malformed, so treat it as non-deterministic
        if (assignment == -1) assignment = i;
        else return -1;
      }
      else if (distribution[i] != 0.0) return -1;
    }
    return assignment;
  }
}
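// ---------------------------------------------------------------------------
// A minimal usage sketch, not part of the original class. It builds a toy
// model with a single binary variable and one unary factor, marks the gold
// assignment through VARIABLE_TRAINING_VALUE, and reads back the value and
// gradient. The class name is illustrative, and the GraphicalModel.addFactor
// and ConcatVector.setDenseComponent calls are assumed to have the signatures
// used elsewhere in the loglinear package; treat this as a sketch, not as the
// canonical way to drive training (the batch optimizers normally call
// getSummaryForInstance for you).
class LogLikelihoodUsageSketch {
  public static void main(String[] args) {
    GraphicalModel model = new GraphicalModel();

    // One unary factor over variable 0, which takes 2 values; each assignment
    // gets a one-hot feature vector stored in a single dense component
    model.addFactor(new int[]{0}, new int[]{2}, (assignment) -> {
      ConcatVector features = new ConcatVector(1);
      double[] oneHot = new double[2];
      oneHot[assignment[0]] = 1.0;
      features.setDenseComponent(0, oneHot);
      return features;
    });

    // Mark value 1 as the gold training assignment for variable 0
    model.getVariableMetaDataByReference(0)
        .put(LogLikelihoodDifferentiableFunction.VARIABLE_TRAINING_VALUE, "1");

    // Evaluate at a weight vector that slightly prefers value 1
    ConcatVector weights = new ConcatVector(1);
    weights.setDenseComponent(0, new double[]{0.0, 0.5});
    ConcatVector gradient = new ConcatVector(1);

    double logLikelihood =
        new LogLikelihoodDifferentiableFunction().getSummaryForInstance(model, weights, gradient);
    System.out.println("log-likelihood: " + logLikelihood);
  }
}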