CRFOptimizableByEntropyRegularization.java example

Explorer
topic-modeling-master
/* Copyright (C) 2009 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */

package cc.mallet.fst.semi_supervised;

import java.io.Serializable;
import java.util.logging.Logger;
import cc.mallet.types.FeatureVectorSequence;
import cc.mallet.types.InstanceList;
import cc.mallet.fst.CRF;
import cc.mallet.fst.SumLattice;
import cc.mallet.fst.SumLatticeDefault;
import cc.mallet.fst.Transducer;
import cc.mallet.optimize.Optimizable;
import cc.mallet.util.MalletLogger;

/**
 * A CRF objective function that is the entropy of the CRF's
 * predictions on unlabeled data.
 * 
 * References:
 * Feng Jiao, Shaojun Wang, Chi-Hoon Lee, Russell Greiner, Dale Schuurmans
 * "Semi-supervised conditional random fields for improved sequence segmentation and labeling"
 * ACL 2006
 *
 * Gideon Mann, Andrew McCallum
 * "Efficient Computation of Entropy Gradient for Semi-Supervised Conditional Random Fields"
 * HLT/NAACL 2007
 *
 * @author Gaurav Chandalia
 * @author Gregory Druck
 */
public class CRFOptimizableByEntropyRegularization implements Optimizable.ByGradientValue,
                                                   Serializable {
  private static Logger logger = MalletLogger.getLogger(CRFOptimizableByEntropyRegularization.class.getName());

	private int cachedValueWeightsStamp = -1;
	private int cachedGradientWeightsStamp = -1;
  
  // model's expectations according to entropy reg.
  protected CRF.Factors expectations;
  // used to update gradient
  protected Transducer.Incrementor incrementor;

  // contains labeled and unlabeled data
  protected InstanceList data;
  // the model
  protected CRF crf;

  // scale entropy values,
  // typically, (entropyRegGamma * numLabeled / numUnlabeled)
  protected double scalingFactor;

  // log probability of all the sequences, this is also the entropy due to all
  // the instances (updated in computeExpectations())
  protected double cachedValue;
  // gradient due to this optimizable (updated in getValueGradient())
  protected double[] cachedGradient;

  /**
   * Initializes the structures.
   */
  public CRFOptimizableByEntropyRegularization(CRF crf, InstanceList ilist,
                                    double scalingFactor) {
    data = ilist;
    this.crf = crf;
    this.scalingFactor = scalingFactor;

    // initialize the expectations using the model
    expectations = new CRF.Factors(crf);
    incrementor = expectations.new Incrementor();

    cachedValue = 0.0;
    cachedGradient = new double[crf.getParameters().getNumFactors()];
  }

  /**
   * Initializes the structures (sets the scaling factor to 1.0).
   */
  public CRFOptimizableByEntropyRegularization(CRF crf, InstanceList ilist) {
    this(crf, ilist, 1.0);
  }

  public double getScalingFactor() {
    return scalingFactor;
  }

  public void setScalingFactor(double scalingFactor) {
    this.scalingFactor = scalingFactor;
  }

  /**
   * Resets, computes and fills expectations from all instances, also updating
   * the entropy value. <p>
   *
   * Analogous to <tt>CRFOptimizableByLabelLikelihood.getExpectationValue<tt>.
   */
  public void computeExpectations() {
  	expectations.zero();

    // now, update the expectations due to each instance for entropy reg.
    for (int ii = 0; ii < data.size(); ii++) {
      FeatureVectorSequence input = (FeatureVectorSequence) data.get(ii).getData();
      SumLattice lattice = new SumLatticeDefault(crf,input, true);

      // udpate the expectations
      EntropyLattice entropyLattice = new EntropyLattice(
          input, lattice.getGammas(), lattice.getXis(), crf,
          incrementor, scalingFactor);
      cachedValue += entropyLattice.getEntropy();
    }
  }

  public double getValue() {
		if (crf.getWeightsValueChangeStamp() != cachedValueWeightsStamp) {
		  // The cached value is not up to date; it was calculated for a different set of CRF weights.
		  cachedValueWeightsStamp = crf.getWeightsValueChangeStamp(); 
	  	
	  	cachedValue = 0;
	  	computeExpectations();
	    cachedValue = scalingFactor * cachedValue;
			assert(!Double.isNaN(cachedValue) && !Double.isInfinite(cachedValue))
	        : "Likelihood due to Entropy Regularization is NaN/Infinite";
	
	    logger.info("getValue() (entropy regularization) = " + cachedValue);
		}
  	return cachedValue;
  }

  public void getValueGradient(double[] buffer) {
		if (cachedGradientWeightsStamp != crf.getWeightsValueChangeStamp()) {
			cachedGradientWeightsStamp = crf.getWeightsValueChangeStamp(); // cachedGradient will soon no longer be stale
  	
  	  getValue();
  	
      // if this fails then look in computeExpectations
      expectations.assertNotNaNOrInfinite();
  	  // fill up gradient
  	  expectations.getParameters(cachedGradient);
		}
    System.arraycopy(cachedGradient, 0, buffer, 0, cachedGradient.length);
  }

  // some get/set methods that have to be implemented
  public int getNumParameters() {
    return crf.getParameters().getNumFactors();
  }

  public void getParameters(double[] buffer) {
    crf.getParameters().getParameters(buffer);
	}

  public void setParameters(double[] buffer) {
    crf.getParameters().setParameters(buffer);
    crf.weightsValueChanged();
	}

  public double getParameter(int index) {
    return crf.getParameters().getParameter(index);
  }

  public void setParameter(int index, double value) {
    crf.getParameters().setParameter(index, value);
    crf.weightsValueChanged();
	}

  // serialization stuff
  private static final long serialVersionUID = 1;
}