// LambdaSolveTagger.java example
// (source tree: CoreNLP-master)
package edu.stanford.nlp.tagger.maxent; 
import edu.stanford.nlp.util.logging.Redwood;

import edu.stanford.nlp.maxent.Feature;
import edu.stanford.nlp.maxent.Problem;
import edu.stanford.nlp.maxent.iis.LambdaSolve;

import java.text.NumberFormat;
import java.io.DataInputStream;


/**
 * This module does the working out of lambda parameters for binary tagger
 * features.  It can use either IIS or CG.
 *
 * @author Kristina Toutanova
 * @version 1.0
 */
public class LambdaSolveTagger extends LambdaSolve  {

  /** A logger for this class; assigned once and never replaced, hence {@code final}. */
  private static final Redwood.RedwoodChannels log = Redwood.channels(LambdaSolveTagger.class);

  /*
   * A VERBOSE flag for suppressing extraneous printouts used to be declared
   * here. It is kept below, disabled, for reference only.
   */
  //@SuppressWarnings("unused")
  //private static final boolean VERBOSE = false;


  /**
   * Creates a solver over the given problem and initializes all working arrays.
   * <p>
   * {@code lambda} and {@code ftildeArr} get one slot per feature
   * ({@code p1.fSize}), {@code zlambda} one slot per x value
   * ({@code p1.data.xSize}) and {@code probConds} is a full
   * {@code xSize} by {@code ySize} table. The arrays are then filled by
   * {@link #initCondsZlambdaEtc()} and the superclass is switched to binary mode.
   *
   * @param p1 the problem; stored in {@code p}
   * @param eps1 stored in {@code eps} (presumably the convergence tolerance
   *             used by the superclass -- confirm against LambdaSolve)
   * @param fnumArr stored directly in {@code fnumArr}; no copy is made
   */
  LambdaSolveTagger(Problem p1, double eps1, byte[][] fnumArr) {
    this.p = p1;
    this.eps = eps1;
    this.fnumArr = fnumArr;

    // Per-feature arrays.
    lambda = new double[p1.fSize];
    ftildeArr = new double[p1.fSize];

    // Per-x arrays.
    zlambda = new double[p1.data.xSize];
    // cdm 2008: This square table is a memory hog. Is there anything we can do
    // to avoid allocating it?
    probConds = new double[p1.data.xSize][p1.data.ySize];

    initCondsZlambdaEtc();
    super.setBinary();
  }


  /* Unused.
  @SuppressWarnings({"UnusedDeclaration"})
  private void readOldLambdas(String filename, String oldfilename) {
    double[] lambdaold;
    lambdaold = read_lambdas(oldfilename);
    HashMap<FeatureKey,Integer> oldAssocs = GlobalHolder.readAssociations(oldfilename);
    HashMap<FeatureKey,Integer> newAssocs = GlobalHolder.readAssociations(filename);
    for (FeatureKey fk : oldAssocs.keySet()) {
      int numOld = GlobalHolder.getNum(fk, oldAssocs);
      int numNew = GlobalHolder.getNum(fk, newAssocs);
      if ((numOld > -1) && (numNew > -1)) {
        lambda[numNew] = lambdaold[numOld];
        updateConds(numNew, lambdaold[numOld]);
      }
    }
  }
  */

  /* --- unused
  LambdaSolveTagger(String filename) {
    this.readL(filename);
    super.setBinary();
  }
  --- */


  /** Initialize a trained LambdaSolveTagger.
   *  This is the version used when loading a saved tagger.
   *  Only the lambda array is used, and the rest is irrelevant, CDM thinks.
   *  <p>
   *  NOTE(review): this constructor itself assigns nothing but {@code lambda};
   *  {@code p}, {@code probConds}, {@code zlambda} and {@code ftildeArr} are
   *  not set here. Unless the superclass sets them, methods that read those
   *  fields (for example {@link #checkCorrectness()}) cannot be used on an
   *  instance built this way -- confirm against LambdaSolve.
   *
   *  @param dataStream Stream to load lambda parameters from.
   */
  LambdaSolveTagger(DataInputStream dataStream) {
    // Delegates the parsing of the stream to the inherited read_lambdas helper.
    lambda = read_lambdas(dataStream);
    // Same binary-mode switch that the other constructors of this class perform.
    super.setBinary();
  }

  /** Initialize a trained LambdaSolveTagger.
   *  This is the version used when creating a LambdaSolveTagger from
   *  a condensed lambda array.
   *  Only the lambda array is used, and the rest is irrelevant, CDM thinks.
   *  <p>
   *  NOTE(review): as with the stream-based constructor, no field other than
   *  {@code lambda} is assigned here, so methods reading {@code p},
   *  {@code probConds} or {@code ftildeArr} presumably must not be called on
   *  such an instance -- confirm against LambdaSolve.
   *
   *  @param lambda Array used as the lambda parameters (directly; no safety copy is made).
   */
  LambdaSolveTagger(double[] lambda) {
    // The caller's array is aliased, not copied: later changes to it are visible here.
    this.lambda = lambda;
    // Same binary-mode switch that the other constructors of this class perform.
    super.setBinary();
  }

  /**
   * Fills the working arrays with their starting values and logs progress.
   * <ul>
   *   <li>every {@code probConds[x][y]} becomes the uniform value
   *       {@code 1 / ySize};</li>
   *   <li>every {@code zlambda[x]} becomes {@code ySize};</li>
   *   <li>every {@code ftildeArr[i]} becomes the value returned by
   *       {@code ftilde()} on feature {@code i}; features for which that value
   *       is zero are reported in the log.</li>
   * </ul>
   * Expects {@code p}, {@code probConds}, {@code zlambda} and
   * {@code ftildeArr} to have been assigned already.
   */
  void initCondsZlambdaEtc() {
    final int xCount = p.data.xSize;
    final int yCount = p.data.ySize;

    // Conditional probabilities start out uniform over y.
    final double uniform = 1.0 / yCount;
    for (int x = 0; x < xCount; x++) {
      for (int y = 0; y < yCount; y++) {
        probConds[x][y] = uniform;
      }
    }
    log.info(" pcond initialized ");

    // One normalizer per x, starting at the number of y values.
    for (int x = 0; x < xCount; x++) {
      zlambda[x] = yCount;
    }
    log.info(" zlambda initialized ");

    // Cache each feature's ftilde() value.
    final int featureCount = p.fSize;
    for (int i = 0; i < featureCount; i++) {
      final double empirical = p.functions.get(i).ftilde();
      ftildeArr[i] = empirical;
      if (empirical == 0) {
        log.info(" Empirical expectation 0 for feature " + i);
      }
    }
    log.info(" ftildeArr initialized ");
  }


  /* --- unused
   *
   * Iteration for lambda[index].
   *
   * @return true if this lambda hasn't converged.
   *
  boolean iterate(int index, double err, MutableDouble ret) {
    double deltaL = 0.0;
    deltaL = newton(deltaL, index, err);
    lambda[index] = lambda[index] + deltaL;
    if (!(deltaL == deltaL)) {
      log.info(" NaN " + index + ' ' + deltaL);
    }
    ret.set(deltaL);
    return (Math.abs(deltaL) >= eps);
  }
  --- */

  /* --- unused:
   *
   * Finds the root of an equation by Newton's method. This is my
   * implementation. It might be improved if we looked at some official
   * library for numerical methods.
   *
  double newton(double lambda0, int index, double err) {
    double lambdaN = lambda0;
    int i = 0;
    do {
      i++;
      double lambdaP = lambdaN;
      double gPrimeVal = gprime(lambdaP, index);
      if (!(gPrimeVal == gPrimeVal)) {
        log.info("gPrime of " + lambdaP + ' ' + index + " is NaN " + gPrimeVal);
      }
      double gVal = g(lambdaP, index);
      if (gPrimeVal == 0.0) {
        return 0.0;
      }
      lambdaN = lambdaP - gVal / gPrimeVal;
      if (!(lambdaN == lambdaN)) {
        log.info("the division of " + gVal + ' ' + gPrimeVal + ' ' + index + " is NaN " + lambdaN);
        return 0;
      }
      if (Math.abs(lambdaN - lambdaP) < err) {
        return lambdaN;
      }
      if (i > 100) {
        if (Math.abs(gVal) > 1) {
          return 0;
        }
        return lambdaN;
      }
    } while (true);
  }
  --- */

 /* --- unused:
   *
   * This method updates the conditional probabilities in the model, resulting from the
   * update of lambda[index] to lambda[index]+deltaL .
   *
  void updateConds(int index, double deltaL) {
    //  for each x that (x,y)=true / exists y
    //  recalculate pcond(y,x) for all y
    int yTag = ((TaggerFeature) (p.functions.get(index))).getYTag();
    for (int i = 0; i < p.functions.get(index).len(); i++) {
      // update for this x
      double s = 0;
      int x = (p.functions.get(index)).getX(i);
      double zlambdaX = zlambda[x] + pcond(yTag, x) * zlambda[x] * (Math.exp(deltaL) - 1);
      for (int y = 0; y < p.data.ySize; y++) {
        probConds[x][y] = (probConds[x][y] * zlambda[x]) / zlambdaX;
        s = s + probConds[x][y];
      }
      s = s - probConds[x][yTag];
      probConds[x][yTag] = probConds[x][yTag] * Math.exp(deltaL);
      s = s + probConds[x][yTag];
      zlambda[x] = zlambdaX;
    }
  }
  --- */

  /* --- unused:
  double pcondCalc(int y, int x) {
    double zlambdaX;
    zlambdaX = 0.0;
    for (int y1 = 0; y1 < p.data.ySize; y1++) {
      double s = 0.0;
      for (int i = 0; i < p.fSize; i++) {
        s = s + lambda[i] * p.functions.get(i).getVal(x, y1);
      }
      zlambdaX = zlambdaX + Math.exp(s);
    }
    double s = 0.0;
    for (int i = 0; i < p.fSize; i++) {
      s = s + lambda[i] * p.functions.get(i).getVal(x, y);
    }
    return (1 / zlambdaX) * Math.exp(s);
  }


  double fnumCalc(int x, int y) {
    double s = 0.0;
    for (int i = 0; i < p.fSize; i++) {
      //this is slow
      s = s + p.functions.get(i).getVal(x, y);
    }
    return s;
  }
  --- */

  /**
   * Evaluates, for feature {@code index}, the sum over the feature's x values of
   * {@code ptildeX(x) * pcond(y, x) * exp(lambdaP * fnum(x, y))}, where
   * {@code y} is the feature's tag, and subtracts {@code ftildeArr[index]}.
   *
   * @param lambdaP the multiplier applied to {@code fnum(x, y)} inside the exponent
   * @param index index of the feature in {@code p.functions}; the feature must
   *              be a {@link TaggerFeature} if it has any x values
   * @return the sum described above minus {@code ftildeArr[index]}
   */
  double g(double lambdaP, int index) {
    final Feature feature = p.functions.get(index);
    double total = 0.0;
    for (int i = 0; i < feature.len(); i++) {
      // The cast stays inside the loop so a feature with no x values is never cast.
      final int y = ((TaggerFeature) feature).getYTag();
      final int x = feature.getX(i);
      total += p.data.ptildeX(x) * pcond(y, x) * Math.exp(lambdaP * fnum(x, y));
    }
    return total - ftildeArr[index];
  }

  /* --- unused
  double gprime(double lambdaP, int index) {
    double s = 0.0;
    for (int i = 0; i < p.functions.get(index).len(); i++) {
      int y = ((TaggerFeature) (p.functions.get(index))).getYTag();
      int x = (p.functions.get(index)).getX(i);
      s = s + p.data.ptildeX(x) * pcond(y, x) * 1 * Math.exp(lambdaP * fnum(x, y)) * fnum(x, y);
    }
    return s;
  }
  --- */

  /**
   * Computes the sum of {@code ptildeX(x) * pcond(y, x)} over the x values of
   * the feature, where {@code y} is the feature's tag. {@link #checkCorrectness()}
   * compares this value against {@code ftildeArr}.
   *
   * @param f the feature; must be a {@link TaggerFeature}
   * @return the sum described above
   * @throws ClassCastException if {@code f} is not a {@link TaggerFeature}
   */
  double fExpected(Feature f) {
    final TaggerFeature taggerFeature = (TaggerFeature) f;
    double expectation = 0.0;
    final int tag = taggerFeature.getYTag();
    int i = 0;
    while (i < f.len()) {
      final int x = taggerFeature.getX(i);
      expectation += p.data.ptildeX(x) * pcond(tag, x);
      i++;
    }
    return expectation;
  }


  /** Works out whether the model expectations match the empirical
   *  expectations.
   *  <p>
   *  Three checks are run, all of which write to the log:
   *  <ol>
   *    <li>lambdas whose absolute value exceeds 100 are reported (diagnostic only);</li>
   *    <li>for every feature, {@code ftildeArr[i]} is compared with
   *        {@link #fExpected(Feature)}; a difference above 0.001, or a NaN
   *        difference, marks the model as incorrect;</li>
   *    <li>for every x, the entries of {@code probConds[x]} are summed and
   *        rows not summing to one within 0.0001 (or summing to NaN) are
   *        reported (diagnostic only, the return value is not affected).</li>
   *  </ol>
   *  Reads {@code p}, {@code ftildeArr} and {@code probConds}, so those must
   *  have been initialized.
   *
   *  @return Whether the model is correct, i.e. every constraint of check 2 holds
   */
  @Override
  public boolean checkCorrectness() {
    log.info("Checking model correctness; x size " + p.data.xSize + ' ' + ", ysize " + p.data.ySize);

    NumberFormat nf = NumberFormat.getNumberInstance();
    nf.setMaximumFractionDigits(4);

    // Check 1: suspiciously large parameters (logged only).
    for (int f = 0; f < lambda.length; f++) {
      if (Math.abs(lambda[f]) > 100) {
        log.info(" Lambda too big " + lambda[f]);
        log.info(" empirical " + ftildeArr[f] + " expected " + fExpected(p.functions.get(f)));
      }
    }

    // Check 2: empirical vs. model expectation for every feature.
    boolean allSatisfied = true;
    for (int i = 0; i < ftildeArr.length; i++) {
      // Computed once and reused for both the comparison and the log message.
      double expected = fExpected(p.functions.get(i));
      double diff = Math.abs(ftildeArr[i] - expected);
      // Written as !(diff <= tol) rather than (diff > tol) so that a NaN
      // difference counts as a violated constraint instead of passing silently.
      if (!(diff <= 0.001)) {
        allSatisfied = false;
        log.info("Constraint " + i + " not satisfied emp " + nf.format(ftildeArr[i]) + " exp " + nf.format(expected) + " diff " + nf.format(diff) + " lambda " + nf.format(lambda[i]));
      }
    }

    // Check 3: each row of conditional probabilities should sum to one (logged only).
    for (int x = 0; x < p.data.xSize; x++) {
      double rowSum = 0.0;
      for (int y = 0; y < p.data.ySize; y++) {
        rowSum = rowSum + probConds[x][y];
      }
      // Negated form again so that a NaN row sum is reported too.
      if (!(Math.abs(rowSum - 1) <= 0.0001)) {
        for (int y = 0; y < p.data.ySize; y++) {
          log.info(y + " : " + probConds[x][y]);
        }
        log.info("probabilities do not sum to one " + x + ' ' + (float) rowSum);
      }
    }
    return allSatisfied;
  }

  /* --- unused
  double ZAlfa(double alfa, Feature f, int x) {
    double s = 0.0;
    for (int y = 0; y < p.data.ySize; y++) {
      s = s + pcond(y, x) * Math.exp(alfa * f.getVal(x, y));
    }
    return s;
  }
  --- */

  /* ---
  private static double[] read_lambdas(String modelFilename) {
    if (VERBOSE) {
      log.info(" entering read");
    }
    try {
      double[] lambdaold;
//      InDataStreamFile rf=new InDataStreamFile(modelFilename+".holder.prob");
//      int xSize=rf.readInt();
//      int ySize=rf.readInt();
//      if (VERBOSE) log.info("x y "+xSize+" "+ySize);
//      //rf.seek(rf.getFilePointer()+xSize*ySize*8);
//      int funsize=rf.readInt();
//      lambdaold=new double[funsize];
//      byte[] b=new byte[funsize*8];
//      rf.read(b);
//      lambdaold=Convert.byteArrToDoubleArr(b);
//      rf.close();
      DataInputStream dis = new DataInputStream(new FileInputStream(modelFilename + ".holder.prob"));
      int xSize = dis.readInt();
      int ySize = dis.readInt();
      if (VERBOSE) {
        log.info("x y " + xSize + ' ' + ySize);
      }
      int funsize = dis.readInt();
      byte[] b = new byte[funsize * 8];
      if (dis.read(b) != funsize * 8) { log.info("Rewrite read_lambdas!"); }
      lambdaold = Convert.byteArrToDoubleArr(b);
      dis.close();
      return lambdaold;
    } catch (IOException e) {
      e.printStackTrace();
    }
    return null;
  }
  --- */

}