package edu.stanford.nlp.tagger.maxent; import edu.stanford.nlp.util.logging.Redwood; import edu.stanford.nlp.maxent.Feature; import edu.stanford.nlp.maxent.Problem; import edu.stanford.nlp.maxent.iis.LambdaSolve; import java.text.NumberFormat; import java.io.DataInputStream; /** * This module does the working out of lambda parameters for binary tagger * features. It can use either IIS or CG. * * @author Kristina Toutanova * @version 1.0 */ public class LambdaSolveTagger extends LambdaSolve { /** A logger for this class */ private static Redwood.RedwoodChannels log = Redwood.channels(LambdaSolveTagger.class); /** * Suppress extraneous printouts */ //@SuppressWarnings("unused") //private static final boolean VERBOSE = false; LambdaSolveTagger(Problem p1, double eps1, byte[][] fnumArr) { p = p1; eps = eps1; // newtonerr = nerr1; lambda = new double[p1.fSize]; // lambda_converged = new boolean[p1.fSize]; // cdm 2008: Below line is memory hog. Is there anything we can do to avoid this square array allocation? probConds = new double[p1.data.xSize][p1.data.ySize]; this.fnumArr = fnumArr; zlambda = new double[p1.data.xSize]; ftildeArr = new double[p.fSize]; initCondsZlambdaEtc(); super.setBinary(); } /* Unused. @SuppressWarnings({"UnusedDeclaration"}) private void readOldLambdas(String filename, String oldfilename) { double[] lambdaold; lambdaold = read_lambdas(oldfilename); HashMap<FeatureKey,Integer> oldAssocs = GlobalHolder.readAssociations(oldfilename); HashMap<FeatureKey,Integer> newAssocs = GlobalHolder.readAssociations(filename); for (FeatureKey fk : oldAssocs.keySet()) { int numOld = GlobalHolder.getNum(fk, oldAssocs); int numNew = GlobalHolder.getNum(fk, newAssocs); if ((numOld > -1) && (numNew > -1)) { lambda[numNew] = lambdaold[numOld]; updateConds(numNew, lambdaold[numOld]); } } } */ /* --- unused LambdaSolveTagger(String filename) { this.readL(filename); super.setBinary(); } --- */ /** Initialize a trained LambdaSolveTagger. * This is the version used when loading a saved tagger. * Only the lambda array is used, and the rest is irrelevant, CDM thinks. * * @param dataStream Stream to load lambda parameters from. */ LambdaSolveTagger(DataInputStream dataStream) { lambda = read_lambdas(dataStream); super.setBinary(); } /** Initialize a trained LambdaSolveTagger. * This is the version used when creating a LambdaSolveTagger from * a condensed lambda array. * Only the lambda array is used, and the rest is irrelevant, CDM thinks. * * @param lambda Array used as the lambda parameters (directly; no safety copy is made). */ LambdaSolveTagger(double[] lambda) { this.lambda = lambda; super.setBinary(); } void initCondsZlambdaEtc() { // init pcond for (int x = 0; x < p.data.xSize; x++) { for (int y = 0; y < p.data.ySize; y++) { probConds[x][y] = 1.0 / p.data.ySize; } } log.info(" pcond initialized "); // init zlambda for (int x = 0; x < p.data.xSize; x++) { zlambda[x] = p.data.ySize; } log.info(" zlambda initialized "); // init ftildeArr for (int i = 0; i < p.fSize; i++) { ftildeArr[i] = p.functions.get(i).ftilde(); if (ftildeArr[i] == 0) { log.info(" Empirical expectation 0 for feature " + i); } } log.info(" ftildeArr initialized "); } /* --- unused * * Iteration for lambda[index]. * * @return true if this lambda hasn't converged. * boolean iterate(int index, double err, MutableDouble ret) { double deltaL = 0.0; deltaL = newton(deltaL, index, err); lambda[index] = lambda[index] + deltaL; if (!(deltaL == deltaL)) { log.info(" NaN " + index + ' ' + deltaL); } ret.set(deltaL); return (Math.abs(deltaL) >= eps); } --- */ /* --- unused: * * Finds the root of an equation by Newton's method. This is my * implementation. It might be improved if we looked at some official * library for numerical methods. * double newton(double lambda0, int index, double err) { double lambdaN = lambda0; int i = 0; do { i++; double lambdaP = lambdaN; double gPrimeVal = gprime(lambdaP, index); if (!(gPrimeVal == gPrimeVal)) { log.info("gPrime of " + lambdaP + ' ' + index + " is NaN " + gPrimeVal); } double gVal = g(lambdaP, index); if (gPrimeVal == 0.0) { return 0.0; } lambdaN = lambdaP - gVal / gPrimeVal; if (!(lambdaN == lambdaN)) { log.info("the division of " + gVal + ' ' + gPrimeVal + ' ' + index + " is NaN " + lambdaN); return 0; } if (Math.abs(lambdaN - lambdaP) < err) { return lambdaN; } if (i > 100) { if (Math.abs(gVal) > 1) { return 0; } return lambdaN; } } while (true); } --- */ /* --- unused: * * This method updates the conditional probabilities in the model, resulting from the * update of lambda[index] to lambda[index]+deltaL . * void updateConds(int index, double deltaL) { // for each x that (x,y)=true / exists y // recalculate pcond(y,x) for all y int yTag = ((TaggerFeature) (p.functions.get(index))).getYTag(); for (int i = 0; i < p.functions.get(index).len(); i++) { // update for this x double s = 0; int x = (p.functions.get(index)).getX(i); double zlambdaX = zlambda[x] + pcond(yTag, x) * zlambda[x] * (Math.exp(deltaL) - 1); for (int y = 0; y < p.data.ySize; y++) { probConds[x][y] = (probConds[x][y] * zlambda[x]) / zlambdaX; s = s + probConds[x][y]; } s = s - probConds[x][yTag]; probConds[x][yTag] = probConds[x][yTag] * Math.exp(deltaL); s = s + probConds[x][yTag]; zlambda[x] = zlambdaX; } } --- */ /* --- unused: double pcondCalc(int y, int x) { double zlambdaX; zlambdaX = 0.0; for (int y1 = 0; y1 < p.data.ySize; y1++) { double s = 0.0; for (int i = 0; i < p.fSize; i++) { s = s + lambda[i] * p.functions.get(i).getVal(x, y1); } zlambdaX = zlambdaX + Math.exp(s); } double s = 0.0; for (int i = 0; i < p.fSize; i++) { s = s + lambda[i] * p.functions.get(i).getVal(x, y); } return (1 / zlambdaX) * Math.exp(s); } double fnumCalc(int x, int y) { double s = 0.0; for (int i = 0; i < p.fSize; i++) { //this is slow s = s + p.functions.get(i).getVal(x, y); } return s; } --- */ double g(double lambdaP, int index) { double s = 0.0; for (int i = 0; i < p.functions.get(index).len(); i++) { int y = ((TaggerFeature) p.functions.get(index)).getYTag(); int x = (p.functions.get(index)).getX(i); s = s + p.data.ptildeX(x) * pcond(y, x) * 1 * Math.exp(lambdaP * fnum(x, y)); } s = s - ftildeArr[index]; return s; } /* --- unused double gprime(double lambdaP, int index) { double s = 0.0; for (int i = 0; i < p.functions.get(index).len(); i++) { int y = ((TaggerFeature) (p.functions.get(index))).getYTag(); int x = (p.functions.get(index)).getX(i); s = s + p.data.ptildeX(x) * pcond(y, x) * 1 * Math.exp(lambdaP * fnum(x, y)) * fnum(x, y); } return s; } --- */ double fExpected(Feature f) { TaggerFeature tF = (TaggerFeature) f; double s = 0.0; int y = tF.getYTag(); for (int i = 0; i < f.len(); i++) { int x = tF.getX(i); s = s + p.data.ptildeX(x) * pcond(y, x); } return s; } /** Works out whether the model expectations match the empirical * expectations. * @return Whether the model is correct */ @Override public boolean checkCorrectness() { log.info("Checking model correctness; x size " + p.data.xSize + ' ' + ", ysize " + p.data.ySize); NumberFormat nf = NumberFormat.getNumberInstance(); nf.setMaximumFractionDigits(4); boolean flag = true; for (int f = 0; f < lambda.length; f++) { if (Math.abs(lambda[f]) > 100) { log.info(" Lambda too big " + lambda[f]); log.info(" empirical " + ftildeArr[f] + " expected " + fExpected(p.functions.get(f))); } } for (int i = 0; i < ftildeArr.length; i++) { double exp = Math.abs(ftildeArr[i] - fExpected(p.functions.get(i))); if (exp > 0.001) { flag = false; log.info("Constraint " + i + " not satisfied emp " + nf.format(ftildeArr[i]) + " exp " + nf.format(fExpected(p.functions.get(i))) + " diff " + nf.format(exp) + " lambda " + nf.format(lambda[i])); } } for (int x = 0; x < p.data.xSize; x++) { double s = 0.0; for (int y = 0; y < p.data.ySize; y++) { s = s + probConds[x][y]; } if (Math.abs(s - 1) > 0.0001) { for (int y = 0; y < p.data.ySize; y++) { log.info(y + " : " + probConds[x][y]); } log.info("probabilities do not sum to one " + x + ' ' + (float) s); } } return flag; } /* --- unused double ZAlfa(double alfa, Feature f, int x) { double s = 0.0; for (int y = 0; y < p.data.ySize; y++) { s = s + pcond(y, x) * Math.exp(alfa * f.getVal(x, y)); } return s; } --- */ /* --- private static double[] read_lambdas(String modelFilename) { if (VERBOSE) { log.info(" entering read"); } try { double[] lambdaold; // InDataStreamFile rf=new InDataStreamFile(modelFilename+".holder.prob"); // int xSize=rf.readInt(); // int ySize=rf.readInt(); // if (VERBOSE) log.info("x y "+xSize+" "+ySize); // //rf.seek(rf.getFilePointer()+xSize*ySize*8); // int funsize=rf.readInt(); // lambdaold=new double[funsize]; // byte[] b=new byte[funsize*8]; // rf.read(b); // lambdaold=Convert.byteArrToDoubleArr(b); // rf.close(); DataInputStream dis = new DataInputStream(new FileInputStream(modelFilename + ".holder.prob")); int xSize = dis.readInt(); int ySize = dis.readInt(); if (VERBOSE) { log.info("x y " + xSize + ' ' + ySize); } int funsize = dis.readInt(); byte[] b = new byte[funsize * 8]; if (dis.read(b) != funsize * 8) { log.info("Rewrite read_lambdas!"); } lambdaold = Convert.byteArrToDoubleArr(b); dis.close(); return lambdaold; } catch (IOException e) { e.printStackTrace(); } return null; } --- */ }