package edu.stanford.nlp.classify;

import java.util.Arrays;
import java.util.Collection;

import edu.stanford.nlp.ling.Datum;
import edu.stanford.nlp.math.ADMath;
import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.math.DoubleAD;
import edu.stanford.nlp.optimization.AbstractStochasticCachingDiffUpdateFunction;
import edu.stanford.nlp.optimization.StochasticCalculateMethods;
import edu.stanford.nlp.util.Index;

/**
 * Maximizes the conditional likelihood with a given prior.
 *
 * @author Dan Klein
 * @author Galen Andrew
 * @author Chris Cox (merged w/ SumConditionalObjectiveFunction, 2/16/05)
 * @author Sarah Spikes (Templatization, allowing an {@code Iterable<Datum<L, F>>} to be passed in instead of a {@code GeneralDataset<L, F>})
 * @author Angel Chang (support in place SGD - extend AbstractStochasticCachingDiffUpdateFunction)
 * @author Christopher Manning (cleaned out the cruft and sped it up in 2014)
 */
public class LogConditionalObjectiveFunction<L, F> extends AbstractStochasticCachingDiffUpdateFunction {

  protected final LogPrior prior;

  protected final int numFeatures;
  protected final int numClasses;

  /** Normally, this contains the data. The first index is the datum number,
   *  and then there is an array of feature indices for each datum.
   */
  protected final int[][] data;

  /** Alternatively, the data may be available from an Iterable in not yet
   *  indexed form. (In 2014, it's not clear any code actually uses this option.)
   *  And then you need an index for both.
   */
  protected final Iterable<Datum<L, F>> dataIterable;
  protected final Index<L> labelIndex;
  protected final Index<F> featureIndex;

  /** Same size as data if the features have values; null if the features are binary. */
  protected final double[][] values;

  /** The label of each data index. */
  protected final int[] labels;

  protected final float[] dataWeights;

  /** Whether to use summed conditional likelihood (sumConditional) or log conditional likelihood. */
  protected final boolean useSummedConditionalLikelihood;

  /** This is used to cache the numerator in batch methods. */
  protected double[] derivativeNumerator = null;

  /** The only reason this is around is because the Prior Functions don't handle stochastic calculations yet. */
  protected double[] priorDerivative = null;

  @Override
  public int domainDimension() {
    return numFeatures * numClasses;
  }

  @Override
  public int dataDimension() {
    return data.length;
  }

  private int classOf(int index) {
    return index % numClasses;
  }

  private int featureOf(int index) {
    return index / numClasses;
  }

  /** Converts a Phi feature number and class index into an f(x,y) feature index. */
  // [cdm2014: Tried inlining this; no big gains.]
  protected int indexOf(int f, int c) {
    return f * numClasses + c;
  }

  public double[][] to2D(double[] x) {
    double[][] x2 = new double[numFeatures][numClasses];
    for (int i = 0; i < numFeatures; i++) {
      for (int j = 0; j < numClasses; j++) {
        x2[i][j] = x[indexOf(i, j)];
      }
    }
    return x2;
  }

  /**
   * Calculate the conditional likelihood.
   * If {@code useSummedConditionalLikelihood} is {@code false} (the default),
   * this calculates standard (product) CL; otherwise this calculates summed CL.
   * What's the difference? See Klein and Manning's 2002 EMNLP paper.
   */
  @Override
  protected void calculate(double[] x) {
    // If the batchSize is 0 then use the regular calculate methods
    if (useSummedConditionalLikelihood) {
      calculateSCL(x);
    } else {
      calculateCL(x);
    }
  }
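
  /* Illustrative usage sketch (not part of the class API): one plausible way to train
   * weights with this objective is to hand it to a batch minimizer such as QNMinimizer.
   * This is a hedged sketch; the dataset variable and tolerance value are assumptions
   * made for illustration only.
   *
   *   GeneralDataset<String, String> dataset = ...;  // assumed to exist
   *   LogConditionalObjectiveFunction<String, String> objective =
   *       new LogConditionalObjectiveFunction<>(dataset);
   *   edu.stanford.nlp.optimization.QNMinimizer minimizer =
   *       new edu.stanford.nlp.optimization.QNMinimizer();
   *   double[] initial = new double[objective.domainDimension()];  // start at all zeros
   *   double[] weights = minimizer.minimize(objective, 1e-4, initial);
   *   double[][] weights2D = objective.to2D(weights);  // [feature][class] view of the weights
   */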
  /**
   * This function is used to come up with an estimate of the value / gradient based on only a small
   * portion of the data (referred to as the batch size, for lack of a better term). In this case
   * "batch" does not mean the whole data set; it should be thought of in the sense of
   * "a small batch of the data".
   */
  @Override
  public void calculateStochastic(double[] x, double[] v, int[] batch) {
    if (method.calculatesHessianVectorProduct() && v != null) {
      // This is used for stochastic methods that involve second order information (SMD for example)
      if (method.equals(StochasticCalculateMethods.AlgorithmicDifferentiation)) {
        calculateStochasticAlgorithmicDifferentiation(x, v, batch);
      } else if (method.equals(StochasticCalculateMethods.IncorporatedFiniteDifference)) {
        calculateStochasticFiniteDifference(x, v, finiteDifferenceStepSize, batch);
      }
    } else {
      // This is used for stochastic methods that don't need anything but the gradient (SGD)
      calculateStochasticGradientLocal(x, batch);
    }
  }

  /**
   * Calculate the summed conditional likelihood of this data by summing
   * conditional estimates.
   */
  private void calculateSCL(double[] x) {
    value = 0.0;
    Arrays.fill(derivative, 0.0);
    double[] sums = new double[numClasses];
    double[] probs = new double[numClasses];
    for (int d = 0; d < data.length; d++) {
      int[] features = data[d];
      // activation
      Arrays.fill(sums, 0.0);
      for (int c = 0; c < numClasses; c++) {
        for (int feature : features) {
          int i = indexOf(feature, c);
          sums[c] += x[i];
        }
      }
      // expectation (a fast log-sum-exp; the slow way is SloppyMath.logAdd over classes)
      double total = ArrayMath.logSum(sums);
      int ld = labels[d];
      // compute all the probs first, so probs[ld] is never read while still stale
      // from the previous datum
      for (int c = 0; c < numClasses; c++) {
        probs[c] = Math.exp(sums[c] - total);
      }
      for (int c = 0; c < numClasses; c++) {
        for (int feature : features) {
          int i = indexOf(feature, c);
          derivative[i] += probs[ld] * probs[c];
        }
      }
      // observed
      for (int feature : features) {
        int i = indexOf(feature, labels[d]);
        derivative[i] -= probs[ld];
      }
      value -= probs[ld];
    }
    // priors (a hard-coded quadratic prior with strength k = 1.0; this ignores this.prior)
    for (int i = 0; i < x.length; i++) {
      double k = 1.0;
      double w = x[i];
      value += k * w * w / 2.0;
      derivative[i] += k * w;
    }
  }

  /**
   * Calculate the conditional likelihood of this data by multiplying
   * conditional estimates. Full dataset batch estimation.
   */
  private void calculateCL(double[] x) {
    if (values != null) {
      rvfcalculate(x);
    } else if (dataIterable != null) {
      calculateCLiterable(x);
    } else {
      calculateCLbatch(x);
    }
  }
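
  /* The per-datum "expectation" step used throughout this class is a numerically
   * stable softmax. A minimal self-contained sketch of what ArrayMath.logSum plus
   * the Math.exp loop amount to (the method name logSumExp is illustrative, not part
   * of this class):
   *
   *   static double logSumExp(double[] sums) {
   *     double max = sums[0];
   *     for (double s : sums) { if (s > max) max = s; }       // shift by the max for stability
   *     double total = 0.0;
   *     for (double s : sums) { total += Math.exp(s - max); }
   *     return max + Math.log(total);
   *   }
   *
   * Then probs[c] = Math.exp(sums[c] - logSumExp(sums)) is P(c | datum), and the
   * computation does not overflow even when the raw class scores are large.
   */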
  private void calculateCLbatch(double[] x) {
    value = 0.0;
    // [cdm Mar 2014] Clearing derivative here seems unnecessary: it is allocated by ensure()
    // in AbstractCachingDiffFunction before calculate() is called, and after the next block
    // derivativeNumerator is copied into it.
    if (derivativeNumerator == null) {
      derivativeNumerator = new double[x.length];
      for (int d = 0; d < data.length; d++) {
        int[] features = data[d];
        for (int feature : features) {
          int i = indexOf(feature, labels[d]);
          if (dataWeights == null) {
            derivativeNumerator[i] -= 1;
          } else {
            derivativeNumerator[i] -= dataWeights[d];
          }
        }
      }
    }
    copy(derivative, derivativeNumerator);

    double[] sums = new double[numClasses];
    double[] probs = new double[numClasses];
    for (int d = 0; d < data.length; d++) {
      // activation
      Arrays.fill(sums, 0.0);
      int[] featuresArr = data[d];
      for (int feature : featuresArr) {
        for (int c = 0; c < numClasses; c++) {
          int i = indexOf(feature, c);
          sums[c] += x[i];
        }
      }
      // expectation (a fast log-sum-exp; the slow way is SloppyMath.logAdd over classes)
      double total = ArrayMath.logSum(sums);
      for (int c = 0; c < numClasses; c++) {
        probs[c] = Math.exp(sums[c] - total);
        if (dataWeights != null) {
          probs[c] *= dataWeights[d];
        }
      }
      for (int feature : featuresArr) {
        for (int c = 0; c < numClasses; c++) {
          int i = indexOf(feature, c);
          derivative[i] += probs[c];
        }
      }
      int labelindex = labels[d];
      double dV = sums[labelindex] - total;
      if (dataWeights != null) {
        dV *= dataWeights[d];
      }
      value -= dV;
    }
    value += prior.compute(x, derivative);
  }
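
  /* Why derivativeNumerator can be cached: for the negative log conditional likelihood,
   * the partial derivative with respect to the weight of feature f paired with class c is
   *
   *   d(-log CL)/dw[f,c]  =  sum over d of  f(d) * ( P(c | d) - 1[c == labels[d]] )
   *
   * The "- 1[c == labels[d]]" part (the empirical feature counts) does not depend on the
   * current weights x, so it is computed once, stored in derivativeNumerator, and copied
   * into derivative at the start of every call; only the model expectation P(c | d) has
   * to be recomputed each time the weights change.
   */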
  private void calculateCLiterable(double[] x) {
    value = 0.0;
    // [cdm Mar 2014] Clearing derivative here seems unnecessary: it is allocated by ensure()
    // in AbstractCachingDiffFunction before calculate() is called, and after the next block
    // derivativeNumerator is copied into it.
    if (derivativeNumerator == null) {
      derivativeNumerator = new double[x.length];
      // Use dataIterable if data is null and vice versa.
      // TODO: Make sure this works as expected!!
      for (Datum<L, F> datum : dataIterable) {
        Collection<F> features = datum.asFeatures();
        for (F feature : features) {
          int i = indexOf(featureIndex.indexOf(feature), labelIndex.indexOf(datum.label()));
          if (dataWeights == null) {
            derivativeNumerator[i] -= 1;
          }
          // dataWeights aren't supported here: the Iterable version has no datum index
          // to look a weight up by.
        }
      }
    }
    copy(derivative, derivativeNumerator);

    double[] sums = new double[numClasses];
    double[] probs = new double[numClasses];
    for (Datum<L, F> datum : dataIterable) {
      // activation
      Arrays.fill(sums, 0.0);
      Collection<F> features = datum.asFeatures();
      for (F feature : features) {
        for (int c = 0; c < numClasses; c++) {
          int i = indexOf(featureIndex.indexOf(feature), c);
          sums[c] += x[i];
        }
      }
      // expectation (a fast log-sum-exp; the slow way is SloppyMath.logAdd over classes)
      double total = ArrayMath.logSum(sums);
      for (int c = 0; c < numClasses; c++) {
        probs[c] = Math.exp(sums[c] - total);
      }
      for (F feature : features) {
        for (int c = 0; c < numClasses; c++) {
          int i = indexOf(featureIndex.indexOf(feature), c);
          derivative[i] += probs[c];
        }
      }
      int label = this.labelIndex.indexOf(datum.label());
      double dV = sums[label] - total;
      value -= dV;
    }
    value += prior.compute(x, derivative);
  }
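
  /* The finite-difference method below approximates the Hessian-vector product without
   * ever forming the Hessian, using
   *
   *   H.v  ~=  ( grad(x + h*v) - grad(x) ) / h
   *
   * for a small step size h (finiteDifferenceStepSize). A minimal hedged sketch of the
   * idea for an arbitrary gradient function (gradAt is a hypothetical helper, not part
   * of this class):
   *
   *   double[] hessianVector(double[] x, double[] v, double h) {
   *     double[] gx = gradAt(x);                                        // grad(x)
   *     double[] xhv = new double[x.length];
   *     for (int i = 0; i < x.length; i++) { xhv[i] = x[i] + h * v[i]; }
   *     double[] gxhv = gradAt(xhv);                                    // grad(x + h*v)
   *     double[] hv = new double[x.length];
   *     for (int i = 0; i < x.length; i++) { hv[i] = (gxhv[i] - gx[i]) / h; }
   *     return hv;
   *   }
   *
   * The method below fuses both gradient evaluations into a single pass over the batch.
   */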
  public void calculateStochasticFiniteDifference(double[] x, double[] v, double h, int[] batch) {
    // THOUGHTS:
    // Does applying the renormalization (g(x+hv)-g(x)) / h at each step along the way
    // introduce too much error to make this method numerically accurate?
    // -akleeman Feb 23 2007
    //
    // Answer to my own question: Feb 25th
    // Doesn't look like it!! With h = 1e-4 it seems like the finite difference makes almost
    // exactly the same step as the exact Hessian-vector product calculated through AD.
    // That said, it's probably (in the case of the Log Conditional Objective Function) logical
    // to only use finite difference. Unless of course the function is somehow nearly singular,
    // in which case finite difference could turn what is a convex problem into a singular
    // problem... NOT GOOD.

    if (values != null) {
      rvfcalculate(x);
      return;
    }

    value = 0.0;
    if (priorDerivative == null) {
      priorDerivative = new double[x.length];
    }

    double priorFactor = batch.length / (data.length * prior.getSigma() * prior.getSigma());
    derivative = ArrayMath.multiply(x, priorFactor);
    HdotV = ArrayMath.multiply(v, priorFactor);

    double[] sums = new double[numClasses];
    double[] sumsV = new double[numClasses];
    double[] probs = new double[numClasses];
    double[] probsV = new double[numClasses];

    for (int m : batch) {
      // Sets the index based on the current batch
      int[] features = data[m];
      // activation
      Arrays.fill(sums, 0.0);
      Arrays.fill(sumsV, 0.0);

      for (int c = 0; c < numClasses; c++) {
        for (int feature : features) {
          int i = indexOf(feature, c);
          sums[c] += x[i];
          sumsV[c] += x[i] + h * v[i];
        }
      }

      double total = ArrayMath.logSum(sums);
      double totalV = ArrayMath.logSum(sumsV);

      for (int c = 0; c < numClasses; c++) {
        probs[c] = Math.exp(sums[c] - total);
        probsV[c] = Math.exp(sumsV[c] - totalV);
        if (dataWeights != null) {
          probs[c] *= dataWeights[m];
          probsV[c] *= dataWeights[m];
        }
        for (int feature : features) {
          int i = indexOf(feature, c);
          derivative[i] += probs[c];
          HdotV[i] += (probsV[c] - probs[c]) / h;
          if (c == labels[m]) {
            derivative[i] -= 1;
          }
        }
      }

      double dV = sums[labels[m]] - total;
      if (dataWeights != null) {
        dV *= dataWeights[m];
      }
      value -= dV;
    }
    value += ((double) batch.length) / ((double) data.length) * prior.compute(x, priorDerivative);
  }

  public void calculateStochasticGradientLocal(double[] x, int[] batch) {
    if (values != null) {
      rvfcalculate(x);
      return;
    }

    value = 0.0;

    int batchSize = batch.length;
    if (priorDerivative == null) {
      priorDerivative = new double[x.length];
    }

    double priorFactor = batchSize / (data.length * prior.getSigma() * prior.getSigma());
    derivative = ArrayMath.multiply(x, priorFactor);

    double[] sums = new double[numClasses];
    double[] probs = new double[numClasses];

    for (int m : batch) {
      // Sets the index based on the current batch
      int[] features = data[m];
      // activation
      Arrays.fill(sums, 0.0);

      for (int c = 0; c < numClasses; c++) {
        for (int feature : features) {
          int i = indexOf(feature, c);
          sums[c] += x[i];
        }
      }

      double total = ArrayMath.logSum(sums);

      for (int c = 0; c < numClasses; c++) {
        probs[c] = Math.exp(sums[c] - total);
        if (dataWeights != null) {
          probs[c] *= dataWeights[m];
        }
        for (int feature : features) {
          int i = indexOf(feature, c);
          derivative[i] += probs[c];
          if (c == labels[m]) {
            derivative[i] -= 1;
          }
        }
      }

      double dV = sums[labels[m]] - total;
      if (dataWeights != null) {
        dV *= dataWeights[m];
      }
      value -= dV;
    }
    value += ((double) batchSize) / ((double) data.length) * prior.compute(x, priorDerivative);
  }
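
  /* In the stochastic methods above, the quadratic prior is rescaled by the fraction of
   * the data seen, so that summing the prior terms over all batches of one pass recovers
   * roughly the full-batch prior. For a Gaussian prior with variance sigma^2, the
   * full-batch gradient contribution for weight i is x[i] / sigma^2, and each batch
   * takes (batch.length / data.length) of it:
   *
   *   double priorFactor = batch.length / (data.length * sigma * sigma);
   *   // derivative[i] starts at x[i] * priorFactor instead of 0
   *
   * This is only a sketch of the scaling logic; the exact constant depends on how the
   * LogPrior is configured.
   */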
  @Override
  public double valueAt(double[] x, double xscale, int[] batch) {
    value = 0.0;
    double[] sums = new double[numClasses];

    for (int m : batch) {
      // Sets the index based on the current batch
      int[] features = data[m];
      Arrays.fill(sums, 0.0);

      for (int c = 0; c < numClasses; c++) {
        for (int f = 0; f < features.length; f++) {
          int i = indexOf(features[f], c);
          if (values != null) {
            sums[c] += x[i] * xscale * values[m][f];
          } else {
            sums[c] += x[i] * xscale;
          }
        }
      }

      double total = ArrayMath.logSum(sums);
      double dV = sums[labels[m]] - total;
      if (dataWeights != null) {
        dV *= dataWeights[m];
      }
      value -= dV;
    }
    return value;
  }

  @Override
  public double calculateStochasticUpdate(double[] x, double xscale, int[] batch, double gain) {
    value = 0.0;
    double[] sums = new double[numClasses];
    double[] probs = new double[numClasses];

    for (int m : batch) {
      // Sets the index based on the current batch
      int[] features = data[m];
      // activation
      Arrays.fill(sums, 0.0);

      for (int c = 0; c < numClasses; c++) {
        for (int f = 0; f < features.length; f++) {
          int i = indexOf(features[f], c);
          if (values != null) {
            sums[c] += x[i] * xscale * values[m][f];
          } else {
            sums[c] += x[i] * xscale;
          }
        }
      }

      // move the weights of the observed (gold) class towards the datum
      for (int f = 0; f < features.length; f++) {
        int i = indexOf(features[f], labels[m]);
        double v = (values != null) ? values[m][f] : 1;
        double delta = (dataWeights != null) ? dataWeights[m] * v : v;
        x[i] += delta * gain;
      }

      double total = ArrayMath.logSum(sums);

      // move every class's weights away from the model expectation
      for (int c = 0; c < numClasses; c++) {
        probs[c] = Math.exp(sums[c] - total);
        if (dataWeights != null) {
          probs[c] *= dataWeights[m];
        }
        for (int f = 0; f < features.length; f++) {
          int i = indexOf(features[f], c);
          double v = (values != null) ? values[m][f] : 1;
          double delta = probs[c] * v;
          x[i] -= delta * gain;
        }
      }

      double dV = sums[labels[m]] - total;
      if (dataWeights != null) {
        dV *= dataWeights[m];
      }
      value -= dV;
    }
    return value;
  }

  @Override
  public void calculateStochasticGradient(double[] x, int[] batch) {
    if (derivative == null) {
      derivative = new double[domainDimension()];
    }
    Arrays.fill(derivative, 0.0);
    double[] sums = new double[numClasses];
    double[] probs = new double[numClasses];
    for (int d : batch) {
      // Sets the index based on the current batch
      int[] features = data[d];
      // activation
      Arrays.fill(sums, 0.0);
      for (int c = 0; c < numClasses; c++) {
        for (int feature : features) {
          int i = indexOf(feature, c);
          sums[c] += x[i];
        }
      }
      // expectation (a fast log-sum-exp; the slow way is SloppyMath.logAdd over classes)
      double total = ArrayMath.logSum(sums);
      int ld = labels[d];
      // compute all the probs first, so probs[ld] is never read while still stale
      // from the previous datum
      for (int c = 0; c < numClasses; c++) {
        probs[c] = Math.exp(sums[c] - total);
      }
      for (int c = 0; c < numClasses; c++) {
        for (int feature : features) {
          int i = indexOf(feature, c);
          derivative[i] += probs[ld] * probs[c];
        }
      }
      // observed
      for (int feature : features) {
        int i = indexOf(feature, labels[d]);
        derivative[i] -= probs[ld];
      }
    }
  }
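
  /* The method below uses forward-mode algorithmic differentiation: every quantity is
   * carried as a DoubleAD pair (value, directional derivative along v), so one pass
   * computes both the gradient and the exact Hessian-vector product H.v. A minimal
   * hedged sketch of the mechanism on a toy function f(x) = exp(2x), where the
   * .getdot() slot propagates d/dt f(x + t*v) at t = 0:
   *
   *   DoubleAD xAD = new DoubleAD(1.0, 3.0);           // value x = 1.0, direction v = 3.0
   *   DoubleAD y = ADMath.exp(ADMath.plus(xAD, xAD));  // y = exp(2x)
   *   // y.getval() should be Math.exp(2.0)
   *   // y.getdot() should be 2 * 3.0 * Math.exp(2.0)  (chain rule, applied automatically)
   *
   * This assumes ADMath.plus and ADMath.exp behave as elementwise AD operations, as
   * their use below suggests; treat the exact values as illustrative.
   */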
  protected void calculateStochasticAlgorithmicDifferentiation(double[] x, double[] v, int[] batch) {
    System.err.print("*");

    // Initialize
    value = 0.0;

    DoubleAD[] derivativeAD = new DoubleAD[x.length];
    for (int i = 0; i < x.length; i++) {
      derivativeAD[i] = new DoubleAD(0.0, 0.0);
    }

    DoubleAD[] xAD = new DoubleAD[x.length];
    for (int i = 0; i < x.length; i++) {
      xAD[i] = new DoubleAD(x[i], v[i]);
    }

    // Initialize the sums
    DoubleAD[] sums = new DoubleAD[numClasses];
    for (int c = 0; c < numClasses; c++) {
      sums[c] = new DoubleAD(0, 0);
    }

    DoubleAD[] probs = new DoubleAD[numClasses];
    for (int c = 0; c < numClasses; c++) {
      probs[c] = new DoubleAD(0, 0);
    }

    // Copy the derivative numerator, and set up the vector v to be used for Hess*v
    for (int i = 0; i < x.length; i++) {
      xAD[i].set(x[i], v[i]);
      derivativeAD[i].set(0.0, 0.0);
    }

    for (int d = 0; d < batch.length; d++) {
      // Sets the index based on the current batch
      int m = (curElement + d) % data.length;
      int[] features = data[m];

      for (int c = 0; c < numClasses; c++) {
        sums[c].set(0.0, 0.0);
      }
      for (int c = 0; c < numClasses; c++) {
        for (int feature : features) {
          int i = indexOf(feature, c);
          sums[c] = ADMath.plus(sums[c], xAD[i]);
        }
      }

      DoubleAD total = ADMath.logSum(sums);

      for (int c = 0; c < numClasses; c++) {
        probs[c] = ADMath.exp(ADMath.minus(sums[c], total));
        if (dataWeights != null) {
          // weights are per datum, so index by the datum number m, not the batch position
          probs[c] = ADMath.multConst(probs[c], dataWeights[m]);
        }
        for (int feature : features) {
          int i = indexOf(feature, c);
          if (c == labels[m]) {
            derivativeAD[i].plusEqualsConst(-1.0);
          }
          derivativeAD[i].plusEquals(probs[c]);
        }
      }

      double dV = sums[labels[m]].getval() - total.getval();
      if (dataWeights != null) {
        dV *= dataWeights[m];
      }
      value -= dV;
    }

    // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    // DANGEROUS!!!!!!! Divide by zero possible!!!!!!!!!!
    // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    // Need to modify the prior class to handle AD -akleeman
    double[] tmp = new double[x.length];
    for (int i = 0; i < x.length; i++) {
      tmp[i] = derivativeAD[i].getval();
      derivativeAD[i].plusEquals(ADMath.multConst(xAD[i],
          batch.length / (data.length * prior.getSigma() * prior.getSigma())));
      derivative[i] = derivativeAD[i].getval();
      HdotV[i] = derivativeAD[i].getdot();
    }
    value += ((double) batch.length) / ((double) data.length) * prior.compute(x, tmp);
  }
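
  /* For real-valued features (rvfcalculate below), the class score is a true dot
   * product rather than a sum of weights:
   *
   *   sums[c] = sum over f of x[indexOf(features[f], c)] * vals[f]
   *
   * Binary features are the special case where every vals[f] is 1, which is why the
   * binary code paths above can skip the multiply. A two-feature toy example
   * (hypothetical numbers, purely illustrative):
   *
   *   // datum: feature 3 with value 0.5, feature 7 with value 2.0
   *   // score of class c: x[indexOf(3, c)] * 0.5 + x[indexOf(7, c)] * 2.0
   */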
  /**
   * Calculate conditional likelihood for datasets with real-valued features.
   * Currently this can calculate CL only (no support for SCL).
   * TODO: sum-conditional obj. fun. with RVFs.
   */
  protected void rvfcalculate(double[] x) {
    value = 0.0;
    if (derivativeNumerator == null) {
      derivativeNumerator = new double[x.length];
      for (int d = 0; d < data.length; d++) {
        final int[] features = data[d];
        final double[] vals = values[d];
        for (int f = 0; f < features.length; f++) {
          int i = indexOf(features[f], labels[d]);
          if (dataWeights == null) {
            derivativeNumerator[i] -= vals[f];
          } else {
            derivativeNumerator[i] -= dataWeights[d] * vals[f];
          }
        }
      }
    }
    copy(derivative, derivativeNumerator);

    double[] sums = new double[numClasses];
    double[] probs = new double[numClasses];
    for (int d = 0; d < data.length; d++) {
      final int[] features = data[d];
      final double[] vals = values[d];
      // activation
      Arrays.fill(sums, 0.0);
      for (int f = 0; f < features.length; f++) {
        final int feature = features[f];
        final double val = vals[f];
        for (int c = 0; c < numClasses; c++) {
          int i = indexOf(feature, c);
          sums[c] += x[i] * val;
        }
      }
      // expectation (a fast log-sum-exp; the slow way is SloppyMath.logAdd over classes).
      // It is faster to split the probs loop and the derivative loop: better memory striding.
      double total = ArrayMath.logSum(sums);
      for (int c = 0; c < numClasses; c++) {
        probs[c] = Math.exp(sums[c] - total);
        if (dataWeights != null) {
          probs[c] *= dataWeights[d];
        }
      }
      for (int f = 0; f < features.length; f++) {
        final int feature = features[f];
        final double val = vals[f];
        for (int c = 0; c < numClasses; c++) {
          int i = indexOf(feature, c);
          derivative[i] += probs[c] * val;
        }
      }
      double dV = sums[labels[d]] - total;
      if (dataWeights != null) {
        dV *= dataWeights[d];
      }
      value -= dV;
    }
    value += prior.compute(x, derivative);
  }

  public LogConditionalObjectiveFunction(GeneralDataset<L, F> dataset) {
    this(dataset, new LogPrior(LogPrior.LogPriorType.QUADRATIC));
  }

  public LogConditionalObjectiveFunction(GeneralDataset<L, F> dataset, LogPrior prior) {
    this(dataset, prior, false);
  }

  public LogConditionalObjectiveFunction(GeneralDataset<L, F> dataset, float[] dataWeights, LogPrior prior) {
    this(dataset, prior, false, dataWeights);
  }

  public LogConditionalObjectiveFunction(GeneralDataset<L, F> dataset, LogPrior prior, boolean useSumCondObjFun) {
    this(dataset, prior, useSumCondObjFun, null);
  }

  /** Version passing in a GeneralDataset, which may have binary or real-valued features. */
  public LogConditionalObjectiveFunction(GeneralDataset<L, F> dataset, LogPrior prior, boolean useSumCondObjFun,
                                         float[] dataWeights) {
    this.prior = prior;
    this.useSummedConditionalLikelihood = useSumCondObjFun;
    this.numFeatures = dataset.numFeatures();
    this.numClasses = dataset.numClasses();
    this.data = dataset.getDataArray();
    this.labels = dataset.getLabelsArray();
    this.values = dataset.getValuesArray();
    if (dataWeights != null) {
      this.dataWeights = dataWeights;
    } else if (dataset instanceof WeightedDataset<?, ?>) {
      this.dataWeights = ((WeightedDataset<L, F>) dataset).getWeights();
    } else {
      this.dataWeights = null;
    }
    this.labelIndex = null;
    this.featureIndex = null;
    this.dataIterable = null;
  }
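
  /* Illustrative sketch of building this objective from a dataset (the labels and
   * feature names here are made up for the example, not part of any fixed API):
   *
   *   Dataset<String, String> train = new Dataset<>();
   *   train.add(new edu.stanford.nlp.ling.BasicDatum<>(
   *       java.util.Arrays.asList("f1", "f2"), "POSITIVE"));
   *   train.add(new edu.stanford.nlp.ling.BasicDatum<>(
   *       java.util.Arrays.asList("f2", "f3"), "NEGATIVE"));
   *   LogConditionalObjectiveFunction<String, String> obj =
   *       new LogConditionalObjectiveFunction<>(train);  // quadratic prior by default
   */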
  // TODO: test this [none of our code actually even uses it].
  /** Version where an Iterable is passed in for the data. Doesn't support dataWeights. */
  public LogConditionalObjectiveFunction(Iterable<Datum<L, F>> dataIterable, LogPrior logPrior,
                                         Index<F> featureIndex, Index<L> labelIndex) {
    this.prior = logPrior;
    this.useSummedConditionalLikelihood = false;
    this.numFeatures = featureIndex.size();
    this.numClasses = labelIndex.size();
    this.data = null;
    this.dataIterable = dataIterable;
    this.labelIndex = labelIndex;
    this.featureIndex = featureIndex;
    this.labels = null;
    this.values = null;
    this.dataWeights = null;
  }

  public LogConditionalObjectiveFunction(int numFeatures, int numClasses, int[][] data, int[] labels,
                                         boolean useSumCondObjFun) {
    this(numFeatures, numClasses, data, labels, null, new LogPrior(LogPrior.LogPriorType.QUADRATIC),
        useSumCondObjFun);
  }

  public LogConditionalObjectiveFunction(int numFeatures, int numClasses, int[][] data, int[] labels) {
    this(numFeatures, numClasses, data, labels, new LogPrior(LogPrior.LogPriorType.QUADRATIC));
  }

  public LogConditionalObjectiveFunction(int numFeatures, int numClasses, int[][] data, int[] labels,
                                         LogPrior prior) {
    this(numFeatures, numClasses, data, labels, null, prior);
  }

  public LogConditionalObjectiveFunction(int numFeatures, int numClasses, int[][] data, int[] labels,
                                         float[] dataWeights) {
    this(numFeatures, numClasses, data, labels, dataWeights, new LogPrior(LogPrior.LogPriorType.QUADRATIC));
  }

  public LogConditionalObjectiveFunction(int numFeatures, int numClasses, int[][] data, int[] labels,
                                         float[] dataWeights, LogPrior prior) {
    this(numFeatures, numClasses, data, labels, dataWeights, prior, false);
  }

  /** For binary features. Supports dataWeights. */
  public LogConditionalObjectiveFunction(int numFeatures, int numClasses, int[][] data, int[] labels,
                                         float[] dataWeights, LogPrior prior,
                                         boolean useSummedConditionalLikelihood) {
    this.numFeatures = numFeatures;
    this.numClasses = numClasses;
    this.data = data;
    this.values = null;
    this.labels = labels;
    this.prior = prior;
    this.dataWeights = dataWeights;
    this.labelIndex = null;
    this.featureIndex = null;
    this.dataIterable = null;
    this.useSummedConditionalLikelihood = useSummedConditionalLikelihood;
  }

  public LogConditionalObjectiveFunction(int numFeatures, int numClasses, int[][] data, int[] labels,
                                         int intPrior, double sigma, double epsilon) {
    this(numFeatures, numClasses, data, null, labels, intPrior, sigma, epsilon);
  }

  /** For real-valued features. Passing in a processed data set. */
  public LogConditionalObjectiveFunction(int numFeatures, int numClasses, int[][] data, double[][] values,
                                         int[] labels, int intPrior, double sigma, double epsilon) {
    this.numFeatures = numFeatures;
    this.numClasses = numClasses;
    this.data = data;
    this.values = values;
    this.labels = labels;
    this.prior = new LogPrior(intPrior, sigma, epsilon);
    this.labelIndex = null;
    this.featureIndex = null;
    this.dataIterable = null;
    this.useSummedConditionalLikelihood = false;
    this.dataWeights = null;
  }

}