package edu.stanford.nlp.classify;
import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.optimization.AbstractCachingDiffFunction;
import java.util.Arrays;
/**
 * Objective function for a log-linear (softmax) classifier whose training
 * labels are related to the model's classes through a confusion matrix.
 * Minimizing this function maximizes the conditional likelihood of the
 * observed labels under a given prior.
 *
 * <p>For a datum with observed label {@code o} the contribution to the value is
 * {@code -log( sum_c confusionMatrix[o][c] * P(c | datum) )}, where
 * {@code P(c | datum)} is the softmax over the summed weights of the datum's
 * active features. The value returned by the prior is added on top.
 *
 * <p>Weights are stored in a flat array laid out feature-major: the weight for
 * feature {@code f} and class {@code c} lives at index
 * {@code f * numClasses + c}.
 *
 * @author Jenny Finkel
 */
public class BiasedLogConditionalObjectiveFunction extends AbstractCachingDiffFunction {

  /** Prior whose {@code compute} result is added to the objective value. */
  protected LogPrior prior;

  /** Number of features; together with {@link #numClasses} fixes the weight layout. */
  protected int numFeatures = 0;

  /** Number of classes the model distinguishes. */
  protected int numClasses = 0;

  /** Per-datum arrays of active feature indices (stored by reference, not copied). */
  protected int[][] data = null;

  /** Observed label of each datum, parallel to {@link #data}. */
  protected int[] labels = null;

  /**
   * Weights indexed as {@code [observedLabel][trueLabel]}.
   * NOTE(review): presumably the probability of seeing the observed label given
   * the true label - confirm against callers. Stored by reference, not copied.
   */
  private final double[][] confusionMatrix;

  /**
   * Creates an objective over a dataset using a default quadratic prior.
   *
   * @param dataset source of feature/class counts, data array and labels array
   * @param confusionMatrix weights indexed as {@code [observedLabel][trueLabel]}
   */
  public BiasedLogConditionalObjectiveFunction(GeneralDataset<?, ?> dataset, double[][] confusionMatrix) {
    this(dataset, confusionMatrix, new LogPrior(LogPrior.LogPriorType.QUADRATIC));
  }

  /**
   * Creates an objective over a dataset using the supplied prior.
   *
   * @param dataset source of feature/class counts, data array and labels array
   * @param confusionMatrix weights indexed as {@code [observedLabel][trueLabel]}
   * @param prior prior whose value is added to the objective
   */
  public BiasedLogConditionalObjectiveFunction(GeneralDataset<?, ?> dataset, double[][] confusionMatrix, LogPrior prior) {
    this(dataset.numFeatures(), dataset.numClasses(), dataset.getDataArray(), dataset.getLabelsArray(), confusionMatrix, prior);
  }

  /**
   * Creates an objective over raw arrays using a default quadratic prior.
   *
   * @param numFeatures number of features
   * @param numClasses number of classes
   * @param data per-datum arrays of active feature indices
   * @param labels observed label of each datum
   * @param confusionMatrix weights indexed as {@code [observedLabel][trueLabel]}
   */
  public BiasedLogConditionalObjectiveFunction(int numFeatures, int numClasses, int[][] data, int[] labels, double[][] confusionMatrix) {
    this(numFeatures, numClasses, data, labels, confusionMatrix, new LogPrior(LogPrior.LogPriorType.QUADRATIC));
  }

  /**
   * Creates an objective over raw arrays using the supplied prior. All array
   * arguments are kept by reference.
   *
   * @param numFeatures number of features
   * @param numClasses number of classes
   * @param data per-datum arrays of active feature indices
   * @param labels observed label of each datum
   * @param confusionMatrix weights indexed as {@code [observedLabel][trueLabel]}
   * @param prior prior whose value is added to the objective
   */
  public BiasedLogConditionalObjectiveFunction(int numFeatures, int numClasses, int[][] data, int[] labels, double[][] confusionMatrix, LogPrior prior) {
    this.numFeatures = numFeatures;
    this.numClasses = numClasses;
    this.data = data;
    this.labels = labels;
    this.prior = prior;
    this.confusionMatrix = confusionMatrix;
  }

  /**
   * Replaces the prior used by subsequent evaluations.
   *
   * @param prior the new prior
   */
  public void setPrior(LogPrior prior) {
    this.prior = prior;
  }

  /**
   * Returns the length of the flat weight vector.
   *
   * @return {@code numFeatures * numClasses}
   */
  @Override
  public int domainDimension() {
    return numFeatures * numClasses;
  }

  /** Returns the class component of a flat weight index. */
  int classOf(int index) {
    return index % numClasses;
  }

  /** Returns the feature component of a flat weight index. */
  int featureOf(int index) {
    return index / numClasses;
  }

  /**
   * Maps a (feature, class) pair to its position in the flat weight vector.
   *
   * @param f feature index
   * @param c class index
   * @return {@code f * numClasses + c}
   */
  protected int indexOf(int f, int c) {
    return f * numClasses + c;
  }

  /**
   * Copies a flat weight vector into a {@code [numFeatures][numClasses]} matrix.
   *
   * @param x flat weight vector laid out as described by {@link #indexOf(int, int)}
   * @return a newly allocated matrix with {@code result[f][c] == x[indexOf(f, c)]}
   */
  public double[][] to2D(double[] x) {
    double[][] x2 = new double[numFeatures][numClasses];
    for (int f = 0; f < numFeatures; f++) {
      for (int c = 0; c < numClasses; c++) {
        x2[f][c] = x[indexOf(f, c)];
      }
    }
    return x2;
  }

  /**
   * Evaluates the objective and its gradient at {@code x}, storing the results
   * in the inherited {@code value} and {@code derivative} fields.
   *
   * @param x flat weight vector at which to evaluate
   */
  @Override
  protected void calculate(double[] x) {
    if (derivative == null) {
      derivative = new double[x.length];
    } else {
      Arrays.fill(derivative, 0.0);
    }
    value = 0.0;

    // Scratch buffers reused for every datum.
    double[] sums = new double[numClasses];
    double[] weightedSums = new double[numClasses];

    for (int d = 0; d < data.length; d++) {
      int[] features = data[d];
      double[] confusionRow = confusionMatrix[labels[d]];

      // Unnormalized log-score of each class: sum of weights of active features.
      Arrays.fill(sums, 0.0);
      for (int c = 0; c < numClasses; c++) {
        for (int feature : features) {
          sums[c] += x[indexOf(feature, c)];
        }
      }
      double total = ArrayMath.logSum(sums);

      // Same scores with the log confusion weight for the observed label added.
      for (int c = 0; c < numClasses; c++) {
        weightedSums[c] = Math.log(confusionRow[c]) + sums[c];
      }
      double weightedTotal = ArrayMath.logSum(weightedSums);

      // Gradient: model probability minus confusion-weighted posterior.
      for (int c = 0; c < numClasses; c++) {
        double prob = Math.exp(sums[c] - total);
        double weightedProb = Math.exp(weightedSums[c] - weightedTotal);
        double delta = prob - weightedProb;
        for (int feature : features) {
          derivative[indexOf(feature, c)] += delta;
        }
      }

      // log(sum_c confusionRow[c] * exp(sums[c] - total)) == weightedTotal - total.
      // Staying in log-space avoids exp() underflowing to 0 (and log(0) making the
      // value infinite) while the gradient above remains finite.
      value -= weightedTotal - total;
    }

    // The prior returns its value; it is also handed the gradient array
    // (presumably to accumulate its own gradient into it).
    value += prior.compute(x, derivative);
  }
}