package edu.stanford.nlp.classify;
import edu.stanford.nlp.optimization.AbstractCachingDiffFunction;
import java.util.Arrays;
/**
* Maximizes the conditional likelihood of a binary logistic regression
* model under a given prior.  The value computed is the negative log
* conditional likelihood plus the prior penalty, so minimizing this
* objective maximizes the penalized likelihood.  Because the problem is
* binary, optimizations are possible that cannot be done in
* LogConditionalObjectiveFunction.
*
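* A minimal usage sketch (a hedged illustration: the toy data and the use
* of edu.stanford.nlp.optimization.QNMinimizer are assumptions, not taken
* from this file):
*
* <pre>{@code
* int[][] data = {{0, 2}, {1}, {0, 1, 2}}; // active feature indices per datum
* int[] labels = {1, 0, 1};                // binary labels in {0, 1}
* LogisticObjectiveFunction lof = new LogisticObjectiveFunction(3, data, labels);
* double[] w = new QNMinimizer().minimize(lof, 1e-4, new double[lof.domainDimension()]);
* }</pre>
*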
* @author Galen Andrew
*/
public class LogisticObjectiveFunction extends AbstractCachingDiffFunction {
private final int numFeatures;
private final int[][] data; // for each datum, the indices of its active features
private final double[][] dataValues; // real feature values per datum; null means binary features
private final int[] labels; // binary labels in {0, 1}
protected float[] dataweights = null; // optional per-datum weights; null means uniform weighting
private final LogPrior prior;
@Override
public int domainDimension() {
return numFeatures;
}
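/**
* Computes the value and gradient for binary-valued features.  With the
* per-datum score s equal to the sum of x[f] over the datum's active
* features, the loss added to the value is log(1 + exp(s)) for label 0 and
* log(1 + exp(-s)) for label 1 (the negative log probability of the
* observed label); each active feature's gradient entry grows by the
* matching sigmoid, 1/(1 + exp(-s)) or -1/(1 + exp(s)).
*/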
@Override
protected void calculate(double[] x) {
if (dataValues != null) {
calculateRVF(x);
return;
}
value = 0.0;
Arrays.fill(derivative, 0.0);
for (int d = 0; d < data.length; d++) {
int[] features = data[d];
double sum = 0;
for (int feature1 : features) {
sum += x[feature1];
}
double expSum, derivativeIncrement;
// Note: Math.exp may overflow to Infinity for large |sum|; the sigmoid
// below still saturates correctly, though value can then become infinite.
if (labels[d] == 0) {
// loss = log(1 + exp(sum)); gradient term = sigmoid(sum) = 1/(1 + exp(-sum))
expSum = Math.exp(sum);
derivativeIncrement = 1.0 / (1.0 + (1.0 / expSum));
} else {
// loss = log(1 + exp(-sum)); gradient term = -sigmoid(-sum) = -1/(1 + exp(sum))
expSum = Math.exp(-sum);
derivativeIncrement = -1.0 / (1.0 + (1.0 / expSum));
}
if (dataweights == null) {
value += Math.log(1.0 + expSum);
} else {
// Scale this datum's loss and gradient contribution by its weight.
value += Math.log(1.0 + expSum) * dataweights[d];
derivativeIncrement *= dataweights[d];
}
// Binary features: every active feature receives the same increment.
for (int feature : features) {
derivative[feature] += derivativeIncrement;
}
}
value += prior.compute(x, derivative); // adds the prior penalty to value and its gradient to derivative
}
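/**
* Same objective as {@link #calculate(double[])}, but for real-valued
* features: the score is the weighted sum of x[f] * v[f] over the datum's
* (feature, value) pairs, and by the chain rule each gradient contribution
* is scaled by the feature value v[f].
*/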
protected void calculateRVF(double[] x) {
value = 0.0;
Arrays.fill(derivative, 0.0);
for (int d = 0; d < data.length; d++) {
int[] features = data[d];
double[] values = dataValues[d];
double sum = 0;
for (int f = 0; f < features.length; f++) {
sum += x[features[f]] * values[f];
}
double expSum, derivativeIncrement;
// Same label-dependent loss and sigmoid as in calculate(double[]),
// with sum now the weighted score.
if (labels[d] == 0) {
expSum = Math.exp(sum);
derivativeIncrement = 1.0 / (1.0 + (1.0 / expSum));
} else {
expSum = Math.exp(-sum);
derivativeIncrement = -1.0 / (1.0 + (1.0 / expSum));
}
if (dataweights == null) {
value += Math.log(1.0 + expSum);
} else {
value += Math.log(1.0 + expSum) * dataweights[d];
derivativeIncrement *= dataweights[d];
}
for (int f = 0; f < features.length; f++) {
// Chain rule: each feature's gradient entry is scaled by its value.
derivative[features[f]] += values[f] * derivativeIncrement;
}
}
value += prior.compute(x, derivative);
}
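// Convenience constructors.  Omitting values means binary-valued features,
// omitting the prior defaults to a quadratic (Gaussian) prior, and omitting
// dataweights weights every datum equally.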
public LogisticObjectiveFunction(int numFeatures, int[][] data, int[] labels) {
this(numFeatures, data, labels, new LogPrior(LogPrior.LogPriorType.QUADRATIC));
}
public LogisticObjectiveFunction(int numFeatures, int[][] data, int[] labels, LogPrior prior) {
this(numFeatures, data, labels, prior, null);
}
public LogisticObjectiveFunction(int numFeatures, int[][] data, int[] labels, float[] dataweights) {
this(numFeatures, data, labels, new LogPrior(LogPrior.LogPriorType.QUADRATIC), dataweights);
}
public LogisticObjectiveFunction(int numFeatures, int[][] data, int[] labels, LogPrior prior, float[] dataweights) {
this(numFeatures, data, null, labels, prior, dataweights);
}
public LogisticObjectiveFunction(int numFeatures, int[][] data, double[][] values, int[] labels) {
this(numFeatures, data, values, labels, new LogPrior(LogPrior.LogPriorType.QUADRATIC));
}
public LogisticObjectiveFunction(int numFeatures, int[][] data, double[][] values, int[] labels, LogPrior prior) {
this(numFeatures, data, values, labels, prior, null);
}
public LogisticObjectiveFunction(int numFeatures, int[][] data, double[][] values, int[] labels, float[] dataweights) {
this(numFeatures, data, values, labels, new LogPrior(LogPrior.LogPriorType.QUADRATIC), dataweights);
}
public LogisticObjectiveFunction(int numFeatures, int[][] data, double[][] values, int[] labels, LogPrior prior, float[] dataweights) {
this.numFeatures = numFeatures;
this.data = data;
this.labels = labels;
this.prior = prior;
this.dataweights = dataweights;
this.dataValues = values;
}
}