package edu.stanford.nlp.ie.crf;
import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
/**
* @author Mengqiu Wang
*/
public class NonLinearCliquePotentialFunction implements CliquePotentialFunction {
private final double[][] linearWeights;
private final double[][] inputLayerWeights; // first index is number of hidden units in layer one, second index is the input feature indices
private final double[][] outputLayerWeights; // first index is the output class, second index is the number of hidden units
private final SeqClassifierFlags flags;
private double[] layerOneCache, hiddenLayerCache;
private static double sigmoid(double x) {
return 1 / (1 + Math.exp(-x));
}
public NonLinearCliquePotentialFunction(double[][] linearWeights, double[][] inputLayerWeights, double[][] outputLayerWeights, SeqClassifierFlags flags) {
this.linearWeights = linearWeights;
this.inputLayerWeights = inputLayerWeights;
this.outputLayerWeights = outputLayerWeights;
this.flags = flags;
}
public double[] hiddenLayerOutput(double[][] inputLayerWeights, int[] nodeCliqueFeatures, SeqClassifierFlags aFlag, double[] featureVal) {
int layerOneSize = inputLayerWeights.length;
if (layerOneCache == null || layerOneSize != layerOneCache.length)
layerOneCache = new double[layerOneSize];
for (int i = 0; i < layerOneSize; i++) {
double[] ws = inputLayerWeights[i];
double lOneW = 0;
for (int m = 0; m < nodeCliqueFeatures.length; m++) {
double dotProd = ws[nodeCliqueFeatures[m]];
if (featureVal != null)
dotProd *= featureVal[m];
lOneW += dotProd;
}
layerOneCache[i] = lOneW;
}
if (!aFlag.useHiddenLayer)
return layerOneCache;
// transform layer one through hidden
if (hiddenLayerCache == null || layerOneSize != hiddenLayerCache.length)
hiddenLayerCache = new double[layerOneSize];
for (int i = 0; i < layerOneSize; i++) {
if (aFlag.useSigmoid) {
hiddenLayerCache[i] = sigmoid(layerOneCache[i]);
} else {
hiddenLayerCache[i] = Math.tanh(layerOneCache[i]);
}
}
return hiddenLayerCache;
}
@Override
public double computeCliquePotential(int cliqueSize, int labelIndex,
int[] cliqueFeatures, double[] featureVal, int posInSent) {
double output = 0.0;
if (cliqueSize > 1) { // linear potential for edge cliques
for (int cliqueFeature : cliqueFeatures) {
output += linearWeights[cliqueFeature][labelIndex];
}
} else { // non-linear potential for node cliques
double[] hiddenLayer = hiddenLayerOutput(inputLayerWeights, cliqueFeatures, flags, featureVal);
int outputLayerSize = inputLayerWeights.length / outputLayerWeights[0].length;
// transform the hidden layer to output layer through linear transformation
if (flags.useOutputLayer) {
double[] outputWs; // initialized immediately below
if (flags.tieOutputLayer) {
outputWs = outputLayerWeights[0];
} else {
outputWs = outputLayerWeights[labelIndex];
}
if (flags.softmaxOutputLayer) {
outputWs = ArrayMath.softmax(outputWs);
}
for (int i = 0; i < inputLayerWeights.length; i++) {
if (flags.sparseOutputLayer || flags.tieOutputLayer) {
if (i % outputLayerSize == labelIndex) {
output += outputWs[ i / outputLayerSize ] * hiddenLayer[i];
}
} else {
output += outputWs[i] * hiddenLayer[i];
}
}
} else {
output = hiddenLayer[labelIndex];
}
}
return output;
}
}