// CRFClassifier -- a probabilistic (CRF) sequence model, mainly used for NER.
// Copyright (c) 2002-2008 The Board of Trustees of
// The Leland Stanford Junior University. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
// For more information, bug reports, fixes, contact:
// Christopher Manning
// Dept of Computer Science, Gates 1A
// Stanford CA 94305-9010
// USA
// Support/Questions: java-nlp-user@lists.stanford.edu
// Licensing: java-nlp-support@lists.stanford.edu
package edu.stanford.nlp.ie.crf;
import edu.stanford.nlp.util.logging.Redwood;
import edu.stanford.nlp.sequences.*;
import edu.stanford.nlp.util.*;
import java.io.*;
/**
* Subclass of CRFClassifier for modeling noisy labels.
* @author Mengqiu Wang
*/
public class CRFClassifierNoisyLabel<IN extends CoreMap> extends CRFClassifier<IN> {

  /** A logger for this class */
  private static final Redwood.RedwoodChannels log = Redwood.channels(CRFClassifierNoisyLabel.class);

  /**
   * Label error matrix passed to the noisy-label objective function. It is
   * {@code null} until {@code getObjectiveFunction} loads it from the file named by
   * {@code flags.errorMatrix}, and stays {@code null} when that flag is not set.
   */
  protected double[][] errorMatrix;

  /**
   * Creates a noisy-label CRF classifier; all configuration is delegated to the superclass.
   *
   * @param flags classifier configuration, forwarded unchanged to the superclass constructor
   */
  public CRFClassifierNoisyLabel(SeqClassifierFlags flags) {
    super(flags);
  }

  /**
   * Reads an error matrix from a text file, one trimmed line per matrix row slot, and
   * hands the lines to {@code parseMatrix} (smoothing disabled). The resulting matrix is
   * logged under the heading "Error Matrix P(Observed|Truth)" and returned.
   *
   * <p>The file is decoded with the platform default charset, and the reader is always
   * closed, whether reading succeeds or fails.
   *
   * @param fileName path of the error matrix file
   * @param tagIndex index of tags; its size determines the (square) matrix dimension and
   *     therefore the maximum number of lines the file may contain
   * @param useLogProb forwarded to {@code parseMatrix}
   * @return the parsed matrix
   * @throws UncheckedIOException if the file cannot be opened or read
   * @throws IllegalArgumentException if the file has more lines than {@code tagIndex} has tags
   */
  static double[][] readErrorMatrix(String fileName, Index<String> tagIndex, boolean useLogProb) {
    int matrixSize = tagIndex.size();
    String[] matrixLines = new String[matrixSize];

    // try-with-resources: the original never closed the reader, leaking the file handle.
    try (BufferedReader reader =
        new BufferedReader(new InputStreamReader(new FileInputStream(new File(fileName))))) {
      int lineCount = 0;
      String line;
      while ((line = reader.readLine()) != null) {
        if (lineCount >= matrixSize) {
          // Previously this overran the array and surfaced as an unexplained JVM exit.
          throw new IllegalArgumentException("Error matrix file " + fileName
              + " has more than the expected " + matrixSize + " lines (one per tag)");
        }
        matrixLines[lineCount++] = line.trim();
      }
      // NOTE(review): a file with fewer lines than tags leaves null entries in matrixLines;
      // how parseMatrix treats those is not visible from here -- confirm before relying on it.
    } catch (IOException ex) {
      // Report the failure to the caller instead of terminating the whole JVM.
      throw new UncheckedIOException("Unable to read error matrix file: " + fileName, ex);
    }

    double[][] matrix = parseMatrix(matrixLines, tagIndex, matrixSize, false, useLogProb);

    log.info("Error Matrix P(Observed|Truth): ");
    log.info(ArrayUtils.toString(matrix));

    return matrix;
  }

  /**
   * Builds the noisy-label objective function for the given training data.
   * Presumably this overrides the corresponding superclass hook -- the superclass is not
   * visible here, so verify before annotating with {@code @Override}.
   *
   * <p>On first use, if {@code flags.errorMatrix} names a file, the error matrix is loaded
   * from it with log probabilities enabled (loading the tag index first if it has not been
   * loaded yet) and cached in {@link #errorMatrix}. If no file is configured, a
   * {@code null} matrix is passed through to the objective function.
   *
   * @param data training feature data, forwarded unchanged
   * @param labels training labels, forwarded unchanged
   * @return a new {@code CRFLogConditionalObjectiveFunctionNoisyLabel}
   */
  protected CRFLogConditionalObjectiveFunction getObjectiveFunction(int[][][][] data, int[][] labels) {
    if (errorMatrix == null && flags.errorMatrix != null) {
      if (tagIndex == null) {
        loadTagIndex();
      }
      errorMatrix = readErrorMatrix(flags.errorMatrix, tagIndex, true);
    }

    return new CRFLogConditionalObjectiveFunctionNoisyLabel(data, labels, windowSize, classIndex,
        labelIndices, map, flags.priorType, flags.backgroundSymbol, flags.sigma, null,
        flags.multiThreadGrad, errorMatrix);
  }

} // end class CRFClassifierNoisyLabel