package LBJ2.nlp.seg; import java.util.Vector; import LBJ2.classify.Classifier; import LBJ2.classify.TestDiscrete; import LBJ2.parse.Parser; import LBJ2.util.ClassUtils; /** * This class may be used to produce a detailed report of the <i>segment by * segment</i> performance of a given classifier on given labeled testing * data. Segment by segment performance is computed by using a specified * {@link Token} classifier to induce the predicted segments, and then * computing precision, recall, and F<sub>1</sub> measures on those segments. * A predicted segment is judged as different than a labeled segment if the * two segments start or end at different {@link Token}s, or if they have * different types. * * <p> It is assumed that both of the specified {@link Token} classifiers * (one giving labels and the other giving predictions) produce discrete * predicitions of the form <code>B-<i>type</i></code>, * <code>I-<i>type</i></code>, and <code>O</code> to represent the beginning * of a segment of type <i>type</i>, a token inside a segment of type * <i>type</i>, and a token outside of any segment respectively. * * <p> It is also assumed that the specified {@link LBJ2.parse.Parser} * produces {@link Token}s linked to each other via the <code>previous</code> * and <code>next</code> fields inherited from * {@link LBJ2.parse.LinkedChild}. In order to invoke this class as a * program on the command line, it must also be the case that the parser * implements a constructor with a single <code>String</code> argument. * * <h4>Command Line Usage</h4> * <blockquote><code> * java LBJ2.nlp.seg.BIOTester <classifier> <labeler> * <parser> <test file> * </code></blockquote> * * <h3>Input</h3> * The first three arguments must be fully qualified class names. The fourth * is the name of a file containing labeled testing data to be parsed by the * parser. * * <h3>Output</h3> * The output is generated by the {@link LBJ2.classify.TestDiscrete} class. * * @author Nick Rizzolo **/ public class BIOTester { /** A BIO classifier that classifies {@link Token}s. */ protected Classifier classifier; /** A BIO classifier that produces the true labels of the {@link Token}s. */ protected Classifier labeler; /** A parser that produces {@link Token}s. */ protected Parser parser; /** * Initializing constructor. * * @param c The value for {@link #classifier}. * @param l The value for {@link #labeler}. * @param p The value for {@link #parser}. **/ public BIOTester(Classifier c, Classifier l, Parser p) { classifier = c; labeler = l; parser = p; } /** * This method runs the tester, packaging the results in a * <code>TestDiscrete</code> object. * * @return The performance results. **/ public TestDiscrete test() { TestDiscrete results = new TestDiscrete(); results.addNull("O"); for (Token t = (Token) parser.next(); t != null; t = (Token) parser.next()) { Vector vector = new Vector(); for (; t.next != null; t = (Token) parser.next()) vector.add(t); vector.add(t); int N = vector.size(); String[] predictions = new String[N], labels = new String[N]; for (int i = 0; i < N; ++i) { predictions[i] = classifier.discreteValue(vector.get(i)); labels[i] = labeler.discreteValue(vector.get(i)); } for (int i = 0; i < N; ++i) { String p = "O", l = "O"; int pEnd = -1, lEnd = -1; if (predictions[i].startsWith("B-") || predictions[i].startsWith("I-") && (i == 0 || !predictions[i - 1] .endsWith(predictions[i].substring(2)))) { p = predictions[i].substring(2); pEnd = i; while (pEnd + 1 < N && predictions[pEnd + 1].equals("I-" + p)) ++pEnd; } if (labels[i].startsWith("B-") || labels[i].startsWith("I-") && (i == 0 || !labels[i - 1].endsWith(labels[i].substring(2)))) { l = labels[i].substring(2); lEnd = i; while (lEnd + 1 < N && labels[lEnd + 1].equals("I-" + l)) ++lEnd; } if (!p.equals("O") || !l.equals("O")) { if (pEnd == lEnd) results.reportPrediction(p, l); else { if (!p.equals("O")) results.reportPrediction(p, "O"); if (!l.equals("O")) results.reportPrediction("O", l); } } } } return results; } /** * The command line program simply instantiates an object of this class and * calls its {@link #test()} method. **/ public static void main(String[] args) { String classifierName = null; String labelerName = null; String parserName = null; String inputFile = null; try { classifierName = args[0]; labelerName = args[1]; parserName = args[2]; inputFile = args[3]; if (args.length > 4) throw new Exception(); } catch (Exception e) { System.err.println( "usage: java LBJ2.nlp.seg.BIOTester <classifier> <labeler> <parser> <test file>"); System.exit(1); } Classifier classifier = ClassUtils.getClassifier(classifierName); Classifier labeler = ClassUtils.getClassifier(labelerName); Parser parser = ClassUtils.getParser(parserName, new Class[]{ String.class }, new String[]{ inputFile }); new BIOTester(classifier, labeler, parser) .test().printPerformance(System.out); } }