package edu.stanford.nlp.sequences; import java.util.ArrayList; import java.util.List; import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.stats.ClassicCounter; import edu.stanford.nlp.stats.Counter; import junit.framework.TestCase; /** Tests the any kind of IOB-style notation processing. * In particular, this tests the IOB encoding results counting. * * @author Christopher Manning * @author John Bauer */ public class IOBUtilsTest extends TestCase { private static final String[] words = {"Deportivo", "scored", "when", "AJ", "Auxerre", "playmaker", "Corentine", "Angelo", "Martins", "tripped", "on", "Brazilian-born", "Spanish", "Donato", "." }; private static final String[] iob1 = { "I-ORG", "O", "O", "I-ORG", "I-ORG", "O", "I-PER", "I-PER", "I-PER", "O", "O", "I-MISC", "B-MISC", "I-PER", "O" }; private static final String[] iob2 = { "B-ORG", "O", "O", "B-ORG", "I-ORG", "O", "B-PER", "I-PER", "I-PER", "O", "O", "B-MISC", "B-MISC", "B-PER", "O" }; private static final String[] iobes = { "S-ORG", "O", "O", "B-ORG", "E-ORG", "O", "B-PER", "I-PER", "E-PER", "O", "O", "S-MISC", "S-MISC", "S-PER", "O" }; private static final String[] io = { "I-ORG", "O", "O", "I-ORG", "I-ORG", "O", "I-PER", "I-PER", "I-PER", "O", "O", "I-MISC", "I-MISC", "I-PER", "O" }; private static final String[] noprefix = { "ORG", "O", "O", "ORG", "ORG", "O", "PER", "PER", "PER", "O", "O", "MISC", "MISC", "PER", "O" }; private static final String[] bilou = { "U-ORG", "O", "O", "B-ORG", "L-ORG", "O", "B-PER", "I-PER", "L-PER", "O", "O", "U-MISC", "U-MISC", "U-PER", "O" }; public void testIOB1IOB2() { List<CoreLabel> testInput = loadCoreLabelList(words, iob1); IOBUtils.entitySubclassify(testInput, CoreAnnotations.AnswerAnnotation.class, "O", "iob2", true); checkAnswers(testInput, words, iob2); } public void testIOB1IOB1() { List<CoreLabel> testInput = loadCoreLabelList(words, iob1); IOBUtils.entitySubclassify(testInput, CoreAnnotations.AnswerAnnotation.class, "O", "iob1", true); checkAnswers(testInput, words, iob1); } public void testIOB2IOB1() { List<CoreLabel> testInput = loadCoreLabelList(words, iob2); IOBUtils.entitySubclassify(testInput, CoreAnnotations.AnswerAnnotation.class, "O", "iob1", true); checkAnswers(testInput, words, iob1); } public void testIOB2IOBES() { List<CoreLabel> testInput = loadCoreLabelList(words, iob2); IOBUtils.entitySubclassify(testInput, CoreAnnotations.AnswerAnnotation.class, "O", "iobes", true); checkAnswers(testInput, words, iobes); } public void testIOBESIOB1() { List<CoreLabel> testInput = loadCoreLabelList(words, iobes); IOBUtils.entitySubclassify(testInput, CoreAnnotations.AnswerAnnotation.class, "O", "iob1", true); checkAnswers(testInput, words, iob1); } public void testIOB1IO() { List<CoreLabel> testInput = loadCoreLabelList(words, iob1); IOBUtils.entitySubclassify(testInput, CoreAnnotations.AnswerAnnotation.class, "O", "io", true); checkAnswers(testInput, words, io); } public void testIOB1NoPrefix() { List<CoreLabel> testInput = loadCoreLabelList(words, iob1); IOBUtils.entitySubclassify(testInput, CoreAnnotations.AnswerAnnotation.class, "O", "noprefix", true); checkAnswers(testInput, words, noprefix); } public void testNoPrefixIO() { List<CoreLabel> testInput = loadCoreLabelList(words, noprefix); IOBUtils.entitySubclassify(testInput, CoreAnnotations.AnswerAnnotation.class, "O", "io", true); checkAnswers(testInput, words, io); } public void testBILOUIOBES() { List<CoreLabel> testInput = loadCoreLabelList(words, bilou); IOBUtils.entitySubclassify(testInput, CoreAnnotations.AnswerAnnotation.class, "O", "iobes", true); checkAnswers(testInput, words, iobes); } public void testIOB2BILOU() { List<CoreLabel> testInput = loadCoreLabelList(words, iob2); IOBUtils.entitySubclassify(testInput, CoreAnnotations.AnswerAnnotation.class, "O", "BILOU", true); checkAnswers(testInput, words, bilou); } private static List<CoreLabel> loadCoreLabelList(String[] words, String[] answers) { List<CoreLabel> testInput = new ArrayList<>(); String[] fields = { "word", "answer"}; String[] values = new String[2]; assertEquals(words.length, answers.length); for (int i = 0; i < words.length; i++) { values[0] = words[i]; values[1] = answers[i]; CoreLabel c = new CoreLabel(fields, values); testInput.add(c); } return testInput; } private static void checkAnswers(List<CoreLabel> testInput, String[] words, String[] answers) { for (int i = 0; i < testInput.size(); i++) { assertEquals("Wrong for " + words[i], answers[i], testInput.get(i).get(CoreAnnotations.AnswerAnnotation.class)); } } private static final String BG = "O"; private static final String[][] labelsIOB2 = { { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG }, // 0 { BG, BG, BG, BG, "I-A", BG, BG, BG, BG, BG }, // 1 { BG, BG, BG, BG, "I-A", "I-A", BG, BG, BG, BG }, // 2 { BG, BG, BG, "I-A", "I-A", BG, BG, BG, BG, BG }, // 3 { BG, BG, BG, BG, "I-A", "I-B", BG, BG, BG, BG }, // 4 { BG, BG, BG, BG, "I-A", "B-A", BG, BG, BG, BG }, // 5 { BG, BG, BG, BG, BG, BG, BG, BG, BG, "I-A" }, // 6 }; private static void runIOBResultsTest(String[] gold, String[] guess, double tp, double fp, double fn) { List<CoreLabel> sentence = makeListCoreLabel(gold, guess); Counter<String> entityTP = new ClassicCounter<>(); Counter<String> entityFP = new ClassicCounter<>(); Counter<String> entityFN = new ClassicCounter<>(); IOBUtils.countEntityResults(sentence, entityTP, entityFP, entityFN, BG); assertEquals("For true positives", tp, entityTP.totalCount(), 0.0001); assertEquals("For false positives", fp, entityFP.totalCount(), 0.0001); assertEquals("For false negatives", fn, entityFN.totalCount(), 0.0001); } private static List<CoreLabel> makeListCoreLabel(String[] gold, String[] guess) { assertEquals("Cannot run test on lists of different length", gold.length, guess.length); List<CoreLabel> sentence = new ArrayList<>(); for (int i = 0; i < gold.length; ++i) { CoreLabel word = new CoreLabel(); word.set(CoreAnnotations.GoldAnswerAnnotation.class, gold[i]); word.set(CoreAnnotations.AnswerAnnotation.class, guess[i]); sentence.add(word); } return sentence; } public void testIOB2Results() { runIOBResultsTest(labelsIOB2[0], labelsIOB2[0], 0, 0, 0); runIOBResultsTest(labelsIOB2[0], labelsIOB2[1], 0, 1, 0); runIOBResultsTest(labelsIOB2[1], labelsIOB2[0], 0, 0, 1); runIOBResultsTest(labelsIOB2[1], labelsIOB2[1], 1, 0, 0); runIOBResultsTest(labelsIOB2[0], labelsIOB2[2], 0, 1, 0); runIOBResultsTest(labelsIOB2[2], labelsIOB2[0], 0, 0, 1); runIOBResultsTest(labelsIOB2[1], labelsIOB2[2], 0, 1, 1); runIOBResultsTest(labelsIOB2[2], labelsIOB2[1], 0, 1, 1); runIOBResultsTest(labelsIOB2[2], labelsIOB2[2], 1, 0, 0); runIOBResultsTest(labelsIOB2[0], labelsIOB2[3], 0, 1, 0); runIOBResultsTest(labelsIOB2[3], labelsIOB2[0], 0, 0, 1); runIOBResultsTest(labelsIOB2[1], labelsIOB2[3], 0, 1, 1); runIOBResultsTest(labelsIOB2[3], labelsIOB2[1], 0, 1, 1); runIOBResultsTest(labelsIOB2[2], labelsIOB2[3], 0, 1, 1); runIOBResultsTest(labelsIOB2[3], labelsIOB2[2], 0, 1, 1); runIOBResultsTest(labelsIOB2[3], labelsIOB2[3], 1, 0, 0); runIOBResultsTest(labelsIOB2[0], labelsIOB2[4], 0, 2, 0); runIOBResultsTest(labelsIOB2[4], labelsIOB2[0], 0, 0, 2); runIOBResultsTest(labelsIOB2[1], labelsIOB2[4], 1, 1, 0); runIOBResultsTest(labelsIOB2[4], labelsIOB2[1], 1, 0, 1); runIOBResultsTest(labelsIOB2[2], labelsIOB2[4], 0, 2, 1); runIOBResultsTest(labelsIOB2[4], labelsIOB2[2], 0, 1, 2); runIOBResultsTest(labelsIOB2[3], labelsIOB2[4], 0, 2, 1); runIOBResultsTest(labelsIOB2[4], labelsIOB2[3], 0, 1, 2); runIOBResultsTest(labelsIOB2[4], labelsIOB2[4], 2, 0, 0); runIOBResultsTest(labelsIOB2[0], labelsIOB2[5], 0, 2, 0); runIOBResultsTest(labelsIOB2[5], labelsIOB2[0], 0, 0, 2); runIOBResultsTest(labelsIOB2[1], labelsIOB2[5], 1, 1, 0); runIOBResultsTest(labelsIOB2[5], labelsIOB2[1], 1, 0, 1); runIOBResultsTest(labelsIOB2[2], labelsIOB2[5], 0, 2, 1); runIOBResultsTest(labelsIOB2[5], labelsIOB2[2], 0, 1, 2); runIOBResultsTest(labelsIOB2[3], labelsIOB2[5], 0, 2, 1); runIOBResultsTest(labelsIOB2[5], labelsIOB2[3], 0, 1, 2); runIOBResultsTest(labelsIOB2[4], labelsIOB2[5], 1, 1, 1); runIOBResultsTest(labelsIOB2[5], labelsIOB2[4], 1, 1, 1); runIOBResultsTest(labelsIOB2[5], labelsIOB2[5], 2, 0, 0); runIOBResultsTest(labelsIOB2[0], labelsIOB2[6], 0, 1, 0); runIOBResultsTest(labelsIOB2[6], labelsIOB2[0], 0, 0, 1); runIOBResultsTest(labelsIOB2[1], labelsIOB2[6], 0, 1, 1); runIOBResultsTest(labelsIOB2[6], labelsIOB2[1], 0, 1, 1); runIOBResultsTest(labelsIOB2[2], labelsIOB2[6], 0, 1, 1); runIOBResultsTest(labelsIOB2[6], labelsIOB2[2], 0, 1, 1); runIOBResultsTest(labelsIOB2[3], labelsIOB2[6], 0, 1, 1); runIOBResultsTest(labelsIOB2[6], labelsIOB2[3], 0, 1, 1); runIOBResultsTest(labelsIOB2[4], labelsIOB2[6], 0, 1, 2); runIOBResultsTest(labelsIOB2[6], labelsIOB2[4], 0, 2, 1); runIOBResultsTest(labelsIOB2[5], labelsIOB2[6], 0, 1, 2); runIOBResultsTest(labelsIOB2[6], labelsIOB2[5], 0, 2, 1); runIOBResultsTest(labelsIOB2[6], labelsIOB2[6], 1, 0, 0); } private static final String[][] labelsIOB = { { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG }, // 0 { BG, BG, BG, BG, "B-A", BG, BG, BG, BG, BG }, // 1 { BG, BG, BG, BG, "B-A", "I-A", BG, BG, BG, BG }, // 2 { BG, BG, BG, "B-A", "I-A", BG, BG, BG, BG, BG }, // 3 { BG, BG, BG, BG, "B-A", "B-A", BG, BG, BG, BG }, // 4 }; public void testIOBResults() { // gold, guess, tp, fp, fn runIOBResultsTest(labelsIOB[0], labelsIOB[0], 0, 0, 0); runIOBResultsTest(labelsIOB[0], labelsIOB[1], 0, 1, 0); runIOBResultsTest(labelsIOB[1], labelsIOB[0], 0, 0, 1); runIOBResultsTest(labelsIOB[1], labelsIOB[1], 1, 0, 0); runIOBResultsTest(labelsIOB[0], labelsIOB[2], 0, 1, 0); runIOBResultsTest(labelsIOB[2], labelsIOB[0], 0, 0, 1); runIOBResultsTest(labelsIOB[2], labelsIOB[2], 1, 0, 0); runIOBResultsTest(labelsIOB[0], labelsIOB[3], 0, 1, 0); runIOBResultsTest(labelsIOB[3], labelsIOB[0], 0, 0, 1); runIOBResultsTest(labelsIOB[1], labelsIOB[3], 0, 1, 1); runIOBResultsTest(labelsIOB[3], labelsIOB[1], 0, 1, 1); runIOBResultsTest(labelsIOB[2], labelsIOB[3], 0, 1, 1); runIOBResultsTest(labelsIOB[3], labelsIOB[2], 0, 1, 1); runIOBResultsTest(labelsIOB[3], labelsIOB[3], 1, 0, 0); runIOBResultsTest(labelsIOB[2], labelsIOB[4], 0, 2, 1); runIOBResultsTest(labelsIOB[4], labelsIOB[2], 0, 1, 2); } private static final String[][] labelsIOE = { { BG, BG, BG, BG, "I-A", "E-A", "I-A", BG, BG, BG }, // 0 { BG, BG, BG, BG, "I-A", "L-A", "I-A", BG, BG, BG }, // 1 { BG, BG, BG, BG, "I-A", "I-A", "I-A", BG, BG, BG }, // 2 }; public void testIOEResults() { // gold, guess, tp, fp, fn runIOBResultsTest(labelsIOE[0], labelsIOE[1], 2, 0, 0); runIOBResultsTest(labelsIOE[0], labelsIOE[2], 0, 1, 2); runIOBResultsTest(labelsIOE[2], labelsIOE[0], 0, 2, 1); runIOBResultsTest(labelsIOE[0], labelsIOB[2], 1, 0, 1); } private static final String[][] labelsIO = { { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG }, // 0 { BG, BG, BG, BG, "I-A", BG, BG, BG, BG, BG }, // 1 { BG, BG, BG, BG, "I-A", "I-A", BG, BG, BG, BG }, // 2 { BG, BG, BG, "I-A", "I-A", BG, BG, BG, BG, BG }, // 3 { BG, BG, BG, "I-A", "I-A", "I-A", "I-A", BG, BG, BG }, // 4 { BG, BG, BG, "I-A", "I-B", "I-B", "I-A", BG, BG, BG }, // 5 { BG, BG, BG, "I-A", "I-A", "I-B", "I-A", BG, BG, BG }, // 6 }; public void testIOResults() { // gold, guess, tp, fp, fn runIOBResultsTest(labelsIOB[2], labelsIO[2], 1, 0, 0); runIOBResultsTest(labelsIOB[4], labelsIO[2], 0, 1, 2); runIOBResultsTest(labelsIO[2], labelsIOB[2], 1, 0, 0); runIOBResultsTest(labelsIO[2], labelsIOB[4], 0, 2, 1); runIOBResultsTest(labelsIO[4], labelsIO[5], 0, 3, 1); runIOBResultsTest(labelsIO[4], labelsIO[6], 0, 3, 1); runIOBResultsTest(labelsIO[5], labelsIO[6], 1, 2, 2); } private static final String[][] labelsIOBES = { { BG, BG, BG, "B-A", "E-A", BG, BG, BG, BG, BG }, // 0 { BG, BG, BG, "B-A", "L-A", BG, BG, BG, BG, BG }, // 1 { BG, BG, BG, "B-A", "I-A", "I-A", "E-A", BG, BG, BG }, // 2 { BG, BG, BG, "B-A", "I-A", "I-A", "L-A", BG, BG, BG }, // 3 { BG, BG, BG, "B-A", "L-A", "U-A", "U-A", BG, BG, BG }, // 4 }; public void testIOBESResults() { // gold, guess, tp, fp, fn runIOBResultsTest(labelsIOBES[0], labelsIOBES[1], 1, 0, 0); runIOBResultsTest(labelsIOBES[4], labelsIOBES[0], 1, 0, 2); runIOBResultsTest(labelsIOBES[2], labelsIOBES[3], 1, 0, 0); runIOBResultsTest(labelsIOBES[2], labelsIOBES[4], 0, 3, 1); } }