package edu.stanford.nlp.ie;

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.CoreUtilities;
import junit.framework.TestCase;

/**
 * Tests {@code ClassifierCombiner.mergeTwoDocuments} on one fixed ten-token
 * sentence. Each case labels the sentence twice (a "first" and a "second"
 * answer sequence), merges the second into the first for a given set of
 * auxiliary labels, and compares the first sequence against an expected
 * labeling.
 *
 * @author Christopher Manning
 */
public class ClassifierCombinerTest extends TestCase {

  // The sentence under test and its part-of-speech tags (one entry per token).
  String[] words = { "Joe", "Smith", "drank", "44", "Budweiser", "cans", "at", "Monaco", "Brewing", "." };
  String[] tags = { "NNP", "NNP", "VBD", "CD", "NNP", "NNS", "IN", "NNP", "NNP", "." };

  // Candidate answer sequences used as inputs to the merge (one label per token).
  String[] ans1 = { "PER", "PER", "O", "O", "ORG", "O", "O", "ORG", "ORG", "O" };
  String[] ans2 = { "O", "O", "O", "NUM", "O", "O", "O", "O", "O", "O" };
  String[] ans3 = { "O", "O", "O", "NUM", "PROD", "PROD", "O", "O", "O", "O" };
  String[] ans4 = { "PER", "PER", "O", "O", "O", "O", "O", "O", "O", "O" };
  String[] ans5 = { "O", "O", "O", "NUM", "PROD", "PROD", "O", "ORG", "ORG", "ORG" };
  String[] ans6 = { "O", "O", "O", "O", "O", "O", "O", "O", "O", "O" };
  String[] ans7 = { "PER", "PER", "O", "NUM", "PROD", "PROD", "O", "ORG", "ORG", "ORG" };
  String[] ans8 = { "O", "O", "O", "PROD", "PROD", "O", "O", "O", "O", "O" };
  String[] ans9 = { "O", "O", "O", "O", "O", "O", "O", "O", "O", "NUM" };
  String[] ans10 = { "O", "O", "O", "O", "O", "O", "O", "O", "NUM", "NUM" };
  String[] ans11 = { "O", "O", "O", "O", "PROD", "PROD", "O", "O", "O", "O" };
  String[] ans12 = { "O", "O", "O", "O", "O", "O", "O", "O", "NUM", "NUM" };
  String[] ans13 = { "O", "O", "O", "O", "O", "O", "NUM", "NUM", "O", "O" };
  String[] ans14 = { "O", "O", "O", "O", "O", "O", "FOO", "FOO", "O", "O" };
  String[] ans15 = { "O", "O", "PER", "PER", "O", "O", "O", "O", "O", "O" };
  String[] ans16 = { "O", "O", "FOO", "FOO", "O", "O", "O", "O", "O", "O" };

  // Expected labelings of the first sequence after merging.
  String[] out1 = { "PER", "PER", "O", "NUM", "ORG", "O", "O", "ORG", "ORG", "O" };
  String[] out2 = { "PER", "PER", "O", "NUM", "PROD", "PROD", "O", "ORG", "ORG", "ORG" };
  String[] out3 = { "O", "O", "O", "NUM", "PROD", "PROD", "O", "ORG", "ORG", "ORG" };
  String[] out4 = { "O", "O", "O", "NUM", "O", "O", "O", "O", "O", "NUM" };
  String[] out5 = { "O", "O", "O", "NUM", "O", "O", "O", "O", "NUM", "NUM" };
  String[] out6 = { "O", "O", "O", "O", "O", "O", "NUM", "NUM", "NUM", "NUM" };
  String[] out7 = { "O", "O", "O", "O", "O", "O", "FOO", "FOO", "NUM", "NUM" };
  String[] out8 = { "PER", "PER", "PER", "PER", "O", "O", "O", "O", "O", "O" };
  String[] out9 = { "PER", "PER", "FOO", "FOO", "O", "O", "O", "O", "O", "O" };
  String[] out10 = { "PER", "PER", "O", "NUM", "PROD", "PROD", "O", "O", "O", "O" };

  /**
   * Runs every merge scenario. Each {@code runTest} call supplies the first
   * sequence, the second sequence, the expected merged first sequence, and the
   * auxiliary labels passed to the merge.
   */
  public void testCombination() {
    // test that a non-conflicting label can be added
    runTest(ans1, ans2, out1, "NUM");

    // test that a conflicting label isn't added
    runTest(ans1, ans3, out1, "NUM", "PROD");

    // test that a sequence final label is added (this did not use to work...)
    runTest(ans4, ans5, out2, "NUM", "PROD", "ORG");
    runTest(ans5, ans4, out2, "PER");

    // test that a label not in the auxLabels set isn't added
    runTest(ans6, ans7, out3, "NUM", "PROD", "ORG");

    // test that a sequence initial label is added
    runTest(ans6, ans7, out2, "NUM", "PROD", "ORG", "PER");

    // test that a label segment that conflicts later on isn't added
    runTest(ans1, ans8, ans1, "NUM", "PROD", "ORG", "PER");

    // Test that labels that are already in the first sequence are
    // still added if they are present in later sequences
    runTest(ans2, ans9, out4, "NUM");
    runTest(ans9, ans2, out4, "NUM");
    runTest(ans2, ans10, out5, "NUM");
    runTest(ans10, ans2, out5, "NUM");

    // Test neighbors overlapping
    runTest(ans8, ans11, ans8, "PROD");
    runTest(ans11, ans8, ans11, "PROD");

    // Test non-overlapping neighbors at the end of a sequence
    runTest(ans12, ans13, out6, "NUM");
    runTest(ans13, ans12, out6, "NUM");
    runTest(ans12, ans14, out7, "FOO");
    runTest(ans14, ans12, out7, "NUM");

    // Test non-overlapping neighbors at the start of a sequence
    runTest(ans4, ans15, out8, "PER");
    runTest(ans15, ans4, out8, "PER");
    runTest(ans4, ans16, out9, "FOO");
    runTest(ans16, ans4, out9, "PER");

    // test consecutive labels
    runTest(ans3, ans4, out10, "PER", "NUM", "PROD");

    // test consecutive labels with the two inputs swapped
    runTest(ans4, ans3, out10, "PER", "NUM", "PROD");

    // test a label that conflicted with a main label, followed by a
    // label that doesn't conflict
    runTest(ans2, ans3, ans3, "NUM", "PROD");
  }

  /**
   * Debugging aid: performs the same merge as {@link #runTest} and prints each
   * token of the merged first sequence to standard output as
   * {@code word tag answer}, without asserting anything.
   *
   * @param firstInput answer labels for the first sequence (merged into)
   * @param secondInput answer labels for the second sequence
   * @param expectedOutput not used here; kept so the signature matches
   *     {@link #runTest} and a call can be switched between the two
   * @param labels auxiliary labels handed to the merge
   */
  public void outputResults(String[] firstInput, String[] secondInput,
                            String[] expectedOutput, String ... labels) {
    List<CoreLabel> merged = mergeAnswers(firstInput, secondInput, labels);
    for (CoreLabel word : merged) {
      System.out.println(word.word() + " " + word.tag() + " " +
                         word.get(CoreAnnotations.AnswerAnnotation.class));
    }
  }

  /**
   * Merges {@code secondInput} into {@code firstInput} and asserts that the
   * first sequence then equals {@code expectedOutput}.
   *
   * @param firstInput answer labels for the first sequence (merged into)
   * @param secondInput answer labels for the second sequence
   * @param expectedOutput answer labels the first sequence must carry afterwards
   * @param labels auxiliary labels handed to the merge
   */
  public void runTest(String[] firstInput, String[] secondInput,
                      String[] expectedOutput, String ... labels) {
    List<CoreLabel> expected = CoreUtilities.toCoreLabelList(words, tags, expectedOutput);
    List<CoreLabel> merged = mergeAnswers(firstInput, secondInput, labels);

    // Name the inputs in the failure message: testCombination runs many cases
    // through this method and a bare assertEquals does not say which one broke.
    String description = "merging " + Arrays.toString(secondInput)
        + " into " + Arrays.toString(firstInput)
        + " with aux labels " + Arrays.toString(labels);
    assertEquals(description, expected, merged);
  }

  /**
   * Builds label lists for both answer sequences over the shared
   * {@code words}/{@code tags} fixture, runs
   * {@code ClassifierCombiner.mergeTwoDocuments} on them, and returns the
   * first list, which is where callers read the merge result from.
   */
  private List<CoreLabel> mergeAnswers(String[] firstInput, String[] secondInput, String[] labels) {
    List<CoreLabel> input1 = CoreUtilities.toCoreLabelList(words, tags, firstInput);
    List<CoreLabel> input2 = CoreUtilities.toCoreLabelList(words, tags, secondInput);
    Set<String> auxLabels = new HashSet<String>(Arrays.asList(labels));
    // "O" is presumably the background (non-entity) label -- it is the filler
    // value in every fixture sequence above; confirm against ClassifierCombiner.
    ClassifierCombiner.mergeTwoDocuments(input1, input2, auxLabels, "O");
    return input1;
  }

}