package edu.stanford.nlp.pipeline; import java.io.*; import java.util.Properties; import junit.framework.TestCase; import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.semgraph.SemanticGraph; import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations; import edu.stanford.nlp.trees.Tree; import edu.stanford.nlp.trees.TreeCoreAnnotations; import edu.stanford.nlp.util.CoreMap; public class CustomAnnotationSerializerITest extends TestCase { static StanfordCoreNLP fullPipeline = null; static CustomAnnotationSerializer serializer = new CustomAnnotationSerializer(false, false); public void setUp() { synchronized(CustomAnnotationSerializerITest.class) { if (fullPipeline == null) { fullPipeline = new StanfordCoreNLP(); } } } public void testSimple() throws IOException { Annotation annotation = new Annotation("This is a test"); fullPipeline.annotate(annotation); runTest(annotation); } public void testCollapsedGraphs() throws IOException { Annotation annotation = new Annotation("I bought a bone for my dog."); fullPipeline.annotate(annotation); runTest(annotation); } public void testTwoSentences() throws IOException { Annotation annotation = new Annotation("I bought a bone for my dog. He chews it every day."); fullPipeline.annotate(annotation); runTest(annotation); } public void testCopyWordGraphs() throws IOException { Annotation annotation = new Annotation("I went over the river and through the woods"); fullPipeline.annotate(annotation); runTest(annotation); } private void runTest(Annotation annotation) throws IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); serializer.write(annotation, out); byte[] serialized = out.toByteArray(); ByteArrayInputStream in = new ByteArrayInputStream(serialized); Annotation deserialized = serializer.read(in).first(); assertEquals(annotation.get(CoreAnnotations.SentencesAnnotation.class).size(), deserialized.get(CoreAnnotations.SentencesAnnotation.class).size()); for (int i = 0; i < annotation.get(CoreAnnotations.SentencesAnnotation.class).size(); ++i) { verifySentence(annotation.get(CoreAnnotations.SentencesAnnotation.class).get(i), deserialized.get(CoreAnnotations.SentencesAnnotation.class).get(i)); } } private void verifySentence(CoreMap expected, CoreMap result) { assertEquals(expected.get(CoreAnnotations.TokensAnnotation.class).size(), result.get(CoreAnnotations.TokensAnnotation.class).size()); for (int i = 0; i < expected.get(CoreAnnotations.TokensAnnotation.class).size(); ++i) { verifyWord(expected.get(CoreAnnotations.TokensAnnotation.class).get(i), result.get(CoreAnnotations.TokensAnnotation.class).get(i)); } verifyTree(expected.get(TreeCoreAnnotations.TreeAnnotation.class), result.get(TreeCoreAnnotations.TreeAnnotation.class)); verifyGraph(expected.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class), result.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class)); verifyGraph(expected.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class), result.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class)); verifyGraph(expected.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class), result.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class)); } Class[] tokenAnnotations = { CoreAnnotations.TextAnnotation.class, CoreAnnotations.ValueAnnotation.class, CoreAnnotations.LemmaAnnotation.class, CoreAnnotations.PartOfSpeechAnnotation.class, CoreAnnotations.NamedEntityTagAnnotation.class, CoreAnnotations.CharacterOffsetBeginAnnotation.class, CoreAnnotations.CharacterOffsetEndAnnotation.class }; private void verifyTree(Tree expected, Tree result) { if (expected == null) { assertEquals(expected, result); return; } assertEquals(expected.toString(), result.toString()); } private void verifyGraph(SemanticGraph expected, SemanticGraph result) { if (expected == null) { assertEquals(expected, result); return; } assertEquals(expected.vertexSet(), result.vertexSet()); // TODO: Fix the equals for the DirectedMultiGraph so we can compare the two graphs directly assertEquals(expected.toString(), result.toString()); } private void verifyWord(CoreLabel expected, CoreLabel result) { for (Class annotation : tokenAnnotations) { if (expected.get(annotation) == null && result.get(annotation) != null && "".equals(result.get(annotation))) { // allow "" in place of null continue; } assertEquals("Different for class " + annotation, expected.get(annotation), result.get(annotation)); } } }