ChineseSerializationITest.java example

Explorer
CoreNLP-master
package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.SegmenterCoreAnnotations;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;
import org.junit.Test;

import java.io.*;
import java.util.*;

import static org.junit.Assert.*;

public class ChineseSerializationITest {

  @Test
  public void testChineseSerialization() {

    try {
      AnnotationSerializer serializer = new ProtobufAnnotationSerializer();
      // write Chinese doc
      String sampleChineseDocument = "巴拉克·奥巴马是美国总统。他在2008年当选";
      Properties chineseProperties = StringUtils.argsToProperties("-props",
              "StanfordCoreNLP-chinese.properties");
      Annotation doc = new StanfordCoreNLP(chineseProperties).process(sampleChineseDocument);
      ByteArrayOutputStream ks = new ByteArrayOutputStream();
      serializer.write(doc, ks).close();
      // read
      InputStream kis = new ByteArrayInputStream(ks.toByteArray());
      Pair<Annotation, InputStream> pair = serializer.read(kis);
      pair.second.close();
      Annotation readDoc = pair.first;
      kis.close();
      // check characters are equal
      List<CoreLabel> docChars = doc.get(SegmenterCoreAnnotations.CharactersAnnotation.class);
      List<CoreLabel> readDocChars = doc.get(SegmenterCoreAnnotations.CharactersAnnotation.class);
      assertEquals(docChars.size(),readDocChars.size());
      int numChars = docChars.size();
      int currChar = 0;
      while (currChar < numChars) {
        assertEquals(docChars.get(currChar),readDocChars.get(currChar));
        currChar++;
      }
      // check that sentences are equal
      /*int sentenceCount = 0;
      while (sentenceCount < doc.get(CoreAnnotations.SentencesAnnotation.class).size()) {
        assertEquals(doc.get(CoreAnnotations.SentencesAnnotation.class).get(sentenceCount),
                readDoc.get(CoreAnnotations.SentencesAnnotation.class).get(sentenceCount));
        sentenceCount++;
      }*/
      // check JSON output is same
      String docJSON = JSONOutputter.jsonPrint(doc);
      String readDocJSON = JSONOutputter.jsonPrint(readDoc);
      assertEquals(docJSON,readDocJSON);
    } catch (Exception e) { throw new RuntimeException(e); }
  }

}